diff options
author | Federico Ceratto <federico.ceratto@gmail.com> | 2018-03-27 21:28:27 +0000 |
---|---|---|
committer | Federico Ceratto <federico.ceratto@gmail.com> | 2018-03-27 21:28:27 +0000 |
commit | eb7cc2640201f168bbd9a05799fd2dbe823bb57c (patch) | |
tree | 1f88da02b7ee3c16ced7b5f7dc4aadb190766e7c /src | |
parent | Release v. 1.9.0+dfsg-1 to Unstable (diff) | |
parent | New upstream version 1.10.0+dfsg (diff) | |
download | netdata-eb7cc2640201f168bbd9a05799fd2dbe823bb57c.tar.xz netdata-eb7cc2640201f168bbd9a05799fd2dbe823bb57c.zip |
Update upstream source from tag 'upstream/1.10.0+dfsg'
Update to upstream version '1.10.0+dfsg'
with Debian dir fa5485f3d9aea3038a19eff06ba33374ac5c5d7c
Diffstat (limited to '')
107 files changed, 6857 insertions, 3028 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 1a1d37483..df174cbd1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -70,6 +70,7 @@ netdata_SOURCES = \ health_json.c \ health_log.c \ inlined.h \ + locks.c \ locks.h \ log.c \ log.h \ @@ -79,16 +80,10 @@ netdata_SOURCES = \ plugin_checks.h \ plugin_idlejitter.c \ plugin_idlejitter.h \ - plugin_nfacct.c \ - plugin_nfacct.h \ - plugin_tc.c \ - plugin_tc.h \ plugins_d.c \ plugins_d.h \ popen.c \ popen.h \ - proc_self_mountinfo.c \ - proc_self_mountinfo.h \ procfile.c \ procfile.h \ registry.c \ @@ -133,8 +128,8 @@ netdata_SOURCES = \ statsd.h \ storage_number.c \ storage_number.h \ - sys_devices_system_edac_mc.c \ - sys_devices_system_node.c \ + threads.c \ + threads.h \ unit_test.c \ unit_test.h \ url.c \ @@ -179,10 +174,14 @@ else netdata_SOURCES += \ ipc.c \ ipc.h \ + plugin_nfacct.c \ + plugin_nfacct.h \ plugin_proc.c \ plugin_proc.h \ plugin_proc_diskspace.c \ plugin_proc_diskspace.h \ + plugin_tc.c \ + plugin_tc.h \ proc_diskstats.c \ proc_interrupts.c \ proc_softirqs.c \ @@ -200,6 +199,8 @@ netdata_SOURCES += \ proc_net_softnet_stat.c \ proc_net_stat_conntrack.c \ proc_net_stat_synproxy.c \ + proc_self_mountinfo.c \ + proc_self_mountinfo.h \ zfs_common.c \ zfs_common.h \ proc_spl_kstat_zfs.c \ @@ -208,7 +209,10 @@ netdata_SOURCES += \ proc_vmstat.c \ proc_uptime.c \ sys_kernel_mm_ksm.c \ + sys_devices_system_edac_mc.c \ + sys_devices_system_node.c \ sys_fs_cgroup.c \ + sys_fs_btrfs.c \ $(NULL) endif endif @@ -222,19 +226,33 @@ netdata_LDADD = \ apps_plugin_SOURCES = \ apps_plugin.c \ - avl.c avl.h \ - clocks.c clocks.h \ - common.c common.h \ + avl.c \ + avl.h \ + clocks.c \ + clocks.h \ + common.c \ + common.h \ inlined.h \ + locks.c \ + locks.h \ log.c log.h \ - procfile.c procfile.h \ - web_buffer.c web_buffer.h \ + procfile.c \ + procfile.h \ + threads.c \ + threads.h \ + web_buffer.c \ + web_buffer.h \ $(NULL) if FREEBSD apps_plugin_SOURCES += \ plugin_freebsd.h \ $(NULL) +else 
+apps_plugin_SOURCES += \ + adaptive_resortable_list.c \ + adaptive_resortable_list.h \ + $(NULL) endif apps_plugin_LDADD = \ @@ -244,11 +262,18 @@ apps_plugin_LDADD = \ freeipmi_plugin_SOURCES = \ freeipmi_plugin.c \ - clocks.c clocks.h \ - common.c common.h \ + clocks.c \ + clocks.h \ + common.c \ + common.h \ inlined.h \ + locks.c \ + locks.h \ log.c log.h \ - procfile.c procfile.h \ + procfile.c \ + procfile.h \ + threads.c \ + threads.h \ $(NULL) freeipmi_plugin_LDADD = \ @@ -257,13 +282,23 @@ freeipmi_plugin_LDADD = \ cgroup_network_SOURCES = \ cgroup-network.c \ - clocks.c clocks.h \ - common.c common.h \ + clocks.c \ + clocks.h \ + common.c \ + common.h \ inlined.h \ - log.c log.h \ - procfile.c procfile.h \ - popen.c popen.h \ - signals.c signals.h \ + locks.c \ + locks.h \ + log.c \ + log.h \ + procfile.c \ + procfile.h \ + popen.c \ + popen.h \ + signals.c \ + signals.h \ + threads.c \ + threads.h \ $(NULL) cgroup_network_LDADD = \ diff --git a/src/Makefile.in b/src/Makefile.in index bf902c21c..75f85632e 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -108,10 +108,14 @@ plugins_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) @FREEBSD_FALSE@@MACOS_FALSE@am__append_6 = \ @FREEBSD_FALSE@@MACOS_FALSE@ ipc.c \ @FREEBSD_FALSE@@MACOS_FALSE@ ipc.h \ +@FREEBSD_FALSE@@MACOS_FALSE@ plugin_nfacct.c \ +@FREEBSD_FALSE@@MACOS_FALSE@ plugin_nfacct.h \ @FREEBSD_FALSE@@MACOS_FALSE@ plugin_proc.c \ @FREEBSD_FALSE@@MACOS_FALSE@ plugin_proc.h \ @FREEBSD_FALSE@@MACOS_FALSE@ plugin_proc_diskspace.c \ @FREEBSD_FALSE@@MACOS_FALSE@ plugin_proc_diskspace.h \ +@FREEBSD_FALSE@@MACOS_FALSE@ plugin_tc.c \ +@FREEBSD_FALSE@@MACOS_FALSE@ plugin_tc.h \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_diskstats.c \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_interrupts.c \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_softirqs.c \ @@ -129,6 +133,8 @@ plugins_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) @FREEBSD_FALSE@@MACOS_FALSE@ proc_net_softnet_stat.c \ @FREEBSD_FALSE@@MACOS_FALSE@ 
proc_net_stat_conntrack.c \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_net_stat_synproxy.c \ +@FREEBSD_FALSE@@MACOS_FALSE@ proc_self_mountinfo.c \ +@FREEBSD_FALSE@@MACOS_FALSE@ proc_self_mountinfo.h \ @FREEBSD_FALSE@@MACOS_FALSE@ zfs_common.c \ @FREEBSD_FALSE@@MACOS_FALSE@ zfs_common.h \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_spl_kstat_zfs.c \ @@ -137,13 +143,21 @@ plugins_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) @FREEBSD_FALSE@@MACOS_FALSE@ proc_vmstat.c \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_uptime.c \ @FREEBSD_FALSE@@MACOS_FALSE@ sys_kernel_mm_ksm.c \ +@FREEBSD_FALSE@@MACOS_FALSE@ sys_devices_system_edac_mc.c \ +@FREEBSD_FALSE@@MACOS_FALSE@ sys_devices_system_node.c \ @FREEBSD_FALSE@@MACOS_FALSE@ sys_fs_cgroup.c \ +@FREEBSD_FALSE@@MACOS_FALSE@ sys_fs_btrfs.c \ @FREEBSD_FALSE@@MACOS_FALSE@ $(NULL) @FREEBSD_TRUE@am__append_7 = \ @FREEBSD_TRUE@ plugin_freebsd.h \ @FREEBSD_TRUE@ $(NULL) +@FREEBSD_FALSE@am__append_8 = \ +@FREEBSD_FALSE@ adaptive_resortable_list.c \ +@FREEBSD_FALSE@ adaptive_resortable_list.h \ +@FREEBSD_FALSE@ $(NULL) + subdir = src DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(top_srcdir)/depcomp $(dist_cache_DATA) $(dist_log_DATA) \ @@ -172,24 +186,29 @@ am__installdirs = "$(DESTDIR)$(pluginsdir)" "$(DESTDIR)$(sbindir)" \ "$(DESTDIR)$(registrydir)" "$(DESTDIR)$(varlibdir)" PROGRAMS = $(plugins_PROGRAMS) $(sbin_PROGRAMS) am__apps_plugin_SOURCES_DIST = apps_plugin.c avl.c avl.h clocks.c \ - clocks.h common.c common.h inlined.h log.c log.h procfile.c \ - procfile.h web_buffer.c web_buffer.h plugin_freebsd.h + clocks.h common.c common.h inlined.h locks.c locks.h log.c \ + log.h procfile.c procfile.h threads.c threads.h web_buffer.c \ + web_buffer.h plugin_freebsd.h adaptive_resortable_list.c \ + adaptive_resortable_list.h am__objects_1 = +@FREEBSD_FALSE@am__objects_2 = adaptive_resortable_list.$(OBJEXT) am_apps_plugin_OBJECTS = apps_plugin.$(OBJEXT) avl.$(OBJEXT) \ - clocks.$(OBJEXT) common.$(OBJEXT) log.$(OBJEXT) \ - 
procfile.$(OBJEXT) web_buffer.$(OBJEXT) $(am__objects_1) + clocks.$(OBJEXT) common.$(OBJEXT) locks.$(OBJEXT) \ + log.$(OBJEXT) procfile.$(OBJEXT) threads.$(OBJEXT) \ + web_buffer.$(OBJEXT) $(am__objects_1) $(am__objects_2) apps_plugin_OBJECTS = $(am_apps_plugin_OBJECTS) am__DEPENDENCIES_1 = apps_plugin_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_cgroup_network_OBJECTS = cgroup-network.$(OBJEXT) clocks.$(OBJEXT) \ - common.$(OBJEXT) log.$(OBJEXT) procfile.$(OBJEXT) \ - popen.$(OBJEXT) signals.$(OBJEXT) + common.$(OBJEXT) locks.$(OBJEXT) log.$(OBJEXT) \ + procfile.$(OBJEXT) popen.$(OBJEXT) signals.$(OBJEXT) \ + threads.$(OBJEXT) cgroup_network_OBJECTS = $(am_cgroup_network_OBJECTS) cgroup_network_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) am_freeipmi_plugin_OBJECTS = freeipmi_plugin.$(OBJEXT) \ - clocks.$(OBJEXT) common.$(OBJEXT) log.$(OBJEXT) \ - procfile.$(OBJEXT) + clocks.$(OBJEXT) common.$(OBJEXT) locks.$(OBJEXT) \ + log.$(OBJEXT) procfile.$(OBJEXT) threads.$(OBJEXT) freeipmi_plugin_OBJECTS = $(am_freeipmi_plugin_OBJECTS) freeipmi_plugin_DEPENDENCIES = $(am__DEPENDENCIES_1) am__netdata_SOURCES_DIST = adaptive_resortable_list.c \ @@ -198,55 +217,57 @@ am__netdata_SOURCES_DIST = adaptive_resortable_list.c \ backends.h clocks.c clocks.h common.c common.h daemon.c \ daemon.h dictionary.c dictionary.h eval.c eval.h \ global_statistics.c global_statistics.h health.c health.h \ - health_config.c health_json.c health_log.c inlined.h locks.h \ - log.c log.h main.c main.h plugin_checks.c plugin_checks.h \ - plugin_idlejitter.c plugin_idlejitter.h plugin_nfacct.c \ - plugin_nfacct.h plugin_tc.c plugin_tc.h plugins_d.c \ - plugins_d.h popen.c popen.h proc_self_mountinfo.c \ - proc_self_mountinfo.h procfile.c procfile.h registry.c \ - registry.h registry_db.c registry_init.c registry_internals.c \ - registry_internals.h registry_log.c registry_machine.c \ - registry_machine.h registry_person.c registry_person.h \ - registry_url.c 
registry_url.h rrd.c rrd.h rrd2json.c \ - rrd2json.h rrd2json_api_old.c rrd2json_api_old.h rrdcalc.c \ - rrdcalctemplate.c rrddim.c rrddimvar.c rrdfamily.c rrdhost.c \ - rrdpush.c rrdpush.h rrdset.c rrdsetvar.c rrdvar.c signals.c \ - signals.h simple_pattern.c simple_pattern.h socket.c socket.h \ - statistical.c statistical.h statsd.c statsd.h storage_number.c \ - storage_number.h sys_devices_system_edac_mc.c \ - sys_devices_system_node.c unit_test.c unit_test.h url.c url.h \ - web_api_old.c web_api_old.h web_api_v1.c web_api_v1.h \ - web_buffer.c web_buffer.h web_buffer_svg.c web_buffer_svg.h \ - web_client.c web_client.h web_server.c web_server.h \ - plugin_freebsd.c plugin_freebsd.h freebsd_sysctl.c \ - freebsd_getmntinfo.c freebsd_getifaddrs.c freebsd_devstat.c \ - zfs_common.c zfs_common.h freebsd_kstat_zfs.c freebsd_ipfw.c \ - plugin_macos.c plugin_macos.h macos_sysctl.c macos_mach_smi.c \ - macos_fw.c ipc.c ipc.h plugin_proc.c plugin_proc.h \ - plugin_proc_diskspace.c plugin_proc_diskspace.h \ - proc_diskstats.c proc_interrupts.c proc_softirqs.c \ - proc_loadavg.c proc_meminfo.c proc_net_dev.c \ + health_config.c health_json.c health_log.c inlined.h locks.c \ + locks.h log.c log.h main.c main.h plugin_checks.c \ + plugin_checks.h plugin_idlejitter.c plugin_idlejitter.h \ + plugins_d.c plugins_d.h popen.c popen.h procfile.c procfile.h \ + registry.c registry.h registry_db.c registry_init.c \ + registry_internals.c registry_internals.h registry_log.c \ + registry_machine.c registry_machine.h registry_person.c \ + registry_person.h registry_url.c registry_url.h rrd.c rrd.h \ + rrd2json.c rrd2json.h rrd2json_api_old.c rrd2json_api_old.h \ + rrdcalc.c rrdcalctemplate.c rrddim.c rrddimvar.c rrdfamily.c \ + rrdhost.c rrdpush.c rrdpush.h rrdset.c rrdsetvar.c rrdvar.c \ + signals.c signals.h simple_pattern.c simple_pattern.h socket.c \ + socket.h statistical.c statistical.h statsd.c statsd.h \ + storage_number.c storage_number.h threads.c threads.h \ + unit_test.c 
unit_test.h url.c url.h web_api_old.c \ + web_api_old.h web_api_v1.c web_api_v1.h web_buffer.c \ + web_buffer.h web_buffer_svg.c web_buffer_svg.h web_client.c \ + web_client.h web_server.c web_server.h plugin_freebsd.c \ + plugin_freebsd.h freebsd_sysctl.c freebsd_getmntinfo.c \ + freebsd_getifaddrs.c freebsd_devstat.c zfs_common.c \ + zfs_common.h freebsd_kstat_zfs.c freebsd_ipfw.c plugin_macos.c \ + plugin_macos.h macos_sysctl.c macos_mach_smi.c macos_fw.c \ + ipc.c ipc.h plugin_nfacct.c plugin_nfacct.h plugin_proc.c \ + plugin_proc.h plugin_proc_diskspace.c plugin_proc_diskspace.h \ + plugin_tc.c plugin_tc.h proc_diskstats.c proc_interrupts.c \ + proc_softirqs.c proc_loadavg.c proc_meminfo.c proc_net_dev.c \ proc_net_ip_vs_stats.c proc_net_netstat.c proc_net_rpc_nfs.c \ proc_net_rpc_nfsd.c proc_net_snmp.c proc_net_snmp6.c \ proc_net_sockstat.c proc_net_sockstat6.c \ proc_net_softnet_stat.c proc_net_stat_conntrack.c \ - proc_net_stat_synproxy.c proc_spl_kstat_zfs.c proc_stat.c \ + proc_net_stat_synproxy.c proc_self_mountinfo.c \ + proc_self_mountinfo.h proc_spl_kstat_zfs.c proc_stat.c \ proc_sys_kernel_random_entropy_avail.c proc_vmstat.c \ - proc_uptime.c sys_kernel_mm_ksm.c sys_fs_cgroup.c -@FREEBSD_TRUE@am__objects_2 = plugin_freebsd.$(OBJEXT) \ + proc_uptime.c sys_kernel_mm_ksm.c sys_devices_system_edac_mc.c \ + sys_devices_system_node.c sys_fs_cgroup.c sys_fs_btrfs.c +@FREEBSD_TRUE@am__objects_3 = plugin_freebsd.$(OBJEXT) \ @FREEBSD_TRUE@ freebsd_sysctl.$(OBJEXT) \ @FREEBSD_TRUE@ freebsd_getmntinfo.$(OBJEXT) \ @FREEBSD_TRUE@ freebsd_getifaddrs.$(OBJEXT) \ @FREEBSD_TRUE@ freebsd_devstat.$(OBJEXT) zfs_common.$(OBJEXT) \ @FREEBSD_TRUE@ freebsd_kstat_zfs.$(OBJEXT) \ @FREEBSD_TRUE@ freebsd_ipfw.$(OBJEXT) -@FREEBSD_FALSE@@MACOS_TRUE@am__objects_3 = plugin_macos.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_TRUE@am__objects_4 = plugin_macos.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_TRUE@ macos_sysctl.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_TRUE@ macos_mach_smi.$(OBJEXT) \ 
@FREEBSD_FALSE@@MACOS_TRUE@ macos_fw.$(OBJEXT) -@FREEBSD_FALSE@@MACOS_FALSE@am__objects_4 = ipc.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_FALSE@am__objects_5 = ipc.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_FALSE@ plugin_nfacct.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ plugin_proc.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ plugin_proc_diskspace.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_FALSE@ plugin_tc.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_diskstats.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_interrupts.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_softirqs.$(OBJEXT) \ @@ -264,6 +285,7 @@ am__netdata_SOURCES_DIST = adaptive_resortable_list.c \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_net_softnet_stat.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_net_stat_conntrack.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_net_stat_synproxy.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_FALSE@ proc_self_mountinfo.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ zfs_common.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_spl_kstat_zfs.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_stat.$(OBJEXT) \ @@ -271,34 +293,35 @@ am__netdata_SOURCES_DIST = adaptive_resortable_list.c \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_vmstat.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ proc_uptime.$(OBJEXT) \ @FREEBSD_FALSE@@MACOS_FALSE@ sys_kernel_mm_ksm.$(OBJEXT) \ -@FREEBSD_FALSE@@MACOS_FALSE@ sys_fs_cgroup.$(OBJEXT) +@FREEBSD_FALSE@@MACOS_FALSE@ sys_devices_system_edac_mc.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_FALSE@ sys_devices_system_node.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_FALSE@ sys_fs_cgroup.$(OBJEXT) \ +@FREEBSD_FALSE@@MACOS_FALSE@ sys_fs_btrfs.$(OBJEXT) am_netdata_OBJECTS = adaptive_resortable_list.$(OBJEXT) \ appconfig.$(OBJEXT) avl.$(OBJEXT) backend_prometheus.$(OBJEXT) \ backends.$(OBJEXT) clocks.$(OBJEXT) common.$(OBJEXT) \ daemon.$(OBJEXT) dictionary.$(OBJEXT) eval.$(OBJEXT) \ global_statistics.$(OBJEXT) health.$(OBJEXT) \ health_config.$(OBJEXT) health_json.$(OBJEXT) \ - health_log.$(OBJEXT) log.$(OBJEXT) main.$(OBJEXT) \ - 
plugin_checks.$(OBJEXT) plugin_idlejitter.$(OBJEXT) \ - plugin_nfacct.$(OBJEXT) plugin_tc.$(OBJEXT) \ - plugins_d.$(OBJEXT) popen.$(OBJEXT) \ - proc_self_mountinfo.$(OBJEXT) procfile.$(OBJEXT) \ - registry.$(OBJEXT) registry_db.$(OBJEXT) \ - registry_init.$(OBJEXT) registry_internals.$(OBJEXT) \ - registry_log.$(OBJEXT) registry_machine.$(OBJEXT) \ - registry_person.$(OBJEXT) registry_url.$(OBJEXT) rrd.$(OBJEXT) \ - rrd2json.$(OBJEXT) rrd2json_api_old.$(OBJEXT) \ - rrdcalc.$(OBJEXT) rrdcalctemplate.$(OBJEXT) rrddim.$(OBJEXT) \ - rrddimvar.$(OBJEXT) rrdfamily.$(OBJEXT) rrdhost.$(OBJEXT) \ - rrdpush.$(OBJEXT) rrdset.$(OBJEXT) rrdsetvar.$(OBJEXT) \ - rrdvar.$(OBJEXT) signals.$(OBJEXT) simple_pattern.$(OBJEXT) \ - socket.$(OBJEXT) statistical.$(OBJEXT) statsd.$(OBJEXT) \ - storage_number.$(OBJEXT) sys_devices_system_edac_mc.$(OBJEXT) \ - sys_devices_system_node.$(OBJEXT) unit_test.$(OBJEXT) \ + health_log.$(OBJEXT) locks.$(OBJEXT) log.$(OBJEXT) \ + main.$(OBJEXT) plugin_checks.$(OBJEXT) \ + plugin_idlejitter.$(OBJEXT) plugins_d.$(OBJEXT) \ + popen.$(OBJEXT) procfile.$(OBJEXT) registry.$(OBJEXT) \ + registry_db.$(OBJEXT) registry_init.$(OBJEXT) \ + registry_internals.$(OBJEXT) registry_log.$(OBJEXT) \ + registry_machine.$(OBJEXT) registry_person.$(OBJEXT) \ + registry_url.$(OBJEXT) rrd.$(OBJEXT) rrd2json.$(OBJEXT) \ + rrd2json_api_old.$(OBJEXT) rrdcalc.$(OBJEXT) \ + rrdcalctemplate.$(OBJEXT) rrddim.$(OBJEXT) rrddimvar.$(OBJEXT) \ + rrdfamily.$(OBJEXT) rrdhost.$(OBJEXT) rrdpush.$(OBJEXT) \ + rrdset.$(OBJEXT) rrdsetvar.$(OBJEXT) rrdvar.$(OBJEXT) \ + signals.$(OBJEXT) simple_pattern.$(OBJEXT) socket.$(OBJEXT) \ + statistical.$(OBJEXT) statsd.$(OBJEXT) \ + storage_number.$(OBJEXT) threads.$(OBJEXT) unit_test.$(OBJEXT) \ url.$(OBJEXT) web_api_old.$(OBJEXT) web_api_v1.$(OBJEXT) \ web_buffer.$(OBJEXT) web_buffer_svg.$(OBJEXT) \ - web_client.$(OBJEXT) web_server.$(OBJEXT) $(am__objects_2) \ - $(am__objects_3) $(am__objects_4) + web_client.$(OBJEXT) web_server.$(OBJEXT) 
$(am__objects_3) \ + $(am__objects_4) $(am__objects_5) netdata_OBJECTS = $(am_netdata_OBJECTS) netdata_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) @@ -567,27 +590,25 @@ netdata_SOURCES = adaptive_resortable_list.c \ backends.h clocks.c clocks.h common.c common.h daemon.c \ daemon.h dictionary.c dictionary.h eval.c eval.h \ global_statistics.c global_statistics.h health.c health.h \ - health_config.c health_json.c health_log.c inlined.h locks.h \ - log.c log.h main.c main.h plugin_checks.c plugin_checks.h \ - plugin_idlejitter.c plugin_idlejitter.h plugin_nfacct.c \ - plugin_nfacct.h plugin_tc.c plugin_tc.h plugins_d.c \ - plugins_d.h popen.c popen.h proc_self_mountinfo.c \ - proc_self_mountinfo.h procfile.c procfile.h registry.c \ - registry.h registry_db.c registry_init.c registry_internals.c \ - registry_internals.h registry_log.c registry_machine.c \ - registry_machine.h registry_person.c registry_person.h \ - registry_url.c registry_url.h rrd.c rrd.h rrd2json.c \ - rrd2json.h rrd2json_api_old.c rrd2json_api_old.h rrdcalc.c \ - rrdcalctemplate.c rrddim.c rrddimvar.c rrdfamily.c rrdhost.c \ - rrdpush.c rrdpush.h rrdset.c rrdsetvar.c rrdvar.c signals.c \ - signals.h simple_pattern.c simple_pattern.h socket.c socket.h \ - statistical.c statistical.h statsd.c statsd.h storage_number.c \ - storage_number.h sys_devices_system_edac_mc.c \ - sys_devices_system_node.c unit_test.c unit_test.h url.c url.h \ - web_api_old.c web_api_old.h web_api_v1.c web_api_v1.h \ - web_buffer.c web_buffer.h web_buffer_svg.c web_buffer_svg.h \ - web_client.c web_client.h web_server.c web_server.h $(NULL) \ - $(am__append_4) $(am__append_5) $(am__append_6) + health_config.c health_json.c health_log.c inlined.h locks.c \ + locks.h log.c log.h main.c main.h plugin_checks.c \ + plugin_checks.h plugin_idlejitter.c plugin_idlejitter.h \ + plugins_d.c plugins_d.h popen.c popen.h procfile.c procfile.h \ + registry.c registry.h 
registry_db.c registry_init.c \ + registry_internals.c registry_internals.h registry_log.c \ + registry_machine.c registry_machine.h registry_person.c \ + registry_person.h registry_url.c registry_url.h rrd.c rrd.h \ + rrd2json.c rrd2json.h rrd2json_api_old.c rrd2json_api_old.h \ + rrdcalc.c rrdcalctemplate.c rrddim.c rrddimvar.c rrdfamily.c \ + rrdhost.c rrdpush.c rrdpush.h rrdset.c rrdsetvar.c rrdvar.c \ + signals.c signals.h simple_pattern.c simple_pattern.h socket.c \ + socket.h statistical.c statistical.h statsd.c statsd.h \ + storage_number.c storage_number.h threads.c threads.h \ + unit_test.c unit_test.h url.c url.h web_api_old.c \ + web_api_old.h web_api_v1.c web_api_v1.h web_buffer.c \ + web_buffer.h web_buffer_svg.c web_buffer_svg.h web_client.c \ + web_client.h web_server.c web_server.h $(NULL) $(am__append_4) \ + $(am__append_5) $(am__append_6) netdata_LDADD = \ $(OPTIONAL_MATH_LIBS) \ $(OPTIONAL_NFACCT_LIBS) \ @@ -596,8 +617,9 @@ netdata_LDADD = \ $(NULL) apps_plugin_SOURCES = apps_plugin.c avl.c avl.h clocks.c clocks.h \ - common.c common.h inlined.h log.c log.h procfile.c procfile.h \ - web_buffer.c web_buffer.h $(NULL) $(am__append_7) + common.c common.h inlined.h locks.c locks.h log.c log.h \ + procfile.c procfile.h threads.c threads.h web_buffer.c \ + web_buffer.h $(NULL) $(am__append_7) $(am__append_8) apps_plugin_LDADD = \ $(OPTIONAL_MATH_LIBS) \ $(OPTIONAL_LIBCAP_LIBS) \ @@ -605,11 +627,18 @@ apps_plugin_LDADD = \ freeipmi_plugin_SOURCES = \ freeipmi_plugin.c \ - clocks.c clocks.h \ - common.c common.h \ + clocks.c \ + clocks.h \ + common.c \ + common.h \ inlined.h \ + locks.c \ + locks.h \ log.c log.h \ - procfile.c procfile.h \ + procfile.c \ + procfile.h \ + threads.c \ + threads.h \ $(NULL) freeipmi_plugin_LDADD = \ @@ -618,13 +647,23 @@ freeipmi_plugin_LDADD = \ cgroup_network_SOURCES = \ cgroup-network.c \ - clocks.c clocks.h \ - common.c common.h \ + clocks.c \ + clocks.h \ + common.c \ + common.h \ inlined.h \ - log.c log.h \ - 
procfile.c procfile.h \ - popen.c popen.h \ - signals.c signals.h \ + locks.c \ + locks.h \ + log.c \ + log.h \ + procfile.c \ + procfile.h \ + popen.c \ + popen.h \ + signals.c \ + signals.h \ + threads.c \ + threads.h \ $(NULL) cgroup_network_LDADD = \ @@ -798,6 +837,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/health_json.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/health_log.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ipc.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/locks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/log.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/macos_fw.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/macos_mach_smi.Po@am__quote@ @@ -866,8 +906,10 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/storage_number.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sys_devices_system_edac_mc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sys_devices_system_node.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sys_fs_btrfs.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sys_fs_cgroup.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sys_kernel_mm_ksm.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/threads.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unit_test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/url.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/web_api_old.Po@am__quote@ diff --git a/src/adaptive_resortable_list.c b/src/adaptive_resortable_list.c index add1d8c96..c564efff3 100644 --- a/src/adaptive_resortable_list.c +++ b/src/adaptive_resortable_list.c @@ -96,9 +96,14 @@ void arl_begin(ARL_BASE *base) { } #endif - if(unlikely(base->added || base->iteration % base->rechecks) == 1) { + if(unlikely(base->iteration > 0 && (base->added || 
(base->iteration % base->rechecks) == 0))) { + int wanted_equals_expected = ((base->iteration % base->rechecks) == 0); + + // fprintf(stderr, "\n\narl_begin() rechecking, added %zu, iteration %zu, rechecks %zu, wanted_equals_expected %d\n\n\n", base->added, base->iteration, base->rechecks, wanted_equals_expected); + base->added = 0; - base->wanted = 0; + base->wanted = (wanted_equals_expected)?base->expected:0; + ARL_ENTRY *e = base->head; while(e) { if(e->flags & ARL_ENTRY_FLAG_FOUND) { @@ -107,7 +112,7 @@ void arl_begin(ARL_BASE *base) { e->flags &= ~ARL_ENTRY_FLAG_FOUND; // count it in wanted - if(e->flags & ARL_ENTRY_FLAG_EXPECTED) + if(!wanted_equals_expected && e->flags & ARL_ENTRY_FLAG_EXPECTED) base->wanted++; } @@ -155,10 +160,11 @@ void arl_begin(ARL_BASE *base) { // register an expected keyword to the ARL // together with its destination ( i.e. the output of the processor() ) -ARL_ENTRY *arl_expect(ARL_BASE *base, const char *keyword, void *dst) { +ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst) { ARL_ENTRY *e = callocz(1, sizeof(ARL_ENTRY)); e->name = strdupz(keyword); e->hash = simple_hash(e->name); + e->processor = (processor)?processor:base->processor; e->dst = dst; e->flags = ARL_ENTRY_FLAG_EXPECTED; e->prev = NULL; @@ -198,7 +204,7 @@ int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *val // run the processor for it if(unlikely(e->dst)) { - base->processor(e->name, hash, value, e->dst); + e->processor(e->name, hash, value, e->dst); base->found++; } @@ -254,8 +260,10 @@ int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *val if(unlikely(!base->next_keyword)) base->next_keyword = base->head; - if(unlikely(base->found == base->wanted)) + if(unlikely(base->found == base->wanted)) { + // fprintf(stderr, "FOUND ALL WANTED 1: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, 
base->expected); return 1; + } return 0; } diff --git a/src/adaptive_resortable_list.h b/src/adaptive_resortable_list.h index c007fa31e..d05a8ede7 100644 --- a/src/adaptive_resortable_list.h +++ b/src/adaptive_resortable_list.h @@ -51,6 +51,9 @@ typedef struct arl_entry { uint8_t flags; // ARL_ENTRY_FLAG_* + // the processor to do the job + void (*processor)(const char *name, uint32_t hash, const char *value, void *dst); + // double linked list for fast re-linkings struct arl_entry *prev, *next; } ARL_ENTRY; @@ -102,7 +105,8 @@ extern void arl_free(ARL_BASE *arl_base); // register an expected keyword to the ARL // together with its destination ( i.e. the output of the processor() ) -extern ARL_ENTRY *arl_expect(ARL_BASE *base, const char *keyword, void *dst); +extern ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst); +#define arl_expect(base, keyword, dst) arl_expect_custom(base, keyword, NULL, dst) // an internal call to complete the check() call extern int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *value); @@ -138,7 +142,7 @@ static inline int arl_check(ARL_BASE *base, const char *keyword, const char *val // execute the processor if(unlikely(e->dst)) { - base->processor(e->name, e->hash, value, e->dst); + e->processor(e->name, e->hash, value, e->dst); base->found++; } @@ -148,8 +152,10 @@ static inline int arl_check(ARL_BASE *base, const char *keyword, const char *val base->next_keyword = base->head; // stop if we collected all the values for this iteration - if(unlikely(base->found == base->wanted)) + if(unlikely(base->found == base->wanted)) { + // fprintf(stderr, "FOUND ALL WANTED 2: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, base->expected); return 1; + } return 0; } diff --git a/src/appconfig.c b/src/appconfig.c index 40cade818..2424864b5 100644 --- a/src/appconfig.c +++ b/src/appconfig.c @@ 
-110,8 +110,8 @@ static int appconfig_section_compare(void *a, void *b) { else return strcmp(((struct section *)a)->name, ((struct section *)b)->name); } -#define appconfig_index_add(root, cfg) (struct section *)avl_insert_lock(&root->index, (avl *)(cfg)) -#define appconfig_index_del(root, cfg) (struct section *)avl_remove_lock(&root->index, (avl *)(cfg)) +#define appconfig_index_add(root, cfg) (struct section *)avl_insert_lock(&(root)->index, (avl *)(cfg)) +#define appconfig_index_del(root, cfg) (struct section *)avl_remove_lock(&(root)->index, (avl *)(cfg)) static struct section *appconfig_index_find(struct config *root, const char *name, uint32_t hash) { struct section tmp; @@ -297,10 +297,10 @@ long long appconfig_get_number(struct config *root, const char *section, const c return strtoll(s, NULL, 0); } -long double appconfig_get_float(struct config *root, const char *section, const char *name, long double value) +LONG_DOUBLE appconfig_get_float(struct config *root, const char *section, const char *name, LONG_DOUBLE value) { char buffer[100], *s; - sprintf(buffer, "%0.5Lf", value); + sprintf(buffer, "%0.5" LONG_DOUBLE_MODIFIER, value); s = appconfig_get(root, section, name, buffer); if(!s) return value; @@ -407,10 +407,10 @@ long long appconfig_set_number(struct config *root, const char *section, const c return value; } -long double appconfig_set_float(struct config *root, const char *section, const char *name, long double value) +LONG_DOUBLE appconfig_set_float(struct config *root, const char *section, const char *name, LONG_DOUBLE value) { char buffer[100]; - sprintf(buffer, "%0.5Lf", value); + sprintf(buffer, "%0.5" LONG_DOUBLE_MODIFIER, value); appconfig_set(root, section, name, buffer); diff --git a/src/appconfig.h b/src/appconfig.h index b8c2ee80c..7d056e6be 100644 --- a/src/appconfig.h +++ b/src/appconfig.h @@ -35,14 +35,14 @@ extern int appconfig_load(struct config *root, char *filename, int overwrite_use extern char *appconfig_get(struct config *root, 
const char *section, const char *name, const char *default_value); extern long long appconfig_get_number(struct config *root, const char *section, const char *name, long long value); -extern long double appconfig_get_float(struct config *root, const char *section, const char *name, long double value); +extern LONG_DOUBLE appconfig_get_float(struct config *root, const char *section, const char *name, LONG_DOUBLE value); extern int appconfig_get_boolean(struct config *root, const char *section, const char *name, int value); extern int appconfig_get_boolean_ondemand(struct config *root, const char *section, const char *name, int value); extern const char *appconfig_set(struct config *root, const char *section, const char *name, const char *value); extern const char *appconfig_set_default(struct config *root, const char *section, const char *name, const char *value); extern long long appconfig_set_number(struct config *root, const char *section, const char *name, long long value); -extern long double appconfig_set_float(struct config *root, const char *section, const char *name, long double value); +extern LONG_DOUBLE appconfig_set_float(struct config *root, const char *section, const char *name, LONG_DOUBLE value); extern int appconfig_set_boolean(struct config *root, const char *section, const char *name, int value); extern int appconfig_exists(struct config *root, const char *section, const char *name); diff --git a/src/apps_plugin.c b/src/apps_plugin.c index 3ac79777b..8595da6c2 100644 --- a/src/apps_plugin.c +++ b/src/apps_plugin.c @@ -162,13 +162,12 @@ struct target { kernel_uint_t num_threads; // kernel_uint_t rss; - kernel_uint_t statm_size; - kernel_uint_t statm_resident; - kernel_uint_t statm_share; - // kernel_uint_t statm_text; - // kernel_uint_t statm_lib; - // kernel_uint_t statm_data; - // kernel_uint_t statm_dirty; + kernel_uint_t status_vmsize; + kernel_uint_t status_vmrss; + kernel_uint_t status_vmshared; + kernel_uint_t status_rssfile; + 
kernel_uint_t status_rssshmem; + kernel_uint_t status_vmswap; kernel_uint_t io_logical_bytes_read; kernel_uint_t io_logical_bytes_written; @@ -287,13 +286,15 @@ struct pid_stat { uid_t uid; gid_t gid; - kernel_uint_t statm_size; - kernel_uint_t statm_resident; - kernel_uint_t statm_share; - // kernel_uint_t statm_text; - // kernel_uint_t statm_lib; - // kernel_uint_t statm_data; - // kernel_uint_t statm_dirty; + kernel_uint_t status_vmsize; + kernel_uint_t status_vmrss; + kernel_uint_t status_vmshared; + kernel_uint_t status_rssfile; + kernel_uint_t status_rssshmem; + kernel_uint_t status_vmswap; +#ifndef __FreeBSD__ + ARL_BASE *status_arl; +#endif kernel_uint_t io_logical_bytes_read_raw; kernel_uint_t io_logical_bytes_written_raw; @@ -337,7 +338,7 @@ struct pid_stat { char *fds_dirname; // the full directory name in /proc/PID/fd char *stat_filename; - char *statm_filename; + char *status_filename; char *io_filename; char *cmdline_filename; @@ -346,10 +347,12 @@ struct pid_stat { struct pid_stat *next; }; +size_t pagesize; + // log each problem once per process // log flood protection flags (log_thrown) #define PID_LOG_IO 0x00000001 -#define PID_LOG_STATM 0x00000002 +#define PID_LOG_STATUS 0x00000002 #define PID_LOG_CMDLINE 0x00000004 #define PID_LOG_FDS 0x00000008 #define PID_LOG_STAT 0x00000010 @@ -694,7 +697,10 @@ static inline void del_pid_entry(pid_t pid) { freez(p->fds); freez(p->fds_dirname); freez(p->stat_filename); - freez(p->statm_filename); + freez(p->status_filename); +#ifndef __FreeBSD__ + arl_free(p->status_arl); +#endif freez(p->io_filename); freez(p->cmdline_filename); freez(p->cmdline); @@ -715,19 +721,35 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { p->log_thrown |= log; switch(log) { case PID_LOG_IO: + #ifdef __FreeBSD__ + error("Cannot fetch process %d I/O info (command '%s')", p->pid, p->comm); + #else error("Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + 
#endif break; - case PID_LOG_STATM: - error("Cannot process %s/proc/%d/statm (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + case PID_LOG_STATUS: + #ifdef __FreeBSD__ + error("Cannot fetch process %d status info (command '%s')", p->pid, p->comm); + #else + error("Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif break; case PID_LOG_CMDLINE: + #ifdef __FreeBSD__ + error("Cannot fetch process %d command line (command '%s')", p->pid, p->comm); + #else error("Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif break; case PID_LOG_FDS: + #ifdef __FreeBSD__ + error("Cannot fetch process %d files (command '%s')", p->pid, p->comm); + #else error("Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif break; case PID_LOG_STAT: @@ -832,51 +854,170 @@ cleanup: return 0; } -static inline int read_proc_pid_ownership(struct pid_stat *p, void *ptr) { - (void)ptr; +// ---------------------------------------------------------------------------- +// macro to calculate the incremental rate of a value +// each parameter is accessed only ONCE - so it is safe to pass function calls +// or other macros as parameters + +#define incremental_rate(rate_variable, last_kernel_variable, new_kernel_value, collected_usec, last_collected_usec) { \ + kernel_uint_t _new_tmp = new_kernel_value; \ + (rate_variable) = (_new_tmp - (last_kernel_variable)) * (USEC_PER_SEC * RATES_DETAIL) / ((collected_usec) - (last_collected_usec)); \ + (last_kernel_variable) = _new_tmp; \ + } + +// the same macro for struct pid members +#define pid_incremental_rate(type, var, value) \ + incremental_rate(var, var##_raw, value, p->type##_collected_usec, p->last_##type##_collected_usec) + + +// ---------------------------------------------------------------------------- + +#ifndef __FreeBSD__ +struct arl_callback_ptr { + struct 
pid_stat *p; + procfile *ff; + size_t line; +}; + +void arl_callback_status_uid(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; + + //const char *real_uid = procfile_lineword(aptr->ff, aptr->line, 1); + const char *effective_uid = procfile_lineword(aptr->ff, aptr->line, 2); + //const char *saved_uid = procfile_lineword(aptr->ff, aptr->line, 3); + //const char *filesystem_uid = procfile_lineword(aptr->ff, aptr->line, 4); + + if(likely(effective_uid && *effective_uid)) + aptr->p->uid = (uid_t)str2l(effective_uid); +} + +void arl_callback_status_gid(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; + + //const char *real_gid = procfile_lineword(aptr->ff, aptr->line, 1); + const char *effective_gid = procfile_lineword(aptr->ff, aptr->line, 2); + //const char *saved_gid = procfile_lineword(aptr->ff, aptr->line, 3); + //const char *filesystem_gid = procfile_lineword(aptr->ff, aptr->line, 4); + + if(likely(effective_gid && *effective_gid)) + aptr->p->gid = (uid_t)str2l(effective_gid); +} + +void arl_callback_status_vmsize(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_vmsize = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_vmswap(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, 
aptr->line) < 3)) return; + + aptr->p->status_vmswap = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_vmrss(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_vmrss = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_rssfile(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_rssfile = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} + +void arl_callback_status_rssshmem(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->status_rssshmem = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); +} +#endif // !__FreeBSD__ + +static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { + p->status_vmsize = 0; + p->status_vmrss = 0; + p->status_vmshared = 0; + p->status_rssfile = 0; + p->status_rssshmem = 0; + p->status_vmswap = 0; + #ifdef __FreeBSD__ struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; - p->uid = proc_info->ki_uid; - p->gid = proc_info->ki_groups[0]; - + p->uid = proc_info->ki_uid; + p->gid = proc_info->ki_groups[0]; + p->status_vmsize = proc_info->ki_size / 1024; // in kB + p->status_vmrss = proc_info->ki_rssize * pagesize / 1024; // in kB + // FIXME: what about shared and swap memory on FreeBSD? 
return 1; #else - if(unlikely(!p->stat_filename)) { - error("pid %d does not have a stat_filename", p->pid); - return 0; - } + (void)ptr; - // ---------------------------------------- - // read uid and gid + static struct arl_callback_ptr arl_ptr; + static procfile *ff = NULL; - struct stat st; - if(stat(p->stat_filename, &st) != 0) { - error("Cannot stat file '%s'", p->stat_filename); - return 1; + if(unlikely(!p->status_arl)) { + p->status_arl = arl_create("/proc/pid/status", NULL, 60); + arl_expect_custom(p->status_arl, "Uid", arl_callback_status_uid, &arl_ptr); + arl_expect_custom(p->status_arl, "Gid", arl_callback_status_gid, &arl_ptr); + arl_expect_custom(p->status_arl, "VmSize", arl_callback_status_vmsize, &arl_ptr); + arl_expect_custom(p->status_arl, "VmRSS", arl_callback_status_vmrss, &arl_ptr); + arl_expect_custom(p->status_arl, "RssFile", arl_callback_status_rssfile, &arl_ptr); + arl_expect_custom(p->status_arl, "RssShmem", arl_callback_status_rssshmem, &arl_ptr); + arl_expect_custom(p->status_arl, "VmSwap", arl_callback_status_vmswap, &arl_ptr); } - p->uid = st.st_uid; - p->gid = st.st_gid; + if(unlikely(!p->status_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/status", netdata_configured_host_prefix, p->pid); + p->status_filename = strdupz(filename); + } - return 1; -#endif -} + ff = procfile_reopen(ff, p->status_filename, (!ff)?" 
\t:,-()/":NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(unlikely(!ff)) return 0; -// ---------------------------------------------------------------------------- -// macro to calculate the incremental rate of a value -// each parameter is accessed only ONCE - so it is safe to pass function calls -// or other macros as parameters + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; -#define incremental_rate(rate_variable, last_kernel_variable, new_kernel_value, collected_usec, last_collected_usec) { \ - kernel_uint_t _new_tmp = new_kernel_value; \ - rate_variable = (_new_tmp - last_kernel_variable) * (USEC_PER_SEC * RATES_DETAIL) / (collected_usec - last_collected_usec); \ - last_kernel_variable = _new_tmp; \ + calls_counter++; + + // let ARL use this pid + arl_ptr.p = p; + arl_ptr.ff = ff; + + size_t lines = procfile_lines(ff), l; + arl_begin(p->status_arl); + + for(l = 0; l < lines ;l++) { + // fprintf(stderr, "CHECK: line %zu of %zu, key '%s' = '%s'\n", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); + arl_ptr.line = l; + if(unlikely(arl_check(p->status_arl, + procfile_lineword(ff, l, 0), + procfile_lineword(ff, l, 1)))) break; } -// the same macro for struct pid members -#define pid_incremental_rate(type, var, value) \ - incremental_rate(var, var##_raw, value, p->type##_collected_usec, p->last_##type##_collected_usec) + p->status_vmshared = p->status_rssfile + p->status_rssshmem; + + // fprintf(stderr, "%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu\n", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared); + + return 1; +#endif +} // ---------------------------------------------------------------------------- @@ -930,7 +1071,7 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { // p->flags = str2uint64_t(procfile_lineword(ff, 0, 8)); #endif - if(strcmp(p->comm, comm)) { + if(strcmp(p->comm, comm) != 0) { 
if(unlikely(debug)) { if(p->comm[0]) fprintf(stderr, "apps.plugin: \tpid %d (%s) changed name to '%s'\n", p->pid, p->comm, comm); @@ -955,7 +1096,7 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { pid_incremental_rate(stat, p->utime, (kernel_uint_t)proc_info->ki_rusage.ru_utime.tv_sec * 100 + proc_info->ki_rusage.ru_utime.tv_usec / 10000); pid_incremental_rate(stat, p->stime, (kernel_uint_t)proc_info->ki_rusage.ru_stime.tv_sec * 100 + proc_info->ki_rusage.ru_stime.tv_usec / 10000); pid_incremental_rate(stat, p->cutime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_utime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_utime.tv_usec / 10000); - pid_incremental_rate(stat, p->cstime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_utime.tv_usec / 10000); + pid_incremental_rate(stat, p->cstime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_stime.tv_usec / 10000); p->num_threads = proc_info->ki_numthreads; @@ -1045,57 +1186,6 @@ cleanup: return 0; } -static inline int read_proc_pid_statm(struct pid_stat *p, void *ptr) { - (void)ptr; -#ifdef __FreeBSD__ - struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; -#else - static procfile *ff = NULL; - - if(unlikely(!p->statm_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", netdata_configured_host_prefix, p->pid); - p->statm_filename = strdupz(filename); - } - - ff = procfile_reopen(ff, p->statm_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); - if(unlikely(!ff)) goto cleanup; - - ff = procfile_readall(ff); - if(unlikely(!ff)) goto cleanup; -#endif - - calls_counter++; - -#ifdef __FreeBSD__ - p->statm_size = proc_info->ki_size / sysconf(_SC_PAGESIZE); - p->statm_resident = proc_info->ki_rssize; - p->statm_share = 0; // do we have to use ru_ixrss here? 
-#else - p->statm_size = str2kernel_uint_t(procfile_lineword(ff, 0, 0)); - p->statm_resident = str2kernel_uint_t(procfile_lineword(ff, 0, 1)); - p->statm_share = str2kernel_uint_t(procfile_lineword(ff, 0, 2)); - // p->statm_text = str2kernel_uint_t(procfile_lineword(ff, 0, 3)); - // p->statm_lib = str2kernel_uint_t(procfile_lineword(ff, 0, 4)); - // p->statm_data = str2kernel_uint_t(procfile_lineword(ff, 0, 5)); - // p->statm_dirty = str2kernel_uint_t(procfile_lineword(ff, 0, 6)); -#endif - - return 1; - -#ifndef __FreeBSD__ -cleanup: - p->statm_size = 0; - p->statm_resident = 0; - p->statm_share = 0; - // p->statm_text = 0; - // p->statm_lib = 0; - // p->statm_data = 0; - // p->statm_dirty = 0; - return 0; -#endif -} - static inline int read_proc_pid_io(struct pid_stat *p, void *ptr) { (void)ptr; #ifdef __FreeBSD__ @@ -1979,7 +2069,7 @@ static inline void link_all_processes_to_their_parents(void) { // 1. read all files in /proc // 2. for each numeric directory: // i. read /proc/pid/stat -// ii. read /proc/pid/statm +// ii. read /proc/pid/status // iii. read /proc/pid/io (requires root access) // iii. read the entries in directory /proc/pid/fd (requires root access) // for each entry: @@ -1992,7 +2082,7 @@ static inline void link_all_processes_to_their_parents(void) { // to avoid filling up all disk space // if debug is enabled, all errors are printed -#ifndef __FreeBSD__ +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) static int compar_pid(const void *pid1, const void *pid2) { struct pid_stat *p1 = all_pids[*((pid_t *)pid1)]; @@ -2006,8 +2096,8 @@ static int compar_pid(const void *pid1, const void *pid2) { #endif static inline int collect_data_for_pid(pid_t pid, void *ptr) { - if(unlikely(pid < INIT_PID || pid > pid_max)) { - error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, INIT_PID, pid_max); + if(unlikely(pid < 0 || pid > pid_max)) { + error("Invalid pid %d read (expected %d to %d). 
Ignoring process.", pid, 0, pid_max); return 0; } @@ -2024,8 +2114,6 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { // there is no reason to proceed if we cannot get its status return 0; - read_proc_pid_ownership(p, ptr); - // check its parent pid if(unlikely(p->ppid < 0 || p->ppid > pid_max)) { error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); @@ -2038,10 +2126,10 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { managed_log(p, PID_LOG_IO, read_proc_pid_io(p, ptr)); // -------------------------------------------------------------------- - // /proc/<pid>/statm + // /proc/<pid>/status - if(unlikely(!managed_log(p, PID_LOG_STATM, read_proc_pid_statm(p, ptr)))) - // there is no reason to proceed if we cannot get its memory status + if(unlikely(!managed_log(p, PID_LOG_STATUS, read_proc_pid_status(p, ptr)))) + // there is no reason to proceed if we cannot get its status return 0; // -------------------------------------------------------------------- @@ -2069,28 +2157,46 @@ static int collect_data_for_all_processes(void) { #ifdef __FreeBSD__ int i, procnum; - size_t procbase_size; - static struct kinfo_proc *procbase; - int mib[3]; + static size_t procbase_size = 0; + static struct kinfo_proc *procbase = NULL; - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PROC; - if (unlikely(sysctl(mib, 3, NULL, &procbase_size, NULL, 0))) { + size_t new_procbase_size; + + int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_PROC }; + if (unlikely(sysctl(mib, 3, NULL, &new_procbase_size, NULL, 0))) { error("sysctl error: Can't get processes data size"); return 0; } - procbase = reallocz(procbase, procbase_size); - if (unlikely(sysctl(mib, 3, procbase, &procbase_size, NULL, 0))) { + + // give it some air for processes that may be started + // during this little time. 
+ new_procbase_size += 100 * sizeof(struct kinfo_proc); + + // increase the buffer if needed + if(new_procbase_size > procbase_size) { + procbase_size = new_procbase_size; + procbase = reallocz(procbase, procbase_size); + } + + // sysctl() gets from new_procbase_size the buffer size + // and also returns to it the amount of data filled in + new_procbase_size = procbase_size; + + // get the processes from the system + if (unlikely(sysctl(mib, 3, procbase, &new_procbase_size, NULL, 0))) { error("sysctl error: Can't get processes data"); return 0; } - procnum = procbase_size / sizeof(struct kinfo_proc); + + // based on the amount of data filled in + // calculate the number of processes we got + procnum = new_procbase_size / sizeof(struct kinfo_proc); + #endif if(all_pids_count) { -#ifndef __FreeBSD__ +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) size_t slc = 0; #endif for(p = root_of_pids; p ; p = p->next) { @@ -2107,7 +2213,7 @@ static int collect_data_for_all_processes(void) { #if (ALL_PIDS_ARE_READ_INSTANTLY == 0) if(unlikely(slc != all_pids_count)) { - error("Internal error: I was thinking I had %zu processes in my arrays, but it seems there are more.", all_pids_count); + error("Internal error: I was thinking I had %zu processes in my arrays, but it seems there are %zu.", all_pids_count, slc); all_pids_count = slc; } @@ -2130,7 +2236,7 @@ static int collect_data_for_all_processes(void) { } #ifdef __FreeBSD__ - for (i = INIT_PID; i < procnum - INIT_PID; ++i) { + for (i = 0 ; i < procnum ; ++i) { pid_t pid = procbase[i].ki_pid; collect_data_for_pid(pid, &procbase[i]); } @@ -2258,21 +2364,17 @@ static void apply_apps_groups_targets_inheritance(void) { if(unlikely(!p->sortlist && !p->children_count)) p->sortlist = sortlist++; - // if this process does not have any children - // and is not already merged - // and has a parent - // and its parent has children - // and the target of this process and its parent is the same, or the parent does not have a target - // and its 
parent is not init - // then, mark them as merged. if(unlikely( - !p->children_count - && !p->merged - && p->parent - && p->parent->children_count + !p->children_count // if this process does not have any children + && !p->merged // and is not already merged + && p->parent // and has a parent + && p->parent->children_count // and its parent has children + // and the target of this process and its parent is the same, + // or the parent does not have a target && (p->target == p->parent->target || !p->parent->target) - && p->ppid != INIT_PID + && p->ppid != INIT_PID // and its parent is not init )) { + // mark it as merged p->parent->children_count--; p->merged = 1; @@ -2296,6 +2398,10 @@ static void apply_apps_groups_targets_inheritance(void) { if(all_pids[INIT_PID]) all_pids[INIT_PID]->target = apps_groups_default_target; + // pid 0 goes always to default target + if(all_pids[0]) + all_pids[0]->target = apps_groups_default_target; + // give a default target on all top level processes if(unlikely(debug)) loops++; for(p = root_of_pids; p ; p = p->next) { @@ -2353,13 +2459,12 @@ static size_t zero_all_targets(struct target *root) { // w->rss = 0; w->processes = 0; - w->statm_size = 0; - w->statm_resident = 0; - w->statm_share = 0; - // w->statm_text = 0; - // w->statm_lib = 0; - // w->statm_data = 0; - // w->statm_dirty = 0; + w->status_vmsize = 0; + w->status_vmrss = 0; + w->status_vmshared = 0; + w->status_rssfile = 0; + w->status_rssshmem = 0; + w->status_vmswap = 0; w->io_logical_bytes_read = 0; w->io_logical_bytes_written = 0; @@ -2505,13 +2610,12 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, // w->rss += p->rss; - w->statm_size += p->statm_size; - w->statm_resident += p->statm_resident; - w->statm_share += p->statm_share; - // w->statm_text += p->statm_text; - // w->statm_lib += p->statm_lib; - // w->statm_data += p->statm_data; - // w->statm_dirty += p->statm_dirty; + w->status_vmsize += p->status_vmsize; + w->status_vmrss 
+= p->status_vmrss; + w->status_vmshared += p->status_vmshared; + w->status_rssfile += p->status_rssfile; + w->status_rssshmem += p->status_rssshmem; + w->status_vmswap += p->status_vmswap; w->io_logical_bytes_read += p->io_logical_bytes_read; w->io_logical_bytes_written += p->io_logical_bytes_written; @@ -2944,17 +3048,26 @@ static void send_collected_data_to_netdata(struct target *root, const char *type send_BEGIN(type, "mem", dt); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, (w->statm_resident > w->statm_share)?(w->statm_resident - w->statm_share):0ULL); + send_SET(w->name, (w->status_vmrss > w->status_vmshared)?(w->status_vmrss - w->status_vmshared):0ULL); } send_END(); send_BEGIN(type, "vmem", dt); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - send_SET(w->name, w->statm_size); + send_SET(w->name, w->status_vmsize); } send_END(); +#ifndef __FreeBSD__ + send_BEGIN(type, "swap", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + send_SET(w->name, w->status_vmswap); + } + send_END(); +#endif + send_BEGIN(type, "minor_faults", dt); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) @@ -3056,22 +3169,22 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type fprintf(stdout, "CHART %s.mem '' '%s Real Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, sysconf(_SC_PAGESIZE), 1024L*1024L); + fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L); } - fprintf(stdout, "CHART %s.vmem '' '%s Virtual Memory Size' 'MB' mem %s.vmem stacked 20004 %d\n", type, title, type, update_every); + fprintf(stdout, "CHART %s.vmem '' '%s Virtual Memory Size' 'MB' mem %s.vmem stacked 20005 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, 
"DIMENSION %s '' absolute %ld %ld\n", w->name, sysconf(_SC_PAGESIZE), 1024L*1024L); + fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L); } - fprintf(stdout, "CHART %s.threads '' '%s Threads' 'threads' processes %s.threads stacked 20005 %d\n", type, title, type, update_every); + fprintf(stdout, "CHART %s.threads '' '%s Threads' 'threads' processes %s.threads stacked 20006 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); } - fprintf(stdout, "CHART %s.processes '' '%s Processes' 'processes' processes %s.processes stacked 20004 %d\n", type, title, type, update_every); + fprintf(stdout, "CHART %s.processes '' '%s Processes' 'processes' processes %s.processes stacked 20007 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); @@ -3097,7 +3210,15 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type } } - fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20010 %d\n", type, title, type, update_every); +#ifndef __FreeBSD__ + fprintf(stdout, "CHART %s.swap '' '%s Swap Memory' 'MB' swap %s.swap stacked 20011 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L); + } +#endif + + fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20012 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL); @@ -3388,6 +3509,8 @@ static int check_capabilities() { int main(int argc, char **argv) { // debug_flags = D_PROCFILE; + pagesize = 
(size_t)sysconf(_SC_PAGESIZE); + // set the name for logging program_name = "apps.plugin"; @@ -3469,7 +3592,7 @@ int main(int argc, char **argv) { #warning "compiling for profiling" static int profiling_count=0; profiling_count++; - if(unlikely(profiling_count > 1000)) exit(0); + if(unlikely(profiling_count > 2000)) exit(0); usec_t dt = update_every * USEC_PER_SEC; #else usec_t dt = heartbeat_next(&hb, step); diff --git a/src/backend_prometheus.c b/src/backend_prometheus.c index 88ec2c65b..bfcda9297 100644 --- a/src/backend_prometheus.c +++ b/src/backend_prometheus.c @@ -2,23 +2,29 @@ // ---------------------------------------------------------------------------- // PROMETHEUS -// /api/v1/allmetrics?format=prometheus +// /api/v1/allmetrics?format=prometheus and /api/v1/allmetrics?format=prometheus_all_hosts static struct prometheus_server { const char *server; uint32_t hash; + RRDHOST *host; time_t last_access; struct prometheus_server *next; } *prometheus_server_root = NULL; -static inline time_t prometheus_server_last_access(const char *server, time_t now) { +static inline time_t prometheus_server_last_access(const char *server, RRDHOST *host, time_t now) { + static netdata_mutex_t prometheus_server_root_mutex = NETDATA_MUTEX_INITIALIZER; + uint32_t hash = simple_hash(server); + netdata_mutex_lock(&prometheus_server_root_mutex); + struct prometheus_server *ps; for(ps = prometheus_server_root; ps ;ps = ps->next) { - if (hash == ps->hash && !strcmp(server, ps->server)) { + if (host == ps->host && hash == ps->hash && !strcmp(server, ps->server)) { time_t last = ps->last_access; ps->last_access = now; + netdata_mutex_unlock(&prometheus_server_root_mutex); return last; } } @@ -26,10 +32,12 @@ static inline time_t prometheus_server_last_access(const char *server, time_t no ps = callocz(1, sizeof(struct prometheus_server)); ps->server = strdupz(server); ps->hash = hash; + ps->host = host; ps->last_access = now; ps->next = prometheus_server_root; prometheus_server_root = 
ps; + netdata_mutex_unlock(&prometheus_server_root_mutex); return 0; } @@ -102,7 +110,7 @@ static inline char *prometheus_units_copy(char *d, const char *s, size_t usable) #define PROMETHEUS_ELEMENT_MAX 256 #define PROMETHEUS_LABELS_MAX 1024 -static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER *wb, const char *prefix, uint32_t options, time_t after, time_t before, int allhosts, int help, int types, int names) { +static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER *wb, const char *prefix, uint32_t options, time_t after, time_t before, int allhosts, int help, int types, int names, int timestamps) { rrdhost_rdlock(host); char hostname[PROMETHEUS_ELEMENT_MAX + 1]; @@ -110,14 +118,49 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER char labels[PROMETHEUS_LABELS_MAX + 1] = ""; if(allhosts) { - if(host->tags && *(host->tags)) - buffer_sprintf(wb, "netdata_host_tags{instance=\"%s\",%s} 1 %llu\n", hostname, host->tags, now_realtime_usec() / USEC_PER_MS); + if(timestamps) + buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1 %llu\n", hostname, host->program_name, host->program_version, now_realtime_usec() / USEC_PER_MS); + else + buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1\n", hostname, host->program_name, host->program_version); + + if(host->tags && *(host->tags)) { + if(timestamps) { + buffer_sprintf(wb, "netdata_host_tags_info{instance=\"%s\",%s} 1 %llu\n", hostname, host->tags, now_realtime_usec() / USEC_PER_MS); + + // deprecated, exists only for compatibility with older queries + buffer_sprintf(wb, "netdata_host_tags{instance=\"%s\",%s} 1 %llu\n", hostname, host->tags, now_realtime_usec() / USEC_PER_MS); + } + else { + buffer_sprintf(wb, "netdata_host_tags_info{instance=\"%s\",%s} 1\n", hostname, host->tags); + + // deprecated, exists only for compatibility with older queries + buffer_sprintf(wb, 
"netdata_host_tags{instance=\"%s\",%s} 1\n", hostname, host->tags); + } + + } snprintfz(labels, PROMETHEUS_LABELS_MAX, ",instance=\"%s\"", hostname); } else { - if(host->tags && *(host->tags)) - buffer_sprintf(wb, "netdata_host_tags{%s} 1 %llu\n", host->tags, now_realtime_usec() / USEC_PER_MS); + if(timestamps) + buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1 %llu\n", hostname, host->program_name, host->program_version, now_realtime_usec() / USEC_PER_MS); + else + buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1\n", hostname, host->program_name, host->program_version); + + if(host->tags && *(host->tags)) { + if(timestamps) { + buffer_sprintf(wb, "netdata_host_tags_info{%s} 1 %llu\n", host->tags, now_realtime_usec() / USEC_PER_MS); + + // deprecated, exists only for compatibility with older queries + buffer_sprintf(wb, "netdata_host_tags{%s} 1 %llu\n", host->tags, now_realtime_usec() / USEC_PER_MS); + } + else { + buffer_sprintf(wb, "netdata_host_tags_info{%s} 1\n", host->tags); + + // deprecated, exists only for compatibility with older queries + buffer_sprintf(wb, "netdata_host_tags{%s} 1\n", host->tags); + } + } } // for each chart @@ -207,18 +250,31 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER , t ); - buffer_sprintf(wb - , "%s_%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " COLLECTED_NUMBER_FORMAT " %llu\n" - , prefix - , context - , suffix - , chart - , family - , dimension - , labels - , rd->last_collected_value - , timeval_msec(&rd->last_collected_time) - ); + if(timestamps) + buffer_sprintf(wb + , "%s_%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " COLLECTED_NUMBER_FORMAT " %llu\n" + , prefix + , context + , suffix + , chart + , family + , dimension + , labels + , rd->last_collected_value + , timeval_msec(&rd->last_collected_time) + ); + else + buffer_sprintf(wb + , "%s_%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " 
COLLECTED_NUMBER_FORMAT "\n" + , prefix + , context + , suffix + , chart + , family + , dimension + , labels + , rd->last_collected_value + ); } else { // the dimensions of the chart, do not have the same algorithm, multiplier or divisor @@ -253,18 +309,31 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER , t ); - buffer_sprintf(wb - , "%s_%s_%s%s{chart=\"%s\",family=\"%s\"%s} " COLLECTED_NUMBER_FORMAT " %llu\n" - , prefix - , context - , dimension - , suffix - , chart - , family - , labels - , rd->last_collected_value - , timeval_msec(&rd->last_collected_time) - ); + if(timestamps) + buffer_sprintf(wb + , "%s_%s_%s%s{chart=\"%s\",family=\"%s\"%s} " COLLECTED_NUMBER_FORMAT " %llu\n" + , prefix + , context + , dimension + , suffix + , chart + , family + , labels + , rd->last_collected_value + , timeval_msec(&rd->last_collected_time) + ); + else + buffer_sprintf(wb + , "%s_%s_%s%s{chart=\"%s\",family=\"%s\"%s} " COLLECTED_NUMBER_FORMAT "\n" + , prefix + , context + , dimension + , suffix + , chart + , family + , labels + , rd->last_collected_value + ); } } else { @@ -302,18 +371,31 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER , suffix ); - buffer_sprintf(wb, "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " CALCULATED_NUMBER_FORMAT " %llu\n" - , prefix - , context - , units - , suffix - , chart - , family - , dimension - , labels - , value - , last_t * MSEC_PER_SEC - ); + if(timestamps) + buffer_sprintf(wb, "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " CALCULATED_NUMBER_FORMAT " %llu\n" + , prefix + , context + , units + , suffix + , chart + , family + , dimension + , labels + , value + , last_t * MSEC_PER_SEC + ); + else + buffer_sprintf(wb, "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " CALCULATED_NUMBER_FORMAT "\n" + , prefix + , context + , units + , suffix + , chart + , family + , dimension + , labels + , value + ); } } } @@ -329,7 +411,7 @@ static void 
rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER static inline time_t prometheus_preparation(RRDHOST *host, BUFFER *wb, uint32_t options, const char *server, time_t now, int help) { if(!server || !*server) server = "default"; - time_t after = prometheus_server_last_access(server, now); + time_t after = prometheus_server_last_access(server, host, now); int first_seen = 0; if(!after) { @@ -374,16 +456,16 @@ static inline time_t prometheus_preparation(RRDHOST *host, BUFFER *wb, uint32_t return after; } -void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names) { +void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names, int timestamps) { time_t before = now_realtime_sec(); // we start at the point we had stopped before time_t after = prometheus_preparation(host, wb, options, server, before, help); - rrd_stats_api_v1_charts_allmetrics_prometheus(host, wb, prefix, options, after, before, 0, help, types, names); + rrd_stats_api_v1_charts_allmetrics_prometheus(host, wb, prefix, options, after, before, 0, help, types, names, timestamps); } -void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names) { +void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names, int timestamps) { time_t before = now_realtime_sec(); // we start at the point we had stopped before @@ -391,7 +473,7 @@ void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(RRDHOST *host, BUFF rrd_rdlock(); rrdhost_foreach_read(host) { - rrd_stats_api_v1_charts_allmetrics_prometheus(host, wb, prefix, options, after, 
before, 1, help, types, names); + rrd_stats_api_v1_charts_allmetrics_prometheus(host, wb, prefix, options, after, before, 1, help, types, names, timestamps); } rrd_unlock(); } diff --git a/src/backend_prometheus.h b/src/backend_prometheus.h index 53dddb0d2..b1a021baa 100644 --- a/src/backend_prometheus.h +++ b/src/backend_prometheus.h @@ -5,7 +5,7 @@ #ifndef NETDATA_BACKEND_PROMETHEUS_H #define NETDATA_BACKEND_PROMETHEUS_H -extern void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names); -extern void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names); +extern void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names, int timestamps); +extern void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, uint32_t options, int help, int types, int names, int timestamps); #endif //NETDATA_BACKEND_PROMETHEUS_H diff --git a/src/backends.c b/src/backends.c index df9a1ccbc..1360638f2 100644 --- a/src/backends.c +++ b/src/backends.c @@ -56,6 +56,8 @@ inline calculated_number backend_calculate_value_from_stored_data( , time_t *first_timestamp // the first point of the database used in this response , time_t *last_timestamp // the timestamp that should be reported to backend ) { + RRDHOST *host = st->rrdhost; + // find the edges of the rrd database for this chart time_t first_t = rrdset_first_entry_t(st); time_t last_t = rrdset_last_entry_t(st); @@ -87,7 +89,7 @@ inline calculated_number backend_calculate_value_from_stored_data( if(unlikely(before < first_t || after > last_t)) { // the chart has not been updated in the wanted timeframe debug(D_BACKEND, 
"BACKEND: %s.%s.%s: aligned timeframe %lu to %lu is outside the chart's database range %lu to %lu", - st->rrdhost->hostname, st->id, rd->id, + host->hostname, st->id, rd->id, (unsigned long)after, (unsigned long)before, (unsigned long)first_t, (unsigned long)last_t ); @@ -124,7 +126,7 @@ inline calculated_number backend_calculate_value_from_stored_data( if(unlikely(!counter)) { debug(D_BACKEND, "BACKEND: %s.%s.%s: no values stored in database for range %lu to %lu", - st->rrdhost->hostname, st->id, rd->id, + host->hostname, st->id, rd->id, (unsigned long)after, (unsigned long)before ); return NAN; @@ -345,9 +347,24 @@ static inline int format_dimension_collected_json_plaintext( (void)before; (void)options; + const char *tags_pre = "", *tags_post = "", *tags = host->tags; + if(!tags) tags = ""; + + if(*tags) { + if(*tags == '{' || *tags == '[' || *tags == '"') { + tags_pre = "\"host_tags\":"; + tags_post = ","; + } + else { + tags_pre = "\"host_tags\":\""; + tags_post = "\","; + } + } + buffer_sprintf(b, "{" "\"prefix\":\"%s\"," "\"hostname\":\"%s\"," + "%s%s%s" "\"chart_id\":\"%s\"," "\"chart_name\":\"%s\"," @@ -360,9 +377,10 @@ static inline int format_dimension_collected_json_plaintext( "\"name\":\"%s\"," "\"value\":" COLLECTED_NUMBER_FORMAT "," - "\"timestamp\": %u}\n", + "\"timestamp\": %u}\n", prefix, hostname, + tags_pre, tags, tags_post, st->id, st->name, @@ -398,9 +416,24 @@ static inline int format_dimension_stored_json_plaintext( calculated_number value = backend_calculate_value_from_stored_data(st, rd, after, before, options, &first_t, &last_t); if(!isnan(value)) { + const char *tags_pre = "", *tags_post = "", *tags = host->tags; + if(!tags) tags = ""; + + if(*tags) { + if(*tags == '{' || *tags == '[' || *tags == '"') { + tags_pre = "\"host_tags\":"; + tags_post = ","; + } + else { + tags_pre = "\"host_tags\":\""; + tags_post = "\","; + } + } + buffer_sprintf(b, "{" "\"prefix\":\"%s\"," "\"hostname\":\"%s\"," + "%s%s%s" "\"chart_id\":\"%s\"," 
"\"chart_name\":\"%s\"," @@ -416,7 +449,8 @@ static inline int format_dimension_stored_json_plaintext( "\"timestamp\": %u}\n", prefix, hostname, - + tags_pre, tags, tags_post, + st->id, st->name, st->family, @@ -445,8 +479,11 @@ static inline int process_json_response(BUFFER *b) { // the backend thread static SIMPLE_PATTERN *charts_pattern = NULL; +static SIMPLE_PATTERN *hosts_pattern = NULL; inline int backends_can_send_rrdset(uint32_t options, RRDSET *st) { + RRDHOST *host = st->rrdhost; + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_BACKEND_IGNORE))) return 0; @@ -456,18 +493,18 @@ inline int backends_can_send_rrdset(uint32_t options, RRDSET *st) { rrdset_flag_set(st, RRDSET_FLAG_BACKEND_SEND); else { rrdset_flag_set(st, RRDSET_FLAG_BACKEND_IGNORE); - debug(D_BACKEND, "BACKEND: not sending chart '%s' of host '%s', because it is disabled for backends.", st->id, st->rrdhost->hostname); + debug(D_BACKEND, "BACKEND: not sending chart '%s' of host '%s', because it is disabled for backends.", st->id, host->hostname); return 0; } } if(unlikely(!rrdset_is_available_for_backends(st))) { - debug(D_BACKEND, "BACKEND: not sending chart '%s' of host '%s', because it is not available for backends.", st->id, st->rrdhost->hostname); + debug(D_BACKEND, "BACKEND: not sending chart '%s' of host '%s', because it is not available for backends.", st->id, host->hostname); return 0; } if(unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_NONE && !((options & BACKEND_SOURCE_BITS) == BACKEND_SOURCE_DATA_AS_COLLECTED))) { - debug(D_BACKEND, "BACKEND: not sending chart '%s' of host '%s' because its memory mode is '%s' and the backend requires database access.", st->id, st->rrdhost->hostname, rrd_memory_mode_name(st->rrdhost->rrd_memory_mode)); + debug(D_BACKEND, "BACKEND: not sending chart '%s' of host '%s' because its memory mode is '%s' and the backend requires database access.", st->id, host->hostname, rrd_memory_mode_name(host->rrd_memory_mode)); return 0; } @@ -494,23 +531,24 @@ inline 
uint32_t backend_parse_data_source(const char *source, uint32_t mode) { return mode; } +static void backends_main_cleanup(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + void *backends_main(void *ptr) { + netdata_thread_cleanup_push(backends_main_cleanup, ptr); + int default_port = 0; int sock = -1; - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; - BUFFER *b = buffer_create(1), *response = buffer_create(1); int (*backend_request_formatter)(BUFFER *, const char *, RRDHOST *, const char *, RRDSET *, RRDDIM *, time_t, time_t, uint32_t) = NULL; int (*backend_response_checker)(BUFFER *) = NULL; - info("BACKEND: thread created with task id %d", gettid()); - - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("BACKEND: cannot set pthread cancel type to DEFERRED."); - - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("BACKEND: cannot set pthread cancel state to ENABLE."); - // ------------------------------------------------------------------------ // collect configuration options @@ -529,7 +567,8 @@ void *backends_main(void *ptr) { long timeoutms = config_get_number(CONFIG_SECTION_BACKEND, "timeout ms", backend_update_every * 2 * 1000); backend_send_names = config_get_boolean(CONFIG_SECTION_BACKEND, "send names instead of ids", backend_send_names); - charts_pattern = simple_pattern_create(config_get(CONFIG_SECTION_BACKEND, "send charts matching", "*"), SIMPLE_PATTERN_EXACT); + charts_pattern = simple_pattern_create(config_get(CONFIG_SECTION_BACKEND, "send charts matching", "*"), NULL, SIMPLE_PATTERN_EXACT); + hosts_pattern = simple_pattern_create(config_get(CONFIG_SECTION_BACKEND, "send hosts matching", "localhost *"), NULL, SIMPLE_PATTERN_EXACT); // 
------------------------------------------------------------------------ @@ -660,7 +699,7 @@ void *backends_main(void *ptr) { heartbeat_t hb; heartbeat_init(&hb); - for(;;) { + while(!netdata_exit) { // ------------------------------------------------------------------------ // Wait for the next iteration point. @@ -672,10 +711,7 @@ void *backends_main(void *ptr) { // ------------------------------------------------------------------------ // add to the buffer the data we need to send to the backend - int pthreadoldcancelstate; - - if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &pthreadoldcancelstate) != 0)) - error("BACKEND: cannot set pthread cancel state to DISABLE."); + netdata_thread_disable_cancelability(); size_t count_hosts = 0; size_t count_charts_total = 0; @@ -684,6 +720,21 @@ void *backends_main(void *ptr) { rrd_rdlock(); RRDHOST *host; rrdhost_foreach_read(host) { + if(unlikely(!rrdhost_flag_check(host, RRDHOST_FLAG_BACKEND_SEND|RRDHOST_FLAG_BACKEND_DONT_SEND))) { + char *name = (host == localhost)?"localhost":host->hostname; + if (!hosts_pattern || simple_pattern_matches(hosts_pattern, name)) { + rrdhost_flag_set(host, RRDHOST_FLAG_BACKEND_SEND); + info("enabled backend for host '%s'", name); + } + else { + rrdhost_flag_set(host, RRDHOST_FLAG_BACKEND_DONT_SEND); + info("disabled backend for host '%s'", name); + } + } + + if(unlikely(!rrdhost_flag_check(host, RRDHOST_FLAG_BACKEND_SEND))) + continue; + rrdhost_rdlock(host); count_hosts++; @@ -724,10 +775,9 @@ void *backends_main(void *ptr) { } rrd_unlock(); - debug(D_BACKEND, "BACKEND: buffer has %zu bytes, added metrics for %zu dimensions, of %zu charts, from %zu hosts", buffer_strlen(b), count_dims_total, count_charts_total, count_hosts); + netdata_thread_enable_cancelability(); - if(unlikely(pthread_setcancelstate(pthreadoldcancelstate, NULL) != 0)) - error("BACKEND: cannot set pthread cancel state to RESTORE (%d).", pthreadoldcancelstate); + debug(D_BACKEND, "BACKEND: buffer has %zu 
bytes, added metrics for %zu dimensions, of %zu charts, from %zu hosts", buffer_strlen(b), count_dims_total, count_charts_total, count_hosts); // ------------------------------------------------------------------------ @@ -914,9 +964,6 @@ cleanup: buffer_free(b); buffer_free(response); - info("BACKEND: thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/cgroup-network.c b/src/cgroup-network.c index 7b1a02342..0e2d5163a 100644 --- a/src/cgroup-network.c +++ b/src/cgroup-network.c @@ -8,17 +8,28 @@ #include <sched.h> #endif +char *host_prefix = ""; + +char environment_variable2[FILENAME_MAX + 50] = ""; +char *environment[] = { + "PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin", + environment_variable2, + NULL +}; + + // ---------------------------------------------------------------------------- // callback required by fatal() void netdata_cleanup_and_exit(int ret) { exit(ret); } - void health_reload(void) {}; void rrdhost_save_all(void) {}; +// ---------------------------------------------------------------------------- + struct iface { const char *device; uint32_t hash; @@ -87,6 +98,19 @@ struct iface *read_proc_net_dev(const char *prefix) { return root; } +void free_iface(struct iface *iface) { + freez((void *)iface->device); + freez(iface); +} + +void free_host_ifaces(struct iface *iface) { + while(iface) { + struct iface *t = iface->next; + free_iface(iface); + iface = t; + } +} + int iface_is_eligible(struct iface *iface) { if(iface->iflink != iface->ifindex) return 1; @@ -247,25 +271,18 @@ int switch_namespace(const char *prefix, pid_t pid) { #endif } -pid_t read_pid_from_cgroup(const char *path) { - char buffer[FILENAME_MAX + 1]; - - snprintfz(buffer, FILENAME_MAX, "%s/cgroup.procs", path); - FILE *fp = fopen(buffer, "r"); - if(!fp) { - error("Cannot read file '%s'.", buffer); - snprintfz(buffer, FILENAME_MAX, "%s/tasks", path); - fp = fopen(buffer, "r"); - 
} - +pid_t read_pid_from_cgroup_file(const char *filename) { + FILE *fp = fopen(filename, "r"); if(!fp) { - error("Cannot read file '%s'.", buffer); + error("Cannot read file '%s'.", filename); return 0; } + char buffer[100 + 1]; pid_t pid = 0; char *s; - while((s = fgets(buffer, FILENAME_MAX, fp))) { + while((s = fgets(buffer, 100, fp))) { + buffer[100] = '\0'; pid = atoi(s); if(pid > 0) break; } @@ -274,6 +291,46 @@ pid_t read_pid_from_cgroup(const char *path) { return pid; } +pid_t read_pid_from_cgroup_files(const char *path) { + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s/cgroup.procs", path); + pid_t pid = read_pid_from_cgroup_file(filename); + if(pid > 0) return pid; + + snprintfz(filename, FILENAME_MAX, "%s/tasks", path); + return read_pid_from_cgroup_file(filename); +} + +pid_t read_pid_from_cgroup(const char *path) { + pid_t pid = read_pid_from_cgroup_files(path); + if (pid > 0) return pid; + + DIR *dir = opendir(path); + if (!dir) { + error("cannot read directory '%s'", path); + return 0; + } + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' 
&& de->d_name[2] == '\0') + )) + continue; + + if (de->d_type == DT_DIR) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s", path, de->d_name); + pid = read_pid_from_cgroup(filename); + if(pid > 0) break; + } + } + closedir(dir); + return pid; +} // ---------------------------------------------------------------------------- // send the result to netdata @@ -298,7 +355,7 @@ void add_device(const char *host, const char *guest) { if(f->host_device_hash == hash && strcmp(host, f->host_device) == 0) { if(guest && !f->guest_device) - f->guest_device = strdup(guest); + f->guest_device = strdupz(guest); return; } @@ -334,21 +391,36 @@ void detect_veth_interfaces(pid_t pid) { const char *prefix = getenv("NETDATA_HOST_PREFIX"); host = read_proc_net_dev(prefix); - if(!host) - fatal("cannot read host interface list."); + if(!host) { + errno = 0; + error("cannot read host interface list."); + return; + } - if(!eligible_ifaces(host)) - fatal("there are no double-linked host interfaces available."); + if(!eligible_ifaces(host)) { + errno = 0; + error("there are no double-linked host interfaces available."); + goto cleanup; + } - if(switch_namespace(prefix, pid)) - fatal("cannot switch to the namespace of pid %u", (unsigned int)pid); + if(switch_namespace(prefix, pid)) { + errno = 0; + error("cannot switch to the namespace of pid %u", (unsigned int) pid); + goto cleanup; + } cgroup = read_proc_net_dev(NULL); - if(!cgroup) - fatal("cannot read cgroup interface list."); + if(!cgroup) { + errno = 0; + error("cannot read cgroup interface list."); + goto cleanup; + } - if(!eligible_ifaces(cgroup)) - fatal("there are not double-linked cgroup interfaces available."); + if(!eligible_ifaces(cgroup)) { + errno = 0; + error("there are not double-linked cgroup interfaces available."); + goto cleanup; + } for(h = host; h ; h = h->next) { if(iface_is_eligible(h)) { @@ -359,34 +431,29 @@ void detect_veth_interfaces(pid_t pid) { } } } + +cleanup: + 
free_host_ifaces(host); } // ---------------------------------------------------------------------------- // call the external helper #define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 -void call_the_helper(const char *me, pid_t pid, const char *cgroup) { - const char *pluginsdir = getenv("NETDATA_PLUGINS_DIR"); - char *m = NULL; - - if(!pluginsdir || !*pluginsdir) { - m = strdupz(me); - pluginsdir = dirname(m); - } - +void call_the_helper(pid_t pid, const char *cgroup) { if(setresuid(0, 0, 0) == -1) error("setresuid(0, 0, 0) failed."); char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; if(cgroup) - snprintfz(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec %s/cgroup-network-helper.sh --cgroup '%s'", pluginsdir, cgroup); + snprintfz(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --cgroup '%s'", cgroup); else - snprintfz(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec %s/cgroup-network-helper.sh --pid %d", pluginsdir, pid); + snprintfz(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --pid %d", pid); info("running: %s", buffer); pid_t cgroup_pid; - FILE *fp = mypopen(buffer, &cgroup_pid); + FILE *fp = mypopene(buffer, &cgroup_pid, environment); if(fp) { char *s; while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp))) { @@ -409,10 +476,102 @@ void call_the_helper(const char *me, pid_t pid, const char *cgroup) { } else error("cannot execute cgroup-network helper script: %s", buffer); +} + +int is_valid_path_symbol(char c) { + switch(c) { + case '/': // path separators + case '\\': // needed for virsh domains \x2d1\x2dname + case ' ': // space + case '-': // hyphen + case '_': // underscore + case '.': // dot + case ',': // comma + return 1; + + default: + return 0; + } +} + +// we will pass this path a shell script running as root +// so, we need to make sure the path will be valid +// and will not include anything that could allow +// the caller use shell expansion for gaining 
escalated +// privileges. +int verify_path(const char *path) { + struct stat sb; + + char c; + const char *s = path; + while((c = *s++)) { + if(!( isalnum(c) || is_valid_path_symbol(c) )) { + error("invalid character in path '%s'", path); + return -1; + } + } + + if(strstr(path, "\\") && !strstr(path, "\\x")) { + error("invalid escape sequence in path '%s'", path); + return 1; + } + + if(strstr(path, "/../")) { + error("invalid parent path sequence detected in '%s'", path); + return 1; + } + + if(path[0] != '/') { + error("only absolute path names are supported - invalid path '%s'", path); + return -1; + } + + if (stat(path, &sb) == -1) { + error("cannot stat() path '%s'", path); + return -1; + } - freez(m); + if((sb.st_mode & S_IFMT) != S_IFDIR) { + error("path '%s' is not a directory", path); + return -1; + } + + return 0; } +/* +char *fix_path_variable(void) { + const char *path = getenv("PATH"); + if(!path || !*path) return 0; + + char *p = strdupz(path); + char *safe_path = callocz(1, strlen(p) + strlen("PATH=") + 1); + strcpy(safe_path, "PATH="); + + int added = 0; + char *ptr = p; + while(ptr && *ptr) { + char *s = strsep(&ptr, ":"); + if(s && *s) { + if(verify_path(s) == -1) { + error("the PATH variable includes an invalid path '%s' - removed it.", s); + } + else { + info("the PATH variable includes a valid path '%s'.", s); + if(added) strcat(safe_path, ":"); + strcat(safe_path, s); + added++; + } + } + } + + info("unsafe PATH: '%s'.", path); + info(" safe PATH: '%s'.", safe_path); + + freez(p); + return safe_path; +} +*/ // ---------------------------------------------------------------------------- // main @@ -429,6 +588,25 @@ int main(int argc, char **argv) { program_version = VERSION; error_log_syslog = 0; + + // ------------------------------------------------------------------------ + // make sure NETDATA_HOST_PREFIX is safe + + host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(!host_prefix || !*host_prefix) + host_prefix = ""; + + if(host_prefix[0] 
!= '\0' && verify_path(host_prefix) == -1) + fatal("invalid NETDATA_HOST_PREFIX '%s'", host_prefix); + + // ------------------------------------------------------------------------ + // build a safe environment for our script + + // the first environment variable is a fixed PATH= + snprintfz(environment_variable2, sizeof(environment_variable2) - 1, "NETDATA_HOST_PREFIX=%s", host_prefix); + + // ------------------------------------------------------------------------ + if(argc == 2 && (!strcmp(argv[1], "version") || !strcmp(argv[1], "-version") || !strcmp(argv[1], "--version") || !strcmp(argv[1], "-v") || !strcmp(argv[1], "-V"))) { fprintf(stderr, "cgroup-network %s\n", VERSION); exit(0); @@ -442,18 +620,23 @@ int main(int argc, char **argv) { if(pid <= 0) { errno = 0; - fatal("Invalid pid %d given", (int) pid); + error("Invalid pid %d given", (int) pid); + return 2; } - call_the_helper(argv[0], pid, NULL); + call_the_helper(pid, NULL); } else if(!strcmp(argv[1], "--cgroup")) { - pid = read_pid_from_cgroup(argv[2]); - call_the_helper(argv[0], pid, argv[2]); + char *cgroup = argv[2]; + if(verify_path(cgroup) == -1) + fatal("cgroup '%s' does not exist or is not valid.", cgroup); + + pid = read_pid_from_cgroup(cgroup); + call_the_helper(pid, cgroup); if(pid <= 0 && !detected_devices) { errno = 0; - fatal("Invalid pid %d read from cgroup '%s'", (int) pid, argv[2]); + error("Cannot find a cgroup PID from cgroup '%s'", cgroup); } } else @@ -462,5 +645,7 @@ int main(int argc, char **argv) { if(pid > 0) detect_veth_interfaces(pid); - return send_devices(); + int found = send_devices(); + if(found <= 0) return 1; + return 0; } diff --git a/src/common.c b/src/common.c index a976e96eb..94fd5e429 100644 --- a/src/common.c +++ b/src/common.c @@ -19,6 +19,7 @@ char *netdata_configured_home_dir = NULL; char *netdata_configured_host_prefix = NULL; char *netdata_configured_timezone = NULL; +struct rlimit rlimit_nofile = { .rlim_cur = 1024, .rlim_max = 1024 }; int enable_ksm = 1; 
volatile sig_atomic_t netdata_exit = 0; @@ -904,6 +905,30 @@ void strreverse(char *begin, char *end) { } } +char *strsep_on_1char(char **ptr, char c) { + if(unlikely(!ptr || !*ptr)) + return NULL; + + // remember the position we started + char *s = *ptr; + + // skip separators in front + while(*s == c) s++; + char *ret = s; + + // find the next separator + while(*s++) { + if(unlikely(*s == c)) { + *s++ = '\0'; + *ptr = s; + return ret; + } + } + + *ptr = NULL; + return ret; +} + char *mystrsep(char **ptr, char *s) { char *p = ""; while (p && !p[0] && *ptr) p = strsep(ptr, s); @@ -1116,22 +1141,6 @@ int fd_is_valid(int fd) { return fcntl(fd, F_GETFD) != -1 || errno != EBADF; } -pid_t gettid(void) { -#ifdef __FreeBSD__ - return (pid_t)pthread_getthreadid_np(); -#elif defined(__APPLE__) -#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1060) - uint64_t curthreadid; - pthread_threadid_np(NULL, &curthreadid); - return (pid_t)curthreadid; -#else /* __MAC_OS_X_VERSION_MIN_REQUIRED */ - return (pid_t)pthread_self; -#endif /* __MAC_OS_X_VERSION_MIN_REQUIRED */ -#else /* __APPLE__*/ - return (pid_t)syscall(SYS_gettid); -#endif /* __FreeBSD__, __APPLE__*/ -} - char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len) { char *s = fgets(buf, (int)buf_size, fp); if (!s) return NULL; diff --git a/src/common.h b/src/common.h index 667fe9d76..15fc50a6a 100644 --- a/src/common.h +++ b/src/common.h @@ -5,6 +5,7 @@ #include <config.h> #endif + // ---------------------------------------------------------------------------- // system include files for all netdata C programs @@ -99,6 +100,7 @@ #ifdef STORAGE_WITH_MATH #include <math.h> +#include <float.h> #endif #if defined(HAVE_INTTYPES_H) @@ -116,6 +118,28 @@ #endif // ---------------------------------------------------------------------------- +// netdata chart priorities + +// This is a work in progress - to scope is to collect here all chart priorities. 
+// These should be based on the CONTEXT of the charts + the chart id when needed +// - for each SECTION +1000 (or +X000 for big sections) +// - for each FAMILY +100 +// - for each CHART +10 + +// Memory Section - 1xxx +#define NETDATA_CHART_PRIO_MEM_SYSTEM 1000 +#define NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE 1010 +#define NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED 1020 +#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1030 +#define NETDATA_CHART_PRIO_MEM_KERNEL 1100 +#define NETDATA_CHART_PRIO_MEM_SLAB 1200 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES 1250 +#define NETDATA_CHART_PRIO_MEM_KSM 1300 +#define NETDATA_CHART_PRIO_MEM_NUMA 1400 +#define NETDATA_CHART_PRIO_MEM_HW 1500 + + +// ---------------------------------------------------------------------------- // netdata common definitions #if (SIZEOF_VOID_P == 8) @@ -136,6 +160,12 @@ #define NEVERNULL #endif +#ifdef HAVE_FUNC_ATTRIBUTE_NOINLINE +#define NOINLINE __attribute__((noinline)) +#else +#define NOINLINE +#endif + #ifdef HAVE_FUNC_ATTRIBUTE_MALLOC #define MALLOCLIKE __attribute__((malloc)) #else @@ -163,7 +193,7 @@ #ifdef abs #undef abs #endif -#define abs(x) ((x < 0)? -x : x) +#define abs(x) (((x) < 0)? 
(-(x)) : (x)) #define GUID_LEN 36 @@ -172,6 +202,7 @@ #include "clocks.h" #include "log.h" +#include "threads.h" #include "locks.h" #include "simple_pattern.h" #include "avl.h" @@ -292,9 +323,9 @@ extern int memory_file_save(const char *filename, void *mem, size_t size); extern int fd_is_valid(int fd); -extern int enable_ksm; +extern struct rlimit rlimit_nofile; -extern pid_t gettid(void); +extern int enable_ksm; extern int sleep_usec(usec_t usec); diff --git a/src/daemon.c b/src/daemon.c index 5c5333a36..471c62c6e 100644 --- a/src/daemon.c +++ b/src/daemon.c @@ -121,10 +121,10 @@ int become_user(const char *username, int pid_fd) { } #ifndef OOM_SCORE_ADJ_MAX -#define OOM_SCORE_ADJ_MAX 1000 +#define OOM_SCORE_ADJ_MAX (1000) #endif #ifndef OOM_SCORE_ADJ_MIN -#define OOM_SCORE_ADJ_MIN -1000 +#define OOM_SCORE_ADJ_MIN (-1000) #endif static void oom_score_adj(void) { @@ -265,6 +265,7 @@ static void sched_setscheduler_set(void) { for(i = 0 ; scheduler_defaults[i].name ; i++) { if(!strcmp(name, scheduler_defaults[i].name)) { found = 1; + policy = scheduler_defaults[i].policy; priority = scheduler_defaults[i].priority; flags = scheduler_defaults[i].flags; @@ -275,14 +276,16 @@ static void sched_setscheduler_set(void) { priority = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process scheduling priority", priority); #ifdef HAVE_SCHED_GET_PRIORITY_MIN + errno = 0; if(priority < sched_get_priority_min(policy)) { - error("scheduler %s priority %d is below the minimum %d. Using the minimum.", name, priority, sched_get_priority_min(policy)); + error("scheduler %s (%d) priority %d is below the minimum %d. Using the minimum.", name, policy, priority, sched_get_priority_min(policy)); priority = sched_get_priority_min(policy); } #endif #ifdef HAVE_SCHED_GET_PRIORITY_MAX + errno = 0; if(priority > sched_get_priority_max(policy)) { - error("scheduler %s priority %d is above the maximum %d. 
Using the maximum.", name, priority, sched_get_priority_max(policy)); + error("scheduler %s (%d) priority %d is above the maximum %d. Using the maximum.", name, policy, priority, sched_get_priority_max(policy)); priority = sched_get_priority_max(policy); } #endif @@ -291,7 +294,7 @@ static void sched_setscheduler_set(void) { } if(!found) { - error("Unknown scheduling policy %s - falling back to nice()", name); + error("Unknown scheduling policy '%s' - falling back to nice", name); goto fallback; } @@ -299,12 +302,13 @@ static void sched_setscheduler_set(void) { .sched_priority = priority }; + errno = 0; i = sched_setscheduler(0, policy, ¶m); if(i != 0) { - error("Cannot adjust netdata scheduling policy to %s (%d), with priority %d. Falling back to nice", name, policy, priority); + error("Cannot adjust netdata scheduling policy to %s (%d), with priority %d. Falling back to nice.", name, policy, priority); } else { - debug(D_SYSTEM, "Adjusted netdata scheduling policy to %s (%d), with priority %d.", name, policy, priority); + info("Adjusted netdata scheduling policy to %s (%d), with priority %d.", name, policy, priority); if(!(flags & SCHED_FLAG_USE_NICE)) return; } diff --git a/src/eval.h b/src/eval.h index cd271148c..6a5562fd4 100644 --- a/src/eval.h +++ b/src/eval.h @@ -6,7 +6,6 @@ typedef struct eval_variable { char *name; uint32_t hash; - struct rrdvar *rrdvar; struct eval_variable *next; } EVAL_VARIABLE; diff --git a/src/freebsd_devstat.c b/src/freebsd_devstat.c index 2ed64ad49..ed7466ead 100644 --- a/src/freebsd_devstat.c +++ b/src/freebsd_devstat.c @@ -221,6 +221,7 @@ int do_kern_devstat(int update_every, usec_t dt) { excluded_disks = simple_pattern_create( config_get(CONFIG_SECTION_KERN_DEVSTAT, "disable by default disks matching", DELAULT_EXLUDED_DISKS) + , NULL , SIMPLE_PATTERN_EXACT ); } diff --git a/src/freebsd_getifaddrs.c b/src/freebsd_getifaddrs.c index 94c0a6a4b..73f8f1824 100644 --- a/src/freebsd_getifaddrs.c +++ b/src/freebsd_getifaddrs.c @@ -170,8 
+170,8 @@ int do_getifaddrs(int update_every, usec_t dt) { CONFIG_BOOLEAN_AUTO); excluded_interfaces = simple_pattern_create( - config_get(CONFIG_SECTION_GETIFADDRS, "disable by default interfaces matching", - DELAULT_EXLUDED_INTERFACES) + config_get(CONFIG_SECTION_GETIFADDRS, "disable by default interfaces matching", DELAULT_EXLUDED_INTERFACES) + , NULL , SIMPLE_PATTERN_EXACT ); } diff --git a/src/freebsd_getmntinfo.c b/src/freebsd_getmntinfo.c index 66be53315..ea82b9fd1 100644 --- a/src/freebsd_getmntinfo.c +++ b/src/freebsd_getmntinfo.c @@ -122,7 +122,7 @@ static struct mount_point *get_mount_point(const char *name) { int do_getmntinfo(int update_every, usec_t dt) { (void)dt; -#define DELAULT_EXLUDED_PATHS "/proc/*" +#define DELAULT_EXCLUDED_PATHS "/proc/*" // taken from gnulib/mountlist.c and shortened to FreeBSD related fstypes #define DEFAULT_EXCLUDED_FILESYSTEMS "autofs procfs subfs devfs none" #define CONFIG_SECTION_GETMNTINFO "plugin:freebsd:getmntinfo" @@ -142,14 +142,16 @@ int do_getmntinfo(int update_every, usec_t dt) { excluded_mountpoints = simple_pattern_create( config_get(CONFIG_SECTION_GETMNTINFO, "exclude space metrics on paths", - DELAULT_EXLUDED_PATHS), - SIMPLE_PATTERN_EXACT + DELAULT_EXCLUDED_PATHS) + , NULL + , SIMPLE_PATTERN_EXACT ); excluded_filesystems = simple_pattern_create( config_get(CONFIG_SECTION_GETMNTINFO, "exclude space metrics on filesystems", - DEFAULT_EXCLUDED_FILESYSTEMS), - SIMPLE_PATTERN_EXACT + DEFAULT_EXCLUDED_FILESYSTEMS) + , NULL + , SIMPLE_PATTERN_EXACT ); } diff --git a/src/freebsd_sysctl.c b/src/freebsd_sysctl.c index 9f5615df8..1e11255aa 100644 --- a/src/freebsd_sysctl.c +++ b/src/freebsd_sysctl.c @@ -273,7 +273,7 @@ int do_vm_vmtotal(int update_every, usec_t dt) { "MB", "freebsd", "vm.vmtotal", - 5000, + NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED, update_every, RRDSET_TYPE_AREA ); @@ -1107,7 +1107,7 @@ int do_vm_stats_sys_v_pgfaults(int update_every, usec_t dt) { "page faults/s", "freebsd", "vm.stats.vm.v_pgfaults", - 
500, + NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS, update_every, RRDSET_TYPE_LINE ); diff --git a/src/freeipmi_plugin.c b/src/freeipmi_plugin.c index 9cd736bba..df4c019a4 100644 --- a/src/freeipmi_plugin.c +++ b/src/freeipmi_plugin.c @@ -538,6 +538,10 @@ static void excluded_record_ids_parse(const char *s) { if(n != 0) { excluded_record_ids = realloc(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int)); + if(!excluded_record_ids) { + fprintf(stderr, "freeipmi.plugin: failed to allocate memory. Exiting."); + exit(1); + } excluded_record_ids[excluded_record_ids_length++] = (int)n; } } diff --git a/src/global_statistics.c b/src/global_statistics.c index c184b6d68..4f34e92df 100644 --- a/src/global_statistics.c +++ b/src/global_statistics.c @@ -7,7 +7,8 @@ volatile struct global_statistics global_statistics = { .bytes_received = 0, .bytes_sent = 0, .content_size = 0, - .compressed_content_size = 0 + .compressed_content_size = 0, + .web_client_count = 1 }; netdata_mutex_t global_statistics_mutex = NETDATA_MUTEX_INITIALIZER; @@ -38,7 +39,7 @@ void finished_web_request_statistics(uint64_t dt, __atomic_fetch_add(&global_statistics.compressed_content_size, compressed_content_size, __ATOMIC_SEQ_CST); #else #warning NOT using atomic operations - using locks for global statistics - if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) + if (web_server_is_multithreaded) global_statistics_lock(); if (dt > global_statistics.web_usec_max) @@ -51,35 +52,39 @@ void finished_web_request_statistics(uint64_t dt, global_statistics.content_size += content_size; global_statistics.compressed_content_size += compressed_content_size; - if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) + if (web_server_is_multithreaded) global_statistics_unlock(); #endif } -void web_client_connected(void) { +uint64_t web_client_connected(void) { #if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) __atomic_fetch_add(&global_statistics.connected_clients, 1, 
__ATOMIC_SEQ_CST); + uint64_t id = __atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_SEQ_CST); #else - if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) + if (web_server_is_multithreaded) global_statistics_lock(); global_statistics.connected_clients++; + uint64_t id = global_statistics.web_client_count++; - if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) + if (web_server_is_multithreaded) global_statistics_unlock(); #endif + + return id; } void web_client_disconnected(void) { #if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_SEQ_CST); #else - if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) + if (web_server_is_multithreaded) global_statistics_lock(); global_statistics.connected_clients--; - if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) + if (web_server_is_multithreaded) global_statistics_unlock(); #endif } @@ -95,6 +100,7 @@ inline void global_statistics_copy(struct global_statistics *gs, uint8_t options gs->bytes_sent = __atomic_fetch_add(&global_statistics.bytes_sent, 0, __ATOMIC_SEQ_CST); gs->content_size = __atomic_fetch_add(&global_statistics.content_size, 0, __ATOMIC_SEQ_CST); gs->compressed_content_size = __atomic_fetch_add(&global_statistics.compressed_content_size, 0, __ATOMIC_SEQ_CST); + gs->web_client_count = __atomic_fetch_add(&global_statistics.web_client_count, 0, __ATOMIC_SEQ_CST); if(options & GLOBAL_STATS_RESET_WEB_USEC_MAX) { uint64_t n = 0; diff --git a/src/global_statistics.h b/src/global_statistics.h index d28aa4401..62fee6e36 100644 --- a/src/global_statistics.h +++ b/src/global_statistics.h @@ -14,6 +14,8 @@ struct global_statistics { volatile uint64_t bytes_sent; volatile uint64_t content_size; volatile uint64_t compressed_content_size; + + volatile uint64_t web_client_count; }; extern volatile struct global_statistics global_statistics; @@ -26,7 +28,7 @@ extern void finished_web_request_statistics(uint64_t 
dt, uint64_t content_size, uint64_t compressed_content_size); -extern void web_client_connected(void); +extern uint64_t web_client_connected(void); extern void web_client_disconnected(void); #define GLOBAL_STATS_RESET_WEB_USEC_MAX 0x01 diff --git a/src/health.c b/src/health.c index dfa7007b9..04e04f089 100644 --- a/src/health.c +++ b/src/health.c @@ -145,7 +145,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) { const char *exec = (ae->exec) ? ae->exec : host->health_default_exec; const char *recipient = (ae->recipient) ? ae->recipient : host->health_default_recipient; - snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s' '%s' '%s'", + snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '" CALCULATED_NUMBER_FORMAT_ZERO "' '" CALCULATED_NUMBER_FORMAT_ZERO "' '%s' '%u' '%u' '%s' '%s' '%s' '%s'", exec, recipient, host->registry_hostname, @@ -192,7 +192,7 @@ done: } static inline void health_process_notifications(RRDHOST *host, ALARM_ENTRY *ae) { - debug(D_HEALTH, "Health alarm '%s.%s' = %0.2Lf - changed status from %s to %s", + debug(D_HEALTH, "Health alarm '%s.%s' = " CALCULATED_NUMBER_FORMAT_AUTO " - changed status from %s to %s", ae->chart?ae->chart:"NOCHART", ae->name, ae->new_value, rrdcalc_status2string(ae->old_status), @@ -338,22 +338,21 @@ static inline int check_if_resumed_from_suspention(void) { return ret; } -void *health_main(void *ptr) { +static void health_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("HEALTH thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = 
NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); +void *health_main(void *ptr) { + netdata_thread_cleanup_push(health_main_cleanup, ptr); int min_run_every = (int)config_get_number(CONFIG_SECTION_HEALTH, "run at least every seconds", 10); if(min_run_every < 1) min_run_every = 1; - BUFFER *wb = buffer_create(100); - time_t now = now_realtime_sec(); time_t hibernation_delay = config_get_number(CONFIG_SECTION_HEALTH, "postpone alarms during hibernation for seconds", 60); @@ -362,7 +361,7 @@ void *health_main(void *ptr) { loop++; debug(D_HEALTH, "Health monitoring iteration no %u started", loop); - int oldstate, runnable = 0, apply_hibernation_delay = 0; + int runnable = 0, apply_hibernation_delay = 0; time_t next_run = now + min_run_every; RRDCALC *rc; @@ -374,9 +373,6 @@ void *health_main(void *ptr) { ); } - if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0)) - error("Cannot set pthread cancel state to DISABLE."); - rrd_rdlock(); RRDHOST *host; @@ -424,13 +420,14 @@ void *health_main(void *ptr) { int value_is_null = 0; int ret = rrdset2value_api_v1(rc->rrdset - , wb + , NULL , &rc->value , rc->dimensions , 1 , rc->after , rc->before , rc->group + , 0 , rc->options , &rc->db_after , &rc->db_before @@ -721,9 +718,6 @@ void *health_main(void *ptr) { rrd_unlock(); - if(unlikely(pthread_setcancelstate(oldstate, NULL) != 0)) - error("Cannot set pthread cancel state to RESTORE (%d).", oldstate); - if(unlikely(netdata_exit)) break; @@ -738,11 +732,6 @@ void *health_main(void *ptr) { } // forever - buffer_free(wb); - - info("HEALTH thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/health_config.c b/src/health_config.c index 108eecc4a..a25ee7227 100644 --- a/src/health_config.c +++ b/src/health_config.c @@ -44,7 +44,7 @@ static inline int 
rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); - debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", + debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", rc->chart?rc->chart:"NOCHART", rc->name, rc->id, @@ -99,7 +99,7 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL } } - debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", + debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", rt->name, (rt->context)?rt->context:"NONE", (rt->exec)?rt->exec:"DEFAULT", @@ -372,8 +372,14 @@ static inline int health_parse_db_lookup( else if(!strcasecmp(key, "unaligned")) { *options |= RRDR_OPTION_NOT_ALIGNED; } + else if(!strcasecmp(key, "match-ids") || !strcasecmp(key, 
"match_ids")) { + *options |= RRDR_OPTION_MATCH_IDS; + } + else if(!strcasecmp(key, "match-names") || !strcasecmp(key, "match_names")) { + *options |= RRDR_OPTION_MATCH_NAMES; + } else if(!strcasecmp(key, "of")) { - if(*s && strcasecmp(s, "all")) + if(*s && strcasecmp(s, "all") != 0) *dimensions = strdupz(s); break; } @@ -556,7 +562,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_os && !strcasecmp(key, HEALTH_OS_KEY)) { char *os_match = value; - SIMPLE_PATTERN *os_pattern = simple_pattern_create(os_match, SIMPLE_PATTERN_EXACT); + SIMPLE_PATTERN *os_pattern = simple_pattern_create(os_match, NULL, SIMPLE_PATTERN_EXACT); if(!simple_pattern_matches(os_pattern, host->os)) { if(rc) @@ -572,7 +578,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) { char *host_match = value; - SIMPLE_PATTERN *host_pattern = simple_pattern_create(host_match, SIMPLE_PATTERN_EXACT); + SIMPLE_PATTERN *host_pattern = simple_pattern_create(host_match, NULL, SIMPLE_PATTERN_EXACT); if(!simple_pattern_matches(host_pattern, host->hostname)) { if(rc) @@ -589,7 +595,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { else if(rc) { if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) { if(rc->chart) { - if(strcmp(rc->chart, value)) + if(strcmp(rc->chart, value) != 0) error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. 
Using ('%s').", line, path, filename, rc->name, key, rc->chart, value, value); @@ -653,7 +659,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) { if(rc->exec) { - if(strcmp(rc->exec, value)) + if(strcmp(rc->exec, value) != 0) error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').", line, path, filename, rc->name, key, rc->exec, value, value); @@ -663,7 +669,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) { if(rc->recipient) { - if(strcmp(rc->recipient, value)) + if(strcmp(rc->recipient, value) != 0) error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').", line, path, filename, rc->name, key, rc->recipient, value, value); @@ -673,7 +679,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) { if(rc->units) { - if(strcmp(rc->units, value)) + if(strcmp(rc->units, value) != 0) error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').", line, path, filename, rc->name, key, rc->units, value, value); @@ -684,7 +690,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) { if(rc->info) { - if(strcmp(rc->info, value)) + if(strcmp(rc->info, value) != 0) error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. 
Using ('%s').", line, path, filename, rc->name, key, rc->info, value, value); @@ -707,7 +713,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { else if(rt) { if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) { if(rt->context) { - if(strcmp(rt->context, value)) + if(strcmp(rt->context, value) != 0) error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').", line, path, filename, rt->name, key, rt->context, value, value); @@ -721,7 +727,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { simple_pattern_free(rt->family_pattern); rt->family_match = strdupz(value); - rt->family_pattern = simple_pattern_create(rt->family_match, SIMPLE_PATTERN_EXACT); + rt->family_pattern = simple_pattern_create(rt->family_match, NULL, SIMPLE_PATTERN_EXACT); } else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) { health_parse_db_lookup(line, path, filename, value, &rt->group, &rt->after, &rt->before, @@ -777,7 +783,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) { if(rt->exec) { - if(strcmp(rt->exec, value)) + if(strcmp(rt->exec, value) != 0) error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').", line, path, filename, rt->name, key, rt->exec, value, value); @@ -787,7 +793,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) { if(rt->recipient) { - if(strcmp(rt->recipient, value)) + if(strcmp(rt->recipient, value) != 0) error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. 
Using ('%s').", line, path, filename, rt->name, key, rt->recipient, value, value); @@ -797,7 +803,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) { if(rt->units) { - if(strcmp(rt->units, value)) + if(strcmp(rt->units, value) != 0) error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').", line, path, filename, rt->name, key, rt->units, value, value); @@ -808,7 +814,7 @@ int health_readfile(RRDHOST *host, const char *path, const char *filename) { } else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) { if(rt->info) { - if(strcmp(rt->info, value)) + if(strcmp(rt->info, value) != 0) error("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').", line, path, filename, rt->name, key, rt->info, value, value); diff --git a/src/health_json.c b/src/health_json.c index a9697aaa7..aba7425d7 100644 --- a/src/health_json.c +++ b/src/health_json.c @@ -2,8 +2,12 @@ #include "common.h" static inline void health_string2json(BUFFER *wb, const char *prefix, const char *label, const char *value, const char *suffix) { - if(value && *value) - buffer_sprintf(wb, "%s\"%s\":\"%s\"%s", prefix, label, value, suffix); + if(value && *value) { + buffer_sprintf(wb, "%s\"%s\":\"", prefix, label); + buffer_strcat_htmlescape(wb, value); + buffer_strcat(wb, "\""); + buffer_strcat(wb, suffix); + } else buffer_sprintf(wb, "%s\"%s\":null%s", prefix, label, suffix); } @@ -27,7 +31,6 @@ static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, R "\t\t\"exec_code\": %d,\n" "\t\t\"source\": \"%s\",\n" "\t\t\"units\": \"%s\",\n" - "\t\t\"info\": \"%s\",\n" "\t\t\"when\": %lu,\n" "\t\t\"duration\": %lu,\n" "\t\t\"non_clear_duration\": %lu,\n" @@ -55,7 +58,6 @@ static inline void 
health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, R , ae->exec_code , ae->source , ae->units?ae->units:"" - , ae->info?ae->info:"" , (unsigned long)ae->when , (unsigned long)ae->duration , (unsigned long)ae->non_clear_duration @@ -69,6 +71,8 @@ static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, R , ae->old_value_string ); + health_string2json(wb, "\t\t", "info", ae->info?ae->info:"", ",\n"); + if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) { buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n"); } diff --git a/src/health_log.c b/src/health_log.c index 0314b086c..a44fbadb0 100644 --- a/src/health_log.c +++ b/src/health_log.c @@ -77,7 +77,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { "\t%08x\t%08x\t%08x" "\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" "\t%d\t%d\t%d\t%d" - "\t%Lf\t%Lf" + "\t" CALCULATED_NUMBER_FORMAT_AUTO "\t" CALCULATED_NUMBER_FORMAT_AUTO "\n" , (ae->flags & HEALTH_ENTRY_FLAG_SAVED)?'U':'A' , host->hostname @@ -109,8 +109,8 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { , ae->old_status , ae->delay - , (long double)ae->new_value - , (long double)ae->old_value + , ae->new_value + , ae->old_value ) < 0)) error("HEALTH [%s]: failed to save alarm log entry to '%s'. 
Health data may be lost in case of abnormal restart.", host->hostname, host->health_log_filename); else { diff --git a/src/locks.c b/src/locks.c new file mode 100644 index 000000000..c5b42c921 --- /dev/null +++ b/src/locks.c @@ -0,0 +1,319 @@ +#include "common.h" + +// ---------------------------------------------------------------------------- +// automatic thread cancelability management, based on locks + +static __thread int netdata_thread_first_cancelability = 0; +static __thread int netdata_thread_lock_cancelability = 0; + +inline void netdata_thread_disable_cancelability(void) { + int old; + int ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); + if(ret != 0) + error("THREAD_CANCELABILITY: pthread_setcancelstate() on thread %s returned error %d", netdata_thread_tag(), ret); + else { + if(!netdata_thread_lock_cancelability) + netdata_thread_first_cancelability = old; + + netdata_thread_lock_cancelability++; + } +} + +inline void netdata_thread_enable_cancelability(void) { + if(netdata_thread_lock_cancelability < 1) { + error("THREAD_CANCELABILITY: netdata_thread_enable_cancelability(): invalid thread cancelability count %d on thread %s - results will be undefined - please report this!", netdata_thread_lock_cancelability, netdata_thread_tag()); + } + else if(netdata_thread_lock_cancelability == 1) { + int old = 1; + int ret = pthread_setcancelstate(netdata_thread_first_cancelability, &old); + if(ret != 0) + error("THREAD_CANCELABILITY: pthread_setcancelstate() on thread %s returned error %d", netdata_thread_tag(), ret); + else { + if(old != PTHREAD_CANCEL_DISABLE) + error("THREAD_CANCELABILITY: netdata_thread_enable_cancelability(): old thread cancelability on thread %s was changed, expected DISABLED (%d), found %s (%d) - please report this!", netdata_thread_tag(), PTHREAD_CANCEL_DISABLE, (old == PTHREAD_CANCEL_ENABLE)?"ENABLED":"UNKNOWN", old); + } + + netdata_thread_lock_cancelability = 0; + } + else + netdata_thread_lock_cancelability--; +} + +// 
---------------------------------------------------------------------------- +// mutex + +int __netdata_mutex_init(netdata_mutex_t *mutex) { + int ret = pthread_mutex_init(mutex, NULL); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to initialize (code %d).", ret); + return ret; +} + +int __netdata_mutex_lock(netdata_mutex_t *mutex) { + netdata_thread_disable_cancelability(); + + int ret = pthread_mutex_lock(mutex); + if(unlikely(ret != 0)) { + netdata_thread_enable_cancelability(); + error("MUTEX_LOCK: failed to get lock (code %d)", ret); + } + return ret; +} + +int __netdata_mutex_trylock(netdata_mutex_t *mutex) { + netdata_thread_disable_cancelability(); + + int ret = pthread_mutex_trylock(mutex); + if(ret != 0) + netdata_thread_enable_cancelability(); + + return ret; +} + +int __netdata_mutex_unlock(netdata_mutex_t *mutex) { + int ret = pthread_mutex_unlock(mutex); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to unlock (code %d).", ret); + else + netdata_thread_enable_cancelability(); + + return ret; +} + +int netdata_mutex_init_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(0x%p) from %lu@%s, %s()", mutex, line, file, function); + } + + int ret = __netdata_mutex_init(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_mutex_lock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_lock(0x%p) from %lu@%s, %s()", mutex, line, file, function); + } + + int ret = __netdata_mutex_lock(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: 
netdata_mutex_lock(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_mutex_trylock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_trylock(0x%p) from %lu@%s, %s()", mutex, line, file, function); + } + + int ret = __netdata_mutex_trylock(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_trylock(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_mutex_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(0x%p) from %lu@%s, %s()", mutex, line, file, function); + } + + int ret = __netdata_mutex_unlock(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + + +// ---------------------------------------------------------------------------- +// r/w lock + +int __netdata_rwlock_destroy(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_destroy(rwlock); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to destroy lock (code %d)", ret); + return ret; +} + +int __netdata_rwlock_init(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_init(rwlock, NULL); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to initialize lock (code %d)", ret); + return ret; +} + +int __netdata_rwlock_rdlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_rdlock(rwlock); + if(unlikely(ret != 0)) { + netdata_thread_enable_cancelability(); + 
error("RW_LOCK: failed to obtain read lock (code %d)", ret); + } + + return ret; +} + +int __netdata_rwlock_wrlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_wrlock(rwlock); + if(unlikely(ret != 0)) { + error("RW_LOCK: failed to obtain write lock (code %d)", ret); + netdata_thread_enable_cancelability(); + } + + return ret; +} + +int __netdata_rwlock_unlock(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_unlock(rwlock); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to release lock (code %d)", ret); + else + netdata_thread_enable_cancelability(); + + return ret; +} + +int __netdata_rwlock_tryrdlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_tryrdlock(rwlock); + if(ret != 0) + netdata_thread_enable_cancelability(); + + return ret; +} + +int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_trywrlock(rwlock); + if(ret != 0) + netdata_thread_enable_cancelability(); + + return ret; +} + + +int netdata_rwlock_destroy_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(0x%p) from %lu@%s, %s()", rwlock, line, file, function); + } + + int ret = __netdata_rwlock_destroy(rwlock); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_rwlock_init_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(0x%p) from %lu@%s, %s()", rwlock, line, file, function); + } + + int ret 
= __netdata_rwlock_init(rwlock); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_rwlock_rdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); + } + + int ret = __netdata_rwlock_rdlock(rwlock); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_rwlock_wrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); + } + + int ret = __netdata_rwlock_wrlock(rwlock); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_rwlock_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); + } + + int ret = __netdata_rwlock_unlock(rwlock); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_rwlock_tryrdlock_debug( const char *file, const char *function, const unsigned long line, 
netdata_rwlock_t *rwlock) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); + } + + int ret = __netdata_rwlock_tryrdlock(rwlock); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} + +int netdata_rwlock_trywrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { + usec_t start = 0; + + if(unlikely(debug_flags & D_LOCKS)) { + start = now_boottime_usec(); + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); + } + + int ret = __netdata_rwlock_trywrlock(rwlock); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); + + return ret; +} diff --git a/src/locks.h b/src/locks.h index 76533f636..36962fef2 100644 --- a/src/locks.h +++ b/src/locks.h @@ -1,276 +1,48 @@ #ifndef NETDATA_LOCKS_H #define NETDATA_LOCKS_H -// ---------------------------------------------------------------------------- -// mutex - typedef pthread_mutex_t netdata_mutex_t; - #define NETDATA_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -static inline int __netdata_mutex_init(netdata_mutex_t *mutex) { - int ret = pthread_mutex_init(mutex, NULL); - if(unlikely(ret != 0)) - error("MUTEX_LOCK: failed to initialize (code %d).", ret); - return ret; -} - -static inline int __netdata_mutex_lock(netdata_mutex_t *mutex) { - int ret = pthread_mutex_lock(mutex); - if(unlikely(ret != 0)) - error("MUTEX_LOCK: failed to get lock (code %d)", ret); - return ret; -} - -static inline int __netdata_mutex_trylock(netdata_mutex_t *mutex) { - int ret = pthread_mutex_trylock(mutex); - return ret; -} +typedef pthread_rwlock_t 
netdata_rwlock_t; +#define NETDATA_RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER -static inline int __netdata_mutex_unlock(netdata_mutex_t *mutex) { - int ret = pthread_mutex_unlock(mutex); - if(unlikely(ret != 0)) - error("MUTEX_LOCK: failed to unlock (code %d).", ret); - return ret; -} +extern int __netdata_mutex_init(netdata_mutex_t *mutex); +extern int __netdata_mutex_lock(netdata_mutex_t *mutex); +extern int __netdata_mutex_trylock(netdata_mutex_t *mutex); +extern int __netdata_mutex_unlock(netdata_mutex_t *mutex); + +extern int __netdata_rwlock_destroy(netdata_rwlock_t *rwlock); +extern int __netdata_rwlock_init(netdata_rwlock_t *rwlock); +extern int __netdata_rwlock_rdlock(netdata_rwlock_t *rwlock); +extern int __netdata_rwlock_wrlock(netdata_rwlock_t *rwlock); +extern int __netdata_rwlock_unlock(netdata_rwlock_t *rwlock); +extern int __netdata_rwlock_tryrdlock(netdata_rwlock_t *rwlock); +extern int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock); + +extern int netdata_mutex_init_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +extern int netdata_mutex_lock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +extern int netdata_mutex_trylock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +extern int netdata_mutex_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); + +extern int netdata_rwlock_destroy_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +extern int netdata_rwlock_init_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +extern int netdata_rwlock_rdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +extern int netdata_rwlock_wrlock_debug( const char *file, const char *function, const unsigned 
long line, netdata_rwlock_t *rwlock); +extern int netdata_rwlock_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +extern int netdata_rwlock_tryrdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +extern int netdata_rwlock_trywrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); + +extern void netdata_thread_disable_cancelability(void); +extern void netdata_thread_enable_cancelability(void); #ifdef NETDATA_INTERNAL_CHECKS -static inline int netdata_mutex_init_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(0x%p) from %lu@%s, %s()", mutex, line, file, function); - } - - int ret = __netdata_mutex_init(mutex); - - debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_mutex_lock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_lock(0x%p) from %lu@%s, %s()", mutex, line, file, function); - } - - int ret = __netdata_mutex_lock(mutex); - - debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_lock(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_mutex_trylock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "MUTEX_LOCK: 
netdata_mutex_trylock(0x%p) from %lu@%s, %s()", mutex, line, file, function); - } - - int ret = __netdata_mutex_trylock(mutex); - - debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_trylock(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_mutex_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(0x%p) from %lu@%s, %s()", mutex, line, file, function); - } - - int ret = __netdata_mutex_unlock(mutex); - - debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - #define netdata_mutex_init(mutex) netdata_mutex_init_debug(__FILE__, __FUNCTION__, __LINE__, mutex) #define netdata_mutex_lock(mutex) netdata_mutex_lock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) #define netdata_mutex_trylock(mutex) netdata_mutex_trylock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) #define netdata_mutex_unlock(mutex) netdata_mutex_unlock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) -#else // !NETDATA_INTERNAL_CHECKS - -#define netdata_mutex_init(mutex) __netdata_mutex_init(mutex) -#define netdata_mutex_lock(mutex) __netdata_mutex_lock(mutex) -#define netdata_mutex_trylock(mutex) __netdata_mutex_trylock(mutex) -#define netdata_mutex_unlock(mutex) __netdata_mutex_unlock(mutex) - -#endif // NETDATA_INTERNAL_CHECKS - - -// ---------------------------------------------------------------------------- -// r/w lock - -typedef pthread_rwlock_t netdata_rwlock_t; - -#define NETDATA_RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER - -static inline int __netdata_rwlock_destroy(netdata_rwlock_t *rwlock) { - int ret = pthread_rwlock_destroy(rwlock); - if(unlikely(ret != 0)) - error("RW_LOCK: 
failed to destroy lock (code %d)", ret); - return ret; -} - -static inline int __netdata_rwlock_init(netdata_rwlock_t *rwlock) { - int ret = pthread_rwlock_init(rwlock, NULL); - if(unlikely(ret != 0)) - error("RW_LOCK: failed to initialize lock (code %d)", ret); - return ret; -} - -static inline int __netdata_rwlock_rdlock(netdata_rwlock_t *rwlock) { - int ret = pthread_rwlock_rdlock(rwlock); - if(unlikely(ret != 0)) - error("RW_LOCK: failed to obtain read lock (code %d)", ret); - return ret; -} - -static inline int __netdata_rwlock_wrlock(netdata_rwlock_t *rwlock) { - int ret = pthread_rwlock_wrlock(rwlock); - if(unlikely(ret != 0)) - error("RW_LOCK: failed to obtain write lock (code %d)", ret); - return ret; -} - -static inline int __netdata_rwlock_unlock(netdata_rwlock_t *rwlock) { - int ret = pthread_rwlock_unlock(rwlock); - if(unlikely(ret != 0)) - error("RW_LOCK: failed to release lock (code %d)", ret); - return ret; -} - -static inline int __netdata_rwlock_tryrdlock(netdata_rwlock_t *rwlock) { - int ret = pthread_rwlock_tryrdlock(rwlock); - return ret; -} - -static inline int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock) { - int ret = pthread_rwlock_trywrlock(rwlock); - return ret; -} - - -#ifdef NETDATA_INTERNAL_CHECKS - -static inline int netdata_rwlock_destroy_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(0x%p) from %lu@%s, %s()", rwlock, line, file, function); - } - - int ret = __netdata_rwlock_destroy(rwlock); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_rwlock_init_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { - usec_t start = 0; - - 
if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(0x%p) from %lu@%s, %s()", rwlock, line, file, function); - } - - int ret = __netdata_rwlock_init(rwlock); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_rwlock_rdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); - } - - int ret = __netdata_rwlock_rdlock(rwlock); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_rwlock_wrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); - } - - int ret = __netdata_rwlock_wrlock(rwlock); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_rwlock_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); - } - - int ret = __netdata_rwlock_unlock(rwlock); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(0x%p) 
= %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_rwlock_tryrdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); - } - - int ret = __netdata_rwlock_tryrdlock(rwlock); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - -static inline int netdata_rwlock_trywrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock) { - usec_t start = 0; - - if(unlikely(debug_flags & D_LOCKS)) { - start = now_boottime_usec(); - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(0x%p) from %lu@%s, %s()", rwlock, line, file, function); - } - - int ret = __netdata_rwlock_trywrlock(rwlock); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(0x%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, now_boottime_usec() - start, line, file, function); - - return ret; -} - #define netdata_rwlock_destroy(rwlock) netdata_rwlock_destroy_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) #define netdata_rwlock_init(rwlock) netdata_rwlock_init_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) #define netdata_rwlock_rdlock(rwlock) netdata_rwlock_rdlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) @@ -281,6 +53,11 @@ static inline int netdata_rwlock_trywrlock_debug( const char *file, const char * #else // !NETDATA_INTERNAL_CHECKS +#define netdata_mutex_init(mutex) __netdata_mutex_init(mutex) +#define netdata_mutex_lock(mutex) __netdata_mutex_lock(mutex) +#define netdata_mutex_trylock(mutex) __netdata_mutex_trylock(mutex) +#define netdata_mutex_unlock(mutex) 
__netdata_mutex_unlock(mutex) + #define netdata_rwlock_destroy(rwlock) __netdata_rwlock_destroy(rwlock) #define netdata_rwlock_init(rwlock) __netdata_rwlock_init(rwlock) #define netdata_rwlock_rdlock(rwlock) __netdata_rwlock_rdlock(rwlock) @@ -1,5 +1,7 @@ #include "common.h" +int web_server_is_multithreaded = 1; + const char *program_name = ""; uint64_t debug_flags = DEBUG; @@ -55,13 +57,16 @@ static inline void log_unlock() { } int open_log_file(int fd, FILE **fp, const char *filename, int *enabled_syslog) { - int f; + int f, devnull = 0; - if(!filename || !*filename || !strcmp(filename, "none")) + if(!filename || !*filename || !strcmp(filename, "none") || !strcmp(filename, "/dev/null")) { filename = "/dev/null"; + devnull = 1; + } if(!strcmp(filename, "syslog")) { filename = "/dev/null"; + devnull = 1; syslog_init(); if(enabled_syslog) *enabled_syslog = 1; } @@ -70,8 +75,10 @@ int open_log_file(int fd, FILE **fp, const char *filename, int *enabled_syslog) // don't do anything if the user is willing // to have the standard one if(!strcmp(filename, "system")) { - if(fd != -1) return fd; - filename = "stdout"; + if(fd != -1 && fp != &stdaccess) + return fd; + + filename = "stderr"; } if(!strcmp(filename, "stdout")) @@ -88,6 +95,11 @@ int open_log_file(int fd, FILE **fp, const char *filename, int *enabled_syslog) } } + if(devnull && fp == &stdaccess) { + fd = -1; + *fp = NULL; + } + // if there is a level-2 file pointer // flush it before switching the level-1 fds if(fp && *fp) @@ -246,7 +258,7 @@ void debug_int( const char *file, const char *function, const unsigned long line log_date(date, LOG_DATE_LENGTH); va_start( args, fmt ); - printf("%s: %s DEBUG (%04lu@%-10.10s:%-15.15s): ", date, program_name, line, file, function); + printf("%s: %s DEBUG : %s : (%04lu@%-10.10s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); vprintf(fmt, args); va_end( args ); putchar('\n'); @@ -282,8 +294,8 @@ void info_int( const char *file, const char 
*function, const unsigned long line, log_lock(); va_start( args, fmt ); - if(debug_flags) fprintf(stderr, "%s: %s INFO : (%04lu@%-10.10s:%-15.15s): ", date, program_name, line, file, function); - else fprintf(stderr, "%s: %s INFO : ", date, program_name); + if(debug_flags) fprintf(stderr, "%s: %s INFO : %s : (%04lu@%-10.10s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); + else fprintf(stderr, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); vfprintf( stderr, fmt, args ); va_end( args ); @@ -338,8 +350,8 @@ void error_int( const char *prefix, const char *file, const char *function, cons log_lock(); va_start( args, fmt ); - if(debug_flags) fprintf(stderr, "%s: %s %s: (%04lu@%-10.10s:%-15.15s): ", date, program_name, prefix, line, file, function); - else fprintf(stderr, "%s: %s %s: ", date, program_name, prefix); + if(debug_flags) fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-10.10s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); + else fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); vfprintf( stderr, fmt, args ); va_end( args ); @@ -369,8 +381,8 @@ void fatal_int( const char *file, const char *function, const unsigned long line log_lock(); va_start( args, fmt ); - if(debug_flags) fprintf(stderr, "%s: %s FATAL: (%04lu@%-10.10s:%-15.15s): ", date, program_name, line, file, function); - else fprintf(stderr, "%s: %s FATAL: ", date, program_name); + if(debug_flags) fprintf(stderr, "%s: %s FATAL : %s : (%04lu@%-10.10s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); + else fprintf(stderr, "%s: %s FATAL : %s :", date, program_name, netdata_thread_tag()); vfprintf( stderr, fmt, args ); va_end( args ); @@ -397,7 +409,8 @@ void log_access( const char *fmt, ... 
) { if(stdaccess) { static netdata_mutex_t access_mutex = NETDATA_MUTEX_INITIALIZER; - netdata_mutex_lock(&access_mutex); + if(web_server_is_multithreaded) + netdata_mutex_lock(&access_mutex); char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH); @@ -408,6 +421,7 @@ void log_access( const char *fmt, ... ) { va_end( args ); fputc('\n', stdaccess); - netdata_mutex_unlock(&access_mutex); + if(web_server_is_multithreaded) + netdata_mutex_unlock(&access_mutex); } } @@ -38,6 +38,8 @@ //#define DEBUG 0xffffffff #define DEBUG (0) +extern int web_server_is_multithreaded; + extern uint64_t debug_flags; extern const char *program_name; diff --git a/src/macos_mach_smi.c b/src/macos_mach_smi.c index bcde589f0..47d32a9f7 100644 --- a/src/macos_mach_smi.c +++ b/src/macos_mach_smi.c @@ -196,7 +196,7 @@ int do_macos_mach_smi(int update_every, usec_t dt) { , "page faults/s" , "macos" , "mach_smi" - , 500 + , NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS , update_every , RRDSET_TYPE_LINE ); diff --git a/src/main.c b/src/main.c index b14ebc0c1..798c7f0fc 100644 --- a/src/main.c +++ b/src/main.c @@ -3,31 +3,37 @@ extern void *cgroups_main(void *ptr); void netdata_cleanup_and_exit(int ret) { - netdata_exit = 1; + // enabling this, is wrong + // because the threads will be cancelled while cleaning up + // netdata_exit = 1; error_log_limit_unlimited(); + info("EXIT: netdata prepares to exit with code %d...", ret); - debug(D_EXIT, "Called: netdata_cleanup_and_exit()"); - - // cleanup the database + // cleanup/save the database and exit + info("EXIT: cleaning up the database..."); rrdhost_cleanup_all(); + if(!ret) { + // exit cleanly + + // stop everything + info("EXIT: stopping master threads..."); + cancel_main_threads(); + + // free the database + info("EXIT: freeing database memory..."); + rrdhost_free_all(); + } + // unlink the pid if(pidfile[0]) { + info("EXIT: removing netdata PID file '%s'...", pidfile); if(unlink(pidfile) != 0) - error("Cannot unlink pidfile '%s'.", pidfile); + 
error("EXIT: cannot unlink pidfile '%s'.", pidfile); } -#ifdef NETDATA_INTERNAL_CHECKS - // kill all childs - //kill_childs(); - - // free database - sleep(2); - rrdhost_free_all(); -#endif - - info("netdata exiting. Bye bye..."); + info("EXIT: all done - netdata is now exiting - bye bye..."); exit(ret); } @@ -36,37 +42,38 @@ struct netdata_static_thread static_threads[] = { #ifdef INTERNAL_PLUGIN_NFACCT // nfacct requires root access // so, we build it as an external plugin with setuid to root - {"nfacct", CONFIG_SECTION_PLUGINS, "nfacct", 1, NULL, NULL, nfacct_main}, + {"PLUGIN[nfacct]", CONFIG_SECTION_PLUGINS, "nfacct", 1, NULL, NULL, nfacct_main}, #endif #ifdef NETDATA_INTERNAL_CHECKS // debugging plugin - {"check", CONFIG_SECTION_PLUGINS, "checks", 0, NULL, NULL, checks_main}, + {"PLUGIN[check]", CONFIG_SECTION_PLUGINS, "checks", 0, NULL, NULL, checks_main}, #endif #if defined(__FreeBSD__) // FreeBSD internal plugins - {"freebsd", CONFIG_SECTION_PLUGINS, "freebsd", 1, NULL, NULL, freebsd_main}, + {"PLUGIN[freebsd]", CONFIG_SECTION_PLUGINS, "freebsd", 1, NULL, NULL, freebsd_main}, #elif defined(__APPLE__) // macOS internal plugins - {"macos", CONFIG_SECTION_PLUGINS, "macos", 1, NULL, NULL, macos_main}, + {"PLUGIN[macos]", CONFIG_SECTION_PLUGINS, "macos", 1, NULL, NULL, macos_main}, #else // linux internal plugins - {"proc", CONFIG_SECTION_PLUGINS, "proc", 1, NULL, NULL, proc_main}, - {"diskspace", CONFIG_SECTION_PLUGINS, "diskspace", 1, NULL, NULL, proc_diskspace_main}, - {"cgroups", CONFIG_SECTION_PLUGINS, "cgroups", 1, NULL, NULL, cgroups_main}, - {"tc", CONFIG_SECTION_PLUGINS, "tc", 1, NULL, NULL, tc_main}, + {"PLUGIN[proc]", CONFIG_SECTION_PLUGINS, "proc", 1, NULL, NULL, proc_main}, + {"PLUGIN[diskspace]", CONFIG_SECTION_PLUGINS, "diskspace", 1, NULL, NULL, proc_diskspace_main}, + {"PLUGIN[cgroup]", CONFIG_SECTION_PLUGINS, "cgroups", 1, NULL, NULL, cgroups_main}, + {"PLUGIN[tc]", CONFIG_SECTION_PLUGINS, "tc", 1, NULL, NULL, tc_main}, #endif /* __FreeBSD__, 
__APPLE__*/ // common plugins for all systems - {"idlejitter", CONFIG_SECTION_PLUGINS, "idlejitter", 1, NULL, NULL, cpuidlejitter_main}, - {"backends", NULL, NULL, 1, NULL, NULL, backends_main}, - {"health", NULL, NULL, 1, NULL, NULL, health_main}, - {"plugins.d", NULL, NULL, 1, NULL, NULL, pluginsd_main}, - {"web", NULL, NULL, 1, NULL, NULL, socket_listen_main_multi_threaded}, - {"web-single-threaded", NULL, NULL, 0, NULL, NULL, socket_listen_main_single_threaded}, - {"push-metrics", NULL, NULL, 0, NULL, NULL, rrdpush_sender_thread}, - {"statsd", NULL, NULL, 1, NULL, NULL, statsd_main}, + {"PLUGIN[idlejitter]", CONFIG_SECTION_PLUGINS, "idlejitter", 1, NULL, NULL, cpuidlejitter_main}, + {"BACKENDS", NULL, NULL, 1, NULL, NULL, backends_main}, + {"HEALTH", NULL, NULL, 1, NULL, NULL, health_main}, + {"PLUGINSD", NULL, NULL, 1, NULL, NULL, pluginsd_main}, + {"WEB_SERVER[multi]", NULL, NULL, 1, NULL, NULL, socket_listen_main_multi_threaded}, + {"WEB_SERVER[single]", NULL, NULL, 0, NULL, NULL, socket_listen_main_single_threaded}, + {"WEB_SERVER[static1]", NULL, NULL, 0, NULL, NULL, socket_listen_main_static_threaded}, + {"STREAM", NULL, NULL, 0, NULL, NULL, rrdpush_sender_thread}, + {"STATSD", NULL, NULL, 1, NULL, NULL, statsd_main}, {NULL, NULL, NULL, 0, NULL, NULL, NULL} }; @@ -76,6 +83,7 @@ void web_server_threading_selection(void) { int multi_threaded = (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED); int single_threaded = (web_server_mode == WEB_SERVER_MODE_SINGLE_THREADED); + int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED); int i; for (i = 0; static_threads[i].name; i++) { @@ -84,22 +92,26 @@ void web_server_threading_selection(void) { if (static_threads[i].start_routine == socket_listen_main_single_threaded) static_threads[i].enabled = single_threaded; + + if (static_threads[i].start_routine == socket_listen_main_static_threaded) + static_threads[i].enabled = static_threaded; } } void web_server_config_options(void) { - 
web_client_timeout = (int) config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", DEFAULT_DISCONNECT_IDLE_WEB_CLIENTS_AFTER_SECONDS); + web_client_timeout = (int) config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout); + web_client_first_request_timeout = (int) config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout); respect_web_browser_do_not_track_policy = config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy); web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", ""); if(!*web_x_frame_options) web_x_frame_options = NULL; - web_allow_connections_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"), SIMPLE_PATTERN_EXACT); - web_allow_dashboard_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"), SIMPLE_PATTERN_EXACT); - web_allow_badges_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), SIMPLE_PATTERN_EXACT); - web_allow_registry_from = simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), SIMPLE_PATTERN_EXACT); - web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"), SIMPLE_PATTERN_EXACT); - web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from", "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.* 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.* 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.* 172.31.*"), SIMPLE_PATTERN_EXACT); + web_allow_connections_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_dashboard_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", 
"localhost *"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_badges_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_registry_from = simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from", "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.* 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.* 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.* 172.31.*"), NULL, SIMPLE_PATTERN_EXACT); #ifdef NETDATA_WITH_ZLIB web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip); @@ -177,51 +189,37 @@ int killpid(pid_t pid, int sig) return ret; } -void kill_childs() -{ +void cancel_main_threads() { error_log_limit_unlimited(); - siginfo_t info; - - struct web_client *w; - for(w = web_clients; w ; w = w->next) { - info("Stopping web client %s", w->client_ip); - pthread_cancel(w->thread); - // it is detached - // pthread_join(w->thread, NULL); - - WEB_CLIENT_IS_OBSOLETE(w); - } - - int i; + int i, found = 0, max = 5 * USEC_PER_SEC, step = 100000; for (i = 0; static_threads[i].name != NULL ; i++) { - if(static_threads[i].enabled) { - info("Stopping %s thread", static_threads[i].name); - pthread_cancel(*static_threads[i].thread); - // it is detached - // pthread_join(*static_threads[i].thread, NULL); - - static_threads[i].enabled = 0; + if(static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) { + info("EXIT: Stopping master thread: %s", static_threads[i].name); + netdata_thread_cancel(*static_threads[i].thread); + found++; } } - if(tc_child_pid) { - info("Killing tc-qos-helper process %d", tc_child_pid); - if(killpid(tc_child_pid, SIGTERM) != -1) - 
waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED); - - tc_child_pid = 0; + while(found && max > 0) { + max -= step; + info("Waiting %d threads to finish...", found); + sleep_usec(step); + found = 0; + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + found++; + } } - // stop all running plugins - pluginsd_stop_all_external_plugins(); - - // if, for any reason there is any child exited - // catch it here - info("Cleaning up an other children"); - waitid(P_PID, 0, &info, WEXITED|WNOHANG); - - info("All threads/childs stopped."); + if(found) { + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + error("Master thread %s takes too long to exit. Giving up...", static_threads[i].name); + } + } + else + info("All threads finished."); } struct option_def option_definitions[] = { @@ -628,8 +626,7 @@ int main(int argc, char **argv) { int i; int config_loaded = 0; int dont_fork = 0; - size_t wanted_stacksize = 0, stacksize = 0; - pthread_attr_t attr; + size_t default_stacksize; // set the name for logging program_name = "netdata"; @@ -774,7 +771,7 @@ int main(int argc, char **argv) { size_t len = strlen(needle) + 1; char wildcarded[len]; - SIMPLE_PATTERN *p = simple_pattern_create(heystack, SIMPLE_PATTERN_EXACT); + SIMPLE_PATTERN *p = simple_pattern_create(heystack, NULL, SIMPLE_PATTERN_EXACT); int ret = simple_pattern_matches_extract(p, needle, wildcarded, len); simple_pattern_free(p); @@ -954,21 +951,8 @@ int main(int argc, char **argv) { // setup the signals we want to use signals_init(); - - // -------------------------------------------------------------------- - // get the required stack size of the threads of netdata - - i = pthread_attr_init(&attr); - if(i != 0) - fatal("pthread_attr_init() failed with code %d.", i); - - i = pthread_attr_getstacksize(&attr, &stacksize); - if(i != 0) - fatal("pthread_attr_getstacksize() failed with code 
%d.", i); - else - debug(D_OPTIONS, "initial pthread stack size is %zu bytes", stacksize); - - wanted_stacksize = (size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)stacksize); + // setup threads configs + default_stacksize = netdata_threads_init(); // -------------------------------------------------------------------- @@ -1021,6 +1005,11 @@ int main(int argc, char **argv) { } #endif /* NETDATA_INTERNAL_CHECKS */ + // get the max file limit + if(getrlimit(RLIMIT_NOFILE, &rlimit_nofile) != 0) + error("getrlimit(RLIMIT_NOFILE) failed"); + else + info("resources control: allowed file descriptors: soft = %zu, max = %zu", rlimit_nofile.rlim_cur, rlimit_nofile.rlim_max); // fork, switch user, create pid file, set process priority if(become_daemon(dont_fork, user) == -1) @@ -1033,18 +1022,7 @@ int main(int argc, char **argv) { web_files_uid(); web_files_gid(); - - // ------------------------------------------------------------------------ - // set default pthread stack size - after we have forked - - if(stacksize < wanted_stacksize) { - i = pthread_attr_setstacksize(&attr, wanted_stacksize); - if(i != 0) - fatal("pthread_attr_setstacksize() to %zu bytes, failed with code %d.", wanted_stacksize, i); - else - debug(D_SYSTEM, "Successfully set pthread stacksize to %zu bytes", wanted_stacksize); - } - + netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize)); // ------------------------------------------------------------------------ // initialize rrd, registry, health, rrdpush, etc. 
@@ -1067,15 +1045,9 @@ int main(int argc, char **argv) { struct netdata_static_thread *st = &static_threads[i]; if(st->enabled) { - st->thread = mallocz(sizeof(pthread_t)); - + st->thread = mallocz(sizeof(netdata_thread_t)); debug(D_SYSTEM, "Starting thread %s.", st->name); - - if(pthread_create(st->thread, &attr, st->start_routine, st)) - error("failed to create new thread for %s.", st->name); - - else if(pthread_detach(*st->thread)) - error("Cannot request detach of newly created %s thread.", st->name); + netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st); } else debug(D_SYSTEM, "Not starting thread %s.", st->name); } diff --git a/src/main.h b/src/main.h index 09567bc7c..d29bf74e7 100644 --- a/src/main.h +++ b/src/main.h @@ -1,6 +1,10 @@ #ifndef NETDATA_MAIN_H #define NETDATA_MAIN_H 1 +#define NETDATA_MAIN_THREAD_RUNNING CONFIG_BOOLEAN_YES +#define NETDATA_MAIN_THREAD_EXITING (CONFIG_BOOLEAN_YES + 1) +#define NETDATA_MAIN_THREAD_EXITED CONFIG_BOOLEAN_NO + /** * This struct contains information about command line options. 
*/ @@ -22,15 +26,15 @@ struct netdata_static_thread { char *config_section; char *config_name; - volatile int enabled; + volatile sig_atomic_t enabled; - pthread_t *thread; + netdata_thread_t *thread; void (*init_routine) (void); void *(*start_routine) (void *); }; -extern void kill_childs(void); +extern void cancel_main_threads(void); extern int killpid(pid_t pid, int signal); extern void netdata_cleanup_and_exit(int ret) NORETURN; diff --git a/src/plugin_checks.c b/src/plugin_checks.c index 9c1e42cc6..b99b97d40 100644 --- a/src/plugin_checks.c +++ b/src/plugin_checks.c @@ -2,16 +2,17 @@ #ifdef NETDATA_INTERNAL_CHECKS -void *checks_main(void *ptr) { +static void checks_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("CHECKS thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); +void *checks_main(void *ptr) { + netdata_thread_cleanup_push(checks_main_cleanup, ptr); usec_t usec = 0, susec = localhost->rrd_update_every * USEC_PER_SEC, loop_usec = 0, total_susec = 0; struct timeval now, last, loop; @@ -72,7 +73,7 @@ void *checks_main(void *ptr) { rrddim_add(check3, "apps.plugin", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); now_realtime_timeval(&last); - while(1) { + while(!netdata_exit) { usleep(susec); // find the time to sleep in order to wait exactly update_every seconds @@ -119,10 +120,7 @@ void *checks_main(void *ptr) { rrdset_done(check3); } - info("CHECKS thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_freebsd.c b/src/plugin_freebsd.c index 
a7825d850..a0d3dc2ea 100644 --- a/src/plugin_freebsd.c +++ b/src/plugin_freebsd.c @@ -66,22 +66,23 @@ static struct freebsd_module { { .name = NULL, .dim = NULL, .enabled = 0, .func = NULL } }; -void *freebsd_main(void *ptr) { +static void freebsd_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("FREEBSD Plugin thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); +void *freebsd_main(void *ptr) { + netdata_thread_cleanup_push(freebsd_main_cleanup, ptr); int vdo_cpu_netdata = config_get_boolean("plugin:freebsd", "netdata server resources", 1); // initialize FreeBSD plugin if (freebsd_plugin_init()) - netdata_exit = 1; + netdata_cleanup_and_exit(1); // check the enabled status for each module int i; @@ -97,7 +98,7 @@ void *freebsd_main(void *ptr) { heartbeat_t hb; heartbeat_init(&hb); - for(;;) { + while(!netdata_exit) { usec_t hb_dt = heartbeat_next(&hb, step); usec_t duration = 0ULL; @@ -167,9 +168,6 @@ void *freebsd_main(void *ptr) { } } - info("FREEBSD thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_idlejitter.c b/src/plugin_idlejitter.c index 8d9336835..77bd95d55 100644 --- a/src/plugin_idlejitter.c +++ b/src/plugin_idlejitter.c @@ -2,16 +2,17 @@ #define CPU_IDLEJITTER_SLEEP_TIME_MS 20 -void *cpuidlejitter_main(void *ptr) { +static void cpuidlejitter_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("IDLEJITTER thread 
created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); +void *cpuidlejitter_main(void *ptr) { + netdata_thread_cleanup_push(cpuidlejitter_main_cleanup, ptr); usec_t sleep_ut = config_get_number("plugin:idlejitter", "loop time in ms", CPU_IDLEJITTER_SLEEP_TIME_MS) * USEC_PER_MS; if(sleep_ut <= 0) { @@ -23,13 +24,13 @@ void *cpuidlejitter_main(void *ptr) { "system" , "idlejitter" , NULL - , "processes" + , "idlejitter" , NULL , "CPU Idle Jitter" , "microseconds lost/s" , "idlejitter" , NULL - , 9999 + , 800 , localhost->rrd_update_every , RRDSET_TYPE_AREA ); @@ -40,6 +41,7 @@ void *cpuidlejitter_main(void *ptr) { usec_t update_every_ut = localhost->rrd_update_every * USEC_PER_SEC; struct timeval before, after; unsigned long long counter; + for(counter = 0; 1 ;counter++) { int iterations = 0; usec_t error_total = 0, @@ -82,10 +84,7 @@ void *cpuidlejitter_main(void *ptr) { } } - info("IDLEJITTER thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_macos.c b/src/plugin_macos.c index 4e84a084d..6ac3d25d1 100644 --- a/src/plugin_macos.c +++ b/src/plugin_macos.c @@ -1,15 +1,16 @@ #include "common.h" -void *macos_main(void *ptr) { +static void macos_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("MACOS Plugin thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - 
if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); +void *macos_main(void *ptr) { + netdata_thread_cleanup_push(macos_main_cleanup, ptr); // when ZERO, attempt to do it int vdo_cpu_netdata = !config_get_boolean("plugin:macos", "netdata server resources", 1); @@ -25,7 +26,8 @@ void *macos_main(void *ptr) { usec_t step = localhost->rrd_update_every * USEC_PER_SEC; heartbeat_t hb; heartbeat_init(&hb); - for(;;) { + + while(!netdata_exit) { usec_t hb_dt = heartbeat_next(&hb, step); if(unlikely(netdata_exit)) break; @@ -60,9 +62,6 @@ void *macos_main(void *ptr) { } } - info("MACOS thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_nfacct.c b/src/plugin_nfacct.c index 8319c6726..02815ef04 100644 --- a/src/plugin_nfacct.c +++ b/src/plugin_nfacct.c @@ -751,17 +751,24 @@ static void nfacct_send_metrics() { // ---------------------------------------------------------------------------- -void *nfacct_main(void *ptr) { +static void nfacct_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + info("cleaning up..."); - info("NETFILTER thread created with task id %d", gettid()); +#ifdef DO_NFACCT + nfacct_cleanup(); +#endif - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("NETFILTER: Cannot set pthread cancel type to DEFERRED."); +#ifdef DO_NFSTAT + nfstat_cleanup(); +#endif - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("NETFILTER: Cannot set pthread cancel state to ENABLE."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} +void *nfacct_main(void *ptr) { + netdata_thread_cleanup_push(nfacct_main_cleanup, ptr); int update_every = (int)config_get_number("plugin:netfilter", "update every", localhost->rrd_update_every); if(update_every < 
localhost->rrd_update_every) @@ -805,18 +812,7 @@ void *nfacct_main(void *ptr) { #endif } - info("NETFILTER thread exiting"); - -#ifdef DO_NFACCT - nfacct_cleanup(); -#endif - -#ifdef DO_NFSTAT - nfstat_cleanup(); -#endif - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_proc.c b/src/plugin_proc.c index c4249c847..e0afb0d6d 100644 --- a/src/plugin_proc.c +++ b/src/plugin_proc.c @@ -54,6 +54,9 @@ static struct proc_module { // ZFS metrics { .name = "/proc/spl/kstat/zfs/arcstats", .dim = "zfs_arcstats", .func = do_proc_spl_kstat_zfs_arcstats }, + // BTRFS metrics + { .name = "/sys/fs/btrfs", .dim = "btrfs", .func = do_sys_fs_btrfs }, + // IPC metrics { .name = "ipc", .dim = "ipc", .func = do_ipc }, @@ -61,16 +64,17 @@ static struct proc_module { { .name = NULL, .dim = NULL, .func = NULL } }; -void *proc_main(void *ptr) { +static void proc_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("PROC Plugin thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); +void *proc_main(void *ptr) { + netdata_thread_cleanup_push(proc_main_cleanup, ptr); int vdo_cpu_netdata = config_get_boolean("plugin:proc", "netdata server resources", 1); @@ -88,7 +92,7 @@ void *proc_main(void *ptr) { heartbeat_t hb; heartbeat_init(&hb); - for(;;) { + while(!netdata_exit) { usec_t hb_dt = heartbeat_next(&hb, step); usec_t duration = 0ULL; @@ -158,10 +162,7 @@ void *proc_main(void *ptr) { } } - info("PROC thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + 
netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_proc.h b/src/plugin_proc.h index fa5675440..a7f9b4e38 100644 --- a/src/plugin_proc.h +++ b/src/plugin_proc.h @@ -26,6 +26,7 @@ extern int do_proc_uptime(int update_every, usec_t dt); extern int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt); extern int do_proc_sys_devices_system_node(int update_every, usec_t dt); extern int do_proc_spl_kstat_zfs_arcstats(int update_every, usec_t dt); +extern int do_sys_fs_btrfs(int update_every, usec_t dt); extern int do_proc_net_sockstat(int update_every, usec_t dt); extern int do_proc_net_sockstat6(int update_every, usec_t dt); diff --git a/src/plugin_proc_diskspace.c b/src/plugin_proc_diskspace.c index e41e76182..0a229f38e 100644 --- a/src/plugin_proc_diskspace.c +++ b/src/plugin_proc_diskspace.c @@ -1,6 +1,6 @@ #include "common.h" -#define DELAULT_EXLUDED_PATHS "/proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/*" +#define DELAULT_EXCLUDED_PATHS "/proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/*" #define DEFAULT_EXCLUDED_FILESYSTEMS "" #define CONFIG_SECTION_DISKSPACE "plugin:proc:diskspace" @@ -13,8 +13,8 @@ static inline void mountinfo_reload(int force) { time_t now = now_realtime_sec(); if(force || now - last_loaded >= check_for_new_mountpoints_every) { - // mountinfo_free() can be called with NULL disk_mountinfo_root - mountinfo_free(disk_mountinfo_root); + // mountinfo_free_all() can be called with NULL disk_mountinfo_root + mountinfo_free_all(disk_mountinfo_root); // re-read mountinfo in case something changed disk_mountinfo_root = mountinfo_read(0); @@ -46,7 +46,7 @@ struct mount_point_metadata { static DICTIONARY *dict_mountpoints = NULL; -#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); st = NULL; } } while(st) +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) int mount_point_cleanup(void *entry, void 
*data) { (void)data; @@ -96,13 +96,15 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { } excluded_mountpoints = simple_pattern_create( - config_get(CONFIG_SECTION_DISKSPACE, "exclude space metrics on paths", DELAULT_EXLUDED_PATHS), - mode + config_get(CONFIG_SECTION_DISKSPACE, "exclude space metrics on paths", DELAULT_EXCLUDED_PATHS) + , NULL + , mode ); excluded_filesystems = simple_pattern_create( - config_get(CONFIG_SECTION_DISKSPACE, "exclude space metrics on filesystems", DEFAULT_EXCLUDED_FILESYSTEMS), - SIMPLE_PATTERN_EXACT + config_get(CONFIG_SECTION_DISKSPACE, "exclude space metrics on filesystems", DEFAULT_EXCLUDED_FILESYSTEMS) + , NULL + , SIMPLE_PATTERN_EXACT ); dict_mountpoints = dictionary_create(DICTIONARY_FLAG_SINGLE_THREADED); @@ -326,16 +328,17 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { m->collected++; } -void *proc_diskspace_main(void *ptr) { +static void diskspace_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("DISKSPACE thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("DISKSPACE: Cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("DISKSPACE: Cannot set pthread cancel state to ENABLE."); +void *proc_diskspace_main(void *ptr) { + netdata_thread_cleanup_push(diskspace_main_cleanup, ptr); int vdo_cpu_netdata = config_get_boolean("plugin:proc", "netdata server resources", 1); @@ -355,7 +358,7 @@ void *proc_diskspace_main(void *ptr) { usec_t step = update_every * USEC_PER_SEC; heartbeat_t hb; heartbeat_init(&hb); - for(;;) { + while(!netdata_exit) { duration = heartbeat_dt_usec(&hb); /* usec_t hb_dt = */ heartbeat_next(&hb, step); @@ -452,9 
+455,6 @@ void *proc_diskspace_main(void *ptr) { } } - info("DISKSPACE thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_tc.c b/src/plugin_tc.c index 0d8fd3d6b..4b6d84e11 100644 --- a/src/plugin_tc.c +++ b/src/plugin_tc.c @@ -828,17 +828,32 @@ static inline void tc_split_words(char *str, char **words, int max_words) { while(i < max_words) words[i++] = NULL; } -volatile pid_t tc_child_pid = 0; -void *tc_main(void *ptr) { +static pid_t tc_child_pid = 0; + +static void tc_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("TC thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + if(tc_child_pid) { + info("TC: killing with SIGTERM tc-qos-helper process %d", tc_child_pid); + if(killpid(tc_child_pid, SIGTERM) != -1) { + siginfo_t info; - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); + info("TC: waiting for tc plugin child process pid %d to exit...", tc_child_pid); + waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED); + // info("TC: finished tc plugin child process pid %d.", tc_child_pid); + } + + tc_child_pid = 0; + } + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *tc_main(void *ptr) { + netdata_thread_cleanup_push(tc_main_cleanup, ptr); struct rusage thread; @@ -863,10 +878,8 @@ void *tc_main(void *ptr) { snprintfz(buffer, TC_LINE_MAX, "%s/tc-qos-helper.sh", netdata_configured_plugins_dir); char *tc_script = config_get("plugin:tc", "script to run to get tc values", buffer); - - for(;1;) { - if(unlikely(netdata_exit)) break; + while(!netdata_exit) { FILE *fp; struct tc_device *device = NULL; struct tc_class *class = NULL; @@ -965,14 
+978,10 @@ void *tc_main(void *ptr) { // debug(D_TC_LOOP, "END line"); if(likely(device)) { - if(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL) != 0) - error("Cannot set pthread cancel state to DISABLE."); - + netdata_thread_disable_cancelability(); tc_device_commit(device); // tc_device_free(device); - - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); + netdata_thread_enable_cancelability(); } device = NULL; @@ -1149,10 +1158,7 @@ void *tc_main(void *ptr) { sleep((unsigned int) localhost->rrd_update_every); } -cleanup: - info("TC thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); +cleanup: ; // added semi-colon to prevent older gcc error: label at end of compound statement + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/plugin_tc.h b/src/plugin_tc.h index 9a0a19cce..792c6496f 100644 --- a/src/plugin_tc.h +++ b/src/plugin_tc.h @@ -1,7 +1,6 @@ #ifndef NETDATA_PLUGIN_TC_H #define NETDATA_PLUGIN_TC_H 1 -extern volatile pid_t tc_child_pid; extern void *tc_main(void *ptr); #endif /* NETDATA_PLUGIN_TC_H */ diff --git a/src/plugins_d.c b/src/plugins_d.c index d0f29f4d4..5693dda06 100644 --- a/src/plugins_d.c +++ b/src/plugins_d.c @@ -131,7 +131,7 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int clearerr(fp); if(unlikely(fileno(fp) == -1)) { - error("PLUGINSD: %s: file is not a valid stream.", cd->fullfilename); + error("file descriptor given is not a valid stream"); goto cleanup; } @@ -140,7 +140,7 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int char *r = fgets(line, PLUGINSD_LINE_MAX, fp); if(unlikely(!r)) { - error("PLUGINSD: %s : read failed.", cd->fullfilename); + error("read failed"); break; } @@ -148,12 +148,9 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int line[PLUGINSD_LINE_MAX] = '\0'; - // debug(D_PLUGINSD, "PLUGINSD: %s: %s", cd->filename, line); - 
int w = pluginsd_split_words(line, words, PLUGINSD_MAX_WORDS); char *s = words[0]; if(unlikely(!s || !*s || !w)) { - // debug(D_PLUGINSD, "PLUGINSD: empty line"); continue; } @@ -164,7 +161,7 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int char *value = words[2]; if(unlikely(!dimension || !*dimension)) { - error("PLUGINSD: '%s' is requesting a SET on chart '%s' of host '%s', without a dimension. Disabling it.", cd->fullfilename, st->id, host->hostname); + error("requested a SET on chart '%s' of host '%s', without a dimension. Disabling it.", st->id, host->hostname); enabled = 0; break; } @@ -172,17 +169,18 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int if(unlikely(!value || !*value)) value = NULL; if(unlikely(!st)) { - error("PLUGINSD: '%s' is requesting a SET on dimension %s with value %s on host '%s', without a BEGIN. Disabling it.", cd->fullfilename, dimension, value?value:"<nothing>", host->hostname); + error("requested a SET on dimension %s with value %s on host '%s', without a BEGIN. Disabling it.", dimension, value?value:"<nothing>", host->hostname); enabled = 0; break; } - if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) debug(D_PLUGINSD, "PLUGINSD: '%s' is setting dimension %s/%s to %s", cd->fullfilename, st->id, dimension, value?value:"<nothing>"); + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) + debug(D_PLUGINSD, "is setting dimension %s/%s to %s", st->id, dimension, value?value:"<nothing>"); if(value) { RRDDIM *rd = rrddim_find(st, dimension); if(unlikely(!rd)) { - error("PLUGINSD: '%s' is requesting a SET to dimension with id '%s' on stats '%s' (%s) on host '%s', which does not exist. Disabling it.", cd->fullfilename, dimension, st->name, st->id, st->rrdhost->hostname); + error("requested a SET to dimension with id '%s' on stats '%s' (%s) on host '%s', which does not exist. 
Disabling it.", dimension, st->name, st->id, st->rrdhost->hostname); enabled = 0; break; } @@ -195,14 +193,14 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int char *microseconds_txt = words[2]; if(unlikely(!id)) { - error("PLUGINSD: '%s' is requesting a BEGIN without a chart id for host '%s'. Disabling it.", cd->fullfilename, host->hostname); + error("requested a BEGIN without a chart id for host '%s'. Disabling it.", host->hostname); enabled = 0; break; } st = rrdset_find(host, id); if(unlikely(!st)) { - error("PLUGINSD: '%s' is requesting a BEGIN on chart '%s', which does not exist on host '%s'. Disabling it.", cd->fullfilename, id, host->hostname); + error("requested a BEGIN on chart '%s', which does not exist on host '%s'. Disabling it.", id, host->hostname); enabled = 0; break; } @@ -222,12 +220,13 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int } else if(likely(hash == END_HASH && !strcmp(s, PLUGINSD_KEYWORD_END))) { if(unlikely(!st)) { - error("PLUGINSD: '%s' is requesting an END, without a BEGIN on host '%s'. Disabling it.", cd->fullfilename, host->hostname); + error("requested an END, without a BEGIN on host '%s'. Disabling it.", host->hostname); enabled = 0; break; } - if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) debug(D_PLUGINSD, "PLUGINSD: '%s' is requesting an END on chart %s", cd->fullfilename, st->id); + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) + debug(D_PLUGINSD, "requested an END on chart %s", st->id); rrdset_done(st); st = NULL; @@ -259,7 +258,7 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int // make sure we have the required variables if(unlikely(!type || !*type || !id || !*id)) { - error("PLUGINSD: '%s' is requesting a CHART, without a type.id, on host '%s'. Disabling it.", cd->fullfilename, host->hostname); + error("requested a CHART, without a type.id, on host '%s'. 
Disabling it.", host->hostname); enabled = 0; break; } @@ -295,7 +294,7 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int if(unlikely(!title)) title = ""; if(unlikely(!units)) units = "unknown"; - debug(D_PLUGINSD, "PLUGINSD: Creating chart type='%s', id='%s', name='%s', family='%s', context='%s', chart='%s', priority=%d, update_every=%d" + debug(D_PLUGINSD, "creating chart type='%s', id='%s', name='%s', family='%s', context='%s', chart='%s', priority=%d, update_every=%d" , type, id , name?name:"" , family?family:"" @@ -332,6 +331,11 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int else rrdset_flag_clear(st, RRDSET_FLAG_DETAIL); + if(strstr(options, "hidden")) + rrdset_flag_set(st, RRDSET_FLAG_HIDDEN); + else + rrdset_flag_clear(st, RRDSET_FLAG_HIDDEN); + if(strstr(options, "store_first")) rrdset_flag_set(st, RRDSET_FLAG_STORE_FIRST); else @@ -352,13 +356,13 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int char *options = words[6]; if(unlikely(!id || !*id)) { - error("PLUGINSD: '%s' is requesting a DIMENSION, without an id, host '%s' and chart '%s'. Disabling it.", cd->fullfilename, host->hostname, st?st->id:"UNSET"); + error("requested a DIMENSION, without an id, host '%s' and chart '%s'. Disabling it.", host->hostname, st?st->id:"UNSET"); enabled = 0; break; } if(unlikely(!st)) { - error("PLUGINSD: '%s' is requesting a DIMENSION, without a CHART, on host '%s'. Disabling it.", cd->fullfilename, host->hostname); + error("requested a DIMENSION, without a CHART, on host '%s'. 
Disabling it.", host->hostname); enabled = 0; break; } @@ -374,7 +378,7 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int if(unlikely(!algorithm || !*algorithm)) algorithm = "absolute"; if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) - debug(D_PLUGINSD, "PLUGINSD: Creating dimension in chart %s, id='%s', name='%s', algorithm='%s', multiplier=%ld, divisor=%ld, hidden='%s'" + debug(D_PLUGINSD, "creating dimension in chart %s, id='%s', name='%s', algorithm='%s', multiplier=%ld, divisor=%ld, hidden='%s'" , st->id , id , name?name:"" @@ -412,7 +416,7 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int } if(unlikely(!name || !*name)) { - error("PLUGINSD: '%s' is requesting a VARIABLE on host '%s', without a variable name. Disabling it.", cd->fullfilename, host->hostname); + error("requested a VARIABLE on host '%s', without a variable name. Disabling it.", host->hostname); enabled = 0; break; } @@ -426,38 +430,38 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int if(unlikely(endptr && *endptr)) { if(endptr == value) - error("PLUGINSD: '%s': the value '%s' of VARIABLE '%s' on host '%s' cannot be parsed as a number", cd->fullfilename, value, name, host->hostname); + error("the value '%s' of VARIABLE '%s' on host '%s' cannot be parsed as a number", value, name, host->hostname); else - error("PLUGINSD: '%s': the value '%s' of VARIABLE '%s' on host '%s' has leftovers: '%s'", cd->fullfilename, value, name, host->hostname, endptr); + error("the value '%s' of VARIABLE '%s' on host '%s' has leftovers: '%s'", value, name, host->hostname, endptr); } if(global) { RRDVAR *rv = rrdvar_custom_host_variable_create(host, name); if (rv) rrdvar_custom_host_variable_set(host, rv, v); - else error("PLUGINSD: '%s': cannot find/create HOST VARIABLE '%s' on host '%s'", cd->fullfilename, name, host->hostname); + else error("cannot find/create HOST VARIABLE '%s' on host '%s'", name, 
host->hostname); } else if(st) { RRDSETVAR *rs = rrdsetvar_custom_chart_variable_create(st, name); if (rs) rrdsetvar_custom_chart_variable_set(rs, v); - else error("PLUGINSD: '%s': cannot find/create CHART VARIABLE '%s' on host '%s', chart '%s'", cd->fullfilename, name, host->hostname, st->id); + else error("cannot find/create CHART VARIABLE '%s' on host '%s', chart '%s'", name, host->hostname, st->id); } else - error("PLUGINSD: '%s': cannot find/create CHART VARIABLE '%s' on host '%s' without a chart", cd->fullfilename, name, host->hostname); + error("cannot find/create CHART VARIABLE '%s' on host '%s' without a chart", name, host->hostname); } else - error("PLUGINSD: '%s': cannot set %s VARIABLE '%s' on host '%s' to an empty value", cd->fullfilename, (global)?"HOST":"CHART", name, host->hostname); + error("cannot set %s VARIABLE '%s' on host '%s' to an empty value", (global)?"HOST":"CHART", name, host->hostname); } else if(likely(hash == FLUSH_HASH && !strcmp(s, PLUGINSD_KEYWORD_FLUSH))) { - debug(D_PLUGINSD, "PLUGINSD: '%s' is requesting a FLUSH", cd->fullfilename); + debug(D_PLUGINSD, "requested a FLUSH"); st = NULL; } else if(unlikely(hash == DISABLE_HASH && !strcmp(s, PLUGINSD_KEYWORD_DISABLE))) { - info("PLUGINSD: '%s' called DISABLE. Disabling it.", cd->fullfilename); + info("called DISABLE. Disabling it."); enabled = 0; break; } else { - error("PLUGINSD: '%s' is sending command '%s' which is not known by netdata, for host '%s'. Disabling it.", cd->fullfilename, s, host->hostname); + error("sent command '%s' which is not known by netdata, for host '%s'. 
Disabling it.", s, host->hostname); enabled = 0; break; } @@ -476,51 +480,66 @@ cleanup: return count; } +static void pluginsd_worker_thread_cleanup(void *arg) { + struct plugind *cd = (struct plugind *)arg; + + if(cd->enabled && !cd->obsolete) { + cd->obsolete = 1; + + info("data collection thread exiting"); + + if (cd->pid) { + siginfo_t info; + info("killing child process pid %d", cd->pid); + if (killpid(cd->pid, SIGTERM) != -1) { + info("waiting for child process pid %d to exit...", cd->pid); + waitid(P_PID, (id_t) cd->pid, &info, WEXITED); + } + cd->pid = 0; + } + } +} + void *pluginsd_worker_thread(void *arg) { + netdata_thread_cleanup_push(pluginsd_worker_thread_cleanup, arg); + struct plugind *cd = (struct plugind *)arg; - cd->obsolete = 0; + cd->obsolete = 0; size_t count = 0; - for(;;) { - if(unlikely(netdata_exit)) break; - + while(!netdata_exit) { FILE *fp = mypopen(cd->cmd, &cd->pid); if(unlikely(!fp)) { error("Cannot popen(\"%s\", \"r\").", cd->cmd); break; } - info("PLUGINSD: '%s' running on pid %d", cd->fullfilename, cd->pid); - + info("connected to '%s' running on pid %d", cd->fullfilename, cd->pid); count = pluginsd_process(localhost, cd, fp, 0); - error("PLUGINSD: plugin '%s' disconnected.", cd->fullfilename); - + error("'%s' (pid %d) disconnected after %zu successful data collections (ENDs).", cd->fullfilename, cd->pid, count); killpid(cd->pid, SIGTERM); - info("PLUGINSD: '%s' on pid %d stopped after %zu successful data collections (ENDs).", cd->fullfilename, cd->pid, count); - // get the return code int code = mypclose(fp, cd->pid); - if(unlikely(netdata_exit)) break; - else if(code != 0) { + if(code != 0) { // the plugin reports failure if(likely(!cd->successful_collections)) { // nothing collected - disable it - error("PLUGINSD: '%s' exited with error code %d. Disabling it.", cd->fullfilename, code); + error("'%s' (pid %d) exited with error code %d. 
Disabling it.", cd->fullfilename, cd->pid, code); cd->enabled = 0; } else { // we have collected something if(likely(cd->serial_failures <= 10)) { - error("PLUGINSD: '%s' exited with error code %d, but has given useful output in the past (%zu times). %s", cd->fullfilename, code, cd->successful_collections, cd->enabled?"Waiting a bit before starting it again.":"Will not start it again - it is disabled."); + error("'%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times). %s", cd->fullfilename, cd->pid, code, cd->successful_collections, cd->enabled?"Waiting a bit before starting it again.":"Will not start it again - it is disabled."); sleep((unsigned int) (cd->update_every * 10)); } else { - error("PLUGINSD: '%s' exited with error code %d, but has given useful output in the past (%zu times). We tried %zu times to restart it, but it failed to generate data. Disabling it.", cd->fullfilename, code, cd->successful_collections, cd->serial_failures); + error("'%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times). We tried %zu times to restart it, but it failed to generate data. Disabling it.", cd->fullfilename, cd->pid, code, cd->successful_collections, cd->serial_failures); cd->enabled = 0; } } @@ -532,11 +551,11 @@ void *pluginsd_worker_thread(void *arg) { // we have collected nothing so far if(likely(cd->serial_failures <= 10)) { - error("PLUGINSD: '%s' (pid %d) does not generate useful output but it reports success (exits with 0). %s.", cd->fullfilename, cd->pid, cd->enabled?"Waiting a bit before starting it again.":"Will not start it again - it is disabled."); + error("'%s' (pid %d) does not generate useful output but it reports success (exits with 0). 
%s.", cd->fullfilename, cd->pid, cd->enabled?"Waiting a bit before starting it again.":"Will not start it again - it is now disabled."); sleep((unsigned int) (cd->update_every * 10)); } else { - error("PLUGINSD: '%s' (pid %d) does not generate useful output, although it reports success (exits with 0), but we have tried %zu times to collect something. Disabling it.", cd->fullfilename, cd->pid, cd->serial_failures); + error("'%s' (pid %d) does not generate useful output, although it reports success (exits with 0), but we have tried %zu times to collect something. Disabling it.", cd->fullfilename, cd->pid, cd->serial_failures); cd->enabled = 0; } } @@ -548,23 +567,29 @@ void *pluginsd_worker_thread(void *arg) { if(unlikely(!cd->enabled)) break; } - info("PLUGINSD: '%s' thread exiting", cd->fullfilename); - - cd->obsolete = 1; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } -void *pluginsd_main(void *ptr) { - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; +static void pluginsd_main_cleanup(void *data) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + info("cleaning up..."); - info("PLUGINS.D thread created with task id %d", gettid()); + struct plugind *cd; + for (cd = pluginsd_root; cd; cd = cd->next) { + if (cd->enabled && !cd->obsolete) { + info("stopping plugin thread: %s", cd->id); + netdata_thread_cancel(cd->thread); + } + } - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + info("cleanup completed."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); +void *pluginsd_main(void *ptr) { + netdata_thread_cleanup_push(pluginsd_main_cleanup, ptr); int automatic_run = config_get_boolean(CONFIG_SECTION_PLUGINS, "enable running 
new plugins", 1); int scan_frequency = (int) config_get_number(CONFIG_SECTION_PLUGINS, "check for new plugins every", 60); @@ -574,9 +599,7 @@ void *pluginsd_main(void *ptr) { // so that we don't log broken directories on each loop int directory_errors[PLUGINSD_MAX_DIRECTORIES] = { 0 }; - for(;;) { - if(unlikely(netdata_exit)) break; - + while(!netdata_exit) { int idx; const char *directory_name; @@ -588,7 +611,7 @@ void *pluginsd_main(void *ptr) { if(unlikely(!dir)) { if(directory_errors[idx] != errno) { directory_errors[idx] = errno; - error("PLUGINSD: Cannot open plugins directory '%s'.", directory_name); + error("cannot open plugins directory '%s'", directory_name); } continue; } @@ -597,14 +620,14 @@ void *pluginsd_main(void *ptr) { while(likely((file = readdir(dir)))) { if(unlikely(netdata_exit)) break; - debug(D_PLUGINSD, "PLUGINSD: Examining file '%s'", file->d_name); + debug(D_PLUGINSD, "examining file '%s'", file->d_name); if(unlikely(strcmp(file->d_name, ".") == 0 || strcmp(file->d_name, "..") == 0)) continue; int len = (int) strlen(file->d_name); if(unlikely(len <= (int)PLUGINSD_FILE_SUFFIX_LEN)) continue; if(unlikely(strcmp(PLUGINSD_FILE_SUFFIX, &file->d_name[len - (int)PLUGINSD_FILE_SUFFIX_LEN]) != 0)) { - debug(D_PLUGINSD, "PLUGINSD: File '%s' does not end in '%s'.", file->d_name, PLUGINSD_FILE_SUFFIX); + debug(D_PLUGINSD, "file '%s' does not end in '%s'", file->d_name, PLUGINSD_FILE_SUFFIX); continue; } @@ -613,7 +636,7 @@ void *pluginsd_main(void *ptr) { int enabled = config_get_boolean(CONFIG_SECTION_PLUGINS, pluginname, automatic_run); if(unlikely(!enabled)) { - debug(D_PLUGINSD, "PLUGINSD: plugin '%s' is not enabled", file->d_name); + debug(D_PLUGINSD, "plugin '%s' is not enabled", file->d_name); continue; } @@ -623,7 +646,7 @@ void *pluginsd_main(void *ptr) { if(unlikely(strcmp(cd->filename, file->d_name) == 0)) break; if(likely(cd && !cd->obsolete)) { - debug(D_PLUGINSD, "PLUGINSD: plugin '%s' is already running", cd->filename); + 
debug(D_PLUGINSD, "plugin '%s' is already running", cd->filename); continue; } @@ -652,12 +675,10 @@ void *pluginsd_main(void *ptr) { cd->obsolete = 1; if(cd->enabled) { + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "PLUGINSD[%s]", pluginname); // spawn a new thread for it - if(unlikely(pthread_create(&cd->thread, NULL, pluginsd_worker_thread, cd) != 0)) - error("PLUGINSD: failed to create new thread for plugin '%s'.", cd->filename); - - else if(unlikely(pthread_detach(cd->thread) != 0)) - error("PLUGINSD: Cannot request detach of newly created thread for plugin '%s'.", cd->filename); + netdata_thread_create(&cd->thread, tag, NETDATA_THREAD_OPTION_DEFAULT, pluginsd_worker_thread, cd); } } } @@ -668,31 +689,6 @@ void *pluginsd_main(void *ptr) { sleep((unsigned int) scan_frequency); } - info("PLUGINS.D thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } - - -void pluginsd_stop_all_external_plugins() { - siginfo_t info; - struct plugind *cd; - for(cd = pluginsd_root ; cd ; cd = cd->next) { - if(cd->enabled && !cd->obsolete) { - info("Stopping %s plugin thread", cd->id); - pthread_cancel(cd->thread); - - if(cd->pid) { - info("killing %s plugin child process pid %d", cd->id, cd->pid); - if(killpid(cd->pid, SIGTERM) != -1) - waitid(P_PID, (id_t) cd->pid, &info, WEXITED); - - cd->pid = 0; - } - - cd->obsolete = 1; - } - } -} diff --git a/src/plugins_d.h b/src/plugins_d.h index 4d708386f..692d7cae1 100644 --- a/src/plugins_d.h +++ b/src/plugins_d.h @@ -26,8 +26,8 @@ struct plugind { char fullfilename[FILENAME_MAX+1]; // with path char cmd[PLUGINSD_CMD_MAX+1]; // the command that it executes - pid_t pid; - pthread_t thread; + volatile pid_t pid; + netdata_thread_t thread; size_t successful_collections; // the number of times we have seen // values collected from this plugin @@ -36,8 +36,8 @@ struct plugind { // without collecting values int update_every; // the plugin 
default data collection frequency - volatile int obsolete; // do not touch this structure after setting this to 1 - volatile int enabled; // if this is enabled or not + volatile sig_atomic_t obsolete; // do not touch this structure after setting this to 1 + volatile sig_atomic_t enabled; // if this is enabled or not time_t started_t; @@ -47,7 +47,6 @@ struct plugind { extern struct plugind *pluginsd_root; extern void *pluginsd_main(void *ptr); -extern void pluginsd_stop_all_external_plugins(void); extern size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp, int trust_durations); extern int pluginsd_split_words(char *str, char **words, int max_words); diff --git a/src/popen.c b/src/popen.c index 27be61774..eda1c05b9 100644 --- a/src/popen.c +++ b/src/popen.c @@ -43,7 +43,7 @@ static void mypopen_del(FILE *fp) { #define PIPE_READ 0 #define PIPE_WRITE 1 -FILE *mypopen(const char *command, pid_t *pidptr) +FILE *mypopen(const char *command, volatile pid_t *pidptr) { int pipefd[2]; @@ -113,6 +113,42 @@ FILE *mypopen(const char *command, pid_t *pidptr) exit(1); } +FILE *mypopene(const char *command, volatile pid_t *pidptr, char **env) { + int pipefd[2]; + + if(pipe(pipefd) == -1) + return NULL; + + int pid = fork(); + if(pid == -1) { + close(pipefd[PIPE_READ]); + close(pipefd[PIPE_WRITE]); + return NULL; + } + if(pid != 0) { + // the parent + *pidptr = pid; + close(pipefd[PIPE_WRITE]); + FILE *fp = fdopen(pipefd[PIPE_READ], "r"); + return(fp); + } + // the child + + // close all files + int i; + for(i = (int) (sysconf(_SC_OPEN_MAX) - 1); i > 0; i--) + if(i != STDIN_FILENO && i != STDERR_FILENO && i != pipefd[PIPE_WRITE]) close(i); + + // move the pipe to stdout + if(pipefd[PIPE_WRITE] != STDOUT_FILENO) { + dup2(pipefd[PIPE_WRITE], STDOUT_FILENO); + close(pipefd[PIPE_WRITE]); + } + + execle("/bin/sh", "sh", "-c", command, NULL, env); + exit(1); +} + int mypclose(FILE *fp, pid_t pid) { debug(D_EXIT, "Request to mypclose() on pid %d", pid); @@ -126,6 +162,8 @@ 
int mypclose(FILE *fp, pid_t pid) { // close the pipe file pointer fclose(fp); + errno = 0; + siginfo_t info; if(waitid(P_PID, (id_t) pid, &info, WEXITED) != -1) { switch(info.si_code) { @@ -133,37 +171,30 @@ int mypclose(FILE *fp, pid_t pid) { if(info.si_status) error("child pid %d exited with code %d.", info.si_pid, info.si_status); return(info.si_status); - break; case CLD_KILLED: error("child pid %d killed by signal %d.", info.si_pid, info.si_status); return(-1); - break; case CLD_DUMPED: error("child pid %d core dumped by signal %d.", info.si_pid, info.si_status); return(-2); - break; case CLD_STOPPED: error("child pid %d stopped by signal %d.", info.si_pid, info.si_status); return(0); - break; case CLD_TRAPPED: error("child pid %d trapped by signal %d.", info.si_pid, info.si_status); return(-4); - break; case CLD_CONTINUED: error("child pid %d continued by signal %d.", info.si_pid, info.si_status); return(0); - break; default: error("child pid %d gave us a SIGCHLD with code %d and status %d.", info.si_pid, info.si_code, info.si_status); return(-5); - break; } } else diff --git a/src/popen.h b/src/popen.h index 90845e1fb..3dd79bb4d 100644 --- a/src/popen.h +++ b/src/popen.h @@ -4,7 +4,8 @@ #define PIPE_READ 0 #define PIPE_WRITE 1 -extern FILE *mypopen(const char *command, pid_t *pidptr); +extern FILE *mypopen(const char *command, volatile pid_t *pidptr); +extern FILE *mypopene(const char *command, volatile pid_t *pidptr, char **env); extern int mypclose(FILE *fp, pid_t pid); #endif /* NETDATA_POPEN_H */ diff --git a/src/proc_diskstats.c b/src/proc_diskstats.c index 866e49c77..8cde3334b 100644 --- a/src/proc_diskstats.c +++ b/src/proc_diskstats.c @@ -21,7 +21,6 @@ static struct disk { char *mount_point; // disk options caching - int configured; int do_io; int do_ops; int do_mops; @@ -29,9 +28,27 @@ static struct disk { int do_qops; int do_util; int do_backlog; + int do_bcache; int updated; + int device_is_bcache; + + char *bcache_filename_dirty_data; + char 
*bcache_filename_writeback_rate; + char *bcache_filename_cache_congested; + char *bcache_filename_cache_available_percent; + char *bcache_filename_stats_five_minute_cache_hit_ratio; + char *bcache_filename_stats_hour_cache_hit_ratio; + char *bcache_filename_stats_day_cache_hit_ratio; + char *bcache_filename_stats_total_cache_hit_ratio; + char *bcache_filename_stats_total_cache_hits; + char *bcache_filename_stats_total_cache_misses; + char *bcache_filename_stats_total_cache_miss_collisions; + char *bcache_filename_stats_total_cache_bypass_hits; + char *bcache_filename_stats_total_cache_bypass_misses; + char *bcache_filename_stats_total_cache_readaheads; + RRDSET *st_io; RRDDIM *rd_io_reads; RRDDIM *rd_io_writes; @@ -68,21 +85,118 @@ static struct disk { RRDSET *st_svctm; RRDDIM *rd_svctm_svctm; + RRDSET *st_bcache_size; + RRDDIM *rd_bcache_dirty_size; + + RRDSET *st_bcache_usage; + RRDDIM *rd_bcache_available_percent; + + RRDSET *st_bcache_hit_ratio; + RRDDIM *rd_bcache_hit_ratio_5min; + RRDDIM *rd_bcache_hit_ratio_1hour; + RRDDIM *rd_bcache_hit_ratio_1day; + RRDDIM *rd_bcache_hit_ratio_total; + + RRDSET *st_bcache; + RRDDIM *rd_bcache_hits; + RRDDIM *rd_bcache_misses; + RRDDIM *rd_bcache_miss_collisions; + + RRDSET *st_bcache_bypass; + RRDDIM *rd_bcache_bypass_hits; + RRDDIM *rd_bcache_bypass_misses; + + RRDSET *st_bcache_rates; + RRDDIM *rd_bcache_rate_congested; + RRDDIM *rd_bcache_readaheads; + RRDDIM *rd_bcache_rate_writeback; + struct disk *next; } *disk_root = NULL; -#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); st = NULL; } } while(st) +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) -static char *path_to_get_hw_sector_size = NULL; -static char *path_to_get_hw_sector_size_partitions = NULL; +// static char *path_to_get_hw_sector_size = NULL; +// static char *path_to_get_hw_sector_size_partitions = NULL; static char *path_to_sys_dev_block_major_minor_string = 
NULL; static char *path_to_sys_block_device = NULL; +static char *path_to_sys_block_device_bcache = NULL; static char *path_to_sys_devices_virtual_block_device = NULL; static char *path_to_device_mapper = NULL; static char *path_to_device_label = NULL; static char *path_to_device_id = NULL; static int name_disks_by_id = CONFIG_BOOLEAN_NO; +static int global_enable_new_disks_detected_at_runtime = CONFIG_BOOLEAN_YES, + global_enable_performance_for_physical_disks = CONFIG_BOOLEAN_AUTO, + global_enable_performance_for_virtual_disks = CONFIG_BOOLEAN_AUTO, + global_enable_performance_for_partitions = CONFIG_BOOLEAN_NO, + global_do_io = CONFIG_BOOLEAN_AUTO, + global_do_ops = CONFIG_BOOLEAN_AUTO, + global_do_mops = CONFIG_BOOLEAN_AUTO, + global_do_iotime = CONFIG_BOOLEAN_AUTO, + global_do_qops = CONFIG_BOOLEAN_AUTO, + global_do_util = CONFIG_BOOLEAN_AUTO, + global_do_backlog = CONFIG_BOOLEAN_AUTO, + global_do_bcache = CONFIG_BOOLEAN_AUTO, + globals_initialized = 0, + global_cleanup_removed_disks = 1; + +static SIMPLE_PATTERN *excluded_disks = NULL; + +static unsigned long long int bcache_read_number_with_units(const char *filename) { + char buffer[50 + 1]; + if(read_file(filename, buffer, 50) == 0) { + static int unknown_units_error = 10; + + char *end = NULL; + long double value = str2ld(buffer, &end); + if(end && *end) { + if(*end == 'k') + return (unsigned long long int)(value * 1024.0); + else if(*end == 'M') + return (unsigned long long int)(value * 1024.0 * 1024.0); + else if(*end == 'G') + return (unsigned long long int)(value * 1024.0 * 1024.0 * 1024.0); + else if(unknown_units_error > 0) { + error("bcache file '%s' provides value '%s' with unknown units '%s'", filename, buffer, end); + unknown_units_error--; + } + } + + return (unsigned long long int)value; + } + + return 0; +} + +static inline int is_major_enabled(int major) { + static int8_t *major_configs = NULL; + static size_t major_size = 0; + + if(major < 0) return 1; + + size_t wanted_size = (size_t)major 
+ 1; + + if(major_size < wanted_size) { + major_configs = reallocz(major_configs, wanted_size * sizeof(int8_t)); + + size_t i; + for(i = major_size; i < wanted_size ; i++) + major_configs[i] = -1; + + major_size = wanted_size; + } + + if(major_configs[major] == -1) { + char buffer[CONFIG_MAX_NAME + 1]; + snprintfz(buffer, CONFIG_MAX_NAME, "performance metrics for disks with major %d", major); + major_configs[major] = (char)config_get_boolean(CONFIG_SECTION_DISKSTATS, buffer, 1); + } + + return (int)major_configs[major]; +} + static inline int get_disk_name_from_path(const char *path, char *result, size_t result_size, unsigned long major, unsigned long minor, char *disk) { char filename[FILENAME_MAX + 1]; int found = 0; @@ -160,6 +274,103 @@ static inline char *get_disk_name(unsigned long major, unsigned long minor, char return strdup(result); } +static void get_disk_config(struct disk *d) { + int def_enable = global_enable_new_disks_detected_at_runtime; + + if(def_enable != CONFIG_BOOLEAN_NO && (simple_pattern_matches(excluded_disks, d->device) || simple_pattern_matches(excluded_disks, d->disk))) + def_enable = CONFIG_BOOLEAN_NO; + + char var_name[4096 + 1]; + snprintfz(var_name, 4096, "plugin:proc:/proc/diskstats:%s", d->disk); + + def_enable = config_get_boolean_ondemand(var_name, "enable", def_enable); + if(unlikely(def_enable == CONFIG_BOOLEAN_NO)) { + // the user does not want any metrics for this disk + d->do_io = CONFIG_BOOLEAN_NO; + d->do_ops = CONFIG_BOOLEAN_NO; + d->do_mops = CONFIG_BOOLEAN_NO; + d->do_iotime = CONFIG_BOOLEAN_NO; + d->do_qops = CONFIG_BOOLEAN_NO; + d->do_util = CONFIG_BOOLEAN_NO; + d->do_backlog = CONFIG_BOOLEAN_NO; + d->do_bcache = CONFIG_BOOLEAN_NO; + } + else { + // this disk is enabled + // check its direct settings + + int def_performance = CONFIG_BOOLEAN_AUTO; + + // since this is 'on demand' we can figure the performance settings + // based on the type of disk + + if(!d->device_is_bcache) { + switch(d->type) { + default: + case 
DISK_TYPE_UNKNOWN: + break; + + case DISK_TYPE_PHYSICAL: + def_performance = global_enable_performance_for_physical_disks; + break; + + case DISK_TYPE_PARTITION: + def_performance = global_enable_performance_for_partitions; + break; + + case DISK_TYPE_VIRTUAL: + def_performance = global_enable_performance_for_virtual_disks; + break; + } + } + + // check if we have to disable performance for this disk + if(def_performance) + def_performance = is_major_enabled((int)d->major); + + // ------------------------------------------------------------ + // now we have def_performance and def_space + // to work further + + // def_performance + // check the user configuration (this will also show our 'on demand' decision) + def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance); + + int ddo_io = CONFIG_BOOLEAN_NO, + ddo_ops = CONFIG_BOOLEAN_NO, + ddo_mops = CONFIG_BOOLEAN_NO, + ddo_iotime = CONFIG_BOOLEAN_NO, + ddo_qops = CONFIG_BOOLEAN_NO, + ddo_util = CONFIG_BOOLEAN_NO, + ddo_backlog = CONFIG_BOOLEAN_NO, + ddo_bcache = CONFIG_BOOLEAN_NO; + + // we enable individual performance charts only when def_performance is not disabled + if(unlikely(def_performance != CONFIG_BOOLEAN_NO)) { + ddo_io = global_do_io, + ddo_ops = global_do_ops, + ddo_mops = global_do_mops, + ddo_iotime = global_do_iotime, + ddo_qops = global_do_qops, + ddo_util = global_do_util, + ddo_backlog = global_do_backlog, + ddo_bcache = global_do_bcache; + } + + d->do_io = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io); + d->do_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops); + d->do_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops); + d->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime); + d->do_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops); + d->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util); + d->do_backlog = 
config_get_boolean_ondemand(var_name, "backlog", ddo_backlog); + + if(d->device_is_bcache) + d->do_bcache = config_get_boolean_ondemand(var_name, "bcache", ddo_bcache); + else + d->do_bcache = 0; + } +} static struct disk *get_disk(unsigned long major, unsigned long minor, char *disk) { static struct mountinfo *disk_mountinfo_root = NULL; @@ -183,7 +394,6 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis d->major = major; d->minor = minor; d->type = DISK_TYPE_UNKNOWN; // Default type. Changed later if not correct. - d->configured = 0; d->sector_size = 512; // the default, will be changed below d->next = NULL; @@ -249,8 +459,8 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis // mountinfo_find() can be called with NULL disk_mountinfo_root struct mountinfo *mi = mountinfo_find(disk_mountinfo_root, d->major, d->minor); if(unlikely(!mi)) { - // mountinfo_free can be called with NULL - mountinfo_free(disk_mountinfo_root); + // mountinfo_free_all can be called with NULL + mountinfo_free_all(disk_mountinfo_root); disk_mountinfo_root = mountinfo_read(0); mi = mountinfo_find(disk_mountinfo_root, d->major, d->minor); } @@ -263,6 +473,9 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis // ------------------------------------------------------------------------ // find the disk sector size + /* + * sector size is always 512 bytes inside the kernel #3481 + * { char tf[FILENAME_MAX + 1], *t; strncpyz(tf, d->device, FILENAME_MAX); @@ -294,52 +507,110 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis } else error("Cannot read sector size for device %s from %s. 
Assuming 512.", d->device, buffer); } + */ - return d; -} + // ------------------------------------------------------------------------ + // check if the device is a bcache -static inline int is_major_enabled(int major) { - static int8_t *major_configs = NULL; - static size_t major_size = 0; + struct stat bcache; + snprintfz(buffer, FILENAME_MAX, path_to_sys_block_device_bcache, disk); + if(unlikely(stat(buffer, &bcache) == 0 && (bcache.st_mode & S_IFMT) == S_IFDIR)) { + // we have the 'bcache' directory + d->device_is_bcache = 1; - if(major < 0) return 1; + char buffer2[FILENAME_MAX + 1]; - size_t wanted_size = (size_t)major + 1; + snprintfz(buffer2, FILENAME_MAX, "%s/cache/congested", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_cache_congested = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); - if(major_size < wanted_size) { - major_configs = reallocz(major_configs, wanted_size * sizeof(int8_t)); + snprintfz(buffer2, FILENAME_MAX, "%s/readahead", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_readaheads = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); - size_t i; - for(i = major_size; i < wanted_size ; i++) - major_configs[i] = -1; + snprintfz(buffer2, FILENAME_MAX, "%s/dirty_data", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_dirty_data = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); - major_size = wanted_size; - } + snprintfz(buffer2, FILENAME_MAX, "%s/writeback_rate", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_writeback_rate = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); - if(major_configs[major] == -1) { - char buffer[CONFIG_MAX_NAME + 1]; - snprintfz(buffer, CONFIG_MAX_NAME, "performance metrics for disks with major %d", major); - major_configs[major] = (char)config_get_boolean(CONFIG_SECTION_DISKSTATS, buffer, 1); + snprintfz(buffer2, 
FILENAME_MAX, "%s/cache/cache_available_percent", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_cache_available_percent = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_hits", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_hits = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_five_minute/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_five_minute_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_hour/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_hour_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_day/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_day_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_hit_ratio", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_hit_ratio = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_misses", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_misses = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_bypass_hits", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_bypass_hits = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_bypass_misses", buffer); + 
if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_bypass_misses = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); + + snprintfz(buffer2, FILENAME_MAX, "%s/stats_total/cache_miss_collisions", buffer); + if(access(buffer2, R_OK) == 0) + d->bcache_filename_stats_total_cache_miss_collisions = strdupz(buffer2); + else + error("bcache file '%s' cannot be read.", buffer2); } - return (int)major_configs[major]; + get_disk_config(d); + return d; } int do_proc_diskstats(int update_every, usec_t dt) { static procfile *ff = NULL; - static int global_enable_new_disks_detected_at_runtime = CONFIG_BOOLEAN_YES, - global_enable_performance_for_physical_disks = CONFIG_BOOLEAN_AUTO, - global_enable_performance_for_virtual_disks = CONFIG_BOOLEAN_AUTO, - global_enable_performance_for_partitions = CONFIG_BOOLEAN_NO, - global_do_io = CONFIG_BOOLEAN_AUTO, - global_do_ops = CONFIG_BOOLEAN_AUTO, - global_do_mops = CONFIG_BOOLEAN_AUTO, - global_do_iotime = CONFIG_BOOLEAN_AUTO, - global_do_qops = CONFIG_BOOLEAN_AUTO, - global_do_util = CONFIG_BOOLEAN_AUTO, - global_do_backlog = CONFIG_BOOLEAN_AUTO, - globals_initialized = 0, - global_cleanup_removed_disks = 1; if(unlikely(!globals_initialized)) { globals_initialized = 1; @@ -356,6 +627,7 @@ int do_proc_diskstats(int update_every, usec_t dt) { global_do_qops = config_get_boolean_ondemand(CONFIG_SECTION_DISKSTATS, "queued operations for all disks", global_do_qops); global_do_util = config_get_boolean_ondemand(CONFIG_SECTION_DISKSTATS, "utilization percentage for all disks", global_do_util); global_do_backlog = config_get_boolean_ondemand(CONFIG_SECTION_DISKSTATS, "backlog for all disks", global_do_backlog); + global_do_bcache = config_get_boolean_ondemand(CONFIG_SECTION_DISKSTATS, "bcache for all disks", global_do_bcache); global_cleanup_removed_disks = config_get_boolean(CONFIG_SECTION_DISKSTATS, "remove charts of removed disks" , global_cleanup_removed_disks); @@ -364,17 +636,20 @@ int 
do_proc_diskstats(int update_every, usec_t dt) { snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s"); path_to_sys_block_device = config_get(CONFIG_SECTION_DISKSTATS, "path to get block device", buffer); + snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/bcache"); + path_to_sys_block_device_bcache = config_get(CONFIG_SECTION_DISKSTATS, "path to get block device bcache", buffer); + snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/virtual/block/%s"); path_to_sys_devices_virtual_block_device = config_get(CONFIG_SECTION_DISKSTATS, "path to get virtual block device", buffer); snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/dev/block/%lu:%lu/%s"); path_to_sys_dev_block_major_minor_string = config_get(CONFIG_SECTION_DISKSTATS, "path to get block device infos", buffer); - snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/queue/hw_sector_size"); - path_to_get_hw_sector_size = config_get(CONFIG_SECTION_DISKSTATS, "path to get h/w sector size", buffer); + //snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/queue/hw_sector_size"); + //path_to_get_hw_sector_size = config_get(CONFIG_SECTION_DISKSTATS, "path to get h/w sector size", buffer); - snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/dev/block/%lu:%lu/subsystem/%s/../queue/hw_sector_size"); - path_to_get_hw_sector_size_partitions = config_get(CONFIG_SECTION_DISKSTATS, "path to get h/w sector size for partitions", buffer); + //snprintfz(buffer, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/dev/block/%lu:%lu/subsystem/%s/../queue/hw_sector_size"); + //path_to_get_hw_sector_size_partitions = config_get(CONFIG_SECTION_DISKSTATS, "path to get h/w sector size for partitions", buffer); snprintfz(buffer, FILENAME_MAX, "%s/dev/mapper", netdata_configured_host_prefix); 
path_to_device_mapper = config_get(CONFIG_SECTION_DISKSTATS, "path to device mapper", buffer); @@ -386,6 +661,12 @@ int do_proc_diskstats(int update_every, usec_t dt) { path_to_device_id = config_get(CONFIG_SECTION_DISKSTATS, "path to /dev/disk/by-id", buffer); name_disks_by_id = config_get_boolean(CONFIG_SECTION_DISKSTATS, "name disks by id", name_disks_by_id); + + excluded_disks = simple_pattern_create( + config_get(CONFIG_SECTION_DISKSTATS, "exclude disks", DEFAULT_EXCLUDED_DISKS) + , NULL + , SIMPLE_PATTERN_EXACT + ); } // -------------------------------------------------------------------------- @@ -479,6 +760,9 @@ int do_proc_diskstats(int update_every, usec_t dt) { struct disk *d = get_disk(major, minor, disk); d->updated = 1; + // -------------------------------------------------------------------------- + // count the global system disk I/O of physical disks + if(unlikely(d->type == DISK_TYPE_PHYSICAL)) { system_read_kb += readsectors * d->sector_size / 1024; system_write_kb += writesectors * d->sector_size / 1024; @@ -492,108 +776,6 @@ int do_proc_diskstats(int update_every, usec_t dt) { // -------------------------------------------------------------------------- - // Check the configuration for the device - - if(unlikely(!d->configured)) { - d->configured = 1; - - static SIMPLE_PATTERN *excluded_disks = NULL; - - if(unlikely(!excluded_disks)) { - excluded_disks = simple_pattern_create( - config_get(CONFIG_SECTION_DISKSTATS, "exclude disks", DEFAULT_EXCLUDED_DISKS), - SIMPLE_PATTERN_EXACT - ); - } - - int def_enable = global_enable_new_disks_detected_at_runtime; - - if(def_enable != CONFIG_BOOLEAN_NO && (simple_pattern_matches(excluded_disks, d->device) || simple_pattern_matches(excluded_disks, d->disk))) - def_enable = CONFIG_BOOLEAN_NO; - - char var_name[4096 + 1]; - snprintfz(var_name, 4096, "plugin:proc:/proc/diskstats:%s", d->disk); - - def_enable = config_get_boolean_ondemand(var_name, "enable", def_enable); - if(unlikely(def_enable == 
CONFIG_BOOLEAN_NO)) { - // the user does not want any metrics for this disk - d->do_io = CONFIG_BOOLEAN_NO; - d->do_ops = CONFIG_BOOLEAN_NO; - d->do_mops = CONFIG_BOOLEAN_NO; - d->do_iotime = CONFIG_BOOLEAN_NO; - d->do_qops = CONFIG_BOOLEAN_NO; - d->do_util = CONFIG_BOOLEAN_NO; - d->do_backlog = CONFIG_BOOLEAN_NO; - } - else { - // this disk is enabled - // check its direct settings - - int def_performance = CONFIG_BOOLEAN_AUTO; - - // since this is 'on demand' we can figure the performance settings - // based on the type of disk - - switch(d->type) { - default: - case DISK_TYPE_UNKNOWN: - break; - - case DISK_TYPE_PHYSICAL: - def_performance = global_enable_performance_for_physical_disks; - break; - - case DISK_TYPE_PARTITION: - def_performance = global_enable_performance_for_partitions; - break; - - case DISK_TYPE_VIRTUAL: - def_performance = global_enable_performance_for_virtual_disks; - break; - } - - // check if we have to disable performance for this disk - if(def_performance) - def_performance = is_major_enabled((int)major); - - // ------------------------------------------------------------ - // now we have def_performance and def_space - // to work further - - // def_performance - // check the user configuration (this will also show our 'on demand' decision) - def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance); - - int ddo_io = CONFIG_BOOLEAN_NO, - ddo_ops = CONFIG_BOOLEAN_NO, - ddo_mops = CONFIG_BOOLEAN_NO, - ddo_iotime = CONFIG_BOOLEAN_NO, - ddo_qops = CONFIG_BOOLEAN_NO, - ddo_util = CONFIG_BOOLEAN_NO, - ddo_backlog = CONFIG_BOOLEAN_NO; - - // we enable individual performance charts only when def_performance is not disabled - if(unlikely(def_performance != CONFIG_BOOLEAN_NO)) { - ddo_io = global_do_io, - ddo_ops = global_do_ops, - ddo_mops = global_do_mops, - ddo_iotime = global_do_iotime, - ddo_qops = global_do_qops, - ddo_util = global_do_util, - ddo_backlog = global_do_backlog; - } - - d->do_io = 
config_get_boolean_ondemand(var_name, "bandwidth", ddo_io); - d->do_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops); - d->do_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops); - d->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime); - d->do_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops); - d->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util); - d->do_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog); - } - } - - // -------------------------------------------------------------------------- // Do performance metrics if(d->do_io == CONFIG_BOOLEAN_YES || (d->do_io == CONFIG_BOOLEAN_AUTO && (readsectors || writesectors))) { @@ -913,6 +1095,248 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_done(d->st_svctm); } } + + // -------------------------------------------------------------------------- + // read bcache metrics and generate the bcache charts + + if(d->device_is_bcache && d->do_bcache != CONFIG_BOOLEAN_NO) { + unsigned long long int + stats_total_cache_bypass_hits = 0, + stats_total_cache_bypass_misses = 0, + stats_total_cache_hits = 0, + stats_total_cache_miss_collisions = 0, + stats_total_cache_misses = 0, + stats_five_minute_cache_hit_ratio = 0, + stats_hour_cache_hit_ratio = 0, + stats_day_cache_hit_ratio = 0, + stats_total_cache_hit_ratio = 0, + cache_available_percent = 0, + cache_readaheads = 0, + cache_congested = 0, + dirty_data = 0, + writeback_rate = 0; + + // read the bcache values + + if(d->bcache_filename_dirty_data) + dirty_data = bcache_read_number_with_units(d->bcache_filename_dirty_data); + + if(d->bcache_filename_writeback_rate) + writeback_rate = bcache_read_number_with_units(d->bcache_filename_writeback_rate); + + if(d->bcache_filename_cache_congested) + cache_congested = bcache_read_number_with_units(d->bcache_filename_cache_congested); + + 
if(d->bcache_filename_cache_available_percent) + read_single_number_file(d->bcache_filename_cache_available_percent, &cache_available_percent); + + if(d->bcache_filename_stats_five_minute_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_five_minute_cache_hit_ratio, &stats_five_minute_cache_hit_ratio); + + if(d->bcache_filename_stats_hour_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_hour_cache_hit_ratio, &stats_hour_cache_hit_ratio); + + if(d->bcache_filename_stats_day_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_day_cache_hit_ratio, &stats_day_cache_hit_ratio); + + if(d->bcache_filename_stats_total_cache_hit_ratio) + read_single_number_file(d->bcache_filename_stats_total_cache_hit_ratio, &stats_total_cache_hit_ratio); + + if(d->bcache_filename_stats_total_cache_hits) + read_single_number_file(d->bcache_filename_stats_total_cache_hits, &stats_total_cache_hits); + + if(d->bcache_filename_stats_total_cache_misses) + read_single_number_file(d->bcache_filename_stats_total_cache_misses, &stats_total_cache_misses); + + if(d->bcache_filename_stats_total_cache_miss_collisions) + read_single_number_file(d->bcache_filename_stats_total_cache_miss_collisions, &stats_total_cache_miss_collisions); + + if(d->bcache_filename_stats_total_cache_bypass_hits) + read_single_number_file(d->bcache_filename_stats_total_cache_bypass_hits, &stats_total_cache_bypass_hits); + + if(d->bcache_filename_stats_total_cache_bypass_misses) + read_single_number_file(d->bcache_filename_stats_total_cache_bypass_misses, &stats_total_cache_bypass_misses); + + if(d->bcache_filename_stats_total_cache_readaheads) + cache_readaheads = bcache_read_number_with_units(d->bcache_filename_stats_total_cache_readaheads); + + + // update the charts + + { + + if(unlikely(!d->st_bcache_hit_ratio)) { + d->st_bcache_hit_ratio = rrdset_create_localhost( + "disk_bcache_hit_ratio" + , d->device + , d->disk + , family + , "disk.bcache_hit_ratio" + , "BCache Cache 
Hit Ratio" + , "percentage" + , "proc" + , "diskstats" + , 2120 + , update_every + , RRDSET_TYPE_LINE + ); + + d->rd_bcache_hit_ratio_5min = rrddim_add(d->st_bcache_hit_ratio, "5min", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_hit_ratio_1hour = rrddim_add(d->st_bcache_hit_ratio, "1hour", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_hit_ratio_1day = rrddim_add(d->st_bcache_hit_ratio, "1day", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_hit_ratio_total = rrddim_add(d->st_bcache_hit_ratio, "ever", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(d->st_bcache_hit_ratio); + + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_5min, stats_five_minute_cache_hit_ratio); + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_1hour, stats_hour_cache_hit_ratio); + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_1day, stats_day_cache_hit_ratio); + rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_total, stats_total_cache_hit_ratio); + rrdset_done(d->st_bcache_hit_ratio); + } + + { + + if(unlikely(!d->st_bcache_rates)) { + d->st_bcache_rates = rrdset_create_localhost( + "disk_bcache_rates" + , d->device + , d->disk + , family + , "disk.bcache_rates" + , "BCache Rates" + , "KB/s" + , "proc" + , "diskstats" + , 2121 + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_bcache_rate_congested = rrddim_add(d->st_bcache_rates, "congested", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + d->rd_bcache_rate_writeback = rrddim_add(d->st_bcache_rates, "writeback", NULL, -1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(d->st_bcache_rates); + + rrddim_set_by_pointer(d->st_bcache_rates, d->rd_bcache_rate_writeback, writeback_rate); + rrddim_set_by_pointer(d->st_bcache_rates, d->rd_bcache_rate_congested, cache_congested); + rrdset_done(d->st_bcache_rates); + } + + { + if(unlikely(!d->st_bcache_size)) { + d->st_bcache_size = rrdset_create_localhost( + "disk_bcache_size" + , d->device + 
, d->disk + , family + , "disk.bcache_size" + , "BCache Cache Sizes" + , "MB" + , "proc" + , "diskstats" + , 2122 + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_bcache_dirty_size = rrddim_add(d->st_bcache_size, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(d->st_bcache_size); + + rrddim_set_by_pointer(d->st_bcache_size, d->rd_bcache_dirty_size, dirty_data); + rrdset_done(d->st_bcache_size); + } + + { + if(unlikely(!d->st_bcache_usage)) { + d->st_bcache_usage = rrdset_create_localhost( + "disk_bcache_usage" + , d->device + , d->disk + , family + , "disk.bcache_usage" + , "BCache Cache Usage" + , "percent" + , "proc" + , "diskstats" + , 2123 + , update_every + , RRDSET_TYPE_AREA + ); + + d->rd_bcache_available_percent = rrddim_add(d->st_bcache_usage, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(d->st_bcache_usage); + + rrddim_set_by_pointer(d->st_bcache_usage, d->rd_bcache_available_percent, cache_available_percent); + rrdset_done(d->st_bcache_usage); + } + + if(d->do_bcache == CONFIG_BOOLEAN_YES || (d->do_bcache == CONFIG_BOOLEAN_AUTO && (stats_total_cache_hits != 0 || stats_total_cache_misses != 0 || stats_total_cache_miss_collisions != 0))) { + + if(unlikely(!d->st_bcache)) { + d->st_bcache = rrdset_create_localhost( + "disk_bcache" + , d->device + , d->disk + , family + , "disk.bcache" + , "BCache Cache I/O Operations" + , "operations/s" + , "proc" + , "diskstats" + , 2124 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_bcache, RRDSET_FLAG_DETAIL); + + d->rd_bcache_hits = rrddim_add(d->st_bcache, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_misses = rrddim_add(d->st_bcache, "misses", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_miss_collisions = rrddim_add(d->st_bcache, "collisions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_readaheads = rrddim_add(d->st_bcache, "readaheads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + else 
rrdset_next(d->st_bcache); + + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_hits, stats_total_cache_hits); + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_misses, stats_total_cache_misses); + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_miss_collisions, stats_total_cache_miss_collisions); + rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_readaheads, cache_readaheads); + rrdset_done(d->st_bcache); + } + + if(d->do_bcache == CONFIG_BOOLEAN_YES || (d->do_bcache == CONFIG_BOOLEAN_AUTO && (stats_total_cache_bypass_hits != 0 || stats_total_cache_bypass_misses != 0))) { + + if(unlikely(!d->st_bcache_bypass)) { + d->st_bcache_bypass = rrdset_create_localhost( + "disk_bcache_bypass" + , d->device + , d->disk + , family + , "disk.bcache_bypass" + , "BCache Cache Bypass I/O Operations" + , "operations/s" + , "proc" + , "diskstats" + , 2125 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(d->st_bcache_bypass, RRDSET_FLAG_DETAIL); + + d->rd_bcache_bypass_hits = rrddim_add(d->st_bcache_bypass, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_bcache_bypass_misses = rrddim_add(d->st_bcache_bypass, "misses", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + else rrdset_next(d->st_bcache_bypass); + + rrddim_set_by_pointer(d->st_bcache_bypass, d->rd_bcache_bypass_hits, stats_total_cache_bypass_hits); + rrddim_set_by_pointer(d->st_bcache_bypass, d->rd_bcache_bypass_misses, stats_total_cache_bypass_misses); + rrdset_done(d->st_bcache_bypass); + } + } } @@ -968,6 +1392,12 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_obsolete_and_pointer_null(d->st_qops); rrdset_obsolete_and_pointer_null(d->st_svctm); rrdset_obsolete_and_pointer_null(d->st_util); + rrdset_obsolete_and_pointer_null(d->st_bcache); + rrdset_obsolete_and_pointer_null(d->st_bcache_bypass); + rrdset_obsolete_and_pointer_null(d->st_bcache_rates); + rrdset_obsolete_and_pointer_null(d->st_bcache_size); + rrdset_obsolete_and_pointer_null(d->st_bcache_usage); + 
rrdset_obsolete_and_pointer_null(d->st_bcache_hit_ratio); if(d == disk_root) { disk_root = d = d->next; @@ -977,6 +1407,21 @@ int do_proc_diskstats(int update_every, usec_t dt) { last->next = d = d->next; } + freez(t->bcache_filename_dirty_data); + freez(t->bcache_filename_writeback_rate); + freez(t->bcache_filename_cache_congested); + freez(t->bcache_filename_cache_available_percent); + freez(t->bcache_filename_stats_five_minute_cache_hit_ratio); + freez(t->bcache_filename_stats_hour_cache_hit_ratio); + freez(t->bcache_filename_stats_day_cache_hit_ratio); + freez(t->bcache_filename_stats_total_cache_hit_ratio); + freez(t->bcache_filename_stats_total_cache_hits); + freez(t->bcache_filename_stats_total_cache_misses); + freez(t->bcache_filename_stats_total_cache_miss_collisions); + freez(t->bcache_filename_stats_total_cache_bypass_hits); + freez(t->bcache_filename_stats_total_cache_bypass_misses); + freez(t->bcache_filename_stats_total_cache_readaheads); + freez(t->disk); freez(t->device); freez(t->mount_point); diff --git a/src/proc_meminfo.c b/src/proc_meminfo.c index 085850c2c..3915bf0e9 100644 --- a/src/proc_meminfo.c +++ b/src/proc_meminfo.c @@ -4,14 +4,15 @@ int do_proc_meminfo(int update_every, usec_t dt) { (void)dt; static procfile *ff = NULL; - static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1; + static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1, do_hugepages = -1, do_transparent_hugepages = -1; static ARL_BASE *arl_base = NULL; - static ARL_ENTRY *arl_hwcorrupted = NULL; + static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL; static unsigned long long MemTotal = 0, MemFree = 0, + MemAvailable = 0, Buffers = 0, Cached = 0, //SwapCached = 0, @@ -43,12 +44,13 @@ int do_proc_meminfo(int update_every, usec_t dt) { //VmallocTotal = 0, VmallocUsed = 0, //VmallocChunk = 0, - //AnonHugePages = 0, - 
//HugePages_Total = 0, - //HugePages_Free = 0, - //HugePages_Rsvd = 0, - //HugePages_Surp = 0, - //Hugepagesize = 0, + AnonHugePages = 0, + ShmemHugePages = 0, + HugePages_Total = 0, + HugePages_Free = 0, + HugePages_Rsvd = 0, + HugePages_Surp = 0, + Hugepagesize = 0, //DirectMap4k = 0, //DirectMap2M = 0, HardwareCorrupted = 0; @@ -61,10 +63,13 @@ int do_proc_meminfo(int update_every, usec_t dt) { do_writeback = config_get_boolean("plugin:proc:/proc/meminfo", "writeback memory", 1); do_kernel = config_get_boolean("plugin:proc:/proc/meminfo", "kernel memory", 1); do_slab = config_get_boolean("plugin:proc:/proc/meminfo", "slab memory", 1); + do_hugepages = config_get_boolean_ondemand("plugin:proc:/proc/meminfo", "hugepages", CONFIG_BOOLEAN_AUTO); + do_transparent_hugepages = config_get_boolean_ondemand("plugin:proc:/proc/meminfo", "transparent hugepages", CONFIG_BOOLEAN_AUTO); arl_base = arl_create("meminfo", NULL, 60); arl_expect(arl_base, "MemTotal", &MemTotal); arl_expect(arl_base, "MemFree", &MemFree); + arl_memavailable = arl_expect(arl_base, "MemAvailable", &MemAvailable); arl_expect(arl_base, "Buffers", &Buffers); arl_expect(arl_base, "Cached", &Cached); //arl_expect(arl_base, "SwapCached", &SwapCached); @@ -97,12 +102,13 @@ int do_proc_meminfo(int update_every, usec_t dt) { arl_expect(arl_base, "VmallocUsed", &VmallocUsed); //arl_expect(arl_base, "VmallocChunk", &VmallocChunk); arl_hwcorrupted = arl_expect(arl_base, "HardwareCorrupted", &HardwareCorrupted); - //arl_expect(arl_base, "AnonHugePages", &AnonHugePages); - //arl_expect(arl_base, "HugePages_Total", &HugePages_Total); - //arl_expect(arl_base, "HugePages_Free", &HugePages_Free); - //arl_expect(arl_base, "HugePages_Rsvd", &HugePages_Rsvd); - //arl_expect(arl_base, "HugePages_Surp", &HugePages_Surp); - //arl_expect(arl_base, "Hugepagesize", &Hugepagesize); + arl_expect(arl_base, "AnonHugePages", &AnonHugePages); + arl_expect(arl_base, "ShmemHugePages", &ShmemHugePages); + arl_expect(arl_base, 
"HugePages_Total", &HugePages_Total); + arl_expect(arl_base, "HugePages_Free", &HugePages_Free); + arl_expect(arl_base, "HugePages_Rsvd", &HugePages_Rsvd); + arl_expect(arl_base, "HugePages_Surp", &HugePages_Surp); + arl_expect(arl_base, "Hugepagesize", &Hugepagesize); //arl_expect(arl_base, "DirectMap4k", &DirectMap4k); //arl_expect(arl_base, "DirectMap2M", &DirectMap2M); } @@ -135,41 +141,73 @@ int do_proc_meminfo(int update_every, usec_t dt) { // -------------------------------------------------------------------- // http://stackoverflow.com/questions/3019748/how-to-reliably-measure-available-memory-in-linux - unsigned long long MemUsed = MemTotal - MemFree - Cached - Buffers; + unsigned long long MemCached = Cached + Slab; + unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers; if(do_ram) { - static RRDSET *st_system_ram = NULL; - static RRDDIM *rd_free = NULL, *rd_used = NULL, *rd_cached = NULL, *rd_buffers = NULL; - - if(unlikely(!st_system_ram)) { - st_system_ram = rrdset_create_localhost( - "system" - , "ram" - , NULL - , "ram" - , NULL - , "System RAM" - , "MB" - , "proc" - , "meminfo" - , 200 - , update_every - , RRDSET_TYPE_STACKED - ); - - rd_free = rrddim_add(st_system_ram, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); - rd_used = rrddim_add(st_system_ram, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); - rd_cached = rrddim_add(st_system_ram, "cached", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); - rd_buffers = rrddim_add(st_system_ram, "buffers", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + { + static RRDSET *st_system_ram = NULL; + static RRDDIM *rd_free = NULL, *rd_used = NULL, *rd_cached = NULL, *rd_buffers = NULL; + + if(unlikely(!st_system_ram)) { + st_system_ram = rrdset_create_localhost( + "system" + , "ram" + , NULL + , "ram" + , NULL + , "System RAM" + , "MB" + , "proc" + , "meminfo" + , 200 + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_free = rrddim_add(st_system_ram, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + 
rd_used = rrddim_add(st_system_ram, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_cached = rrddim_add(st_system_ram, "cached", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_buffers = rrddim_add(st_system_ram, "buffers", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(st_system_ram); + + rrddim_set_by_pointer(st_system_ram, rd_free, MemFree); + rrddim_set_by_pointer(st_system_ram, rd_used, MemUsed); + rrddim_set_by_pointer(st_system_ram, rd_cached, MemCached); + rrddim_set_by_pointer(st_system_ram, rd_buffers, Buffers); + + rrdset_done(st_system_ram); } - else rrdset_next(st_system_ram); - rrddim_set_by_pointer(st_system_ram, rd_free, MemFree); - rrddim_set_by_pointer(st_system_ram, rd_used, MemUsed); - rrddim_set_by_pointer(st_system_ram, rd_cached, Cached); - rrddim_set_by_pointer(st_system_ram, rd_buffers, Buffers); - - rrdset_done(st_system_ram); + if(arl_memavailable->flags & ARL_ENTRY_FLAG_FOUND) { + static RRDSET *st_mem_available = NULL; + static RRDDIM *rd_avail = NULL; + + if(unlikely(!st_mem_available)) { + st_mem_available = rrdset_create_localhost( + "mem" + , "available" + , NULL + , "system" + , NULL + , "Available RAM for applications" + , "MB" + , "proc" + , "meminfo" + , NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE + , update_every + , RRDSET_TYPE_AREA + ); + + rd_avail = rrddim_add(st_mem_available, "MemAvailable", "avail", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(st_mem_available); + + rrddim_set_by_pointer(st_mem_available, rd_avail, MemAvailable); + + rrdset_done(st_mem_available); + } } // -------------------------------------------------------------------- @@ -230,7 +268,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MB" , "proc" , "meminfo" - , 9000 + , NETDATA_CHART_PRIO_MEM_HW , update_every , RRDSET_TYPE_LINE ); @@ -263,7 +301,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MB" , "proc" , "meminfo" - , 5000 + , NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED , update_every , RRDSET_TYPE_AREA 
); @@ -296,7 +334,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MB" , "proc" , "meminfo" - , 4000 + , NETDATA_CHART_PRIO_MEM_KERNEL , update_every , RRDSET_TYPE_LINE ); @@ -336,7 +374,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MB" , "proc" , "meminfo" - , 6000 + , NETDATA_CHART_PRIO_MEM_KERNEL + 1 , update_every , RRDSET_TYPE_STACKED ); @@ -375,7 +413,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MB" , "proc" , "meminfo" - , 6500 + , NETDATA_CHART_PRIO_MEM_SLAB , update_every , RRDSET_TYPE_STACKED ); @@ -393,6 +431,84 @@ int do_proc_meminfo(int update_every, usec_t dt) { rrdset_done(st_mem_slab); } + // -------------------------------------------------------------------- + + if(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO && Hugepagesize != 0 && HugePages_Total != 0)) { + do_hugepages = CONFIG_BOOLEAN_YES; + + static RRDSET *st_mem_hugepages = NULL; + static RRDDIM *rd_used = NULL, *rd_free = NULL, *rd_rsvd = NULL, *rd_surp = NULL; + + if(unlikely(!st_mem_hugepages)) { + st_mem_hugepages = rrdset_create_localhost( + "mem" + , "hugepages" + , NULL + , "hugepages" + , NULL + , "Dedicated HugePages Memory" + , "MB" + , "proc" + , "meminfo" + , NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_mem_hugepages, RRDSET_FLAG_DETAIL); + + rd_free = rrddim_add(st_mem_hugepages, "free", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_used = rrddim_add(st_mem_hugepages, "used", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_surp = rrddim_add(st_mem_hugepages, "surplus", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_rsvd = rrddim_add(st_mem_hugepages, "reserved", NULL, Hugepagesize, 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(st_mem_hugepages); + + rrddim_set_by_pointer(st_mem_hugepages, rd_used, HugePages_Total - HugePages_Free - HugePages_Rsvd); + rrddim_set_by_pointer(st_mem_hugepages, rd_free, HugePages_Free); + 
rrddim_set_by_pointer(st_mem_hugepages, rd_rsvd, HugePages_Rsvd); + rrddim_set_by_pointer(st_mem_hugepages, rd_surp, HugePages_Surp); + + rrdset_done(st_mem_hugepages); + } + + // -------------------------------------------------------------------- + + if(do_transparent_hugepages == CONFIG_BOOLEAN_YES || (do_transparent_hugepages == CONFIG_BOOLEAN_AUTO && (AnonHugePages != 0 || ShmemHugePages != 0))) { + do_transparent_hugepages = CONFIG_BOOLEAN_YES; + + static RRDSET *st_mem_transparent_hugepages = NULL; + static RRDDIM *rd_anonymous = NULL, *rd_shared = NULL; + + if(unlikely(!st_mem_transparent_hugepages)) { + st_mem_transparent_hugepages = rrdset_create_localhost( + "mem" + , "transparent_hugepages" + , NULL + , "hugepages" + , NULL + , "Transparent HugePages Memory" + , "MB" + , "proc" + , "meminfo" + , NETDATA_CHART_PRIO_MEM_HUGEPAGES + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_mem_transparent_hugepages, RRDSET_FLAG_DETAIL); + + rd_anonymous = rrddim_add(st_mem_transparent_hugepages, "anonymous", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_shared = rrddim_add(st_mem_transparent_hugepages, "shmem", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(st_mem_transparent_hugepages); + + rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_anonymous, AnonHugePages); + rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_shared, ShmemHugePages); + + rrdset_done(st_mem_transparent_hugepages); + } + return 0; } diff --git a/src/proc_net_dev.c b/src/proc_net_dev.c index 32bb5bab1..341b9e0ca 100644 --- a/src/proc_net_dev.c +++ b/src/proc_net_dev.c @@ -447,7 +447,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { do_compressed = config_get_boolean_ondemand("plugin:proc:/proc/net/dev", "compressed packets for all interfaces", CONFIG_BOOLEAN_AUTO); do_events = config_get_boolean_ondemand("plugin:proc:/proc/net/dev", "frames, collisions, carrier counters for all interfaces", CONFIG_BOOLEAN_AUTO); - disabled_list = 
simple_pattern_create(config_get("plugin:proc:/proc/net/dev", "disable by default interfaces matching", "lo fireqos* *-ifb"), SIMPLE_PATTERN_EXACT); + disabled_list = simple_pattern_create(config_get("plugin:proc:/proc/net/dev", "disable by default interfaces matching", "lo fireqos* *-ifb"), NULL, SIMPLE_PATTERN_EXACT); } if(unlikely(!ff)) { diff --git a/src/proc_net_rpc_nfs.c b/src/proc_net_rpc_nfs.c index 126216e0b..a4c778cba 100644 --- a/src/proc_net_rpc_nfs.c +++ b/src/proc_net_rpc_nfs.c @@ -288,7 +288,7 @@ int do_proc_net_rpc_nfs(int update_every, usec_t dt) { , "operations/s" , "proc" , "net/rpc/nfs" - , 5007 + , 2207 , update_every , RRDSET_TYPE_STACKED ); @@ -328,7 +328,7 @@ int do_proc_net_rpc_nfs(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfs" - , 5008 + , 2208 , update_every , RRDSET_TYPE_LINE ); @@ -361,7 +361,7 @@ int do_proc_net_rpc_nfs(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfs" - , 5009 + , 2209 , update_every , RRDSET_TYPE_STACKED ); @@ -394,7 +394,7 @@ int do_proc_net_rpc_nfs(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfs" - , 5010 + , 2210 , update_every , RRDSET_TYPE_STACKED ); @@ -427,7 +427,7 @@ int do_proc_net_rpc_nfs(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfs" - , 5011 + , 2211 , update_every , RRDSET_TYPE_STACKED ); diff --git a/src/proc_net_rpc_nfsd.c b/src/proc_net_rpc_nfsd.c index f0c9a20ce..8aca31aed 100644 --- a/src/proc_net_rpc_nfsd.c +++ b/src/proc_net_rpc_nfsd.c @@ -224,30 +224,32 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { static int do_rc = -1, do_fh = -1, do_io = -1, do_th = -1, do_ra = -1, do_net = -1, do_rpc = -1, do_proc2 = -1, do_proc3 = -1, do_proc4 = -1, do_proc4ops = -1; static int ra_warning = 0, th_warning = 0, proc2_warning = 0, proc3_warning = 0, proc4_warning = 0, proc4ops_warning = 0; - if(!ff) { + if(unlikely(!ff)) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s%s", 
netdata_configured_host_prefix, "/proc/net/rpc/nfsd"); ff = procfile_open(config_get("plugin:proc:/proc/net/rpc/nfsd", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; } - if(!ff) return 1; ff = procfile_readall(ff); - if(!ff) return 0; // we return 0, so that we will retry to open it next time - - if(do_rc == -1) do_rc = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "read cache", 1); - if(do_fh == -1) do_fh = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "file handles", 1); - if(do_io == -1) do_io = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "I/O", 1); - if(do_th == -1) do_th = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "threads", 1); - if(do_ra == -1) do_ra = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "read ahead", 1); - if(do_net == -1) do_net = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "network", 1); - if(do_rpc == -1) do_rpc = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "rpc", 1); - if(do_proc2 == -1) do_proc2 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v2 procedures", 1); - if(do_proc3 == -1) do_proc3 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v3 procedures", 1); - if(do_proc4 == -1) do_proc4 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v4 procedures", 1); - if(do_proc4ops == -1) do_proc4ops = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v4 operations", 1); + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + + if(unlikely(do_rc == -1)) { + do_rc = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "read cache", 1); + do_fh = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "file handles", 1); + do_io = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "I/O", 1); + do_th = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "threads", 1); + do_ra = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "read ahead", 1); + do_net = 
config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "network", 1); + do_rpc = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "rpc", 1); + do_proc2 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v2 procedures", 1); + do_proc3 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v3 procedures", 1); + do_proc4 = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v4 procedures", 1); + do_proc4ops = config_get_boolean("plugin:proc:/proc/net/rpc/nfsd", "NFS v4 operations", 1); + } // if they are enabled, reset them to 1 - // later we do them =2 to avoid doing strcmp() for all lines + // later we do them = 2 to avoid doing strcmp() for all lines if(do_rc) do_rc = 1; if(do_fh) do_fh = 1; if(do_io) do_io = 1; @@ -273,12 +275,12 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { for(l = 0; l < lines ;l++) { size_t words = procfile_linewords(ff, l); - if(!words) continue; + if(unlikely(!words)) continue; type = procfile_lineword(ff, l, 0); if(do_rc == 1 && strcmp(type, "rc") == 0) { - if(words < 4) { + if(unlikely(words < 4)) { error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 4); continue; } @@ -292,7 +294,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { else do_rc = 2; } else if(do_fh == 1 && strcmp(type, "fh") == 0) { - if(words < 6) { + if(unlikely(words < 6)) { error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 6); continue; } @@ -308,7 +310,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { else do_fh = 2; } else if(do_io == 1 && strcmp(type, "io") == 0) { - if(words < 3) { + if(unlikely(words < 3)) { error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 3); continue; } @@ -321,7 +323,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { else do_io = 2; } else if(do_th == 1 && strcmp(type, "th") == 0) { - if(words < 13) { + if(unlikely(words < 13)) { error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", 
type, words, 13); continue; } @@ -352,7 +354,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { else do_th = 2; } else if(do_ra == 1 && strcmp(type, "ra") == 0) { - if(words < 13) { + if(unlikely(words < 13)) { error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 13); continue; } @@ -381,7 +383,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { else do_ra = 2; } else if(do_net == 1 && strcmp(type, "net") == 0) { - if(words < 5) { + if(unlikely(words < 5)) { error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 5); continue; } @@ -396,7 +398,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { else do_net = 2; } else if(do_rpc == 1 && strcmp(type, "rpc") == 0) { - if(words < 6) { + if(unlikely(words < 6)) { error("%s line of /proc/net/rpc/nfsd has %zu words, expected %d", type, words, 6); continue; } @@ -515,7 +517,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "reads/s" , "proc" , "net/rpc/nfsd" - , 5000 + , 2100 , update_every , RRDSET_TYPE_STACKED ); @@ -553,7 +555,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "handles/s" , "proc" , "net/rpc/nfsd" - , 5001 + , 2101 , update_every , RRDSET_TYPE_LINE ); @@ -593,7 +595,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "kilobytes/s" , "proc" , "net/rpc/nfsd" - , 5002 + , 2102 , update_every , RRDSET_TYPE_AREA ); @@ -626,7 +628,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "threads" , "proc" , "net/rpc/nfsd" - , 5003 + , 2103 , update_every , RRDSET_TYPE_LINE ); @@ -654,7 +656,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "ops/s" , "proc" , "net/rpc/nfsd" - , 5004 + , 2104 , update_every , RRDSET_TYPE_LINE ); @@ -691,7 +693,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "percentage" , "proc" , "net/rpc/nfsd" - , 5005 + , 2105 , update_every , RRDSET_TYPE_LINE ); @@ -750,7 +752,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , 
"percentage" , "proc" , "net/rpc/nfsd" - , 5005 + , 2105 , update_every , RRDSET_TYPE_STACKED ); @@ -804,7 +806,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "packets/s" , "proc" , "net/rpc/nfsd" - , 5007 + , 2107 , update_every , RRDSET_TYPE_STACKED ); @@ -843,7 +845,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfsd" - , 5008 + , 2108 , update_every , RRDSET_TYPE_LINE ); @@ -879,7 +881,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfsd" - , 5009 + , 2109 , update_every , RRDSET_TYPE_STACKED ); @@ -912,7 +914,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfsd" - , 5010 + , 2110 , update_every , RRDSET_TYPE_STACKED ); @@ -945,7 +947,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "calls/s" , "proc" , "net/rpc/nfsd" - , 5011 + , 2111 , update_every , RRDSET_TYPE_STACKED ); @@ -978,7 +980,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { , "operations/s" , "proc" , "net/rpc/nfsd" - , 5012 + , 2112 , update_every , RRDSET_TYPE_STACKED ); diff --git a/src/proc_net_snmp.c b/src/proc_net_snmp.c index fabfdf8c6..43c010c14 100644 --- a/src/proc_net_snmp.c +++ b/src/proc_net_snmp.c @@ -90,7 +90,7 @@ int do_proc_net_snmp(int update_every, usec_t dt) { static procfile *ff = NULL; static int do_ip_packets = -1, do_ip_fragsout = -1, do_ip_fragsin = -1, do_ip_errors = -1, - do_tcp_sockets = -1, do_tcp_packets = -1, do_tcp_errors = -1, do_tcp_handshake = -1, + do_tcp_sockets = -1, do_tcp_packets = -1, do_tcp_errors = -1, do_tcp_handshake = -1, do_tcp_opens = -1, do_udp_packets = -1, do_udp_errors = -1, do_icmp_packets = -1, do_icmpmsg = -1, do_udplite_packets = -1; static uint32_t hash_ip = 0, hash_icmp = 0, hash_tcp = 0, hash_udp = 0, hash_icmpmsg = 0, hash_udplite = 0; @@ -112,6 +112,7 @@ int do_proc_net_snmp(int update_every, usec_t dt) { do_tcp_sockets = 
config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP connections", 1); do_tcp_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP packets", 1); do_tcp_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP errors", 1); + do_tcp_opens = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP opens", 1); do_tcp_handshake = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP handshake issues", 1); do_udp_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 UDP packets", 1); do_udp_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 UDP errors", 1); @@ -723,7 +724,7 @@ int do_proc_net_snmp(int update_every, usec_t dt) { , "packets/s" , "proc" , "net/snmp" - , 2520 + , 2525 , update_every , RRDSET_TYPE_LINE ); @@ -743,12 +744,44 @@ int do_proc_net_snmp(int update_every, usec_t dt) { // -------------------------------------------------------------------- + if(do_tcp_opens) { + static RRDSET *st = NULL; + static RRDDIM *rd_ActiveOpens = NULL, + *rd_PassiveOpens = NULL; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + RRD_TYPE_NET_SNMP + , "tcpopens" + , NULL + , "tcp" + , NULL + , "IPv4 TCP Opens" + , "connections/s" + , "proc" + , "net/snmp" + , 2502 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_flag_set(st, RRDSET_FLAG_DETAIL); + + rd_ActiveOpens = rrddim_add(st, "ActiveOpens", "active", 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_PassiveOpens = rrddim_add(st, "PassiveOpens", "passive", 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set_by_pointer(st, rd_ActiveOpens, (collected_number)snmp_root.tcp_ActiveOpens); + rrddim_set_by_pointer(st, rd_PassiveOpens, (collected_number)snmp_root.tcp_PassiveOpens); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + if(do_tcp_handshake) { static RRDSET *st = NULL; static RRDDIM *rd_EstabResets = NULL, *rd_OutRsts = NULL, - *rd_ActiveOpens = NULL, - *rd_PassiveOpens = NULL, 
*rd_AttemptFails = NULL, *rd_TCPSynRetrans = NULL; @@ -769,19 +802,15 @@ int do_proc_net_snmp(int update_every, usec_t dt) { ); rrdset_flag_set(st, RRDSET_FLAG_DETAIL); - rd_EstabResets = rrddim_add(st, "EstabResets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_OutRsts = rrddim_add(st, "OutRsts", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_ActiveOpens = rrddim_add(st, "ActiveOpens", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_PassiveOpens = rrddim_add(st, "PassiveOpens", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_AttemptFails = rrddim_add(st, "AttemptFails", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_TCPSynRetrans = rrddim_add(st, "TCPSynRetrans", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_EstabResets = rrddim_add(st, "EstabResets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_OutRsts = rrddim_add(st, "OutRsts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_AttemptFails = rrddim_add(st, "AttemptFails", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_TCPSynRetrans = rrddim_add(st, "TCPSynRetrans", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else rrdset_next(st); rrddim_set_by_pointer(st, rd_EstabResets, (collected_number)snmp_root.tcp_EstabResets); rrddim_set_by_pointer(st, rd_OutRsts, (collected_number)snmp_root.tcp_OutRsts); - rrddim_set_by_pointer(st, rd_ActiveOpens, (collected_number)snmp_root.tcp_ActiveOpens); - rrddim_set_by_pointer(st, rd_PassiveOpens, (collected_number)snmp_root.tcp_PassiveOpens); rrddim_set_by_pointer(st, rd_AttemptFails, (collected_number)snmp_root.tcp_AttemptFails); rrddim_set_by_pointer(st, rd_TCPSynRetrans, tcpext_TCPSynRetrans); rrdset_done(st); diff --git a/src/proc_net_sockstat.c b/src/proc_net_sockstat.c index 2ca4061b5..db3070660 100644 --- a/src/proc_net_sockstat.c +++ b/src/proc_net_sockstat.c @@ -304,7 +304,7 @@ int do_proc_net_sockstat(int update_every, usec_t dt) { , "KB" , "proc" , "net/sockstat" - , 2540 + , 4000 , update_every , RRDSET_TYPE_AREA ); diff --git a/src/proc_self_mountinfo.c b/src/proc_self_mountinfo.c index 
bb031a9ab..4ccdddff1 100644 --- a/src/proc_self_mountinfo.c +++ b/src/proc_self_mountinfo.c @@ -103,20 +103,11 @@ struct mountinfo *mountinfo_find_by_filesystem_super_option(struct mountinfo *ro return NULL; } - -// free a linked list of mountinfo structures -void mountinfo_free(struct mountinfo *mi) { - if(unlikely(!mi)) - return; - - if(likely(mi->next)) - mountinfo_free(mi->next); - +static void mountinfo_free(struct mountinfo *mi) { freez(mi->root); freez(mi->mount_point); freez(mi->mount_options); freez(mi->persistent_id); - /* if(mi->optional_fields_count) { int i; @@ -131,6 +122,16 @@ void mountinfo_free(struct mountinfo *mi) { freez(mi); } +// free a linked list of mountinfo structures +void mountinfo_free_all(struct mountinfo *mi) { + while(mi) { + struct mountinfo *t = mi; + mi = mi->next; + + mountinfo_free(t); + } +} + static char *strdupz_decoding_octal(const char *string) { char *buffer = strdupz(string); diff --git a/src/proc_self_mountinfo.h b/src/proc_self_mountinfo.h index 00cf699ab..a8d337539 100644 --- a/src/proc_self_mountinfo.h +++ b/src/proc_self_mountinfo.h @@ -49,7 +49,7 @@ extern struct mountinfo *mountinfo_find(struct mountinfo *root, unsigned long ma extern struct mountinfo *mountinfo_find_by_filesystem_mount_source(struct mountinfo *root, const char *filesystem, const char *mount_source); extern struct mountinfo *mountinfo_find_by_filesystem_super_option(struct mountinfo *root, const char *filesystem, const char *super_options); -extern void mountinfo_free(struct mountinfo *mi); +extern void mountinfo_free_all(struct mountinfo *mi); extern struct mountinfo *mountinfo_read(int do_statvfs); #endif /* NETDATA_PROC_SELF_MOUNTINFO_H */
\ No newline at end of file diff --git a/src/proc_softirqs.c b/src/proc_softirqs.c index a1b9947e0..cd7440b00 100644 --- a/src/proc_softirqs.c +++ b/src/proc_softirqs.c @@ -18,10 +18,10 @@ struct interrupt { // since each interrupt is variable in size // we use this to calculate its record size -#define recordsize(cpus) (sizeof(struct interrupt) + (cpus * sizeof(struct cpu_interrupt))) +#define recordsize(cpus) (sizeof(struct interrupt) + ((cpus) * sizeof(struct cpu_interrupt))) // given a base, get a pointer to each record -#define irrindex(base, line, cpus) ((struct interrupt *)&((char *)(base))[line * recordsize(cpus)]) +#define irrindex(base, line, cpus) ((struct interrupt *)&((char *)(base))[(line) * recordsize(cpus)]) static inline struct interrupt *get_interrupts_array(size_t lines, int cpus) { static struct interrupt *irrs = NULL; diff --git a/src/proc_stat.c b/src/proc_stat.c index 907b659d0..d1aefb73e 100644 --- a/src/proc_stat.c +++ b/src/proc_stat.c @@ -54,7 +54,7 @@ static int read_per_core_files(struct cpu_chart *all_cpu_charts, size_t len, siz } ssize_t ret = read(f->fd, buf, 50); - if(unlikely(ret == -1)) { + if(unlikely(ret < 0)) { // cannot read that file error("Cannot read file '%s'", f->filename); diff --git a/src/proc_uptime.c b/src/proc_uptime.c index 8f4b90291..259de4760 100644 --- a/src/proc_uptime.c +++ b/src/proc_uptime.c @@ -1,39 +1,72 @@ #include "common.h" -int do_proc_uptime(int update_every, usec_t dt) { - (void)dt; - - collected_number uptime = 0; - +static inline collected_number uptime_from_boottime(void) { #ifdef CLOCK_BOOTTIME_IS_AVAILABLE - uptime = now_boottime_usec() / 1000; + return now_boottime_usec() / 1000; #else - static procfile *ff = NULL; + error("uptime cannot be read from CLOCK_BOOTTIME on this system."); + return 0; +#endif +} - if(unlikely(!ff)) { +static procfile *read_proc_uptime_ff = NULL; +static inline collected_number read_proc_uptime(void) { + if(unlikely(!read_proc_uptime_ff)) { char filename[FILENAME_MAX + 
1]; snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/uptime"); - ff = procfile_open(config_get("plugin:proc:/proc/uptime", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); - if(unlikely(!ff)) - return 1; + read_proc_uptime_ff = procfile_open(config_get("plugin:proc:/proc/uptime", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!read_proc_uptime_ff)) return 0; } - ff = procfile_readall(ff); - if(unlikely(!ff)) - return 0; // we return 0, so that we will retry to open it next time + read_proc_uptime_ff = procfile_readall(read_proc_uptime_ff); + if(unlikely(!read_proc_uptime_ff)) return 0; - if(unlikely(procfile_lines(ff) < 1)) { + if(unlikely(procfile_lines(read_proc_uptime_ff) < 1)) { error("/proc/uptime has no lines."); - return 1; + return 0; } - if(unlikely(procfile_linewords(ff, 0) < 1)) { + if(unlikely(procfile_linewords(read_proc_uptime_ff, 0) < 1)) { error("/proc/uptime has less than 1 word in it."); - return 1; + return 0; } - uptime = (collected_number)(strtold(procfile_lineword(ff, 0, 0), NULL) * 1000.0); -#endif + return (collected_number)(strtold(procfile_lineword(read_proc_uptime_ff, 0, 0), NULL) * 1000.0); +} + +int do_proc_uptime(int update_every, usec_t dt) { + (void)dt; + + static int use_boottime = -1; + + if(unlikely(use_boottime == -1)) { + collected_number uptime_boottime = uptime_from_boottime(); + collected_number uptime_proc = read_proc_uptime(); + + long long delta = (long long)uptime_boottime - (long long)uptime_proc; + if(delta < 0) delta = -delta; + + if(delta <= 1000 && uptime_boottime != 0) { + procfile_close(read_proc_uptime_ff); + info("Using now_boottime_usec() for uptime (dt is %lld ms)", delta); + use_boottime = 1; + } + else if(uptime_proc != 0) { + info("Using /proc/uptime for uptime (dt is %lld ms)", delta); + use_boottime = 0; + } + else { + error("Cannot find any way to read uptime on this system."); + return 1; + } + } + + collected_number uptime; 
+ if(use_boottime) + uptime = uptime_from_boottime(); + else + uptime = read_proc_uptime(); + // -------------------------------------------------------------------- diff --git a/src/proc_vmstat.c b/src/proc_vmstat.c index 72ca3818f..52e88d888 100644 --- a/src/proc_vmstat.c +++ b/src/proc_vmstat.c @@ -168,7 +168,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { , "page faults/s" , "proc" , "vmstat" - , 500 + , NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS , update_every , RRDSET_TYPE_LINE ); @@ -213,7 +213,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { , "events/s" , "proc" , "vmstat" - , 800 + , NETDATA_CHART_PRIO_MEM_NUMA , update_every , RRDSET_TYPE_LINE ); diff --git a/src/procfile.c b/src/procfile.c index 3a89e8353..044f975b5 100644 --- a/src/procfile.c +++ b/src/procfile.c @@ -39,23 +39,21 @@ char *procfile_filename(procfile *ff) { // ---------------------------------------------------------------------------- // An array of words -static inline pfwords *pfwords_add(pfwords *fw, char *str) NEVERNULL; -static inline pfwords *pfwords_add(pfwords *fw, char *str) { +static inline void pfwords_add(procfile *ff, char *str) { // debug(D_PROCFILE, PF_PREFIX ": adding word No %d: '%s'", fw->len, str); + pfwords *fw = ff->words; if(unlikely(fw->len == fw->size)) { // debug(D_PROCFILE, PF_PREFIX ": expanding words"); - fw = reallocz(fw, sizeof(pfwords) + (fw->size + PFWORDS_INCREASE_STEP) * sizeof(char *)); + ff->words = fw = reallocz(fw, sizeof(pfwords) + (fw->size + PFWORDS_INCREASE_STEP) * sizeof(char *)); fw->size += PFWORDS_INCREASE_STEP; } fw->words[fw->len++] = str; - - return fw; } -static inline pfwords *pfwords_new(void) NEVERNULL; +NEVERNULL static inline pfwords *pfwords_new(void) { // debug(D_PROCFILE, PF_PREFIX ": initializing words"); @@ -82,24 +80,26 @@ static inline void pfwords_free(pfwords *fw) { // ---------------------------------------------------------------------------- // An array of lines -static inline pflines *pflines_add(pflines *fl, 
size_t first_word) NEVERNULL; -static inline pflines *pflines_add(pflines *fl, size_t first_word) { +NEVERNULL +static inline size_t *pflines_add(procfile *ff) { // debug(D_PROCFILE, PF_PREFIX ": adding line %d at word %d", fl->len, first_word); + pflines *fl = ff->lines; if(unlikely(fl->len == fl->size)) { // debug(D_PROCFILE, PF_PREFIX ": expanding lines"); - fl = reallocz(fl, sizeof(pflines) + (fl->size + PFLINES_INCREASE_STEP) * sizeof(ffline)); + ff->lines = fl = reallocz(fl, sizeof(pflines) + (fl->size + PFLINES_INCREASE_STEP) * sizeof(ffline)); fl->size += PFLINES_INCREASE_STEP; } - fl->lines[fl->len].words = 0; - fl->lines[fl->len++].first = first_word; + ffline *ffl = &fl->lines[fl->len++]; + ffl->words = 0; + ffl->first = ff->words->len; - return fl; + return &ffl->words; } -static inline pflines *pflines_new(void) NEVERNULL; +NEVERNULL static inline pflines *pflines_new(void) { // debug(D_PROCFILE, PF_PREFIX ": initializing lines"); @@ -139,69 +139,61 @@ void procfile_close(procfile *ff) { freez(ff); } -static inline void procfile_parser(procfile *ff) { +NOINLINE +static void procfile_parser(procfile *ff) { // debug(D_PROCFILE, PF_PREFIX ": Parsing file '%s'", ff->filename); char *s = ff->data // our current position , *e = &ff->data[ff->len] // the terminating null - , *t = ff->data; // the first character of a quoted or a parenthesized string + , *t = ff->data; // the first character of a word (or quoted / parenthesized string) // the look up array to find our type of character PF_CHAR_TYPE *separators = ff->separators; char quote = 0; // the quote character - only when in quoted string + size_t opened = 0; // counts the number of open parenthesis - size_t - l = 0 // counts the number of lines we added - , w = 0 // counts the number of words we added - , opened = 0; // counts the number of open parenthesis - - ff->lines = pflines_add(ff->lines, w); + size_t *line_words = pflines_add(ff); - while(likely(s < e)) { - // we are not at the end + while(s < 
e) { PF_CHAR_TYPE ct = separators[(unsigned char)(*s)]; // this is faster than a switch() + // read more here: http://lazarenko.me/switch/ if(likely(ct == PF_CHAR_IS_WORD)) { s++; } else if(likely(ct == PF_CHAR_IS_SEPARATOR)) { - if(unlikely(quote || opened)) { - // we are inside a quote - s++; - continue; + if(!quote && !opened) { + if (s != t) { + // separator, but we have word before it + *s = '\0'; + pfwords_add(ff, t); + (*line_words)++; + t = ++s; + } + else { + // separator at the beginning + // skip it + t = ++s; + } } - - if(unlikely(s == t)) { - // skip all leading white spaces - t = ++s; - continue; + else { + // we are inside a quote or parenthesized string + s++; } - - // end of word - *s = '\0'; - - ff->words = pfwords_add(ff->words, t); - ff->lines->lines[l].words++; - w++; - - t = ++s; } else if(likely(ct == PF_CHAR_IS_NEWLINE)) { // end of line - *s = '\0'; - ff->words = pfwords_add(ff->words, t); - ff->lines->lines[l].words++; - w++; + *s = '\0'; + pfwords_add(ff, t); + (*line_words)++; + t = ++s; // debug(D_PROCFILE, PF_PREFIX ": ended line %d with %d words", l, ff->lines->lines[l].words); - ff->lines = pflines_add(ff->lines, w); - l++; - - t = ++s; + line_words = pflines_add(ff); } else if(likely(ct == PF_CHAR_IS_QUOTE)) { if(unlikely(!quote && s == t)) { @@ -214,10 +206,8 @@ static inline void procfile_parser(procfile *ff) { quote = 0; *s = '\0'; - ff->words = pfwords_add(ff->words, t); - ff->lines->lines[l].words++; - w++; - + pfwords_add(ff, t); + (*line_words)++; t = ++s; } else @@ -241,10 +231,8 @@ static inline void procfile_parser(procfile *ff) { if(!opened) { *s = '\0'; - ff->words = pfwords_add(ff->words, t); - ff->lines->lines[l].words++; - w++; - + pfwords_add(ff, t); + (*line_words)++; t = ++s; } else @@ -259,15 +247,15 @@ static inline void procfile_parser(procfile *ff) { if(likely(s > t && t < e)) { // the last word - if(likely(ff->len < ff->size)) - *s = '\0'; - else { + if(unlikely(ff->len >= ff->size)) { // we are going to loose 
the last byte - ff->data[ff->size - 1] = '\0'; + s = &ff->data[ff->size - 1]; } - ff->words = pfwords_add(ff->words, t); - ff->lines->lines[l].words++; + *s = '\0'; + pfwords_add(ff, t); + (*line_words)++; + // t = ++s; } } @@ -289,7 +277,7 @@ procfile *procfile_readall(procfile *ff) { debug(D_PROCFILE, "Reading file '%s', from position %zd with length %zd", procfile_filename(ff), s, (ssize_t)(ff->size - s)); r = read(ff->fd, &ff->data[s], ff->size - s); if(unlikely(r == -1)) { - if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot read from file '%s'", procfile_filename(ff)); + if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); procfile_close(ff); return NULL; } @@ -318,7 +306,8 @@ procfile *procfile_readall(procfile *ff) { return ff; } -static inline void procfile_set_separators(procfile *ff, const char *separators) { +NOINLINE +static void procfile_set_separators(procfile *ff, const char *separators) { static PF_CHAR_TYPE def[256]; static char initilized = 0; @@ -408,6 +397,8 @@ procfile *procfile_open(const char *filename, const char *separators, uint32_t f return NULL; } + // info("PROCFILE: opened '%s' on fd %d", filename, fd); + size_t size = (unlikely(procfile_adaptive_initial_allocation)) ? 
procfile_max_allocation : PROCFILE_INCREMENT_BUFFER; procfile *ff = mallocz(sizeof(procfile) + size); @@ -431,7 +422,10 @@ procfile *procfile_open(const char *filename, const char *separators, uint32_t f procfile *procfile_reopen(procfile *ff, const char *filename, const char *separators, uint32_t flags) { if(unlikely(!ff)) return procfile_open(filename, separators, flags); - if(likely(ff->fd != -1)) close(ff->fd); + if(likely(ff->fd != -1)) { + // info("PROCFILE: closing fd %d", ff->fd); + close(ff->fd); + } ff->fd = open(filename, O_RDONLY, 0666); if(unlikely(ff->fd == -1)) { @@ -439,9 +433,10 @@ procfile *procfile_reopen(procfile *ff, const char *filename, const char *separa return NULL; } + // info("PROCFILE: opened '%s' on fd %d", filename, ff->fd); + //strncpyz(ff->filename, filename, FILENAME_MAX); ff->filename[0] = '\0'; - ff->flags = flags; // do not do the separators again if NULL is given diff --git a/src/procfile.h b/src/procfile.h index 98765697f..012c6efe1 100644 --- a/src/procfile.h +++ b/src/procfile.h @@ -107,7 +107,7 @@ extern char *procfile_filename(procfile *ff); extern int procfile_adaptive_initial_allocation; // return the number of lines present -#define procfile_lines(ff) (ff->lines->len) +#define procfile_lines(ff) ((ff)->lines->len) // return the number of words of the Nth line #define procfile_linewords(ff, line) (((line) < procfile_lines(ff)) ? (ff)->lines->lines[(line)].words : 0) @@ -119,6 +119,6 @@ extern int procfile_adaptive_initial_allocation; #define procfile_line(ff, line) (((line) < procfile_lines(ff)) ? procfile_word((ff), (ff)->lines->lines[(line)].first) : "") // return the Nth word of the current line -#define procfile_lineword(ff, line, word) (((line) < procfile_lines(ff) && (word) < procfile_linewords(ff, (line))) ? procfile_word((ff), (ff)->lines->lines[(line)].first + word) : "") +#define procfile_lineword(ff, line, word) (((line) < procfile_lines(ff) && (word) < procfile_linewords((ff), (line))) ? 
procfile_word((ff), (ff)->lines->lines[(line)].first + (word)) : "") #endif /* NETDATA_PROCFILE_H */ diff --git a/src/registry.c b/src/registry.c index 9d382e86f..bbc2ef366 100644 --- a/src/registry.c +++ b/src/registry.c @@ -27,10 +27,10 @@ static void registry_set_cookie(struct web_client *w, const char *guid) { struct tm etmbuf, *etm = gmtime_r(&et, &etmbuf); strftime(edate, sizeof(edate), "%a, %d %b %Y %H:%M:%S %Z", etm); - snprintfz(w->cookie1, COOKIE_MAX, NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s", guid, edate); + snprintfz(w->cookie1, NETDATA_WEB_REQUEST_COOKIE_SIZE, NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s", guid, edate); if(registry.registry_domain && registry.registry_domain[0]) - snprintfz(w->cookie2, COOKIE_MAX, NETDATA_REGISTRY_COOKIE_NAME "=%s; Domain=%s; Expires=%s", guid, registry.registry_domain, edate); + snprintfz(w->cookie2, NETDATA_WEB_REQUEST_COOKIE_SIZE, NETDATA_REGISTRY_COOKIE_NAME "=%s; Domain=%s; Expires=%s", guid, registry.registry_domain, edate); } static inline void registry_set_person_cookie(struct web_client *w, REGISTRY_PERSON *p) { diff --git a/src/registry_internals.c b/src/registry_internals.c index fd3c295ce..44b0a1513 100644 --- a/src/registry_internals.c +++ b/src/registry_internals.c @@ -17,7 +17,7 @@ int regenerate_guid(const char *guid, char *result) { uuid_unparse_lower(uuid, result); #ifdef NETDATA_INTERNAL_CHECKS - if(strcmp(guid, result)) + if(strcmp(guid, result) != 0) info("GUID '%s' and re-generated GUID '%s' differ!", guid, result); #endif /* NETDATA_INTERNAL_CHECKS */ } @@ -74,13 +74,6 @@ static inline char *registry_fix_url(char *url, size_t *len) { // ---------------------------------------------------------------------------- -// forward definition of functions - -extern REGISTRY_PERSON *registry_request_access(char *person_guid, char *machine_guid, char *url, char *name, time_t when); -extern REGISTRY_PERSON *registry_request_delete(char *person_guid, char *machine_guid, char *url, char *delete_url, 
time_t when); - - -// ---------------------------------------------------------------------------- // HELPERS // verify the person, the machine and the URL exist in our DB diff --git a/src/registry_internals.h b/src/registry_internals.h index 433f04a66..cceaf292b 100644 --- a/src/registry_internals.h +++ b/src/registry_internals.h @@ -6,7 +6,7 @@ #define REGISTRY_URL_FLAGS_DEFAULT 0x00 #define REGISTRY_URL_FLAGS_EXPIRED 0x01 -#define DICTIONARY_FLAGS DICTIONARY_FLAG_VALUE_LINK_DONT_CLONE | DICTIONARY_FLAG_NAME_LINK_DONT_CLONE | DICTIONARY_FLAG_SINGLE_THREADED +#define DICTIONARY_FLAGS (DICTIONARY_FLAG_VALUE_LINK_DONT_CLONE | DICTIONARY_FLAG_NAME_LINK_DONT_CLONE | DICTIONARY_FLAG_SINGLE_THREADED) // ---------------------------------------------------------------------------- // COMMON structures @@ -59,8 +59,6 @@ struct registry { netdata_mutex_t lock; }; -extern int regenerate_guid(const char *guid, char *result); - #include "registry_url.h" #include "registry_machine.h" #include "registry_person.h" @@ -74,7 +72,7 @@ extern REGISTRY_PERSON *registry_request_delete(char *person_guid, char *machine extern REGISTRY_MACHINE *registry_request_machine(char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when); // REGISTRY LOG (in registry_log.c) -extern void registry_log(const char action, REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name); +extern void registry_log(char action, REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name); extern int registry_log_open(void); extern void registry_log_close(void); extern void registry_log_recreate(void); diff --git a/src/registry_log.c b/src/registry_log.c index 3229a34b4..cca43b09f 100644 --- a/src/registry_log.c +++ b/src/registry_log.c @@ -1,6 +1,6 @@ #include "registry_internals.h" -void registry_log(const char action, REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name) { +void registry_log(char action, REGISTRY_PERSON *p, REGISTRY_MACHINE *m, 
REGISTRY_URL *u, char *name) { if(likely(registry.log_fp)) { if(unlikely(fprintf(registry.log_fp, "%c\t%08x\t%s\t%s\t%s\t%s\n", action, diff --git a/src/registry_person.c b/src/registry_person.c index 409c76925..d8b6cd98a 100644 --- a/src/registry_person.c +++ b/src/registry_person.c @@ -242,7 +242,7 @@ REGISTRY_PERSON_URL *registry_person_link_to_url(REGISTRY_PERSON *p, REGISTRY_MA pu->machine = m; } - if(strcmp(pu->machine_name, name)) { + if(strcmp(pu->machine_name, name) != 0) { // the name of the PERSON_URL has changed ! pu = registry_person_url_reallocate(p, m, u, name, namelen, when, pu); } @@ -14,6 +14,7 @@ int rrd_delete_unupdated_dimensions = 0; int default_rrd_update_every = UPDATE_EVERY; int default_rrd_history_entries = RRD_DEFAULT_HISTORY_ENTRIES; RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; +int gap_when_lost_iterations_above = 1; // ---------------------------------------------------------------------------- @@ -9,6 +9,7 @@ extern int default_rrd_update_every; extern int default_rrd_history_entries; +extern int gap_when_lost_iterations_above; #define RRD_ID_LENGTH_MAX 200 @@ -18,6 +19,8 @@ extern int default_rrd_history_entries; typedef long long total_number; #define TOTAL_NUMBER_FORMAT "%lld" +typedef struct rrdhost RRDHOST; + // ---------------------------------------------------------------------------- // chart types @@ -99,18 +102,19 @@ typedef struct rrdfamily RRDFAMILY; // and may lead to missing information. 
typedef enum rrddim_flags { + RRDDIM_FLAG_NONE = 0, RRDDIM_FLAG_HIDDEN = 1 << 0, // this dimension will not be offered to callers RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS = 1 << 1 // do not offer RESET or OVERFLOW info to callers } RRDDIM_FLAGS; #ifdef HAVE_C___ATOMIC -#define rrddim_flag_check(rd, flag) (__atomic_load_n(&((rd)->flags), __ATOMIC_SEQ_CST) & flag) -#define rrddim_flag_set(rd, flag) __atomic_or_fetch(&((rd)->flags), flag, __ATOMIC_SEQ_CST) -#define rrddim_flag_clear(rd, flag) __atomic_and_fetch(&((rd)->flags), ~flag, __ATOMIC_SEQ_CST) +#define rrddim_flag_check(rd, flag) (__atomic_load_n(&((rd)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define rrddim_flag_set(rd, flag) __atomic_or_fetch(&((rd)->flags), (flag), __ATOMIC_SEQ_CST) +#define rrddim_flag_clear(rd, flag) __atomic_and_fetch(&((rd)->flags), ~(flag), __ATOMIC_SEQ_CST) #else -#define rrddim_flag_check(rd, flag) ((rd)->flags & flag) -#define rrddim_flag_set(rd, flag) (rd)->flags |= flag -#define rrddim_flag_clear(rd, flag) (rd)->flags &= ~flag +#define rrddim_flag_check(rd, flag) ((rd)->flags & (flag)) +#define rrddim_flag_set(rd, flag) (rd)->flags |= (flag) +#define rrddim_flag_clear(rd, flag) (rd)->flags &= ~(flag) #endif @@ -204,10 +208,10 @@ typedef struct rrddim RRDDIM; // these loop macros make sure the linked list is accessed with the right lock #define rrddim_foreach_read(rd, st) \ - for(rd = st->dimensions, rrdset_check_rdlock(st); rd ; rd = rd->next) + for((rd) = (st)->dimensions, rrdset_check_rdlock(st); (rd) ; (rd) = (rd)->next) #define rrddim_foreach_write(rd, st) \ - for(rd = st->dimensions, rrdset_check_wrlock(st); rd ; rd = rd->next) + for((rd) = (st)->dimensions, rrdset_check_wrlock(st); (rd) ; (rd) = (rd)->next) // ---------------------------------------------------------------------------- @@ -228,17 +232,18 @@ typedef enum rrdset_flags { RRDSET_FLAG_EXPOSED_UPSTREAM = 1 << 6, // if set, we have sent this chart to netdata master (streaming) RRDSET_FLAG_STORE_FIRST = 1 << 7, // 
if set, do not eliminate the first collection during interpolation RRDSET_FLAG_HETEROGENEOUS = 1 << 8, // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) - RRDSET_FLAG_HOMEGENEOUS_CHECK= 1 << 9 // if set, the chart should be checked to determine if the dimensions as homogeneous + RRDSET_FLAG_HOMEGENEOUS_CHECK= 1 << 9, // if set, the chart should be checked to determine if the dimensions as homogeneous + RRDSET_FLAG_HIDDEN = 1 << 10, // if set, do not show this chart on the dashboard, but use it for backends } RRDSET_FLAGS; #ifdef HAVE_C___ATOMIC -#define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & flag) +#define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & (flag)) #define rrdset_flag_set(st, flag) __atomic_or_fetch(&((st)->flags), flag, __ATOMIC_SEQ_CST) #define rrdset_flag_clear(st, flag) __atomic_and_fetch(&((st)->flags), ~flag, __ATOMIC_SEQ_CST) #else -#define rrdset_flag_check(st, flag) ((st)->flags & flag) -#define rrdset_flag_set(st, flag) (st)->flags |= flag -#define rrdset_flag_clear(st, flag) (st)->flags &= ~flag +#define rrdset_flag_check(st, flag) ((st)->flags & (flag)) +#define rrdset_flag_set(st, flag) (st)->flags |= (flag) +#define rrdset_flag_clear(st, flag) (st)->flags &= ~(flag) #endif struct rrdset { @@ -320,7 +325,7 @@ struct rrdset { total_number last_collected_total; // used internally to calculate percentages RRDFAMILY *rrdfamily; // pointer to RRDFAMILY this chart belongs to - struct rrdhost *rrdhost; // pointer to RRDHOST this chart belongs to + RRDHOST *rrdhost; // pointer to RRDHOST this chart belongs to struct rrdset *next; // linking of rrdsets @@ -359,10 +364,10 @@ typedef struct rrdset RRDSET; // these loop macros make sure the linked list is accessed with the right lock #define rrdset_foreach_read(st, host) \ - for(st = host->rrdset_root, rrdhost_check_rdlock(host); st ; st = st->next) + for((st) = 
(host)->rrdset_root, rrdhost_check_rdlock(host); st ; (st) = (st)->next) #define rrdset_foreach_write(st, host) \ - for(st = host->rrdset_root, rrdhost_check_wrlock(host); st ; st = st->next) + for((st) = (host)->rrdset_root, rrdhost_check_wrlock(host); st ; (st) = (st)->next) // ---------------------------------------------------------------------------- @@ -374,17 +379,19 @@ typedef struct rrdset RRDSET; typedef enum rrdhost_flags { RRDHOST_FLAG_ORPHAN = 1 << 0, // this host is orphan (not receiving data) RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS = 1 << 1, // delete files of obsolete charts - RRDHOST_FLAG_DELETE_ORPHAN_HOST = 1 << 2 // delete the entire host when orphan + RRDHOST_FLAG_DELETE_ORPHAN_HOST = 1 << 2, // delete the entire host when orphan + RRDHOST_FLAG_BACKEND_SEND = 1 << 3, // send it to backends + RRDHOST_FLAG_BACKEND_DONT_SEND = 1 << 4, // don't send it to backends } RRDHOST_FLAGS; #ifdef HAVE_C___ATOMIC -#define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & flag) +#define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & (flag)) #define rrdhost_flag_set(host, flag) __atomic_or_fetch(&((host)->flags), flag, __ATOMIC_SEQ_CST) #define rrdhost_flag_clear(host, flag) __atomic_and_fetch(&((host)->flags), ~flag, __ATOMIC_SEQ_CST) #else -#define rrdhost_flag_check(host, flag) ((host)->flags & flag) -#define rrdhost_flag_set(host, flag) (host)->flags |= flag -#define rrdhost_flag_clear(host, flag) (host)->flags &= ~flag +#define rrdhost_flag_check(host, flag) ((host)->flags & (flag)) +#define rrdhost_flag_set(host, flag) (host)->flags |= (flag) +#define rrdhost_flag_clear(host, flag) (host)->flags &= ~(flag) #endif #ifdef NETDATA_INTERNAL_CHECKS @@ -425,6 +432,8 @@ struct rrdhost { char *cache_dir; // the directory to save RRD cache files char *varlib_dir; // the directory to save health log + char *program_name; // the program name that collects metrics for this host + char 
*program_version; // the program version that collects metrics for this host // ------------------------------------------------------------------------ // streaming of data to remote hosts - rrdpush @@ -436,7 +445,7 @@ struct rrdhost { // the following are state information for the threading // streaming metrics from this netdata to an upstream netdata volatile int rrdpush_sender_spawn:1; // 1 when the sender thread has been spawn - pthread_t rrdpush_sender_thread; // the sender thread + netdata_thread_t rrdpush_sender_thread; // the sender thread volatile int rrdpush_sender_connected:1; // 1 when the sender is ready to push metrics int rrdpush_sender_socket; // the fd of the socket to the remote host, or -1 @@ -508,7 +517,6 @@ struct rrdhost { struct rrdhost *next; }; -typedef struct rrdhost RRDHOST; extern RRDHOST *localhost; #define rrdhost_rdlock(host) netdata_rwlock_rdlock(&((host)->rrdhost_rwlock)) @@ -519,10 +527,10 @@ extern RRDHOST *localhost; // these loop macros make sure the linked list is accessed with the right lock #define rrdhost_foreach_read(var) \ - for(var = localhost, rrd_check_rdlock(); var ; var = var->next) + for((var) = localhost, rrd_check_rdlock(); var ; (var) = (var)->next) #define rrdhost_foreach_write(var) \ - for(var = localhost, rrd_check_wrlock(); var ; var = var->next) + for((var) = localhost, rrd_check_wrlock(); var ; (var) = (var)->next) // ---------------------------------------------------------------------------- @@ -551,6 +559,8 @@ extern RRDHOST *rrdhost_find_or_create( , const char *os , const char *timezone , const char *tags + , const char *program_name + , const char *program_version , int update_every , long history , RRD_MEMORY_MODE mode @@ -643,7 +653,7 @@ extern void rrdset_is_obsolete(RRDSET *st); extern void rrdset_isnot_obsolete(RRDSET *st); // checks if the RRDSET should be offered to viewers -#define rrdset_is_available_for_viewers(st) (rrdset_flag_check(st, RRDSET_FLAG_ENABLED) && !rrdset_flag_check(st, 
RRDSET_FLAG_OBSOLETE) && (st)->dimensions && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE) +#define rrdset_is_available_for_viewers(st) (rrdset_flag_check(st, RRDSET_FLAG_ENABLED) && !rrdset_flag_check(st, RRDSET_FLAG_HIDDEN) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && (st)->dimensions && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE) #define rrdset_is_available_for_backends(st) (rrdset_flag_check(st, RRDSET_FLAG_ENABLED) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && (st)->dimensions) // get the total duration in seconds of the round robin database diff --git a/src/rrd2json.c b/src/rrd2json.c index 00d7a552f..24b3da340 100644 --- a/src/rrd2json.c +++ b/src/rrd2json.c @@ -128,7 +128,7 @@ void rrd_stats_api_v1_charts(RRDHOST *host, BUFFER *wb) { ",\n\t\"custom_info\": \"%s\"" ",\n\t\"charts\": {" , host->hostname - , program_version + , host->program_version , host->os , host->timezone , host->rrd_update_every @@ -255,13 +255,13 @@ void rrd_stats_api_v1_charts_allmetrics_shell(RRDHOST *host, BUFFER *wb) { if(rd->multiplier < 0 || rd->divisor < 0) n = -n; n = calculated_number_round(n); if(!rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)) total += n; - buffer_sprintf(wb, "NETDATA_%s_%s=\"%0.0Lf\" # %s\n", chart, dimension, n, st->units); + buffer_sprintf(wb, "NETDATA_%s_%s=\"" CALCULATED_NUMBER_FORMAT_ZERO "\" # %s\n", chart, dimension, n, st->units); } } } total = calculated_number_round(total); - buffer_sprintf(wb, "NETDATA_%s_VISIBLETOTAL=\"%0.0Lf\" # %s\n", chart, total, st->units); + buffer_sprintf(wb, "NETDATA_%s_VISIBLETOTAL=\"" CALCULATED_NUMBER_FORMAT_ZERO "\" # %s\n", chart, total, st->units); rrdset_unlock(st); } } @@ -284,7 +284,7 @@ void rrd_stats_api_v1_charts_allmetrics_shell(RRDHOST *host, BUFFER *wb) { buffer_sprintf(wb, "NETDATA_ALARM_%s_%s_VALUE=\"\" # %s\n", chart, alarm, rc->units); else { n = calculated_number_round(n); - buffer_sprintf(wb, "NETDATA_ALARM_%s_%s_VALUE=\"%0.0Lf\" # %s\n", chart, alarm, n, rc->units); + buffer_sprintf(wb, 
"NETDATA_ALARM_%s_%s_VALUE=\"" CALCULATED_NUMBER_FORMAT_ZERO "\" # %s\n", chart, alarm, n, rc->units); } buffer_sprintf(wb, "NETDATA_ALARM_%s_%s_STATUS=\"%s\"\n", chart, alarm, rrdcalc_status2string(rc->status)); @@ -464,47 +464,47 @@ static void rrdr_dump(RRDR *r) void rrdr_disable_not_selected_dimensions(RRDR *r, uint32_t options, const char *dims) { rrdset_check_rdlock(r->st); - if(unlikely(!dims || !*dims)) return; + if(unlikely(!dims || !*dims || (dims[0] == '*' && dims[1] == '\0'))) return; - char b[strlen(dims) + 1]; - char *o = b, *tok; - strcpy(o, dims); + int match_ids = 0, match_names = 0; - long c, dims_selected = 0, dims_not_hidden_not_zero = 0; - RRDDIM *d; + if(unlikely(options & RRDR_OPTION_MATCH_IDS)) + match_ids = 1; + if(unlikely(options & RRDR_OPTION_MATCH_NAMES)) + match_names = 1; - // disable all of them - for(c = 0, d = r->st->dimensions; d ;c++, d = d->next) - r->od[c] |= RRDR_HIDDEN; + if(likely(!match_ids && !match_names)) + match_ids = match_names = 1; - while(o && *o && (tok = mystrsep(&o, ",|"))) { - if(!*tok) continue; - - uint32_t hash = simple_hash(tok); - - // find it and enable it - for(c = 0, d = r->st->dimensions; d ;c++, d = d->next) { - if(unlikely((hash == d->hash && !strcmp(d->id, tok)) || (hash == d->hash_name && !strcmp(d->name, tok)))) { + SIMPLE_PATTERN *pattern = simple_pattern_create(dims, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT); - if(likely(r->od[c] & RRDR_HIDDEN)) { - r->od[c] |= RRDR_SELECTED; - r->od[c] &= ~RRDR_HIDDEN; - dims_selected++; - } - - // since the user needs this dimension - // make it appear as NONZERO, to return it - // even if the dimension has only zeros - // unless option non_zero is set - if(likely(!(options & RRDR_OPTION_NONZERO))) - r->od[c] |= RRDR_NONZERO; + RRDDIM *d; + long c, dims_selected = 0, dims_not_hidden_not_zero = 0; + for(c = 0, d = r->st->dimensions; d ;c++, d = d->next) { + if( (match_ids && simple_pattern_matches(pattern, d->id)) + || (match_names && simple_pattern_matches(pattern, 
d->name)) + ) { + r->od[c] |= RRDR_SELECTED; + if(unlikely(r->od[c] & RRDR_HIDDEN)) r->od[c] &= ~RRDR_HIDDEN; + dims_selected++; + + // since the user needs this dimension + // make it appear as NONZERO, to return it + // even if the dimension has only zeros + // unless option non_zero is set + if(unlikely(!(options & RRDR_OPTION_NONZERO))) + r->od[c] |= RRDR_NONZERO; - // count the visible dimensions - if(likely(r->od[c] & RRDR_NONZERO)) - dims_not_hidden_not_zero++; - } + // count the visible dimensions + if(likely(r->od[c] & RRDR_NONZERO)) + dims_not_hidden_not_zero++; + } + else { + r->od[c] |= RRDR_HIDDEN; + if(unlikely(r->od[c] & RRDR_SELECTED)) r->od[c] &= ~RRDR_SELECTED; } } + simple_pattern_free(pattern); // check if all dimensions are hidden if(unlikely(!dims_not_hidden_not_zero && dims_selected)) { @@ -717,6 +717,23 @@ void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, uint32_t opti i = 0; if(rows) { + calculated_number total = 1; + + if(unlikely(options & RRDR_OPTION_PERCENTAGE)) { + total = 0; + for(c = 0, rd = r->st->dimensions; rd && c < r->d ;c++, rd = rd->next) { + calculated_number *cn = &r->v[ (0) * r->d ]; + calculated_number n = cn[c]; + + if(likely((options & RRDR_OPTION_ABSOLUTE) && n < 0)) + n = -n; + + total += n; + } + // prevent a division by zero + if(total == 0) total = 1; + } + for(c = 0, i = 0, rd = r->st->dimensions; rd && c < r->d ;c++, rd = rd->next) { if(unlikely(r->od[c] & RRDR_HIDDEN)) continue; if(unlikely((options & RRDR_OPTION_NONZERO) && !(r->od[c] & RRDR_NONZERO))) continue; @@ -726,11 +743,23 @@ void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, uint32_t opti calculated_number *cn = &r->v[ (0) * r->d ]; uint8_t *co = &r->o[ (0) * r->d ]; + calculated_number n = cn[c]; - if(co[c] & RRDR_EMPTY) - buffer_strcat(wb, "null"); - else - buffer_rrd_value(wb, cn[c]); + if(co[c] & RRDR_EMPTY) { + if(options & RRDR_OPTION_NULL2ZERO) + buffer_strcat(wb, "0"); + else + buffer_strcat(wb, "null"); + } + 
else { + if(unlikely((options & RRDR_OPTION_ABSOLUTE) && n < 0)) + n = -n; + + if(unlikely(options & RRDR_OPTION_PERCENTAGE)) + n = n * 100 / total; + + buffer_rrd_value(wb, n); + } } } if(!i) { @@ -963,6 +992,7 @@ static void rrdr2json(RRDR *r, BUFFER *wb, uint32_t options, int datatable) buffer_strcat(wb, post_date); } + int set_min_max = 0; if(unlikely(options & RRDR_OPTION_PERCENTAGE)) { total = 0; for(c = 0, rd = r->st->dimensions; rd && c < r->d ;c++, rd = rd->next) { @@ -975,6 +1005,7 @@ static void rrdr2json(RRDR *r, BUFFER *wb, uint32_t options, int datatable) } // prevent a division by zero if(total == 0) total = 1; + set_min_max = 1; } // for each dimension @@ -999,9 +1030,18 @@ static void rrdr2json(RRDR *r, BUFFER *wb, uint32_t options, int datatable) if(unlikely((options & RRDR_OPTION_ABSOLUTE) && n < 0)) n = -n; - if(unlikely(options & RRDR_OPTION_PERCENTAGE)) + if(unlikely(options & RRDR_OPTION_PERCENTAGE)) { n = n * 100 / total; + if(unlikely(set_min_max)) { + r->min = r->max = n; + set_min_max = 0; + } + + if(n < r->min) r->min = n; + if(n > r->max) r->max = n; + } + buffer_rrd_value(wb, n); } @@ -1078,6 +1118,7 @@ static void rrdr2csv(RRDR *r, BUFFER *wb, uint32_t options, const char *startlin buffer_date(wb, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); } + int set_min_max = 0; if(unlikely(options & RRDR_OPTION_PERCENTAGE)) { total = 0; for(c = 0, d = r->st->dimensions; d && c < r->d ;c++, d = d->next) { @@ -1090,6 +1131,7 @@ static void rrdr2csv(RRDR *r, BUFFER *wb, uint32_t options, const char *startlin } // prevent a division by zero if(total == 0) total = 1; + set_min_max = 1; } // for each dimension @@ -1111,9 +1153,18 @@ static void rrdr2csv(RRDR *r, BUFFER *wb, uint32_t options, const char *startlin if(unlikely((options & RRDR_OPTION_ABSOLUTE) && n < 0)) n = -n; - if(unlikely(options & RRDR_OPTION_PERCENTAGE)) + if(unlikely(options & RRDR_OPTION_PERCENTAGE)) { n = n * 100 / total; + 
if(unlikely(set_min_max)) { + r->min = r->max = n; + set_min_max = 0; + } + + if(n < r->min) r->min = n; + if(n > r->max) r->max = n; + } + buffer_rrd_value(wb, n); } } @@ -1136,6 +1187,7 @@ inline static calculated_number rrdr2value(RRDR *r, long i, uint32_t options, in int all_null = 1, init = 1; calculated_number total = 1; + int set_min_max = 0; if(unlikely(options & RRDR_OPTION_PERCENTAGE)) { total = 0; for(c = 0, d = r->st->dimensions; d && c < r->d ;c++, d = d->next) { @@ -1148,6 +1200,7 @@ inline static calculated_number rrdr2value(RRDR *r, long i, uint32_t options, in } // prevent a division by zero if(total == 0) total = 1; + set_min_max = 1; } // for each dimension @@ -1160,9 +1213,18 @@ inline static calculated_number rrdr2value(RRDR *r, long i, uint32_t options, in if(likely((options & RRDR_OPTION_ABSOLUTE) && n < 0)) n = -n; - if(unlikely(options & RRDR_OPTION_PERCENTAGE)) + if(unlikely(options & RRDR_OPTION_PERCENTAGE)) { n = n * 100 / total; + if(unlikely(set_min_max)) { + r->min = r->max = n; + set_min_max = 0; + } + + if(n < r->min) r->min = n; + if(n > r->max) r->max = n; + } + if(unlikely(init)) { if(n > 0) { min = 0; @@ -1351,9 +1413,11 @@ static RRDR *rrdr_create(RRDSET *st, long n) return r; } -RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int group_method, int aligned) +RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int group_method, long group_time, int aligned) { +#ifdef NETDATA_INTERNAL_CHECKS int debug = rrdset_flag_check(st, RRDSET_FLAG_DEBUG)?1:0; +#endif int absolute_period_requested = -1; time_t first_entry_t = rrdset_first_entry_t(st); @@ -1414,20 +1478,51 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g if(duration <= 0 || available_points <= 0) return rrdr_create(st, 1); - // check the wanted points - if(points < 0) points = -points; - if(points > available_points) points = available_points; - if(points == 0) points = available_points; + // 
check the number of wanted points in the result + if(unlikely(points < 0)) points = -points; + if(unlikely(points > available_points)) points = available_points; + if(unlikely(points == 0)) points = available_points; - // calculate proper grouping of source data + // calculate the desired grouping of source data points long group = available_points / points; - if(group <= 0) group = 1; + if(unlikely(group <= 0)) group = 1; + if(unlikely(available_points % points > points / 2)) group++; // rounding to the closest integer - // round group to the closest integer - if(available_points % points > points / 2) group++; + // group_time enforces a certain grouping multiple + calculated_number group_sum_divisor = 1.0; + long group_points = 1; + if(unlikely(group_time > st->update_every)) { + if (unlikely(group_time > duration)) { + // group_time is above the available duration - time_t after_new = (aligned) ? (after - (after % (group * st->update_every))) : after; - time_t before_new = (aligned) ? 
(before - (before % (group * st->update_every))) : before; + #ifdef NETDATA_INTERNAL_CHECKS + info("INTERNAL CHECK: %s: requested gtime %ld secs, is greater than the desired duration %ld secs", st->id, group_time, duration); + #endif + + group = points; // use all the points + } + else { + // the points we should group to satisfy gtime + group_points = group_time / st->update_every; + if(unlikely(group_time % group_points)) { + #ifdef NETDATA_INTERNAL_CHECKS + info("INTERNAL CHECK: %s: requested gtime %ld secs, is not a multiple of the chart's data collection frequency %d secs", st->id, group_time, st->update_every); + #endif + + group_points++; + } + + // adapt group according to group_points + if(unlikely(group < group_points)) group = group_points; // do not allow grouping below the desired one + if(unlikely(group % group_points)) group += group_points - (group % group_points); // make sure group is multiple of group_points + + //group_sum_divisor = group / group_points; + group_sum_divisor = (calculated_number)(group * st->update_every) / (calculated_number)group_time; + } + } + + time_t after_new = after - (after % ( ((aligned)?group:1) * st->update_every )); + time_t before_new = before - (before % ( ((aligned)?group:1) * st->update_every )); long points_new = (before_new - after_new) / st->update_every / group; // find the starting and ending slots in our round robin db @@ -1435,27 +1530,32 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g stop_at_slot = rrdset_time2slot(st, after_new); #ifdef NETDATA_INTERNAL_CHECKS - if(after_new < first_entry_t) { - error("after_new %u is too small, minimum %u", (uint32_t)after_new, (uint32_t)first_entry_t); - } - if(after_new > last_entry_t) { - error("after_new %u is too big, maximum %u", (uint32_t)after_new, (uint32_t)last_entry_t); - } - if(before_new < first_entry_t) { - error("before_new %u is too small, minimum %u", (uint32_t)before_new, (uint32_t)first_entry_t); - } - 
if(before_new > last_entry_t) { - error("before_new %u is too big, maximum %u", (uint32_t)before_new, (uint32_t)last_entry_t); - } - if(start_at_slot < 0 || start_at_slot >= st->entries) { - error("start_at_slot is invalid %ld, expected 0 to %ld", start_at_slot, st->entries - 1); - } - if(stop_at_slot < 0 || stop_at_slot >= st->entries) { - error("stop_at_slot is invalid %ld, expected 0 to %ld", stop_at_slot, st->entries - 1); - } - if(points_new > (before_new - after_new) / group / st->update_every + 1) { - error("points_new %ld is more than points %ld", points_new, (before_new - after_new) / group / st->update_every + 1); - } + if(after_new < first_entry_t) + error("INTERNAL CHECK: after_new %u is too small, minimum %u", (uint32_t)after_new, (uint32_t)first_entry_t); + + if(after_new > last_entry_t) + error("INTERNAL CHECK: after_new %u is too big, maximum %u", (uint32_t)after_new, (uint32_t)last_entry_t); + + if(before_new < first_entry_t) + error("INTERNAL CHECK: before_new %u is too small, minimum %u", (uint32_t)before_new, (uint32_t)first_entry_t); + + if(before_new > last_entry_t) + error("INTERNAL CHECK: before_new %u is too big, maximum %u", (uint32_t)before_new, (uint32_t)last_entry_t); + + if(start_at_slot < 0 || start_at_slot >= st->entries) + error("INTERNAL CHECK: start_at_slot is invalid %ld, expected 0 to %ld", start_at_slot, st->entries - 1); + + if(stop_at_slot < 0 || stop_at_slot >= st->entries) + error("INTERNAL CHECK: stop_at_slot is invalid %ld, expected 0 to %ld", stop_at_slot, st->entries - 1); + + if(points_new > (before_new - after_new) / group / st->update_every + 1) + error("INTERNAL CHECK: points_new %ld is more than points %ld", points_new, (before_new - after_new) / group / st->update_every + 1); + + if(group < group_points) + error("INTERNAL CHECK: group %ld is less than the desired group points %ld", group, group_points); + + if(group > group_points && group % group_points) + error("INTERNAL CHECK: group %ld is not a multiple of the 
desired group points %ld", group, group_points); #endif //info("RRD2RRDR(): %s: wanted %ld points, got %ld - group=%ld, wanted duration=%u, got %u - wanted %ld - %ld, got %ld - %ld", st->id, points, points_new, group, before - after, before_new - after_new, after, before, after_new, before_new); @@ -1478,20 +1578,21 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g // initialize our result set RRDR *r = rrdr_create(st, points); - if(!r) { + if(unlikely(!r)) { #ifdef NETDATA_INTERNAL_CHECKS - error("Cannot create RRDR for %s, after=%u, before=%u, duration=%u, points=%ld", st->id, (uint32_t)after, (uint32_t)before, (uint32_t)duration, points); + error("INTERNAL CHECK: Cannot create RRDR for %s, after=%u, before=%u, duration=%u, points=%ld", st->id, (uint32_t)after, (uint32_t)before, (uint32_t)duration, points); #endif return NULL; } - if(!r->d) { + + if(unlikely(!r->d)) { #ifdef NETDATA_INTERNAL_CHECKS - error("Returning empty RRDR (no dimensions in RRDSET) for %s, after=%u, before=%u, duration=%u, points=%ld", st->id, (uint32_t)after, (uint32_t)before, (uint32_t)duration, points); + error("INTERNAL CHECK: Returning empty RRDR (no dimensions in RRDSET) for %s, after=%u, before=%u, duration=%u, points=%ld", st->id, (uint32_t)after, (uint32_t)before, (uint32_t)duration, points); #endif return r; } - if(absolute_period_requested == 1) + if(unlikely(absolute_period_requested == 1)) r->result_options |= RRDR_RESULT_OPTION_ABSOLUTE; else r->result_options |= RRDR_RESULT_OPTION_RELATIVE; @@ -1502,8 +1603,8 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g // ------------------------------------------------------------------------- // checks for debugging - - if(debug) debug(D_RRD_STATS, "INFO %s first_t: %u, last_t: %u, all_duration: %u, after: %u, before: %u, duration: %u, points: %ld, group: %ld" +#ifdef NETDATA_INTERNAL_CHECKS + if(debug) debug(D_RRD_STATS, "INFO %s first_t: %u, last_t: %u, all_duration: 
%u, after: %u, before: %u, duration: %u, points: %ld, group: %ld, group_points: %ld" , st->id , (uint32_t)first_entry_t , (uint32_t)last_entry_t @@ -1513,8 +1614,9 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g , (uint32_t)duration , points , group + , group_points ); - +#endif // ------------------------------------------------------------------------- // temp arrays for keeping values per dimension @@ -1546,6 +1648,7 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g dt = st->update_every, group_start_t = 0; +#ifdef NETDATA_INTERNAL_CHECKS if(unlikely(debug)) debug(D_RRD_STATS, "BEGIN %s after_t: %u (stop_at_t: %ld), before_t: %u (start_at_t: %ld), start_t(now): %u, current_entry: %ld, entries: %ld" , st->id , (uint32_t)after @@ -1556,6 +1659,7 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g , st->current_entry , st->entries ); +#endif r->group = group; r->update_every = (int)group * st->update_every; @@ -1569,6 +1673,7 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g if(unlikely(slot < 0)) slot = st->entries - 1; if(unlikely(slot == stop_at_slot)) stop_now = counter; +#ifdef NETDATA_INTERNAL_CHECKS if(unlikely(debug)) debug(D_RRD_STATS, "ROW %s slot: %ld, entries_counter: %ld, group_count: %ld, added: %ld, now: %ld, %s %s" , st->id , slot @@ -1579,14 +1684,13 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g , (group_count + 1 == group)?"PRINT":" - " , (now >= after && now <= before)?"RANGE":" - " ); +#endif // make sure we return data in the proper time range if(unlikely(now > before)) continue; if(unlikely(now < after)) break; - if(unlikely(group_count == 0)) { - group_start_t = now; - } + if(unlikely(group_count == 0)) group_start_t = now; group_count++; if(unlikely(group_count == group)) { @@ -1684,7 +1788,11 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, 
int g default: case GROUP_AVERAGE: case GROUP_UNDEFINED: - cn[c] = group_values[c] / group_counts[c]; + if(unlikely(group_points != 1)) + cn[c] = group_values[c] / group_sum_divisor; + else + cn[c] = group_values[c] / group_counts[c]; + group_values[c] = 0; break; } @@ -1719,12 +1827,13 @@ int rrdset2value_api_v1( , long long after , long long before , int group_method + , long group_time , uint32_t options , time_t *db_after , time_t *db_before , int *value_is_null ) { - RRDR *r = rrd2rrdr(st, points, after, before, group_method, !(options & RRDR_OPTION_NOT_ALIGNED)); + RRDR *r = rrd2rrdr(st, points, after, before, group_method, group_time, !(options & RRDR_OPTION_NOT_ALIGNED)); if(!r) { if(value_is_null) *value_is_null = 1; return 500; @@ -1740,10 +1849,12 @@ int rrdset2value_api_v1( return 400; } - if(r->result_options & RRDR_RESULT_OPTION_RELATIVE) - buffer_no_cacheable(wb); - else if(r->result_options & RRDR_RESULT_OPTION_ABSOLUTE) - buffer_cacheable(wb); + if(wb) { + if (r->result_options & RRDR_RESULT_OPTION_RELATIVE) + buffer_no_cacheable(wb); + else if (r->result_options & RRDR_RESULT_OPTION_ABSOLUTE) + buffer_cacheable(wb); + } options = rrdr_check_options(r, options, dimensions); @@ -1769,12 +1880,13 @@ int rrdset2anything_api_v1( , long long after , long long before , int group_method + , long group_time , uint32_t options , time_t *latest_timestamp ) { st->last_accessed_time = now_realtime_sec(); - RRDR *r = rrd2rrdr(st, points, after, before, group_method, !(options & RRDR_OPTION_NOT_ALIGNED)); + RRDR *r = rrd2rrdr(st, points, after, before, group_method, group_time, !(options & RRDR_OPTION_NOT_ALIGNED)); if(!r) { buffer_strcat(wb, "Cannot generate output with these parameters on this chart."); return 500; diff --git a/src/rrd2json.h b/src/rrd2json.h index 7212c0b3d..b41c814ec 100644 --- a/src/rrd2json.h +++ b/src/rrd2json.h @@ -6,7 +6,7 @@ #define API_RELATIVE_TIME_MAX (3 * 365 * 86400) // type of JSON generations -#define DATASOURCE_INVALID -1 
+#define DATASOURCE_INVALID (-1) #define DATASOURCE_JSON 0 #define DATASOURCE_DATATABLE_JSON 1 #define DATASOURCE_DATATABLE_JSONP 2 @@ -62,6 +62,8 @@ #define RRDR_OPTION_PERCENTAGE 0x00000800 // give values as percentage of total #define RRDR_OPTION_NOT_ALIGNED 0x00001000 // do not align charts for persistant timeframes #define RRDR_OPTION_DISPLAY_ABS 0x00002000 // for badges, display the absolute value, but calculate colors with sign +#define RRDR_OPTION_MATCH_IDS 0x00004000 // when filtering dimensions, match only IDs +#define RRDR_OPTION_MATCH_NAMES 0x00008000 // when filtering dimensions, match only names extern void rrd_stats_api_v1_chart(RRDSET *st, BUFFER *wb); extern void rrd_stats_api_v1_charts(RRDHOST *host, BUFFER *wb); @@ -70,11 +72,11 @@ extern void rrd_stats_api_v1_charts_allmetrics_json(RRDHOST *host, BUFFER *wb); extern void rrd_stats_api_v1_charts_allmetrics_shell(RRDHOST *host, BUFFER *wb); extern int rrdset2anything_api_v1(RRDSET *st, BUFFER *out, BUFFER *dimensions, uint32_t format, long points - , long long after, long long before, int group_method, uint32_t options + , long long after, long long before, int group_method, long group_time, uint32_t options , time_t *latest_timestamp); extern int rrdset2value_api_v1(RRDSET *st, BUFFER *wb, calculated_number *n, const char *dimensions, long points - , long long after, long long before, int group_method, uint32_t options - , time_t *db_before, time_t *db_after, int *value_is_null); + , long long after, long long before, int group_method, long group_time, uint32_t options + , time_t *db_after, time_t *db_before, int *value_is_null); #endif /* NETDATA_RRD2JSON_H */ diff --git a/src/rrdcalc.c b/src/rrdcalc.c index 4177733b0..4e41539e2 100644 --- a/src/rrdcalc.c +++ b/src/rrdcalc.c @@ -34,7 +34,9 @@ inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { } static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { - debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", 
rc->chart?rc->chart:"NOCHART", rc->name, st->id, st->rrdhost->hostname); + RRDHOST *host = st->rrdhost; + + debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); rc->last_status_change = now_realtime_sec(); rc->rrdset = st; @@ -53,12 +55,12 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { } if(!isnan(rc->green) && isnan(st->green)) { - debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green); + debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green); st->green = rc->green; } if(!isnan(rc->red) && isnan(st->red)) { - debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red); + debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red); st->red = rc->red; } @@ -67,17 +69,17 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { char fullname[RRDVAR_MAX_LENGTH + 1]; snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name); - rc->hostid = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value); + rc->hostid = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value); snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name); - rc->hostname = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value); + rc->hostname = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value); if(!rc->units) rc->units = strdupz(st->units); { time_t now = now_realtime_sec(); 
health_alarm_log( - st->rrdhost, + host, rc->id, rc->next_event_id++, now, @@ -110,10 +112,11 @@ static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) { // this has to be called while the RRDHOST is locked inline void rrdsetcalc_link_matching(RRDSET *st) { + RRDHOST *host = st->rrdhost; // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); RRDCALC *rc; - for(rc = st->rrdhost->alarms; rc ; rc = rc->next) { + for(rc = host->alarms; rc ; rc = rc->next) { if(unlikely(rc->rrdset)) continue; @@ -132,10 +135,12 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) { return; } + RRDHOST *host = st->rrdhost; + { time_t now = now_realtime_sec(); health_alarm_log( - st->rrdhost, + host, rc->id, rc->next_event_id++, now, @@ -157,8 +162,6 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) { ); } - RRDHOST *host = st->rrdhost; - debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); // unlink it @@ -173,16 +176,16 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) { rc->rrdset_prev = rc->rrdset_next = NULL; - rrdvar_free(st->rrdhost, &st->rrdvar_root_index, rc->local); + rrdvar_free(host, &st->rrdvar_root_index, rc->local); rc->local = NULL; - rrdvar_free(st->rrdhost, &st->rrdfamily->rrdvar_root_index, rc->family); + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rc->family); rc->family = NULL; - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rc->hostid); + rrdvar_free(host, &host->rrdvar_root_index, rc->hostid); rc->hostid = NULL; - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rc->hostname); + rrdvar_free(host, &host->rrdvar_root_index, rc->hostname); rc->hostname = NULL; rc->rrdset = NULL; @@ -348,7 +351,7 @@ inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *c error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source); } - debug(D_HEALTH, "Health runtime added 
alarm '%s.%s': exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", + debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", (rc->chart)?rc->chart:"NOCHART", rc->name, (rc->exec)?rc->exec:"DEFAULT", diff --git a/src/rrdcalctemplate.c b/src/rrdcalctemplate.c index 4ec24cd21..75a7002b3 100644 --- a/src/rrdcalctemplate.c +++ b/src/rrdcalctemplate.c @@ -5,14 +5,15 @@ // RRDCALCTEMPLATE management void rrdcalctemplate_link_matching(RRDSET *st) { + RRDHOST *host = st->rrdhost; RRDCALCTEMPLATE *rt; - for(rt = st->rrdhost->templates; rt ; rt = rt->next) { + for(rt = host->templates; rt ; rt = rt->next) { if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context) && (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) { - RRDCALC *rc = rrdcalc_create(st->rrdhost, rt, st->id); + RRDCALC *rc = rrdcalc_create(host, rt, st->id); if(unlikely(!rc)) - info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, st->rrdhost->hostname); + info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname); #ifdef NETDATA_INTERNAL_CHECKS else if(rc->rrdset != st) diff --git a/src/rrddim.c b/src/rrddim.c index 6477a1cbe..a54c6452f 100644 --- a/src/rrddim.c +++ b/src/rrddim.c @@ -99,6 +99,7 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte return rd; } + RRDHOST *host = st->rrdhost; 
char filename[FILENAME_MAX + 1]; char fullfilename[FILENAME_MAX + 1]; @@ -247,7 +248,7 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", rd->name, st->name, - st->rrdhost->hostname, + host->hostname, rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(td->algorithm), rd->multiplier, td->multiplier, rd->divisor, td->divisor @@ -261,7 +262,7 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte td->next = rd; } - if(st->rrdhost->health_enabled) { + if(host->health_enabled) { rrddimvar_create(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->last_stored_value, RRDVAR_OPTION_DEFAULT); rrddimvar_create(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->last_collected_value, RRDVAR_OPTION_DEFAULT); rrddimvar_create(rd, RRDVAR_TYPE_TIME_T, NULL, "_last_collected_t", &rd->last_collected_time.tv_sec, RRDVAR_OPTION_DEFAULT); @@ -330,9 +331,11 @@ void rrddim_free(RRDSET *st, RRDDIM *rd) int rrddim_hide(RRDSET *st, const char *id) { debug(D_RRD_CALLS, "rrddim_hide() for chart %s, dimension %s", st->name, id); + RRDHOST *host = st->rrdhost; + RRDDIM *rd = rrddim_find(st, id); if(unlikely(!rd)) { - error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, st->rrdhost->hostname); + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); return 1; } @@ -343,9 +346,10 @@ int rrddim_hide(RRDSET *st, const char *id) { int rrddim_unhide(RRDSET *st, const char *id) { debug(D_RRD_CALLS, "rrddim_unhide() for chart %s, dimension %s", st->name, id); + RRDHOST *host = st->rrdhost; RRDDIM *rd = rrddim_find(st, id); if(unlikely(!rd)) { - error("Cannot find 
dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, st->rrdhost->hostname); + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); return 1; } @@ -372,9 +376,10 @@ inline collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_ } collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) { + RRDHOST *host = st->rrdhost; RRDDIM *rd = rrddim_find(st, id); if(unlikely(!rd)) { - error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, st->rrdhost->hostname); + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); return 0; } diff --git a/src/rrddimvar.c b/src/rrddimvar.c index 0d20815bf..28a3e7fa6 100644 --- a/src/rrddimvar.c +++ b/src/rrddimvar.c @@ -10,41 +10,42 @@ static inline void rrddimvar_free_variables(RRDDIMVAR *rs) { RRDDIM *rd = rs->rrddim; RRDSET *st = rd->rrdset; + RRDHOST *host = st->rrdhost; // CHART VARIABLES FOR THIS DIMENSION - rrdvar_free(st->rrdhost, &st->rrdvar_root_index, rs->var_local_id); + rrdvar_free(host, &st->rrdvar_root_index, rs->var_local_id); rs->var_local_id = NULL; - rrdvar_free(st->rrdhost, &st->rrdvar_root_index, rs->var_local_name); + rrdvar_free(host, &st->rrdvar_root_index, rs->var_local_name); rs->var_local_name = NULL; // FAMILY VARIABLES FOR THIS DIMENSION - rrdvar_free(st->rrdhost, &st->rrdfamily->rrdvar_root_index, rs->var_family_id); + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_id); rs->var_family_id = NULL; - rrdvar_free(st->rrdhost, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); rs->var_family_name = NULL; - rrdvar_free(st->rrdhost, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextid); + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextid); 
rs->var_family_contextid = NULL; - rrdvar_free(st->rrdhost, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextname); + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextname); rs->var_family_contextname = NULL; // HOST VARIABLES FOR THIS DIMENSION - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rs->var_host_chartidid); + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartidid); rs->var_host_chartidid = NULL; - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rs->var_host_chartidname); + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartidname); rs->var_host_chartidname = NULL; - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rs->var_host_chartnameid); + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartnameid); rs->var_host_chartnameid = NULL; - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rs->var_host_chartnamename); + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartnamename); rs->var_host_chartnamename = NULL; // KEYS @@ -79,6 +80,7 @@ static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { RRDDIM *rd = rs->rrddim; RRDSET *st = rd->rrdset; + RRDHOST *host = st->rrdhost; char buffer[RRDDIMVAR_ID_MAX + 1]; @@ -141,10 +143,10 @@ static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { // - $chart-name.id // - $chart-name.name - rs->var_host_chartidid = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, rs->key_fullidid, rs->type, rs->value); - rs->var_host_chartidname = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, rs->key_fullidname, rs->type, rs->value); - rs->var_host_chartnameid = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, rs->key_fullnameid, rs->type, rs->value); - rs->var_host_chartnamename = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, rs->key_fullnamename, rs->type, rs->value); + rs->var_host_chartidid = rrdvar_create_and_index("host", 
&host->rrdvar_root_index, rs->key_fullidid, rs->type, rs->value); + rs->var_host_chartidname = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullidname, rs->type, rs->value); + rs->var_host_chartnameid = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullnameid, rs->type, rs->value); + rs->var_host_chartnamename = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullnamename, rs->type, rs->value); } RRDDIMVAR *rrddimvar_create(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_OPTIONS options) { diff --git a/src/rrdhost.c b/src/rrdhost.c index 831cc46c8..e62e61ae8 100644 --- a/src/rrdhost.c +++ b/src/rrdhost.c @@ -111,6 +111,8 @@ RRDHOST *rrdhost_create(const char *hostname, const char *os, const char *timezone, const char *tags, + const char *program_name, + const char *program_version, int update_every, long entries, RRD_MEMORY_MODE memory_mode, @@ -146,6 +148,9 @@ RRDHOST *rrdhost_create(const char *hostname, rrdhost_init_os(host, os); rrdhost_init_timezone(host, timezone); rrdhost_init_tags(host, tags); + + host->program_name = strdupz((program_name && *program_name)?program_name:"unknown"); + host->program_version = strdupz((program_version && *program_version)?program_version:"unknown"); host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname); avl_init_lock(&(host->rrdset_root_index), rrdset_compare); @@ -265,6 +270,8 @@ RRDHOST *rrdhost_create(const char *hostname, ", os '%s'" ", timezone '%s'" ", tags '%s'" + ", program_name '%s'" + ", program_version '%s'" ", update every %d" ", memory mode %s" ", history entries %ld" @@ -282,6 +289,8 @@ RRDHOST *rrdhost_create(const char *hostname, , host->os , host->timezone , (host->tags)?host->tags:"" + , host->program_name + , host->program_version , host->rrd_update_every , rrd_memory_mode_name(host->rrd_memory_mode) , host->rrd_history_entries @@ -309,6 +318,8 @@ RRDHOST 
*rrdhost_find_or_create( , const char *os , const char *timezone , const char *tags + , const char *program_name + , const char *program_version , int update_every , long history , RRD_MEMORY_MODE mode @@ -329,6 +340,8 @@ RRDHOST *rrdhost_find_or_create( , os , timezone , tags + , program_name + , program_version , update_every , history , mode @@ -342,13 +355,28 @@ RRDHOST *rrdhost_find_or_create( else { host->health_enabled = health_enabled; - if(strcmp(host->hostname, hostname)) { + if(strcmp(host->hostname, hostname) != 0) { + info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", host->hostname, hostname); char *t = host->hostname; host->hostname = strdupz(hostname); host->hash_hostname = simple_hash(host->hostname); freez(t); } + if(strcmp(host->program_name, program_name) != 0) { + info("Host '%s' switched program name from '%s' to '%s'", host->hostname, host->program_name, program_name); + char *t = host->program_name; + host->program_name = strdupz(program_name); + freez(t); + } + + if(strcmp(host->program_version, program_version) != 0) { + info("Host '%s' switched program version from '%s' to '%s'", host->hostname, host->program_version, program_version); + char *t = host->program_version; + host->program_version = strdupz(program_version); + freez(t); + } + if(host->rrd_update_every != update_every) error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. 
Restart netdata here to apply the new settings.", host->hostname, host->rrd_update_every, update_every); @@ -407,6 +435,9 @@ restart_after_removal: void rrd_init(char *hostname) { rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time); + gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", gap_when_lost_iterations_above); + if(gap_when_lost_iterations_above < 1) + gap_when_lost_iterations_above = 1; health_init(); registry_init(); @@ -421,6 +452,8 @@ void rrd_init(char *hostname) { , os_type , netdata_configured_timezone , config_get(CONFIG_SECTION_BACKEND, "host tags", "") + , program_name + , program_version , default_rrd_update_every , default_rrd_history_entries , default_rrd_memory_mode @@ -530,6 +563,8 @@ void rrdhost_free(RRDHOST *host) { freez((void *)host->tags); freez((void *)host->os); freez((void *)host->timezone); + freez(host->program_version); + freez(host->program_name); freez(host->cache_dir); freez(host->varlib_dir); freez(host->rrdpush_send_api_key); diff --git a/src/rrdpush.c b/src/rrdpush.c index 2d10c3ca9..8f71c6d4c 100644 --- a/src/rrdpush.c +++ b/src/rrdpush.c @@ -25,6 +25,11 @@ #define START_STREAMING_PROMPT "Hit me baby, push them over..." 
+typedef enum { + RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW, + RRDPUSH_MULTIPLE_CONNECTIONS_DENY_NEW +} RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY; + int default_rrdpush_enabled = 0; char *default_rrdpush_destination = NULL; char *default_rrdpush_api_key = NULL; @@ -86,7 +91,7 @@ static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { // send the chart buffer_sprintf( host->rrdpush_sender_buffer - , "CHART \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %ld %d \"%s %s %s\" \"%s\" \"%s\"\n" + , "CHART \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %ld %d \"%s %s %s %s\" \"%s\" \"%s\"\n" , st->id , st->name , st->title @@ -99,6 +104,7 @@ static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { , rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)?"obsolete":"" , rrdset_flag_check(st, RRDSET_FLAG_DETAIL)?"detail":"" , rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)?"store_first":"" + , rrdset_flag_check(st, RRDSET_FLAG_HIDDEN)?"hidden":"" , (st->plugin_name)?st->plugin_name:"" , (st->module_name)?st->module_name:"" ); @@ -140,19 +146,20 @@ static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { // sends the current chart dimensions static inline void rrdpush_send_chart_metrics_nolock(RRDSET *st) { - buffer_sprintf(st->rrdhost->rrdpush_sender_buffer, "BEGIN \"%s\" %llu\n", st->id, (st->upstream_resync_time > st->last_collected_time.tv_sec)?st->usec_since_last_update:0); + RRDHOST *host = st->rrdhost; + buffer_sprintf(host->rrdpush_sender_buffer, "BEGIN \"%s\" %llu\n", st->id, (st->upstream_resync_time > st->last_collected_time.tv_sec)?st->usec_since_last_update:0); RRDDIM *rd; rrddim_foreach_read(rd, st) { if(rd->updated && rd->exposed) - buffer_sprintf(st->rrdhost->rrdpush_sender_buffer + buffer_sprintf(host->rrdpush_sender_buffer , "SET \"%s\" = " COLLECTED_NUMBER_FORMAT "\n" , rd->id , rd->collected_value ); } - buffer_strcat(st->rrdhost->rrdpush_sender_buffer, "END\n"); + buffer_strcat(host->rrdpush_sender_buffer, "END\n"); } static void 
rrdpush_sender_thread_spawn(RRDHOST *host); @@ -289,7 +296,7 @@ void rrdpush_sender_thread_stop(RRDHOST *host) { rrdpush_buffer_lock(host); rrdhost_wrlock(host); - pthread_t thr = 0; + netdata_thread_t thr = 0; if(host->rrdpush_sender_spawn) { info("STREAM %s [send]: signaling sending thread to stop...", host->hostname); @@ -302,9 +309,7 @@ void rrdpush_sender_thread_stop(RRDHOST *host) { thr = host->rrdpush_sender_thread; // signal it to cancel - int ret = pthread_cancel(host->rrdpush_sender_thread); - if(ret != 0) - error("STREAM %s [send]: pthread_cancel() returned error.", host->hostname); + netdata_thread_cancel(host->rrdpush_sender_thread); } rrdhost_unlock(host); @@ -312,12 +317,8 @@ void rrdpush_sender_thread_stop(RRDHOST *host) { if(thr != 0) { info("STREAM %s [send]: waiting for the sending thread to stop...", host->hostname); - void *result; - int ret = pthread_join(thr, &result); - if(ret != 0) - error("STREAM %s [send]: pthread_join() returned error.", host->hostname); - + netdata_thread_join(thr, &result); info("STREAM %s [send]: sending thread has exited.", host->hostname); } } @@ -363,7 +364,7 @@ static int rrdpush_sender_thread_connect_to_master(RRDHOST *host, int default_po char http[HTTP_HEADER_SIZE + 1]; snprintfz(http, HTTP_HEADER_SIZE, "STREAM key=%s&hostname=%s&registry_hostname=%s&machine_guid=%s&update_every=%d&os=%s&timezone=%s&tags=%s HTTP/1.1\r\n" - "User-Agent: netdata-push-service/%s\r\n" + "User-Agent: %s/%s\r\n" "Accept: */*\r\n\r\n" , host->rrdpush_send_api_key , host->hostname @@ -373,7 +374,8 @@ static int rrdpush_sender_thread_connect_to_master(RRDHOST *host, int default_po , host->os , host->timezone , (host->tags)?host->tags:"" - , program_version + , host->program_name + , host->program_version ); if(send_timeout(host->rrdpush_sender_socket, http, strlen(http), 0, timeout) == -1) { @@ -435,8 +437,7 @@ static void rrdpush_sender_thread_cleanup_callback(void *ptr) { if(!host->rrdpush_sender_join) { info("STREAM %s [send]: sending 
thread detaches itself.", host->hostname); - if(pthread_detach(pthread_self())) - error("STREAM %s [send]: pthread_detach() failed.", host->hostname); + netdata_thread_detach(netdata_thread_self()); } host->rrdpush_sender_spawn = 0; @@ -452,18 +453,11 @@ void *rrdpush_sender_thread(void *ptr) { if(!host->rrdpush_send_enabled || !host->rrdpush_send_destination || !*host->rrdpush_send_destination || !host->rrdpush_send_api_key || !*host->rrdpush_send_api_key) { error("STREAM %s [send]: thread created (task id %d), but host has streaming disabled.", host->hostname, gettid()); - pthread_exit(NULL); return NULL; } info("STREAM %s [send]: thread created (task id %d)", host->hostname, gettid()); - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("STREAM %s [send]: cannot set pthread cancel state to ENABLE.", host->hostname); - - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("STREAM %s [send]: cannot set pthread cancel type to DEFERRED.", host->hostname); - int timeout = (int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "timeout seconds", 60); int default_port = (int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "default port", 19999); size_t max_size = (size_t)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "buffer size bytes", 1024 * 1024); @@ -492,11 +486,11 @@ void *rrdpush_sender_thread(void *ptr) { size_t not_connected_loops = 0; - pthread_cleanup_push(rrdpush_sender_thread_cleanup_callback, host); + netdata_thread_cleanup_push(rrdpush_sender_thread_cleanup_callback, host); for(; host->rrdpush_send_enabled && !netdata_exit ;) { // check for outstanding cancellation requests - pthread_testcancel(); + netdata_thread_testcancel(); // if we don't have socket open, lets wait a bit if(unlikely(host->rrdpush_sender_socket == -1)) { @@ -595,8 +589,7 @@ void *rrdpush_sender_thread(void *ptr) { // but the socket is in non-blocking mode // so, we will not block at send() - if 
(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL) != 0) - error("STREAM %s [send]: cannot set pthread cancel state to DISABLE.", host->hostname); + netdata_thread_disable_cancelability(); debug(D_STREAM, "STREAM: Getting exclusive lock on host..."); rrdpush_buffer_lock(host); @@ -647,8 +640,7 @@ void *rrdpush_sender_thread(void *ptr) { debug(D_STREAM, "STREAM: Releasing exclusive lock on host..."); rrdpush_buffer_unlock(host); - if (pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("STREAM %s [send]: cannot set pthread cancel state to ENABLE.", host->hostname); + netdata_thread_enable_cancelability(); // END RRDPUSH LOCKED SESSION } @@ -689,9 +681,7 @@ void *rrdpush_sender_thread(void *ptr) { } } - pthread_cleanup_pop(1); - - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } @@ -703,7 +693,49 @@ static void log_stream_connection(const char *client_ip, const char *client_port log_access("STREAM: %d '[%s]:%s' '%s' host '%s' api key '%s' machine guid '%s'", gettid(), client_ip, client_port, msg, host, api_key, machine_guid); } -static int rrdpush_receive(int fd, const char *key, const char *hostname, const char *registry_hostname, const char *machine_guid, const char *os, const char *timezone, const char *tags, int update_every, char *client_ip, char *client_port) { +static RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY get_multiple_connections_strategy(struct config *c, const char *section, const char *name, RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY def) { + char *value; + switch(def) { + default: + case RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW: + value = "allow"; + break; + + case RRDPUSH_MULTIPLE_CONNECTIONS_DENY_NEW: + value = "deny"; + break; + } + + value = appconfig_get(c, section, name, value); + + RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY ret = def; + + if(strcasecmp(value, "allow") == 0 || strcasecmp(value, "permit") == 0 || strcasecmp(value, "accept") == 0) + ret = RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW; + + else if(strcasecmp(value, "deny") == 0 
|| strcasecmp(value, "reject") == 0 || strcasecmp(value, "block") == 0) + ret = RRDPUSH_MULTIPLE_CONNECTIONS_DENY_NEW; + + else + error("Invalid stream config value at section [%s], setting '%s', value '%s'", section, name, value); + + return ret; +} + +static int rrdpush_receive(int fd + , const char *key + , const char *hostname + , const char *registry_hostname + , const char *machine_guid + , const char *os + , const char *timezone + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , char *client_ip + , char *client_port +) { RRDHOST *host; int history = default_rrd_history_entries; RRD_MEMORY_MODE mode = default_rrd_memory_mode; @@ -712,6 +744,7 @@ static int rrdpush_receive(int fd, const char *key, const char *hostname, const char *rrdpush_destination = default_rrdpush_destination; char *rrdpush_api_key = default_rrdpush_api_key; time_t alarms_delay = 60; + RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY rrdpush_multiple_connections_strategy = RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW; update_every = (int)appconfig_get_number(&stream_config, machine_guid, "update every", update_every); if(update_every < 0) update_every = 1; @@ -738,6 +771,9 @@ static int rrdpush_receive(int fd, const char *key, const char *hostname, const rrdpush_api_key = appconfig_get(&stream_config, key, "default proxy api key", rrdpush_api_key); rrdpush_api_key = appconfig_get(&stream_config, machine_guid, "proxy api key", rrdpush_api_key); + rrdpush_multiple_connections_strategy = get_multiple_connections_strategy(&stream_config, key, "multiple connections", rrdpush_multiple_connections_strategy); + rrdpush_multiple_connections_strategy = get_multiple_connections_strategy(&stream_config, machine_guid, "multiple connections", rrdpush_multiple_connections_strategy); + tags = appconfig_set_default(&stream_config, machine_guid, "host tags", (tags)?tags:""); if(tags && !*tags) tags = NULL; @@ -751,6 +787,8 @@ static int rrdpush_receive(int fd, const char 
*key, const char *hostname, const , os , timezone , tags + , program_name + , program_version , update_every , history , mode @@ -821,8 +859,20 @@ static int rrdpush_receive(int fd, const char *key, const char *hostname, const } rrdhost_wrlock(host); - if(host->connected_senders > 0) - info("STREAM %s [receive from [%s]:%s]: multiple streaming connections for the same host detected. If multiple netdata are pushing metrics for the same charts, at the same time, the result is unexpected.", host->hostname, client_ip, client_port); + if(host->connected_senders > 0) { + switch(rrdpush_multiple_connections_strategy) { + case RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW: + info("STREAM %s [receive from [%s]:%s]: multiple streaming connections for the same host detected. If multiple netdata are pushing metrics for the same charts, at the same time, the result is unexpected.", host->hostname, client_ip, client_port); + break; + + case RRDPUSH_MULTIPLE_CONNECTIONS_DENY_NEW: + rrdhost_unlock(host); + log_stream_connection(client_ip, client_port, key, host->machine_guid, host->hostname, "REJECTED - ALREADY CONNECTED"); + info("STREAM %s [receive from [%s]:%s]: multiple streaming connections for the same host detected. 
Rejecting new connection.", host->hostname, client_ip, client_port); + fclose(fp); + return 0; + } + } rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN); host->connected_senders++; @@ -877,35 +927,57 @@ struct rrdpush_thread { char *tags; char *client_ip; char *client_port; + char *program_name; + char *program_version; int update_every; }; -static void *rrdpush_receiver_thread(void *ptr) { - struct rrdpush_thread *rpt = (struct rrdpush_thread *)ptr; - - if (pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("STREAM %s [receive]: cannot set pthread cancel type to DEFERRED.", rpt->hostname); - - if (pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("STREAM %s [receive]: cannot set pthread cancel state to ENABLE.", rpt->hostname); - - - info("STREAM %s [%s]:%s: receive thread created (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid()); - rrdpush_receive(rpt->fd, rpt->key, rpt->hostname, rpt->registry_hostname, rpt->machine_guid, rpt->os, rpt->timezone, rpt->tags, rpt->update_every, rpt->client_ip, rpt->client_port); - info("STREAM %s [receive from [%s]:%s]: receive thread ended (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid()); +static void rrdpush_receiver_thread_cleanup(void *ptr) { + static __thread int executed = 0; + if(!executed) { + executed = 1; + struct rrdpush_thread *rpt = (struct rrdpush_thread *) ptr; + + info("STREAM %s [receive from [%s]:%s]: receive thread ended (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid()); + + freez(rpt->key); + freez(rpt->hostname); + freez(rpt->registry_hostname); + freez(rpt->machine_guid); + freez(rpt->os); + freez(rpt->timezone); + freez(rpt->tags); + freez(rpt->client_ip); + freez(rpt->client_port); + freez(rpt->program_name); + freez(rpt->program_version); + freez(rpt); + } +} - freez(rpt->key); - freez(rpt->hostname); - freez(rpt->registry_hostname); - freez(rpt->machine_guid); - freez(rpt->os); - freez(rpt->timezone); - 
freez(rpt->tags); - freez(rpt->client_ip); - freez(rpt->client_port); - freez(rpt); +static void *rrdpush_receiver_thread(void *ptr) { + netdata_thread_cleanup_push(rrdpush_receiver_thread_cleanup, ptr); + + struct rrdpush_thread *rpt = (struct rrdpush_thread *)ptr; + info("STREAM %s [%s]:%s: receive thread created (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid()); + + rrdpush_receive( + rpt->fd + , rpt->key + , rpt->hostname + , rpt->registry_hostname + , rpt->machine_guid + , rpt->os + , rpt->timezone + , rpt->tags + , rpt->program_name + , rpt->program_version + , rpt->update_every + , rpt->client_ip + , rpt->client_port + ); - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } @@ -913,7 +985,10 @@ static void rrdpush_sender_thread_spawn(RRDHOST *host) { rrdhost_wrlock(host); if(!host->rrdpush_sender_spawn) { - if(pthread_create(&host->rrdpush_sender_thread, NULL, rrdpush_sender_thread, (void *) host)) + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STREAM_SENDER[%s]", host->hostname); + + if(netdata_thread_create(&host->rrdpush_sender_thread, tag, NETDATA_THREAD_OPTION_JOINABLE, rrdpush_sender_thread, (void *) host)) error("STREAM %s [send]: failed to create new thread for client.", host->hostname); else host->rrdpush_sender_spawn = 1; @@ -933,7 +1008,7 @@ int rrdpush_receiver_permission_denied(struct web_client *w) { int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url) { (void)host; - info("STREAM [receive from [%s]:%s]: new client connection.", w->client_ip, w->client_port); + info("clients wants to STREAM metrics."); char *key = NULL, *hostname = NULL, *registry_hostname = NULL, *machine_guid = NULL, *os = "unknown", *timezone = "unknown", *tags = NULL; int update_every = default_rrd_update_every; @@ -1004,7 +1079,7 @@ int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url } { - SIMPLE_PATTERN *key_allow_from = 
simple_pattern_create(appconfig_get(&stream_config, key, "allow from", "*"), SIMPLE_PATTERN_EXACT); + SIMPLE_PATTERN *key_allow_from = simple_pattern_create(appconfig_get(&stream_config, key, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT); if(key_allow_from) { if(!simple_pattern_matches(key_allow_from, w->client_ip)) { simple_pattern_free(key_allow_from); @@ -1023,7 +1098,7 @@ int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url } { - SIMPLE_PATTERN *machine_allow_from = simple_pattern_create(appconfig_get(&stream_config, machine_guid, "allow from", "*"), SIMPLE_PATTERN_EXACT); + SIMPLE_PATTERN *machine_allow_from = simple_pattern_create(appconfig_get(&stream_config, machine_guid, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT); if(machine_allow_from) { if(!simple_pattern_matches(machine_allow_from, w->client_ip)) { simple_pattern_free(machine_allow_from); @@ -1035,7 +1110,7 @@ int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url } } - struct rrdpush_thread *rpt = mallocz(sizeof(struct rrdpush_thread)); + struct rrdpush_thread *rpt = callocz(1, sizeof(struct rrdpush_thread)); rpt->fd = w->ifd; rpt->key = strdupz(key); rpt->hostname = strdupz(hostname); @@ -1047,21 +1122,38 @@ int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url rpt->client_ip = strdupz(w->client_ip); rpt->client_port = strdupz(w->client_port); rpt->update_every = update_every; - pthread_t thread; - debug(D_SYSTEM, "STREAM [receive from [%s]:%s]: starting receiving thread.", w->client_ip, w->client_port); + if(w->user_agent && w->user_agent[0]) { + char *t = strchr(w->user_agent, '/'); + if(t && *t) { + *t = '\0'; + t++; + } + + rpt->program_name = strdupz(w->user_agent); + if(t && *t) rpt->program_version = strdupz(t); + } + + netdata_thread_t thread; + + debug(D_SYSTEM, "starting STREAM receive thread."); - if(pthread_create(&thread, NULL, rrdpush_receiver_thread, (void *)rpt)) - error("STREAM [receive from 
[%s]:%s]: failed to create new thread for client.", w->client_ip, w->client_port); + char tag[FILENAME_MAX + 1]; + snprintfz(tag, FILENAME_MAX, "STREAM_RECEIVER[%s,[%s]:%s]", rpt->hostname, w->client_ip, w->client_port); - else if(pthread_detach(thread)) - error("STREAM [receive from [%s]:%s]: cannot request detach newly created thread.", w->client_ip, w->client_port); + if(netdata_thread_create(&thread, tag, NETDATA_THREAD_OPTION_DEFAULT, rrdpush_receiver_thread, (void *)rpt)) + error("Failed to create new STREAM receive thread for client."); // prevent the caller from closing the streaming socket - if(w->ifd == w->ofd) - w->ifd = w->ofd = -1; - else - w->ifd = -1; + if(web_server_mode == WEB_SERVER_MODE_STATIC_THREADED) { + web_client_flag_set(w, WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET); + } + else { + if(w->ifd == w->ofd) + w->ifd = w->ofd = -1; + else + w->ifd = -1; + } buffer_flush(w->response.data); return 200; diff --git a/src/rrdset.c b/src/rrdset.c index 8504d1cb7..bbd0ae728 100644 --- a/src/rrdset.c +++ b/src/rrdset.c @@ -1,8 +1,6 @@ #define NETDATA_RRD_INTERNALS 1 #include "common.h" -#define RRD_DEFAULT_GAP_INTERPOLATIONS 1 - void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line) { debug(D_RRD_CALLS, "Checking read lock on chart '%s'", st->id); @@ -83,7 +81,7 @@ static inline RRDSET *rrdset_index_find_name(RRDHOST *host, const char *name, ui result = avl_search_lock(&host->rrdset_root_index_name, (avl *) (&(tmp.avlname))); if(result) { RRDSET *st = rrdset_from_avlname(result); - if(strcmp(st->magic, RRDSET_MAGIC)) + if(strcmp(st->magic, RRDSET_MAGIC) != 0) error("Search for RRDSET %s returned an invalid RRDSET %s (name %s)", name, st->id, st->name); // fprintf(stderr, "FOUND: %s\n", name); @@ -139,6 +137,8 @@ int rrdset_set_name(RRDSET *st, const char *name) { if(unlikely(st->name && !strcmp(st->name, name))) return 1; + RRDHOST *host = st->rrdhost; + debug(D_RRD_CALLS, "rrdset_set_name() old: '%s', new: 
'%s'", st->name?st->name:"", name); char b[CONFIG_MAX_VALUE + 1]; @@ -147,13 +147,13 @@ int rrdset_set_name(RRDSET *st, const char *name) { snprintfz(n, RRD_ID_LENGTH_MAX, "%s.%s", st->type, name); rrdset_strncpyz_name(b, n, CONFIG_MAX_VALUE); - if(rrdset_index_find_name(st->rrdhost, b, 0)) { - error("RRDSET: chart name '%s' on host '%s' already exists.", b, st->rrdhost->hostname); + if(rrdset_index_find_name(host, b, 0)) { + error("RRDSET: chart name '%s' on host '%s' already exists.", b, host->hostname); return 0; } if(st->name) { - rrdset_index_del_name(st->rrdhost, st); + rrdset_index_del_name(host, st); st->name = config_set_default(st->config_section, "name", b); st->hash_name = simple_hash(st->name); rrdsetvar_rename_all(st); @@ -169,20 +169,22 @@ int rrdset_set_name(RRDSET *st, const char *name) { rrddimvar_rename_all(rd); rrdset_unlock(st); - if(unlikely(rrdset_index_add_name(st->rrdhost, st) != st)) + if(unlikely(rrdset_index_add_name(host, st) != st)) error("RRDSET: INTERNAL ERROR: attempted to index duplicate chart name '%s'", st->name); return 1; } inline void rrdset_is_obsolete(RRDSET *st) { + RRDHOST *host = st->rrdhost; + if(unlikely(!(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))) { rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE); rrdset_flag_clear(st, RRDSET_FLAG_EXPOSED_UPSTREAM); // the chart will not get more updates (data collection) // so, we have to push its definition now - if(unlikely(st->rrdhost->rrdpush_send_enabled)) + if(unlikely(host->rrdpush_send_enabled)) rrdset_push_chart_definition(st); } } @@ -198,6 +200,7 @@ inline void rrdset_isnot_obsolete(RRDSET *st) { } inline void rrdset_update_heterogeneous_flag(RRDSET *st) { + RRDHOST *host = st->rrdhost; RRDDIM *rd; rrdset_flag_clear(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); @@ -213,7 +216,7 @@ inline void rrdset_update_heterogeneous_flag(RRDSET *st) { info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier 
is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", rd->name, st->name, - st->rrdhost->hostname, + host->hostname, rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm), rd->multiplier, multiplier, rd->divisor, divisor @@ -294,7 +297,9 @@ static inline void last_updated_time_align(RRDSET *st) { void rrdset_free(RRDSET *st) { if(unlikely(!st)) return; - rrdhost_check_wrlock(st->rrdhost); // make sure we have a write lock on the host + RRDHOST *host = st->rrdhost; + + rrdhost_check_wrlock(host); // make sure we have a write lock on the host rrdset_wrlock(st); // lock this RRDSET // info("Removing chart '%s' ('%s')", st->id, st->name); @@ -302,10 +307,10 @@ void rrdset_free(RRDSET *st) { // ------------------------------------------------------------------------ // remove it from the indexes - if(unlikely(rrdset_index_del(st->rrdhost, st) != st)) + if(unlikely(rrdset_index_del(host, st) != st)) error("RRDSET: INTERNAL ERROR: attempt to remove from index chart '%s', removed a different chart.", st->id); - rrdset_index_del_name(st->rrdhost, st); + rrdset_index_del_name(host, st); // ------------------------------------------------------------------------ // free its children structures @@ -314,25 +319,25 @@ void rrdset_free(RRDSET *st) { while(st->alarms) rrdsetcalc_unlink(st->alarms); while(st->dimensions) rrddim_free(st, st->dimensions); - rrdfamily_free(st->rrdhost, st->rrdfamily); + rrdfamily_free(host, st->rrdfamily); - debug(D_RRD_CALLS, "RRDSET: Cleaning up remaining chart variables for host '%s', chart '%s'", st->rrdhost->hostname, st->id); - rrdvar_free_remaining_variables(st->rrdhost, &st->rrdvar_root_index); + debug(D_RRD_CALLS, "RRDSET: Cleaning up remaining chart variables for host '%s', chart '%s'", host->hostname, st->id); + rrdvar_free_remaining_variables(host, &st->rrdvar_root_index); // ------------------------------------------------------------------------ 
// unlink it from the host - if(st == st->rrdhost->rrdset_root) { - st->rrdhost->rrdset_root = st->next; + if(st == host->rrdset_root) { + host->rrdset_root = st->next; } else { // find the previous one RRDSET *s; - for(s = st->rrdhost->rrdset_root; s && s->next != st ; s = s->next) ; + for(s = host->rrdset_root; s && s->next != st ; s = s->next) ; // bypass it if(s) s->next = st->next; - else error("Request to free RRDSET '%s': cannot find it under host '%s'", st->id, st->rrdhost->hostname); + else error("Request to free RRDSET '%s': cannot find it under host '%s'", st->id, host->hostname); } rrdset_unlock(st); @@ -654,8 +659,7 @@ RRDSET *rrdset_create_custom( st->last_collected_time.tv_usec = 0; st->counter_done = 0; - st->gap_when_lost_iterations_above = (int) ( - config_get_number(st->config_section, "gap when lost iterations above", RRD_DEFAULT_GAP_INTERPOLATIONS) + 2); + st->gap_when_lost_iterations_above = (int) (gap_when_lost_iterations_above + 2); st->last_accessed_time = 0; st->upstream_resync_time = 0; @@ -706,7 +710,7 @@ RRDSET *rrdset_create_custom( // RRDSET - data collection iteration control inline void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds) { - if(unlikely(!st->last_collected_time.tv_sec || !microseconds || (st->counter % remote_clock_resync_iterations) == 0)) { + if(unlikely(!st->last_collected_time.tv_sec || !microseconds)) { // call the full next_usec() function rrdset_next_usec(st, microseconds); return; @@ -733,7 +737,7 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { if(unlikely(since_last_usec < 0)) { // oops! the database is in the future - info("RRD database for chart '%s' on host '%s' is %0.5Lf secs in the future. Adjusting it to current time.", st->id, st->rrdhost->hostname, (long double)-since_last_usec / USEC_PER_SEC); + info("RRD database for chart '%s' on host '%s' is %0.5" LONG_DOUBLE_MODIFIER " secs in the future (counter #%zu, update #%zu). 
Adjusting it to current time.", st->id, st->rrdhost->hostname, (LONG_DOUBLE)-since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); st->last_collected_time.tv_sec = now.tv_sec - st->update_every; st->last_collected_time.tv_usec = now.tv_usec; @@ -747,7 +751,7 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { } else if(unlikely((usec_t)since_last_usec > (usec_t)(st->update_every * 10 * USEC_PER_SEC))) { // oops! the database is too far behind - info("RRD database for chart '%s' on host '%s' is %0.5Lf secs in the past. Adjusting it to current time.", st->id, st->rrdhost->hostname, (long double)since_last_usec / USEC_PER_SEC); + info("RRD database for chart '%s' on host '%s' is %0.5" LONG_DOUBLE_MODIFIER " secs in the past (counter #%zu, update #%zu). Adjusting it to current time.", st->id, st->rrdhost->hostname, (LONG_DOUBLE)since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); microseconds = (usec_t)since_last_usec; } @@ -772,7 +776,7 @@ static inline usec_t rrdset_init_last_collected_time(RRDSET *st) { usec_t last_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "initialized last collected time to %0.3Lf", (long double)last_collect_ut / USEC_PER_SEC); + rrdset_debug(st, "initialized last collected time to %0.3" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE)last_collect_ut / USEC_PER_SEC); #endif return last_collect_ut; @@ -785,7 +789,7 @@ static inline usec_t rrdset_update_last_collected_time(RRDSET *st) { st->last_collected_time.tv_usec = (suseconds_t) (ut % USEC_PER_SEC); #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "updated last collected time to %0.3Lf", (long double)last_collect_ut / USEC_PER_SEC); + rrdset_debug(st, "updated last collected time to %0.3" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE)last_collect_ut / USEC_PER_SEC); #endif return last_collect_ut; @@ -804,7 +808,7 @@ static inline usec_t rrdset_init_last_updated_time(RRDSET *st) { 
usec_t last_updated_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "initialized last updated time to %0.3Lf", (long double)last_updated_ut / USEC_PER_SEC); + rrdset_debug(st, "initialized last updated time to %0.3" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE)last_updated_ut / USEC_PER_SEC); #endif return last_updated_ut; @@ -863,8 +867,8 @@ static inline size_t rrdset_done_interpolate( #ifdef NETDATA_INTERNAL_CHECKS if(iterations < 0) { error("INTERNAL CHECK: %s: iterations calculation wrapped! first_ut = %llu, last_stored_ut = %llu, next_store_ut = %llu, now_collect_ut = %llu", st->name, first_ut, last_stored_ut, next_store_ut, now_collect_ut); } - rrdset_debug(st, "last_stored_ut = %0.3Lf (last updated time)", (long double)last_stored_ut/USEC_PER_SEC); - rrdset_debug(st, "next_store_ut = %0.3Lf (next interpolation point)", (long double)next_store_ut/USEC_PER_SEC); + rrdset_debug(st, "last_stored_ut = %0.3" LONG_DOUBLE_MODIFIER " (last updated time)", (LONG_DOUBLE)last_stored_ut/USEC_PER_SEC); + rrdset_debug(st, "next_store_ut = %0.3" LONG_DOUBLE_MODIFIER " (next interpolation point)", (LONG_DOUBLE)next_store_ut/USEC_PER_SEC); #endif last_ut = next_store_ut; @@ -1080,9 +1084,6 @@ void rrdset_done(RRDSET *st) { RRDDIM *rd; - int - pthreadoldcancelstate; // store the old cancelable pthread state, to restore it at the end - char store_this_entry = 1, // boolean: 1 = store this entry, 0 = don't store this entry first_entry = 0; // boolean: 1 = this is the first entry seen for this chart, 0 = all other entries @@ -1094,8 +1095,7 @@ void rrdset_done(RRDSET *st) { next_store_ut, // the timestamp in microseconds, of the next entry to store in the db update_every_ut = st->update_every * USEC_PER_SEC; // st->update_every in microseconds - if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &pthreadoldcancelstate) != 0)) - error("Cannot set pthread cancel state to DISABLE."); + 
netdata_thread_disable_cancelability(); // a read lock is OK here rrdset_rdlock(st); @@ -1107,7 +1107,7 @@ void rrdset_done(RRDSET *st) { // check if the chart has a long time to be updated if(unlikely(st->usec_since_last_update > st->entries * update_every_ut)) { - info("host '%s', chart %s: took too long to be updated (%0.3Lf secs). Resetting it.", st->rrdhost->hostname, st->name, (long double)st->usec_since_last_update / USEC_PER_SEC); + info("host '%s', chart %s: took too long to be updated (counter #%zu, update #%zu, %0.3" LONG_DOUBLE_MODIFIER " secs). Resetting it.", st->rrdhost->hostname, st->name, st->counter, st->counter_done, (LONG_DOUBLE)st->usec_since_last_update / USEC_PER_SEC); rrdset_reset(st); st->usec_since_last_update = update_every_ut; store_this_entry = 0; @@ -1199,10 +1199,10 @@ void rrdset_done(RRDSET *st) { rrdset_done_push(st); #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "last_collect_ut = %0.3Lf (last collection time)", (long double)last_collect_ut/USEC_PER_SEC); - rrdset_debug(st, "now_collect_ut = %0.3Lf (current collection time)", (long double)now_collect_ut/USEC_PER_SEC); - rrdset_debug(st, "last_stored_ut = %0.3Lf (last updated time)", (long double)last_stored_ut/USEC_PER_SEC); - rrdset_debug(st, "next_store_ut = %0.3Lf (next interpolation point)", (long double)next_store_ut/USEC_PER_SEC); + rrdset_debug(st, "last_collect_ut = %0.3" LONG_DOUBLE_MODIFIER " (last collection time)", (LONG_DOUBLE)last_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "now_collect_ut = %0.3" LONG_DOUBLE_MODIFIER " (current collection time)", (LONG_DOUBLE)now_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "last_stored_ut = %0.3" LONG_DOUBLE_MODIFIER " (last updated time)", (LONG_DOUBLE)last_stored_ut/USEC_PER_SEC); + rrdset_debug(st, "next_store_ut = %0.3" LONG_DOUBLE_MODIFIER " (next interpolation point)", (LONG_DOUBLE)next_store_ut/USEC_PER_SEC); #endif // calculate totals and count the dimensions @@ -1540,6 +1540,5 @@ void rrdset_done(RRDSET *st) { 
rrdset_unlock(st); - if(unlikely(pthread_setcancelstate(pthreadoldcancelstate, NULL) != 0)) - error("Cannot set pthread cancel state to RESTORE (%d).", pthreadoldcancelstate); + netdata_thread_enable_cancelability(); } diff --git a/src/rrdsetvar.c b/src/rrdsetvar.c index 280156a8a..aec57efa9 100644 --- a/src/rrdsetvar.c +++ b/src/rrdsetvar.c @@ -7,26 +7,27 @@ static inline void rrdsetvar_free_variables(RRDSETVAR *rs) { RRDSET *st = rs->rrdset; + RRDHOST *host = st->rrdhost; // ------------------------------------------------------------------------ // CHART - rrdvar_free(st->rrdhost, &st->rrdvar_root_index, rs->var_local); + rrdvar_free(host, &st->rrdvar_root_index, rs->var_local); rs->var_local = NULL; // ------------------------------------------------------------------------ // FAMILY - rrdvar_free(st->rrdhost, &st->rrdfamily->rrdvar_root_index, rs->var_family); + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family); rs->var_family = NULL; - rrdvar_free(st->rrdhost, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); rs->var_family_name = NULL; // ------------------------------------------------------------------------ // HOST - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rs->var_host); + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host); rs->var_host = NULL; - rrdvar_free(st->rrdhost, &st->rrdhost->rrdvar_root_index, rs->var_host_name); + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_name); rs->var_host_name = NULL; // ------------------------------------------------------------------------ @@ -40,6 +41,7 @@ static inline void rrdsetvar_free_variables(RRDSETVAR *rs) { static inline void rrdsetvar_create_variables(RRDSETVAR *rs) { RRDSET *st = rs->rrdset; + RRDHOST *host = st->rrdhost; // ------------------------------------------------------------------------ // free the old ones (if any) @@ -67,8 +69,8 @@ static inline void 
rrdsetvar_create_variables(RRDSETVAR *rs) { // ------------------------------------------------------------------------ // HOST - rs->var_host = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, rs->key_fullid, rs->type, rs->value); - rs->var_host_name = rrdvar_create_and_index("host", &st->rrdhost->rrdvar_root_index, rs->key_fullname, rs->type, rs->value); + rs->var_host = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullid, rs->type, rs->value); + rs->var_host_name = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullname, rs->type, rs->value); } RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, RRDVAR_TYPE type, void *value, RRDVAR_OPTIONS options) { @@ -128,6 +130,8 @@ void rrdsetvar_free(RRDSETVAR *rs) { // custom chart variables RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name) { + RRDHOST *host = st->rrdhost; + char *n = strdupz(name); rrdvar_fix_name(n); uint32_t hash = simple_hash(n); @@ -144,7 +148,7 @@ RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name) return rs; } else { - error("RRDSETVAR: custom variable '%s' on chart '%s' of host '%s', conflicts with an internal chart variable", n, st->id, st->rrdhost->hostname); + error("RRDSETVAR: custom variable '%s' on chart '%s' of host '%s', conflicts with an internal chart variable", n, st->id, host->hostname); free(n); return NULL; } diff --git a/src/rrdvar.c b/src/rrdvar.c index 9119b5384..6936c36f1 100644 --- a/src/rrdvar.c +++ b/src/rrdvar.c @@ -205,10 +205,11 @@ static calculated_number rrdvar2number(RRDVAR *rv) { int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result) { RRDSET *st = rc->rrdset; - RRDVAR *rv; - if(!st) return 0; + RRDHOST *host = st->rrdhost; + RRDVAR *rv; + rv = rrdvar_index_find(&st->rrdvar_root_index, variable, hash); if(rv) { *result = rrdvar2number(rv); @@ -221,7 +222,7 @@ int health_variable_lookup(const char 
*variable, uint32_t hash, RRDCALC *rc, cal return 1; } - rv = rrdvar_index_find(&st->rrdhost->rrdvar_root_index, variable, hash); + rv = rrdvar_index_find(&host->rrdvar_root_index, variable, hash); if(rv) { *result = rrdvar2number(rv); return 1; @@ -246,7 +247,7 @@ static int single_variable2json(void *entry, void *data) { if(unlikely(isnan(value) || isinf(value))) buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": null", helper->counter?",":"", rv->name); else - buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": %0.5Lf", helper->counter?",":"", rv->name, (long double)value); + buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": %0.5" LONG_DOUBLE_MODIFIER, helper->counter?",":"", rv->name, (LONG_DOUBLE)value); helper->counter++; @@ -254,6 +255,8 @@ static int single_variable2json(void *entry, void *data) { } void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf) { + RRDHOST *host = st->rrdhost; + struct variable2json_helper helper = { .buf = buf, .counter = 0 @@ -264,9 +267,9 @@ void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf) { buffer_sprintf(buf, "\n\t},\n\t\"family\": \"%s\",\n\t\"family_variables\": {", st->family); helper.counter = 0; avl_traverse_lock(&st->rrdfamily->rrdvar_root_index, single_variable2json, (void *)&helper); - buffer_sprintf(buf, "\n\t},\n\t\"host\": \"%s\",\n\t\"host_variables\": {", st->rrdhost->hostname); + buffer_sprintf(buf, "\n\t},\n\t\"host\": \"%s\",\n\t\"host_variables\": {", host->hostname); helper.counter = 0; - avl_traverse_lock(&st->rrdhost->rrdvar_root_index, single_variable2json, (void *)&helper); + avl_traverse_lock(&host->rrdvar_root_index, single_variable2json, (void *)&helper); buffer_strcat(buf, "\n\t}\n}\n"); } diff --git a/src/simple_pattern.c b/src/simple_pattern.c index 469ea396f..747b5150a 100644 --- a/src/simple_pattern.c +++ b/src/simple_pattern.c @@ -68,11 +68,25 @@ static inline struct simple_pattern *parse_pattern(char *str, SIMPLE_PREFIX_MODE return m; } -SIMPLE_PATTERN *simple_pattern_create(const char 
*list, SIMPLE_PREFIX_MODE default_mode) { +SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode) { struct simple_pattern *root = NULL, *last = NULL; if(unlikely(!list || !*list)) return root; + int isseparator[256] = { + [' '] = 1 // space + , ['\t'] = 1 // tab + , ['\r'] = 1 // carriage return + , ['\n'] = 1 // new line + , ['\f'] = 1 // form feed + , ['\v'] = 1 // vertical tab + }; + + if (unlikely(separators && *separators)) { + memset(&isseparator[0], 0, sizeof(isseparator)); + while(*separators) isseparator[(unsigned char)*separators++] = 1; + } + char *buf = mallocz(strlen(list) + 1); const char *s = list; @@ -83,7 +97,7 @@ SIMPLE_PATTERN *simple_pattern_create(const char *list, SIMPLE_PREFIX_MODE defau char negative = 0; // skip all spaces - while(isspace(*s)) + while(isseparator[(unsigned char)*s]) s++; if(*s == '!') { @@ -103,7 +117,7 @@ SIMPLE_PATTERN *simple_pattern_create(const char *list, SIMPLE_PREFIX_MODE defau s++; } else { - if (isspace(*s) && !escape) { + if (isseparator[(unsigned char)*s] && !escape) { s++; break; } diff --git a/src/simple_pattern.h b/src/simple_pattern.h index 60a25f493..d0b75af7e 100644 --- a/src/simple_pattern.h +++ b/src/simple_pattern.h @@ -13,7 +13,7 @@ typedef void SIMPLE_PATTERN; // create a simple_pattern from the string given // default_mode is used in cases where EXACT matches, without an asterisk, // should be considered PREFIX matches. 
-extern SIMPLE_PATTERN *simple_pattern_create(const char *list, SIMPLE_PREFIX_MODE default_mode); +extern SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode); // test if string str is matched from the pattern and fill 'wildcarded' with the parts matched by '*' extern int simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size); diff --git a/src/socket.c b/src/socket.c index 906ab33dd..8bede73fd 100644 --- a/src/socket.c +++ b/src/socket.c @@ -79,6 +79,29 @@ int sock_enlarge_out(int fd) { // -------------------------------------------------------------------------------------------------------------------- + +char *strdup_client_description(int family, const char *protocol, const char *ip, int port) { + char buffer[100 + 1]; + + switch(family) { + case AF_INET: + snprintfz(buffer, 100, "%s:%s:%d", protocol, ip, port); + break; + + case AF_INET6: + default: + snprintfz(buffer, 100, "%s:[%s]:%d", protocol, ip, port); + break; + + case AF_UNIX: + snprintfz(buffer, 100, "%s:%s", protocol, ip); + break; + } + + return strdupz(buffer); +} + +// -------------------------------------------------------------------------------------------------------------------- // listening sockets int create_listen_socket_unix(const char *path, int listen_backlog) { @@ -231,25 +254,7 @@ static inline int listen_sockets_add(LISTEN_SOCKETS *sockets, int fd, int family sockets->fds[sockets->opened] = fd; sockets->fds_types[sockets->opened] = socktype; sockets->fds_families[sockets->opened] = family; - - char buffer[100 + 1]; - - switch(family) { - case AF_INET: - snprintfz(buffer, 100, "%s:%s:%d", protocol, ip, port); - break; - - case AF_INET6: - default: - snprintfz(buffer, 100, "%s:[%s]:%d", protocol, ip, port); - break; - - case AF_UNIX: - snprintfz(buffer, 100, "%s:%s", protocol, ip); - break; - } - - sockets->fds_names[sockets->opened] = strdupz(buffer); + 
sockets->fds_names[sockets->opened] = strdup_client_description(family, protocol, ip, port); sockets->opened++; return 0; @@ -615,13 +620,39 @@ static inline int connect_to_this_ip46(int protocol, int socktype, const char *h error("Failed to set timeout on the socket to ip '%s' port '%s'", hostBfr, servBfr); } + errno = 0; if(connect(fd, ai->ai_addr, ai->ai_addrlen) < 0) { - error("Failed to connect to '%s', port '%s'", hostBfr, servBfr); - close(fd); - fd = -1; + if(errno == EALREADY || errno == EINPROGRESS) { + info("Waiting for connection to ip %s port %s to be established", hostBfr, servBfr); + + fd_set fds; + FD_ZERO(&fds); + FD_SET(0, &fds); + int rc = select (1, NULL, &fds, NULL, timeout); + + if(rc > 0 && FD_ISSET(fd, &fds)) { + info("connect() to ip %s port %s completed successfully", hostBfr, servBfr); + } + else if(rc == -1) { + error("Failed to connect to '%s', port '%s'. select() returned %d", hostBfr, servBfr, rc); + close(fd); + fd = -1; + } + else { + error("Timed out while connecting to '%s', port '%s'. 
select() returned %d", hostBfr, servBfr, rc); + close(fd); + fd = -1; + } + } + else { + error("Failed to connect to '%s', port '%s'", hostBfr, servBfr); + close(fd); + fd = -1; + } } - debug(D_CONNECT_TO, "Connected to '%s' on port '%s'.", hostBfr, servBfr); + if(fd != -1) + debug(D_CONNECT_TO, "Connected to '%s' on port '%s'.", hostBfr, servBfr); } } @@ -838,7 +869,8 @@ int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags) { #endif if (flags) { - errno = -EINVAL; + close(fd); + errno = EINVAL; return -1; } @@ -930,46 +962,36 @@ int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *clien #define POLL_FDS_INCREASE_STEP 10 -#define POLLINFO_FLAG_SERVER_SOCKET 0x00000001 -#define POLLINFO_FLAG_CLIENT_SOCKET 0x00000002 - -struct pollinfo { - size_t slot; - char *client; - struct pollinfo *next; - uint32_t flags; - int socktype; - - void *data; -}; - -struct poll { - size_t slots; - size_t used; - size_t min; - size_t max; - struct pollfd *fds; - struct pollinfo *inf; - struct pollinfo *first_free; - - void *(*add_callback)(int fd, int socktype, short int *events); - void (*del_callback)(int fd, int socktype, void *data); - int (*rcv_callback)(int fd, int socktype, void *data, short int *events); - int (*snd_callback)(int fd, int socktype, void *data, short int *events); -}; - -static inline struct pollinfo *poll_add_fd(struct poll *p, int fd, int socktype, short int events, uint32_t flags) { +inline POLLINFO *poll_add_fd(POLLJOB *p + , int fd + , int socktype + , uint32_t flags + , const char *client_ip + , const char *client_port + , void *(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void (*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void *data +) { debug(D_POLLFD, "POLLFD: ADD: request to add fd %d, slots = %zu, used = %zu, min = %zu, max = %zu, next free = %zd", fd, p->slots, p->used, p->min, p->max, 
p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); if(unlikely(fd < 0)) return NULL; + //if(p->limit && p->used >= p->limit) { + // info("Max sockets limit reached (%zu sockets), dropping connection", p->used); + // close(fd); + // return NULL; + //} + if(unlikely(!p->first_free)) { size_t new_slots = p->slots + POLL_FDS_INCREASE_STEP; debug(D_POLLFD, "POLLFD: ADD: increasing size (current = %zu, new = %zu, used = %zu, min = %zu, max = %zu)", p->slots, new_slots, p->used, p->min, p->max); p->fds = reallocz(p->fds, sizeof(struct pollfd) * new_slots); - p->inf = reallocz(p->inf, sizeof(struct pollinfo) * new_slots); + p->inf = reallocz(p->inf, sizeof(POLLINFO) * new_slots); + // reset all the newly added slots ssize_t i; for(i = new_slots - 1; i >= (ssize_t)p->slots ; i--) { debug(D_POLLFD, "POLLFD: ADD: resetting new slot %zd", i); @@ -977,11 +999,19 @@ static inline struct pollinfo *poll_add_fd(struct poll *p, int fd, int socktype, p->fds[i].events = 0; p->fds[i].revents = 0; + p->inf[i].p = p; p->inf[i].slot = (size_t)i; p->inf[i].flags = 0; p->inf[i].socktype = -1; - p->inf[i].client = NULL; + p->inf[i].client_ip = NULL; + p->inf[i].client_port = NULL; + p->inf[i].del_callback = p->del_callback; + p->inf[i].rcv_callback = p->rcv_callback; + p->inf[i].snd_callback = p->snd_callback; p->inf[i].data = NULL; + + // link them so that the first free will be earlier in the array + // (we loop decrementing i) p->inf[i].next = p->first_free; p->first_free = &p->inf[i]; } @@ -989,64 +1019,97 @@ static inline struct pollinfo *poll_add_fd(struct poll *p, int fd, int socktype, p->slots = new_slots; } - struct pollinfo *pi = p->first_free; + POLLINFO *pi = p->first_free; p->first_free = p->first_free->next; debug(D_POLLFD, "POLLFD: ADD: selected slot %zu, next free is %zd", pi->slot, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); struct pollfd *pf = &p->fds[pi->slot]; pf->fd = fd; - pf->events = events; + pf->events = POLLIN; pf->revents = 0; + pi->fd = fd; + 
pi->p = p; pi->socktype = socktype; pi->flags = flags; pi->next = NULL; + pi->client_ip = strdupz(client_ip); + pi->client_port = strdupz(client_port); + + pi->del_callback = del_callback; + pi->rcv_callback = rcv_callback; + pi->snd_callback = snd_callback; + + pi->connected_t = now_boottime_sec(); + pi->last_received_t = 0; + pi->last_sent_t = 0; + pi->last_sent_t = 0; + pi->recv_count = 0; + pi->send_count = 0; + netdata_thread_disable_cancelability(); p->used++; if(unlikely(pi->slot > p->max)) p->max = pi->slot; if(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET) { - pi->data = p->add_callback(fd, pi->socktype, &pf->events); + pi->data = add_callback(pi, &pf->events, data); } if(pi->flags & POLLINFO_FLAG_SERVER_SOCKET) { p->min = pi->slot; } + netdata_thread_enable_cancelability(); debug(D_POLLFD, "POLLFD: ADD: completed, slots = %zu, used = %zu, min = %zu, max = %zu, next free = %zd", p->slots, p->used, p->min, p->max, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); return pi; } -static inline void poll_close_fd(struct poll *p, struct pollinfo *pi) { +inline void poll_close_fd(POLLINFO *pi) { + POLLJOB *p = pi->p; + struct pollfd *pf = &p->fds[pi->slot]; debug(D_POLLFD, "POLLFD: DEL: request to clear slot %zu (fd %d), old next free was %zd", pi->slot, pf->fd, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); if(unlikely(pf->fd == -1)) return; + netdata_thread_disable_cancelability(); + if(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET) { - p->del_callback(pf->fd, pi->socktype, pi->data); + pi->del_callback(pi); + + if(likely(!(pi->flags & POLLINFO_FLAG_DONT_CLOSE))) { + if(close(pf->fd) == -1) + error("Failed to close() poll_events() socket %d", pf->fd); + } } - close(pf->fd); pf->fd = -1; pf->events = 0; pf->revents = 0; + pi->fd = -1; pi->socktype = -1; pi->flags = 0; pi->data = NULL; - freez(pi->client); - pi->client = NULL; + pi->del_callback = NULL; + pi->rcv_callback = NULL; + pi->snd_callback = NULL; + + freez(pi->client_ip); + pi->client_ip = NULL; + 
+ freez(pi->client_port); + pi->client_port = NULL; pi->next = p->first_free; p->first_free = pi; p->used--; - if(p->max == pi->slot) { + if(unlikely(p->max == pi->slot)) { p->max = p->min; ssize_t i; for(i = (ssize_t)pi->slot; i > (ssize_t)p->min ;i--) { @@ -1056,243 +1119,400 @@ static inline void poll_close_fd(struct poll *p, struct pollinfo *pi) { } } } + netdata_thread_enable_cancelability(); debug(D_POLLFD, "POLLFD: DEL: completed, slots = %zu, used = %zu, min = %zu, max = %zu, next free = %zd", p->slots, p->used, p->min, p->max, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); } -static void *add_callback_default(int fd, int socktype, short int *events) { - (void)fd; - (void)socktype; +void *poll_default_add_callback(POLLINFO *pi, short int *events, void *data) { + (void)pi; (void)events; + (void)data; + + // error("POLLFD: internal error: poll_default_add_callback() called"); return NULL; } -static void del_callback_default(int fd, int socktype, void *data) { - (void)fd; - (void)socktype; - (void)data; - if(data) +void poll_default_del_callback(POLLINFO *pi) { + if(pi->data) error("POLLFD: internal error: del_callback_default() called with data pointer - possible memory leak"); } -static int rcv_callback_default(int fd, int socktype, void *data, short int *events) { - (void)socktype; - (void)data; - (void)events; +int poll_default_rcv_callback(POLLINFO *pi, short int *events) { + *events |= POLLIN; char buffer[1024 + 1]; ssize_t rc; do { - rc = recv(fd, buffer, 1024, MSG_DONTWAIT); + rc = recv(pi->fd, buffer, 1024, MSG_DONTWAIT); if (rc < 0) { // read failed if (errno != EWOULDBLOCK && errno != EAGAIN) { - error("POLLFD: recv() failed."); + error("POLLFD: poll_default_rcv_callback(): recv() failed with %zd.", rc); return -1; } } else if (rc) { // data received - info("POLLFD: internal error: discarding %zd bytes received on socket %d", rc, fd); + info("POLLFD: internal error: poll_default_rcv_callback() is discarding %zd bytes received on socket 
%d", rc, pi->fd); } } while (rc != -1); return 0; } -static int snd_callback_default(int fd, int socktype, void *data, short int *events) { - (void)socktype; - (void)data; - (void)events; - +int poll_default_snd_callback(POLLINFO *pi, short int *events) { *events &= ~POLLOUT; - info("POLLFD: internal error: nothing to send on socket %d", fd); + info("POLLFD: internal error: poll_default_snd_callback(): nothing to send on socket %d", pi->fd); return 0; } -void poll_events_cleanup(void *data) { - struct poll *p = (struct poll *)data; +void poll_default_tmr_callback(void *timer_data) { + (void)timer_data; +} + +static void poll_events_cleanup(void *data) { + POLLJOB *p = (POLLJOB *)data; size_t i; for(i = 0 ; i <= p->max ; i++) { - struct pollinfo *pi = &p->inf[i]; - poll_close_fd(p, pi); + POLLINFO *pi = &p->inf[i]; + poll_close_fd(pi); } freez(p->fds); freez(p->inf); } -void poll_events(LISTEN_SOCKETS *sockets - , void *(*add_callback)(int fd, int socktype, short int *events) - , void (*del_callback)(int fd, int socktype, void *data) - , int (*rcv_callback)(int fd, int socktype, void *data, short int *events) - , int (*snd_callback)(int fd, int socktype, void *data, short int *events) - , SIMPLE_PATTERN *access_list - , void *data -) { - int retval; +static void poll_events_process(POLLJOB *p, POLLINFO *pi, struct pollfd *pf, short int revents, time_t now) { + short int events = pf->events; + int fd = pf->fd; + pf->revents = 0; + size_t i = pi->slot; - struct poll p = { - .slots = 0, - .used = 0, - .max = 0, - .fds = NULL, - .inf = NULL, - .first_free = NULL, + if(unlikely(fd == -1)) { + debug(D_POLLFD, "POLLFD: LISTENER: ignoring slot %zu, it does not have an fd", i); + return; + } - .add_callback = add_callback?add_callback:add_callback_default, - .del_callback = del_callback?del_callback:del_callback_default, - .rcv_callback = rcv_callback?rcv_callback:rcv_callback_default, - .snd_callback = snd_callback?snd_callback:snd_callback_default - }; + debug(D_POLLFD, 
"POLLFD: LISTENER: processing events for slot %zu (events = %d, revents = %d)", i, events, revents); - size_t i; - for(i = 0; i < sockets->opened ;i++) { - struct pollinfo *pi = poll_add_fd(&p, sockets->fds[i], sockets->fds_types[i], POLLIN, POLLINFO_FLAG_SERVER_SOCKET); - pi->data = data; - info("POLLFD: LISTENER: listening on '%s'", (sockets->fds_names[i])?sockets->fds_names[i]:"UNKNOWN"); - } + if(revents & POLLIN || revents & POLLPRI) { + // receiving data - int timeout = -1; // wait forever + pi->last_received_t = now; + pi->recv_count++; - pthread_cleanup_push(poll_events_cleanup, &p); + if(likely(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET)) { + // read data from client TCP socket + debug(D_POLLFD, "POLLFD: LISTENER: reading data from TCP client slot %zu (fd %d)", i, fd); - for(;;) { - if(unlikely(netdata_exit)) break; + pf->events = 0; + if (pi->rcv_callback(pi, &pf->events) == -1) { + poll_close_fd(&p->inf[i]); + return; + } + pf = &p->fds[i]; + pi = &p->inf[i]; + +#ifdef NETDATA_INTERNAL_CHECKS + // this is common - it is used for web server file copies + if(unlikely(!(pf->events & (POLLIN|POLLOUT)))) { + error("POLLFD: LISTENER: after reading, client slot %zu (fd %d) from '%s:%s' was left without expecting input or output. 
", i, fd, pi->client_ip?pi->client_ip:"<undefined-ip>", pi->client_port?pi->client_port:"<undefined-port>"); + //poll_close_fd(pi); + //return; + } +#endif + } + else if(likely(pi->flags & POLLINFO_FLAG_SERVER_SOCKET)) { + // new connection + // debug(D_POLLFD, "POLLFD: LISTENER: accepting connections from slot %zu (fd %d)", i, fd); + + switch(pi->socktype) { + case SOCK_STREAM: { + // a TCP socket + // we accept the connection + + int nfd; + do { + char client_ip[NI_MAXHOST + 1]; + char client_port[NI_MAXSERV + 1]; + + debug(D_POLLFD, "POLLFD: LISTENER: calling accept4() slot %zu (fd %d)", i, fd); + nfd = accept_socket(fd, SOCK_NONBLOCK, client_ip, NI_MAXHOST + 1, client_port, NI_MAXSERV + 1, p->access_list); + if (unlikely(nfd < 0)) { + // accept failed + + debug(D_POLLFD, "POLLFD: LISTENER: accept4() slot %zu (fd %d) failed.", i, fd); + + if(unlikely(errno == EMFILE)) { + error("POLLFD: LISTENER: too many open files - sleeping for 1ms - used by this thread %zu, max for this thread %zu", p->used, p->limit); + usleep(1000); // 10ms + } + else if(unlikely(errno != EWOULDBLOCK && errno != EAGAIN)) + error("POLLFD: LISTENER: accept() failed."); - debug(D_POLLFD, "POLLFD: LISTENER: Waiting on %zu sockets...", p.max + 1); - retval = poll(p.fds, p.max + 1, timeout); + break; + } + else { + // accept ok + // info("POLLFD: LISTENER: client '[%s]:%s' connected to '%s' on fd %d", client_ip, client_port, sockets->fds_names[i], nfd); + poll_add_fd(p + , nfd + , SOCK_STREAM + , POLLINFO_FLAG_CLIENT_SOCKET + , client_ip + , client_port + , p->add_callback + , p->del_callback + , p->rcv_callback + , p->snd_callback + , NULL + ); + + // it may have reallocated them, so refresh our pointers + pf = &p->fds[i]; + pi = &p->inf[i]; + } + } while (nfd >= 0 && (!p->limit || p->used < p->limit)); + break; + } - if(unlikely(retval == -1)) { - error("POLLFD: LISTENER: poll() failed."); - continue; + case SOCK_DGRAM: { + // a UDP socket + // we read data from the server socket + + 
debug(D_POLLFD, "POLLFD: LISTENER: reading data from UDP slot %zu (fd %d)", i, fd); + + // FIXME: access_list is not applied to UDP + + pf->events = 0; + pi->rcv_callback(pi, &pf->events); + break; + } + + default: { + error("POLLFD: LISTENER: Unknown socktype %d on slot %zu", pi->socktype, pi->slot); + break; + } + } } - else if(unlikely(!retval)) { - debug(D_POLLFD, "POLLFD: LISTENER: poll() timeout."); - continue; + } + + if(unlikely(revents & POLLOUT)) { + // sending data + debug(D_POLLFD, "POLLFD: LISTENER: sending data to socket on slot %zu (fd %d)", i, fd); + + pi->last_sent_t = now; + pi->send_count++; + + pf->events = 0; + if (pi->snd_callback(pi, &pf->events) == -1) { + poll_close_fd(&p->inf[i]); + return; + } + pf = &p->fds[i]; + pi = &p->inf[i]; + +#ifdef NETDATA_INTERNAL_CHECKS + // this is common - it is used for streaming + if(unlikely(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET && !(pf->events & (POLLIN|POLLOUT)))) { + error("POLLFD: LISTENER: after sending, client slot %zu (fd %d) from '%s:%s' was left without expecting input or output. 
", i, fd, pi->client_ip?pi->client_ip:"<undefined-ip>", pi->client_port?pi->client_port:"<undefined-port>"); + //poll_close_fd(pi); + //return; } +#endif + } - if(unlikely(netdata_exit)) break; + if(unlikely(revents & POLLERR)) { + error("POLLFD: LISTENER: processing POLLERR events for slot %zu fd %d (events = %d, revents = %d)", i, events, revents, fd); + pf->events = 0; + poll_close_fd(pi); + return; + } - for(i = 0 ; i <= p.max ; i++) { - struct pollfd *pf = &p.fds[i]; - struct pollinfo *pi = &p.inf[i]; - int fd = pf->fd; - short int revents = pf->revents; - pf->revents = 0; + if(unlikely(revents & POLLHUP)) { + error("POLLFD: LISTENER: processing POLLHUP events for slot %zu fd %d (events = %d, revents = %d)", i, events, revents, fd); + pf->events = 0; + poll_close_fd(pi); + return; + } - if(unlikely(fd == -1)) { - debug(D_POLLFD, "POLLFD: LISTENER: ignoring slot %zu, it does not have an fd", i); - continue; - } + if(unlikely(revents & POLLNVAL)) { + error("POLLFD: LISTENER: processing POLLNVAL events for slot %zu fd %d (events = %d, revents = %d)", i, events, revents, fd); + pf->events = 0; + poll_close_fd(pi); + return; + } +} - debug(D_POLLFD, "POLLFD: LISTENER: processing events for slot %zu (events = %d, revents = %d)", i, pf->events, revents); +void poll_events(LISTEN_SOCKETS *sockets + , void *(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void (*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void (*tmr_callback)(void *timer_data) + , SIMPLE_PATTERN *access_list + , void *data + , time_t tcp_request_timeout_seconds + , time_t tcp_idle_timeout_seconds + , time_t timer_milliseconds + , void *timer_data + , size_t max_tcp_sockets +) { + if(!sockets || !sockets->opened) { + error("POLLFD: internal error: no listening sockets are opened"); + return; + } - if(revents & POLLIN || revents & POLLPRI) { - // receiving data + if(timer_milliseconds <= 
0) timer_milliseconds = 0; - if(likely(pi->flags & POLLINFO_FLAG_SERVER_SOCKET)) { - // new connection - // debug(D_POLLFD, "POLLFD: LISTENER: accepting connections from slot %zu (fd %d)", i, fd); + int retval; - switch(pi->socktype) { - case SOCK_STREAM: { - // a TCP socket - // we accept the connection + POLLJOB p = { + .slots = 0, + .used = 0, + .max = 0, + .limit = max_tcp_sockets, + .fds = NULL, + .inf = NULL, + .first_free = NULL, - int nfd; - do { - char client_ip[NI_MAXHOST + 1]; - char client_port[NI_MAXSERV + 1]; + .complete_request_timeout = tcp_request_timeout_seconds, + .idle_timeout = tcp_idle_timeout_seconds, + .checks_every = (tcp_idle_timeout_seconds / 3) + 1, - debug(D_POLLFD, "POLLFD: LISTENER: calling accept4() slot %zu (fd %d)", i, fd); - nfd = accept_socket(fd, SOCK_NONBLOCK, client_ip, NI_MAXHOST + 1, client_port, NI_MAXSERV + 1, access_list); - if (nfd < 0) { - // accept failed + .access_list = access_list, - debug(D_POLLFD, "POLLFD: LISTENER: accept4() slot %zu (fd %d) failed.", i, fd); + .timer_milliseconds = timer_milliseconds, + .timer_data = timer_data, - if(errno != EWOULDBLOCK && errno != EAGAIN) - error("POLLFD: LISTENER: accept() failed."); + .add_callback = add_callback?add_callback:poll_default_add_callback, + .del_callback = del_callback?del_callback:poll_default_del_callback, + .rcv_callback = rcv_callback?rcv_callback:poll_default_rcv_callback, + .snd_callback = snd_callback?snd_callback:poll_default_snd_callback, + .tmr_callback = tmr_callback?tmr_callback:poll_default_tmr_callback + }; - break; - } - else { - // accept ok - info("POLLFD: LISTENER: client '[%s]:%s' connected to '%s'", client_ip, client_port, sockets->fds_names[i]); - poll_add_fd(&p, nfd, SOCK_STREAM, POLLIN, POLLINFO_FLAG_CLIENT_SOCKET); + size_t i; + for(i = 0; i < sockets->opened ;i++) { - // it may have realloced them, so refresh our pointers - pf = &p.fds[i]; - pi = &p.inf[i]; - } - } while (nfd != -1); - break; - } + POLLINFO *pi = poll_add_fd(&p + , 
sockets->fds[i] + , sockets->fds_types[i] + , POLLINFO_FLAG_SERVER_SOCKET + , (sockets->fds_names[i])?sockets->fds_names[i]:"UNKNOWN" + , "" + , p.add_callback + , p.del_callback + , p.rcv_callback + , p.snd_callback + , NULL + ); - case SOCK_DGRAM: { - // a UDP socket - // we read data from the server socket + pi->data = data; + info("POLLFD: LISTENER: listening on '%s'", (sockets->fds_names[i])?sockets->fds_names[i]:"UNKNOWN"); + } - debug(D_POLLFD, "POLLFD: LISTENER: reading data from UDP slot %zu (fd %d)", i, fd); + int listen_sockets_active = 1; - // FIXME: access_list is not applied to UDP + int timeout_ms = 1000; // in milliseconds + time_t last_check = now_boottime_sec(); - p.rcv_callback(fd, pi->socktype, pi->data, &pf->events); - break; - } + usec_t timer_usec = timer_milliseconds * USEC_PER_MS; + usec_t now_usec = 0, next_timer_usec = 0, last_timer_usec = 0; + if(unlikely(timer_usec)) { + now_usec = now_boottime_usec(); + next_timer_usec = now_usec - (now_usec % timer_usec) + timer_usec; + } - default: { - error("POLLFD: LISTENER: Unknown socktype %d on slot %zu", pi->socktype, pi->slot); - break; - } - } - } + netdata_thread_cleanup_push(poll_events_cleanup, &p); - if(likely(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET)) { - // read data from client TCP socket - debug(D_POLLFD, "POLLFD: LISTENER: reading data from TCP client slot %zu (fd %d)", i, fd); + while(!netdata_exit) { + if(unlikely(timer_usec)) { + now_usec = now_boottime_usec(); - if (p.rcv_callback(fd, pi->socktype, pi->data, &pf->events) == -1) { - poll_close_fd(&p, pi); - continue; - } - } + if(unlikely(timer_usec && now_usec >= next_timer_usec)) { + debug(D_POLLFD, "Calling timer callback after %zu usec", (size_t)(now_usec - last_timer_usec)); + last_timer_usec = now_usec; + p.tmr_callback(p.timer_data); + now_usec = now_boottime_usec(); + next_timer_usec = now_usec - (now_usec % timer_usec) + timer_usec; } - if(unlikely(revents & POLLOUT)) { - // sending data - debug(D_POLLFD, "POLLFD: 
LISTENER: sending data to socket on slot %zu (fd %d)", i, fd); + usec_t dt_usec = next_timer_usec - now_usec; + if(dt_usec > 1000 * USEC_PER_MS) + timeout_ms = 1000; + else + timeout_ms = (int)(dt_usec / USEC_PER_MS); + } - if (p.snd_callback(fd, pi->socktype, pi->data, &pf->events) == -1) { - poll_close_fd(&p, pi); - continue; + // enable or disable the TCP listening sockets, based on the current number of sockets used and the limit set + if((listen_sockets_active && (p.limit && p.used >= p.limit)) || (!listen_sockets_active && (!p.limit || p.used < p.limit))) { + listen_sockets_active = !listen_sockets_active; + info("%s listening sockets (used TCP sockets %zu, max allowed for this worker %zu)", (listen_sockets_active)?"ENABLING":"DISABLING", p.used, p.limit); + for (i = 0; i <= p.max; i++) { + if(p.inf[i].flags & POLLINFO_FLAG_SERVER_SOCKET && p.inf[i].socktype == SOCK_STREAM) { + p.fds[i].events = (short int) ((listen_sockets_active) ? POLLIN : 0); } } + } - if(unlikely(revents & POLLERR)) { - error("POLLFD: LISTENER: processing POLLERR events for slot %zu (events = %d, revents = %d)", i, pf->events, revents); - poll_close_fd(&p, pi); - continue; - } + debug(D_POLLFD, "POLLFD: LISTENER: Waiting on %zu sockets for %zu ms...", p.max + 1, (size_t)timeout_ms); + retval = poll(p.fds, p.max + 1, timeout_ms); + time_t now = now_boottime_sec(); - if(unlikely(revents & POLLHUP)) { - error("POLLFD: LISTENER: processing POLLHUP events for slot %zu (events = %d, revents = %d)", i, pf->events, pf->revents); - poll_close_fd(&p, pi); - continue; + if(unlikely(retval == -1)) { + error("POLLFD: LISTENER: poll() failed while waiting on %zu sockets.", p.max + 1); + break; + } + else if(unlikely(!retval)) { + debug(D_POLLFD, "POLLFD: LISTENER: poll() timeout."); + } + else { + for (i = 0; i <= p.max; i++) { + struct pollfd *pf = &p.fds[i]; + short int revents = pf->revents; + if (unlikely(revents)) + poll_events_process(&p, &p.inf[i], pf, revents, now); } + } - if(unlikely(revents 
& POLLNVAL)) { - error("POLLFD: LISTENER: processing POLLNVAP events for slot %zu (events = %d, revents = %d)", i, pf->events, revents); - poll_close_fd(&p, pi); - continue; + if(unlikely(p.checks_every > 0 && now - last_check > p.checks_every)) { + last_check = now; + + // security checks + for(i = 0; i <= p.max; i++) { + POLLINFO *pi = &p.inf[i]; + + if(likely(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET)) { + if (unlikely(pi->send_count == 0 && p.complete_request_timeout > 0 && (now - pi->connected_t) >= p.complete_request_timeout)) { + info("POLLFD: LISTENER: client slot %zu (fd %d) from '%s:%s' has not sent a complete request in %zu seconds - closing it. " + , i + , pi->fd + , pi->client_ip ? pi->client_ip : "<undefined-ip>" + , pi->client_port ? pi->client_port : "<undefined-port>" + , (size_t) p.complete_request_timeout + ); + poll_close_fd(pi); + } + else if(unlikely(pi->recv_count && p.idle_timeout > 0 && now - ((pi->last_received_t > pi->last_sent_t) ? pi->last_received_t : pi->last_sent_t) >= p.idle_timeout )) { + info("POLLFD: LISTENER: client slot %zu (fd %d) from '%s:%s' is idle for more than %zu seconds - closing it. " + , i + , pi->fd + , pi->client_ip ? pi->client_ip : "<undefined-ip>" + , pi->client_port ? 
pi->client_port : "<undefined-port>" + , (size_t) p.idle_timeout + ); + poll_close_fd(pi); + } + } } } } - pthread_cleanup_pop(1); + netdata_thread_cleanup_pop(1); debug(D_POLLFD, "POLLFD: LISTENER: cleanup completed"); } diff --git a/src/socket.h b/src/socket.h index 08b8518b9..7b3e726ec 100644 --- a/src/socket.h +++ b/src/socket.h @@ -19,6 +19,8 @@ typedef struct listen_sockets { int fds_families[MAX_LISTEN_FDS]; // the family of the open sockets (AF_UNIX, AF_INET, AF_INET6) } LISTEN_SOCKETS; +extern char *strdup_client_description(int family, const char *protocol, const char *ip, int port); + extern int listen_sockets_setup(LISTEN_SOCKETS *sockets); extern void listen_sockets_close(LISTEN_SOCKETS *sockets); @@ -51,13 +53,110 @@ extern int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flag #endif /* #ifndef HAVE_ACCEPT4 */ +// ---------------------------------------------------------------------------- +// poll() based listener + +#define POLLINFO_FLAG_SERVER_SOCKET 0x00000001 +#define POLLINFO_FLAG_CLIENT_SOCKET 0x00000002 +#define POLLINFO_FLAG_DONT_CLOSE 0x00000004 + +typedef struct poll POLLJOB; + +typedef struct pollinfo { + POLLJOB *p; // the parent + size_t slot; // the slot id + + int fd; // the file descriptor + int socktype; // the client socket type + char *client_ip; // the connected client IP + char *client_port; // the connected client port + + time_t connected_t; // the time the socket connected + time_t last_received_t; // the time the socket last received data + time_t last_sent_t; // the time the socket last sent data + + size_t recv_count; // the number of times the socket was ready for inbound traffic + size_t send_count; // the number of times the socket was ready for outbound traffic + + uint32_t flags; // internal flags + + // callbacks for this socket + void (*del_callback)(struct pollinfo *pi); + int (*rcv_callback)(struct pollinfo *pi, short int *events); + int (*snd_callback)(struct pollinfo *pi, short int *events); + 
+ // the user data + void *data; + + // linking of free pollinfo structures + // for quickly finding the next available + // this is like a stack, it grows and shrinks + // (with gaps - lower empty slots are preferred) + struct pollinfo *next; +} POLLINFO; + +struct poll { + size_t slots; + size_t used; + size_t min; + size_t max; + + size_t limit; + + time_t complete_request_timeout; + time_t idle_timeout; + time_t checks_every; + + time_t timer_milliseconds; + void *timer_data; + + struct pollfd *fds; + struct pollinfo *inf; + struct pollinfo *first_free; + + SIMPLE_PATTERN *access_list; + + void *(*add_callback)(POLLINFO *pi, short int *events, void *data); + void (*del_callback)(POLLINFO *pi); + int (*rcv_callback)(POLLINFO *pi, short int *events); + int (*snd_callback)(POLLINFO *pi, short int *events); + void (*tmr_callback)(void *timer_data); +}; + +#define pollinfo_from_slot(p, slot) (&((p)->inf[(slot)])) + +extern int poll_default_snd_callback(POLLINFO *pi, short int *events); +extern int poll_default_rcv_callback(POLLINFO *pi, short int *events); +extern void poll_default_del_callback(POLLINFO *pi); +extern void *poll_default_add_callback(POLLINFO *pi, short int *events, void *data); + +extern POLLINFO *poll_add_fd(POLLJOB *p + , int fd + , int socktype + , uint32_t flags + , const char *client_ip + , const char *client_port + , void *(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void (*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void *data +); +extern void poll_close_fd(POLLINFO *pi); + extern void poll_events(LISTEN_SOCKETS *sockets - , void *(*add_callback)(int fd, int socktype, short int *events) - , void (*del_callback)(int fd, int socktype, void *data) - , int (*rcv_callback)(int fd, int socktype, void *data, short int *events) - , int (*snd_callback)(int fd, int socktype, void *data, short int *events) + , void 
*(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void (*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void (*tmr_callback)(void *timer_data) , SIMPLE_PATTERN *access_list , void *data + , time_t tcp_request_timeout_seconds + , time_t tcp_idle_timeout_seconds + , time_t timer_milliseconds + , void *timer_data + , size_t max_tcp_sockets ); #endif //NETDATA_SOCKET_H diff --git a/src/statistical.c b/src/statistical.c index 807bc25ea..d4b33fd5a 100644 --- a/src/statistical.c +++ b/src/statistical.c @@ -2,7 +2,7 @@ // -------------------------------------------------------------------------------------------------------------------- -inline long double sum_and_count(long double *series, size_t entries, size_t *count) { +inline LONG_DOUBLE sum_and_count(const LONG_DOUBLE *series, size_t entries, size_t *count) { if(unlikely(entries == 0)) { if(likely(count)) *count = 0; @@ -18,10 +18,10 @@ inline long double sum_and_count(long double *series, size_t entries, size_t *co } size_t i, c = 0; - long double sum = 0; + LONG_DOUBLE sum = 0; for(i = 0; i < entries ; i++) { - long double value = series[i]; + LONG_DOUBLE value = series[i]; if(unlikely(isnan(value) || isinf(value))) continue; c++; sum += value; @@ -36,44 +36,44 @@ inline long double sum_and_count(long double *series, size_t entries, size_t *co return sum; } -inline long double sum(long double *series, size_t entries) { +inline LONG_DOUBLE sum(const LONG_DOUBLE *series, size_t entries) { return sum_and_count(series, entries, NULL); } -inline long double average(long double *series, size_t entries) { +inline LONG_DOUBLE average(const LONG_DOUBLE *series, size_t entries) { size_t count = 0; - long double sum = sum_and_count(series, entries, &count); + LONG_DOUBLE sum = sum_and_count(series, entries, &count); if(unlikely(count == 0)) return NAN; - return sum / (long double)count; + return sum / 
(LONG_DOUBLE)count; } // -------------------------------------------------------------------------------------------------------------------- -long double moving_average(long double *series, size_t entries, size_t period) { +LONG_DOUBLE moving_average(const LONG_DOUBLE *series, size_t entries, size_t period) { if(unlikely(period <= 0)) return 0.0; size_t i, count; - long double sum = 0, avg = 0; - long double p[period]; + LONG_DOUBLE sum = 0, avg = 0; + LONG_DOUBLE p[period]; for(count = 0; count < period ; count++) p[count] = 0.0; for(i = 0, count = 0; i < entries; i++) { - long double value = series[i]; + LONG_DOUBLE value = series[i]; if(unlikely(isnan(value) || isinf(value))) continue; if(unlikely(count < period)) { sum += value; - avg = (count == period - 1) ? sum / (long double)period : 0; + avg = (count == period - 1) ? sum / (LONG_DOUBLE)period : 0; } else { sum = sum - p[count % period] + value; - avg = sum / (long double)period; + avg = sum / (LONG_DOUBLE)period; } p[count % period] = value; @@ -86,8 +86,8 @@ long double moving_average(long double *series, size_t entries, size_t period) { // -------------------------------------------------------------------------------------------------------------------- static int qsort_compare(const void *a, const void *b) { - long double *p1 = (long double *)a, *p2 = (long double *)b; - long double n1 = *p1, n2 = *p2; + LONG_DOUBLE *p1 = (LONG_DOUBLE *)a, *p2 = (LONG_DOUBLE *)b; + LONG_DOUBLE n1 = *p1, n2 = *p2; if(unlikely(isnan(n1) || isnan(n2))) { if(isnan(n1) && !isnan(n2)) return -1; @@ -105,17 +105,17 @@ static int qsort_compare(const void *a, const void *b) { return 0; } -inline void sort_series(long double *series, size_t entries) { - qsort(series, entries, sizeof(long double), qsort_compare); +inline void sort_series(LONG_DOUBLE *series, size_t entries) { + qsort(series, entries, sizeof(LONG_DOUBLE), qsort_compare); } -inline long double *copy_series(long double *series, size_t entries) { - long double *copy 
= mallocz(sizeof(long double) * entries); - memcpy(copy, series, sizeof(long double) * entries); +inline LONG_DOUBLE *copy_series(const LONG_DOUBLE *series, size_t entries) { + LONG_DOUBLE *copy = mallocz(sizeof(LONG_DOUBLE) * entries); + memcpy(copy, series, sizeof(LONG_DOUBLE) * entries); return copy; } -long double median_on_sorted_series(long double *series, size_t entries) { +LONG_DOUBLE median_on_sorted_series(const LONG_DOUBLE *series, size_t entries) { if(unlikely(entries == 0)) return NAN; @@ -125,7 +125,7 @@ long double median_on_sorted_series(long double *series, size_t entries) { if(unlikely(entries == 2)) return (series[0] + series[1]) / 2; - long double avg; + LONG_DOUBLE avg; if(entries % 2 == 0) { size_t m = entries / 2; avg = (series[m] + series[m + 1]) / 2; @@ -137,7 +137,7 @@ long double median_on_sorted_series(long double *series, size_t entries) { return avg; } -long double median(long double *series, size_t entries) { +LONG_DOUBLE median(const LONG_DOUBLE *series, size_t entries) { if(unlikely(entries == 0)) return NAN; @@ -147,10 +147,10 @@ long double median(long double *series, size_t entries) { if(unlikely(entries == 2)) return (series[0] + series[1]) / 2; - long double *copy = copy_series(series, entries); + LONG_DOUBLE *copy = copy_series(series, entries); sort_series(copy, entries); - long double avg = median_on_sorted_series(copy, entries); + LONG_DOUBLE avg = median_on_sorted_series(copy, entries); freez(copy); return avg; @@ -158,18 +158,18 @@ long double median(long double *series, size_t entries) { // -------------------------------------------------------------------------------------------------------------------- -long double moving_median(long double *series, size_t entries, size_t period) { +LONG_DOUBLE moving_median(const LONG_DOUBLE *series, size_t entries, size_t period) { if(entries <= period) return median(series, entries); - long double *data = copy_series(series, entries); + LONG_DOUBLE *data = copy_series(series, 
entries); size_t i; for(i = period; i < entries; i++) { data[i - period] = median(&series[i - period], period); } - long double avg = median(data, entries - period); + LONG_DOUBLE avg = median(data, entries - period); freez(data); return avg; } @@ -177,13 +177,13 @@ long double moving_median(long double *series, size_t entries, size_t period) { // -------------------------------------------------------------------------------------------------------------------- // http://stackoverflow.com/a/15150143/4525767 -long double running_median_estimate(long double *series, size_t entries) { - long double median = 0.0f; - long double average = 0.0f; +LONG_DOUBLE running_median_estimate(const LONG_DOUBLE *series, size_t entries) { + LONG_DOUBLE median = 0.0f; + LONG_DOUBLE average = 0.0f; size_t i; for(i = 0; i < entries ; i++) { - long double value = series[i]; + LONG_DOUBLE value = series[i]; if(unlikely(isnan(value) || isinf(value))) continue; average += ( value - average ) * 0.1f; // rough running average. 
@@ -195,7 +195,7 @@ long double running_median_estimate(long double *series, size_t entries) { // -------------------------------------------------------------------------------------------------------------------- -long double standard_deviation(long double *series, size_t entries) { +LONG_DOUBLE standard_deviation(const LONG_DOUBLE *series, size_t entries) { if(unlikely(entries < 1)) return NAN; @@ -203,10 +203,10 @@ long double standard_deviation(long double *series, size_t entries) { return series[0]; size_t i, count = 0; - long double sum = 0; + LONG_DOUBLE sum = 0; for(i = 0; i < entries ; i++) { - long double value = series[i]; + LONG_DOUBLE value = series[i]; if(unlikely(isnan(value) || isinf(value))) continue; count++; @@ -219,10 +219,10 @@ long double standard_deviation(long double *series, size_t entries) { if(unlikely(count == 1)) return sum; - long double average = sum / (long double)count; + LONG_DOUBLE average = sum / (LONG_DOUBLE)count; for(i = 0, count = 0, sum = 0; i < entries ; i++) { - long double value = series[i]; + LONG_DOUBLE value = series[i]; if(unlikely(isnan(value) || isinf(value))) continue; count++; @@ -235,29 +235,29 @@ long double standard_deviation(long double *series, size_t entries) { if(unlikely(count == 1)) return average; - long double variance = sum / (long double)(count - 1); // remove -1 to have a population stddev + LONG_DOUBLE variance = sum / (LONG_DOUBLE)(count - 1); // remove -1 to have a population stddev - long double stddev = sqrtl(variance); + LONG_DOUBLE stddev = sqrtl(variance); return stddev; } // -------------------------------------------------------------------------------------------------------------------- -long double single_exponential_smoothing(long double *series, size_t entries, long double alpha) { +LONG_DOUBLE single_exponential_smoothing(const LONG_DOUBLE *series, size_t entries, LONG_DOUBLE alpha) { size_t i, count = 0; - long double level = 0, sum = 0; + LONG_DOUBLE level = 0, sum = 0; 
if(unlikely(isnan(alpha))) alpha = 0.3; for(i = 0; i < entries ; i++) { - long double value = series[i]; + LONG_DOUBLE value = series[i]; if(unlikely(isnan(value) || isinf(value))) continue; count++; sum += value; - long double last_level = level; + LONG_DOUBLE last_level = level; level = alpha * value + (1.0 - alpha) * last_level; } @@ -267,9 +267,9 @@ long double single_exponential_smoothing(long double *series, size_t entries, lo // -------------------------------------------------------------------------------------------------------------------- // http://grisha.org/blog/2016/02/16/triple-exponential-smoothing-forecasting-part-ii/ -long double double_exponential_smoothing(long double *series, size_t entries, long double alpha, long double beta, long double *forecast) { +LONG_DOUBLE double_exponential_smoothing(const LONG_DOUBLE *series, size_t entries, LONG_DOUBLE alpha, LONG_DOUBLE beta, LONG_DOUBLE *forecast) { size_t i, count = 0; - long double level = series[0], trend, sum; + LONG_DOUBLE level = series[0], trend, sum; if(unlikely(isnan(alpha))) alpha = 0.3; @@ -285,13 +285,13 @@ long double double_exponential_smoothing(long double *series, size_t entries, lo sum = series[0]; for(i = 1; i < entries ; i++) { - long double value = series[i]; + LONG_DOUBLE value = series[i]; if(unlikely(isnan(value) || isinf(value))) continue; count++; sum += value; - long double last_level = level; + LONG_DOUBLE last_level = level; level = alpha * value + (1.0 - alpha) * (level + trend); trend = beta * (level - last_level) + (1.0 - beta) * trend; @@ -327,24 +327,24 @@ long double double_exponential_smoothing(long double *series, size_t entries, lo * s[t] = γ (Y[t] / a[t]) + (1-γ) s[t-p] */ static int __HoltWinters( - long double *series, + const LONG_DOUBLE *series, int entries, // start_time + h - long double alpha, // alpha parameter of Holt-Winters Filter. - long double beta, // beta parameter of Holt-Winters Filter. If set to 0, the function will do exponential smoothing. 
- long double gamma, // gamma parameter used for the seasonal component. If set to 0, an non-seasonal model is fitted. + LONG_DOUBLE alpha, // alpha parameter of Holt-Winters Filter. + LONG_DOUBLE beta, // beta parameter of Holt-Winters Filter. If set to 0, the function will do exponential smoothing. + LONG_DOUBLE gamma, // gamma parameter used for the seasonal component. If set to 0, an non-seasonal model is fitted. - int *seasonal, - int *period, - long double *a, // Start value for level (a[0]). - long double *b, // Start value for trend (b[0]). - long double *s, // Vector of start values for the seasonal component (s_1[0] ... s_p[0]) + const int *seasonal, + const int *period, + const LONG_DOUBLE *a, // Start value for level (a[0]). + const LONG_DOUBLE *b, // Start value for trend (b[0]). + LONG_DOUBLE *s, // Vector of start values for the seasonal component (s_1[0] ... s_p[0]) /* return values */ - long double *SSE, // The final sum of squared errors achieved in optimizing - long double *level, // Estimated values for the level component (size entries - t + 2) - long double *trend, // Estimated values for the trend component (size entries - t + 2) - long double *season // Estimated values for the seasonal component (size entries - t + 2) + LONG_DOUBLE *SSE, // The final sum of squared errors achieved in optimizing + LONG_DOUBLE *level, // Estimated values for the level component (size entries - t + 2) + LONG_DOUBLE *trend, // Estimated values for the trend component (size entries - t + 2) + LONG_DOUBLE *season // Estimated values for the seasonal component (size entries - t + 2) ) { if(unlikely(entries < 4)) @@ -352,13 +352,13 @@ static int __HoltWinters( int start_time = 2; - long double res = 0, xhat = 0, stmp = 0; + LONG_DOUBLE res = 0, xhat = 0, stmp = 0; int i, i0, s0; /* copy start values to the beginning of the vectors */ level[0] = *a; if(beta > 0) trend[0] = *b; - if(gamma > 0) memcpy(season, s, *period * sizeof(long double)); + if(gamma > 0) 
memcpy(season, s, *period * sizeof(LONG_DOUBLE)); for(i = start_time - 1; i < entries; i++) { /* indices for period i */ @@ -404,7 +404,7 @@ static int __HoltWinters( return 1; } -long double holtwinters(long double *series, size_t entries, long double alpha, long double beta, long double gamma, long double *forecast) { +LONG_DOUBLE holtwinters(const LONG_DOUBLE *series, size_t entries, LONG_DOUBLE alpha, LONG_DOUBLE beta, LONG_DOUBLE gamma, LONG_DOUBLE *forecast) { if(unlikely(isnan(alpha))) alpha = 0.3; @@ -416,15 +416,15 @@ long double holtwinters(long double *series, size_t entries, long double alpha, int seasonal = 0; int period = 0; - long double a0 = series[0]; - long double b0 = 0; - long double s[] = {}; + LONG_DOUBLE a0 = series[0]; + LONG_DOUBLE b0 = 0; + LONG_DOUBLE s[] = {}; - long double errors = 0.0; + LONG_DOUBLE errors = 0.0; size_t nb_computations = entries; - long double *estimated_level = callocz(nb_computations, sizeof(long double)); - long double *estimated_trend = callocz(nb_computations, sizeof(long double)); - long double *estimated_season = callocz(nb_computations, sizeof(long double)); + LONG_DOUBLE *estimated_level = callocz(nb_computations, sizeof(LONG_DOUBLE)); + LONG_DOUBLE *estimated_trend = callocz(nb_computations, sizeof(LONG_DOUBLE)); + LONG_DOUBLE *estimated_season = callocz(nb_computations, sizeof(LONG_DOUBLE)); int ret = __HoltWinters( series, @@ -443,7 +443,7 @@ long double holtwinters(long double *series, size_t entries, long double alpha, estimated_season ); - long double value = estimated_level[nb_computations - 1]; + LONG_DOUBLE value = estimated_level[nb_computations - 1]; if(forecast) *forecast = 0.0; diff --git a/src/statistical.h b/src/statistical.h index 844e579bb..675389021 100644 --- a/src/statistical.h +++ b/src/statistical.h @@ -1,19 +1,19 @@ #ifndef NETDATA_STATISTICAL_H #define NETDATA_STATISTICAL_H -extern long double average(long double *series, size_t entries); -extern long double moving_average(long double 
*series, size_t entries, size_t period); -extern long double median(long double *series, size_t entries); -extern long double moving_median(long double *series, size_t entries, size_t period); -extern long double running_median_estimate(long double *series, size_t entries); -extern long double standard_deviation(long double *series, size_t entries); -extern long double single_exponential_smoothing(long double *series, size_t entries, long double alpha); -extern long double double_exponential_smoothing(long double *series, size_t entries, long double alpha, long double beta, long double *forecast); -extern long double holtwinters(long double *series, size_t entries, long double alpha, long double beta, long double gamma, long double *forecast); -extern long double sum_and_count(long double *series, size_t entries, size_t *count); -extern long double sum(long double *series, size_t entries); -extern long double median_on_sorted_series(long double *series, size_t entries); -extern long double *copy_series(long double *series, size_t entries); -extern void sort_series(long double *series, size_t entries); +extern LONG_DOUBLE average(const LONG_DOUBLE *series, size_t entries); +extern LONG_DOUBLE moving_average(const LONG_DOUBLE *series, size_t entries, size_t period); +extern LONG_DOUBLE median(const LONG_DOUBLE *series, size_t entries); +extern LONG_DOUBLE moving_median(const LONG_DOUBLE *series, size_t entries, size_t period); +extern LONG_DOUBLE running_median_estimate(const LONG_DOUBLE *series, size_t entries); +extern LONG_DOUBLE standard_deviation(const LONG_DOUBLE *series, size_t entries); +extern LONG_DOUBLE single_exponential_smoothing(const LONG_DOUBLE *series, size_t entries, LONG_DOUBLE alpha); +extern LONG_DOUBLE double_exponential_smoothing(const LONG_DOUBLE *series, size_t entries, LONG_DOUBLE alpha, LONG_DOUBLE beta, LONG_DOUBLE *forecast); +extern LONG_DOUBLE holtwinters(const LONG_DOUBLE *series, size_t entries, LONG_DOUBLE alpha, LONG_DOUBLE beta, 
LONG_DOUBLE gamma, LONG_DOUBLE *forecast); +extern LONG_DOUBLE sum_and_count(const LONG_DOUBLE *series, size_t entries, size_t *count); +extern LONG_DOUBLE sum(const LONG_DOUBLE *series, size_t entries); +extern LONG_DOUBLE median_on_sorted_series(const LONG_DOUBLE *series, size_t entries); +extern LONG_DOUBLE *copy_series(const LONG_DOUBLE *series, size_t entries); +extern void sort_series(LONG_DOUBLE *series, size_t entries); #endif //NETDATA_STATISTICAL_H diff --git a/src/statsd.c b/src/statsd.c index 39041ca88..44ebd8894 100644 --- a/src/statsd.c +++ b/src/statsd.c @@ -36,7 +36,7 @@ // data specific to each metric type typedef struct statsd_metric_gauge { - long double value; + LONG_DOUBLE value; } STATSD_METRIC_GAUGE; typedef struct statsd_metric_counter { // counter and meter @@ -65,7 +65,7 @@ typedef struct statsd_histogram_extensions { size_t size; size_t used; - long double *values; // dynamic array of values collected + LONG_DOUBLE *values; // dynamic array of values collected } STATSD_METRIC_HISTOGRAM_EXTENSIONS; typedef struct statsd_metric_histogram { // histogram and timer @@ -175,6 +175,8 @@ typedef struct statsd_app_chart_dimension { collected_number multiplier; // the multipler of the dimension collected_number divisor; // the divisor of the dimension + RRDDIM_FLAGS flags; // the RRDDIM flags for this dimension + STATSD_APP_CHART_DIM_VALUE_TYPE value_type; // which value to use of the source metric RRDDIM *rd; // a pointer to the RRDDIM that has been created for this dimension @@ -218,6 +220,17 @@ typedef struct statsd_app { // -------------------------------------------------------------------------------------------------------------------- // global statsd data +struct collection_thread_status { + int status; + size_t max_sockets; + + netdata_thread_t thread; + struct rusage rusage; + RRDSET *st_cpu; + RRDDIM *rd_user; + RRDDIM *rd_system; +}; + static struct statsd { STATSD_INDEX gauges; STATSD_INDEX counters; @@ -227,6 +240,9 @@ static struct 
statsd { STATSD_INDEX sets; size_t unknown_types; size_t socket_errors; + size_t tcp_socket_connects; + size_t tcp_socket_disconnects; + size_t tcp_socket_connected; size_t tcp_socket_reads; size_t tcp_packets_received; size_t tcp_bytes_read; @@ -238,24 +254,30 @@ static struct statsd { int update_every; SIMPLE_PATTERN *charts_for; + size_t tcp_idle_timeout; size_t decimal_detail; size_t private_charts; size_t max_private_charts; size_t max_private_charts_hard; RRD_MEMORY_MODE private_charts_memory_mode; long private_charts_rrd_history_entries; + int private_charts_hidden; STATSD_APP *apps; size_t recvmmsg_size; size_t histogram_increase_step; double histogram_percentile; char *histogram_percentile_str; + int threads; + struct collection_thread_status *collection_threads_status; + LISTEN_SOCKETS sockets; } statsd = { .enabled = 1, .max_private_charts = 200, .max_private_charts_hard = 1000, + .private_charts_hidden = 0, .recvmmsg_size = 10, .decimal_detail = STATSD_DECIMAL_DETAIL, @@ -314,10 +336,13 @@ static struct statsd { STATSD_FIRST_PTR_MUTEX_INIT }, + .tcp_idle_timeout = 600, + .apps = NULL, .histogram_percentile = 95.0, .histogram_increase_step = 10, .threads = 0, + .collection_threads_status = NULL, .sockets = { .config_section = CONFIG_SECTION_STATSD, .default_bind_to = "udp:localhost tcp:localhost", @@ -336,7 +361,7 @@ static int statsd_metric_compare(void* a, void* b) { else return strcmp(((STATSD_METRIC *)a)->name, ((STATSD_METRIC *)b)->name); } -static inline STATSD_METRIC *stasd_metric_index_find(STATSD_INDEX *index, const char *name, uint32_t hash) { +static inline STATSD_METRIC *statsd_metric_index_find(STATSD_INDEX *index, const char *name, uint32_t hash) { STATSD_METRIC tmp; tmp.name = name; tmp.hash = (hash)?hash:simple_hash(tmp.name); @@ -349,7 +374,7 @@ static inline STATSD_METRIC *statsd_find_or_add_metric(STATSD_INDEX *index, cons uint32_t hash = simple_hash(name); - STATSD_METRIC *m = stasd_metric_index_find(index, name, hash); + 
STATSD_METRIC *m = statsd_metric_index_find(index, name, hash); if(unlikely(!m)) { debug(D_STATSD, "Creating new %s metric '%s'", index->name, name); @@ -387,8 +412,8 @@ static inline STATSD_METRIC *statsd_find_or_add_metric(STATSD_INDEX *index, cons // -------------------------------------------------------------------------------------------------------------------- // statsd parsing numbers -static inline long double statsd_parse_float(const char *v, long double def) { - long double value; +static inline LONG_DOUBLE statsd_parse_float(const char *v, LONG_DOUBLE def) { + LONG_DOUBLE value; if(likely(v && *v)) { char *e = NULL; @@ -426,6 +451,10 @@ static inline void statsd_reset_metric(STATSD_METRIC *m) { m->count = 0; } +static inline int value_is_zinit(const char *value) { + return (value && *value == 'z' && *++value == 'i' && *++value == 'n' && *++value == 'i' && *++value == 't' && *++value == '\0'); +} + static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, const char *sampling) { if(unlikely(!value || !*value)) { error("STATSD: metric '%s' of type gauge, with empty value is ignored.", m->name); @@ -437,13 +466,18 @@ static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, con statsd_reset_metric(m); } - if(unlikely(*value == '+' || *value == '-')) - m->gauge.value += statsd_parse_float(value, 1.0) / statsd_parse_float(sampling, 1.0); - else - m->gauge.value = statsd_parse_float(value, 1.0) / statsd_parse_float(sampling, 1.0); + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + if (unlikely(*value == '+' || *value == '-')) + m->gauge.value += statsd_parse_float(value, 1.0) / statsd_parse_float(sampling, 1.0); + else + m->gauge.value = statsd_parse_float(value, 1.0) / statsd_parse_float(sampling, 1.0); - m->events++; - m->count++; + m->events++; + m->count++; + } } static inline void statsd_process_counter(STATSD_METRIC *m, const char *value, const char 
*sampling) { @@ -451,10 +485,15 @@ static inline void statsd_process_counter(STATSD_METRIC *m, const char *value, c if(unlikely(m->reset)) statsd_reset_metric(m); - m->counter.value += llrintl((long double)statsd_parse_int(value, 1) / statsd_parse_float(sampling, 1.0)); + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + m->counter.value += llrintl((LONG_DOUBLE) statsd_parse_int(value, 1) / statsd_parse_float(sampling, 1.0)); - m->events++; - m->count++; + m->events++; + m->count++; + } } static inline void statsd_process_meter(STATSD_METRIC *m, const char *value, const char *sampling) { @@ -473,17 +512,22 @@ static inline void statsd_process_histogram(STATSD_METRIC *m, const char *value, statsd_reset_metric(m); } - if(unlikely(m->histogram.ext->used == m->histogram.ext->size)) { - netdata_mutex_lock(&m->histogram.ext->mutex); - m->histogram.ext->size += statsd.histogram_increase_step; - m->histogram.ext->values = reallocz(m->histogram.ext->values, sizeof(long double) * m->histogram.ext->size); - netdata_mutex_unlock(&m->histogram.ext->mutex); + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything } + else { + if (unlikely(m->histogram.ext->used == m->histogram.ext->size)) { + netdata_mutex_lock(&m->histogram.ext->mutex); + m->histogram.ext->size += statsd.histogram_increase_step; + m->histogram.ext->values = reallocz(m->histogram.ext->values, sizeof(LONG_DOUBLE) * m->histogram.ext->size); + netdata_mutex_unlock(&m->histogram.ext->mutex); + } - m->histogram.ext->values[m->histogram.ext->used++] = statsd_parse_float(value, 1.0) / statsd_parse_float(sampling, 1.0); + m->histogram.ext->values[m->histogram.ext->used++] = statsd_parse_float(value, 1.0) / statsd_parse_float(sampling, 1.0); - m->events++; - m->count++; + m->events++; + m->count++; + } } static inline void statsd_process_timer(STATSD_METRIC *m, const char *value, const char *sampling) { @@ -510,19 +554,24 
@@ static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { statsd_reset_metric(m); } - if(unlikely(!m->set.dict)) { - m->set.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS|DICTIONARY_FLAG_VALUE_LINK_DONT_CLONE); + if (unlikely(!m->set.dict)) { + m->set.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS | DICTIONARY_FLAG_VALUE_LINK_DONT_CLONE); m->set.unique = 0; } - void *t = dictionary_get(m->set.dict, value); - if(unlikely(!t)) { - dictionary_set(m->set.dict, value, NULL, 1); - m->set.unique++; + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything } + else { + void *t = dictionary_get(m->set.dict, value); + if (unlikely(!t)) { + dictionary_set(m->set.dict, value, NULL, 1); + m->set.unique++; + } - m->events++; - m->count++; + m->events++; + m->count++; + } } @@ -678,6 +727,7 @@ struct statsd_tcp { #ifdef HAVE_RECVMMSG struct statsd_udp { + int *running; STATSD_SOCKET_DATA_TYPE type; size_t size; struct iovec *iovecs; @@ -685,54 +735,58 @@ struct statsd_udp { }; #else struct statsd_udp { + int *running; STATSD_SOCKET_DATA_TYPE type; char buffer[STATSD_UDP_BUFFER_SIZE]; }; #endif // new TCP client connected -static void *statsd_add_callback(int fd, int socktype, short int *events) { - (void)fd; - (void)socktype; +static void *statsd_add_callback(POLLINFO *pi, short int *events, void *data) { + (void)pi; + (void)data; + *events = POLLIN; - struct statsd_tcp *data = (struct statsd_tcp *)callocz(sizeof(struct statsd_tcp) + STATSD_TCP_BUFFER_SIZE, 1); - data->type = STATSD_SOCKET_DATA_TYPE_TCP; - data->size = STATSD_TCP_BUFFER_SIZE - 1; + struct statsd_tcp *t = (struct statsd_tcp *)callocz(sizeof(struct statsd_tcp) + STATSD_TCP_BUFFER_SIZE, 1); + t->type = STATSD_SOCKET_DATA_TYPE_TCP; + t->size = STATSD_TCP_BUFFER_SIZE - 1; + statsd.tcp_socket_connects++; + statsd.tcp_socket_connected++; - return data; + return t; } // TCP client disconnected -static void statsd_del_callback(int fd, int socktype, void 
*data) { - (void)fd; - (void)socktype; +static void statsd_del_callback(POLLINFO *pi) { + struct statsd_tcp *t = pi->data; - if(data) { - struct statsd_tcp *t = data; + if(likely(t)) { if(t->type == STATSD_SOCKET_DATA_TYPE_TCP) { if(t->len != 0) { statsd.socket_errors++; error("STATSD: client is probably sending unterminated metrics. Closed socket left with '%s'. Trying to process it.", t->buffer); statsd_process(t->buffer, t->len, 0); } + statsd.tcp_socket_disconnects++; + statsd.tcp_socket_connected--; } else error("STATSD: internal error: received socket data type is %d, but expected %d", (int)t->type, (int)STATSD_SOCKET_DATA_TYPE_TCP); - freez(data); + freez(t); } - - return; } // Receive data -static int statsd_rcv_callback(int fd, int socktype, void *data, short int *events) { +static int statsd_rcv_callback(POLLINFO *pi, short int *events) { *events = POLLIN; - switch(socktype) { + int fd = pi->fd; + + switch(pi->socktype) { case SOCK_STREAM: { - struct statsd_tcp *d = (struct statsd_tcp *)data; + struct statsd_tcp *d = (struct statsd_tcp *)pi->data; if(unlikely(!d)) { error("STATSD: internal error: expected TCP data pointer is NULL"); statsd.socket_errors++; @@ -784,7 +838,7 @@ static int statsd_rcv_callback(int fd, int socktype, void *data, short int *even } case SOCK_DGRAM: { - struct statsd_udp *d = (struct statsd_udp *)data; + struct statsd_udp *d = (struct statsd_udp *)pi->data; if(unlikely(!d)) { error("STATSD: internal error: expected UDP data pointer is NULL"); statsd.socket_errors++; @@ -849,7 +903,7 @@ static int statsd_rcv_callback(int fd, int socktype, void *data, short int *even } default: { - error("STATSD: internal error: unknown socktype %d on socket %d", socktype, fd); + error("STATSD: internal error: unknown socktype %d on socket %d", pi->socktype, fd); statsd.socket_errors++; return -1; } @@ -858,21 +912,27 @@ static int statsd_rcv_callback(int fd, int socktype, void *data, short int *even return 0; } -static int statsd_snd_callback(int 
fd, int socktype, void *data, short int *events) { - (void)fd; - (void)socktype; - (void)data; +static int statsd_snd_callback(POLLINFO *pi, short int *events) { + (void)pi; (void)events; error("STATSD: snd_callback() called, but we never requested to send data to statsd clients."); return -1; } +static void statsd_timer_callback(void *timer_data) { + struct collection_thread_status *status = timer_data; + getrusage(RUSAGE_THREAD, &status->rusage); +} + // -------------------------------------------------------------------------------------------------------------------- // statsd child thread to collect metrics from network void statsd_collector_thread_cleanup(void *data) { struct statsd_udp *d = data; + *d->running = 0; + + info("cleaning up..."); #ifdef HAVE_RECVMMSG size_t i; @@ -887,18 +947,15 @@ void statsd_collector_thread_cleanup(void *data) { } void *statsd_collector_thread(void *ptr) { - int id = *((int *)ptr); - - info("STATSD collector thread No %d created with task id %d", id + 1, gettid()); + struct collection_thread_status *status = ptr; + status->status = 1; - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); - - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); + info("STATSD collector thread started with taskid %d", gettid()); struct statsd_udp *d = callocz(sizeof(struct statsd_udp), 1); - pthread_cleanup_push(statsd_collector_thread_cleanup, d); + d->running = &status->status; + + netdata_thread_cleanup_push(statsd_collector_thread_cleanup, d); #ifdef HAVE_RECVMMSG d->type = STATSD_SOCKET_DATA_TYPE_UDP; @@ -920,14 +977,17 @@ void *statsd_collector_thread(void *ptr) { , statsd_del_callback , statsd_rcv_callback , statsd_snd_callback + , statsd_timer_callback , NULL , (void *)d + , 0 // tcp request timeout, 0 = disabled + , statsd.tcp_idle_timeout // tcp idle timeout, 0 = disabled + , statsd.update_every * 1000 + , 
ptr // timer_data + , status->max_sockets ); - pthread_cleanup_pop(1); - - debug(D_WEB_CLIENT, "STATSD: exit!"); - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } @@ -977,6 +1037,7 @@ static STATSD_APP_CHART_DIM *add_dimension_to_app_chart( , const char *dim_name , collected_number multiplier , collected_number divisor + , RRDDIM_FLAGS flags , STATSD_APP_CHART_DIM_VALUE_TYPE value_type ) { STATSD_APP_CHART_DIM *dim = callocz(sizeof(STATSD_APP_CHART_DIM), 1); @@ -988,6 +1049,7 @@ static STATSD_APP_CHART_DIM *add_dimension_to_app_chart( dim->multiplier = multiplier; dim->divisor = divisor; dim->value_type = value_type; + dim->flags = flags; if(!dim->multiplier) dim->multiplier = 1; @@ -1014,23 +1076,23 @@ static STATSD_APP_CHART_DIM *add_dimension_to_app_chart( return dim; } -int statsd_readfile(const char *path, const char *filename) { +static int statsd_readfile(const char *path, const char *filename, STATSD_APP *app, STATSD_APP_CHART *chart, DICTIONARY *dict) { debug(D_STATSD, "STATSD configuration reading file '%s/%s'", path, filename); - char buffer[STATSD_CONF_LINE_MAX + 1]; + char *buffer = mallocz(STATSD_CONF_LINE_MAX + 1); + + if(filename[0] == '/') + strncpyz(buffer, filename, STATSD_CONF_LINE_MAX); + else + snprintfz(buffer, STATSD_CONF_LINE_MAX, "%s/%s", path, filename); - FILE *fp = NULL; - snprintfz(buffer, STATSD_CONF_LINE_MAX, "%s/%s", path, filename); - fp = fopen(buffer, "r"); + FILE *fp = fopen(buffer, "r"); if(!fp) { error("STATSD: cannot open file '%s'.", buffer); + freez(buffer); return -1; } - STATSD_APP *app = NULL; - STATSD_APP_CHART *chart = NULL; - DICTIONARY *dict = NULL; - size_t line = 0; char *s; while(fgets(buffer, STATSD_CONF_LINE_MAX, fp) != NULL) { @@ -1042,8 +1104,19 @@ int statsd_readfile(const char *path, const char *filename) { debug(D_STATSD, "STATSD: ignoring line %zu of file '%s/%s', it is empty.", line, path, filename); continue; } + debug(D_STATSD, "STATSD: processing line %zu of file '%s/%s': %s", line, 
path, filename, buffer); + if(*s == 'i' && strncmp(s, "include", 7) == 0) { + s = trim(&s[7]); + if(s && *s) + statsd_readfile(path, s, app, chart, dict); + else + error("STATSD: ignoring line %zu of file '%s/%s', include filename is empty", line, path, s); + + continue; + } + int len = (int) strlen(s); if (*s == '[' && s[len - 1] == ']') { // new section @@ -1061,6 +1134,12 @@ int statsd_readfile(const char *path, const char *filename) { statsd.apps = app; chart = NULL; dict = NULL; + + { + char lineandfile[FILENAME_MAX + 1]; + snprintfz(lineandfile, FILENAME_MAX, "%zu@%s", line, filename); + app->source = strdupz(lineandfile); + } } else if(app) { if(!strcmp(s, "dictionary")) { @@ -1086,6 +1165,12 @@ int statsd_readfile(const char *path, const char *filename) { chart->next = app->charts; app->charts = chart; + + { + char lineandfile[FILENAME_MAX + 1]; + snprintfz(lineandfile, FILENAME_MAX, "%zu@%s", line, filename); + chart->source = strdupz(lineandfile); + } } } else @@ -1135,7 +1220,7 @@ int statsd_readfile(const char *path, const char *filename) { } else if (!strcmp(name, "metrics")) { simple_pattern_free(app->metrics); - app->metrics = simple_pattern_create(value, SIMPLE_PATTERN_EXACT); + app->metrics = simple_pattern_create(value, NULL, SIMPLE_PATTERN_EXACT); } else if (!strcmp(name, "private charts")) { if (!strcmp(value, "yes") || !strcmp(value, "on")) @@ -1209,6 +1294,14 @@ int statsd_readfile(const char *path, const char *filename) { char *type = words[i++]; char *multipler = words[i++]; char *divisor = words[i++]; + char *options = words[i++]; + + RRDDIM_FLAGS flags = RRDDIM_FLAG_NONE; + if(options && *options) { + if(strstr(options, "hidden") != NULL) flags |= RRDDIM_FLAG_HIDDEN; + if(strstr(options, "noreset") != NULL) flags |= RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS; + if(strstr(options, "nooverflow") != NULL) flags |= RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS; + } if(!pattern) { if(app->dict) { @@ -1232,11 +1325,12 @@ int statsd_readfile(const 
char *path, const char *filename) { , dim_name , (multipler && *multipler)?str2l(multipler):1 , (divisor && *divisor)?str2l(divisor):1 + , flags , string2valuetype(type, line, path, filename) ); if(pattern) - dim->metric_pattern = simple_pattern_create(dim->metric, SIMPLE_PATTERN_EXACT); + dim->metric_pattern = simple_pattern_create(dim->metric, NULL, SIMPLE_PATTERN_EXACT); } else { error("STATSD: ignoring line %zu ('%s') of file '%s/%s'. Unknown keyword for the [%s] section.", line, name, path, filename, chart->id); @@ -1245,6 +1339,7 @@ int statsd_readfile(const char *path, const char *filename) { } } + freez(buffer); fclose(fp); return 0; } @@ -1285,7 +1380,7 @@ static void statsd_readdir(const char *path) { else if((de->d_type == DT_LNK || de->d_type == DT_REG || de->d_type == DT_UNKNOWN) && len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) { - statsd_readfile(path, de->d_name); + statsd_readfile(path, de->d_name, NULL, NULL, NULL); } else debug(D_STATSD, "STATSD: ignoring file '%s'", de->d_name); @@ -1351,7 +1446,7 @@ static inline RRDSET *statsd_private_rrdset_create( , title // title , units // units , "statsd" // plugin - , NULL // module + , "private_chart" // module , priority // priority , update_every // update every , chart_type // chart type @@ -1359,6 +1454,10 @@ static inline RRDSET *statsd_private_rrdset_create( , history // history ); rrdset_flag_set(st, RRDSET_FLAG_STORE_FIRST); + + if(statsd.private_charts_hidden) + rrdset_flag_set(st, RRDSET_FLAG_HIDDEN); + // rrdset_flag_set(st, RRDSET_FLAG_DEBUG); return st; } @@ -1370,14 +1469,20 @@ static inline void statsd_private_chart_gauge(STATSD_METRIC *m) { char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1]; statsd_get_metric_type_and_id(m, type, id, "gauge", RRD_ID_LENGTH_MAX); + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "statsd_gauge.%s", m->name); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for gauge 
%s", m->name); + m->st = statsd_private_rrdset_create( m , type , id , NULL // name , "gauges" // family (submenu) - , m->name // context - , m->name // title + , context // context + , title // title , "value" // units , STATSD_CHART_PRIORITY , statsd.update_every @@ -1406,14 +1511,20 @@ static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1]; statsd_get_metric_type_and_id(m, type, id, dim, RRD_ID_LENGTH_MAX); + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "statsd_%s.%s", dim, m->name); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for %s %s", dim, m->name); + m->st = statsd_private_rrdset_create( m , type , id , NULL // name , family // family (submenu) - , m->name // context - , m->name // title + , context // context + , title // title , "events/s" // units , STATSD_CHART_PRIORITY , statsd.update_every @@ -1442,14 +1553,20 @@ static inline void statsd_private_chart_set(STATSD_METRIC *m) { char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1]; statsd_get_metric_type_and_id(m, type, id, "set", RRD_ID_LENGTH_MAX); + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "statsd_set.%s", m->name); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for set %s", m->name); + m->st = statsd_private_rrdset_create( m , type , id , NULL // name , "sets" // family (submenu) - , m->name // context - , m->name // title + , context // context + , title // title , "entries" // units , STATSD_CHART_PRIORITY , statsd.update_every @@ -1478,14 +1595,20 @@ static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, con char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1]; statsd_get_metric_type_and_id(m, type, id, dim, RRD_ID_LENGTH_MAX); + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, 
"statsd_%s.%s", dim, m->name); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for %s %s", dim, m->name); + m->st = statsd_private_rrdset_create( m , type , id , NULL // name , family // family (submenu) - , m->name // context - , m->name // title + , context // context + , title // title , units // units , STATSD_CHART_PRIORITY , statsd.update_every @@ -1599,7 +1722,7 @@ static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char int updated = 0; if(m->count && !m->reset && m->histogram.ext->used > 0) { size_t len = m->histogram.ext->used; - long double *series = m->histogram.ext->values; + LONG_DOUBLE *series = m->histogram.ext->values; sort_series(series, len); m->histogram.ext->last_min = (collected_number)roundl(series[0] * statsd.decimal_detail); @@ -1779,6 +1902,7 @@ static inline void check_if_metric_is_for_app(STATSD_INDEX *index, STATSD_METRIC , final_name , dim->multiplier , dim->divisor + , dim->flags , dim->value_type ); @@ -1834,10 +1958,12 @@ static inline RRDDIM *statsd_add_dim_to_app_chart(STATSD_APP *app, STATSD_APP_CH } dim->rd = rrddim_add(chart->st, metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); + if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; return dim->rd; } dim->rd = rrddim_add(chart->st, dim->metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); + if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; return dim->rd; } @@ -1855,7 +1981,7 @@ static inline void statsd_update_app_chart(STATSD_APP *app, STATSD_APP_CHART *ch , chart->title // title , chart->units // units , "statsd" // plugin - , NULL // module + , chart->source // module , chart->priority // priority , statsd.update_every // update every , chart->chart_type // chart type @@ -1903,10 +2029,23 @@ static inline void statsd_update_all_app_charts(void) { // debug(D_STATSD, "completed update of app charts"); } +const char 
*statsd_metric_type_string(STATSD_METRIC_TYPE type) { + switch(type) { + case STATSD_METRIC_TYPE_COUNTER: return "counter"; + case STATSD_METRIC_TYPE_GAUGE: return "gauge"; + case STATSD_METRIC_TYPE_HISTOGRAM: return "histogram"; + case STATSD_METRIC_TYPE_METER: return "meter"; + case STATSD_METRIC_TYPE_SET: return "set"; + case STATSD_METRIC_TYPE_TIMER: return "timer"; + default: return "unknown"; + } +} + static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_metric)(STATSD_METRIC *)) { STATSD_METRIC *m; for(m = index->first; m ; m = m->next) { if(unlikely(!(m->options & STATSD_METRIC_OPTION_CHECKED_IN_APPS))) { + log_access("NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name); check_if_metric_is_for_app(index, m); m->options |= STATSD_METRIC_OPTION_CHECKED_IN_APPS; } @@ -1938,30 +2077,37 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_ // -------------------------------------------------------------------------------------- // statsd main thread -int statsd_listen_sockets_setup(void) { +static int statsd_listen_sockets_setup(void) { return listen_sockets_setup(&statsd.sockets); } -void statsd_main_cleanup(void *data) { - pthread_t *threads = data; - - int i; - for(i = 0; i < statsd.threads ;i++) - pthread_cancel(threads[i]); +static void statsd_main_cleanup(void *data) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + info("cleaning up..."); + + if (statsd.collection_threads_status) { + int i; + for (i = 0; i < statsd.threads; i++) { + if(statsd.collection_threads_status[i].status) { + info("STATSD: stopping data collection thread %d...", i + 1); + netdata_thread_cancel(statsd.collection_threads_status[i].thread); + } + else { + info("STATSD: data collection thread %d found stopped.", i + 1); + } + } + } + info("STATSD: closing sockets..."); listen_sockets_close(&statsd.sockets); + + 
info("STATSD: cleanup completed."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } void *statsd_main(void *ptr) { - (void)ptr; - - info("STATSD main thread created with task id %d", gettid()); - - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); - - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); + netdata_thread_cleanup_push(statsd_main_cleanup, ptr); // ---------------------------------------------------------------------------------------------------------------- // statsd configuration @@ -1979,12 +2125,14 @@ void *statsd_main(void *ptr) { statsd.recvmmsg_size = (size_t)config_get_number(CONFIG_SECTION_STATSD, "udp messages to process at once", (long long)statsd.recvmmsg_size); #endif - statsd.charts_for = simple_pattern_create(config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), SIMPLE_PATTERN_EXACT); + statsd.charts_for = simple_pattern_create(config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT); statsd.max_private_charts = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts allowed", (long long)statsd.max_private_charts); statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts * 5); statsd.private_charts_memory_mode = rrd_memory_mode_id(config_get(CONFIG_SECTION_STATSD, "private charts memory mode", rrd_memory_mode_name(default_rrd_memory_mode))); statsd.private_charts_rrd_history_entries = (int)config_get_number(CONFIG_SECTION_STATSD, "private charts history", default_rrd_history_entries); statsd.decimal_detail = (size_t)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail); + statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle 
tcp clients after seconds", (long long int)statsd.tcp_idle_timeout); + statsd.private_charts_hidden = (int)config_get_boolean(CONFIG_SECTION_STATSD, "private charts hidden", statsd.private_charts_hidden); statsd.histogram_percentile = (double)config_get_float(CONFIG_SECTION_STATSD, "histograms and timers percentile (percentThreshold)", statsd.histogram_percentile); if(isless(statsd.histogram_percentile, 0) || isgreater(statsd.histogram_percentile, 100)) { @@ -2024,6 +2172,8 @@ void *statsd_main(void *ptr) { if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on timers (deleteTimers)", 0)) statsd.timers.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + size_t max_sockets = (size_t)config_get_number(CONFIG_SECTION_STATSD, "statsd server max TCP sockets", (long long int)(rlimit_nofile.rlim_cur / 4)); + #ifdef STATSD_MULTITHREADED statsd.threads = (int)config_get_number(CONFIG_SECTION_STATSD, "threads", processors); if(statsd.threads < 1) { @@ -2050,22 +2200,19 @@ void *statsd_main(void *ptr) { statsd_listen_sockets_setup(); if(!statsd.sockets.opened) { error("STATSD: No statsd sockets to listen to. 
statsd will be disabled."); - pthread_exit(NULL); + goto cleanup; } - pthread_t threads[statsd.threads]; - int i; + statsd.collection_threads_status = callocz((size_t)statsd.threads, sizeof(struct collection_thread_status)); + int i; for(i = 0; i < statsd.threads ;i++) { - if(pthread_create(&threads[i], NULL, statsd_collector_thread, &i)) - error("STATSD: failed to create child thread."); - - else if(pthread_detach(threads[i])) - error("STATSD: cannot request detach of child thread."); + statsd.collection_threads_status[i].max_sockets = max_sockets / statsd.threads; + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STATSD_COLLECTOR[%d]", i + 1); + netdata_thread_create(&statsd.collection_threads_status[i].thread, tag, NETDATA_THREAD_OPTION_DEFAULT, statsd_collector_thread, &statsd.collection_threads_status[i]); } - pthread_cleanup_push(statsd_main_cleanup, &threads); - // ---------------------------------------------------------------------------------------------------------------- // statsd monitoring charts @@ -2077,9 +2224,9 @@ void *statsd_main(void *ptr) { , NULL , "Metrics in the netdata statsd database" , "metrics" - , "netdata" + , "statsd" , "stats" - , 132000 + , 132010 , statsd.update_every , RRDSET_TYPE_STACKED ); @@ -2098,9 +2245,9 @@ void *statsd_main(void *ptr) { , NULL , "Events processed by the netdata statsd server" , "events/s" - , "netdata" + , "statsd" , "stats" - , 132001 + , 132011 , statsd.update_every , RRDSET_TYPE_STACKED ); @@ -2121,9 +2268,9 @@ void *statsd_main(void *ptr) { , NULL , "Read operations made by the netdata statsd server" , "reads/s" - , "netdata" + , "statsd" , "stats" - , 132002 + , 132012 , statsd.update_every , RRDSET_TYPE_STACKED ); @@ -2140,7 +2287,7 @@ void *statsd_main(void *ptr) { , "kilobits/s" , "netdata" , "stats" - , 132003 + , 132013 , statsd.update_every , RRDSET_TYPE_STACKED ); @@ -2157,13 +2304,46 @@ void *statsd_main(void *ptr) { , "packets/s" , "netdata" , "stats" - , 
132004 + , 132014 , statsd.update_every , RRDSET_TYPE_STACKED ); RRDDIM *rd_packets_tcp = rrddim_add(st_packets, "tcp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); RRDDIM *rd_packets_udp = rrddim_add(st_packets, "udp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + RRDSET *st_tcp_connects = rrdset_create_localhost( + "netdata" + , "tcp_connects" + , NULL + , "statsd" + , NULL + , "statsd server TCP connects and disconnects" + , "events" + , "statsd" + , "stats" + , 132015 + , statsd.update_every + , RRDSET_TYPE_LINE + ); + RRDDIM *rd_tcp_connects = rrddim_add(st_tcp_connects, "connects", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + RRDDIM *rd_tcp_disconnects = rrddim_add(st_tcp_connects, "disconnects", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + RRDSET *st_tcp_connected = rrdset_create_localhost( + "netdata" + , "tcp_connected" + , NULL + , "statsd" + , NULL + , "statsd server TCP connected sockets" + , "connected" + , "statsd" + , "stats" + , 132016 + , statsd.update_every + , RRDSET_TYPE_LINE + ); + RRDDIM *rd_tcp_connected = rrddim_add(st_tcp_connected, "connected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + RRDSET *st_pcharts = rrdset_create_localhost( "netdata" , "private_charts" @@ -2172,27 +2352,68 @@ void *statsd_main(void *ptr) { , NULL , "Private metric charts created by the netdata statsd server" , "charts" - , "netdata" + , "statsd" , "stats" - , 132010 + , 132020 , statsd.update_every , RRDSET_TYPE_AREA ); RRDDIM *rd_pcharts = rrddim_add(st_pcharts, "charts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + RRDSET *stcpu_thread = rrdset_create_localhost( + "netdata" + , "plugin_statsd_charting_cpu" + , NULL + , "statsd" + , "netdata.statsd_cpu" + , "NetData statsd charting thread CPU usage" + , "milliseconds/s" + , "statsd" + , "stats" + , 132001 + , statsd.update_every + , RRDSET_TYPE_STACKED + ); - // ---------------------------------------------------------------------------------------------------------------- + RRDDIM *rd_user = rrddim_add(stcpu_thread, "user", NULL, 1, 
1000, RRD_ALGORITHM_INCREMENTAL); + RRDDIM *rd_system = rrddim_add(stcpu_thread, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + struct rusage thread; + + for(i = 0; i < statsd.threads ;i++) { + char id[100 + 1]; + char title[100 + 1]; + + snprintfz(id, 100, "plugin_statsd_collector%d_cpu", i + 1); + snprintfz(title, 100, "NetData statsd collector thread No %d CPU usage", i + 1); + + statsd.collection_threads_status[i].st_cpu = rrdset_create_localhost( + "netdata" + , id + , NULL + , "statsd" + , "netdata.statsd_cpu" + , title + , "milliseconds/s" + , "statsd" + , "stats" + , 132002 + i + , statsd.update_every + , RRDSET_TYPE_STACKED + ); + + statsd.collection_threads_status[i].rd_user = rrddim_add(statsd.collection_threads_status[i].st_cpu, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + statsd.collection_threads_status[i].rd_system = rrddim_add(statsd.collection_threads_status[i].st_cpu, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + + // ---------------------------------------------------------------------------------------------------------------- // statsd thread to turn metrics into charts usec_t step = statsd.update_every * USEC_PER_SEC; heartbeat_t hb; heartbeat_init(&hb); - for(;;) { + while(!netdata_exit) { usec_t hb_dt = heartbeat_next(&hb, step); - if(unlikely(netdata_exit)) - break; - statsd_flush_index_metrics(&statsd.gauges, statsd_flush_gauge); statsd_flush_index_metrics(&statsd.counters, statsd_flush_counter); statsd_flush_index_metrics(&statsd.meters, statsd_flush_meter); @@ -2202,61 +2423,77 @@ void *statsd_main(void *ptr) { statsd_update_all_app_charts(); + getrusage(RUSAGE_THREAD, &thread); + if(unlikely(netdata_exit)) break; - if(hb_dt) { + if(likely(hb_dt)) { rrdset_next(st_metrics); rrdset_next(st_events); rrdset_next(st_reads); rrdset_next(st_bytes); rrdset_next(st_packets); + rrdset_next(st_tcp_connects); + rrdset_next(st_tcp_connected); rrdset_next(st_pcharts); + rrdset_next(stcpu_thread); + for(i = 0; i < 
statsd.threads ;i++) + rrdset_next(statsd.collection_threads_status[i].st_cpu); } - rrddim_set_by_pointer(st_metrics, rd_metrics_gauge, (collected_number)statsd.gauges.metrics); - rrddim_set_by_pointer(st_metrics, rd_metrics_counter, (collected_number)statsd.counters.metrics); - rrddim_set_by_pointer(st_metrics, rd_metrics_timer, (collected_number)statsd.timers.metrics); - rrddim_set_by_pointer(st_metrics, rd_metrics_meter, (collected_number)statsd.meters.metrics); - rrddim_set_by_pointer(st_metrics, rd_metrics_histogram, (collected_number)statsd.histograms.metrics); - rrddim_set_by_pointer(st_metrics, rd_metrics_set, (collected_number)statsd.sets.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_gauge, (collected_number)statsd.gauges.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_counter, (collected_number)statsd.counters.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_timer, (collected_number)statsd.timers.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_meter, (collected_number)statsd.meters.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_histogram, (collected_number)statsd.histograms.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_set, (collected_number)statsd.sets.metrics); + rrdset_done(st_metrics); - rrddim_set_by_pointer(st_events, rd_events_gauge, (collected_number)statsd.gauges.events); - rrddim_set_by_pointer(st_events, rd_events_counter, (collected_number)statsd.counters.events); - rrddim_set_by_pointer(st_events, rd_events_timer, (collected_number)statsd.timers.events); - rrddim_set_by_pointer(st_events, rd_events_meter, (collected_number)statsd.meters.events); - rrddim_set_by_pointer(st_events, rd_events_histogram, (collected_number)statsd.histograms.events); - rrddim_set_by_pointer(st_events, rd_events_set, (collected_number)statsd.sets.events); - rrddim_set_by_pointer(st_events, rd_events_unknown, (collected_number)statsd.unknown_types); - rrddim_set_by_pointer(st_events, rd_events_errors, 
(collected_number)statsd.socket_errors); + rrddim_set_by_pointer(st_events, rd_events_gauge, (collected_number)statsd.gauges.events); + rrddim_set_by_pointer(st_events, rd_events_counter, (collected_number)statsd.counters.events); + rrddim_set_by_pointer(st_events, rd_events_timer, (collected_number)statsd.timers.events); + rrddim_set_by_pointer(st_events, rd_events_meter, (collected_number)statsd.meters.events); + rrddim_set_by_pointer(st_events, rd_events_histogram, (collected_number)statsd.histograms.events); + rrddim_set_by_pointer(st_events, rd_events_set, (collected_number)statsd.sets.events); + rrddim_set_by_pointer(st_events, rd_events_unknown, (collected_number)statsd.unknown_types); + rrddim_set_by_pointer(st_events, rd_events_errors, (collected_number)statsd.socket_errors); + rrdset_done(st_events); - rrddim_set_by_pointer(st_reads, rd_reads_tcp, (collected_number)statsd.tcp_socket_reads); - rrddim_set_by_pointer(st_reads, rd_reads_udp, (collected_number)statsd.udp_socket_reads); + rrddim_set_by_pointer(st_reads, rd_reads_tcp, (collected_number)statsd.tcp_socket_reads); + rrddim_set_by_pointer(st_reads, rd_reads_udp, (collected_number)statsd.udp_socket_reads); + rrdset_done(st_reads); - rrddim_set_by_pointer(st_bytes, rd_bytes_tcp, (collected_number)statsd.tcp_bytes_read); - rrddim_set_by_pointer(st_bytes, rd_bytes_udp, (collected_number)statsd.udp_bytes_read); + rrddim_set_by_pointer(st_bytes, rd_bytes_tcp, (collected_number)statsd.tcp_bytes_read); + rrddim_set_by_pointer(st_bytes, rd_bytes_udp, (collected_number)statsd.udp_bytes_read); + rrdset_done(st_bytes); - rrddim_set_by_pointer(st_packets, rd_packets_tcp, (collected_number)statsd.tcp_packets_received); - rrddim_set_by_pointer(st_packets, rd_packets_udp, (collected_number)statsd.udp_packets_received); + rrddim_set_by_pointer(st_packets, rd_packets_tcp, (collected_number)statsd.tcp_packets_received); + rrddim_set_by_pointer(st_packets, rd_packets_udp, (collected_number)statsd.udp_packets_received); 
+ rrdset_done(st_packets); - rrddim_set_by_pointer(st_pcharts, rd_pcharts, (collected_number)statsd.private_charts); + rrddim_set_by_pointer(st_tcp_connects, rd_tcp_connects, (collected_number)statsd.tcp_socket_connects); + rrddim_set_by_pointer(st_tcp_connects, rd_tcp_disconnects, (collected_number)statsd.tcp_socket_disconnects); + rrdset_done(st_tcp_connects); - if(unlikely(netdata_exit)) - break; + rrddim_set_by_pointer(st_tcp_connected, rd_tcp_connected, (collected_number)statsd.tcp_socket_connected); + rrdset_done(st_tcp_connected); - rrdset_done(st_metrics); - rrdset_done(st_events); - rrdset_done(st_reads); - rrdset_done(st_bytes); - rrdset_done(st_packets); + rrddim_set_by_pointer(st_pcharts, rd_pcharts, (collected_number)statsd.private_charts); rrdset_done(st_pcharts); - if(unlikely(netdata_exit)) - break; - } + rrddim_set_by_pointer(stcpu_thread, rd_user, thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec); + rrddim_set_by_pointer(stcpu_thread, rd_system, thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec); + rrdset_done(stcpu_thread); - pthread_cleanup_pop(1); + for(i = 0; i < statsd.threads ;i++) { + rrddim_set_by_pointer(statsd.collection_threads_status[i].st_cpu, statsd.collection_threads_status[i].rd_user, statsd.collection_threads_status[i].rusage.ru_utime.tv_sec * 1000000ULL + statsd.collection_threads_status[i].rusage.ru_utime.tv_usec); + rrddim_set_by_pointer(statsd.collection_threads_status[i].st_cpu, statsd.collection_threads_status[i].rd_system, statsd.collection_threads_status[i].rusage.ru_stime.tv_sec * 1000000ULL + statsd.collection_threads_status[i].rusage.ru_stime.tv_usec); + rrdset_done(statsd.collection_threads_status[i].st_cpu); + } + } - pthread_exit(NULL); +cleanup: ; // added semi-colon to prevent older gcc error: label at end of compound statement + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/storage_number.c b/src/storage_number.c index 3fd22a416..c7bbaa8d9 100644 --- a/src/storage_number.c 
+++ b/src/storage_number.c @@ -1,9 +1,5 @@ #include "common.h" -extern char *print_number_lu_r(char *str, unsigned long uvalue); -extern char *print_number_llu_r(char *str, unsigned long long uvalue); -extern char *print_number_llu_r_smart(char *str, unsigned long long uvalue); - storage_number pack_storage_number(calculated_number value, uint32_t flags) { // bit 32 = sign 0:positive, 1:negative @@ -166,6 +162,7 @@ int print_calculated_number(char *str, calculated_number value) */ int print_calculated_number(char *str, calculated_number value) { + // info("printing number " CALCULATED_NUMBER_FORMAT, value); char integral_str[50], fractional_str[50]; char *wstr = str; @@ -178,30 +175,39 @@ int print_calculated_number(char *str, calculated_number value) { calculated_number integral, fractional; #ifdef STORAGE_WITH_MATH - fractional = modfl(value, &integral) * 10000000.0; + fractional = calculated_number_modf(value, &integral) * 10000000.0; #else fractional = ((unsigned long long)(value * 10000000ULL) % 10000000ULL); #endif + unsigned long long integral_int = (unsigned long long)integral; + unsigned long long fractional_int = (unsigned long long)calculated_number_llrint(fractional); + if(unlikely(fractional_int >= 10000000)) { + integral_int += 1; + fractional_int -= 10000000; + } + + // info("integral " CALCULATED_NUMBER_FORMAT " (%llu), fractional " CALCULATED_NUMBER_FORMAT " (%llu)", integral, integral_int, fractional, fractional_int); + char *istre; - if(integral == 0.0) { + if(unlikely(integral_int == 0)) { integral_str[0] = '0'; istre = &integral_str[1]; } else // convert the integral part to string (reversed) - istre = print_number_llu_r_smart(integral_str, (unsigned long long)integral); + istre = print_number_llu_r_smart(integral_str, integral_int); // copy reversed the integral string istre--; while( istre >= integral_str ) *wstr++ = *istre--; - if(fractional != 0.0) { + if(likely(fractional_int != 0)) { // add a dot *wstr++ = '.'; // convert the fractional 
part to string (reversed) - char *fstre = print_number_llu_r_smart(fractional_str, (unsigned long long)calculated_number_llrint(fractional)); + char *fstre = print_number_llu_r_smart(fractional_str, fractional_int); // prepend zeros to reach 7 digits length int decimal = 7; @@ -220,5 +226,6 @@ int print_calculated_number(char *str, calculated_number value) { } *wstr = '\0'; + // info("printed number '%s'", str); return (int)(wstr - str); } diff --git a/src/storage_number.h b/src/storage_number.h index 616ff881e..ef81863fd 100644 --- a/src/storage_number.h +++ b/src/storage_number.h @@ -1,8 +1,36 @@ #ifndef NETDATA_STORAGE_NUMBER_H #define NETDATA_STORAGE_NUMBER_H +#ifdef NETDATA_WITHOUT_LONG_DOUBLE + +#define powl pow +#define modfl modf +#define llrintl llrint +#define roundl round +#define sqrtl sqrt +#define copysignl copysign +#define strtold strtod + +typedef double calculated_number; +#define CALCULATED_NUMBER_FORMAT "%0.7f" +#define CALCULATED_NUMBER_FORMAT_ZERO "%0.0f" +#define CALCULATED_NUMBER_FORMAT_AUTO "%f" + +#define LONG_DOUBLE_MODIFIER "f" +typedef double LONG_DOUBLE; + +#else + typedef long double calculated_number; #define CALCULATED_NUMBER_FORMAT "%0.7Lf" +#define CALCULATED_NUMBER_FORMAT_ZERO "%0.0Lf" +#define CALCULATED_NUMBER_FORMAT_AUTO "%Lf" + +#define LONG_DOUBLE_MODIFIER "Lf" +typedef long double LONG_DOUBLE; + +#endif + //typedef long long calculated_number; //#define CALCULATED_NUMBER_FORMAT "%lld" @@ -14,6 +42,7 @@ typedef long double collected_number; #define COLLECTED_NUMBER_FORMAT "%0.7Lf" */ +#define calculated_number_modf(x, y) modfl(x, y) #define calculated_number_llrint(x) llrintl(x) #define calculated_number_round(x) roundl(x) #define calculated_number_fabs(x) fabsl(x) diff --git a/src/sys_devices_system_edac_mc.c b/src/sys_devices_system_edac_mc.c index 9eac8a12e..caa16192e 100644 --- a/src/sys_devices_system_edac_mc.c +++ b/src/sys_devices_system_edac_mc.c @@ -142,7 +142,7 @@ int do_proc_sys_devices_system_edac_mc(int 
update_every, usec_t dt) { , "errors" , "proc" , "/sys/devices/system/edac/mc" - , 6600 + , NETDATA_CHART_PRIO_MEM_HW + 50 , update_every , RRDSET_TYPE_LINE ); @@ -180,7 +180,7 @@ int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt) { , "errors" , "proc" , "/sys/devices/system/edac/mc" - , 6610 + , NETDATA_CHART_PRIO_MEM_HW + 60 , update_every , RRDSET_TYPE_LINE ); diff --git a/src/sys_devices_system_node.c b/src/sys_devices_system_node.c index 86d55b298..d04c8dc30 100644 --- a/src/sys_devices_system_node.c +++ b/src/sys_devices_system_node.c @@ -107,7 +107,7 @@ int do_proc_sys_devices_system_node(int update_every, usec_t dt) { , "events/s" , "proc" , "/sys/devices/system/node" - , 1000 + , NETDATA_CHART_PRIO_MEM_NUMA + 10 , update_every , RRDSET_TYPE_LINE ); diff --git a/src/sys_fs_btrfs.c b/src/sys_fs_btrfs.c new file mode 100644 index 000000000..a8dfb5c91 --- /dev/null +++ b/src/sys_fs_btrfs.c @@ -0,0 +1,714 @@ +#include "common.h" + +typedef struct btrfs_disk { + char *name; + uint32_t hash; + int exists; + + char *size_filename; + char *hw_sector_size_filename; + unsigned long long size; + unsigned long long hw_sector_size; + + struct btrfs_disk *next; +} BTRFS_DISK; + +typedef struct btrfs_node { + int exists; + int logged_error; + + char *id; + uint32_t hash; + + char *label; + + // unsigned long long int sectorsize; + // unsigned long long int nodesize; + // unsigned long long int quota_override; + + #define declare_btrfs_allocation_section_field(SECTION, FIELD) \ + char *allocation_ ## SECTION ## _ ## FIELD ## _filename; \ + unsigned long long int allocation_ ## SECTION ## _ ## FIELD; + + #define declare_btrfs_allocation_field(FIELD) \ + char *allocation_ ## FIELD ## _filename; \ + unsigned long long int allocation_ ## FIELD; + + RRDSET *st_allocation_disks; + RRDDIM *rd_allocation_disks_unallocated; + RRDDIM *rd_allocation_disks_data_used; + RRDDIM *rd_allocation_disks_data_free; + RRDDIM *rd_allocation_disks_metadata_used; + RRDDIM 
*rd_allocation_disks_metadata_free; + RRDDIM *rd_allocation_disks_system_used; + RRDDIM *rd_allocation_disks_system_free; + unsigned long long all_disks_total; + + RRDSET *st_allocation_data; + RRDDIM *rd_allocation_data_free; + RRDDIM *rd_allocation_data_used; + declare_btrfs_allocation_section_field(data, total_bytes) + declare_btrfs_allocation_section_field(data, bytes_used) + declare_btrfs_allocation_section_field(data, disk_total) + declare_btrfs_allocation_section_field(data, disk_used) + + RRDSET *st_allocation_metadata; + RRDDIM *rd_allocation_metadata_free; + RRDDIM *rd_allocation_metadata_used; + RRDDIM *rd_allocation_metadata_reserved; + declare_btrfs_allocation_section_field(metadata, total_bytes) + declare_btrfs_allocation_section_field(metadata, bytes_used) + declare_btrfs_allocation_section_field(metadata, disk_total) + declare_btrfs_allocation_section_field(metadata, disk_used) + //declare_btrfs_allocation_field(global_rsv_reserved) + declare_btrfs_allocation_field(global_rsv_size) + + RRDSET *st_allocation_system; + RRDDIM *rd_allocation_system_free; + RRDDIM *rd_allocation_system_used; + declare_btrfs_allocation_section_field(system, total_bytes) + declare_btrfs_allocation_section_field(system, bytes_used) + declare_btrfs_allocation_section_field(system, disk_total) + declare_btrfs_allocation_section_field(system, disk_used) + + BTRFS_DISK *disks; + + struct btrfs_node *next; +} BTRFS_NODE; + +static BTRFS_NODE *nodes = NULL; + +static inline void btrfs_free_disk(BTRFS_DISK *d) { + freez(d->name); + freez(d->size_filename); + freez(d->hw_sector_size_filename); + freez(d); +} + +static inline void btrfs_free_node(BTRFS_NODE *node) { + // info("BTRFS: destroying '%s'", node->id); + + if(node->st_allocation_disks) + rrdset_is_obsolete(node->st_allocation_disks); + + if(node->st_allocation_data) + rrdset_is_obsolete(node->st_allocation_data); + + if(node->st_allocation_metadata) + rrdset_is_obsolete(node->st_allocation_metadata); + + 
if(node->st_allocation_system) + rrdset_is_obsolete(node->st_allocation_system); + + freez(node->allocation_data_bytes_used_filename); + freez(node->allocation_data_total_bytes_filename); + + freez(node->allocation_metadata_bytes_used_filename); + freez(node->allocation_metadata_total_bytes_filename); + + freez(node->allocation_system_bytes_used_filename); + freez(node->allocation_system_total_bytes_filename); + + while(node->disks) { + BTRFS_DISK *d = node->disks; + node->disks = node->disks->next; + btrfs_free_disk(d); + } + + freez(node->label); + freez(node->id); + freez(node); +} + +static inline int find_btrfs_disks(BTRFS_NODE *node, const char *path) { + char filename[FILENAME_MAX + 1]; + + node->all_disks_total = 0; + + BTRFS_DISK *d; + for(d = node->disks ; d ; d = d->next) + d->exists = 0; + + DIR *dir = opendir(path); + if (!dir) { + if(!node->logged_error) { + error("BTRFS: Cannot open directory '%s'.", path); + node->logged_error = 1; + } + return 1; + } + node->logged_error = 0; + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type != DT_LNK + || !strcmp(de->d_name, ".") + || !strcmp(de->d_name, "..") + ) { + // info("BTRFS: ignoring '%s'", de->d_name); + continue; + } + + uint32_t hash = simple_hash(de->d_name); + + // -------------------------------------------------------------------- + // search for it + + for(d = node->disks ; d ; d = d->next) { + if(hash == d->hash && !strcmp(de->d_name, d->name)) + break; + } + + // -------------------------------------------------------------------- + // did we find it? 
+ + if(!d) { + d = callocz(sizeof(BTRFS_DISK), 1); + + d->name = strdupz(de->d_name); + d->hash = simple_hash(d->name); + + snprintfz(filename, FILENAME_MAX, "%s/%s/size", path, de->d_name); + d->size_filename = strdupz(filename); + + // for disks + snprintfz(filename, FILENAME_MAX, "%s/%s/queue/hw_sector_size", path, de->d_name); + struct stat sb; + if(stat(filename, &sb) == -1) + // for partitions + snprintfz(filename, FILENAME_MAX, "%s/%s/../queue/hw_sector_size", path, de->d_name); + + d->hw_sector_size_filename = strdupz(filename); + + // link it + d->next = node->disks; + node->disks = d; + } + + d->exists = 1; + + + // -------------------------------------------------------------------- + // update the values + + if(read_single_number_file(d->size_filename, &d->size) != 0) { + error("BTRFS: failed to read '%s'", d->size_filename); + d->exists = 0; + continue; + } + + if(read_single_number_file(d->hw_sector_size_filename, &d->hw_sector_size) != 0) { + error("BTRFS: failed to read '%s'", d->hw_sector_size_filename); + d->exists = 0; + continue; + } + + node->all_disks_total += d->size * d->hw_sector_size; + } + closedir(dir); + + // ------------------------------------------------------------------------ + // cleanup + + BTRFS_DISK *last = NULL; + d = node->disks; + + while(d) { + if(unlikely(!d->exists)) { + if(unlikely(node->disks == d)) { + node->disks = d->next; + btrfs_free_disk(d); + d = node->disks; + last = NULL; + } + else { + last->next = d->next; + btrfs_free_disk(d); + d = last->next; + } + + continue; + } + + last = d; + d = d->next; + } + + return 0; +} + + +static inline int find_all_btrfs_pools(const char *path) { + static int logged_error = 0; + char filename[FILENAME_MAX + 1]; + + BTRFS_NODE *node; + for(node = nodes ; node ; node = node->next) + node->exists = 0; + + DIR *dir = opendir(path); + if (!dir) { + if(!logged_error) { + error("BTRFS: Cannot open directory '%s'.", path); + logged_error = 1; + } + return 1; + } + logged_error = 0; + 
+ struct dirent *de = NULL; + while ((de = readdir(dir))) { + if(de->d_type != DT_DIR + || !strcmp(de->d_name, ".") + || !strcmp(de->d_name, "..") + || !strcmp(de->d_name, "features") + ) { + // info("BTRFS: ignoring '%s'", de->d_name); + continue; + } + + uint32_t hash = simple_hash(de->d_name); + + // search for it + for(node = nodes ; node ; node = node->next) { + if(hash == node->hash && !strcmp(de->d_name, node->id)) + break; + } + + // did we find it? + if(node) { + // info("BTRFS: already exists '%s'", de->d_name); + node->exists = 1; + + // update the disk sizes + snprintfz(filename, FILENAME_MAX, "%s/%s/devices", path, de->d_name); + find_btrfs_disks(node, filename); + + continue; + } + + // info("BTRFS: adding '%s'", de->d_name); + + // not found, create it + node = callocz(sizeof(BTRFS_NODE), 1); + + node->id = strdupz(de->d_name); + node->hash = simple_hash(node->id); + node->exists = 1; + + { + char label[FILENAME_MAX + 1] = ""; + + snprintfz(filename, FILENAME_MAX, "%s/%s/label", path, de->d_name); + read_file(filename, label, FILENAME_MAX); + + char *s = label; + if (s[0]) + s = trim(label); + + if(s && s[0]) + node->label = strdupz(s); + else + node->label = strdupz(node->id); + } + + //snprintfz(filename, FILENAME_MAX, "%s/%s/sectorsize", path, de->d_name); + //if(read_single_number_file(filename, &node->sectorsize) != 0) { + // error("BTRFS: failed to read '%s'", filename); + // btrfs_free_node(node); + // continue; + //} + + //snprintfz(filename, FILENAME_MAX, "%s/%s/nodesize", path, de->d_name); + //if(read_single_number_file(filename, &node->nodesize) != 0) { + // error("BTRFS: failed to read '%s'", filename); + // btrfs_free_node(node); + // continue; + //} + + //snprintfz(filename, FILENAME_MAX, "%s/%s/quota_override", path, de->d_name); + //if(read_single_number_file(filename, &node->quota_override) != 0) { + // error("BTRFS: failed to read '%s'", filename); + // btrfs_free_node(node); + // continue; + //} + + // 
-------------------------------------------------------------------- + // macros to simplify our life + + #define init_btrfs_allocation_field(FIELD) {\ + snprintfz(filename, FILENAME_MAX, "%s/%s/allocation/" #FIELD, path, de->d_name); \ + if(read_single_number_file(filename, &node->allocation_ ## FIELD) != 0) {\ + error("BTRFS: failed to read '%s'", filename);\ + btrfs_free_node(node);\ + continue;\ + }\ + if(!node->allocation_ ## FIELD ## _filename)\ + node->allocation_ ## FIELD ## _filename = strdupz(filename);\ + } + + #define init_btrfs_allocation_section_field(SECTION, FIELD) {\ + snprintfz(filename, FILENAME_MAX, "%s/%s/allocation/" #SECTION "/" #FIELD, path, de->d_name); \ + if(read_single_number_file(filename, &node->allocation_ ## SECTION ## _ ## FIELD) != 0) {\ + error("BTRFS: failed to read '%s'", filename);\ + btrfs_free_node(node);\ + continue;\ + }\ + if(!node->allocation_ ## SECTION ## _ ## FIELD ## _filename)\ + node->allocation_ ## SECTION ## _ ## FIELD ## _filename = strdupz(filename);\ + } + + // -------------------------------------------------------------------- + // allocation/data + + init_btrfs_allocation_section_field(data, total_bytes); + init_btrfs_allocation_section_field(data, bytes_used); + init_btrfs_allocation_section_field(data, disk_total); + init_btrfs_allocation_section_field(data, disk_used); + + + // -------------------------------------------------------------------- + // allocation/metadata + + init_btrfs_allocation_section_field(metadata, total_bytes); + init_btrfs_allocation_section_field(metadata, bytes_used); + init_btrfs_allocation_section_field(metadata, disk_total); + init_btrfs_allocation_section_field(metadata, disk_used); + + init_btrfs_allocation_field(global_rsv_size); + // init_btrfs_allocation_field(global_rsv_reserved); + + + // -------------------------------------------------------------------- + // allocation/system + + init_btrfs_allocation_section_field(system, total_bytes); + 
init_btrfs_allocation_section_field(system, bytes_used); + init_btrfs_allocation_section_field(system, disk_total); + init_btrfs_allocation_section_field(system, disk_used); + + + // -------------------------------------------------------------------- + // find all disks related to this node + // and collect their sizes + + snprintfz(filename, FILENAME_MAX, "%s/%s/devices", path, de->d_name); + find_btrfs_disks(node, filename); + + + // -------------------------------------------------------------------- + // link it + + // info("BTRFS: linking '%s'", node->id); + node->next = nodes; + nodes = node; + } + closedir(dir); + + + // ------------------------------------------------------------------------ + // cleanup + + BTRFS_NODE *last = NULL; + node = nodes; + + while(node) { + if(unlikely(!node->exists)) { + if(unlikely(nodes == node)) { + nodes = node->next; + btrfs_free_node(node); + node = nodes; + last = NULL; + } + else { + last->next = node->next; + btrfs_free_node(node); + node = last->next; + } + + continue; + } + + last = node; + node = node->next; + } + + return 0; +} + +int do_sys_fs_btrfs(int update_every, usec_t dt) { + static int initialized = 0 + , do_allocation_disks = CONFIG_BOOLEAN_AUTO + , do_allocation_system = CONFIG_BOOLEAN_AUTO + , do_allocation_data = CONFIG_BOOLEAN_AUTO + , do_allocation_metadata = CONFIG_BOOLEAN_AUTO; + + static usec_t refresh_delta = 0, refresh_every = 60 * USEC_PER_SEC; + static char *btrfs_path = NULL; + + (void)dt; + + if(unlikely(!initialized)) { + initialized = 1; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/fs/btrfs"); + btrfs_path = config_get("plugin:proc:/sys/fs/btrfs", "path to monitor", filename); + + refresh_every = config_get_number("plugin:proc:/sys/fs/btrfs", "check for btrfs changes every", refresh_every / USEC_PER_SEC) * USEC_PER_SEC; + refresh_delta = refresh_every; + + do_allocation_disks = 
config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "physical disks allocation", do_allocation_disks); + do_allocation_data = config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "data allocation", do_allocation_data); + do_allocation_metadata = config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "metadata allocation", do_allocation_metadata); + do_allocation_system = config_get_boolean_ondemand("plugin:proc:/sys/fs/btrfs", "system allocation", do_allocation_system); + } + + refresh_delta += dt; + if(refresh_delta >= refresh_every) { + refresh_delta = 0; + find_all_btrfs_pools(btrfs_path); + } + + BTRFS_NODE *node; + for(node = nodes; node ; node = node->next) { + // -------------------------------------------------------------------- + // allocation/system + + #define collect_btrfs_allocation_field(FIELD) \ + read_single_number_file(node->allocation_ ## FIELD ## _filename, &node->allocation_ ## FIELD) + + #define collect_btrfs_allocation_section_field(SECTION, FIELD) \ + read_single_number_file(node->allocation_ ## SECTION ## _ ## FIELD ## _filename, &node->allocation_ ## SECTION ## _ ## FIELD) + + if(do_allocation_disks != CONFIG_BOOLEAN_NO) { + if( collect_btrfs_allocation_section_field(data, disk_total) != 0 + || collect_btrfs_allocation_section_field(data, disk_used) != 0 + || collect_btrfs_allocation_section_field(metadata, disk_total) != 0 + || collect_btrfs_allocation_section_field(metadata, disk_used) != 0 + || collect_btrfs_allocation_section_field(system, disk_total) != 0 + || collect_btrfs_allocation_section_field(system, disk_used) != 0) { + error("BTRFS: failed to collect physical disks allocation for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + if(do_allocation_data != CONFIG_BOOLEAN_NO) { + if (collect_btrfs_allocation_section_field(data, total_bytes) != 0 + || collect_btrfs_allocation_section_field(data, bytes_used) != 0) { + error("BTRFS: failed to collect 
allocation/data for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + if(do_allocation_metadata != CONFIG_BOOLEAN_NO) { + if (collect_btrfs_allocation_section_field(metadata, total_bytes) != 0 + || collect_btrfs_allocation_section_field(metadata, bytes_used) != 0 + || collect_btrfs_allocation_field(global_rsv_size) != 0 + ) { + error("BTRFS: failed to collect allocation/metadata for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + if(do_allocation_system != CONFIG_BOOLEAN_NO) { + if (collect_btrfs_allocation_section_field(system, total_bytes) != 0 + || collect_btrfs_allocation_section_field(system, bytes_used) != 0) { + error("BTRFS: failed to collect allocation/system for '%s'", node->id); + // make it refresh btrfs at the next iteration + refresh_delta = refresh_every; + continue; + } + } + + // -------------------------------------------------------------------- + // allocation/disks + + if(do_allocation_disks == CONFIG_BOOLEAN_YES || (do_allocation_disks == CONFIG_BOOLEAN_AUTO && node->all_disks_total && node->allocation_data_disk_total)) { + do_allocation_disks = CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_disks)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "disk_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "disk_%s", node->label); + snprintf(title, 200, "BTRFS Disk Allocation for %s", node->label); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_disks = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.disk" + , title + , "MB" + , "proc" + , "sys/fs/btrfs" + , 2300 + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_disks_unallocated = rrddim_add(node->st_allocation_disks, "unallocated", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + 
node->rd_allocation_disks_data_used = rrddim_add(node->st_allocation_disks, "data_used", "data used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_data_free = rrddim_add(node->st_allocation_disks, "data_free", "data free", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_metadata_used = rrddim_add(node->st_allocation_disks, "meta_used", "meta used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_metadata_free = rrddim_add(node->st_allocation_disks, "meta_free", "meta free", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_system_used = rrddim_add(node->st_allocation_disks, "sys_used", "sys used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_disks_system_free = rrddim_add(node->st_allocation_disks, "sys_free", "sys free", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(node->st_allocation_disks); + + // unsigned long long disk_used = node->allocation_data_disk_used + node->allocation_metadata_disk_used + node->allocation_system_disk_used; + unsigned long long disk_total = node->allocation_data_disk_total + node->allocation_metadata_disk_total + node->allocation_system_disk_total; + unsigned long long disk_unallocated = node->all_disks_total - disk_total; + + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_unallocated, disk_unallocated); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_data_used, node->allocation_data_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_data_free, node->allocation_data_disk_total - node->allocation_data_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_metadata_used, node->allocation_metadata_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_metadata_free, node->allocation_metadata_disk_total - node->allocation_metadata_disk_used); + 
rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_system_used, node->allocation_system_disk_used); + rrddim_set_by_pointer(node->st_allocation_disks, node->rd_allocation_disks_system_free, node->allocation_system_disk_total - node->allocation_system_disk_used); + rrdset_done(node->st_allocation_disks); + } + + + // -------------------------------------------------------------------- + // allocation/data + + if(do_allocation_data == CONFIG_BOOLEAN_YES || (do_allocation_data == CONFIG_BOOLEAN_AUTO && node->allocation_data_total_bytes)) { + do_allocation_data = CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_data)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "data_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "data_%s", node->label); + snprintf(title, 200, "BTRFS Data Allocation for %s", node->label); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_data = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.data" + , title + , "MB" + , "proc" + , "sys/fs/btrfs" + , 2301 + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_data_free = rrddim_add(node->st_allocation_data, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_data_used = rrddim_add(node->st_allocation_data, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(node->st_allocation_data); + + rrddim_set_by_pointer(node->st_allocation_data, node->rd_allocation_data_free, node->allocation_data_total_bytes - node->allocation_data_bytes_used); + rrddim_set_by_pointer(node->st_allocation_data, node->rd_allocation_data_used, node->allocation_data_bytes_used); + rrdset_done(node->st_allocation_data); + } + + // -------------------------------------------------------------------- + // allocation/metadata + + if(do_allocation_metadata == CONFIG_BOOLEAN_YES || 
(do_allocation_metadata == CONFIG_BOOLEAN_AUTO && node->allocation_metadata_total_bytes)) { + do_allocation_metadata = CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_metadata)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "metadata_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "metadata_%s", node->label); + snprintf(title, 200, "BTRFS Metadata Allocation for %s", node->label); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_metadata = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.metadata" + , title + , "MB" + , "proc" + , "sys/fs/btrfs" + , 2302 + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_metadata_free = rrddim_add(node->st_allocation_metadata, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_metadata_used = rrddim_add(node->st_allocation_metadata, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_metadata_reserved = rrddim_add(node->st_allocation_metadata, "reserved", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(node->st_allocation_metadata); + + rrddim_set_by_pointer(node->st_allocation_metadata, node->rd_allocation_metadata_free, node->allocation_metadata_total_bytes - node->allocation_metadata_bytes_used - node->allocation_global_rsv_size); + rrddim_set_by_pointer(node->st_allocation_metadata, node->rd_allocation_metadata_used, node->allocation_metadata_bytes_used); + rrddim_set_by_pointer(node->st_allocation_metadata, node->rd_allocation_metadata_reserved, node->allocation_global_rsv_size); + rrdset_done(node->st_allocation_metadata); + } + + // -------------------------------------------------------------------- + // allocation/system + + if(do_allocation_system == CONFIG_BOOLEAN_YES || (do_allocation_system == CONFIG_BOOLEAN_AUTO && node->allocation_system_total_bytes)) { + do_allocation_system = 
CONFIG_BOOLEAN_YES; + + if(unlikely(!node->st_allocation_system)) { + char id[RRD_ID_LENGTH_MAX + 1], name[RRD_ID_LENGTH_MAX + 1], title[200 + 1]; + + snprintf(id, RRD_ID_LENGTH_MAX, "system_%s", node->id); + snprintf(name, RRD_ID_LENGTH_MAX, "system_%s", node->label); + snprintf(title, 200, "BTRFS System Allocation for %s", node->label); + + netdata_fix_chart_id(id); + netdata_fix_chart_name(name); + + node->st_allocation_system = rrdset_create_localhost( + "btrfs" + , id + , name + , node->label + , "btrfs.system" + , title + , "MB" + , "proc" + , "sys/fs/btrfs" + , 2303 + , update_every + , RRDSET_TYPE_STACKED + ); + + node->rd_allocation_system_free = rrddim_add(node->st_allocation_system, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + node->rd_allocation_system_used = rrddim_add(node->st_allocation_system, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(node->st_allocation_system); + + rrddim_set_by_pointer(node->st_allocation_system, node->rd_allocation_system_free, node->allocation_system_total_bytes - node->allocation_system_bytes_used); + rrddim_set_by_pointer(node->st_allocation_system, node->rd_allocation_system_used, node->allocation_system_bytes_used); + rrdset_done(node->st_allocation_system); + } + } + + return 0; +} + diff --git a/src/sys_fs_cgroup.c b/src/sys_fs_cgroup.c index 946831afa..f6e613c4b 100644 --- a/src/sys_fs_cgroup.c +++ b/src/sys_fs_cgroup.c @@ -156,11 +156,16 @@ void read_cgroup_plugin_configuration() { // ---------------------------------------------------------------- + " /machine.slice/*.service " // #3367 systemd-nspawn + + // ---------------------------------------------------------------- + " !*/vcpu* " // libvirtd adds these sub-cgroups " !*/emulator " // libvirtd adds these sub-cgroups " !*.mount " " !*.partition " " !*.service " + " !*.socket " " !*.slice " " !*.swap " " !*.user " @@ -175,7 +180,7 @@ void read_cgroup_plugin_configuration() { " !/systemd " " !/user " " * " // enable 
anything else - ), SIMPLE_PATTERN_EXACT); + ), NULL, SIMPLE_PATTERN_EXACT); enabled_cgroup_paths = simple_pattern_create( config_get("plugin:cgroups", "search for cgroups in subpaths matching", @@ -187,9 +192,9 @@ void read_cgroup_plugin_configuration() { " !/systemd " " !/user " " !/user.slice " - " !/lxc/*/* " // #2161 #2649 + " !/lxc/*/* " // #2161 #2649 " * " - ), SIMPLE_PATTERN_EXACT); + ), NULL, SIMPLE_PATTERN_EXACT); snprintfz(filename, FILENAME_MAX, "%s/cgroup-name.sh", netdata_configured_plugins_dir); cgroups_rename_script = config_get("plugin:cgroups", "script to get cgroup names", filename); @@ -199,32 +204,36 @@ void read_cgroup_plugin_configuration() { enabled_cgroup_renames = simple_pattern_create( config_get("plugin:cgroups", "run script to rename cgroups matching", - " *.scope " - " *docker* " - " *lxc* " - " *qemu* " - " *.libvirt-qemu " // #3010 - " !*/vcpu* " // libvirtd adds these sub-cgroups - " !*/emulator* " // libvirtd adds these sub-cgroups " !/ " " !*.mount " + " !*.socket " " !*.partition " + " /machine.slice/*.service " // #3367 systemd-nspawn " !*.service " " !*.slice " " !*.swap " " !*.user " + " !init.scope " + " !*.scope/vcpu* " // libvirtd adds these sub-cgroups + " !*.scope/emulator " // libvirtd adds these sub-cgroups + " *.scope " + " *docker* " + " *lxc* " + " *qemu* " + " *kubepods* " // #3396 kubernetes + " *.libvirt-qemu " // #3010 " * " - ), SIMPLE_PATTERN_EXACT); + ), NULL, SIMPLE_PATTERN_EXACT); if(cgroup_enable_systemd_services) { systemd_services_cgroups = simple_pattern_create( config_get("plugin:cgroups", "cgroups to match as systemd services", " !/system.slice/*/*.service " " /system.slice/*.service " - ), SIMPLE_PATTERN_EXACT); + ), NULL, SIMPLE_PATTERN_EXACT); } - mountinfo_free(root); + mountinfo_free_all(root); } // ---------------------------------------------------------------------------- @@ -531,14 +540,14 @@ static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { } static inline void 
cgroup_read_blkio(struct blkio *io) { - static procfile *ff = NULL; - if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) { io->delay_counter--; return; } if(likely(io->filename)) { + static procfile *ff = NULL; + ff = procfile_reopen(ff, io->filename, NULL, PROCFILE_FLAG_DEFAULT); if(unlikely(!ff)) { io->updated = 0; @@ -838,9 +847,9 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) { pid_t cgroup_pid; char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - snprintfz(buffer, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->chart_id); + snprintfz(buffer, CGROUP_CHARTID_LINE_MAX, "exec %s '%s' '%s'", cgroups_rename_script, cg->chart_id, cg->id); - debug(D_CGROUP, "executing command '%s' for cgroup '%s'", buffer, cg->id); + debug(D_CGROUP, "executing command \"%s\" for cgroup '%s'", buffer, cg->id); FILE *fp = mypopen(buffer, &cgroup_pid); if(fp) { // debug(D_CGROUP, "reading from command '%s' for cgroup '%s'", buffer, cg->id); @@ -1386,7 +1395,6 @@ static inline void find_all_cgroups() { } debug(D_CGROUP, "done searching for cgroups"); - return; } // ---------------------------------------------------------------------------- @@ -2674,16 +2682,17 @@ void update_cgroup_charts(int update_every) { // ---------------------------------------------------------------------------- // cgroups main -void *cgroups_main(void *ptr) { +static void cgroup_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("CGROUP plugin thread created with task id %d", gettid()); + info("cleaning up..."); - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("CGROUP: cannot set pthread cancel type to DEFERRED."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("CGROUP: cannot set pthread cancel state to ENABLE."); +void *cgroups_main(void 
*ptr) { + netdata_thread_cleanup_push(cgroup_main_cleanup, ptr); struct rusage thread; @@ -2698,7 +2707,8 @@ void *cgroups_main(void *ptr) { heartbeat_init(&hb); usec_t step = cgroup_update_every * USEC_PER_SEC; usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0; - for(;;) { + + while(!netdata_exit) { usec_t hb_dt = heartbeat_next(&hb, step); if(unlikely(netdata_exit)) break; @@ -2750,9 +2760,6 @@ void *cgroups_main(void *ptr) { } } - info("CGROUP thread exiting"); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } diff --git a/src/sys_kernel_mm_ksm.c b/src/sys_kernel_mm_ksm.c index 356315be4..7ca1366b4 100644 --- a/src/sys_kernel_mm_ksm.c +++ b/src/sys_kernel_mm_ksm.c @@ -16,16 +16,16 @@ KSM_NAME_VALUE values[] = { [PAGES_SHARING] = { "/sys/kernel/mm/ksm/pages_sharing", 0ULL }, [PAGES_UNSHARED] = { "/sys/kernel/mm/ksm/pages_unshared", 0ULL }, [PAGES_VOLATILE] = { "/sys/kernel/mm/ksm/pages_volatile", 0ULL }, - [PAGES_TO_SCAN] = { "/sys/kernel/mm/ksm/pages_to_scan", 0ULL }, + // [PAGES_TO_SCAN] = { "/sys/kernel/mm/ksm/pages_to_scan", 0ULL }, }; int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { (void)dt; - static procfile *ff_pages_shared = NULL, *ff_pages_sharing = NULL, *ff_pages_unshared = NULL, *ff_pages_volatile = NULL, *ff_pages_to_scan = NULL; - static long page_size = -1; + static procfile *ff_pages_shared = NULL, *ff_pages_sharing = NULL, *ff_pages_unshared = NULL, *ff_pages_volatile = NULL/*, *ff_pages_to_scan = NULL*/; + static unsigned long page_size = 0; - if(unlikely(page_size == -1)) - page_size = sysconf(_SC_PAGESIZE); + if(unlikely(page_size == 0)) + page_size = (unsigned long)sysconf(_SC_PAGESIZE); if(unlikely(!ff_pages_shared)) { snprintfz(values[PAGES_SHARED].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_shared"); @@ -51,16 +51,16 @@ int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { ff_pages_volatile = 
procfile_open(values[PAGES_VOLATILE].filename, " \t:", PROCFILE_FLAG_DEFAULT); } - if(unlikely(!ff_pages_to_scan)) { - snprintfz(values[PAGES_TO_SCAN].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_to_scan"); - snprintfz(values[PAGES_TO_SCAN].filename, FILENAME_MAX, "%s", config_get("plugin:proc:/sys/kernel/mm/ksm", "/sys/kernel/mm/ksm/pages_to_scan", values[PAGES_TO_SCAN].filename)); - ff_pages_to_scan = procfile_open(values[PAGES_TO_SCAN].filename, " \t:", PROCFILE_FLAG_DEFAULT); - } + //if(unlikely(!ff_pages_to_scan)) { + // snprintfz(values[PAGES_TO_SCAN].filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/kernel/mm/ksm/pages_to_scan"); + // snprintfz(values[PAGES_TO_SCAN].filename, FILENAME_MAX, "%s", config_get("plugin:proc:/sys/kernel/mm/ksm", "/sys/kernel/mm/ksm/pages_to_scan", values[PAGES_TO_SCAN].filename)); + // ff_pages_to_scan = procfile_open(values[PAGES_TO_SCAN].filename, " \t:", PROCFILE_FLAG_DEFAULT); + //} - if(unlikely(!ff_pages_shared || !ff_pages_sharing || !ff_pages_unshared || !ff_pages_volatile || !ff_pages_to_scan)) + if(unlikely(!ff_pages_shared || !ff_pages_sharing || !ff_pages_unshared || !ff_pages_volatile /*|| !ff_pages_to_scan */)) return 1; - unsigned long long pages_shared = 0, pages_sharing = 0, pages_unshared = 0, pages_volatile = 0, pages_to_scan = 0, offered = 0, saved = 0; + unsigned long long pages_shared = 0, pages_sharing = 0, pages_unshared = 0, pages_volatile = 0, /*pages_to_scan = 0,*/ offered = 0, saved = 0; ff_pages_shared = procfile_readall(ff_pages_shared); if(unlikely(!ff_pages_shared)) return 0; // we return 0, so that we will retry to open it next time @@ -78,20 +78,20 @@ int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { if(unlikely(!ff_pages_volatile)) return 0; // we return 0, so that we will retry to open it next time pages_volatile = str2ull(procfile_lineword(ff_pages_volatile, 0, 0)); - ff_pages_to_scan = procfile_readall(ff_pages_to_scan); - 
if(unlikely(!ff_pages_to_scan)) return 0; // we return 0, so that we will retry to open it next time - pages_to_scan = str2ull(procfile_lineword(ff_pages_to_scan, 0, 0)); + //ff_pages_to_scan = procfile_readall(ff_pages_to_scan); + //if(unlikely(!ff_pages_to_scan)) return 0; // we return 0, so that we will retry to open it next time + //pages_to_scan = str2ull(procfile_lineword(ff_pages_to_scan, 0, 0)); offered = pages_sharing + pages_shared + pages_unshared + pages_volatile; - saved = pages_sharing - pages_shared; + saved = pages_sharing; - if(unlikely(!offered || !pages_to_scan)) return 0; + if(unlikely(!offered /*|| !pages_to_scan*/)) return 0; // -------------------------------------------------------------------- { static RRDSET *st_mem_ksm = NULL; - static RRDDIM *rd_shared = NULL, *rd_unshared = NULL, *rd_sharing = NULL, *rd_volatile = NULL, *rd_to_scan = NULL; + static RRDDIM *rd_shared = NULL, *rd_unshared = NULL, *rd_sharing = NULL, *rd_volatile = NULL/*, *rd_to_scan = NULL*/; if (unlikely(!st_mem_ksm)) { st_mem_ksm = rrdset_create_localhost( @@ -104,7 +104,7 @@ int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { , "MB" , "proc" , "/sys/kernel/mm/ksm" - , 5000 + , NETDATA_CHART_PRIO_MEM_KSM , update_every , RRDSET_TYPE_AREA ); @@ -113,7 +113,7 @@ int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { rd_unshared = rrddim_add(st_mem_ksm, "unshared", NULL, -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); rd_sharing = rrddim_add(st_mem_ksm, "sharing", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); rd_volatile = rrddim_add(st_mem_ksm, "volatile", NULL, -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rd_to_scan = rrddim_add(st_mem_ksm, "to_scan", "to scan", -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + //rd_to_scan = rrddim_add(st_mem_ksm, "to_scan", "to scan", -1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); } else rrdset_next(st_mem_ksm); @@ -122,7 +122,7 @@ int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { rrddim_set_by_pointer(st_mem_ksm, rd_unshared, 
pages_unshared * page_size); rrddim_set_by_pointer(st_mem_ksm, rd_sharing, pages_sharing * page_size); rrddim_set_by_pointer(st_mem_ksm, rd_volatile, pages_volatile * page_size); - rrddim_set_by_pointer(st_mem_ksm, rd_to_scan, pages_to_scan * page_size); + //rrddim_set_by_pointer(st_mem_ksm, rd_to_scan, pages_to_scan * page_size); rrdset_done(st_mem_ksm); } @@ -144,7 +144,7 @@ int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { , "MB" , "proc" , "/sys/kernel/mm/ksm" - , 5001 + , NETDATA_CHART_PRIO_MEM_KSM + 1 , update_every , RRDSET_TYPE_AREA ); @@ -178,7 +178,7 @@ int do_sys_kernel_mm_ksm(int update_every, usec_t dt) { , "percentage" , "proc" , "/sys/kernel/mm/ksm" - , 5002 + , NETDATA_CHART_PRIO_MEM_KSM + 2 , update_every , RRDSET_TYPE_LINE ); diff --git a/src/threads.c b/src/threads.c new file mode 100644 index 000000000..b9ca3c085 --- /dev/null +++ b/src/threads.c @@ -0,0 +1,181 @@ +#include "common.h" + +static size_t default_stacksize = 0, wanted_stacksize = 0; +static pthread_attr_t *attr = NULL; + +// ---------------------------------------------------------------------------- +// per thread data + +typedef struct { + void *arg; + pthread_t *thread; + const char *tag; + void *(*start_routine) (void *); + NETDATA_THREAD_OPTIONS options; +} NETDATA_THREAD; + +static __thread NETDATA_THREAD *netdata_thread = NULL; + +const char *netdata_thread_tag(void) { + return ((netdata_thread && netdata_thread->tag && *netdata_thread->tag)?netdata_thread->tag:"MAIN"); +} + +// ---------------------------------------------------------------------------- +// compatibility library functions + +pid_t gettid(void) { +#ifdef __FreeBSD__ + + return (pid_t)pthread_getthreadid_np(); + +#elif defined(__APPLE__) + + #if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1060) + uint64_t curthreadid; + pthread_threadid_np(NULL, &curthreadid); + return (pid_t)curthreadid; + #else /* __MAC_OS_X_VERSION_MIN_REQUIRED */ + return (pid_t)pthread_self; + 
#endif /* __MAC_OS_X_VERSION_MIN_REQUIRED */ + +#else /* __APPLE__*/ + + return (pid_t)syscall(SYS_gettid); + +#endif /* __FreeBSD__, __APPLE__*/ +} + +// ---------------------------------------------------------------------------- +// early initialization + +size_t netdata_threads_init(void) { + int i; + + // -------------------------------------------------------------------- + // get the required stack size of the threads of netdata + + attr = callocz(1, sizeof(pthread_attr_t)); + i = pthread_attr_init(attr); + if(i != 0) + fatal("pthread_attr_init() failed with code %d.", i); + + i = pthread_attr_getstacksize(attr, &default_stacksize); + if(i != 0) + fatal("pthread_attr_getstacksize() failed with code %d.", i); + else + debug(D_OPTIONS, "initial pthread stack size is %zu bytes", default_stacksize); + + return default_stacksize; +} + +// ---------------------------------------------------------------------------- +// late initialization + +void netdata_threads_init_after_fork(size_t stacksize) { + wanted_stacksize = stacksize; + int i; + + // ------------------------------------------------------------------------ + // set default pthread stack size + + if(attr && default_stacksize < wanted_stacksize && wanted_stacksize > 0) { + i = pthread_attr_setstacksize(attr, wanted_stacksize); + if(i != 0) + fatal("pthread_attr_setstacksize() to %zu bytes, failed with code %d.", wanted_stacksize, i); + else + debug(D_SYSTEM, "Successfully set pthread stacksize to %zu bytes", wanted_stacksize); + } +} + + +// ---------------------------------------------------------------------------- +// netdata_thread_create + +static void thread_cleanup(void *ptr) { + if(netdata_thread != ptr) { + NETDATA_THREAD *info = (NETDATA_THREAD *)ptr; + error("THREADS: internal error - thread local variable does not match the one passed to this function. 
Expected thread '%s', passed thread '%s'", netdata_thread->tag, info->tag); + } + + if(!(netdata_thread->options & NETDATA_THREAD_OPTION_DONT_LOG_CLEANUP)) + info("thread with task id %d finished", gettid()); + + freez((void *)netdata_thread->tag); + netdata_thread->tag = NULL; + + freez(netdata_thread); + netdata_thread = NULL; +} + +static void *thread_start(void *ptr) { + netdata_thread = (NETDATA_THREAD *)ptr; + + if(!(netdata_thread->options & NETDATA_THREAD_OPTION_DONT_LOG_STARTUP)) + info("thread created with task id %d", gettid()); + + if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) + error("cannot set pthread cancel type to DEFERRED."); + + if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) + error("cannot set pthread cancel state to ENABLE."); + + void *ret = NULL; + pthread_cleanup_push(thread_cleanup, ptr); + ret = netdata_thread->start_routine(netdata_thread->arg); + pthread_cleanup_pop(1); + + return ret; +} + +int netdata_thread_create(netdata_thread_t *thread, const char *tag, NETDATA_THREAD_OPTIONS options, void *(*start_routine) (void *), void *arg) { + NETDATA_THREAD *info = mallocz(sizeof(NETDATA_THREAD)); + info->arg = arg; + info->thread = thread; + info->tag = strdupz(tag); + info->start_routine = start_routine; + info->options = options; + + int ret = pthread_create(thread, attr, thread_start, info); + if(ret != 0) + error("failed to create new thread for %s. pthread_create() failed with code %d", tag, ret); + + else { + if (!(options & NETDATA_THREAD_OPTION_JOINABLE)) { + int ret2 = pthread_detach(*thread); + if (ret2 != 0) + error("cannot request detach of newly created %s thread. pthread_detach() failed with code %d", tag, ret2); + } + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// netdata_thread_cancel + +int netdata_thread_cancel(netdata_thread_t thread) { + int ret = pthread_cancel(thread); + if(ret != 0) + error("cannot cancel thread. 
pthread_cancel() failed with code %d.", ret); + + return ret; +} + +// ---------------------------------------------------------------------------- +// netdata_thread_join + +int netdata_thread_join(netdata_thread_t thread, void **retval) { + int ret = pthread_join(thread, retval); + if(ret != 0) + error("cannot join thread. pthread_join() failed with code %d.", ret); + + return ret; +} + +int netdata_thread_detach(pthread_t thread) { + int ret = pthread_detach(thread); + if(ret != 0) + error("cannot detach thread. pthread_detach() failed with code %d.", ret); + + return ret; +} diff --git a/src/threads.h b/src/threads.h new file mode 100644 index 000000000..e2ed6a4ff --- /dev/null +++ b/src/threads.h @@ -0,0 +1,33 @@ +#ifndef NETDATA_THREADS_H +#define NETDATA_THREADS_H + +extern pid_t gettid(void); + +typedef enum { + NETDATA_THREAD_OPTION_DEFAULT = 0 << 0, + NETDATA_THREAD_OPTION_JOINABLE = 1 << 0, + NETDATA_THREAD_OPTION_DONT_LOG_STARTUP = 1 << 1, + NETDATA_THREAD_OPTION_DONT_LOG_CLEANUP = 1 << 2, + NETDATA_THREAD_OPTION_DONT_LOG = NETDATA_THREAD_OPTION_DONT_LOG_STARTUP|NETDATA_THREAD_OPTION_DONT_LOG_CLEANUP, +} NETDATA_THREAD_OPTIONS; + +#define netdata_thread_cleanup_push(func, arg) pthread_cleanup_push(func, arg) +#define netdata_thread_cleanup_pop(execute) pthread_cleanup_pop(execute) + +typedef pthread_t netdata_thread_t; + +#define NETDATA_THREAD_TAG_MAX 100 +extern const char *netdata_thread_tag(void); + +extern size_t netdata_threads_init(void); +extern void netdata_threads_init_after_fork(size_t stacksize); + +extern int netdata_thread_create(netdata_thread_t *thread, const char *tag, NETDATA_THREAD_OPTIONS options, void *(*start_routine) (void *), void *arg); +extern int netdata_thread_cancel(netdata_thread_t thread); +extern int netdata_thread_join(netdata_thread_t thread, void **retval); +extern int netdata_thread_detach(pthread_t thread); + +#define netdata_thread_self pthread_self +#define netdata_thread_testcancel pthread_testcancel + +#endif 
//NETDATA_THREADS_H diff --git a/src/unit_test.c b/src/unit_test.c index 821063baf..e3eb146ad 100644 --- a/src/unit_test.c +++ b/src/unit_test.c @@ -1,5 +1,41 @@ #include "common.h" +static int check_number_printing(void) { + struct { + calculated_number n; + const char *correct; + } values[] = { + { .n = 0, .correct = "0" }, + { .n = 0.0000001, .correct = "0.0000001" }, + { .n = 0.00000009, .correct = "0.0000001" }, + { .n = 0.000000001, .correct = "0" }, + { .n = 99.99999999999999999, .correct = "100" }, + { .n = -99.99999999999999999, .correct = "-100" }, + { .n = 123.4567890123456789, .correct = "123.456789" }, + { .n = 9999.9999999, .correct = "9999.9999999" }, + { .n = -9999.9999999, .correct = "-9999.9999999" }, + { .n = 0, .correct = NULL }, + }; + + char netdata[50], system[50]; + int i, failed = 0; + for(i = 0; values[i].correct ; i++) { + print_calculated_number(netdata, values[i].n); + snprintfz(system, 49, "%0.12" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE)values[i].n); + + int ok = 1; + if(strcmp(netdata, values[i].correct) != 0) { + ok = 0; + failed++; + } + + fprintf(stderr, "'%s' (system) printed as '%s' (netdata): %s\n", system, netdata, ok?"OK":"FAILED"); + } + + if(failed) return 1; + return 0; +} + static int check_rrdcalc_comparisons(void) { RRDCALC_STATUS a, b; @@ -92,8 +128,8 @@ int check_storage_number(calculated_number n, int debug) { p, pdiff, pcdiff ); if(len != strlen(buffer)) fprintf(stderr, "ERROR: printed number %s is reported to have length %zu but it has %zu\n", buffer, len, strlen(buffer)); - if(dcdiff > ACCURACY_LOSS) fprintf(stderr, "WARNING: packing number " CALCULATED_NUMBER_FORMAT " has accuracy loss %0.7Lf %%\n", n, dcdiff); - if(pcdiff > ACCURACY_LOSS) fprintf(stderr, "WARNING: re-parsing the packed, unpacked and printed number " CALCULATED_NUMBER_FORMAT " has accuracy loss %0.7Lf %%\n", n, pcdiff); + if(dcdiff > ACCURACY_LOSS) fprintf(stderr, "WARNING: packing number " CALCULATED_NUMBER_FORMAT " has accuracy loss " 
CALCULATED_NUMBER_FORMAT " %%\n", n, dcdiff); + if(pcdiff > ACCURACY_LOSS) fprintf(stderr, "WARNING: re-parsing the packed, unpacked and printed number " CALCULATED_NUMBER_FORMAT " has accuracy loss " CALCULATED_NUMBER_FORMAT " %%\n", n, pcdiff); } if(len != strlen(buffer)) return 1; @@ -136,10 +172,10 @@ void benchmark_storage_number(int loop, int multiplier) { their = (calculated_number)sizeof(calculated_number) * (calculated_number)loop; if(mine > their) { - fprintf(stderr, "\nNETDATA NEEDS %0.2Lf TIMES MORE MEMORY. Sorry!\n", (long double)(mine / their)); + fprintf(stderr, "\nNETDATA NEEDS %0.2" LONG_DOUBLE_MODIFIER " TIMES MORE MEMORY. Sorry!\n", (LONG_DOUBLE)(mine / their)); } else { - fprintf(stderr, "\nNETDATA INTERNAL FLOATING POINT ARITHMETICS NEEDS %0.2Lf TIMES LESS MEMORY.\n", (long double)(their / mine)); + fprintf(stderr, "\nNETDATA INTERNAL FLOATING POINT ARITHMETICS NEEDS %0.2" LONG_DOUBLE_MODIFIER " TIMES LESS MEMORY.\n", (LONG_DOUBLE)(their / mine)); } fprintf(stderr, "\nNETDATA FLOATING POINT\n"); @@ -172,7 +208,7 @@ void benchmark_storage_number(int loop, int multiplier) { total = user + system; mine = total; - fprintf(stderr, "user %0.5Lf, system %0.5Lf, total %0.5Lf\n", (long double)(user / 1000000.0), (long double)(system / 1000000.0), (long double)(total / 1000000.0)); + fprintf(stderr, "user %0.5" LONG_DOUBLE_MODIFIER", system %0.5" LONG_DOUBLE_MODIFIER ", total %0.5" LONG_DOUBLE_MODIFIER "\n", (LONG_DOUBLE)(user / 1000000.0), (LONG_DOUBLE)(system / 1000000.0), (LONG_DOUBLE)(total / 1000000.0)); // ------------------------------------------------------------------------ @@ -196,13 +232,13 @@ void benchmark_storage_number(int loop, int multiplier) { total = user + system; their = total; - fprintf(stderr, "user %0.5Lf, system %0.5Lf, total %0.5Lf\n", (long double)(user / 1000000.0), (long double)(system / 1000000.0), (long double)(total / 1000000.0)); + fprintf(stderr, "user %0.5" LONG_DOUBLE_MODIFIER ", system %0.5" LONG_DOUBLE_MODIFIER ", 
total %0.5" LONG_DOUBLE_MODIFIER "\n", (LONG_DOUBLE)(user / 1000000.0), (LONG_DOUBLE)(system / 1000000.0), (LONG_DOUBLE)(total / 1000000.0)); if(mine > total) { - fprintf(stderr, "NETDATA CODE IS SLOWER %0.2Lf %%\n", (long double)(mine * 100.0 / their - 100.0)); + fprintf(stderr, "NETDATA CODE IS SLOWER %0.2" LONG_DOUBLE_MODIFIER " %%\n", (LONG_DOUBLE)(mine * 100.0 / their - 100.0)); } else { - fprintf(stderr, "NETDATA CODE IS F A S T E R %0.2Lf %%\n", (long double)(their * 100.0 / mine - 100.0)); + fprintf(stderr, "NETDATA CODE IS F A S T E R %0.2" LONG_DOUBLE_MODIFIER " %%\n", (LONG_DOUBLE)(their * 100.0 / mine - 100.0)); } // ------------------------------------------------------------------------ @@ -230,13 +266,13 @@ void benchmark_storage_number(int loop, int multiplier) { total = user + system; mine = total; - fprintf(stderr, "user %0.5Lf, system %0.5Lf, total %0.5Lf\n", (long double)(user / 1000000.0), (long double)(system / 1000000.0), (long double)(total / 1000000.0)); + fprintf(stderr, "user %0.5" LONG_DOUBLE_MODIFIER ", system %0.5" LONG_DOUBLE_MODIFIER ", total %0.5" LONG_DOUBLE_MODIFIER "\n", (LONG_DOUBLE)(user / 1000000.0), (LONG_DOUBLE)(system / 1000000.0), (LONG_DOUBLE)(total / 1000000.0)); if(mine > their) { - fprintf(stderr, "WITH PACKING UNPACKING NETDATA CODE IS SLOWER %0.2Lf %%\n", (long double)(mine * 100.0 / their - 100.0)); + fprintf(stderr, "WITH PACKING UNPACKING NETDATA CODE IS SLOWER %0.2" LONG_DOUBLE_MODIFIER " %%\n", (LONG_DOUBLE)(mine * 100.0 / their - 100.0)); } else { - fprintf(stderr, "EVEN WITH PACKING AND UNPACKING, NETDATA CODE IS F A S T E R %0.2Lf %%\n", (long double)(their * 100.0 / mine - 100.0)); + fprintf(stderr, "EVEN WITH PACKING AND UNPACKING, NETDATA CODE IS F A S T E R %0.2" LONG_DOUBLE_MODIFIER " %%\n", (LONG_DOUBLE)(their * 100.0 / mine - 100.0)); } // ------------------------------------------------------------------------ @@ -308,23 +344,23 @@ int unit_test_str2ld() { int i; for(i = 0; values[i] ; i++) { char 
*e_mine = "hello", *e_sys = "world"; - long double mine = str2ld(values[i], &e_mine); - long double sys = strtold(values[i], &e_sys); + LONG_DOUBLE mine = str2ld(values[i], &e_mine); + LONG_DOUBLE sys = strtold(values[i], &e_sys); if(isnan(mine)) { if(!isnan(sys)) { - fprintf(stderr, "Value '%s' is parsed as %Lf, but system believes it is %Lf.\n", values[i], mine, sys); + fprintf(stderr, "Value '%s' is parsed as %" LONG_DOUBLE_MODIFIER ", but system believes it is %" LONG_DOUBLE_MODIFIER ".\n", values[i], mine, sys); return -1; } } else if(isinf(mine)) { if(!isinf(sys)) { - fprintf(stderr, "Value '%s' is parsed as %Lf, but system believes it is %Lf.\n", values[i], mine, sys); + fprintf(stderr, "Value '%s' is parsed as %" LONG_DOUBLE_MODIFIER ", but system believes it is %" LONG_DOUBLE_MODIFIER ".\n", values[i], mine, sys); return -1; } } else if(mine != sys && abs(mine-sys) > 0.000001) { - fprintf(stderr, "Value '%s' is parsed as %Lf, but system believes it is %Lf, delta %Lf.\n", values[i], mine, sys, sys-mine); + fprintf(stderr, "Value '%s' is parsed as %" LONG_DOUBLE_MODIFIER ", but system believes it is %" LONG_DOUBLE_MODIFIER ", delta %" LONG_DOUBLE_MODIFIER ".\n", values[i], mine, sys, sys-mine); return -1; } @@ -333,7 +369,7 @@ int unit_test_str2ld() { return -1; } - fprintf(stderr, "str2ld() parsed value '%s' exactly the same way with strtold(), returned %Lf vs %Lf\n", values[i], mine, sys); + fprintf(stderr, "str2ld() parsed value '%s' exactly the same way with strtold(), returned %" LONG_DOUBLE_MODIFIER " vs %" LONG_DOUBLE_MODIFIER "\n", values[i], mine, sys); } return 0; @@ -1086,7 +1122,7 @@ int run_test(struct test *test) int errors = 0; if(st->counter != test->result_entries) { - fprintf(stderr, " %s stored %lu entries, but we were expecting %lu, ### E R R O R ###\n", test->name, st->counter, test->result_entries); + fprintf(stderr, " %s stored %zu entries, but we were expecting %lu, ### E R R O R ###\n", test->name, st->counter, test->result_entries); 
errors++; } @@ -1163,6 +1199,9 @@ static int test_variable_renames(void) { int run_all_mockup_tests(void) { + if(check_number_printing()) + return 1; + if(check_rrdcalc_comparisons()) return 1; diff --git a/src/web_api_v1.c b/src/web_api_v1.c index 02c6b0edd..c32660c81 100644 --- a/src/web_api_v1.c +++ b/src/web_api_v1.c @@ -40,6 +40,10 @@ static struct { , {"google-json" , 0 , RRDR_OPTION_GOOGLE_JSON} , {"percentage" , 0 , RRDR_OPTION_PERCENTAGE} , {"unaligned" , 0 , RRDR_OPTION_NOT_ALIGNED} + , {"match_ids" , 0 , RRDR_OPTION_MATCH_IDS} + , {"match-ids" , 0 , RRDR_OPTION_MATCH_IDS} + , {"match_names" , 0 , RRDR_OPTION_MATCH_NAMES} + , {"match-names" , 0 , RRDR_OPTION_MATCH_NAMES} , { NULL, 0, 0} }; @@ -250,7 +254,7 @@ inline int web_client_api_request_v1_charts(RRDHOST *host, struct web_client *w, inline int web_client_api_request_v1_allmetrics(RRDHOST *host, struct web_client *w, char *url) { int format = ALLMETRICS_SHELL; - int help = 0, types = 0, names = backend_send_names; // prometheus options + int help = 0, types = 0, timestamps = 1, names = backend_send_names; // prometheus options const char *prometheus_server = w->client_ip; uint32_t prometheus_options = backend_options; const char *prometheus_prefix = backend_prefix; @@ -293,6 +297,12 @@ inline int web_client_api_request_v1_allmetrics(RRDHOST *host, struct web_client else names = 0; } + else if(!strcmp(name, "timestamps")) { + if(!strcmp(value, "yes")) + timestamps = 1; + else + timestamps = 0; + } else if(!strcmp(name, "server")) { prometheus_server = value; } @@ -320,12 +330,12 @@ inline int web_client_api_request_v1_allmetrics(RRDHOST *host, struct web_client case ALLMETRICS_PROMETHEUS: w->response.data->contenttype = CT_PROMETHEUS; - rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(host, w->response.data, prometheus_server, prometheus_prefix, prometheus_options, help, types, names); + rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(host, w->response.data, prometheus_server, 
prometheus_prefix, prometheus_options, help, types, names, timestamps); return 200; case ALLMETRICS_PROMETHEUS_ALL_HOSTS: w->response.data->contenttype = CT_PROMETHEUS; - rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(host, w->response.data, prometheus_server, prometheus_prefix, prometheus_options, help, types, names); + rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(host, w->response.data, prometheus_server, prometheus_prefix, prometheus_options, help, types, names, timestamps); return 200; default: @@ -357,6 +367,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u , *value_color = NULL , *refresh_str = NULL , *precision_str = NULL + , *scale_str = NULL , *alarm = NULL; int group = GROUP_AVERAGE; @@ -400,6 +411,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u else if(!strcmp(name, "divide")) divide_str = value; else if(!strcmp(name, "refresh")) refresh_str = value; else if(!strcmp(name, "precision")) precision_str = value; + else if(!strcmp(name, "scale")) scale_str = value; else if(!strcmp(name, "alarm")) alarm = value; } @@ -409,11 +421,13 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u goto cleanup; } + int scale = (scale_str && *scale_str)?str2i(scale_str):100; + RRDSET *st = rrdset_find(host, chart); if(!st) st = rrdset_find_byname(host, chart); if(!st) { buffer_no_cacheable(w->response.data); - buffer_svg(w->response.data, "chart not found", NAN, "", NULL, NULL, -1, 0); + buffer_svg(w->response.data, "chart not found", NAN, "", NULL, NULL, -1, scale, 0); ret = 200; goto cleanup; } @@ -424,7 +438,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u rc = rrdcalc_find(st, alarm); if (!rc) { buffer_no_cacheable(w->response.data); - buffer_svg(w->response.data, "alarm not found", NAN, "", NULL, NULL, -1, 0); + buffer_svg(w->response.data, "alarm not found", NAN, "", NULL, NULL, -1, scale, 0); ret = 200; goto 
cleanup; } @@ -541,6 +555,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u label_color, value_color, precision, + scale, options ); ret = 200; @@ -554,7 +569,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u // if the collected value is too old, don't calculate its value if (rrdset_last_entry_t(st) >= (now_realtime_sec() - (st->update_every * st->gap_when_lost_iterations_above))) ret = rrdset2value_api_v1(st, w->response.data, &n, (dimensions) ? buffer_tostring(dimensions) : NULL - , points, after, before, group, options, NULL, &latest_timestamp, &value_is_null); + , points, after, before, group, 0, options, NULL, &latest_timestamp, &value_is_null); // if the value cannot be calculated, show empty badge if (ret != 200) { @@ -577,6 +592,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u label_color, value_color, precision, + scale, options ); } @@ -607,6 +623,7 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c char *chart = NULL , *before_str = NULL , *after_str = NULL + , *group_time_str = NULL , *points_str = NULL; int group = GROUP_AVERAGE; @@ -635,6 +652,7 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c else if(!strcmp(name, "after")) after_str = value; else if(!strcmp(name, "before")) before_str = value; else if(!strcmp(name, "points")) points_str = value; + else if(!strcmp(name, "gtime")) group_time_str = value; else if(!strcmp(name, "group")) { group = web_client_api_request_v1_data_group(value, GROUP_AVERAGE); } @@ -701,6 +719,7 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c long long before = (before_str && *before_str)?str2l(before_str):0; long long after = (after_str && *after_str) ?str2l(after_str):0; int points = (points_str && *points_str)?str2i(points_str):0; + long group_time = (group_time_str && *group_time_str)?str2l(group_time_str):0; 
debug(D_WEB_CLIENT, "%llu: API command 'data' for chart '%s', dimensions '%s', after '%lld', before '%lld', points '%d', group '%d', format '%u', options '0x%08x'" , w->id @@ -739,8 +758,8 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c buffer_strcat(w->response.data, "("); } - ret = rrdset2anything_api_v1(st, w->response.data, dimensions, format, points, after, before, group, options - , &last_timestamp_in_data); + ret = rrdset2anything_api_v1(st, w->response.data, dimensions, format, points, after, before, group, group_time + , options, &last_timestamp_in_data); if(format == DATASOURCE_DATATABLE_JSONP) { if(google_timestamp < last_timestamp_in_data) diff --git a/src/web_buffer.c b/src/web_buffer.c index f5452452f..50c76f6d6 100644 --- a/src/web_buffer.c +++ b/src/web_buffer.c @@ -21,7 +21,7 @@ static inline void _buffer_overflow_check(BUFFER *b, const char *file, const cha b->len = b->size; } - if(b->buffer[b->size] != '\0' || strcmp(&b->buffer[b->size + 1], BUFFER_OVERFLOW_EOF)) { + if(b->buffer[b->size] != '\0' || strcmp(&b->buffer[b->size + 1], BUFFER_OVERFLOW_EOF) != 0) { error("BUFFER: detected overflow at line %lu, at function %s() of file '%s'.", line, function, file); buffer_overflow_init(b); } @@ -160,8 +160,6 @@ void buffer_strcat(BUFFER *wb, const char *txt) void buffer_strcat_htmlescape(BUFFER *wb, const char *txt) { - char b[2] = { [0] = '\0', [1] = '\0' }; - while(*txt) { switch(*txt) { case '&': buffer_strcat(wb, "&"); break; @@ -171,12 +169,14 @@ void buffer_strcat_htmlescape(BUFFER *wb, const char *txt) case '/': buffer_strcat(wb, "/"); break; case '\'': buffer_strcat(wb, "'"); break; default: { - b[0] = *txt; - buffer_strcat(wb, b); + buffer_need_bytes(wb, 1); + wb->buffer[wb->len++] = *txt; } } txt++; } + + buffer_overflow_check(wb); } void buffer_snprintf(BUFFER *wb, size_t len, const char *fmt, ...) 
diff --git a/src/web_buffer.h b/src/web_buffer.h index 177abc0a8..694c9d4ce 100644 --- a/src/web_buffer.h +++ b/src/web_buffer.h @@ -47,8 +47,6 @@ typedef struct web_buffer { #define buffer_strlen(wb) ((wb)->len) extern const char *buffer_tostring(BUFFER *wb); -#define buffer_need_bytes(buffer, needed_free_size) do { if(unlikely((buffer)->size - (buffer)->len < (size_t)(needed_free_size))) buffer_increase((buffer), (size_t)(needed_free_size)); } while(0) - #define buffer_flush(wb) wb->buffer[(wb)->len = 0] = '\0' extern void buffer_reset(BUFFER *wb); @@ -75,4 +73,9 @@ extern char *print_number_llu_r_smart(char *str, unsigned long long uvalue); extern void buffer_print_llu(BUFFER *wb, unsigned long long uvalue); +static inline void buffer_need_bytes(BUFFER *buffer, size_t needed_free_size) { + if(unlikely(buffer->size - buffer->len < needed_free_size)) + buffer_increase(buffer, needed_free_size); +} + #endif /* NETDATA_WEB_BUFFER_H */ diff --git a/src/web_buffer_svg.c b/src/web_buffer_svg.c index 25128bd32..c05e526ed 100644 --- a/src/web_buffer_svg.c +++ b/src/web_buffer_svg.c @@ -270,7 +270,7 @@ double verdana11_widths[256] = { // find the width of the string using the verdana 11points font // re-write the string in place, skiping zero-length characters -static inline int verdana11_width(char *s) { +static inline double verdana11_width(char *s) { double w = 0.0; char *d = s; @@ -290,7 +290,7 @@ static inline int verdana11_width(char *s) { *d = '\0'; w -= VERDANA_KERNING; w += VERDANA_PADDING; - return (int)ceil(w); + return w; } static inline size_t escape_xmlz(char *dst, const char *src, size_t len) { @@ -386,16 +386,16 @@ static inline char *format_value_with_precision_and_unit(char *value_string, siz } if(isgreaterequal(abs, 1000)) { - len = snprintfz(value_string, value_string_len, "%0.0Lf", (long double) value); + len = snprintfz(value_string, value_string_len, "%0.0" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); trim_zeros = 0; } - else if(isgreaterequal(abs, 
10)) len = snprintfz(value_string, value_string_len, "%0.1Lf", (long double) value); - else if(isgreaterequal(abs, 1)) len = snprintfz(value_string, value_string_len, "%0.2Lf", (long double) value); - else if(isgreaterequal(abs, 0.1)) len = snprintfz(value_string, value_string_len, "%0.2Lf", (long double) value); - else if(isgreaterequal(abs, 0.01)) len = snprintfz(value_string, value_string_len, "%0.4Lf", (long double) value); - else if(isgreaterequal(abs, 0.001)) len = snprintfz(value_string, value_string_len, "%0.5Lf", (long double) value); - else if(isgreaterequal(abs, 0.0001)) len = snprintfz(value_string, value_string_len, "%0.6Lf", (long double) value); - else len = snprintfz(value_string, value_string_len, "%0.7Lf", (long double) value); + else if(isgreaterequal(abs, 10)) len = snprintfz(value_string, value_string_len, "%0.1" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); + else if(isgreaterequal(abs, 1)) len = snprintfz(value_string, value_string_len, "%0.2" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); + else if(isgreaterequal(abs, 0.1)) len = snprintfz(value_string, value_string_len, "%0.2" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); + else if(isgreaterequal(abs, 0.01)) len = snprintfz(value_string, value_string_len, "%0.4" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); + else if(isgreaterequal(abs, 0.001)) len = snprintfz(value_string, value_string_len, "%0.5" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); + else if(isgreaterequal(abs, 0.0001)) len = snprintfz(value_string, value_string_len, "%0.6" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); + else len = snprintfz(value_string, value_string_len, "%0.7" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE) value); if(unlikely(trim_zeros)) { int l; @@ -422,7 +422,7 @@ static inline char *format_value_with_precision_and_unit(char *value_string, siz } else { if(precision > 50) precision = 50; - snprintfz(value_string, value_string_len, "%0.*Lf%s%s", precision, (long double) value, separator, units); + snprintfz(value_string, 
value_string_len, "%0.*" LONG_DOUBLE_MODIFIER "%s%s", precision, (LONG_DOUBLE) value, separator, units); } return value_string; @@ -752,7 +752,7 @@ static inline void calc_colorz(const char *color, char *final, size_t len, calcu // colors #define COLOR_STRING_SIZE 100 -void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision, uint32_t options) { +void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision, int scale, uint32_t options) { char label_buffer[LABEL_STRING_SIZE + 1] , value_color_buffer[COLOR_STRING_SIZE + 1] , value_string[VALUE_STRING_SIZE + 1] @@ -761,7 +761,9 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch , label_color_escaped[COLOR_STRING_SIZE + 1] , value_color_escaped[COLOR_STRING_SIZE + 1]; - int label_width, value_width, total_width; + double label_width, value_width, total_width, height = 20.0, font_size = 11.0, text_offset = 5.8, round_corner = 3.0; + + if(scale < 100) scale = 100; if(unlikely(!label_color || !*label_color)) label_color = "#555"; @@ -786,35 +788,45 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch wb->contenttype = CT_IMAGE_SVG_XML; + total_width = total_width * scale / 100.0; + height = height * scale / 100.0; + font_size = font_size * scale / 100.0; + text_offset = text_offset * scale / 100.0; + label_width = label_width * scale / 100.0; + value_width = value_width * scale / 100.0; + round_corner = round_corner * scale / 100.0; + // svg template from: // https://raw.githubusercontent.com/badges/shields/master/templates/flat-template.svg buffer_sprintf(wb, - "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"%d\" height=\"20\">" + "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" 
width=\"%0.2f\" height=\"%0.2f\">" "<linearGradient id=\"smooth\" x2=\"0\" y2=\"100%%\">" "<stop offset=\"0\" stop-color=\"#bbb\" stop-opacity=\".1\"/>" "<stop offset=\"1\" stop-opacity=\".1\"/>" "</linearGradient>" "<mask id=\"round\">" - "<rect width=\"%d\" height=\"20\" rx=\"3\" fill=\"#fff\"/>" + "<rect width=\"%0.2f\" height=\"%0.2f\" rx=\"%0.2f\" fill=\"#fff\"/>" "</mask>" "<g mask=\"url(#round)\">" - "<rect width=\"%d\" height=\"20\" fill=\"%s\"/>" - "<rect x=\"%d\" width=\"%d\" height=\"20\" fill=\"%s\"/>" - "<rect width=\"%d\" height=\"20\" fill=\"url(#smooth)\"/>" + "<rect width=\"%0.2f\" height=\"%0.2f\" fill=\"%s\"/>" + "<rect x=\"%0.2f\" width=\"%0.2f\" height=\"%0.2f\" fill=\"%s\"/>" + "<rect width=\"%0.2f\" height=\"%0.2f\" fill=\"url(#smooth)\"/>" "</g>" - "<g fill=\"#fff\" text-anchor=\"middle\" font-family=\"DejaVu Sans,Verdana,Geneva,sans-serif\" font-size=\"11\">" - "<text x=\"%d\" y=\"15\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" - "<text x=\"%d\" y=\"14\">%s</text>" - "<text x=\"%d\" y=\"15\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" - "<text x=\"%d\" y=\"14\">%s</text>" + "<g fill=\"#fff\" text-anchor=\"middle\" font-family=\"DejaVu Sans,Verdana,Geneva,sans-serif\" font-size=\"%0.2f\">" + "<text x=\"%0.2f\" y=\"%0.0f\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" + "<text x=\"%0.2f\" y=\"%0.0f\">%s</text>" + "<text x=\"%0.2f\" y=\"%0.0f\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" + "<text x=\"%0.2f\" y=\"%0.0f\">%s</text>" "</g>" "</svg>", - total_width, total_width, - label_width, label_color_escaped, - label_width, value_width, value_color_escaped, - total_width, - label_width / 2, label_escaped, - label_width / 2, label_escaped, - label_width + value_width / 2 -1, value_escaped, - label_width + value_width / 2 -1, value_escaped); + total_width, height, + total_width, height, round_corner, + label_width, height, label_color_escaped, + label_width, value_width, height, value_color_escaped, + total_width, height, + 
font_size, + label_width / 2, ceil(height - text_offset), label_escaped, + label_width / 2, ceil(height - text_offset - 1.0), label_escaped, + label_width + value_width / 2 -1, ceil(height - text_offset), value_escaped, + label_width + value_width / 2 -1, ceil(height - text_offset - 1.0), value_escaped); } diff --git a/src/web_buffer_svg.h b/src/web_buffer_svg.h index c09ef7bca..c23abf0dc 100644 --- a/src/web_buffer_svg.h +++ b/src/web_buffer_svg.h @@ -1,7 +1,7 @@ #ifndef NETDATA_WEB_BUFFER_SVG_H #define NETDATA_WEB_BUFFER_SVG_H 1 -extern void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision, uint32_t options); +extern void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision, int scale, uint32_t options); extern char *format_value_and_unit(char *value_string, size_t value_string_len, calculated_number value, const char *units, int precision); #endif /* NETDATA_WEB_BUFFER_SVG_H */ diff --git a/src/web_client.c b/src/web_client.c index e17bac922..477fb3d57 100644 --- a/src/web_client.c +++ b/src/web_client.c @@ -1,28 +1,22 @@ #include "common.h" -#define INITIAL_WEB_DATA_LENGTH 16384 -#define WEB_REQUEST_LENGTH 16384 -#define TOO_BIG_REQUEST 16384 +// this is an async I/O implementation of the web server request parser +// it is used by all netdata web servers -int web_client_timeout = DEFAULT_DISCONNECT_IDLE_WEB_CLIENTS_AFTER_SECONDS; int respect_web_browser_do_not_track_policy = 0; char *web_x_frame_options = NULL; -SIMPLE_PATTERN *web_allow_connections_from = NULL; -SIMPLE_PATTERN *web_allow_streaming_from = NULL; -SIMPLE_PATTERN *web_allow_netdataconf_from = NULL; - -// WEB_CLIENT_ACL -SIMPLE_PATTERN *web_allow_dashboard_from = NULL; -SIMPLE_PATTERN *web_allow_registry_from = NULL; -SIMPLE_PATTERN *web_allow_badges_from = NULL; - #ifdef NETDATA_WITH_ZLIB int 
web_enable_gzip = 1, web_gzip_level = 3, web_gzip_strategy = Z_DEFAULT_STRATEGY; #endif /* NETDATA_WITH_ZLIB */ -struct web_client *web_clients = NULL; -unsigned long long web_clients_count = 0; +inline int web_client_permission_denied(struct web_client *w) { + w->response.data->contenttype = CT_TEXT_PLAIN; + buffer_flush(w->response.data); + buffer_strcat(w->response.data, "You are not allowed to access this resource."); + w->response.code = 403; + return 403; +} static inline int web_client_crock_socket(struct web_client *w) { #ifdef TCP_CORK @@ -59,87 +53,7 @@ static inline int web_client_uncrock_socket(struct web_client *w) { return 0; } -inline int web_client_permission_denied(struct web_client *w) { - w->response.data->contenttype = CT_TEXT_PLAIN; - buffer_flush(w->response.data); - buffer_strcat(w->response.data, "You are not allowed to access this resource."); - w->response.code = 403; - return 403; -} - -static void log_connection(struct web_client *w, const char *msg) { - log_access("%llu: %d '[%s]:%s' '%s'", w->id, gettid(), w->client_ip, w->client_port, msg); -} - -static void web_client_update_acl_matches(struct web_client *w) { - w->acl = WEB_CLIENT_ACL_NONE; - - if(!web_allow_dashboard_from || simple_pattern_matches(web_allow_dashboard_from, w->client_ip)) - w->acl |= WEB_CLIENT_ACL_DASHBOARD; - - if(!web_allow_registry_from || simple_pattern_matches(web_allow_registry_from, w->client_ip)) - w->acl |= WEB_CLIENT_ACL_REGISTRY; - - if(!web_allow_badges_from || simple_pattern_matches(web_allow_badges_from, w->client_ip)) - w->acl |= WEB_CLIENT_ACL_BADGE; -} - -struct web_client *web_client_create(int listener) { - struct web_client *w; - - w = callocz(1, sizeof(struct web_client)); - w->id = ++web_clients_count; - w->mode = WEB_CLIENT_MODE_NORMAL; - - { - w->ifd = accept_socket(listener, SOCK_NONBLOCK, w->client_ip, sizeof(w->client_ip), w->client_port, sizeof(w->client_port), web_allow_connections_from); - - if(unlikely(!*w->client_ip)) 
strcpy(w->client_ip, "-"); - if(unlikely(!*w->client_port)) strcpy(w->client_port, "-"); - - if (w->ifd == -1) { - if(errno == EPERM) - log_connection(w, "ACCESS DENIED"); - else { - log_connection(w, "CONNECTION FAILED"); - error("%llu: Failed to accept new incoming connection.", w->id); - } - - freez(w); - return NULL; - } - else - log_connection(w, "CONNECTED"); - - w->ofd = w->ifd; - - int flag = 1; - if(setsockopt(w->ofd, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int)) != 0) - error("%llu: failed to enable TCP_NODELAY on socket.", w->id); - - flag = 1; - if(setsockopt(w->ifd, SOL_SOCKET, SO_KEEPALIVE, (char *) &flag, sizeof(int)) != 0) - error("%llu: Cannot set SO_KEEPALIVE on socket.", w->id); - } - - web_client_update_acl_matches(w); - - w->response.data = buffer_create(INITIAL_WEB_DATA_LENGTH); - w->response.header = buffer_create(HTTP_RESPONSE_HEADER_SIZE); - w->response.header_output = buffer_create(HTTP_RESPONSE_HEADER_SIZE); - w->origin[0] = '*'; - web_client_enable_wait_receive(w); - - if(web_clients) web_clients->prev = w; - w->next = web_clients; - web_clients = w; - - web_client_connected(); - - return(w); -} - -void web_client_reset(struct web_client *w) { +void web_client_request_done(struct web_client *w) { web_client_uncrock_socket(w); debug(D_WEB_CLIENT, "%llu: Resetting client.", w->id); @@ -213,7 +127,11 @@ void web_client_reset(struct web_client *w) { if(unlikely(w->mode == WEB_CLIENT_MODE_FILECOPY)) { if(w->ifd != w->ofd) { debug(D_WEB_CLIENT, "%llu: Closing filecopy input file descriptor %d.", w->id, w->ifd); - if(w->ifd != -1) close(w->ifd); + + if(web_server_mode != WEB_SERVER_MODE_STATIC_THREADED) { + if (w->ifd != -1) close(w->ifd); + } + w->ifd = w->ofd; } } @@ -224,6 +142,8 @@ void web_client_reset(struct web_client *w) { w->origin[0] = '*'; w->origin[1] = '\0'; + freez(w->user_agent); w->user_agent = NULL; + w->mode = WEB_CLIENT_MODE_NORMAL; w->tcp_cork = 0; @@ -239,6 +159,9 @@ void web_client_reset(struct web_client *w) { 
w->response.sent = 0; w->response.code = 0; + w->header_parse_tries = 0; + w->header_parse_last_size = 0; + web_client_enable_wait_receive(w); web_client_disable_wait_send(w); @@ -260,28 +183,6 @@ void web_client_reset(struct web_client *w) { #endif // NETDATA_WITH_ZLIB } -struct web_client *web_client_free(struct web_client *w) { - web_client_reset(w); - - struct web_client *n = w->next; - if(w == web_clients) web_clients = n; - - debug(D_WEB_CLIENT_ACCESS, "%llu: Closing web client from %s port %s.", w->id, w->client_ip, w->client_port); - - if(w->prev) w->prev->next = w->next; - if(w->next) w->next->prev = w->prev; - buffer_free(w->response.header_output); - buffer_free(w->response.header); - buffer_free(w->response.data); - if(w->ifd != -1) close(w->ifd); - if(w->ofd != -1 && w->ofd != w->ifd) close(w->ofd); - freez(w); - - web_client_disconnected(); - - return(n); -} - uid_t web_files_uid(void) { static char *web_owner = NULL; static uid_t owner_uid = 0; @@ -344,6 +245,81 @@ gid_t web_files_gid(void) { return(owner_gid); } +static struct { + const char *extension; + uint32_t hash; + uint8_t contenttype; +} mime_types[] = { + { "html" , 0 , CT_TEXT_HTML} + , {"js" , 0 , CT_APPLICATION_X_JAVASCRIPT} + , {"css" , 0 , CT_TEXT_CSS} + , {"xml" , 0 , CT_TEXT_XML} + , {"xsl" , 0 , CT_TEXT_XSL} + , {"txt" , 0 , CT_TEXT_PLAIN} + , {"svg" , 0 , CT_IMAGE_SVG_XML} + , {"ttf" , 0 , CT_APPLICATION_X_FONT_TRUETYPE} + , {"otf" , 0 , CT_APPLICATION_X_FONT_OPENTYPE} + , {"woff2", 0 , CT_APPLICATION_FONT_WOFF2} + , {"woff" , 0 , CT_APPLICATION_FONT_WOFF} + , {"eot" , 0 , CT_APPLICATION_VND_MS_FONTOBJ} + , {"png" , 0 , CT_IMAGE_PNG} + , {"jpg" , 0 , CT_IMAGE_JPG} + , {"jpeg" , 0 , CT_IMAGE_JPG} + , {"gif" , 0 , CT_IMAGE_GIF} + , {"bmp" , 0 , CT_IMAGE_BMP} + , {"ico" , 0 , CT_IMAGE_XICON} + , {"icns" , 0 , CT_IMAGE_ICNS} + , { NULL, 0, 0} +}; + +static inline uint8_t contenttype_for_filename(const char *filename) { + // info("checking filename '%s'", filename); + + static int 
initialized = 0; + int i; + + if(unlikely(!initialized)) { + for (i = 0; mime_types[i].extension; i++) + mime_types[i].hash = simple_hash(mime_types[i].extension); + + initialized = 1; + } + + const char *s = filename, *last_dot = NULL; + + // find the last dot + while(*s) { + if(unlikely(*s == '.')) last_dot = s; + s++; + } + + if(unlikely(!last_dot || !*last_dot || !last_dot[1])) { + // info("no extension for filename '%s'", filename); + return CT_APPLICATION_OCTET_STREAM; + } + last_dot++; + + // info("extension for filename '%s' is '%s'", filename, last_dot); + + uint32_t hash = simple_hash(last_dot); + for(i = 0; mime_types[i].extension ; i++) { + if(unlikely(hash == mime_types[i].hash && !strcmp(last_dot, mime_types[i].extension))) { + // info("matched extension for filename '%s': '%s'", filename, last_dot); + return mime_types[i].contenttype; + } + } + + // info("not matched extension for filename '%s': '%s'", filename, last_dot); + return CT_APPLICATION_OCTET_STREAM; +} + +static inline int access_to_file_is_not_permitted(struct web_client *w, const char *filename) { + w->response.data->contenttype = CT_TEXT_HTML; + buffer_strcat(w->response.data, "Access to file is not permitted: "); + buffer_strcat_htmlescape(w->response.data, filename); + return 403; +} + int mysendfile(struct web_client *w, char *filename) { debug(D_WEB_CLIENT, "%llu: Looking for file '%s/%s'", w->id, netdata_configured_web_dir, filename); @@ -357,6 +333,7 @@ int mysendfile(struct web_client *w, char *filename) { if(strncmp(filename, WEB_PATH_FILE "/", strlen(WEB_PATH_FILE) + 1) == 0) filename = &filename[strlen(WEB_PATH_FILE) + 1]; + // if the filename contains "strange" characters, refuse to serve it char *s; for(s = filename; *s ;s++) { if( !isalnum(*s) && *s != '/' && *s != '.' 
&& *s != '-' && *s != '_') { @@ -377,49 +354,45 @@ int mysendfile(struct web_client *w, char *filename) { return 400; } - // access the file + // find the physical file on disk char webfilename[FILENAME_MAX + 1]; snprintfz(webfilename, FILENAME_MAX, "%s/%s", netdata_configured_web_dir, filename); - // check if the file exists - struct stat stat; - if(lstat(webfilename, &stat) != 0) { - debug(D_WEB_CLIENT_ACCESS, "%llu: File '%s' is not found.", w->id, webfilename); - w->response.data->contenttype = CT_TEXT_HTML; - buffer_strcat(w->response.data, "File does not exist, or is not accessible: "); - buffer_strcat_htmlescape(w->response.data, webfilename); - return 404; - } + struct stat statbuf; + int done = 0; + while(!done) { + // check if the file exists + if (lstat(webfilename, &statbuf) != 0) { + debug(D_WEB_CLIENT_ACCESS, "%llu: File '%s' is not found.", w->id, webfilename); + w->response.data->contenttype = CT_TEXT_HTML; + buffer_strcat(w->response.data, "File does not exist, or is not accessible: "); + buffer_strcat_htmlescape(w->response.data, webfilename); + return 404; + } - // check if the file is owned by expected user - if(stat.st_uid != web_files_uid()) { - error("%llu: File '%s' is owned by user %u (expected user %u). Access Denied.", w->id, webfilename, stat.st_uid, web_files_uid()); - w->response.data->contenttype = CT_TEXT_HTML; - buffer_strcat(w->response.data, "Access to file is not permitted: "); - buffer_strcat_htmlescape(w->response.data, webfilename); - return 403; - } + if ((statbuf.st_mode & S_IFMT) == S_IFDIR) { + snprintfz(webfilename, FILENAME_MAX, "%s/%s/index.html", netdata_configured_web_dir, filename); + continue; + } - // check if the file is owned by expected group - if(stat.st_gid != web_files_gid()) { - error("%llu: File '%s' is owned by group %u (expected group %u). 
Access Denied.", w->id, webfilename, stat.st_gid, web_files_gid()); - w->response.data->contenttype = CT_TEXT_HTML; - buffer_strcat(w->response.data, "Access to file is not permitted: "); - buffer_strcat_htmlescape(w->response.data, webfilename); - return 403; - } + if ((statbuf.st_mode & S_IFMT) != S_IFREG) { + error("%llu: File '%s' is not a regular file. Access Denied.", w->id, webfilename); + return access_to_file_is_not_permitted(w, webfilename); + } - if((stat.st_mode & S_IFMT) == S_IFDIR) { - snprintfz(webfilename, FILENAME_MAX, "%s/index.html", filename); - return mysendfile(w, webfilename); - } + // check if the file is owned by expected user + if (statbuf.st_uid != web_files_uid()) { + error("%llu: File '%s' is owned by user %u (expected user %u). Access Denied.", w->id, webfilename, statbuf.st_uid, web_files_uid()); + return access_to_file_is_not_permitted(w, webfilename); + } - if((stat.st_mode & S_IFMT) != S_IFREG) { - error("%llu: File '%s' is not a regular file. Access Denied.", w->id, webfilename); - w->response.data->contenttype = CT_TEXT_HTML; - buffer_strcat(w->response.data, "Access to file is not permitted: "); - buffer_strcat_htmlescape(w->response.data, webfilename); - return 403; + // check if the file is owned by expected group + if (statbuf.st_gid != web_files_gid()) { + error("%llu: File '%s' is owned by group %u (expected group %u). 
Access Denied.", w->id, webfilename, statbuf.st_gid, web_files_gid()); + return access_to_file_is_not_permitted(w, webfilename); + } + + done = 1; } // open the file @@ -446,39 +419,19 @@ int mysendfile(struct web_client *w, char *filename) { sock_setnonblock(w->ifd); - // pick a Content-Type for the file - if(strstr(filename, ".html") != NULL) w->response.data->contenttype = CT_TEXT_HTML; - else if(strstr(filename, ".js") != NULL) w->response.data->contenttype = CT_APPLICATION_X_JAVASCRIPT; - else if(strstr(filename, ".css") != NULL) w->response.data->contenttype = CT_TEXT_CSS; - else if(strstr(filename, ".xml") != NULL) w->response.data->contenttype = CT_TEXT_XML; - else if(strstr(filename, ".xsl") != NULL) w->response.data->contenttype = CT_TEXT_XSL; - else if(strstr(filename, ".txt") != NULL) w->response.data->contenttype = CT_TEXT_PLAIN; - else if(strstr(filename, ".svg") != NULL) w->response.data->contenttype = CT_IMAGE_SVG_XML; - else if(strstr(filename, ".ttf") != NULL) w->response.data->contenttype = CT_APPLICATION_X_FONT_TRUETYPE; - else if(strstr(filename, ".otf") != NULL) w->response.data->contenttype = CT_APPLICATION_X_FONT_OPENTYPE; - else if(strstr(filename, ".woff2")!= NULL) w->response.data->contenttype = CT_APPLICATION_FONT_WOFF2; - else if(strstr(filename, ".woff") != NULL) w->response.data->contenttype = CT_APPLICATION_FONT_WOFF; - else if(strstr(filename, ".eot") != NULL) w->response.data->contenttype = CT_APPLICATION_VND_MS_FONTOBJ; - else if(strstr(filename, ".png") != NULL) w->response.data->contenttype = CT_IMAGE_PNG; - else if(strstr(filename, ".jpg") != NULL) w->response.data->contenttype = CT_IMAGE_JPG; - else if(strstr(filename, ".jpeg") != NULL) w->response.data->contenttype = CT_IMAGE_JPG; - else if(strstr(filename, ".gif") != NULL) w->response.data->contenttype = CT_IMAGE_GIF; - else if(strstr(filename, ".bmp") != NULL) w->response.data->contenttype = CT_IMAGE_BMP; - else if(strstr(filename, ".ico") != NULL) 
w->response.data->contenttype = CT_IMAGE_XICON; - else if(strstr(filename, ".icns") != NULL) w->response.data->contenttype = CT_IMAGE_ICNS; - else w->response.data->contenttype = CT_APPLICATION_OCTET_STREAM; - - debug(D_WEB_CLIENT_ACCESS, "%llu: Sending file '%s' (%ld bytes, ifd %d, ofd %d).", w->id, webfilename, stat.st_size, w->ifd, w->ofd); + w->response.data->contenttype = contenttype_for_filename(webfilename); + debug(D_WEB_CLIENT_ACCESS, "%llu: Sending file '%s' (%ld bytes, ifd %d, ofd %d).", w->id, webfilename, statbuf.st_size, w->ifd, w->ofd); w->mode = WEB_CLIENT_MODE_FILECOPY; web_client_enable_wait_receive(w); web_client_disable_wait_send(w); buffer_flush(w->response.data); - w->response.rlen = stat.st_size; + buffer_need_bytes(w->response.data, (size_t)statbuf.st_size); + w->response.rlen = (size_t)statbuf.st_size; #ifdef __APPLE__ - w->response.data->date = stat.st_mtimespec.tv_sec; + w->response.data->date = statbuf.st_mtimespec.tv_sec; #else - w->response.data->date = stat.st_mtim.tv_sec; + w->response.data->date = statbuf.st_mtim.tv_sec; #endif /* __APPLE__ */ buffer_cacheable(w->response.data); @@ -774,14 +727,15 @@ const char *web_response_code_to_string(int code) { } } -static inline char *http_header_parse(struct web_client *w, char *s) { - static uint32_t hash_origin = 0, hash_connection = 0, hash_accept_encoding = 0, hash_donottrack = 0; +static inline char *http_header_parse(struct web_client *w, char *s, int parse_useragent) { + static uint32_t hash_origin = 0, hash_connection = 0, hash_accept_encoding = 0, hash_donottrack = 0, hash_useragent = 0; if(unlikely(!hash_origin)) { hash_origin = simple_uhash("Origin"); hash_connection = simple_uhash("Connection"); hash_accept_encoding = simple_uhash("Accept-Encoding"); hash_donottrack = simple_uhash("DNT"); + hash_useragent = simple_uhash("User-Agent"); } char *e = s; @@ -814,7 +768,7 @@ static inline char *http_header_parse(struct web_client *w, char *s) { uint32_t hash = simple_uhash(s); if(hash 
== hash_origin && !strcasecmp(s, "Origin")) - strncpyz(w->origin, v, ORIGIN_MAX); + strncpyz(w->origin, v, NETDATA_WEB_REQUEST_ORIGIN_HEADER_SIZE); else if(hash == hash_connection && !strcasecmp(s, "Connection")) { if(strcasestr(v, "keep-alive")) @@ -824,6 +778,9 @@ static inline char *http_header_parse(struct web_client *w, char *s) { if(*v == '0') web_client_disable_donottrack(w); else if(*v == '1') web_client_enable_donottrack(w); } + else if(parse_useragent && hash == hash_useragent && !strcasecmp(s, "User-Agent")) { + w->user_agent = strdupz(v); + } #ifdef NETDATA_WITH_ZLIB else if(hash == hash_accept_encoding && !strcasecmp(s, "Accept-Encoding")) { if(web_enable_gzip) { @@ -848,14 +805,38 @@ static inline char *http_header_parse(struct web_client *w, char *s) { // > 0 : request is not supported // < 0 : request is incomplete - wait for more data -typedef enum http_validation { +typedef enum { HTTP_VALIDATION_OK, HTTP_VALIDATION_NOT_SUPPORTED, HTTP_VALIDATION_INCOMPLETE } HTTP_VALIDATION; static inline HTTP_VALIDATION http_request_validate(struct web_client *w) { - char *s = w->response.data->buffer, *encoded_url = NULL; + char *s = (char *)buffer_tostring(w->response.data), *encoded_url = NULL; + + size_t last_pos = w->header_parse_last_size; + if(last_pos > 4) last_pos -= 4; // allow searching for \r\n\r\n + else last_pos = 0; + + w->header_parse_tries++; + w->header_parse_last_size = buffer_strlen(w->response.data); + + if(w->header_parse_tries > 1) { + if(w->header_parse_last_size < last_pos) + last_pos = 0; + + if(strstr(&s[last_pos], "\r\n\r\n") == NULL) { + if(w->header_parse_tries > 10) { + info("Disabling slow client after %zu attempts to read the request (%zu bytes received)", w->header_parse_tries, buffer_strlen(w->response.data)); + w->header_parse_tries = 0; + w->header_parse_last_size = 0; + web_client_disable_wait_receive(w); + return HTTP_VALIDATION_NOT_SUPPORTED; + } + + return HTTP_VALIDATION_INCOMPLETE; + } + } // is is a valid request? 
if(!strncmp(s, "GET ", 4)) { @@ -871,6 +852,8 @@ static inline HTTP_VALIDATION http_request_validate(struct web_client *w) { w->mode = WEB_CLIENT_MODE_STREAM; } else { + w->header_parse_tries = 0; + w->header_parse_last_size = 0; web_client_disable_wait_receive(w); return HTTP_VALIDATION_NOT_SUPPORTED; } @@ -911,19 +894,23 @@ static inline HTTP_VALIDATION http_request_validate(struct web_client *w) { // a valid complete HTTP request found *ue = '\0'; - url_decode_r(w->decoded_url, encoded_url, URL_MAX + 1); + url_decode_r(w->decoded_url, encoded_url, NETDATA_WEB_REQUEST_URL_SIZE + 1); *ue = ' '; // copy the URL - we are going to overwrite parts of it // FIXME -- we should avoid it - strncpyz(w->last_url, w->decoded_url, URL_MAX); + strncpyz(w->last_url, w->decoded_url, NETDATA_WEB_REQUEST_URL_SIZE); + w->header_parse_tries = 0; + w->header_parse_last_size = 0; web_client_disable_wait_receive(w); return HTTP_VALIDATION_OK; } // another header line - s = http_header_parse(w, s); + s = http_header_parse(w, s, + (w->mode == WEB_CLIENT_MODE_STREAM) // parse user agent + ); } } @@ -965,13 +952,14 @@ static inline void web_client_send_http_header(struct web_client *w) { buffer_sprintf(w->response.header_output, "HTTP/1.1 %d %s\r\n" "Connection: %s\r\n" - "Server: NetData Embedded HTTP Server\r\n" + "Server: NetData Embedded HTTP Server v%s\r\n" "Access-Control-Allow-Origin: %s\r\n" "Access-Control-Allow-Credentials: true\r\n" "Content-Type: %s\r\n" "Date: %s\r\n" , w->response.code, code_msg , web_client_has_keepalive(w)?"keep-alive":"close" + , VERSION , w->origin , content_type_string , date @@ -1104,7 +1092,7 @@ static inline int web_client_switch_host(RRDHOST *host, struct web_client *w, ch // copy the URL, we need it to serve files w->last_url[0] = '/'; - if(url && *url) strncpyz(&w->last_url[1], url, URL_MAX - 1); + if(url && *url) strncpyz(&w->last_url[1], url, NETDATA_WEB_REQUEST_URL_SIZE - 1); else w->last_url[1] = '\0'; uint32_t hash = simple_hash(tok); @@ 
-1320,7 +1308,7 @@ void web_client_process_request(struct web_client *w) { break; case HTTP_VALIDATION_INCOMPLETE: - if(w->response.data->len > TOO_BIG_REQUEST) { + if(w->response.data->len > NETDATA_WEB_REQUEST_MAX_SIZE) { strcpy(w->last_url, "too big request"); debug(D_WEB_CLIENT_ACCESS, "%llu: Received request is too big (%zu bytes).", w->id, w->response.data->len); @@ -1386,7 +1374,7 @@ void web_client_process_request(struct web_client *w) { if(len != w->response.data->rbytes) error("%llu: sendfile() should copy %ld bytes, but copied %ld. Falling back to manual copy.", w->id, w->response.data->rbytes, len); else - web_client_reset(w); + web_client_request_done(w); } */ } @@ -1504,7 +1492,7 @@ ssize_t web_client_send_deflate(struct web_client *w) } // reset the client - web_client_reset(w); + web_client_request_done(w); debug(D_WEB_CLIENT, "%llu: Done sending all data on socket.", w->id); return t; } @@ -1528,7 +1516,7 @@ ssize_t web_client_send_deflate(struct web_client *w) // reset the compressor output buffer w->response.zstream.next_out = w->response.zbuffer; - w->response.zstream.avail_out = ZLIB_CHUNK; + w->response.zstream.avail_out = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE; // ask for FINISH if we have all the input int flush = Z_SYNC_FLUSH; @@ -1544,11 +1532,11 @@ ssize_t web_client_send_deflate(struct web_client *w) // compress if(deflate(&w->response.zstream, flush) == Z_STREAM_ERROR) { error("%llu: Compression failed. Closing down client.", w->id); - web_client_reset(w); + web_client_request_done(w); return(-1); } - w->response.zhave = ZLIB_CHUNK - w->response.zstream.avail_out; + w->response.zhave = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE - w->response.zstream.avail_out; w->response.zsent = 0; // keep track of the bytes passed through the compressor @@ -1615,7 +1603,7 @@ ssize_t web_client_send(struct web_client *w) { return 0; } - web_client_reset(w); + web_client_request_done(w); debug(D_WEB_CLIENT, "%llu: Done sending all data on socket. 
Waiting for next request on the same socket.", w->id); return 0; } @@ -1638,216 +1626,81 @@ ssize_t web_client_send(struct web_client *w) { return(bytes); } -ssize_t web_client_receive(struct web_client *w) +ssize_t web_client_read_file(struct web_client *w) { - // do we have any space for more data? - buffer_need_bytes(w->response.data, WEB_REQUEST_LENGTH); - - ssize_t left = w->response.data->size - w->response.data->len; - ssize_t bytes; + if(unlikely(w->response.rlen > w->response.data->size)) + buffer_need_bytes(w->response.data, w->response.rlen - w->response.data->size); - if(unlikely(w->mode == WEB_CLIENT_MODE_FILECOPY)) - bytes = read(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1)); - else - bytes = recv(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1), MSG_DONTWAIT); + if(unlikely(w->response.rlen <= w->response.data->len)) + return 0; + ssize_t left = w->response.rlen - w->response.data->len; + ssize_t bytes = read(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t)left); if(likely(bytes > 0)) { - if(w->mode != WEB_CLIENT_MODE_FILECOPY) - w->stats_received_bytes += bytes; - size_t old = w->response.data->len; w->response.data->len += bytes; w->response.data->buffer[w->response.data->len] = '\0'; - debug(D_WEB_CLIENT, "%llu: Received %zd bytes.", w->id, bytes); - debug(D_WEB_DATA, "%llu: Received data: '%s'.", w->id, &w->response.data->buffer[old]); + debug(D_WEB_CLIENT, "%llu: Read %zd bytes.", w->id, bytes); + debug(D_WEB_DATA, "%llu: Read data: '%s'.", w->id, &w->response.data->buffer[old]); - if(w->mode == WEB_CLIENT_MODE_FILECOPY) { - web_client_enable_wait_send(w); + web_client_enable_wait_send(w); - if(w->response.rlen && w->response.data->len >= w->response.rlen) - web_client_disable_wait_receive(w); - } + if(w->response.rlen && w->response.data->len >= w->response.rlen) + web_client_disable_wait_receive(w); } else if(likely(bytes == 0)) { - debug(D_WEB_CLIENT, "%llu: Out 
of input data.", w->id); + debug(D_WEB_CLIENT, "%llu: Out of input file data.", w->id); // if we cannot read, it means we have an error on input. // if however, we are copying a file from ifd to ofd, we should not return an error. // in this case, the error should be generated when the file has been sent to the client. - if(w->mode == WEB_CLIENT_MODE_FILECOPY) { - // we are copying data from ifd to ofd - // let it finish copying... - web_client_disable_wait_receive(w); + // we are copying data from ifd to ofd + // let it finish copying... + web_client_disable_wait_receive(w); - debug(D_WEB_CLIENT, "%llu: Read the whole file.", w->id); - if(w->ifd != w->ofd) close(w->ifd); - w->ifd = w->ofd; - } - else { - debug(D_WEB_CLIENT, "%llu: failed to receive data.", w->id); - WEB_CLIENT_IS_DEAD(w); + debug(D_WEB_CLIENT, "%llu: Read the whole file.", w->id); + + if(web_server_mode != WEB_SERVER_MODE_STATIC_THREADED) { + if (w->ifd != w->ofd) close(w->ifd); } + + w->ifd = w->ofd; } else { - debug(D_WEB_CLIENT, "%llu: receive data failed.", w->id); + debug(D_WEB_CLIENT, "%llu: read data failed.", w->id); WEB_CLIENT_IS_DEAD(w); } return(bytes); } - -// -------------------------------------------------------------------------------------- -// the thread of a single client - -// 1. waits for input and output, using async I/O -// 2. it processes HTTP requests -// 3. it generates HTTP responses -// 4. 
it copies data from input to output if mode is FILECOPY - -void *web_client_main(void *ptr) +ssize_t web_client_receive(struct web_client *w) { - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); - - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); - - struct web_client *w = ptr; - struct pollfd fds[2], *ifd, *ofd; - int retval, timeout; - nfds_t fdmax = 0; - - for(;;) { - if(unlikely(netdata_exit)) break; - - if(unlikely(web_client_check_dead(w))) { - debug(D_WEB_CLIENT, "%llu: client is dead.", w->id); - break; - } - else if(unlikely(!web_client_has_wait_receive(w) && !web_client_has_wait_send(w))) { - debug(D_WEB_CLIENT, "%llu: client is not set for neither receiving nor sending data.", w->id); - break; - } - - if(unlikely(w->ifd < 0 || w->ofd < 0)) { - error("%llu: invalid file descriptor, ifd = %d, ofd = %d (required 0 <= fd", w->id, w->ifd, w->ofd); - break; - } - - if(w->ifd == w->ofd) { - fds[0].fd = w->ifd; - fds[0].events = 0; - fds[0].revents = 0; - - if(web_client_has_wait_receive(w)) fds[0].events |= POLLIN; - if(web_client_has_wait_send(w)) fds[0].events |= POLLOUT; - - fds[1].fd = -1; - fds[1].events = 0; - fds[1].revents = 0; - - ifd = ofd = &fds[0]; - - fdmax = 1; - } - else { - fds[0].fd = w->ifd; - fds[0].events = 0; - fds[0].revents = 0; - if(web_client_has_wait_receive(w)) fds[0].events |= POLLIN; - ifd = &fds[0]; - - fds[1].fd = w->ofd; - fds[1].events = 0; - fds[1].revents = 0; - if(web_client_has_wait_send(w)) fds[1].events |= POLLOUT; - ofd = &fds[1]; - - fdmax = 2; - } - - debug(D_WEB_CLIENT, "%llu: Waiting socket async I/O for %s %s", w->id, web_client_has_wait_receive(w)?"INPUT":"", web_client_has_wait_send(w)?"OUTPUT":""); - errno = 0; - timeout = web_client_timeout * 1000; - retval = poll(fds, fdmax, timeout); - - if(unlikely(netdata_exit)) break; - - if(unlikely(retval == -1)) { - if(errno == EAGAIN || 
errno == EINTR) { - debug(D_WEB_CLIENT, "%llu: EAGAIN received.", w->id); - continue; - } - - debug(D_WEB_CLIENT, "%llu: LISTENER: poll() failed (input fd = %d, output fd = %d). Closing client.", w->id, w->ifd, w->ofd); - break; - } - else if(unlikely(!retval)) { - debug(D_WEB_CLIENT, "%llu: Timeout while waiting socket async I/O for %s %s", w->id, web_client_has_wait_receive(w)?"INPUT":"", web_client_has_wait_send(w)?"OUTPUT":""); - break; - } - - if(unlikely(netdata_exit)) break; - - int used = 0; - if(web_client_has_wait_send(w) && ofd->revents & POLLOUT) { - used++; - if(web_client_send(w) < 0) { - debug(D_WEB_CLIENT, "%llu: Cannot send data to client. Closing client.", w->id); - break; - } - } - - if(unlikely(netdata_exit)) break; - - if(web_client_has_wait_receive(w) && (ifd->revents & POLLIN || ifd->revents & POLLPRI)) { - used++; - if(web_client_receive(w) < 0) { - debug(D_WEB_CLIENT, "%llu: Cannot receive data from client. Closing client.", w->id); - break; - } - - if(w->mode == WEB_CLIENT_MODE_NORMAL) { - debug(D_WEB_CLIENT, "%llu: Attempting to process received data.", w->id); - web_client_process_request(w); - - // if the sockets are closed, may have transferred this client - // to plugins.d - if(unlikely(w->mode == WEB_CLIENT_MODE_STREAM)) - break; - } - } + if(unlikely(w->mode == WEB_CLIENT_MODE_FILECOPY)) + return web_client_read_file(w); - if(unlikely(!used)) { - debug(D_WEB_CLIENT_ACCESS, "%llu: Received error on socket.", w->id); - break; - } - } + // do we have any space for more data? 
+ buffer_need_bytes(w->response.data, NETDATA_WEB_REQUEST_RECEIVE_SIZE); - if(w->mode != WEB_CLIENT_MODE_STREAM) - log_connection(w, "DISCONNECTED"); + ssize_t left = w->response.data->size - w->response.data->len; + ssize_t bytes = recv(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1), MSG_DONTWAIT); - web_client_reset(w); + if(likely(bytes > 0)) { + w->stats_received_bytes += bytes; - debug(D_WEB_CLIENT, "%llu: done...", w->id); + size_t old = w->response.data->len; + w->response.data->len += bytes; + w->response.data->buffer[w->response.data->len] = '\0'; - // close the sockets/files now - // to free file descriptors - if(w->ifd == w->ofd) { - if(w->ifd != -1) close(w->ifd); + debug(D_WEB_CLIENT, "%llu: Received %zd bytes.", w->id, bytes); + debug(D_WEB_DATA, "%llu: Received data: '%s'.", w->id, &w->response.data->buffer[old]); } else { - if(w->ifd != -1) close(w->ifd); - if(w->ofd != -1) close(w->ofd); + debug(D_WEB_CLIENT, "%llu: receive data failed.", w->id); + WEB_CLIENT_IS_DEAD(w); } - w->ifd = -1; - w->ofd = -1; - WEB_CLIENT_IS_OBSOLETE(w); - - pthread_exit(NULL); - return NULL; + return(bytes); } diff --git a/src/web_client.h b/src/web_client.h index a07558e1e..b495c37e1 100644 --- a/src/web_client.h +++ b/src/web_client.h @@ -1,9 +1,6 @@ #ifndef NETDATA_WEB_CLIENT_H #define NETDATA_WEB_CLIENT_H 1 -#define DEFAULT_DISCONNECT_IDLE_WEB_CLIENTS_AFTER_SECONDS 60 -extern int web_client_timeout; - #ifdef NETDATA_WITH_ZLIB extern int web_enable_gzip, web_gzip_level, @@ -21,10 +18,6 @@ typedef enum web_client_mode { } WEB_CLIENT_MODE; typedef enum web_client_flags { - WEB_CLIENT_FLAG_OBSOLETE = 1 << 0, // if set, the listener will remove this client - // after setting this, you should not touch - // this web_client - WEB_CLIENT_FLAG_DEAD = 1 << 1, // if set, this client is dead WEB_CLIENT_FLAG_KEEPALIVE = 1 << 2, // if set, the web client will be re-used @@ -36,7 +29,9 @@ typedef enum web_client_flags { 
WEB_CLIENT_FLAG_TRACKING_REQUIRED = 1 << 6, // if set, we need to send cookies WEB_CLIENT_FLAG_TCP_CLIENT = 1 << 7, // if set, the client is using a TCP socket - WEB_CLIENT_FLAG_UNIX_CLIENT = 1 << 8 // if set, the client is using a UNIX socket + WEB_CLIENT_FLAG_UNIX_CLIENT = 1 << 8, // if set, the client is using a UNIX socket + + WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET = 1 << 9, // don't close the socket when cleaning up (static-threaded web server) } WEB_CLIENT_FLAGS; //#ifdef HAVE_C___ATOMIC @@ -49,9 +44,6 @@ typedef enum web_client_flags { #define web_client_flag_clear(w, flag) (w)->flags &= ~flag //#endif -#define WEB_CLIENT_IS_OBSOLETE(w) web_client_flag_set(w, WEB_CLIENT_FLAG_OBSOLETE) -#define web_client_check_obsolete(w) web_client_flag_check(w, WEB_CLIENT_FLAG_OBSOLETE) - #define WEB_CLIENT_IS_DEAD(w) web_client_flag_set(w, WEB_CLIENT_FLAG_DEAD) #define web_client_check_dead(w) web_client_flag_check(w, WEB_CLIENT_FLAG_DEAD) @@ -81,11 +73,14 @@ typedef enum web_client_flags { #define web_client_is_corkable(w) web_client_flag_check(w, WEB_CLIENT_FLAG_TCP_CLIENT) -#define URL_MAX 8192 -#define ZLIB_CHUNK 16384 -#define HTTP_RESPONSE_HEADER_SIZE 4096 -#define COOKIE_MAX 1024 -#define ORIGIN_MAX 1024 +#define NETDATA_WEB_REQUEST_URL_SIZE 8192 +#define NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE 16384 +#define NETDATA_WEB_RESPONSE_HEADER_SIZE 4096 +#define NETDATA_WEB_REQUEST_COOKIE_SIZE 1024 +#define NETDATA_WEB_REQUEST_ORIGIN_HEADER_SIZE 1024 +#define NETDATA_WEB_RESPONSE_INITIAL_SIZE 16384 +#define NETDATA_WEB_REQUEST_RECEIVE_SIZE 16384 +#define NETDATA_WEB_REQUEST_MAX_SIZE 16384 struct response { BUFFER *header; // our response header @@ -100,7 +95,7 @@ struct response { int zoutput; // if set to 1, web_client_send() will send compressed data #ifdef NETDATA_WITH_ZLIB z_stream zstream; // zlib stream for sending compressed output to client - Bytef zbuffer[ZLIB_CHUNK]; // temporary buffer for storing compressed output + Bytef zbuffer[NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE]; 
// temporary buffer for storing compressed output size_t zsent; // the compressed bytes we have sent to the client size_t zhave; // the compressed bytes that we have received from zlib int zinitialized:1; @@ -129,11 +124,14 @@ typedef enum web_client_acl { struct web_client { unsigned long long id; - WEB_CLIENT_FLAGS flags; // status flags for the client - WEB_CLIENT_MODE mode; // the operational mode of the client - WEB_CLIENT_ACL acl; // the access list of the client + WEB_CLIENT_FLAGS flags; // status flags for the client + WEB_CLIENT_MODE mode; // the operational mode of the client + WEB_CLIENT_ACL acl; // the access list of the client + + size_t header_parse_tries; + size_t header_parse_last_size; - int tcp_cork; // 1 = we have a cork on the socket + int tcp_cork; // 1 = we have a cork on the socket int ifd; int ofd; @@ -141,47 +139,45 @@ struct web_client { char client_ip[NI_MAXHOST+1]; char client_port[NI_MAXSERV+1]; - char decoded_url[URL_MAX + 1]; // we decode the URL in this buffer - char last_url[URL_MAX+1]; // we keep a copy of the decoded URL here + char decoded_url[NETDATA_WEB_REQUEST_URL_SIZE + 1]; // we decode the URL in this buffer + char last_url[NETDATA_WEB_REQUEST_URL_SIZE+1]; // we keep a copy of the decoded URL here struct timeval tv_in, tv_ready; - char cookie1[COOKIE_MAX+1]; - char cookie2[COOKIE_MAX+1]; - char origin[ORIGIN_MAX+1]; + char cookie1[NETDATA_WEB_REQUEST_COOKIE_SIZE+1]; + char cookie2[NETDATA_WEB_REQUEST_COOKIE_SIZE+1]; + char origin[NETDATA_WEB_REQUEST_ORIGIN_HEADER_SIZE+1]; + char *user_agent; struct response response; size_t stats_received_bytes; size_t stats_sent_bytes; - pthread_t thread; // the thread servicing this client + // cache of web_client allocations + struct web_client *prev; // maintain a linked list of web clients + struct web_client *next; // for the web servers that need it - struct web_client *prev; - struct web_client *next; -}; + // MULTI-THREADED WEB SERVER MEMBERS + netdata_thread_t thread; // the thread 
servicing this client + volatile int running; // 1 when the thread runs, 0 otherwise -extern struct web_client *web_clients; -extern SIMPLE_PATTERN *web_allow_connections_from; -extern SIMPLE_PATTERN *web_allow_dashboard_from; -extern SIMPLE_PATTERN *web_allow_registry_from; -extern SIMPLE_PATTERN *web_allow_badges_from; -extern SIMPLE_PATTERN *web_allow_streaming_from; -extern SIMPLE_PATTERN *web_allow_netdataconf_from; + // STATIC-THREADED WEB SERVER MEMBERS + size_t pollinfo_slot; // POLLINFO slot of the web client + size_t pollinfo_filecopy_slot; // POLLINFO slot of the file read +}; extern uid_t web_files_uid(void); extern uid_t web_files_gid(void); extern int web_client_permission_denied(struct web_client *w); -extern struct web_client *web_client_create(int listener); -extern struct web_client *web_client_free(struct web_client *w); extern ssize_t web_client_send(struct web_client *w); extern ssize_t web_client_receive(struct web_client *w); -extern void web_client_process_request(struct web_client *w); -extern void web_client_reset(struct web_client *w); +extern ssize_t web_client_read_file(struct web_client *w); -extern void *web_client_main(void *ptr); +extern void web_client_process_request(struct web_client *w); +extern void web_client_request_done(struct web_client *w); extern int web_client_api_request_v1_data_group(char *name, int def); extern const char *group_method2string(int group); diff --git a/src/web_server.c b/src/web_server.c index d231cbb5c..31c546411 100644 --- a/src/web_server.c +++ b/src/web_server.c @@ -1,50 +1,12 @@ #include "common.h" -static LISTEN_SOCKETS api_sockets = { - .config_section = CONFIG_SECTION_WEB, - .default_bind_to = "*", - .default_port = API_LISTEN_PORT, - .backlog = API_LISTEN_BACKLOG -}; +// this file includes 3 web servers: +// +// 1. single-threaded, based on select() +// 2. multi-threaded, based on poll() that spawns threads to handle the requests, based on select() +// 3. 
static-threaded, based on poll() using a fixed number of threads (configured at netdata.conf) -WEB_SERVER_MODE web_server_mode = WEB_SERVER_MODE_MULTI_THREADED; - -#ifdef NETDATA_INTERNAL_CHECKS -static void log_allocations(void) -{ -#ifdef HAVE_C_MALLINFO - static int heap = 0, used = 0, mmap = 0; - - struct mallinfo mi; - - mi = mallinfo(); - if(mi.uordblks > used) { - int clients = 0; - struct web_client *w; - for(w = web_clients; w ; w = w->next) clients++; - - info("Allocated memory: used %d KB (+%d B), mmap %d KB (+%d B), heap %d KB (+%d B). %d web clients connected.", - mi.uordblks / 1024, - mi.uordblks - used, - mi.hblkhd / 1024, - mi.hblkhd - mmap, - mi.arena / 1024, - mi.arena - heap, - clients); - - used = mi.uordblks; - heap = mi.arena; - mmap = mi.hblkhd; - } -#else /* ! HAVE_C_MALLINFO */ - ; -#endif /* ! HAVE_C_MALLINFO */ - -#ifdef has_jemalloc - malloc_stats_print(NULL, NULL, NULL); -#endif -} -#endif /* NETDATA_INTERNAL_CHECKS */ +WEB_SERVER_MODE web_server_mode = WEB_SERVER_MODE_STATIC_THREADED; // -------------------------------------------------------------------------------------- @@ -53,6 +15,8 @@ WEB_SERVER_MODE web_server_mode_id(const char *mode) { return WEB_SERVER_MODE_NONE; else if(!strcmp(mode, "single") || !strcmp(mode, "single-threaded")) return WEB_SERVER_MODE_SINGLE_THREADED; + else if(!strcmp(mode, "static") || !strcmp(mode, "static-threaded")) + return WEB_SERVER_MODE_STATIC_THREADED; else // if(!strcmp(mode, "multi") || !strcmp(mode, "multi-threaded")) return WEB_SERVER_MODE_MULTI_THREADED; } @@ -65,6 +29,9 @@ const char *web_server_mode_name(WEB_SERVER_MODE id) { case WEB_SERVER_MODE_SINGLE_THREADED: return "single-threaded"; + case WEB_SERVER_MODE_STATIC_THREADED: + return "static-threaded"; + default: case WEB_SERVER_MODE_MULTI_THREADED: return "multi-threaded"; @@ -72,6 +39,14 @@ const char *web_server_mode_name(WEB_SERVER_MODE id) { } // -------------------------------------------------------------------------------------- 
+// API sockets + +static LISTEN_SOCKETS api_sockets = { + .config_section = CONFIG_SECTION_WEB, + .default_bind_to = "*", + .default_port = API_LISTEN_PORT, + .backlog = API_LISTEN_BACKLOG +}; int api_listen_sockets_setup(void) { int socks = listen_sockets_setup(&api_sockets); @@ -82,89 +57,622 @@ int api_listen_sockets_setup(void) { return socks; } + // -------------------------------------------------------------------------------------- -// the main socket listener +// access lists -static inline void cleanup_web_clients(void) { - struct web_client *w; +SIMPLE_PATTERN *web_allow_connections_from = NULL; +SIMPLE_PATTERN *web_allow_streaming_from = NULL; +SIMPLE_PATTERN *web_allow_netdataconf_from = NULL; + +// WEB_CLIENT_ACL +SIMPLE_PATTERN *web_allow_dashboard_from = NULL; +SIMPLE_PATTERN *web_allow_registry_from = NULL; +SIMPLE_PATTERN *web_allow_badges_from = NULL; + +static void web_client_update_acl_matches(struct web_client *w) { + w->acl = WEB_CLIENT_ACL_NONE; + + if(!web_allow_dashboard_from || simple_pattern_matches(web_allow_dashboard_from, w->client_ip)) + w->acl |= WEB_CLIENT_ACL_DASHBOARD; + + if(!web_allow_registry_from || simple_pattern_matches(web_allow_registry_from, w->client_ip)) + w->acl |= WEB_CLIENT_ACL_REGISTRY; + + if(!web_allow_badges_from || simple_pattern_matches(web_allow_badges_from, w->client_ip)) + w->acl |= WEB_CLIENT_ACL_BADGE; +} + + +// -------------------------------------------------------------------------------------- + +static void log_connection(struct web_client *w, const char *msg) { + log_access("%llu: %d '[%s]:%s' '%s'", w->id, gettid(), w->client_ip, w->client_port, msg); +} + +// ---------------------------------------------------------------------------- +// allocate and free web_clients + +static void web_client_zero(struct web_client *w) { + // zero everything about it - but keep the buffers + + // remember the pointers to the buffers + BUFFER *b1 = w->response.data; + BUFFER *b2 = w->response.header; + BUFFER 
*b3 = w->response.header_output; + + // empty the buffers + buffer_flush(b1); + buffer_flush(b2); + buffer_flush(b3); + + freez(w->user_agent); + + // zero everything + memset(w, 0, sizeof(struct web_client)); + + // restore the pointers of the buffers + w->response.data = b1; + w->response.header = b2; + w->response.header_output = b3; +} + +static void web_client_free(struct web_client *w) { + buffer_free(w->response.header_output); + buffer_free(w->response.header); + buffer_free(w->response.data); + freez(w->user_agent); + freez(w); +} + +static struct web_client *web_client_alloc(void) { + struct web_client *w = callocz(1, sizeof(struct web_client)); + w->response.data = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE); + w->response.header = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE); + w->response.header_output = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE); + return w; +} + +// ---------------------------------------------------------------------------- +// web clients caching + +// When clients connect and disconnect, avoid allocating and releasing memory. +// Instead, when new clients get connected, reuse any memory previously allocated +// for serving web clients that are now disconnected. + +// The size of the cache is adaptive. It caches the structures of 2x +// the number of currently connected clients. 
+ +// Comments per server: +// SINGLE-THREADED : 1 cache is maintained +// MULTI-THREADED : 1 cache is maintained +// STATIC-THREADED : 1 cache for each thred of the web server + +struct clients_cache { + pid_t pid; + + struct web_client *used; // the structures of the currently connected clients + size_t used_count; // the count the currently connected clients + + struct web_client *avail; // the cached structures, available for future clients + size_t avail_count; // the number of cached structures + + size_t reused; // the number of re-uses + size_t allocated; // the number of allocations +}; + +static __thread struct clients_cache web_clients_cache = { + .pid = 0, + .used = NULL, + .used_count = 0, + .avail = NULL, + .avail_count = 0, + .allocated = 0, + .reused = 0 +}; + +static inline void web_client_cache_verify(int force) { +#ifdef NETDATA_INTERNAL_CHECKS + static __thread size_t count = 0; + count++; + + if(unlikely(force || count > 1000)) { + count = 0; + + struct web_client *w; + size_t used = 0, avail = 0; + for(w = web_clients_cache.used; w ; w = w->next) used++; + for(w = web_clients_cache.avail; w ; w = w->next) avail++; + + info("web_client_cache has %zu (%zu) used and %zu (%zu) available clients, allocated %zu, reused %zu (hit %zu%%)." + , used, web_clients_cache.used_count + , avail, web_clients_cache.avail_count + , web_clients_cache.allocated + , web_clients_cache.reused + , (web_clients_cache.allocated + web_clients_cache.reused)?(web_clients_cache.reused * 100 / (web_clients_cache.allocated + web_clients_cache.reused)):0 + ); + } +#else + if(unlikely(force)) { + info("web_client_cache has %zu used and %zu available clients, allocated %zu, reused %zu (hit %zu%%)." 
+ , web_clients_cache.used_count + , web_clients_cache.avail_count + , web_clients_cache.allocated + , web_clients_cache.reused + , (web_clients_cache.allocated + web_clients_cache.reused)?(web_clients_cache.reused * 100 / (web_clients_cache.allocated + web_clients_cache.reused)):0 + ); + } +#endif +} + +// destroy the cache and free all the memory it uses +static void web_client_cache_destroy(void) { +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(web_clients_cache.pid != 0 && web_clients_cache.pid != gettid())) + error("Oops! wrong thread accessing the cache. Expected %d, found %d", (int)web_clients_cache.pid, (int)gettid()); + + web_client_cache_verify(1); +#endif + + netdata_thread_disable_cancelability(); + + struct web_client *w, *t; + + w = web_clients_cache.used; + while(w) { + t = w; + w = w->next; + web_client_free(t); + } + web_clients_cache.used = NULL; + web_clients_cache.used_count = 0; + + w = web_clients_cache.avail; + while(w) { + t = w; + w = w->next; + web_client_free(t); + } + web_clients_cache.avail = NULL; + web_clients_cache.avail_count = 0; + + netdata_thread_enable_cancelability(); +} + +static struct web_client *web_client_get_from_cache_or_allocate() { - for (w = web_clients; w;) { - if (web_client_check_obsolete(w)) { - debug(D_WEB_CLIENT, "%llu: Removing client.", w->id); - // pthread_cancel(w->thread); - // pthread_join(w->thread, NULL); - w = web_client_free(w); #ifdef NETDATA_INTERNAL_CHECKS - log_allocations(); + if(unlikely(web_clients_cache.pid == 0)) + web_clients_cache.pid = gettid(); + + if(unlikely(web_clients_cache.pid != 0 && web_clients_cache.pid != gettid())) + error("Oops! wrong thread accessing the cache. 
Expected %d, found %d", (int)web_clients_cache.pid, (int)gettid()); #endif + + netdata_thread_disable_cancelability(); + + struct web_client *w = web_clients_cache.avail; + + if(w) { + // get it from avail + if (w == web_clients_cache.avail) web_clients_cache.avail = w->next; + if(w->prev) w->prev->next = w->next; + if(w->next) w->next->prev = w->prev; + web_clients_cache.avail_count--; + web_client_zero(w); + web_clients_cache.reused++; + } + else { + // allocate it + w = web_client_alloc(); + web_clients_cache.allocated++; + } + + // link it to used web clients + if (web_clients_cache.used) web_clients_cache.used->prev = w; + w->next = web_clients_cache.used; + w->prev = NULL; + web_clients_cache.used = w; + web_clients_cache.used_count++; + + // initialize it + w->id = web_client_connected(); + w->mode = WEB_CLIENT_MODE_NORMAL; + + netdata_thread_enable_cancelability(); + + return w; +} + +static void web_client_release(struct web_client *w) { +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(web_clients_cache.pid != 0 && web_clients_cache.pid != gettid())) + error("Oops! wrong thread accessing the cache. 
Expected %d, found %d", (int)web_clients_cache.pid, (int)gettid()); + + if(unlikely(w->running)) + error("%llu: releasing web client from %s port %s, but it still running.", w->id, w->client_ip, w->client_port); +#endif + + debug(D_WEB_CLIENT_ACCESS, "%llu: Closing web client from %s port %s.", w->id, w->client_ip, w->client_port); + + log_connection(w, "DISCONNECTED"); + web_client_request_done(w); + web_client_disconnected(); + + netdata_thread_disable_cancelability(); + + if(web_server_mode != WEB_SERVER_MODE_STATIC_THREADED) { + if (w->ifd != -1) close(w->ifd); + if (w->ofd != -1 && w->ofd != w->ifd) close(w->ofd); + w->ifd = w->ofd = -1; + } + + // unlink it from the used + if (w == web_clients_cache.used) web_clients_cache.used = w->next; + if(w->prev) w->prev->next = w->next; + if(w->next) w->next->prev = w->prev; + web_clients_cache.used_count--; + + if(web_clients_cache.avail_count >= 2 * web_clients_cache.used_count) { + // we have too many of them - free it + web_client_free(w); + } + else { + // link it to the avail + if (web_clients_cache.avail) web_clients_cache.avail->prev = w; + w->next = web_clients_cache.avail; + w->prev = NULL; + web_clients_cache.avail = w; + web_clients_cache.avail_count++; + } + + netdata_thread_enable_cancelability(); +} + + +// ---------------------------------------------------------------------------- +// high level web clients connection management + +static void web_client_initialize_connection(struct web_client *w) { + int flag = 1; + if(setsockopt(w->ifd, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int)) != 0) + error("%llu: failed to enable TCP_NODELAY on socket fd %d.", w->id, w->ifd); + + flag = 1; + if(setsockopt(w->ifd, SOL_SOCKET, SO_KEEPALIVE, (char *) &flag, sizeof(int)) != 0) + error("%llu: failed to enable SO_KEEPALIVE on socket fd %d.", w->id, w->ifd); + + web_client_update_acl_matches(w); + + w->origin[0] = '*'; w->origin[1] = '\0'; + w->cookie1[0] = '\0'; w->cookie2[0] = '\0'; + freez(w->user_agent); 
w->user_agent = NULL; + + web_client_enable_wait_receive(w); + + log_connection(w, "CONNECTED"); + + web_client_cache_verify(0); +} + +static struct web_client *web_client_create_on_fd(int fd, const char *client_ip, const char *client_port) { + struct web_client *w; + + w = web_client_get_from_cache_or_allocate(); + w->ifd = w->ofd = fd; + + strncpyz(w->client_ip, client_ip, sizeof(w->client_ip) - 1); + strncpyz(w->client_port, client_port, sizeof(w->client_port) - 1); + + if(unlikely(!*w->client_ip)) strcpy(w->client_ip, "-"); + if(unlikely(!*w->client_port)) strcpy(w->client_port, "-"); + + web_client_initialize_connection(w); + return(w); +} + +static struct web_client *web_client_create_on_listenfd(int listener) { + struct web_client *w; + + w = web_client_get_from_cache_or_allocate(); + w->ifd = w->ofd = accept_socket(listener, SOCK_NONBLOCK, w->client_ip, sizeof(w->client_ip), w->client_port, sizeof(w->client_port), web_allow_connections_from); + + if(unlikely(!*w->client_ip)) strcpy(w->client_ip, "-"); + if(unlikely(!*w->client_port)) strcpy(w->client_port, "-"); + + if (w->ifd == -1) { + if(errno == EPERM) + log_connection(w, "ACCESS DENIED"); + else { + log_connection(w, "CONNECTION FAILED"); + error("%llu: Failed to accept new incoming connection.", w->id); } - else w = w->next; + + web_client_release(w); + return NULL; } + + web_client_initialize_connection(w); + return(w); +} + + +// -------------------------------------------------------------------------------------- +// the thread of a single client - for the MULTI-THREADED web server + +// 1. waits for input and output, using async I/O +// 2. it processes HTTP requests +// 3. it generates HTTP responses +// 4. 
it copies data from input to output if mode is FILECOPY + +int web_client_timeout = DEFAULT_DISCONNECT_IDLE_WEB_CLIENTS_AFTER_SECONDS; +int web_client_first_request_timeout = DEFAULT_TIMEOUT_TO_RECEIVE_FIRST_WEB_REQUEST; + +static void multi_threaded_web_client_worker_main_cleanup(void *ptr) { + struct web_client *w = ptr; + WEB_CLIENT_IS_DEAD(w); + w->running = 0; +} + +static void *multi_threaded_web_client_worker_main(void *ptr) { + netdata_thread_cleanup_push(multi_threaded_web_client_worker_main_cleanup, ptr); + + struct web_client *w = ptr; + w->running = 1; + + struct pollfd fds[2], *ifd, *ofd; + int retval, timeout_ms; + nfds_t fdmax = 0; + + while(!netdata_exit) { + if(unlikely(web_client_check_dead(w))) { + debug(D_WEB_CLIENT, "%llu: client is dead.", w->id); + break; + } + else if(unlikely(!web_client_has_wait_receive(w) && !web_client_has_wait_send(w))) { + debug(D_WEB_CLIENT, "%llu: client is not set for neither receiving nor sending data.", w->id); + break; + } + + if(unlikely(w->ifd < 0 || w->ofd < 0)) { + error("%llu: invalid file descriptor, ifd = %d, ofd = %d (required 0 <= fd", w->id, w->ifd, w->ofd); + break; + } + + if(w->ifd == w->ofd) { + fds[0].fd = w->ifd; + fds[0].events = 0; + fds[0].revents = 0; + + if(web_client_has_wait_receive(w)) fds[0].events |= POLLIN; + if(web_client_has_wait_send(w)) fds[0].events |= POLLOUT; + + fds[1].fd = -1; + fds[1].events = 0; + fds[1].revents = 0; + + ifd = ofd = &fds[0]; + + fdmax = 1; + } + else { + fds[0].fd = w->ifd; + fds[0].events = 0; + fds[0].revents = 0; + if(web_client_has_wait_receive(w)) fds[0].events |= POLLIN; + ifd = &fds[0]; + + fds[1].fd = w->ofd; + fds[1].events = 0; + fds[1].revents = 0; + if(web_client_has_wait_send(w)) fds[1].events |= POLLOUT; + ofd = &fds[1]; + + fdmax = 2; + } + + debug(D_WEB_CLIENT, "%llu: Waiting socket async I/O for %s %s", w->id, web_client_has_wait_receive(w)?"INPUT":"", web_client_has_wait_send(w)?"OUTPUT":""); + errno = 0; + timeout_ms = web_client_timeout * 
1000; + retval = poll(fds, fdmax, timeout_ms); + + if(unlikely(netdata_exit)) break; + + if(unlikely(retval == -1)) { + if(errno == EAGAIN || errno == EINTR) { + debug(D_WEB_CLIENT, "%llu: EAGAIN received.", w->id); + continue; + } + + debug(D_WEB_CLIENT, "%llu: LISTENER: poll() failed (input fd = %d, output fd = %d). Closing client.", w->id, w->ifd, w->ofd); + break; + } + else if(unlikely(!retval)) { + debug(D_WEB_CLIENT, "%llu: Timeout while waiting socket async I/O for %s %s", w->id, web_client_has_wait_receive(w)?"INPUT":"", web_client_has_wait_send(w)?"OUTPUT":""); + break; + } + + if(unlikely(netdata_exit)) break; + + int used = 0; + if(web_client_has_wait_send(w) && ofd->revents & POLLOUT) { + used++; + if(web_client_send(w) < 0) { + debug(D_WEB_CLIENT, "%llu: Cannot send data to client. Closing client.", w->id); + break; + } + } + + if(unlikely(netdata_exit)) break; + + if(web_client_has_wait_receive(w) && (ifd->revents & POLLIN || ifd->revents & POLLPRI)) { + used++; + if(web_client_receive(w) < 0) { + debug(D_WEB_CLIENT, "%llu: Cannot receive data from client. 
Closing client.", w->id); + break; + } + + if(w->mode == WEB_CLIENT_MODE_NORMAL) { + debug(D_WEB_CLIENT, "%llu: Attempting to process received data.", w->id); + web_client_process_request(w); + + // if the sockets are closed, may have transferred this client + // to plugins.d + if(unlikely(w->mode == WEB_CLIENT_MODE_STREAM)) + break; + } + } + + if(unlikely(!used)) { + debug(D_WEB_CLIENT_ACCESS, "%llu: Received error on socket.", w->id); + break; + } + } + + if(w->mode != WEB_CLIENT_MODE_STREAM) + log_connection(w, "DISCONNECTED"); + + web_client_request_done(w); + + debug(D_WEB_CLIENT, "%llu: done...", w->id); + + // close the sockets/files now + // to free file descriptors + if(w->ifd == w->ofd) { + if(w->ifd != -1) close(w->ifd); + } + else { + if(w->ifd != -1) close(w->ifd); + if(w->ofd != -1) close(w->ofd); + } + w->ifd = -1; + w->ofd = -1; + + netdata_thread_cleanup_pop(1); + return NULL; } +// -------------------------------------------------------------------------------------- +// the main socket listener - MULTI-THREADED + // 1. it accepts new incoming requests on our port // 2. creates a new web_client for each connection received -// 3. spawns a new pthread to serve the client (this is optimal for keep-alive clients) -// 4. cleans up old web_clients that their pthreads have been exited +// 3. spawns a new netdata_thread to serve the client (this is optimal for keep-alive clients) +// 4. 
cleans up old web_clients that their netdata_threads have been exited + +static void web_client_multi_threaded_web_server_release_clients(void) { + struct web_client *w; + for(w = web_clients_cache.used; w ; ) { + if(unlikely(!w->running && web_client_check_dead(w))) { + struct web_client *t = w->next; + web_client_release(w); + w = t; + } + else + w = w->next; + } +} + +static void web_client_multi_threaded_web_server_stop_all_threads(void) { + struct web_client *w; + + int found = 1, max = 2 * USEC_PER_SEC, step = 50000; + for(w = web_clients_cache.used; w ; w = w->next) { + if(w->running) { + found++; + info("stopping web client %s, id %llu", w->client_ip, w->id); + netdata_thread_cancel(w->thread); + } + } + + while(found && max > 0) { + max -= step; + info("Waiting %d web threads to finish...", found); + sleep_usec(step); + found = 0; + for(w = web_clients_cache.used; w ; w = w->next) + if(w->running) found++; + } + + if(found) + error("%d web threads are taking too long to finish. 
Giving up.", found); +} + +static struct pollfd *socket_listen_main_multi_threaded_fds = NULL; + +static void socket_listen_main_multi_threaded_cleanup(void *data) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); -#define CLEANUP_EVERY_EVENTS 100 + info("releasing allocated memory..."); + freez(socket_listen_main_multi_threaded_fds); + info("closing all sockets..."); + listen_sockets_close(&api_sockets); + + info("stopping all running web server threads..."); + web_client_multi_threaded_web_server_stop_all_threads(); + + info("freeing web clients cache..."); + web_client_cache_destroy(); + + info("cleanup completed."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +#define CLEANUP_EVERY_EVENTS 60 void *socket_listen_main_multi_threaded(void *ptr) { - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + netdata_thread_cleanup_push(socket_listen_main_multi_threaded_cleanup, ptr); web_server_mode = WEB_SERVER_MODE_MULTI_THREADED; - info("Multi-threaded WEB SERVER thread created with task id %d", gettid()); + web_server_is_multithreaded = 1; struct web_client *w; int retval, counter = 0; - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); - - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); - if(!api_sockets.opened) fatal("LISTENER: No sockets to listen to."); - struct pollfd *fds = callocz(sizeof(struct pollfd), api_sockets.opened); + socket_listen_main_multi_threaded_fds = callocz(sizeof(struct pollfd), api_sockets.opened); size_t i; for(i = 0; i < api_sockets.opened ;i++) { - fds[i].fd = api_sockets.fds[i]; - fds[i].events = POLLIN; - fds[i].revents = 0; + socket_listen_main_multi_threaded_fds[i].fd = api_sockets.fds[i]; + 
socket_listen_main_multi_threaded_fds[i].events = POLLIN; + socket_listen_main_multi_threaded_fds[i].revents = 0; info("Listening on '%s'", (api_sockets.fds_names[i])?api_sockets.fds_names[i]:"UNKNOWN"); } - int timeout = 10 * 1000; + int timeout_ms = 1 * 1000; + + while(!netdata_exit) { - for(;;) { // debug(D_WEB_CLIENT, "LISTENER: Waiting..."); - retval = poll(fds, api_sockets.opened, timeout); + retval = poll(socket_listen_main_multi_threaded_fds, api_sockets.opened, timeout_ms); if(unlikely(retval == -1)) { error("LISTENER: poll() failed."); continue; } else if(unlikely(!retval)) { - debug(D_WEB_CLIENT, "LISTENER: select() timeout."); - counter = 0; - cleanup_web_clients(); + debug(D_WEB_CLIENT, "LISTENER: poll() timeout."); + counter++; continue; } for(i = 0 ; i < api_sockets.opened ; i++) { - short int revents = fds[i].revents; + short int revents = socket_listen_main_multi_threaded_fds[i].revents; // check for new incoming connections if(revents & POLLIN || revents & POLLPRI) { - fds[i].revents = 0; + socket_listen_main_multi_threaded_fds[i].revents = 0; - w = web_client_create(fds[i].fd); + w = web_client_create_on_listenfd(socket_listen_main_multi_threaded_fds[i].fd); if(unlikely(!w)) { - // no need for error log - web_client_create already logged the error + // no need for error log - web_client_create_on_listenfd already logged the error continue; } @@ -173,40 +681,38 @@ void *socket_listen_main_multi_threaded(void *ptr) { else web_client_set_tcp(w); - if(pthread_create(&w->thread, NULL, web_client_main, w) != 0) { - error("%llu: failed to create new thread for web client.", w->id); - WEB_CLIENT_IS_OBSOLETE(w); - } - else if(pthread_detach(w->thread) != 0) { - error("%llu: Cannot request detach of newly created web client thread.", w->id); - WEB_CLIENT_IS_OBSOLETE(w); + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "WEB_CLIENT[%llu,[%s]:%s]", w->id, w->client_ip, w->client_port); + + w->running = 1; + 
if(netdata_thread_create(&w->thread, tag, NETDATA_THREAD_OPTION_DONT_LOG, multi_threaded_web_client_worker_main, w) != 0) { + w->running = 0; + web_client_release(w); } } } - // cleanup unused clients counter++; - if(counter >= CLEANUP_EVERY_EVENTS) { + if(counter > CLEANUP_EVERY_EVENTS) { counter = 0; - cleanup_web_clients(); + web_client_multi_threaded_web_server_release_clients(); } } - debug(D_WEB_CLIENT, "LISTENER: exit!"); - listen_sockets_close(&api_sockets); - - freez(fds); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } + +// -------------------------------------------------------------------------------------- +// the main socket listener - SINGLE-THREADED + struct web_client *single_threaded_clients[FD_SETSIZE]; static inline int single_threaded_link_client(struct web_client *w, fd_set *ifds, fd_set *ofds, fd_set *efds, int *max) { - if(unlikely(web_client_check_obsolete(w) || web_client_check_dead(w) || (!web_client_has_wait_receive(w) && !web_client_has_wait_send(w)))) + if(unlikely(web_client_check_dead(w) || (!web_client_has_wait_receive(w) && !web_client_has_wait_send(w)))) { return 1; + } if(unlikely(w->ifd < 0 || w->ifd >= (int)FD_SETSIZE || w->ofd < 0 || w->ofd >= (int)FD_SETSIZE)) { error("%llu: invalid file descriptor, ifd = %d, ofd = %d (required 0 <= fd < FD_SETSIZE (%d)", w->id, w->ifd, w->ofd, (int)FD_SETSIZE); @@ -240,27 +746,33 @@ static inline int single_threaded_unlink_client(struct web_client *w, fd_set *if single_threaded_clients[w->ifd] = NULL; single_threaded_clients[w->ofd] = NULL; - if(unlikely(web_client_check_obsolete(w) || web_client_check_dead(w) || (!web_client_has_wait_receive(w) && !web_client_has_wait_send(w)))) + if(unlikely(web_client_check_dead(w) || (!web_client_has_wait_receive(w) && !web_client_has_wait_send(w)))) { return 1; + } return 0; } -void *socket_listen_main_single_threaded(void *ptr) { - struct netdata_static_thread *static_thread = (struct 
netdata_static_thread *)ptr; +static void socket_listen_main_single_threaded_cleanup(void *data) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - web_server_mode = WEB_SERVER_MODE_SINGLE_THREADED; + info("closing all sockets..."); + listen_sockets_close(&api_sockets); - info("Single-threaded WEB SERVER thread created with task id %d", gettid()); + info("freeing web clients cache..."); + web_client_cache_destroy(); - struct web_client *w; - int retval; + info("cleanup completed."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); +void *socket_listen_main_single_threaded(void *ptr) { + netdata_thread_cleanup_push(socket_listen_main_single_threaded_cleanup, ptr); + web_server_mode = WEB_SERVER_MODE_SINGLE_THREADED; + web_server_is_multithreaded = 0; - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); + struct web_client *w; if(!api_sockets.opened) fatal("LISTENER: no listen sockets available."); @@ -287,14 +799,14 @@ void *socket_listen_main_single_threaded(void *ptr) { fdmax = api_sockets.fds[i]; } - for(;;) { + while(!netdata_exit) { debug(D_WEB_CLIENT_ACCESS, "LISTENER: single threaded web server waiting (fdmax = %d)...", fdmax); struct timeval tv = { .tv_sec = 1, .tv_usec = 0 }; rifds = ifds; rofds = ofds; refds = efds; - retval = select(fdmax+1, &rifds, &rofds, &refds, &tv); + int retval = select(fdmax+1, &rifds, &rofds, &refds, &tv); if(unlikely(retval == -1)) { error("LISTENER: select() failed."); @@ -306,7 +818,9 @@ void *socket_listen_main_single_threaded(void *ptr) { for(i = 0; i < api_sockets.opened ; i++) { if (FD_ISSET(api_sockets.fds[i], &rifds)) { debug(D_WEB_CLIENT_ACCESS, "LISTENER: new connection."); - w = web_client_create(api_sockets.fds[i]); + w = 
web_client_create_on_listenfd(api_sockets.fds[i]); + if(unlikely(!w)) + continue; if(api_sockets.fds_families[i] == AF_UNIX) web_client_set_unix(w); @@ -314,7 +828,7 @@ void *socket_listen_main_single_threaded(void *ptr) { web_client_set_tcp(w); if (single_threaded_link_client(w, &ifds, &ofds, &ifds, &fdmax) != 0) { - web_client_free(w); + web_client_release(w); } } } @@ -324,22 +838,27 @@ void *socket_listen_main_single_threaded(void *ptr) { continue; w = single_threaded_clients[i]; - if(unlikely(!w)) + if(unlikely(!w)) { + // error("no client on slot %zu", i); continue; + } if(unlikely(single_threaded_unlink_client(w, &ifds, &ofds, &efds) != 0)) { - web_client_free(w); + // error("failed to unlink client %zu", i); + web_client_release(w); continue; } if (unlikely(FD_ISSET(w->ifd, &refds) || FD_ISSET(w->ofd, &refds))) { - web_client_free(w); + // error("no input on client %zu", i); + web_client_release(w); continue; } if (unlikely(web_client_has_wait_receive(w) && FD_ISSET(w->ifd, &rifds))) { if (unlikely(web_client_receive(w) < 0)) { - web_client_free(w); + // error("cannot read from client %zu", i); + web_client_release(w); continue; } @@ -351,122 +870,243 @@ void *socket_listen_main_single_threaded(void *ptr) { if (unlikely(web_client_has_wait_send(w) && FD_ISSET(w->ofd, &rofds))) { if (unlikely(web_client_send(w) < 0)) { + // error("cannot send data to client %zu", i); debug(D_WEB_CLIENT, "%llu: Cannot send data to client. 
Closing client.", w->id); - web_client_free(w); + web_client_release(w); continue; } } if(unlikely(single_threaded_link_client(w, &ifds, &ofds, &efds, &fdmax) != 0)) { - web_client_free(w); + // error("failed to link client %zu", i); + web_client_release(w); } } } else { debug(D_WEB_CLIENT_ACCESS, "LISTENER: single threaded web server timeout."); -#ifdef NETDATA_INTERNAL_CHECKS - log_allocations(); -#endif } } - debug(D_WEB_CLIENT, "LISTENER: exit!"); - listen_sockets_close(&api_sockets); - - static_thread->enabled = 0; - pthread_exit(NULL); + netdata_thread_cleanup_pop(1); return NULL; } -#if 0 -// new TCP client connected -static void *web_server_add_callback(int fd, int socktype, short int *events) { - (void)fd; - (void)socktype; +// -------------------------------------------------------------------------------------- +// the main socket listener - STATIC-THREADED + +struct web_server_static_threaded_worker { + netdata_thread_t thread; + + int id; + int running; + + size_t max_sockets; + + volatile size_t connected; + volatile size_t disconnected; + volatile size_t receptions; + volatile size_t sends; + volatile size_t max_concurrent; + + volatile size_t files_read; + volatile size_t file_reads; +}; + +static long long static_threaded_workers_count = 1; +static struct web_server_static_threaded_worker *static_workers_private_data = NULL; +static __thread struct web_server_static_threaded_worker *worker_private = NULL; + +// ---------------------------------------------------------------------------- + +static inline int web_server_check_client_status(struct web_client *w) { + if(unlikely(web_client_check_dead(w) || (!web_client_has_wait_receive(w) && !web_client_has_wait_send(w)))) + return -1; + + return 0; +} + +// ---------------------------------------------------------------------------- +// web server files + +static void *web_server_file_add_callback(POLLINFO *pi, short int *events, void *data) { + struct web_client *w = (struct web_client *)data; + + 
worker_private->files_read++; + debug(D_WEB_CLIENT, "%llu: ADDED FILE READ ON FD %d", w->id, pi->fd); *events = POLLIN; + pi->data = w; + return w; +} + +static void web_werver_file_del_callback(POLLINFO *pi) { + struct web_client *w = (struct web_client *)pi->data; + debug(D_WEB_CLIENT, "%llu: RELEASE FILE READ ON FD %d", w->id, pi->fd); + + w->pollinfo_filecopy_slot = 0; + + if(unlikely(!w->pollinfo_slot)) { + debug(D_WEB_CLIENT, "%llu: CROSS WEB CLIENT CLEANUP (iFD %d, oFD %d)", w->id, pi->fd, w->ofd); + web_client_release(w); + } +} + +static int web_server_file_read_callback(POLLINFO *pi, short int *events) { + struct web_client *w = (struct web_client *)pi->data; + + // if there is no POLLINFO linked to this, it means the client disconnected + // stop the file reading too + if(unlikely(!w->pollinfo_slot)) { + debug(D_WEB_CLIENT, "%llu: PREVENTED ATTEMPT TO READ FILE ON FD %d, ON CLOSED WEB CLIENT", w->id, pi->fd); + return -1; + } + + if(unlikely(w->mode != WEB_CLIENT_MODE_FILECOPY || w->ifd == w->ofd)) { + debug(D_WEB_CLIENT, "%llu: PREVENTED ATTEMPT TO READ FILE ON FD %d, ON NON-FILECOPY WEB CLIENT", w->id, pi->fd); + return -1; + } + + debug(D_WEB_CLIENT, "%llu: READING FILE ON FD %d", w->id, pi->fd); - debug(D_WEB_CLIENT_ACCESS, "LISTENER on %d: new connection.", fd); - struct web_client *w = web_client_create(fd); + worker_private->file_reads++; + ssize_t ret = unlikely(web_client_read_file(w)); - if(unlikely(socktype == AF_UNIX)) + if(likely(web_client_has_wait_send(w))) { + POLLJOB *p = pi->p; // our POLLJOB + POLLINFO *wpi = pollinfo_from_slot(p, w->pollinfo_slot); // POLLINFO of the client socket + + debug(D_WEB_CLIENT, "%llu: SIGNALING W TO SEND (iFD %d, oFD %d)", w->id, pi->fd, wpi->fd); + p->fds[wpi->slot].events |= POLLOUT; + } + + if(unlikely(ret <= 0 || w->ifd == w->ofd)) { + debug(D_WEB_CLIENT, "%llu: DONE READING FILE ON FD %d", w->id, pi->fd); + return -1; + } + + *events = POLLIN; + return 0; +} + +static int 
web_server_file_write_callback(POLLINFO *pi, short int *events) { + (void)pi; + (void)events; + + error("Writing to web files is not supported!"); + + return -1; +} + +// ---------------------------------------------------------------------------- +// web server clients + +static void *web_server_add_callback(POLLINFO *pi, short int *events, void *data) { + (void)data; + + worker_private->connected++; + + size_t concurrent = worker_private->connected - worker_private->disconnected; + if(unlikely(concurrent > worker_private->max_concurrent)) + worker_private->max_concurrent = concurrent; + + *events = POLLIN; + + debug(D_WEB_CLIENT_ACCESS, "LISTENER on %d: new connection.", pi->fd); + struct web_client *w = web_client_create_on_fd(pi->fd, pi->client_ip, pi->client_port); + w->pollinfo_slot = pi->slot; + + if(unlikely(pi->socktype == AF_UNIX)) web_client_set_unix(w); else web_client_set_tcp(w); - return (void *)w; + debug(D_WEB_CLIENT, "%llu: ADDED CLIENT FD %d", w->id, pi->fd); + return w; } // TCP client disconnected -static void web_server_del_callback(int fd, int socktype, void *data) { - (void)fd; - (void)socktype; +static void web_server_del_callback(POLLINFO *pi) { + worker_private->disconnected++; - struct web_client *w = (struct web_client *)data; + struct web_client *w = (struct web_client *)pi->data; - if(w) { - if(w->ofd == -1 || fd == w->ofd) { - // we free the client, only if the closing fd - // is the client socket - web_client_free(w); - } + w->pollinfo_slot = 0; + if(unlikely(w->pollinfo_filecopy_slot)) { + POLLINFO *fpi = pollinfo_from_slot(pi->p, w->pollinfo_filecopy_slot); // POLLINFO of the client socket + debug(D_WEB_CLIENT, "%llu: THE CLIENT WILL BE FRED BY READING FILE JOB ON FD %d", w->id, fpi->fd); } + else { + if(web_client_flag_check(w, WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET)) + pi->flags |= POLLINFO_FLAG_DONT_CLOSE; - return; + debug(D_WEB_CLIENT, "%llu: CLOSING CLIENT FD %d", w->id, pi->fd); + web_client_release(w); + } } -// Receive data 
-static int web_server_rcv_callback(int fd, int socktype, void *data, short int *events) { - (void)fd; - (void)socktype; - - *events = 0; - - struct web_client *w = (struct web_client *)data; +static int web_server_rcv_callback(POLLINFO *pi, short int *events) { + worker_private->receptions++; - if(unlikely(!web_client_has_wait_receive(w))) - return -1; + struct web_client *w = (struct web_client *)pi->data; + int fd = pi->fd; if(unlikely(web_client_receive(w) < 0)) return -1; + debug(D_WEB_CLIENT, "%llu: processing received data on fd %d.", w->id, fd); + web_client_process_request(w); + if(unlikely(w->mode == WEB_CLIENT_MODE_FILECOPY)) { - if(unlikely(w->ifd != -1 && w->ifd != fd)) { - // FIXME: we switched input fd - // add a new socket to poll_events, with the same - } - else if(unlikely(w->ifd == -1)) { - // FIXME: we closed input fd - // instruct poll_events() to close fd - return -1; + if(w->pollinfo_filecopy_slot == 0) { + debug(D_WEB_CLIENT, "%llu: FILECOPY DETECTED ON FD %d", w->id, pi->fd); + + if (unlikely(w->ifd != -1 && w->ifd != w->ofd && w->ifd != fd)) { + // add a new socket to poll_events, with the same + debug(D_WEB_CLIENT, "%llu: CREATING FILECOPY SLOT ON FD %d", w->id, pi->fd); + + POLLINFO *fpi = poll_add_fd( + pi->p + , w->ifd + , 0 + , POLLINFO_FLAG_CLIENT_SOCKET + , "FILENAME" + , "" + , web_server_file_add_callback + , web_werver_file_del_callback + , web_server_file_read_callback + , web_server_file_write_callback + , (void *) w + ); + + if(fpi) + w->pollinfo_filecopy_slot = fpi->slot; + else { + error("Failed to add filecopy fd. 
Closing client."); + return -1; + } + } } } else { - debug(D_WEB_CLIENT, "%llu: Processing received data.", w->id); - web_client_process_request(w); + if(unlikely(w->ifd == fd && web_client_has_wait_receive(w))) + *events |= POLLIN; } - if(unlikely(w->ifd == fd && web_client_has_wait_receive(w))) - *events |= POLLIN; - if(unlikely(w->ofd == fd && web_client_has_wait_send(w))) *events |= POLLOUT; - if(unlikely(*events == 0)) - return -1; - - return 0; + return web_server_check_client_status(w); } -static int web_server_snd_callback(int fd, int socktype, void *data, short int *events) { - (void)fd; - (void)socktype; +static int web_server_snd_callback(POLLINFO *pi, short int *events) { + worker_private->sends++; - struct web_client *w = (struct web_client *)data; + struct web_client *w = (struct web_client *)pi->data; + int fd = pi->fd; - if(unlikely(!web_client_has_wait_send(w))) - return -1; + debug(D_WEB_CLIENT, "%llu: sending data on fd %d.", w->id, fd); if(unlikely(web_client_send(w) < 0)) return -1; @@ -477,42 +1117,176 @@ static int web_server_snd_callback(int fd, int socktype, void *data, short int * if(unlikely(w->ofd == fd && web_client_has_wait_send(w))) *events |= POLLOUT; - if(unlikely(*events == 0)) - return -1; - - return 0; + return web_server_check_client_status(w); } -void *socket_listen_main_single_threaded(void *ptr) { - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; +static void web_server_tmr_callback(void *timer_data) { + worker_private = (struct web_server_static_threaded_worker *)timer_data; - web_server_mode = WEB_SERVER_MODE_SINGLE_THREADED; + static __thread RRDSET *st = NULL; + static __thread RRDDIM *rd_user = NULL, *rd_system = NULL; - info("Single-threaded WEB SERVER thread created with task id %d", gettid()); + if(unlikely(!st)) { + char id[100 + 1]; + char title[100 + 1]; - if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) - error("Cannot set pthread cancel type to DEFERRED."); + 
snprintfz(id, 100, "web_thread%d_cpu", worker_private->id + 1); + snprintfz(title, 100, "NetData web server thread No %d CPU usage", worker_private->id + 1); - if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) - error("Cannot set pthread cancel state to ENABLE."); + st = rrdset_create_localhost( + "netdata" + , id + , NULL + , "web" + , "netdata.web_cpu" + , title + , "milliseconds/s" + , "web" + , "stats" + , 132000 + worker_private->id + , default_rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_user = rrddim_add(st, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + rd_system = rrddim_add(st, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + else + rrdset_next(st); - if(!api_sockets.opened) - fatal("LISTENER: no listen sockets available."); + struct rusage rusage; + getrusage(RUSAGE_THREAD, &rusage); + rrddim_set_by_pointer(st, rd_user, rusage.ru_utime.tv_sec * 1000000ULL + rusage.ru_utime.tv_usec); + rrddim_set_by_pointer(st, rd_system, rusage.ru_stime.tv_sec * 1000000ULL + rusage.ru_stime.tv_usec); + rrdset_done(st); +} - poll_events(&api_sockets - , web_server_add_callback - , web_server_del_callback - , web_server_rcv_callback - , web_server_snd_callback - , web_allow_connections_from - , NULL +// ---------------------------------------------------------------------------- +// web server worker thread + +static void socket_listen_main_static_threaded_worker_cleanup(void *ptr) { + worker_private = (struct web_server_static_threaded_worker *)ptr; + + info("freeing local web clients cache..."); + web_client_cache_destroy(); + + info("stopped after %zu connects, %zu disconnects (max concurrent %zu), %zu receptions and %zu sends", + worker_private->connected, + worker_private->disconnected, + worker_private->max_concurrent, + worker_private->receptions, + worker_private->sends ); - debug(D_WEB_CLIENT, "LISTENER: exit!"); + worker_private->running = 0; +} + +void *socket_listen_main_static_threaded_worker(void *ptr) { + worker_private = 
(struct web_server_static_threaded_worker *)ptr; + worker_private->running = 1; + + netdata_thread_cleanup_push(socket_listen_main_static_threaded_worker_cleanup, ptr); + + poll_events(&api_sockets + , web_server_add_callback + , web_server_del_callback + , web_server_rcv_callback + , web_server_snd_callback + , web_server_tmr_callback + , web_allow_connections_from + , NULL + , web_client_first_request_timeout + , web_client_timeout + , default_rrd_update_every * 1000 // timer_milliseconds + , ptr // timer_data + , worker_private->max_sockets + ); + + netdata_thread_cleanup_pop(1); + return NULL; +} + + +// ---------------------------------------------------------------------------- +// web server main thread - also becomes a worker + +static void socket_listen_main_static_threaded_cleanup(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + int i, found = 0, max = 2 * USEC_PER_SEC, step = 50000; + + // we start from 1, - 0 is self + for(i = 1; i < static_threaded_workers_count; i++) { + if(static_workers_private_data[i].running) { + found++; + info("stopping worker %d", i + 1); + netdata_thread_cancel(static_workers_private_data[i].thread); + } + else + info("found stopped worker %d", i + 1); + } + + while(found && max > 0) { + max -= step; + info("Waiting %d static web threads to finish...", found); + sleep_usec(step); + found = 0; + + // we start from 1, - 0 is self + for(i = 1; i < static_threaded_workers_count; i++) { + if (static_workers_private_data[i].running) + found++; + } + } + + if(found) + error("%d static web threads are taking too long to finish. 
Giving up.", found); + + info("closing all web server sockets..."); listen_sockets_close(&api_sockets); - static_thread->enabled = 0; - pthread_exit(NULL); + info("all static web threads stopped."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *socket_listen_main_static_threaded(void *ptr) { + netdata_thread_cleanup_push(socket_listen_main_static_threaded_cleanup, ptr); + web_server_mode = WEB_SERVER_MODE_STATIC_THREADED; + + if(!api_sockets.opened) + fatal("LISTENER: no listen sockets available."); + + // 6 threads is the optimal value + // since 6 are the parallel connections browsers will do + // so, if the machine has more CPUs, avoid using resources unnecessarily + int def_thread_count = (processors > 6)?6:processors; + + static_threaded_workers_count = config_get_number(CONFIG_SECTION_WEB, "web server threads", def_thread_count); + if(static_threaded_workers_count < 1) static_threaded_workers_count = 1; + + size_t max_sockets = (size_t)config_get_number(CONFIG_SECTION_WEB, "web server max sockets", (long long int)(rlimit_nofile.rlim_cur / 2)); + + static_workers_private_data = callocz((size_t)static_threaded_workers_count, sizeof(struct web_server_static_threaded_worker)); + + web_server_is_multithreaded = (static_threaded_workers_count > 1); + + int i; + for(i = 1; i < static_threaded_workers_count; i++) { + static_workers_private_data[i].id = i; + static_workers_private_data[i].max_sockets = max_sockets / static_threaded_workers_count; + + char tag[50 + 1]; + snprintfz(tag, 50, "WEB_SERVER[static%d]", i+1); + + info("starting worker %d", i+1); + netdata_thread_create(&static_workers_private_data[i].thread, tag, NETDATA_THREAD_OPTION_DEFAULT, socket_listen_main_static_threaded_worker, (void *)&static_workers_private_data[i]); + } + + // and the main one + static_workers_private_data[0].max_sockets = max_sockets / static_threaded_workers_count; + socket_listen_main_static_threaded_worker((void *)&static_workers_private_data[0]); + + 
netdata_thread_cleanup_pop(1); return NULL; } -#endif diff --git a/src/web_server.h b/src/web_server.h index aa293695d..7492547ef 100644 --- a/src/web_server.h +++ b/src/web_server.h @@ -16,10 +16,18 @@ typedef enum web_server_mode { WEB_SERVER_MODE_SINGLE_THREADED, + WEB_SERVER_MODE_STATIC_THREADED, WEB_SERVER_MODE_MULTI_THREADED, WEB_SERVER_MODE_NONE } WEB_SERVER_MODE; +extern SIMPLE_PATTERN *web_allow_connections_from; +extern SIMPLE_PATTERN *web_allow_dashboard_from; +extern SIMPLE_PATTERN *web_allow_registry_from; +extern SIMPLE_PATTERN *web_allow_badges_from; +extern SIMPLE_PATTERN *web_allow_streaming_from; +extern SIMPLE_PATTERN *web_allow_netdataconf_from; + extern WEB_SERVER_MODE web_server_mode; extern WEB_SERVER_MODE web_server_mode_id(const char *mode); @@ -27,6 +35,12 @@ extern const char *web_server_mode_name(WEB_SERVER_MODE id); extern void *socket_listen_main_multi_threaded(void *ptr); extern void *socket_listen_main_single_threaded(void *ptr); +extern void *socket_listen_main_static_threaded(void *ptr); extern int api_listen_sockets_setup(void); +#define DEFAULT_TIMEOUT_TO_RECEIVE_FIRST_WEB_REQUEST 60 +#define DEFAULT_DISCONNECT_IDLE_WEB_CLIENTS_AFTER_SECONDS 60 +extern int web_client_timeout; +extern int web_client_first_request_timeout; + #endif /* NETDATA_WEB_SERVER_H */ diff --git a/src/zfs_common.c b/src/zfs_common.c index 0915416f5..05935dd0f 100644 --- a/src/zfs_common.c +++ b/src/zfs_common.c @@ -48,7 +48,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "MB" , plugin , "zfs" - , 2000 + , 2500 , update_every , RRDSET_TYPE_AREA ); @@ -86,7 +86,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "MB" , plugin , "zfs" - , 2000 + , 2500 , update_every , RRDSET_TYPE_AREA ); @@ -123,7 +123,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "reads/s" , plugin , "zfs" - , 2010 + , 2510 , update_every , RRDSET_TYPE_AREA ); @@ -168,7 +168,7 @@ void 
generate_charts_arcstats(const char *plugin, int update_every) { , "kilobytes/s" , plugin , "zfs" - , 2200 + , 2700 , update_every , RRDSET_TYPE_AREA ); @@ -202,7 +202,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2020 + , 2520 , update_every , RRDSET_TYPE_STACKED ); @@ -236,7 +236,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2030 + , 2530 , update_every , RRDSET_TYPE_STACKED ); @@ -270,7 +270,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2040 + , 2540 , update_every , RRDSET_TYPE_STACKED ); @@ -304,7 +304,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2050 + , 2550 , update_every , RRDSET_TYPE_STACKED ); @@ -338,7 +338,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2060 + , 2560 , update_every , RRDSET_TYPE_STACKED ); @@ -374,7 +374,7 @@ void generate_charts_arcstats(const char *plugin, int update_every) { , "hits/s" , plugin , "zfs" - , 2100 + , 2600 , update_every , RRDSET_TYPE_AREA ); @@ -433,7 +433,7 @@ void generate_charts_arc_summary(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2020 + , 2520 , update_every , RRDSET_TYPE_STACKED ); @@ -472,7 +472,7 @@ void generate_charts_arc_summary(const char *plugin, int update_every) { , "operations/s" , plugin , "zfs" - , 2023 + , 2523 , update_every , RRDSET_TYPE_LINE ); @@ -518,7 +518,7 @@ void generate_charts_arc_summary(const char *plugin, int update_every) { , "operations/s" , plugin , "zfs" - , 2022 + , 2522 , update_every , RRDSET_TYPE_LINE ); @@ -556,7 +556,7 @@ void generate_charts_arc_summary(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2019 + , 2519 , update_every , RRDSET_TYPE_STACKED ); @@ -590,7 +590,7 @@ void 
generate_charts_arc_summary(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2031 + , 2531 , update_every , RRDSET_TYPE_STACKED ); @@ -624,7 +624,7 @@ void generate_charts_arc_summary(const char *plugin, int update_every) { , "percentage" , plugin , "zfs" - , 2032 + , 2532 , update_every , RRDSET_TYPE_STACKED ); @@ -658,7 +658,7 @@ void generate_charts_arc_summary(const char *plugin, int update_every) { , "elements" , plugin , "zfs" - , 2300 + , 2800 , update_every , RRDSET_TYPE_LINE ); @@ -692,7 +692,7 @@ void generate_charts_arc_summary(const char *plugin, int update_every) { , "chains" , plugin , "zfs" - , 2310 + , 2810 , update_every , RRDSET_TYPE_LINE ); |