diff options
Diffstat (limited to 'exec')
-rw-r--r-- | exec/Makefile.am | 70 | ||||
-rw-r--r-- | exec/Makefile.in | 1218 | ||||
-rw-r--r-- | exec/apidef.c | 149 | ||||
-rw-r--r-- | exec/apidef.h | 42 | ||||
-rw-r--r-- | exec/cfg.c | 1470 | ||||
-rw-r--r-- | exec/cmap.c | 1155 | ||||
-rw-r--r-- | exec/coroparse.c | 1697 | ||||
-rw-r--r-- | exec/cpg.c | 2344 | ||||
-rw-r--r-- | exec/cs_queue.h | 292 | ||||
-rw-r--r-- | exec/fsm.h | 131 | ||||
-rw-r--r-- | exec/icmap.c | 1330 | ||||
-rw-r--r-- | exec/ipc_glue.c | 829 | ||||
-rw-r--r-- | exec/ipcs_stats.h | 61 | ||||
-rw-r--r-- | exec/logconfig.c | 757 | ||||
-rw-r--r-- | exec/logconfig.h | 62 | ||||
-rw-r--r-- | exec/logsys.c | 952 | ||||
-rw-r--r-- | exec/main.c | 1666 | ||||
-rw-r--r-- | exec/main.h | 129 | ||||
-rw-r--r-- | exec/mon.c | 511 | ||||
-rw-r--r-- | exec/pload.c | 357 | ||||
-rw-r--r-- | exec/quorum.c | 110 | ||||
-rw-r--r-- | exec/quorum.h | 65 | ||||
-rw-r--r-- | exec/schedwrk.c | 157 | ||||
-rw-r--r-- | exec/schedwrk.h | 53 | ||||
-rw-r--r-- | exec/service.c | 468 | ||||
-rw-r--r-- | exec/service.h | 91 | ||||
-rw-r--r-- | exec/stats.c | 784 | ||||
-rw-r--r-- | exec/stats.h | 73 | ||||
-rw-r--r-- | exec/sync.c | 549 | ||||
-rw-r--r-- | exec/sync.h | 73 | ||||
-rw-r--r-- | exec/timer.c | 96 | ||||
-rw-r--r-- | exec/timer.h | 65 | ||||
-rw-r--r-- | exec/totemconfig.c | 2454 | ||||
-rw-r--r-- | exec/totemconfig.h | 93 | ||||
-rw-r--r-- | exec/totemip.c | 624 | ||||
-rw-r--r-- | exec/totemknet.c | 2306 | ||||
-rw-r--r-- | exec/totemknet.h | 159 | ||||
-rw-r--r-- | exec/totemnet.c | 628 | ||||
-rw-r--r-- | exec/totemnet.h | 166 | ||||
-rw-r--r-- | exec/totempg.c | 1620 | ||||
-rw-r--r-- | exec/totemsrp.c | 5252 | ||||
-rw-r--r-- | exec/totemsrp.h | 168 | ||||
-rw-r--r-- | exec/totemudp.c | 1549 | ||||
-rw-r--r-- | exec/totemudp.h | 144 | ||||
-rw-r--r-- | exec/totemudpu.c | 1453 | ||||
-rw-r--r-- | exec/totemudpu.h | 144 | ||||
-rw-r--r-- | exec/util.c | 343 | ||||
-rw-r--r-- | exec/util.h | 98 | ||||
-rw-r--r-- | exec/votequorum.c | 3082 | ||||
-rw-r--r-- | exec/votequorum.h | 44 | ||||
-rw-r--r-- | exec/vsf.h | 58 | ||||
-rw-r--r-- | exec/vsf_quorum.c | 801 | ||||
-rw-r--r-- | exec/vsf_ykd.c | 537 | ||||
-rw-r--r-- | exec/vsf_ykd.h | 44 | ||||
-rw-r--r-- | exec/wd.c | 767 |
55 files changed, 40340 insertions, 0 deletions
diff --git a/exec/Makefile.am b/exec/Makefile.am new file mode 100644 index 0000000..1c31f8c --- /dev/null +++ b/exec/Makefile.am @@ -0,0 +1,70 @@ +# Copyright (c) 2009 Red Hat, Inc. +# +# Authors: Andrew Beekhof +# Steven Dake (sdake@redhat.com) +# +# This software licensed under BSD license, the text of which follows: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the MontaVista Software, Inc. nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+ +MAINTAINERCLEANFILES = Makefile.in + +noinst_HEADERS = apidef.h cs_queue.h logconfig.h main.h \ + quorum.h service.h timer.h totemconfig.h \ + totemnet.h totemudp.h \ + totemudpu.h totemsrp.h util.h vsf.h \ + schedwrk.h sync.h fsm.h votequorum.h vsf_ykd.h \ + totemknet.h stats.h ipcs_stats.h + +sbin_PROGRAMS = corosync + +corosync_SOURCES = vsf_ykd.c coroparse.c vsf_quorum.c sync.c \ + logsys.c cfg.c cmap.c cpg.c pload.c \ + votequorum.c util.c schedwrk.c main.c \ + apidef.c quorum.c icmap.c timer.c stats.c \ + ipc_glue.c service.c logconfig.c totemconfig.c \ + totemip.c totemnet.c totemudp.c \ + totemudpu.c totemsrp.c \ + totempg.c totemknet.c + +if BUILD_MONITORING +corosync_SOURCES += mon.c +endif + +if BUILD_WATCHDOG +corosync_SOURCES += wd.c +endif + +corosync_CPPFLAGS = -DLOGCONFIG_USE_ICMAP=1 + +corosync_CFLAGS = $(statgrab_CFLAGS) $(libsystemd_CFLAGS) $(knet_CFLAGS) $(nozzle_CFLAGS) + +corosync_LDADD = ../common_lib/libcorosync_common.la \ + $(LIBQB_LIBS) $(statgrab_LIBS) $(libsystemd_LIBS) $(knet_LIBS) $(nozzle_LIBS) + +corosync_DEPENDENCIES = ../common_lib/libcorosync_common.la + +lint: + -splint $(LINT_FLAGS) $(CPPFLAGS) $(CFLAGS) *.c diff --git a/exec/Makefile.in b/exec/Makefile.in new file mode 100644 index 0000000..0fbc6b7 --- /dev/null +++ b/exec/Makefile.in @@ -0,0 +1,1218 @@ +# Makefile.in generated by automake 1.13.4 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Copyright (c) 2009 Red Hat, Inc. 
+# +# Authors: Andrew Beekhof +# Steven Dake (sdake@redhat.com) +# +# This software licensed under BSD license, the text of which follows: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the MontaVista Software, Inc. nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. + + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +sbin_PROGRAMS = corosync$(EXEEXT) +@BUILD_MONITORING_TRUE@am__append_1 = mon.c +@BUILD_WATCHDOG_TRUE@am__append_2 = wd.c +subdir = exec +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + 
$(top_srcdir)/depcomp $(noinst_HEADERS) +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/lib/libcfg.verso $(top_srcdir)/lib/libcpg.verso \ + $(top_srcdir)/lib/libquorum.verso \ + $(top_srcdir)/lib/libsam.verso \ + $(top_srcdir)/lib/libvotequorum.verso \ + $(top_srcdir)/lib/libcmap.verso $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/corosync/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(sbindir)" +PROGRAMS = $(sbin_PROGRAMS) +am__corosync_SOURCES_DIST = vsf_ykd.c coroparse.c vsf_quorum.c sync.c \ + logsys.c cfg.c cmap.c cpg.c pload.c votequorum.c util.c \ + schedwrk.c main.c apidef.c quorum.c icmap.c timer.c stats.c \ + ipc_glue.c service.c logconfig.c totemconfig.c totemip.c \ + totemnet.c totemudp.c totemudpu.c totemsrp.c totempg.c \ + totemknet.c mon.c wd.c +@BUILD_MONITORING_TRUE@am__objects_1 = corosync-mon.$(OBJEXT) +@BUILD_WATCHDOG_TRUE@am__objects_2 = corosync-wd.$(OBJEXT) +am_corosync_OBJECTS = corosync-vsf_ykd.$(OBJEXT) \ + corosync-coroparse.$(OBJEXT) corosync-vsf_quorum.$(OBJEXT) \ + corosync-sync.$(OBJEXT) corosync-logsys.$(OBJEXT) \ + corosync-cfg.$(OBJEXT) corosync-cmap.$(OBJEXT) \ + corosync-cpg.$(OBJEXT) corosync-pload.$(OBJEXT) \ + corosync-votequorum.$(OBJEXT) corosync-util.$(OBJEXT) \ + corosync-schedwrk.$(OBJEXT) corosync-main.$(OBJEXT) \ + corosync-apidef.$(OBJEXT) corosync-quorum.$(OBJEXT) \ + corosync-icmap.$(OBJEXT) corosync-timer.$(OBJEXT) \ + corosync-stats.$(OBJEXT) corosync-ipc_glue.$(OBJEXT) \ + corosync-service.$(OBJEXT) corosync-logconfig.$(OBJEXT) \ + corosync-totemconfig.$(OBJEXT) corosync-totemip.$(OBJEXT) \ + corosync-totemnet.$(OBJEXT) 
corosync-totemudp.$(OBJEXT) \ + corosync-totemudpu.$(OBJEXT) corosync-totemsrp.$(OBJEXT) \ + corosync-totempg.$(OBJEXT) corosync-totemknet.$(OBJEXT) \ + $(am__objects_1) $(am__objects_2) +corosync_OBJECTS = $(am_corosync_OBJECTS) +am__DEPENDENCIES_1 = +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +corosync_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(corosync_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include/corosync +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(corosync_SOURCES) +DIST_SOURCES = $(am__corosync_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files 
= $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUGTOOL = @AUGTOOL@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASHPATH = @BASHPATH@ +BINDGEN = @BINDGEN@ +CARGO = @CARGO@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFG_SONAME = @CFG_SONAME@ +CFLAGS = @CFLAGS@ +CLIPPY = @CLIPPY@ +CMAP_SONAME = @CMAP_SONAME@ +COROSYSCONFDIR = @COROSYSCONFDIR@ +CPG_SONAME = @CPG_SONAME@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DBUS_CFLAGS = @DBUS_CFLAGS@ +DBUS_LIBS = @DBUS_LIBS@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOT = @DOT@ +DOXYGEN = @DOXYGEN@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GREP = @GREP@ +GROFF = @GROFF@ +INITCONFIGDIR = @INITCONFIGDIR@ +INITDDIR = @INITDDIR@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQB_CFLAGS = 
@LIBQB_CFLAGS@ +LIBQB_LIBS = @LIBQB_LIBS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LINT_FLAGS = @LINT_FLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LOGDIR = @LOGDIR@ +LOGROTATEDIR = @LOGROTATEDIR@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +QUORUM_SONAME = @QUORUM_SONAME@ +RANLIB = @RANLIB@ +RUSTC = @RUSTC@ +RUSTDOC = @RUSTDOC@ +RUSTFMT = @RUSTFMT@ +RUST_FLAGS = @RUST_FLAGS@ +RUST_TARGET_DIR = @RUST_TARGET_DIR@ +SAM_SONAME = @SAM_SONAME@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SNMPCONFIG = @SNMPCONFIG@ +SNMP_LIBS = @SNMP_LIBS@ +SOMAJOR = @SOMAJOR@ +SOMICRO = @SOMICRO@ +SOMINOR = @SOMINOR@ +SONAME = @SONAME@ +STRIP = @STRIP@ +SYSTEMDDIR = @SYSTEMDDIR@ +VERSCRIPT_LDFLAGS = @VERSCRIPT_LDFLAGS@ +VERSION = @VERSION@ +VOTEQUORUM_SONAME = @VOTEQUORUM_SONAME@ +WITH_LIST = @WITH_LIST@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +corosyncrustver = @corosyncrustver@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ 
+host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +knet_CFLAGS = @knet_CFLAGS@ +knet_LIBS = @knet_LIBS@ +libdir = @libdir@ +libexecdir = @libexecdir@ +libsystemd_CFLAGS = @libsystemd_CFLAGS@ +libsystemd_LIBS = @libsystemd_LIBS@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +nozzle_CFLAGS = @nozzle_CFLAGS@ +nozzle_LIBS = @nozzle_LIBS@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +statgrab_CFLAGS = @statgrab_CFLAGS@ +statgrab_LIBS = @statgrab_LIBS@ +statgrabge090_CFLAGS = @statgrabge090_CFLAGS@ +statgrabge090_LIBS = @statgrabge090_LIBS@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +MAINTAINERCLEANFILES = Makefile.in +noinst_HEADERS = apidef.h cs_queue.h logconfig.h main.h \ + quorum.h service.h timer.h totemconfig.h \ + totemnet.h totemudp.h \ + totemudpu.h totemsrp.h util.h vsf.h \ + schedwrk.h sync.h fsm.h votequorum.h vsf_ykd.h \ + totemknet.h stats.h ipcs_stats.h + +corosync_SOURCES = vsf_ykd.c coroparse.c vsf_quorum.c sync.c logsys.c \ + cfg.c cmap.c cpg.c pload.c votequorum.c util.c schedwrk.c \ + main.c apidef.c quorum.c icmap.c timer.c stats.c ipc_glue.c \ + service.c logconfig.c totemconfig.c totemip.c totemnet.c \ + totemudp.c totemudpu.c totemsrp.c totempg.c totemknet.c \ + $(am__append_1) $(am__append_2) +corosync_CPPFLAGS = -DLOGCONFIG_USE_ICMAP=1 +corosync_CFLAGS = $(statgrab_CFLAGS) $(libsystemd_CFLAGS) $(knet_CFLAGS) $(nozzle_CFLAGS) +corosync_LDADD = ../common_lib/libcorosync_common.la \ + $(LIBQB_LIBS) $(statgrab_LIBS) $(libsystemd_LIBS) $(knet_LIBS) $(nozzle_LIBS) + 
+corosync_DEPENDENCIES = ../common_lib/libcorosync_common.la +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign exec/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign exec/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-sbinPROGRAMS: $(sbin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(sbin_PROGRAMS)'; test -n "$(sbindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(sbindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(sbindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; 
dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(sbindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(sbindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-sbinPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(sbin_PROGRAMS)'; test -n "$(sbindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(sbindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(sbindir)" && rm -f $$files + +clean-sbinPROGRAMS: + @list='$(sbin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +corosync$(EXEEXT): $(corosync_OBJECTS) $(corosync_DEPENDENCIES) $(EXTRA_corosync_DEPENDENCIES) + @rm -f corosync$(EXEEXT) + $(AM_V_CCLD)$(corosync_LINK) $(corosync_OBJECTS) $(corosync_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-apidef.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-cfg.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-cmap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-coroparse.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-cpg.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/corosync-icmap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-ipc_glue.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-logconfig.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-logsys.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-main.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-mon.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-pload.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-quorum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-schedwrk.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-service.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-stats.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-sync.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-timer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totemconfig.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totemip.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totemknet.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totemnet.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totempg.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totemsrp.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totemudp.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-totemudpu.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-util.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-votequorum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-vsf_quorum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-vsf_ykd.Po@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/corosync-wd.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +corosync-vsf_ykd.o: vsf_ykd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-vsf_ykd.o -MD -MP -MF $(DEPDIR)/corosync-vsf_ykd.Tpo -c -o corosync-vsf_ykd.o `test -f 'vsf_ykd.c' || echo '$(srcdir)/'`vsf_ykd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-vsf_ykd.Tpo $(DEPDIR)/corosync-vsf_ykd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='vsf_ykd.c' object='corosync-vsf_ykd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-vsf_ykd.o `test -f 'vsf_ykd.c' || echo '$(srcdir)/'`vsf_ykd.c + +corosync-vsf_ykd.obj: vsf_ykd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-vsf_ykd.obj -MD -MP -MF $(DEPDIR)/corosync-vsf_ykd.Tpo -c -o corosync-vsf_ykd.obj `if test -f 'vsf_ykd.c'; then $(CYGPATH_W) 'vsf_ykd.c'; else $(CYGPATH_W) '$(srcdir)/vsf_ykd.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-vsf_ykd.Tpo $(DEPDIR)/corosync-vsf_ykd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='vsf_ykd.c' object='corosync-vsf_ykd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-vsf_ykd.obj `if test -f 'vsf_ykd.c'; then $(CYGPATH_W) 'vsf_ykd.c'; else $(CYGPATH_W) '$(srcdir)/vsf_ykd.c'; fi` + +corosync-coroparse.o: coroparse.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-coroparse.o -MD -MP -MF $(DEPDIR)/corosync-coroparse.Tpo -c -o corosync-coroparse.o `test -f 'coroparse.c' || echo '$(srcdir)/'`coroparse.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-coroparse.Tpo $(DEPDIR)/corosync-coroparse.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='coroparse.c' object='corosync-coroparse.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-coroparse.o `test -f 'coroparse.c' || echo '$(srcdir)/'`coroparse.c + +corosync-coroparse.obj: coroparse.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-coroparse.obj -MD -MP -MF $(DEPDIR)/corosync-coroparse.Tpo -c -o corosync-coroparse.obj `if test -f 'coroparse.c'; then $(CYGPATH_W) 'coroparse.c'; else $(CYGPATH_W) '$(srcdir)/coroparse.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-coroparse.Tpo $(DEPDIR)/corosync-coroparse.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='coroparse.c' object='corosync-coroparse.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-coroparse.obj `if test -f 'coroparse.c'; then $(CYGPATH_W) 'coroparse.c'; else $(CYGPATH_W) '$(srcdir)/coroparse.c'; fi` + +corosync-vsf_quorum.o: vsf_quorum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-vsf_quorum.o -MD -MP -MF $(DEPDIR)/corosync-vsf_quorum.Tpo -c -o corosync-vsf_quorum.o `test -f 'vsf_quorum.c' || echo '$(srcdir)/'`vsf_quorum.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-vsf_quorum.Tpo $(DEPDIR)/corosync-vsf_quorum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='vsf_quorum.c' object='corosync-vsf_quorum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-vsf_quorum.o `test -f 
'vsf_quorum.c' || echo '$(srcdir)/'`vsf_quorum.c + +corosync-vsf_quorum.obj: vsf_quorum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-vsf_quorum.obj -MD -MP -MF $(DEPDIR)/corosync-vsf_quorum.Tpo -c -o corosync-vsf_quorum.obj `if test -f 'vsf_quorum.c'; then $(CYGPATH_W) 'vsf_quorum.c'; else $(CYGPATH_W) '$(srcdir)/vsf_quorum.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-vsf_quorum.Tpo $(DEPDIR)/corosync-vsf_quorum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='vsf_quorum.c' object='corosync-vsf_quorum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-vsf_quorum.obj `if test -f 'vsf_quorum.c'; then $(CYGPATH_W) 'vsf_quorum.c'; else $(CYGPATH_W) '$(srcdir)/vsf_quorum.c'; fi` + +corosync-sync.o: sync.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-sync.o -MD -MP -MF $(DEPDIR)/corosync-sync.Tpo -c -o corosync-sync.o `test -f 'sync.c' || echo '$(srcdir)/'`sync.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-sync.Tpo $(DEPDIR)/corosync-sync.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sync.c' object='corosync-sync.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-sync.o `test -f 'sync.c' || echo '$(srcdir)/'`sync.c + +corosync-sync.obj: sync.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-sync.obj -MD -MP -MF $(DEPDIR)/corosync-sync.Tpo -c -o corosync-sync.obj `if test -f 'sync.c'; then $(CYGPATH_W) 'sync.c'; else $(CYGPATH_W) '$(srcdir)/sync.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-sync.Tpo $(DEPDIR)/corosync-sync.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sync.c' object='corosync-sync.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-sync.obj `if test -f 'sync.c'; then $(CYGPATH_W) 'sync.c'; else $(CYGPATH_W) '$(srcdir)/sync.c'; fi` + +corosync-logsys.o: logsys.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-logsys.o -MD -MP -MF $(DEPDIR)/corosync-logsys.Tpo -c -o corosync-logsys.o `test -f 'logsys.c' || echo '$(srcdir)/'`logsys.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-logsys.Tpo $(DEPDIR)/corosync-logsys.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='logsys.c' object='corosync-logsys.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-logsys.o `test -f 'logsys.c' || echo '$(srcdir)/'`logsys.c + +corosync-logsys.obj: logsys.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-logsys.obj -MD -MP -MF $(DEPDIR)/corosync-logsys.Tpo -c -o corosync-logsys.obj `if test -f 'logsys.c'; then $(CYGPATH_W) 'logsys.c'; else $(CYGPATH_W) 
'$(srcdir)/logsys.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-logsys.Tpo $(DEPDIR)/corosync-logsys.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='logsys.c' object='corosync-logsys.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-logsys.obj `if test -f 'logsys.c'; then $(CYGPATH_W) 'logsys.c'; else $(CYGPATH_W) '$(srcdir)/logsys.c'; fi` + +corosync-cfg.o: cfg.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-cfg.o -MD -MP -MF $(DEPDIR)/corosync-cfg.Tpo -c -o corosync-cfg.o `test -f 'cfg.c' || echo '$(srcdir)/'`cfg.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-cfg.Tpo $(DEPDIR)/corosync-cfg.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cfg.c' object='corosync-cfg.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-cfg.o `test -f 'cfg.c' || echo '$(srcdir)/'`cfg.c + +corosync-cfg.obj: cfg.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-cfg.obj -MD -MP -MF $(DEPDIR)/corosync-cfg.Tpo -c -o corosync-cfg.obj `if test -f 'cfg.c'; then $(CYGPATH_W) 'cfg.c'; else $(CYGPATH_W) '$(srcdir)/cfg.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-cfg.Tpo $(DEPDIR)/corosync-cfg.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cfg.c' object='corosync-cfg.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-cfg.obj `if test -f 'cfg.c'; then $(CYGPATH_W) 'cfg.c'; else $(CYGPATH_W) '$(srcdir)/cfg.c'; fi` + +corosync-cmap.o: cmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-cmap.o -MD -MP -MF $(DEPDIR)/corosync-cmap.Tpo -c -o corosync-cmap.o `test -f 'cmap.c' || echo '$(srcdir)/'`cmap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-cmap.Tpo $(DEPDIR)/corosync-cmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmap.c' object='corosync-cmap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-cmap.o `test -f 'cmap.c' || echo '$(srcdir)/'`cmap.c + +corosync-cmap.obj: cmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-cmap.obj -MD -MP -MF $(DEPDIR)/corosync-cmap.Tpo -c -o corosync-cmap.obj `if test -f 'cmap.c'; then $(CYGPATH_W) 'cmap.c'; else $(CYGPATH_W) '$(srcdir)/cmap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-cmap.Tpo $(DEPDIR)/corosync-cmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmap.c' object='corosync-cmap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o 
corosync-cmap.obj `if test -f 'cmap.c'; then $(CYGPATH_W) 'cmap.c'; else $(CYGPATH_W) '$(srcdir)/cmap.c'; fi` + +corosync-cpg.o: cpg.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-cpg.o -MD -MP -MF $(DEPDIR)/corosync-cpg.Tpo -c -o corosync-cpg.o `test -f 'cpg.c' || echo '$(srcdir)/'`cpg.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-cpg.Tpo $(DEPDIR)/corosync-cpg.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cpg.c' object='corosync-cpg.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-cpg.o `test -f 'cpg.c' || echo '$(srcdir)/'`cpg.c + +corosync-cpg.obj: cpg.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-cpg.obj -MD -MP -MF $(DEPDIR)/corosync-cpg.Tpo -c -o corosync-cpg.obj `if test -f 'cpg.c'; then $(CYGPATH_W) 'cpg.c'; else $(CYGPATH_W) '$(srcdir)/cpg.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-cpg.Tpo $(DEPDIR)/corosync-cpg.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cpg.c' object='corosync-cpg.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-cpg.obj `if test -f 'cpg.c'; then $(CYGPATH_W) 'cpg.c'; else $(CYGPATH_W) '$(srcdir)/cpg.c'; fi` + +corosync-pload.o: pload.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT 
corosync-pload.o -MD -MP -MF $(DEPDIR)/corosync-pload.Tpo -c -o corosync-pload.o `test -f 'pload.c' || echo '$(srcdir)/'`pload.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-pload.Tpo $(DEPDIR)/corosync-pload.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pload.c' object='corosync-pload.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-pload.o `test -f 'pload.c' || echo '$(srcdir)/'`pload.c + +corosync-pload.obj: pload.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-pload.obj -MD -MP -MF $(DEPDIR)/corosync-pload.Tpo -c -o corosync-pload.obj `if test -f 'pload.c'; then $(CYGPATH_W) 'pload.c'; else $(CYGPATH_W) '$(srcdir)/pload.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-pload.Tpo $(DEPDIR)/corosync-pload.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pload.c' object='corosync-pload.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-pload.obj `if test -f 'pload.c'; then $(CYGPATH_W) 'pload.c'; else $(CYGPATH_W) '$(srcdir)/pload.c'; fi` + +corosync-votequorum.o: votequorum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-votequorum.o -MD -MP -MF $(DEPDIR)/corosync-votequorum.Tpo -c -o corosync-votequorum.o `test -f 'votequorum.c' || echo '$(srcdir)/'`votequorum.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/corosync-votequorum.Tpo $(DEPDIR)/corosync-votequorum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='votequorum.c' object='corosync-votequorum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-votequorum.o `test -f 'votequorum.c' || echo '$(srcdir)/'`votequorum.c + +corosync-votequorum.obj: votequorum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-votequorum.obj -MD -MP -MF $(DEPDIR)/corosync-votequorum.Tpo -c -o corosync-votequorum.obj `if test -f 'votequorum.c'; then $(CYGPATH_W) 'votequorum.c'; else $(CYGPATH_W) '$(srcdir)/votequorum.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-votequorum.Tpo $(DEPDIR)/corosync-votequorum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='votequorum.c' object='corosync-votequorum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-votequorum.obj `if test -f 'votequorum.c'; then $(CYGPATH_W) 'votequorum.c'; else $(CYGPATH_W) '$(srcdir)/votequorum.c'; fi` + +corosync-util.o: util.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-util.o -MD -MP -MF $(DEPDIR)/corosync-util.Tpo -c -o corosync-util.o `test -f 'util.c' || echo '$(srcdir)/'`util.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-util.Tpo $(DEPDIR)/corosync-util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util.c' 
object='corosync-util.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-util.o `test -f 'util.c' || echo '$(srcdir)/'`util.c + +corosync-util.obj: util.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-util.obj -MD -MP -MF $(DEPDIR)/corosync-util.Tpo -c -o corosync-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-util.Tpo $(DEPDIR)/corosync-util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util.c' object='corosync-util.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi` + +corosync-schedwrk.o: schedwrk.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-schedwrk.o -MD -MP -MF $(DEPDIR)/corosync-schedwrk.Tpo -c -o corosync-schedwrk.o `test -f 'schedwrk.c' || echo '$(srcdir)/'`schedwrk.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-schedwrk.Tpo $(DEPDIR)/corosync-schedwrk.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='schedwrk.c' object='corosync-schedwrk.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-schedwrk.o `test -f 'schedwrk.c' || echo '$(srcdir)/'`schedwrk.c + +corosync-schedwrk.obj: schedwrk.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-schedwrk.obj -MD -MP -MF $(DEPDIR)/corosync-schedwrk.Tpo -c -o corosync-schedwrk.obj `if test -f 'schedwrk.c'; then $(CYGPATH_W) 'schedwrk.c'; else $(CYGPATH_W) '$(srcdir)/schedwrk.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-schedwrk.Tpo $(DEPDIR)/corosync-schedwrk.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='schedwrk.c' object='corosync-schedwrk.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-schedwrk.obj `if test -f 'schedwrk.c'; then $(CYGPATH_W) 'schedwrk.c'; else $(CYGPATH_W) '$(srcdir)/schedwrk.c'; fi` + +corosync-main.o: main.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-main.o -MD -MP -MF $(DEPDIR)/corosync-main.Tpo -c -o corosync-main.o `test -f 'main.c' || echo '$(srcdir)/'`main.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-main.Tpo $(DEPDIR)/corosync-main.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main.c' object='corosync-main.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-main.o `test -f 'main.c' || echo '$(srcdir)/'`main.c + +corosync-main.obj: 
main.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-main.obj -MD -MP -MF $(DEPDIR)/corosync-main.Tpo -c -o corosync-main.obj `if test -f 'main.c'; then $(CYGPATH_W) 'main.c'; else $(CYGPATH_W) '$(srcdir)/main.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-main.Tpo $(DEPDIR)/corosync-main.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main.c' object='corosync-main.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-main.obj `if test -f 'main.c'; then $(CYGPATH_W) 'main.c'; else $(CYGPATH_W) '$(srcdir)/main.c'; fi` + +corosync-apidef.o: apidef.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-apidef.o -MD -MP -MF $(DEPDIR)/corosync-apidef.Tpo -c -o corosync-apidef.o `test -f 'apidef.c' || echo '$(srcdir)/'`apidef.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-apidef.Tpo $(DEPDIR)/corosync-apidef.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='apidef.c' object='corosync-apidef.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-apidef.o `test -f 'apidef.c' || echo '$(srcdir)/'`apidef.c + +corosync-apidef.obj: apidef.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-apidef.obj -MD -MP -MF $(DEPDIR)/corosync-apidef.Tpo -c -o 
corosync-apidef.obj `if test -f 'apidef.c'; then $(CYGPATH_W) 'apidef.c'; else $(CYGPATH_W) '$(srcdir)/apidef.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-apidef.Tpo $(DEPDIR)/corosync-apidef.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='apidef.c' object='corosync-apidef.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-apidef.obj `if test -f 'apidef.c'; then $(CYGPATH_W) 'apidef.c'; else $(CYGPATH_W) '$(srcdir)/apidef.c'; fi` + +corosync-quorum.o: quorum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-quorum.o -MD -MP -MF $(DEPDIR)/corosync-quorum.Tpo -c -o corosync-quorum.o `test -f 'quorum.c' || echo '$(srcdir)/'`quorum.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-quorum.Tpo $(DEPDIR)/corosync-quorum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='quorum.c' object='corosync-quorum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-quorum.o `test -f 'quorum.c' || echo '$(srcdir)/'`quorum.c + +corosync-quorum.obj: quorum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-quorum.obj -MD -MP -MF $(DEPDIR)/corosync-quorum.Tpo -c -o corosync-quorum.obj `if test -f 'quorum.c'; then $(CYGPATH_W) 'quorum.c'; else $(CYGPATH_W) '$(srcdir)/quorum.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-quorum.Tpo 
$(DEPDIR)/corosync-quorum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='quorum.c' object='corosync-quorum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-quorum.obj `if test -f 'quorum.c'; then $(CYGPATH_W) 'quorum.c'; else $(CYGPATH_W) '$(srcdir)/quorum.c'; fi` + +corosync-icmap.o: icmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-icmap.o -MD -MP -MF $(DEPDIR)/corosync-icmap.Tpo -c -o corosync-icmap.o `test -f 'icmap.c' || echo '$(srcdir)/'`icmap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-icmap.Tpo $(DEPDIR)/corosync-icmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='icmap.c' object='corosync-icmap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-icmap.o `test -f 'icmap.c' || echo '$(srcdir)/'`icmap.c + +corosync-icmap.obj: icmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-icmap.obj -MD -MP -MF $(DEPDIR)/corosync-icmap.Tpo -c -o corosync-icmap.obj `if test -f 'icmap.c'; then $(CYGPATH_W) 'icmap.c'; else $(CYGPATH_W) '$(srcdir)/icmap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-icmap.Tpo $(DEPDIR)/corosync-icmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='icmap.c' object='corosync-icmap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-icmap.obj `if test -f 'icmap.c'; then $(CYGPATH_W) 'icmap.c'; else $(CYGPATH_W) '$(srcdir)/icmap.c'; fi` + +corosync-timer.o: timer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-timer.o -MD -MP -MF $(DEPDIR)/corosync-timer.Tpo -c -o corosync-timer.o `test -f 'timer.c' || echo '$(srcdir)/'`timer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-timer.Tpo $(DEPDIR)/corosync-timer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='timer.c' object='corosync-timer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-timer.o `test -f 'timer.c' || echo '$(srcdir)/'`timer.c + +corosync-timer.obj: timer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-timer.obj -MD -MP -MF $(DEPDIR)/corosync-timer.Tpo -c -o corosync-timer.obj `if test -f 'timer.c'; then $(CYGPATH_W) 'timer.c'; else $(CYGPATH_W) '$(srcdir)/timer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-timer.Tpo $(DEPDIR)/corosync-timer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='timer.c' object='corosync-timer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-timer.obj `if test -f 'timer.c'; then 
$(CYGPATH_W) 'timer.c'; else $(CYGPATH_W) '$(srcdir)/timer.c'; fi` + +corosync-stats.o: stats.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-stats.o -MD -MP -MF $(DEPDIR)/corosync-stats.Tpo -c -o corosync-stats.o `test -f 'stats.c' || echo '$(srcdir)/'`stats.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-stats.Tpo $(DEPDIR)/corosync-stats.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats.c' object='corosync-stats.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-stats.o `test -f 'stats.c' || echo '$(srcdir)/'`stats.c + +corosync-stats.obj: stats.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-stats.obj -MD -MP -MF $(DEPDIR)/corosync-stats.Tpo -c -o corosync-stats.obj `if test -f 'stats.c'; then $(CYGPATH_W) 'stats.c'; else $(CYGPATH_W) '$(srcdir)/stats.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-stats.Tpo $(DEPDIR)/corosync-stats.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats.c' object='corosync-stats.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-stats.obj `if test -f 'stats.c'; then $(CYGPATH_W) 'stats.c'; else $(CYGPATH_W) '$(srcdir)/stats.c'; fi` + +corosync-ipc_glue.o: ipc_glue.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) 
$(corosync_CFLAGS) $(CFLAGS) -MT corosync-ipc_glue.o -MD -MP -MF $(DEPDIR)/corosync-ipc_glue.Tpo -c -o corosync-ipc_glue.o `test -f 'ipc_glue.c' || echo '$(srcdir)/'`ipc_glue.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-ipc_glue.Tpo $(DEPDIR)/corosync-ipc_glue.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ipc_glue.c' object='corosync-ipc_glue.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-ipc_glue.o `test -f 'ipc_glue.c' || echo '$(srcdir)/'`ipc_glue.c + +corosync-ipc_glue.obj: ipc_glue.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-ipc_glue.obj -MD -MP -MF $(DEPDIR)/corosync-ipc_glue.Tpo -c -o corosync-ipc_glue.obj `if test -f 'ipc_glue.c'; then $(CYGPATH_W) 'ipc_glue.c'; else $(CYGPATH_W) '$(srcdir)/ipc_glue.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-ipc_glue.Tpo $(DEPDIR)/corosync-ipc_glue.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ipc_glue.c' object='corosync-ipc_glue.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-ipc_glue.obj `if test -f 'ipc_glue.c'; then $(CYGPATH_W) 'ipc_glue.c'; else $(CYGPATH_W) '$(srcdir)/ipc_glue.c'; fi` + +corosync-service.o: service.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-service.o -MD -MP -MF $(DEPDIR)/corosync-service.Tpo -c -o corosync-service.o `test -f 'service.c' 
|| echo '$(srcdir)/'`service.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-service.Tpo $(DEPDIR)/corosync-service.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='service.c' object='corosync-service.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-service.o `test -f 'service.c' || echo '$(srcdir)/'`service.c + +corosync-service.obj: service.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-service.obj -MD -MP -MF $(DEPDIR)/corosync-service.Tpo -c -o corosync-service.obj `if test -f 'service.c'; then $(CYGPATH_W) 'service.c'; else $(CYGPATH_W) '$(srcdir)/service.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-service.Tpo $(DEPDIR)/corosync-service.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='service.c' object='corosync-service.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-service.obj `if test -f 'service.c'; then $(CYGPATH_W) 'service.c'; else $(CYGPATH_W) '$(srcdir)/service.c'; fi` + +corosync-logconfig.o: logconfig.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-logconfig.o -MD -MP -MF $(DEPDIR)/corosync-logconfig.Tpo -c -o corosync-logconfig.o `test -f 'logconfig.c' || echo '$(srcdir)/'`logconfig.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-logconfig.Tpo $(DEPDIR)/corosync-logconfig.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='logconfig.c' object='corosync-logconfig.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-logconfig.o `test -f 'logconfig.c' || echo '$(srcdir)/'`logconfig.c + +corosync-logconfig.obj: logconfig.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-logconfig.obj -MD -MP -MF $(DEPDIR)/corosync-logconfig.Tpo -c -o corosync-logconfig.obj `if test -f 'logconfig.c'; then $(CYGPATH_W) 'logconfig.c'; else $(CYGPATH_W) '$(srcdir)/logconfig.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-logconfig.Tpo $(DEPDIR)/corosync-logconfig.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='logconfig.c' object='corosync-logconfig.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-logconfig.obj `if test -f 'logconfig.c'; then $(CYGPATH_W) 'logconfig.c'; else $(CYGPATH_W) '$(srcdir)/logconfig.c'; fi` + +corosync-totemconfig.o: totemconfig.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemconfig.o -MD -MP -MF $(DEPDIR)/corosync-totemconfig.Tpo -c -o corosync-totemconfig.o `test -f 'totemconfig.c' || echo '$(srcdir)/'`totemconfig.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemconfig.Tpo $(DEPDIR)/corosync-totemconfig.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemconfig.c' 
object='corosync-totemconfig.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemconfig.o `test -f 'totemconfig.c' || echo '$(srcdir)/'`totemconfig.c + +corosync-totemconfig.obj: totemconfig.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemconfig.obj -MD -MP -MF $(DEPDIR)/corosync-totemconfig.Tpo -c -o corosync-totemconfig.obj `if test -f 'totemconfig.c'; then $(CYGPATH_W) 'totemconfig.c'; else $(CYGPATH_W) '$(srcdir)/totemconfig.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemconfig.Tpo $(DEPDIR)/corosync-totemconfig.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemconfig.c' object='corosync-totemconfig.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemconfig.obj `if test -f 'totemconfig.c'; then $(CYGPATH_W) 'totemconfig.c'; else $(CYGPATH_W) '$(srcdir)/totemconfig.c'; fi` + +corosync-totemip.o: totemip.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemip.o -MD -MP -MF $(DEPDIR)/corosync-totemip.Tpo -c -o corosync-totemip.o `test -f 'totemip.c' || echo '$(srcdir)/'`totemip.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemip.Tpo $(DEPDIR)/corosync-totemip.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemip.c' object='corosync-totemip.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemip.o `test -f 'totemip.c' || echo '$(srcdir)/'`totemip.c + +corosync-totemip.obj: totemip.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemip.obj -MD -MP -MF $(DEPDIR)/corosync-totemip.Tpo -c -o corosync-totemip.obj `if test -f 'totemip.c'; then $(CYGPATH_W) 'totemip.c'; else $(CYGPATH_W) '$(srcdir)/totemip.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemip.Tpo $(DEPDIR)/corosync-totemip.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemip.c' object='corosync-totemip.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemip.obj `if test -f 'totemip.c'; then $(CYGPATH_W) 'totemip.c'; else $(CYGPATH_W) '$(srcdir)/totemip.c'; fi` + +corosync-totemnet.o: totemnet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemnet.o -MD -MP -MF $(DEPDIR)/corosync-totemnet.Tpo -c -o corosync-totemnet.o `test -f 'totemnet.c' || echo '$(srcdir)/'`totemnet.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemnet.Tpo $(DEPDIR)/corosync-totemnet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemnet.c' object='corosync-totemnet.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemnet.o `test -f 'totemnet.c' || echo '$(srcdir)/'`totemnet.c + +corosync-totemnet.obj: totemnet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemnet.obj -MD -MP -MF $(DEPDIR)/corosync-totemnet.Tpo -c -o corosync-totemnet.obj `if test -f 'totemnet.c'; then $(CYGPATH_W) 'totemnet.c'; else $(CYGPATH_W) '$(srcdir)/totemnet.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemnet.Tpo $(DEPDIR)/corosync-totemnet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemnet.c' object='corosync-totemnet.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemnet.obj `if test -f 'totemnet.c'; then $(CYGPATH_W) 'totemnet.c'; else $(CYGPATH_W) '$(srcdir)/totemnet.c'; fi` + +corosync-totemudp.o: totemudp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemudp.o -MD -MP -MF $(DEPDIR)/corosync-totemudp.Tpo -c -o corosync-totemudp.o `test -f 'totemudp.c' || echo '$(srcdir)/'`totemudp.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemudp.Tpo $(DEPDIR)/corosync-totemudp.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemudp.c' object='corosync-totemudp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemudp.o `test -f 
'totemudp.c' || echo '$(srcdir)/'`totemudp.c + +corosync-totemudp.obj: totemudp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemudp.obj -MD -MP -MF $(DEPDIR)/corosync-totemudp.Tpo -c -o corosync-totemudp.obj `if test -f 'totemudp.c'; then $(CYGPATH_W) 'totemudp.c'; else $(CYGPATH_W) '$(srcdir)/totemudp.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemudp.Tpo $(DEPDIR)/corosync-totemudp.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemudp.c' object='corosync-totemudp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemudp.obj `if test -f 'totemudp.c'; then $(CYGPATH_W) 'totemudp.c'; else $(CYGPATH_W) '$(srcdir)/totemudp.c'; fi` + +corosync-totemudpu.o: totemudpu.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemudpu.o -MD -MP -MF $(DEPDIR)/corosync-totemudpu.Tpo -c -o corosync-totemudpu.o `test -f 'totemudpu.c' || echo '$(srcdir)/'`totemudpu.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemudpu.Tpo $(DEPDIR)/corosync-totemudpu.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemudpu.c' object='corosync-totemudpu.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemudpu.o `test -f 'totemudpu.c' || echo '$(srcdir)/'`totemudpu.c + +corosync-totemudpu.obj: totemudpu.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemudpu.obj -MD -MP -MF $(DEPDIR)/corosync-totemudpu.Tpo -c -o corosync-totemudpu.obj `if test -f 'totemudpu.c'; then $(CYGPATH_W) 'totemudpu.c'; else $(CYGPATH_W) '$(srcdir)/totemudpu.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemudpu.Tpo $(DEPDIR)/corosync-totemudpu.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemudpu.c' object='corosync-totemudpu.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemudpu.obj `if test -f 'totemudpu.c'; then $(CYGPATH_W) 'totemudpu.c'; else $(CYGPATH_W) '$(srcdir)/totemudpu.c'; fi` + +corosync-totemsrp.o: totemsrp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemsrp.o -MD -MP -MF $(DEPDIR)/corosync-totemsrp.Tpo -c -o corosync-totemsrp.o `test -f 'totemsrp.c' || echo '$(srcdir)/'`totemsrp.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemsrp.Tpo $(DEPDIR)/corosync-totemsrp.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemsrp.c' object='corosync-totemsrp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemsrp.o `test -f 'totemsrp.c' || echo '$(srcdir)/'`totemsrp.c + +corosync-totemsrp.obj: totemsrp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT 
corosync-totemsrp.obj -MD -MP -MF $(DEPDIR)/corosync-totemsrp.Tpo -c -o corosync-totemsrp.obj `if test -f 'totemsrp.c'; then $(CYGPATH_W) 'totemsrp.c'; else $(CYGPATH_W) '$(srcdir)/totemsrp.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemsrp.Tpo $(DEPDIR)/corosync-totemsrp.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemsrp.c' object='corosync-totemsrp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemsrp.obj `if test -f 'totemsrp.c'; then $(CYGPATH_W) 'totemsrp.c'; else $(CYGPATH_W) '$(srcdir)/totemsrp.c'; fi` + +corosync-totempg.o: totempg.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totempg.o -MD -MP -MF $(DEPDIR)/corosync-totempg.Tpo -c -o corosync-totempg.o `test -f 'totempg.c' || echo '$(srcdir)/'`totempg.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totempg.Tpo $(DEPDIR)/corosync-totempg.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totempg.c' object='corosync-totempg.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totempg.o `test -f 'totempg.c' || echo '$(srcdir)/'`totempg.c + +corosync-totempg.obj: totempg.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totempg.obj -MD -MP -MF $(DEPDIR)/corosync-totempg.Tpo -c -o corosync-totempg.obj `if test -f 'totempg.c'; then $(CYGPATH_W) 'totempg.c'; else 
$(CYGPATH_W) '$(srcdir)/totempg.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totempg.Tpo $(DEPDIR)/corosync-totempg.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totempg.c' object='corosync-totempg.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totempg.obj `if test -f 'totempg.c'; then $(CYGPATH_W) 'totempg.c'; else $(CYGPATH_W) '$(srcdir)/totempg.c'; fi` + +corosync-totemknet.o: totemknet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemknet.o -MD -MP -MF $(DEPDIR)/corosync-totemknet.Tpo -c -o corosync-totemknet.o `test -f 'totemknet.c' || echo '$(srcdir)/'`totemknet.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemknet.Tpo $(DEPDIR)/corosync-totemknet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemknet.c' object='corosync-totemknet.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemknet.o `test -f 'totemknet.c' || echo '$(srcdir)/'`totemknet.c + +corosync-totemknet.obj: totemknet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-totemknet.obj -MD -MP -MF $(DEPDIR)/corosync-totemknet.Tpo -c -o corosync-totemknet.obj `if test -f 'totemknet.c'; then $(CYGPATH_W) 'totemknet.c'; else $(CYGPATH_W) '$(srcdir)/totemknet.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-totemknet.Tpo 
$(DEPDIR)/corosync-totemknet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='totemknet.c' object='corosync-totemknet.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-totemknet.obj `if test -f 'totemknet.c'; then $(CYGPATH_W) 'totemknet.c'; else $(CYGPATH_W) '$(srcdir)/totemknet.c'; fi` + +corosync-mon.o: mon.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-mon.o -MD -MP -MF $(DEPDIR)/corosync-mon.Tpo -c -o corosync-mon.o `test -f 'mon.c' || echo '$(srcdir)/'`mon.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-mon.Tpo $(DEPDIR)/corosync-mon.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mon.c' object='corosync-mon.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-mon.o `test -f 'mon.c' || echo '$(srcdir)/'`mon.c + +corosync-mon.obj: mon.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-mon.obj -MD -MP -MF $(DEPDIR)/corosync-mon.Tpo -c -o corosync-mon.obj `if test -f 'mon.c'; then $(CYGPATH_W) 'mon.c'; else $(CYGPATH_W) '$(srcdir)/mon.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-mon.Tpo $(DEPDIR)/corosync-mon.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mon.c' object='corosync-mon.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-mon.obj `if test -f 'mon.c'; then $(CYGPATH_W) 'mon.c'; else $(CYGPATH_W) '$(srcdir)/mon.c'; fi` + +corosync-wd.o: wd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-wd.o -MD -MP -MF $(DEPDIR)/corosync-wd.Tpo -c -o corosync-wd.o `test -f 'wd.c' || echo '$(srcdir)/'`wd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-wd.Tpo $(DEPDIR)/corosync-wd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wd.c' object='corosync-wd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-wd.o `test -f 'wd.c' || echo '$(srcdir)/'`wd.c + +corosync-wd.obj: wd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -MT corosync-wd.obj -MD -MP -MF $(DEPDIR)/corosync-wd.Tpo -c -o corosync-wd.obj `if test -f 'wd.c'; then $(CYGPATH_W) 'wd.c'; else $(CYGPATH_W) '$(srcdir)/wd.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/corosync-wd.Tpo $(DEPDIR)/corosync-wd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wd.c' object='corosync-wd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(corosync_CPPFLAGS) $(CPPFLAGS) $(corosync_CFLAGS) $(CFLAGS) -c -o corosync-wd.obj `if test -f 'wd.c'; then $(CYGPATH_W) 'wd.c'; else $(CYGPATH_W) '$(srcdir)/wd.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: 
+ -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 
's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(sbindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-sbinPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-sbinPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-sbinPROGRAMS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-sbinPROGRAMS cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-sbinPROGRAMS install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-sbinPROGRAMS + + +lint: + -splint $(LINT_FLAGS) $(CPPFLAGS) $(CFLAGS) *.c + +# Tell 
versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/exec/apidef.c b/exec/apidef.c new file mode 100644 index 0000000..8805973 --- /dev/null +++ b/exec/apidef.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2008-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdlib.h> +#include <string.h> + +#include <qb/qbutil.h> +#include <qb/qbloop.h> +#include <qb/qbipcs.h> + +#include <corosync/corotypes.h> +#include <corosync/totem/totempg.h> +#include <corosync/totem/totemip.h> +#include <corosync/totem/totem.h> +#include <corosync/logsys.h> +#include "util.h" +#include "timer.h" +#include "quorum.h" +#include "schedwrk.h" +#include "main.h" +#include "apidef.h" +#include "service.h" + +LOGSYS_DECLARE_SUBSYS ("APIDEF"); + +/* + * Remove compile warnings about type name changes in corosync_tpg_group + */ +typedef int (*typedef_tpg_join) ( + void *, + const struct corosync_tpg_group *, + size_t); + +typedef int (*typedef_tpg_leave) (void *, + const struct corosync_tpg_group *, + size_t); + +typedef int (*typedef_tpg_groups_mcast_groups) ( + void *, int, + const struct corosync_tpg_group *, + size_t groups_cnt, + const struct iovec *, + unsigned int); + +typedef int (*typedef_tpg_groups_send_ok) ( + void *, + const struct corosync_tpg_group *, + size_t groups_cnt, + struct iovec *, + int); + +static inline void _corosync_public_exit_error (cs_fatal_error_t err, + const char *file, + unsigned int line) + __attribute__((noreturn)); +static inline void _corosync_public_exit_error ( + cs_fatal_error_t err, const char *file, unsigned int line) +{ + _corosync_exit_error (err, file, line); +} + +static struct corosync_api_v1 apidef_corosync_api_v1 = { + 
.timer_add_duration = corosync_timer_add_duration, + .timer_add_absolute = corosync_timer_add_absolute, + .timer_delete = corosync_timer_delete, + .timer_time_get = cs_timer_time_get, + .timer_expire_time_get = corosync_timer_expire_time_get, + .ipc_source_set = message_source_set, + .ipc_source_is_local = message_source_is_local, + .ipc_private_data_get = cs_ipcs_private_data_get, + .ipc_response_iov_send = cs_ipcs_response_iov_send, + .ipc_response_send = cs_ipcs_response_send, + .ipc_dispatch_send = cs_ipcs_dispatch_send, + .ipc_dispatch_iov_send = cs_ipcs_dispatch_iov_send, + .ipc_refcnt_inc = cs_ipc_refcnt_inc, + .ipc_refcnt_dec = cs_ipc_refcnt_dec, + .totem_nodeid_get = totempg_my_nodeid_get, + .totem_family_get = totempg_my_family_get, + .totem_mcast = main_mcast, + .totem_ifaces_get = totempg_ifaces_get, + .totem_ifaces_print = totempg_ifaces_print, + .totem_ip_print = totemip_print, + .totem_crypto_set = totempg_crypto_set, + .totem_callback_token_create = totempg_callback_token_create, + .totem_get_stats = totempg_get_stats, + .tpg_init = totempg_groups_initialize, + .tpg_exit = NULL, /* missing from totempg api */ + .tpg_join = (typedef_tpg_join)totempg_groups_join, + .tpg_leave = (typedef_tpg_leave)totempg_groups_leave, + .tpg_joined_mcast = totempg_groups_mcast_joined, + .tpg_joined_reserve = totempg_groups_joined_reserve, + .tpg_joined_release = totempg_groups_joined_release, + .tpg_groups_mcast = (typedef_tpg_groups_mcast_groups)totempg_groups_mcast_groups, + .tpg_groups_reserve = NULL, + .tpg_groups_release = NULL, + .schedwrk_create = schedwrk_create, + .schedwrk_create_nolock = schedwrk_create_nolock, + .schedwrk_destroy = schedwrk_destroy, + .sync_request = NULL, //sync_request, + .quorum_is_quorate = corosync_quorum_is_quorate, + .quorum_register_callback = corosync_quorum_register_callback, + .quorum_unregister_callback = corosync_quorum_unregister_callback, + .quorum_initialize = corosync_quorum_initialize, + .error_memory_failure = 
_corosync_out_of_memory_error, + .fatal_error = _corosync_public_exit_error, + .shutdown_request = corosync_shutdown_request, + .state_dump = corosync_state_dump, + .poll_handle_get = cs_poll_handle_get, + .poll_dispatch_add = cs_poll_dispatch_add, + .poll_dispatch_delete = cs_poll_dispatch_delete +}; + +struct corosync_api_v1 *apidef_get (void) +{ + return (&apidef_corosync_api_v1); +} diff --git a/exec/apidef.h b/exec/apidef.h new file mode 100644 index 0000000..52812d4 --- /dev/null +++ b/exec/apidef.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2008-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef APIDEF_H_DEFINED +#define APIDEF_H_DEFINED + +#include <corosync/coroapi.h> + +extern struct corosync_api_v1 *apidef_get (void); + +#endif /* APIDEF_H_DEFINED*/ diff --git a/exec/cfg.c b/exec/cfg.c new file mode 100644 index 0000000..fe5f551 --- /dev/null +++ b/exec/cfg.c @@ -0,0 +1,1470 @@ +/* + * Copyright (c) 2005-2006 MontaVista Software, Inc. + * Copyright (c) 2006-2018 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <stddef.h> +#include <limits.h> +#include <errno.h> +#include <string.h> +#include <assert.h> + +#include <corosync/corotypes.h> +#include <qb/qbipc_common.h> +#include <corosync/cfg.h> +#include <qb/qblist.h> +#include <qb/qbutil.h> +#include <corosync/mar_gen.h> +#include <corosync/totem/totemip.h> +#include <corosync/totem/totem.h> +#include <corosync/ipc_cfg.h> +#include <corosync/logsys.h> +#include <corosync/coroapi.h> +#include <corosync/icmap.h> +#include <corosync/corodefs.h> + +#include "totemconfig.h" +#include "totemknet.h" +#include "service.h" +#include "main.h" + +LOGSYS_DECLARE_SUBSYS ("CFG"); + +enum cfg_message_req_types { + MESSAGE_REQ_EXEC_CFG_RINGREENABLE = 0, + MESSAGE_REQ_EXEC_CFG_KILLNODE = 1, + MESSAGE_REQ_EXEC_CFG_SHUTDOWN = 2, + MESSAGE_REQ_EXEC_CFG_RELOAD_CONFIG = 3, + MESSAGE_REQ_EXEC_CFG_CRYPTO_RECONFIG = 4 +}; + +/* in milliseconds */ +#define DEFAULT_SHUTDOWN_TIMEOUT 5000 + +static struct 
qb_list_head trackers_list; + +/* + * Variables controlling a requested shutdown + */ +static corosync_timer_handle_t shutdown_timer; +static struct cfg_info *shutdown_con; +static uint32_t shutdown_flags; +static int shutdown_yes; +static int shutdown_no; +static int shutdown_expected; + +struct cfg_info +{ + struct qb_list_head list; + void *conn; + void *tracker_conn; + enum {SHUTDOWN_REPLY_UNKNOWN, SHUTDOWN_REPLY_YES, SHUTDOWN_REPLY_NO} shutdown_reply; +}; + +static void cfg_confchg_fn ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id); + +static char *cfg_exec_init_fn (struct corosync_api_v1 *corosync_api_v1); + +static struct corosync_api_v1 *api; + +static int cfg_lib_init_fn (void *conn); + +static int cfg_lib_exit_fn (void *conn); + +static void message_handler_req_exec_cfg_ringreenable ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cfg_killnode ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cfg_shutdown ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cfg_reload_config ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cfg_reconfig_crypto ( + const void *message, + unsigned int nodeid); + +static void exec_cfg_killnode_endian_convert (void *msg); + +static void message_handler_req_lib_cfg_ringstatusget ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_nodestatusget ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_ringreenable ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_killnode ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_tryshutdown ( + void *conn, 
+ const void *msg); + +static void message_handler_req_lib_cfg_replytoshutdown ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_trackstart ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_trackstop ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_get_node_addrs ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_local_get ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_reload_config ( + void *conn, + const void *msg); + +static void message_handler_req_lib_cfg_reopen_log_files ( + void *conn, + const void *msg); + +/* + * Service Handler Definition + */ +static struct corosync_lib_handler cfg_lib_engine[] = +{ + { /* 0 */ + .lib_handler_fn = message_handler_req_lib_cfg_ringstatusget, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 1 */ + .lib_handler_fn = message_handler_req_lib_cfg_ringreenable, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 2 */ + .lib_handler_fn = message_handler_req_lib_cfg_killnode, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 3 */ + .lib_handler_fn = message_handler_req_lib_cfg_tryshutdown, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 4 */ + .lib_handler_fn = message_handler_req_lib_cfg_replytoshutdown, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 5 */ + .lib_handler_fn = message_handler_req_lib_cfg_get_node_addrs, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 6 */ + .lib_handler_fn = message_handler_req_lib_cfg_local_get, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 7 */ + .lib_handler_fn = message_handler_req_lib_cfg_reload_config, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 8 */ + .lib_handler_fn = message_handler_req_lib_cfg_reopen_log_files, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 9 */ + .lib_handler_fn = message_handler_req_lib_cfg_nodestatusget, + 
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 10 */ + .lib_handler_fn = message_handler_req_lib_cfg_trackstart, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 11 */ + .lib_handler_fn = message_handler_req_lib_cfg_trackstop, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + +}; + +static struct corosync_exec_handler cfg_exec_engine[] = +{ + { /* 0 */ + .exec_handler_fn = message_handler_req_exec_cfg_ringreenable, + }, + { /* 1 */ + .exec_handler_fn = message_handler_req_exec_cfg_killnode, + .exec_endian_convert_fn = exec_cfg_killnode_endian_convert + }, + { /* 2 */ + .exec_handler_fn = message_handler_req_exec_cfg_shutdown, + }, + { /* 3 */ + .exec_handler_fn = message_handler_req_exec_cfg_reload_config, + }, + { /* 4 */ + .exec_handler_fn = message_handler_req_exec_cfg_reconfig_crypto, + } +}; + +/* + * Exports the interface for the service + */ +struct corosync_service_engine cfg_service_engine = { + .name = "corosync configuration service", + .id = CFG_SERVICE, + .priority = 1, + .private_data_size = sizeof(struct cfg_info), + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, + .allow_inquorate = CS_LIB_ALLOW_INQUORATE, + .lib_init_fn = cfg_lib_init_fn, + .lib_exit_fn = cfg_lib_exit_fn, + .lib_engine = cfg_lib_engine, + .lib_engine_count = sizeof (cfg_lib_engine) / sizeof (struct corosync_lib_handler), + .exec_init_fn = cfg_exec_init_fn, + .exec_engine = cfg_exec_engine, + .exec_engine_count = sizeof (cfg_exec_engine) / sizeof (struct corosync_exec_handler), + .confchg_fn = cfg_confchg_fn +}; + +struct corosync_service_engine *cfg_get_service_engine_ver0 (void) +{ + return (&cfg_service_engine); +} + +struct req_exec_cfg_ringreenable { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_message_source_t source __attribute__((aligned(8))); +}; + +struct req_exec_cfg_reload_config { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_message_source_t source __attribute__((aligned(8))); +}; + 
+struct req_exec_cfg_crypto_reconfig { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_uint32_t phase __attribute__((aligned(8))); +}; + +struct req_exec_cfg_killnode { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_uint32_t nodeid __attribute__((aligned(8))); + mar_name_t reason __attribute__((aligned(8))); +}; + +struct req_exec_cfg_shutdown { + struct qb_ipc_request_header header __attribute__((aligned(8))); +}; + +/* IMPL */ + +static char *cfg_exec_init_fn ( + struct corosync_api_v1 *corosync_api_v1) +{ + api = corosync_api_v1; + + qb_list_init(&trackers_list); + return (NULL); +} + +static void cfg_confchg_fn ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id) +{ +} + +/* + * Tell other nodes we are shutting down + */ +static int send_shutdown(void) +{ + struct req_exec_cfg_shutdown req_exec_cfg_shutdown; + struct iovec iovec; + + ENTER(); + req_exec_cfg_shutdown.header.size = + sizeof (struct req_exec_cfg_shutdown); + req_exec_cfg_shutdown.header.id = SERVICE_ID_MAKE (CFG_SERVICE, + MESSAGE_REQ_EXEC_CFG_SHUTDOWN); + + iovec.iov_base = (char *)&req_exec_cfg_shutdown; + iovec.iov_len = sizeof (struct req_exec_cfg_shutdown); + + assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0); + + LEAVE(); + return 0; +} + +static void send_test_shutdown(void *only_conn, void *exclude_conn, int status) +{ + struct res_lib_cfg_testshutdown res_lib_cfg_testshutdown; + struct qb_list_head *iter; + + ENTER(); + res_lib_cfg_testshutdown.header.size = sizeof(struct res_lib_cfg_testshutdown); + res_lib_cfg_testshutdown.header.id = MESSAGE_RES_CFG_TESTSHUTDOWN; + res_lib_cfg_testshutdown.header.error = status; + res_lib_cfg_testshutdown.flags = shutdown_flags; + + if (only_conn) { + TRACE1("sending 
testshutdown to only %p", only_conn); + api->ipc_dispatch_send(only_conn, &res_lib_cfg_testshutdown, + sizeof(res_lib_cfg_testshutdown)); + } else { + qb_list_for_each(iter, &trackers_list) { + struct cfg_info *ci = qb_list_entry(iter, struct cfg_info, list); + + if (ci->conn != exclude_conn) { + TRACE1("sending testshutdown to %p", ci->tracker_conn); + api->ipc_dispatch_send(ci->tracker_conn, &res_lib_cfg_testshutdown, + sizeof(res_lib_cfg_testshutdown)); + } + } + } + LEAVE(); +} + +static void check_shutdown_status(void) +{ + ENTER(); + + /* + * Shutdown client might have gone away + */ + if (!shutdown_con) { + LEAVE(); + return; + } + + /* + * All replies safely gathered in ? + */ + if (shutdown_yes + shutdown_no >= shutdown_expected) { + struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; + + api->timer_delete(shutdown_timer); + + if (shutdown_yes >= shutdown_expected || + shutdown_flags == CFG_SHUTDOWN_FLAG_REGARDLESS) { + TRACE1("shutdown confirmed"); + + res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); + res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; + res_lib_cfg_tryshutdown.header.error = CS_OK; + + /* + * Tell originator that shutdown was confirmed + */ + api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown, + sizeof(res_lib_cfg_tryshutdown)); + shutdown_con = NULL; + + /* + * Tell other nodes we are going down + */ + send_shutdown(); + + } + else { + + TRACE1("shutdown cancelled"); + res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); + res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; + res_lib_cfg_tryshutdown.header.error = CS_ERR_BUSY; + + /* + * Tell originator that shutdown was cancelled + */ + api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown, + sizeof(res_lib_cfg_tryshutdown)); + shutdown_con = NULL; + } + + log_printf(LOGSYS_LEVEL_DEBUG, "shutdown decision is: (yes count: %d, no count: %d) flags=%x", + shutdown_yes, 
shutdown_no, shutdown_flags); + } + LEAVE(); +} + + +/* + * Not all nodes responded to the shutdown (in time) + */ +static void shutdown_timer_fn(void *arg) +{ + ENTER(); + + /* + * Mark undecideds as "NO" + */ + shutdown_no = shutdown_expected; + check_shutdown_status(); + + send_test_shutdown(NULL, NULL, CS_ERR_TIMEOUT); + LEAVE(); +} + +static void remove_ci_from_shutdown(struct cfg_info *ci) +{ + ENTER(); + + /* + * If the controlling shutdown process has quit, then cancel the + * shutdown session + */ + if (ci == shutdown_con) { + shutdown_con = NULL; + api->timer_delete(shutdown_timer); + } + + if (!qb_list_empty(&ci->list)) { + qb_list_del(&ci->list); + qb_list_init(&ci->list); + + /* + * Remove our option + */ + if (shutdown_con) { + if (ci->shutdown_reply == SHUTDOWN_REPLY_YES) + shutdown_yes--; + if (ci->shutdown_reply == SHUTDOWN_REPLY_NO) + shutdown_no--; + } + + /* + * If we are leaving, then that's an implicit YES to shutdown + */ + ci->shutdown_reply = SHUTDOWN_REPLY_YES; + shutdown_yes++; + + check_shutdown_status(); + } + LEAVE(); +} + + +int cfg_lib_exit_fn (void *conn) +{ + struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); + + ENTER(); + remove_ci_from_shutdown(ci); + LEAVE(); + return (0); +} + +static int cfg_lib_init_fn (void *conn) +{ + struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); + + ENTER(); + qb_list_init(&ci->list); + LEAVE(); + + return (0); +} + +/* + * Executive message handlers + */ +static void message_handler_req_exec_cfg_ringreenable ( + const void *message, + unsigned int nodeid) +{ + ENTER(); + + LEAVE(); +} + +static void exec_cfg_killnode_endian_convert (void *msg) +{ + struct req_exec_cfg_killnode *req_exec_cfg_killnode = + (struct req_exec_cfg_killnode *)msg; + ENTER(); + + swab_mar_name_t(&req_exec_cfg_killnode->reason); + LEAVE(); +} + + +static void message_handler_req_exec_cfg_killnode ( + const void *message, + unsigned int nodeid) +{ + const struct 
req_exec_cfg_killnode *req_exec_cfg_killnode = message; + cs_name_t reason; + + ENTER(); + log_printf(LOGSYS_LEVEL_DEBUG, "request to kill node " CS_PRI_NODE_ID " (us=" CS_PRI_NODE_ID ")", + req_exec_cfg_killnode->nodeid, api->totem_nodeid_get()); + if (req_exec_cfg_killnode->nodeid == api->totem_nodeid_get()) { + marshall_from_mar_name_t(&reason, &req_exec_cfg_killnode->reason); + log_printf(LOGSYS_LEVEL_NOTICE, "Killed by node " CS_PRI_NODE_ID " : %s", + nodeid, reason.value); + corosync_fatal_error(COROSYNC_FATAL_ERROR_EXIT); + } + LEAVE(); +} + +/* + * Self shutdown + */ +static void message_handler_req_exec_cfg_shutdown ( + const void *message, + unsigned int nodeid) +{ + ENTER(); + + log_printf(LOGSYS_LEVEL_NOTICE, "Node " CS_PRI_NODE_ID " was shut down by sysadmin", nodeid); + if (nodeid == api->totem_nodeid_get()) { + api->shutdown_request(); + } + LEAVE(); +} + +/* strcmp replacement that can handle NULLs */ +static int nullcheck_strcmp(const char* left, const char *right) +{ + if (!left && right) + return -1; + if (left && !right) + return 1; + + if (!left && !right) + return 0; + + return strcmp(left, right); +} + +/* + * If a key has changed value in the new file, then warn the user and remove it from the temp_map + */ +static void delete_and_notify_if_changed(icmap_map_t temp_map, const char *key_name) +{ + if (!(icmap_key_value_eq(temp_map, key_name, icmap_get_global_map(), key_name))) { + if (icmap_delete_r(temp_map, key_name) == CS_OK) { + log_printf(LOGSYS_LEVEL_NOTICE, "Modified entry '%s' in corosync.conf cannot be changed at run-time", key_name); + } + } +} +/* + * Remove any keys from the new config file that in the new corosync.conf but that + * cannot be changed at run time. A log message will be issued for each + * entry that the user wants to change but they cannot. + * + * Add more here as needed. 
+ */ +static void remove_ro_entries(icmap_map_t temp_map) +{ +#ifndef HAVE_KNET_CRYPTO_RECONF + delete_and_notify_if_changed(temp_map, "totem.secauth"); + delete_and_notify_if_changed(temp_map, "totem.crypto_hash"); + delete_and_notify_if_changed(temp_map, "totem.crypto_cipher"); + delete_and_notify_if_changed(temp_map, "totem.keyfile"); + delete_and_notify_if_changed(temp_map, "totem.key"); +#endif + delete_and_notify_if_changed(temp_map, "totem.version"); + delete_and_notify_if_changed(temp_map, "totem.threads"); + delete_and_notify_if_changed(temp_map, "totem.ip_version"); + delete_and_notify_if_changed(temp_map, "totem.rrp_mode"); + delete_and_notify_if_changed(temp_map, "totem.netmtu"); + delete_and_notify_if_changed(temp_map, "totem.interface.ringnumber"); + delete_and_notify_if_changed(temp_map, "totem.interface.bindnetaddr"); + delete_and_notify_if_changed(temp_map, "totem.interface.mcastaddr"); + delete_and_notify_if_changed(temp_map, "totem.interface.broadcast"); + delete_and_notify_if_changed(temp_map, "totem.interface.mcastport"); + delete_and_notify_if_changed(temp_map, "totem.interface.ttl"); + delete_and_notify_if_changed(temp_map, "totem.transport"); + delete_and_notify_if_changed(temp_map, "totem.cluster_name"); + delete_and_notify_if_changed(temp_map, "quorum.provider"); + delete_and_notify_if_changed(temp_map, "system.move_to_root_cgroup"); + delete_and_notify_if_changed(temp_map, "system.allow_knet_handle_fallback"); + delete_and_notify_if_changed(temp_map, "system.sched_rr"); + delete_and_notify_if_changed(temp_map, "system.priority"); + delete_and_notify_if_changed(temp_map, "system.qb_ipc_type"); + delete_and_notify_if_changed(temp_map, "system.state_dir"); +} + +/* + * Remove entries that exist in the global map, but not in the temp_map, this will + * cause delete notifications to be sent to any listeners. + * + * NOTE: This routine depends entirely on the keys returned by the iterators + * being in alpha-sorted order. 
+ */ +static void remove_deleted_entries(icmap_map_t temp_map, const char *prefix) +{ + icmap_iter_t old_iter; + icmap_iter_t new_iter; + const char *old_key, *new_key; + int ret; + + old_iter = icmap_iter_init(prefix); + new_iter = icmap_iter_init_r(temp_map, prefix); + + old_key = icmap_iter_next(old_iter, NULL, NULL); + new_key = icmap_iter_next(new_iter, NULL, NULL); + + while (old_key || new_key) { + ret = nullcheck_strcmp(old_key, new_key); + if ((ret < 0 && old_key) || !new_key) { + /* + * new_key is greater, a line (or more) has been deleted + * Continue until old is >= new + */ + do { + /* Remove it from icmap & send notifications */ + icmap_delete(old_key); + + old_key = icmap_iter_next(old_iter, NULL, NULL); + ret = nullcheck_strcmp(old_key, new_key); + } while (ret < 0 && old_key); + } + else if ((ret > 0 && new_key) || !old_key) { + /* + * old_key is greater, a line (or more) has been added + * Continue until new is >= old + * + * we don't need to do anything special with this like tell + * icmap. 
That will happen when we copy the values over + */ + do { + new_key = icmap_iter_next(new_iter, NULL, NULL); + ret = nullcheck_strcmp(old_key, new_key); + } while (ret > 0 && new_key); + } + if (ret == 0) { + new_key = icmap_iter_next(new_iter, NULL, NULL); + old_key = icmap_iter_next(old_iter, NULL, NULL); + } + } + icmap_iter_finalize(new_iter); + icmap_iter_finalize(old_iter); +} + +/* + * Reload configuration file + */ +static void message_handler_req_exec_cfg_reload_config ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cfg_reload_config *req_exec_cfg_reload_config = message; + struct res_lib_cfg_reload_config res_lib_cfg_reload_config; + struct totem_config new_config; + icmap_map_t temp_map; + const char *error_string; + int res = CS_OK; + + ENTER(); + + log_printf(LOGSYS_LEVEL_NOTICE, "Config reload requested by node " CS_PRI_NODE_ID, nodeid); + + icmap_set_uint8("config.totemconfig_reload_in_progress", 1); + + /* Make sure there is no rubbish in this that might be checked, even on error */ + memset(&new_config, 0, sizeof(new_config)); + /* + * Set up a new hashtable as a staging area. + */ + if ((res = icmap_init_r(&temp_map)) != CS_OK) { + log_printf(LOGSYS_LEVEL_ERROR, "Unable to create temporary icmap. 
config file reload cancelled\n"); + goto reload_fini_nomap; + } + + /* + * Load new config into the temporary map + */ + res = coroparse_configparse(temp_map, &error_string); + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Unable to reload config file: %s", error_string); + res = CS_ERR_INVALID_PARAM; + goto reload_fini_nofree; + } + + /* Signal start of the reload process */ + icmap_set_uint8("config.reload_in_progress", 1); + + /* Detect deleted entries and remove them from the main icmap hashtable */ + remove_deleted_entries(temp_map, "logging."); + remove_deleted_entries(temp_map, "totem."); + remove_deleted_entries(temp_map, "nodelist."); + remove_deleted_entries(temp_map, "quorum."); + remove_deleted_entries(temp_map, "uidgid.config."); + remove_deleted_entries(temp_map, "nozzle."); + + /* Remove entries that cannot be changed */ + remove_ro_entries(temp_map); + + /* Take a copy of the current setup so we can check what has changed */ + memset(&new_config, 0, sizeof(new_config)); + new_config.orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); + assert(new_config.orig_interfaces != NULL); + + totempg_get_config(&new_config); + new_config.crypto_changed = 0; + + new_config.interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); + assert(new_config.interfaces != NULL); + memset(new_config.interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX); + + /* For UDP[U] the configuration on link0 is static (apart from the nodelist) and only read at + startup. 
So preserve it here */ + if ( (new_config.transport_number == TOTEM_TRANSPORT_UDP) || + (new_config.transport_number == TOTEM_TRANSPORT_UDPU)) { + memcpy(&new_config.interfaces[0], &new_config.orig_interfaces[0], + sizeof(struct totem_interface)); + } + + /* Calculate new node and interface definitions */ + if (totemconfig_configure_new_params(&new_config, temp_map, &error_string) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Cannot configure new interface definitions: %s\n", error_string); + res = CS_ERR_INVALID_PARAM; + goto reload_fini; + } + + /* Read from temp_map into new_config */ + totem_volatile_config_read(&new_config, temp_map, NULL); + + /* Get updated crypto parameters. Will set a flag in new_config if things have changed */ + if (totem_reread_crypto_config(&new_config, temp_map, &error_string) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Crypto configuration is not valid: %s\n", error_string); + res = CS_ERR_INVALID_PARAM; + goto reload_fini; + } + + /* Validate dynamic parameters */ + if (totem_volatile_config_validate(&new_config, temp_map, &error_string) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Configuration is not valid: %s\n", error_string); + res = CS_ERR_INVALID_PARAM; + goto reload_fini; + } + + /* Save this here so we can get at it for the later phases of crypto change */ + if (new_config.crypto_changed) { +#ifndef HAVE_KNET_CRYPTO_RECONF + new_config.crypto_changed = 0; + log_printf (LOGSYS_LEVEL_ERROR, "Crypto reconfiguration is not supported by the linked version of knet\n"); + res = CS_ERR_INVALID_PARAM; + goto reload_fini; +#endif + } + + /* + * Copy new keys into live config. + */ + if ( (res = icmap_copy_map(icmap_get_global_map(), temp_map)) != CS_OK) { + log_printf (LOGSYS_LEVEL_ERROR, "Error making new config live. 
cmap database may be inconsistent\n"); + /* Return res from icmap */ + goto reload_fini; + } + + /* Copy into live system */ + totempg_put_config(&new_config); + totemconfig_commit_new_params(&new_config, temp_map); + free(new_config.interfaces); + +reload_fini: + /* All done - let clients know */ + icmap_set_int32("config.reload_status", res); + icmap_set_uint8("config.totemconfig_reload_in_progress", 0); + icmap_set_uint8("config.reload_in_progress", 0); + + /* Finished with the temporary storage */ + free(new_config.orig_interfaces); + +reload_fini_nofree: + icmap_fini_r(temp_map); + +reload_fini_nomap: + + /* If crypto was changed, now it's loaded on all nodes we can enable it. + * Each node sends its own PHASE message so we're not relying on the leader + * node to survive the transition + */ + if (new_config.crypto_changed) { + struct req_exec_cfg_crypto_reconfig req_exec_cfg_crypto_reconfig; + struct iovec iovec; + + req_exec_cfg_crypto_reconfig.header.size = + sizeof (struct req_exec_cfg_crypto_reconfig); + req_exec_cfg_crypto_reconfig.header.id = SERVICE_ID_MAKE (CFG_SERVICE, + MESSAGE_REQ_EXEC_CFG_CRYPTO_RECONFIG); + req_exec_cfg_crypto_reconfig.phase = CRYPTO_RECONFIG_PHASE_ACTIVATE; + + iovec.iov_base = (char *)&req_exec_cfg_crypto_reconfig; + iovec.iov_len = sizeof (struct req_exec_cfg_crypto_reconfig); + + assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0); + } + + /* All done, return result to the caller if it was on this system */ + if (nodeid == api->totem_nodeid_get()) { + res_lib_cfg_reload_config.header.size = sizeof(res_lib_cfg_reload_config); + res_lib_cfg_reload_config.header.id = MESSAGE_RES_CFG_RELOAD_CONFIG; + res_lib_cfg_reload_config.header.error = res; + api->ipc_response_send(req_exec_cfg_reload_config->source.conn, + &res_lib_cfg_reload_config, + sizeof(res_lib_cfg_reload_config)); + api->ipc_refcnt_dec(req_exec_cfg_reload_config->source.conn);; + } + + LEAVE(); +} + +/* Handle the phases of crypto reload + * The first time we are 
called is after the new crypto config has been loaded + * but not activated. + * + * 1 - activate the new crypto configuration + * 2 - clear out the old configuration + */ +static void message_handler_req_exec_cfg_reconfig_crypto ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cfg_crypto_reconfig *req_exec_cfg_crypto_reconfig = message; + + /* Got our own reconfig message */ + if (nodeid == api->totem_nodeid_get()) { + log_printf (LOGSYS_LEVEL_DEBUG, "Crypto reconfiguration phase %d", req_exec_cfg_crypto_reconfig->phase); + + /* Do the deed */ + totempg_crypto_reconfigure_phase(req_exec_cfg_crypto_reconfig->phase); + + /* Move to the next phase if not finished */ + if (req_exec_cfg_crypto_reconfig->phase < CRYPTO_RECONFIG_PHASE_CLEANUP) { + struct req_exec_cfg_crypto_reconfig req_exec_cfg_crypto_reconfig2; + struct iovec iovec; + + req_exec_cfg_crypto_reconfig2.header.size = + sizeof (struct req_exec_cfg_crypto_reconfig); + req_exec_cfg_crypto_reconfig2.header.id = SERVICE_ID_MAKE (CFG_SERVICE, + MESSAGE_REQ_EXEC_CFG_CRYPTO_RECONFIG); + req_exec_cfg_crypto_reconfig2.phase = CRYPTO_RECONFIG_PHASE_CLEANUP; + + iovec.iov_base = (char *)&req_exec_cfg_crypto_reconfig2; + iovec.iov_len = sizeof (struct req_exec_cfg_crypto_reconfig); + + assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0); + } + } +} + + +/* + * Library Interface Implementation + */ +static void message_handler_req_lib_cfg_ringstatusget ( + void *conn, + const void *msg) +{ + struct res_lib_cfg_ringstatusget res_lib_cfg_ringstatusget; + struct totem_ip_address interfaces[INTERFACE_MAX]; + unsigned int iface_count; + char **status; + const char *totem_ip_string; + char ifname[CFG_INTERFACE_NAME_MAX_LEN]; + unsigned int iface_ids[INTERFACE_MAX]; + unsigned int i; + cs_error_t res = CS_OK; + + ENTER(); + + res_lib_cfg_ringstatusget.header.id = MESSAGE_RES_CFG_RINGSTATUSGET; + res_lib_cfg_ringstatusget.header.size = sizeof (struct res_lib_cfg_ringstatusget); + + 
api->totem_ifaces_get ( + api->totem_nodeid_get(), + iface_ids, + interfaces, + INTERFACE_MAX, + &status, + &iface_count); + + assert(iface_count <= CFG_MAX_INTERFACES); + + res_lib_cfg_ringstatusget.interface_count = iface_count; + + for (i = 0; i < iface_count; i++) { + totem_ip_string + = (const char *)api->totem_ip_print (&interfaces[i]); + + if (!totem_ip_string) { + totem_ip_string=""; + } + + /* Allow for i/f number at the start */ + if (strlen(totem_ip_string) >= CFG_INTERFACE_NAME_MAX_LEN-3) { + log_printf(LOGSYS_LEVEL_ERROR, "String representation of interface %u is too long", i); + res = CS_ERR_NAME_TOO_LONG; + goto send_response; + } + snprintf(ifname, sizeof(ifname), "%d %s", iface_ids[i], totem_ip_string); + + if (strlen(status[i]) >= CFG_INTERFACE_STATUS_MAX_LEN) { + log_printf(LOGSYS_LEVEL_ERROR, "Status string for interface %u is too long", i); + res = CS_ERR_NAME_TOO_LONG; + goto send_response; + } + + strcpy ((char *)&res_lib_cfg_ringstatusget.interface_status[i], + status[i]); + strcpy ((char *)&res_lib_cfg_ringstatusget.interface_name[i], + ifname); + } + +send_response: + res_lib_cfg_ringstatusget.header.error = res; + api->ipc_response_send ( + conn, + &res_lib_cfg_ringstatusget, + sizeof (struct res_lib_cfg_ringstatusget)); + + LEAVE(); +} + + +static void message_handler_req_lib_cfg_nodestatusget ( + void *conn, + const void *msg) +{ + struct res_lib_cfg_nodestatusget_version res_lib_cfg_nodestatusget_version; + struct res_lib_cfg_nodestatusget_v1 res_lib_cfg_nodestatusget_v1; + void *res_lib_cfg_nodestatusget_ptr = NULL; + size_t res_lib_cfg_nodestatusget_size; + struct req_lib_cfg_nodestatusget *req_lib_cfg_nodestatusget = (struct req_lib_cfg_nodestatusget *)msg; + struct totem_node_status node_status; + int i; + + ENTER(); + + memset(&node_status, 0, sizeof(node_status)); + if (totempg_nodestatus_get(req_lib_cfg_nodestatusget->nodeid, &node_status) != 0) { + res_lib_cfg_nodestatusget_ptr = &res_lib_cfg_nodestatusget_version; + 
res_lib_cfg_nodestatusget_size = sizeof(res_lib_cfg_nodestatusget_version); + + res_lib_cfg_nodestatusget_version.header.error = CS_ERR_FAILED_OPERATION; + res_lib_cfg_nodestatusget_version.header.id = MESSAGE_RES_CFG_NODESTATUSGET; + res_lib_cfg_nodestatusget_version.header.size = res_lib_cfg_nodestatusget_size; + + goto ipc_response_send; + } + + /* Currently only one structure version supported */ + switch (req_lib_cfg_nodestatusget->version) { + case CFG_NODE_STATUS_V1: + res_lib_cfg_nodestatusget_ptr = &res_lib_cfg_nodestatusget_v1; + res_lib_cfg_nodestatusget_size = sizeof(res_lib_cfg_nodestatusget_v1); + + res_lib_cfg_nodestatusget_v1.header.error = CS_OK; + res_lib_cfg_nodestatusget_v1.header.id = MESSAGE_RES_CFG_NODESTATUSGET; + res_lib_cfg_nodestatusget_v1.header.size = res_lib_cfg_nodestatusget_size; + + res_lib_cfg_nodestatusget_v1.node_status.version = CFG_NODE_STATUS_V1; + res_lib_cfg_nodestatusget_v1.node_status.nodeid = req_lib_cfg_nodestatusget->nodeid; + res_lib_cfg_nodestatusget_v1.node_status.reachable = node_status.reachable; + res_lib_cfg_nodestatusget_v1.node_status.remote = node_status.remote; + res_lib_cfg_nodestatusget_v1.node_status.external = node_status.external; + res_lib_cfg_nodestatusget_v1.node_status.onwire_min = node_status.onwire_min; + res_lib_cfg_nodestatusget_v1.node_status.onwire_max = node_status.onwire_max; + res_lib_cfg_nodestatusget_v1.node_status.onwire_ver = node_status.onwire_ver; + + for (i=0; i < KNET_MAX_LINK; i++) { + res_lib_cfg_nodestatusget_v1.node_status.link_status[i].enabled = node_status.link_status[i].enabled; + res_lib_cfg_nodestatusget_v1.node_status.link_status[i].connected = node_status.link_status[i].connected; + res_lib_cfg_nodestatusget_v1.node_status.link_status[i].dynconnected = node_status.link_status[i].dynconnected; + res_lib_cfg_nodestatusget_v1.node_status.link_status[i].mtu = node_status.link_status[i].mtu; + memcpy(res_lib_cfg_nodestatusget_v1.node_status.link_status[i].src_ipaddr, + 
node_status.link_status[i].src_ipaddr, CFG_MAX_HOST_LEN); + memcpy(res_lib_cfg_nodestatusget_v1.node_status.link_status[i].dst_ipaddr, + node_status.link_status[i].dst_ipaddr, CFG_MAX_HOST_LEN); + } + break; + default: + /* + * Unsupported version requested + */ + res_lib_cfg_nodestatusget_ptr = &res_lib_cfg_nodestatusget_version; + res_lib_cfg_nodestatusget_size = sizeof(res_lib_cfg_nodestatusget_version); + + res_lib_cfg_nodestatusget_version.header.error = CS_ERR_NOT_SUPPORTED; + res_lib_cfg_nodestatusget_version.header.id = MESSAGE_RES_CFG_NODESTATUSGET; + res_lib_cfg_nodestatusget_version.header.size = res_lib_cfg_nodestatusget_size; + break; + } + +ipc_response_send: + api->ipc_response_send ( + conn, + res_lib_cfg_nodestatusget_ptr, + res_lib_cfg_nodestatusget_size); + + LEAVE(); +} + +static void message_handler_req_lib_cfg_trackstart ( + void *conn, + const void *msg) +{ + struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); + struct res_lib_cfg_trackstart res_lib_cfg_trackstart; + + ENTER(); + + /* + * We only do shutdown tracking at the moment + */ + if (qb_list_empty(&ci->list)) { + qb_list_add(&ci->list, &trackers_list); + ci->tracker_conn = conn; + + if (shutdown_con) { + /* + * Shutdown already in progress, ask the newcomer's opinion + */ + ci->shutdown_reply = SHUTDOWN_REPLY_UNKNOWN; + shutdown_expected++; + send_test_shutdown(conn, NULL, CS_OK); + } + } + + res_lib_cfg_trackstart.header.size = sizeof(struct res_lib_cfg_trackstart); + res_lib_cfg_trackstart.header.id = MESSAGE_RES_CFG_STATETRACKSTART; + res_lib_cfg_trackstart.header.error = CS_OK; + + api->ipc_response_send(conn, &res_lib_cfg_trackstart, + sizeof(res_lib_cfg_trackstart)); + + LEAVE(); +} + +static void message_handler_req_lib_cfg_trackstop ( + void *conn, + const void *msg) +{ + struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); + struct res_lib_cfg_trackstop res_lib_cfg_trackstop; + + ENTER(); + remove_ci_from_shutdown(ci); + + 
res_lib_cfg_trackstop.header.size = sizeof(struct res_lib_cfg_trackstop); + res_lib_cfg_trackstop.header.id = MESSAGE_RES_CFG_STATETRACKSTOP; + res_lib_cfg_trackstop.header.error = CS_OK; + + api->ipc_response_send(conn, &res_lib_cfg_trackstop, + sizeof(res_lib_cfg_trackstop)); + LEAVE(); +} + +static void message_handler_req_lib_cfg_ringreenable ( + void *conn, + const void *msg) +{ + struct res_lib_cfg_ringreenable res_lib_cfg_ringreenable; + ENTER(); + + res_lib_cfg_ringreenable.header.id = MESSAGE_RES_CFG_RINGREENABLE; + res_lib_cfg_ringreenable.header.size = sizeof (struct res_lib_cfg_ringreenable); + res_lib_cfg_ringreenable.header.error = CS_ERR_NOT_SUPPORTED; + api->ipc_response_send ( + conn, &res_lib_cfg_ringreenable, + sizeof (struct res_lib_cfg_ringreenable)); + + LEAVE(); +} + +static void message_handler_req_lib_cfg_killnode ( + void *conn, + const void *msg) +{ + const struct req_lib_cfg_killnode *req_lib_cfg_killnode = msg; + struct res_lib_cfg_killnode res_lib_cfg_killnode; + struct req_exec_cfg_killnode req_exec_cfg_killnode; + struct iovec iovec; + char key_name[ICMAP_KEYNAME_MAXLEN]; + char tmp_key[ICMAP_KEYNAME_MAXLEN + 1]; + icmap_map_t map; + icmap_iter_t iter; + const char *iter_key; + uint32_t nodeid; + char *status_str = NULL; + int match_nodeid_flag = 0; + cs_error_t error = CS_OK; + + ENTER(); + + map = icmap_get_global_map(); + iter = icmap_iter_init_r(map, "runtime.members."); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + if (sscanf(iter_key, "runtime.members.%u.%s", &nodeid, key_name) != 2) { + continue; + } + if (strcmp(key_name, "status") != 0) { + continue; + } + if (nodeid != req_lib_cfg_killnode->nodeid) { + continue; + } + match_nodeid_flag = 1; + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "runtime.members.%u.status", nodeid); + if (icmap_get_string_r(map, tmp_key, &status_str) != CS_OK) { + error = CS_ERR_LIBRARY; + goto send_response; + } + if (strcmp(status_str, "joined") != 0) { + error = 
CS_ERR_NOT_EXIST; + goto send_response; + } + break; + } + + if (!match_nodeid_flag) { + error = CS_ERR_NOT_EXIST; + goto send_response; + } + + req_exec_cfg_killnode.header.size = + sizeof (struct req_exec_cfg_killnode); + req_exec_cfg_killnode.header.id = SERVICE_ID_MAKE (CFG_SERVICE, + MESSAGE_REQ_EXEC_CFG_KILLNODE); + req_exec_cfg_killnode.nodeid = req_lib_cfg_killnode->nodeid; + marshall_to_mar_name_t(&req_exec_cfg_killnode.reason, &req_lib_cfg_killnode->reason); + + iovec.iov_base = (char *)&req_exec_cfg_killnode; + iovec.iov_len = sizeof (struct req_exec_cfg_killnode); + + (void)api->totem_mcast (&iovec, 1, TOTEM_SAFE); + +send_response: + res_lib_cfg_killnode.header.size = sizeof(struct res_lib_cfg_killnode); + res_lib_cfg_killnode.header.id = MESSAGE_RES_CFG_KILLNODE; + res_lib_cfg_killnode.header.error = error; + + api->ipc_response_send(conn, &res_lib_cfg_killnode, + sizeof(res_lib_cfg_killnode)); + + free(status_str); + icmap_iter_finalize(iter); + LEAVE(); +} + + +static void message_handler_req_lib_cfg_tryshutdown ( + void *conn, + const void *msg) +{ + struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); + const struct req_lib_cfg_tryshutdown *req_lib_cfg_tryshutdown = msg; + struct qb_list_head *iter; + + ENTER(); + + if (req_lib_cfg_tryshutdown->flags == CFG_SHUTDOWN_FLAG_IMMEDIATE) { + struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; + + /* + * Tell other nodes + */ + send_shutdown(); + + res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); + res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; + res_lib_cfg_tryshutdown.header.error = CS_OK; + api->ipc_response_send(conn, &res_lib_cfg_tryshutdown, + sizeof(res_lib_cfg_tryshutdown)); + + LEAVE(); + return; + } + + /* + * Shutdown in progress, return an error + */ + if (shutdown_con) { + struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; + + res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); + 
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; + res_lib_cfg_tryshutdown.header.error = CS_ERR_EXIST; + + api->ipc_response_send(conn, &res_lib_cfg_tryshutdown, + sizeof(res_lib_cfg_tryshutdown)); + + + LEAVE(); + + return; + } + + ci->conn = conn; + shutdown_con = (struct cfg_info *)api->ipc_private_data_get (conn); + shutdown_flags = req_lib_cfg_tryshutdown->flags; + shutdown_yes = 0; + shutdown_no = 0; + + /* + * Count the number of listeners + */ + shutdown_expected = 0; + + qb_list_for_each(iter, &trackers_list) { + struct cfg_info *testci = qb_list_entry(iter, struct cfg_info, list); + /* + * It is assumed that we will allow shutdown + */ + if (testci != ci) { + testci->shutdown_reply = SHUTDOWN_REPLY_UNKNOWN; + shutdown_expected++; + } + } + + /* + * If no-one is listening for events then we can just go down now + */ + if (shutdown_expected == 0) { + struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown; + + res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown); + res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN; + res_lib_cfg_tryshutdown.header.error = CS_OK; + + /* + * Tell originator that shutdown was confirmed + */ + api->ipc_response_send(conn, &res_lib_cfg_tryshutdown, + sizeof(res_lib_cfg_tryshutdown)); + + send_shutdown(); + LEAVE(); + return; + } + else { + unsigned int shutdown_timeout = DEFAULT_SHUTDOWN_TIMEOUT; + + /* + * Look for a shutdown timeout in configuration map + */ + icmap_get_uint32("cfg.shutdown_timeout", &shutdown_timeout); + + /* + * Start the timer. If we don't get a full set of replies before this goes + * off we'll cancel the shutdown + */ + api->timer_add_duration((unsigned long long)shutdown_timeout*QB_TIME_NS_IN_MSEC, NULL, + shutdown_timer_fn, &shutdown_timer); + + /* + * Tell the users we would like to shut down + */ + send_test_shutdown(NULL, conn, CS_OK); + } + + /* + * We don't sent a reply to the caller here. 
+ * We send it when we know if we can shut down or not + */ + + LEAVE(); +} + +static void message_handler_req_lib_cfg_replytoshutdown ( + void *conn, + const void *msg) +{ + struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn); + const struct req_lib_cfg_replytoshutdown *req_lib_cfg_replytoshutdown = msg; + struct res_lib_cfg_replytoshutdown res_lib_cfg_replytoshutdown; + int status = CS_OK; + + ENTER(); + if (!shutdown_con) { + status = CS_ERR_ACCESS; + goto exit_fn; + } + + if (req_lib_cfg_replytoshutdown->response) { + shutdown_yes++; + ci->shutdown_reply = SHUTDOWN_REPLY_YES; + } + else { + shutdown_no++; + ci->shutdown_reply = SHUTDOWN_REPLY_NO; + } + check_shutdown_status(); + +exit_fn: + res_lib_cfg_replytoshutdown.header.error = status; + res_lib_cfg_replytoshutdown.header.id = MESSAGE_RES_CFG_REPLYTOSHUTDOWN; + res_lib_cfg_replytoshutdown.header.size = sizeof(res_lib_cfg_replytoshutdown); + + api->ipc_response_send(conn, &res_lib_cfg_replytoshutdown, + sizeof(res_lib_cfg_replytoshutdown)); + + LEAVE(); +} + +static void message_handler_req_lib_cfg_get_node_addrs (void *conn, + const void *msg) +{ + struct totem_ip_address node_ifs[INTERFACE_MAX]; + unsigned int iface_ids[INTERFACE_MAX]; + char buf[PIPE_BUF]; + char **status; + unsigned int num_interfaces = 0; + struct sockaddr_storage *ss; + int ret = CS_OK; + int i; + int live_addrs = 0; + const struct req_lib_cfg_get_node_addrs *req_lib_cfg_get_node_addrs = msg; + struct res_lib_cfg_get_node_addrs *res_lib_cfg_get_node_addrs = (struct res_lib_cfg_get_node_addrs *)buf; + unsigned int nodeid = req_lib_cfg_get_node_addrs->nodeid; + char *addr_buf; + + if (nodeid == 0) + nodeid = api->totem_nodeid_get(); + + if (api->totem_ifaces_get(nodeid, iface_ids, node_ifs, INTERFACE_MAX, &status, &num_interfaces)) { + ret = CS_ERR_EXIST; + num_interfaces = 0; + } + + res_lib_cfg_get_node_addrs->header.size = sizeof(struct res_lib_cfg_get_node_addrs) + (num_interfaces * TOTEMIP_ADDRLEN); + 
res_lib_cfg_get_node_addrs->header.id = MESSAGE_RES_CFG_GET_NODE_ADDRS; + res_lib_cfg_get_node_addrs->header.error = ret; + if (num_interfaces) { + res_lib_cfg_get_node_addrs->family = node_ifs[0].family; + for (i = 0, addr_buf = (char *)res_lib_cfg_get_node_addrs->addrs; + i < num_interfaces; i++) { + ss = (struct sockaddr_storage *)&node_ifs[i].addr; + if (ss->ss_family) { + memcpy(addr_buf, node_ifs[i].addr, TOTEMIP_ADDRLEN); + live_addrs++; + addr_buf += TOTEMIP_ADDRLEN; + } + } + res_lib_cfg_get_node_addrs->num_addrs = live_addrs; + } else { + res_lib_cfg_get_node_addrs->header.error = CS_ERR_NOT_EXIST; + } + api->ipc_response_send(conn, res_lib_cfg_get_node_addrs, res_lib_cfg_get_node_addrs->header.size); +} + +static void message_handler_req_lib_cfg_local_get (void *conn, const void *msg) +{ + struct res_lib_cfg_local_get res_lib_cfg_local_get; + + res_lib_cfg_local_get.header.size = sizeof(res_lib_cfg_local_get); + res_lib_cfg_local_get.header.id = MESSAGE_RES_CFG_LOCAL_GET; + res_lib_cfg_local_get.header.error = CS_OK; + res_lib_cfg_local_get.local_nodeid = api->totem_nodeid_get (); + + api->ipc_response_send(conn, &res_lib_cfg_local_get, + sizeof(res_lib_cfg_local_get)); +} + +static void message_handler_req_lib_cfg_reload_config (void *conn, const void *msg) +{ + struct req_exec_cfg_reload_config req_exec_cfg_reload_config; + struct iovec iovec; + + ENTER(); + + req_exec_cfg_reload_config.header.size = + sizeof (struct req_exec_cfg_reload_config); + req_exec_cfg_reload_config.header.id = SERVICE_ID_MAKE (CFG_SERVICE, + MESSAGE_REQ_EXEC_CFG_RELOAD_CONFIG); + api->ipc_source_set (&req_exec_cfg_reload_config.source, conn); + api->ipc_refcnt_inc(conn); + + iovec.iov_base = (char *)&req_exec_cfg_reload_config; + iovec.iov_len = sizeof (struct req_exec_cfg_reload_config); + + assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0); + + LEAVE(); +} + +static void message_handler_req_lib_cfg_reopen_log_files (void *conn, const void *msg) +{ + struct 
res_lib_cfg_reopen_log_files res_lib_cfg_reopen_log_files; + cs_error_t res; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "Reopening logging files\n"); + + res = logsys_reopen_log_files(); + + res_lib_cfg_reopen_log_files.header.size = sizeof(res_lib_cfg_reopen_log_files); + res_lib_cfg_reopen_log_files.header.id = MESSAGE_RES_CFG_REOPEN_LOG_FILES; + res_lib_cfg_reopen_log_files.header.error = res; + api->ipc_response_send(conn, + &res_lib_cfg_reopen_log_files, + sizeof(res_lib_cfg_reopen_log_files)); + + LEAVE(); +} diff --git a/exec/cmap.c b/exec/cmap.c new file mode 100644 index 0000000..1d1b69e --- /dev/null +++ b/exec/cmap.c @@ -0,0 +1,1155 @@ +/* + * Copyright (c) 2011-2017 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Jan Friesse (jfriesse@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the Red Hat, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <sys/types.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <errno.h> +#include <poll.h> +#include <assert.h> + +#include <qb/qbloop.h> +#include <qb/qblist.h> +#include <qb/qbipcs.h> +#include <qb/qbipc_common.h> + +#include <corosync/corotypes.h> +#include <corosync/corodefs.h> +#include <corosync/mar_gen.h> +#include <corosync/ipc_cmap.h> +#include <corosync/logsys.h> +#include <corosync/coroapi.h> +#include <corosync/icmap.h> + +#include "service.h" +#include "ipcs_stats.h" +#include "stats.h" + +LOGSYS_DECLARE_SUBSYS ("CMAP"); + +#define MAX_REQ_EXEC_CMAP_MCAST_ITEMS 32 +#define ICMAP_VALUETYPE_NOT_EXIST 0 + +struct cmap_map { + cs_error_t (*map_get)(const char *key_name, + void *value, + size_t *value_len, + icmap_value_types_t *type); + + cs_error_t (*map_set)(const char *key_name, + const void *value, + size_t value_len, + icmap_value_types_t type); + + cs_error_t (*map_adjust_int)(const char *key_name, int32_t step); + + cs_error_t (*map_delete)(const char *key_name); + + int (*map_is_key_ro)(const char *key_name); + + icmap_iter_t (*map_iter_init)(const char *prefix); + const char * (*map_iter_next)(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type); + void (*map_iter_finalize)(icmap_iter_t iter); + + cs_error_t (*map_track_add)(const char *key_name, + int32_t track_type, + icmap_notify_fn_t notify_fn, + void *user_data, + icmap_track_t 
*icmap_track); + + cs_error_t (*map_track_delete)(icmap_track_t icmap_track); + void * (*map_track_get_user_data)(icmap_track_t icmap_track); +}; + +struct cmap_map icmap_map = { + .map_get = icmap_get, + .map_set = icmap_set, + .map_adjust_int = icmap_adjust_int, + .map_delete = icmap_delete, + .map_is_key_ro = icmap_is_key_ro, + .map_iter_init = icmap_iter_init, + .map_iter_next = icmap_iter_next, + .map_iter_finalize = icmap_iter_finalize, + .map_track_add = icmap_track_add, + .map_track_delete = icmap_track_delete, + .map_track_get_user_data = icmap_track_get_user_data, +}; + +struct cmap_map stats_map = { + .map_get = stats_map_get, + .map_set = stats_map_set, + .map_adjust_int = stats_map_adjust_int, + .map_delete = stats_map_delete, + .map_is_key_ro = stats_map_is_key_ro, + .map_iter_init = stats_map_iter_init, + .map_iter_next = stats_map_iter_next, + .map_iter_finalize = stats_map_iter_finalize, + .map_track_add = stats_map_track_add, + .map_track_delete = stats_map_track_delete, + .map_track_get_user_data = stats_map_track_get_user_data, +}; + +struct cmap_conn_info { + struct hdb_handle_database iter_db; + struct hdb_handle_database track_db; + struct cmap_map map_fns; +}; + +typedef uint64_t cmap_iter_handle_t; +typedef uint64_t cmap_track_handle_t; + +struct cmap_track_user_data { + void *conn; + cmap_track_handle_t track_handle; + uint64_t track_inst_handle; +}; + +enum cmap_message_req_types { + MESSAGE_REQ_EXEC_CMAP_MCAST = 0, +}; + +enum cmap_mcast_reason { + CMAP_MCAST_REASON_SYNC = 0, + CMAP_MCAST_REASON_NEW_CONFIG_VERSION = 1, +}; + +static struct corosync_api_v1 *api; + +static char *cmap_exec_init_fn (struct corosync_api_v1 *corosync_api); +static int cmap_exec_exit_fn(void); + +static int cmap_lib_init_fn (void *conn); +static int cmap_lib_exit_fn (void *conn); + +static void message_handler_req_lib_cmap_set(void *conn, const void *message); +static void message_handler_req_lib_cmap_delete(void *conn, const void *message); +static void 
message_handler_req_lib_cmap_get(void *conn, const void *message); +static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message); +static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message); +static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message); +static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message); +static void message_handler_req_lib_cmap_track_add(void *conn, const void *message); +static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message); +static void message_handler_req_lib_cmap_set_current_map(void *conn, const void *message); + +static void cmap_notify_fn(int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data); + +static void message_handler_req_exec_cmap_mcast( + const void *message, + unsigned int nodeid); + +static void exec_cmap_mcast_endian_convert(void *message); + +/* + * Reson is subtype of message. argc is number of items in argv array. Argv is array + * of strings (key names) which will be send to wire. There can be maximum + * MAX_REQ_EXEC_CMAP_MCAST_ITEMS items (for more items, CS_ERR_TOO_MANY_GROUPS + * error is returned). If key is not found, item has type ICMAP_VALUETYPE_NOT_EXIST + * and length zero. 
+ */ +static cs_error_t cmap_mcast_send(enum cmap_mcast_reason reason, int argc, char *argv[]); + +static void cmap_sync_init ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + +static int cmap_sync_process (void); +static void cmap_sync_activate (void); +static void cmap_sync_abort (void); + +static void cmap_config_version_track_cb( + int32_t event, + const char *key_name, + struct icmap_notify_value new_value, + struct icmap_notify_value old_value, + void *user_data); + +/* + * Library Handler Definition + */ +static struct corosync_lib_handler cmap_lib_engine[] = +{ + { /* 0 */ + .lib_handler_fn = message_handler_req_lib_cmap_set, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 1 */ + .lib_handler_fn = message_handler_req_lib_cmap_delete, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 2 */ + .lib_handler_fn = message_handler_req_lib_cmap_get, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 3 */ + .lib_handler_fn = message_handler_req_lib_cmap_adjust_int, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 4 */ + .lib_handler_fn = message_handler_req_lib_cmap_iter_init, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 5 */ + .lib_handler_fn = message_handler_req_lib_cmap_iter_next, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 6 */ + .lib_handler_fn = message_handler_req_lib_cmap_iter_finalize, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 7 */ + .lib_handler_fn = message_handler_req_lib_cmap_track_add, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 8 */ + .lib_handler_fn = message_handler_req_lib_cmap_track_delete, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 9 */ + .lib_handler_fn = message_handler_req_lib_cmap_set_current_map, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, +}; + +static struct 
corosync_exec_handler cmap_exec_engine[] = +{ + { /* 0 - MESSAGE_REQ_EXEC_CMAP_MCAST */ + .exec_handler_fn = message_handler_req_exec_cmap_mcast, + .exec_endian_convert_fn = exec_cmap_mcast_endian_convert + }, +}; + +struct corosync_service_engine cmap_service_engine = { + .name = "corosync configuration map access", + .id = CMAP_SERVICE, + .priority = 1, + .private_data_size = sizeof(struct cmap_conn_info), + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, + .allow_inquorate = CS_LIB_ALLOW_INQUORATE, + .lib_init_fn = cmap_lib_init_fn, + .lib_exit_fn = cmap_lib_exit_fn, + .lib_engine = cmap_lib_engine, + .lib_engine_count = sizeof (cmap_lib_engine) / sizeof (struct corosync_lib_handler), + .exec_init_fn = cmap_exec_init_fn, + .exec_exit_fn = cmap_exec_exit_fn, + .exec_engine = cmap_exec_engine, + .exec_engine_count = sizeof (cmap_exec_engine) / sizeof (struct corosync_exec_handler), + .sync_init = cmap_sync_init, + .sync_process = cmap_sync_process, + .sync_activate = cmap_sync_activate, + .sync_abort = cmap_sync_abort +}; + +struct corosync_service_engine *cmap_get_service_engine_ver0 (void) +{ + return (&cmap_service_engine); +} + +struct req_exec_cmap_mcast_item { + mar_name_t key_name __attribute__((aligned(8))); + mar_uint8_t value_type __attribute__((aligned(8))); + mar_size_t value_len __attribute__((aligned(8))); + uint8_t value[] __attribute__((aligned(8))); +}; + +struct req_exec_cmap_mcast { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_uint8_t reason __attribute__((aligned(8))); + mar_uint8_t no_items __attribute__((aligned(8))); + mar_uint8_t reserved1 __attribute__((aligned(8))); + mar_uint8_t reserver2 __attribute__((aligned(8))); + /* + * Following are array of req_exec_cmap_mcast_item alligned to 8 bytes + */ +}; + +static size_t cmap_sync_trans_list_entries = 0; +static size_t cmap_sync_member_list_entries = 0; +static uint64_t cmap_highest_config_version_received = 0; +static uint64_t cmap_my_config_version = 0; 
+static int cmap_first_sync = 1; +static icmap_track_t cmap_config_version_track; + +static void cmap_config_version_track_cb( + int32_t event, + const char *key_name, + struct icmap_notify_value new_value, + struct icmap_notify_value old_value, + void *user_data) +{ + const char *key = "totem.config_version"; + cs_error_t ret; + + ENTER(); + + if (icmap_get_uint64("totem.config_version", &cmap_my_config_version) != CS_OK) { + cmap_my_config_version = 0; + } + + + ret = cmap_mcast_send(CMAP_MCAST_REASON_NEW_CONFIG_VERSION, 1, (char **)&key); + if (ret != CS_OK) { + log_printf(LOGSYS_LEVEL_ERROR, "Can't inform other nodes about new config version"); + } + + LEAVE(); +} + +static int cmap_exec_exit_fn(void) +{ + + if (icmap_track_delete(cmap_config_version_track) != CS_OK) { + log_printf(LOGSYS_LEVEL_ERROR, "Can't delete config_version icmap tracker"); + } + + return 0; +} + +static char *cmap_exec_init_fn ( + struct corosync_api_v1 *corosync_api) +{ + cs_error_t ret; + + api = corosync_api; + + ret = icmap_track_add("totem.config_version", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, + cmap_config_version_track_cb, + NULL, + &cmap_config_version_track); + + if (ret != CS_OK) { + return ((char *)"Can't add config_version icmap tracker"); + } + + return (NULL); +} + +static int cmap_lib_init_fn (void *conn) +{ + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + + log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p", conn); + + api->ipc_refcnt_inc(conn); + + memset(conn_info, 0, sizeof(*conn_info)); + conn_info->map_fns = icmap_map; + hdb_create(&conn_info->iter_db); + hdb_create(&conn_info->track_db); + + return (0); +} + +static int cmap_lib_exit_fn (void *conn) +{ + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + hdb_handle_t iter_handle = 0; + icmap_iter_t *iter; + hdb_handle_t track_handle = 0; + icmap_track_t *track; + + log_printf(LOGSYS_LEVEL_DEBUG, 
"exit_fn for conn=%p", conn); + + hdb_iterator_reset(&conn_info->iter_db); + while (hdb_iterator_next(&conn_info->iter_db, + (void*)&iter, &iter_handle) == 0) { + + conn_info->map_fns.map_iter_finalize(*iter); + + (void)hdb_handle_put (&conn_info->iter_db, iter_handle); + } + + hdb_destroy(&conn_info->iter_db); + + hdb_iterator_reset(&conn_info->track_db); + while (hdb_iterator_next(&conn_info->track_db, + (void*)&track, &track_handle) == 0) { + + free(conn_info->map_fns.map_track_get_user_data(*track)); + + conn_info->map_fns.map_track_delete(*track); + + (void)hdb_handle_put (&conn_info->track_db, track_handle); + } + hdb_destroy(&conn_info->track_db); + + api->ipc_refcnt_dec(conn); + + return (0); +} + +static void cmap_sync_init ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id) +{ + + cmap_sync_trans_list_entries = trans_list_entries; + cmap_sync_member_list_entries = member_list_entries; + + if (icmap_get_uint64("totem.config_version", &cmap_my_config_version) != CS_OK) { + cmap_my_config_version = 0; + } + + cmap_highest_config_version_received = cmap_my_config_version; +} + +static int cmap_sync_process (void) +{ + const char *key = "totem.config_version"; + cs_error_t ret; + + ret = cmap_mcast_send(CMAP_MCAST_REASON_SYNC, 1, (char **)&key); + + return (ret == CS_OK ? 
0 : -1); +} + +static void cmap_sync_activate (void) +{ + + if (cmap_sync_trans_list_entries == 0) { + log_printf(LOGSYS_LEVEL_DEBUG, "Single node sync -> no action"); + + return ; + } + + if (cmap_first_sync == 1) { + cmap_first_sync = 0; + } else { + log_printf(LOGSYS_LEVEL_DEBUG, "Not first sync -> no action"); + + return ; + } + + if (cmap_my_config_version == 0) { + log_printf(LOGSYS_LEVEL_DEBUG, "My config version is 0 -> no action"); + + return ; + } + + if (cmap_highest_config_version_received != cmap_my_config_version) { + log_printf(LOGSYS_LEVEL_ERROR, + "Received config version (%"PRIu64") is different than my config version (%"PRIu64")! Exiting", + cmap_highest_config_version_received, cmap_my_config_version); + api->shutdown_request(); + return ; + } +} + +static void cmap_sync_abort (void) +{ + + +} + +static void message_handler_req_lib_cmap_set(void *conn, const void *message) +{ + const struct req_lib_cmap_set *req_lib_cmap_set = message; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + struct res_lib_cmap_set res_lib_cmap_set; + cs_error_t ret; + + if (conn_info->map_fns.map_is_key_ro((char *)req_lib_cmap_set->key_name.value)) { + ret = CS_ERR_ACCESS; + } else { + ret = conn_info->map_fns.map_set((char *)req_lib_cmap_set->key_name.value, &req_lib_cmap_set->value, + req_lib_cmap_set->value_len, req_lib_cmap_set->type); + } + + memset(&res_lib_cmap_set, 0, sizeof(res_lib_cmap_set)); + res_lib_cmap_set.header.size = sizeof(res_lib_cmap_set); + res_lib_cmap_set.header.id = MESSAGE_RES_CMAP_SET; + res_lib_cmap_set.header.error = ret; + + api->ipc_response_send(conn, &res_lib_cmap_set, sizeof(res_lib_cmap_set)); +} + +static void message_handler_req_lib_cmap_delete(void *conn, const void *message) +{ + const struct req_lib_cmap_set *req_lib_cmap_set = message; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + struct res_lib_cmap_delete res_lib_cmap_delete; + 
cs_error_t ret; + + if (conn_info->map_fns.map_is_key_ro((char *)req_lib_cmap_set->key_name.value)) { + ret = CS_ERR_ACCESS; + } else { + ret = conn_info->map_fns.map_delete((char *)req_lib_cmap_set->key_name.value); + } + + memset(&res_lib_cmap_delete, 0, sizeof(res_lib_cmap_delete)); + res_lib_cmap_delete.header.size = sizeof(res_lib_cmap_delete); + res_lib_cmap_delete.header.id = MESSAGE_RES_CMAP_DELETE; + res_lib_cmap_delete.header.error = ret; + + api->ipc_response_send(conn, &res_lib_cmap_delete, sizeof(res_lib_cmap_delete)); +} + +static void message_handler_req_lib_cmap_get(void *conn, const void *message) +{ + const struct req_lib_cmap_get *req_lib_cmap_get = message; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + struct res_lib_cmap_get *res_lib_cmap_get; + struct res_lib_cmap_get error_res_lib_cmap_get; + cs_error_t ret; + size_t value_len; + size_t res_lib_cmap_get_size; + icmap_value_types_t type; + void *value; + + value_len = req_lib_cmap_get->value_len; + + res_lib_cmap_get_size = sizeof(*res_lib_cmap_get) + value_len; + res_lib_cmap_get = malloc(res_lib_cmap_get_size); + if (res_lib_cmap_get == NULL) { + ret = CS_ERR_NO_MEMORY; + goto error_exit; + } + + memset(res_lib_cmap_get, 0, res_lib_cmap_get_size); + + if (value_len > 0) { + value = res_lib_cmap_get->value; + } else { + value = NULL; + } + + ret = conn_info->map_fns.map_get((char *)req_lib_cmap_get->key_name.value, + value, + &value_len, + &type); + + if (ret != CS_OK) { + free(res_lib_cmap_get); + goto error_exit; + } + + res_lib_cmap_get->header.size = res_lib_cmap_get_size; + res_lib_cmap_get->header.id = MESSAGE_RES_CMAP_GET; + res_lib_cmap_get->header.error = ret; + res_lib_cmap_get->type = type; + res_lib_cmap_get->value_len = value_len; + + api->ipc_response_send(conn, res_lib_cmap_get, res_lib_cmap_get_size); + free(res_lib_cmap_get); + + return ; + +error_exit: + memset(&error_res_lib_cmap_get, 0, sizeof(error_res_lib_cmap_get)); + 
error_res_lib_cmap_get.header.size = sizeof(error_res_lib_cmap_get); + error_res_lib_cmap_get.header.id = MESSAGE_RES_CMAP_GET; + error_res_lib_cmap_get.header.error = ret; + + api->ipc_response_send(conn, &error_res_lib_cmap_get, sizeof(error_res_lib_cmap_get)); +} + +static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message) +{ + const struct req_lib_cmap_adjust_int *req_lib_cmap_adjust_int = message; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + struct res_lib_cmap_adjust_int res_lib_cmap_adjust_int; + cs_error_t ret; + + if (conn_info->map_fns.map_is_key_ro((char *)req_lib_cmap_adjust_int->key_name.value)) { + ret = CS_ERR_ACCESS; + } else { + ret = conn_info->map_fns.map_adjust_int((char *)req_lib_cmap_adjust_int->key_name.value, + req_lib_cmap_adjust_int->step); + + } + + memset(&res_lib_cmap_adjust_int, 0, sizeof(res_lib_cmap_adjust_int)); + res_lib_cmap_adjust_int.header.size = sizeof(res_lib_cmap_adjust_int); + res_lib_cmap_adjust_int.header.id = MESSAGE_RES_CMAP_ADJUST_INT; + res_lib_cmap_adjust_int.header.error = ret; + + api->ipc_response_send(conn, &res_lib_cmap_adjust_int, sizeof(res_lib_cmap_adjust_int)); +} + +static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message) +{ + const struct req_lib_cmap_iter_init *req_lib_cmap_iter_init = message; + struct res_lib_cmap_iter_init res_lib_cmap_iter_init; + cs_error_t ret; + icmap_iter_t iter; + icmap_iter_t *hdb_iter; + cmap_iter_handle_t handle = 0ULL; + const char *prefix; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + + if (req_lib_cmap_iter_init->prefix.length > 0) { + prefix = (char *)req_lib_cmap_iter_init->prefix.value; + } else { + prefix = NULL; + } + + iter = conn_info->map_fns.map_iter_init(prefix); + if (iter == NULL) { + ret = CS_ERR_NO_SECTIONS; + goto reply_send; + } + + ret = hdb_error_to_cs(hdb_handle_create(&conn_info->iter_db, 
sizeof(iter), &handle)); + if (ret != CS_OK) { + goto reply_send; + } + + ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, handle, (void *)&hdb_iter)); + if (ret != CS_OK) { + goto reply_send; + } + + *hdb_iter = iter; + + (void)hdb_handle_put (&conn_info->iter_db, handle); + +reply_send: + memset(&res_lib_cmap_iter_init, 0, sizeof(res_lib_cmap_iter_init)); + res_lib_cmap_iter_init.header.size = sizeof(res_lib_cmap_iter_init); + res_lib_cmap_iter_init.header.id = MESSAGE_RES_CMAP_ITER_INIT; + res_lib_cmap_iter_init.header.error = ret; + res_lib_cmap_iter_init.iter_handle = handle; + + api->ipc_response_send(conn, &res_lib_cmap_iter_init, sizeof(res_lib_cmap_iter_init)); +} + +static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message) +{ + const struct req_lib_cmap_iter_next *req_lib_cmap_iter_next = message; + struct res_lib_cmap_iter_next res_lib_cmap_iter_next; + cs_error_t ret; + icmap_iter_t *iter; + size_t value_len = 0; + icmap_value_types_t type = 0; + const char *res = NULL; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + + ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, + req_lib_cmap_iter_next->iter_handle, (void *)&iter)); + if (ret != CS_OK) { + goto reply_send; + } + + res = conn_info->map_fns.map_iter_next(*iter, &value_len, &type); + if (res == NULL) { + ret = CS_ERR_NO_SECTIONS; + } + + (void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_next->iter_handle); + +reply_send: + memset(&res_lib_cmap_iter_next, 0, sizeof(res_lib_cmap_iter_next)); + res_lib_cmap_iter_next.header.size = sizeof(res_lib_cmap_iter_next); + res_lib_cmap_iter_next.header.id = MESSAGE_RES_CMAP_ITER_NEXT; + res_lib_cmap_iter_next.header.error = ret; + + if (res != NULL) { + res_lib_cmap_iter_next.value_len = value_len; + res_lib_cmap_iter_next.type = type; + + assert(strlen(res) <= sizeof(res_lib_cmap_iter_next.key_name.value)); + + memcpy(res_lib_cmap_iter_next.key_name.value, 
res, strlen(res)); + res_lib_cmap_iter_next.key_name.length = strlen(res); + } + + api->ipc_response_send(conn, &res_lib_cmap_iter_next, sizeof(res_lib_cmap_iter_next)); +} + +static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message) +{ + const struct req_lib_cmap_iter_finalize *req_lib_cmap_iter_finalize = message; + struct res_lib_cmap_iter_finalize res_lib_cmap_iter_finalize; + cs_error_t ret; + icmap_iter_t *iter; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + + ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, + req_lib_cmap_iter_finalize->iter_handle, (void *)&iter)); + if (ret != CS_OK) { + goto reply_send; + } + + conn_info->map_fns.map_iter_finalize(*iter); + + (void)hdb_handle_destroy(&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle); + + (void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle); + +reply_send: + memset(&res_lib_cmap_iter_finalize, 0, sizeof(res_lib_cmap_iter_finalize)); + res_lib_cmap_iter_finalize.header.size = sizeof(res_lib_cmap_iter_finalize); + res_lib_cmap_iter_finalize.header.id = MESSAGE_RES_CMAP_ITER_FINALIZE; + res_lib_cmap_iter_finalize.header.error = ret; + + api->ipc_response_send(conn, &res_lib_cmap_iter_finalize, sizeof(res_lib_cmap_iter_finalize)); +} + +static void cmap_notify_fn(int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + struct cmap_track_user_data *cmap_track_user_data = (struct cmap_track_user_data *)user_data; + struct res_lib_cmap_notify_callback res_lib_cmap_notify_callback; + struct iovec iov[3]; + + memset(&res_lib_cmap_notify_callback, 0, sizeof(res_lib_cmap_notify_callback)); + + res_lib_cmap_notify_callback.header.size = sizeof(res_lib_cmap_notify_callback) + new_val.len + old_val.len; + res_lib_cmap_notify_callback.header.id = MESSAGE_RES_CMAP_NOTIFY_CALLBACK; + 
res_lib_cmap_notify_callback.header.error = CS_OK; + + res_lib_cmap_notify_callback.new_value_type = new_val.type; + res_lib_cmap_notify_callback.old_value_type = old_val.type; + res_lib_cmap_notify_callback.new_value_len = new_val.len; + res_lib_cmap_notify_callback.old_value_len = old_val.len; + res_lib_cmap_notify_callback.event = event; + res_lib_cmap_notify_callback.key_name.length = strlen(key_name); + res_lib_cmap_notify_callback.track_inst_handle = cmap_track_user_data->track_inst_handle; + + assert(strlen(key_name) <= sizeof(res_lib_cmap_notify_callback.key_name.value)); + + memcpy(res_lib_cmap_notify_callback.key_name.value, key_name, strlen(key_name)); + + iov[0].iov_base = (char *)&res_lib_cmap_notify_callback; + iov[0].iov_len = sizeof(res_lib_cmap_notify_callback); + iov[1].iov_base = (char *)new_val.data; + iov[1].iov_len = new_val.len; + iov[2].iov_base = (char *)old_val.data; + iov[2].iov_len = old_val.len; + + api->ipc_dispatch_iov_send(cmap_track_user_data->conn, iov, 3); +} + +static void message_handler_req_lib_cmap_track_add(void *conn, const void *message) +{ + const struct req_lib_cmap_track_add *req_lib_cmap_track_add = message; + struct res_lib_cmap_track_add res_lib_cmap_track_add; + cs_error_t ret; + cmap_track_handle_t handle = 0; + icmap_track_t track = NULL; + icmap_track_t *hdb_track; + struct cmap_track_user_data *cmap_track_user_data; + const char *key_name; + + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + + cmap_track_user_data = malloc(sizeof(*cmap_track_user_data)); + if (cmap_track_user_data == NULL) { + ret = CS_ERR_NO_MEMORY; + + goto reply_send; + } + memset(cmap_track_user_data, 0, sizeof(*cmap_track_user_data)); + + if (req_lib_cmap_track_add->key_name.length > 0) { + key_name = (char *)req_lib_cmap_track_add->key_name.value; + } else { + key_name = NULL; + } + + ret = conn_info->map_fns.map_track_add(key_name, + req_lib_cmap_track_add->track_type, + cmap_notify_fn, + 
cmap_track_user_data, + &track); + if (ret != CS_OK) { + free(cmap_track_user_data); + + goto reply_send; + } + + ret = hdb_error_to_cs(hdb_handle_create(&conn_info->track_db, sizeof(track), &handle)); + if (ret != CS_OK) { + free(cmap_track_user_data); + + goto reply_send; + } + + ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db, handle, (void *)&hdb_track)); + if (ret != CS_OK) { + free(cmap_track_user_data); + + goto reply_send; + } + + *hdb_track = track; + cmap_track_user_data->conn = conn; + cmap_track_user_data->track_handle = handle; + cmap_track_user_data->track_inst_handle = req_lib_cmap_track_add->track_inst_handle; + + (void)hdb_handle_put (&conn_info->track_db, handle); + +reply_send: + memset(&res_lib_cmap_track_add, 0, sizeof(res_lib_cmap_track_add)); + res_lib_cmap_track_add.header.size = sizeof(res_lib_cmap_track_add); + res_lib_cmap_track_add.header.id = MESSAGE_RES_CMAP_TRACK_ADD; + res_lib_cmap_track_add.header.error = ret; + res_lib_cmap_track_add.track_handle = handle; + + api->ipc_response_send(conn, &res_lib_cmap_track_add, sizeof(res_lib_cmap_track_add)); +} + +static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message) +{ + const struct req_lib_cmap_track_delete *req_lib_cmap_track_delete = message; + struct res_lib_cmap_track_delete res_lib_cmap_track_delete; + cs_error_t ret; + icmap_track_t *track; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + uint64_t track_inst_handle = 0; + + ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db, + req_lib_cmap_track_delete->track_handle, (void *)&track)); + if (ret != CS_OK) { + goto reply_send; + } + + track_inst_handle = ((struct cmap_track_user_data *) + conn_info->map_fns.map_track_get_user_data(*track))->track_inst_handle; + + free(conn_info->map_fns.map_track_get_user_data(*track)); + + ret = conn_info->map_fns.map_track_delete(*track); + + (void)hdb_handle_put (&conn_info->track_db, 
req_lib_cmap_track_delete->track_handle); + (void)hdb_handle_destroy(&conn_info->track_db, req_lib_cmap_track_delete->track_handle); + +reply_send: + memset(&res_lib_cmap_track_delete, 0, sizeof(res_lib_cmap_track_delete)); + res_lib_cmap_track_delete.header.size = sizeof(res_lib_cmap_track_delete); + res_lib_cmap_track_delete.header.id = MESSAGE_RES_CMAP_TRACK_DELETE; + res_lib_cmap_track_delete.header.error = ret; + res_lib_cmap_track_delete.track_inst_handle = track_inst_handle; + + api->ipc_response_send(conn, &res_lib_cmap_track_delete, sizeof(res_lib_cmap_track_delete)); +} + + +static void message_handler_req_lib_cmap_set_current_map(void *conn, const void *message) +{ + const struct req_lib_cmap_set_current_map *req_lib_cmap_set_current_map = message; + struct qb_ipc_response_header res; + cs_error_t ret = CS_OK; + struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn); + int handles_open = 0; + hdb_handle_t iter_handle = 0; + icmap_iter_t *iter; + hdb_handle_t track_handle = 0; + icmap_track_t *track; + + /* Cannot switch maps while there are tracks or iterators active */ + hdb_iterator_reset(&conn_info->iter_db); + while (hdb_iterator_next(&conn_info->iter_db, + (void*)&iter, &iter_handle) == 0) { + handles_open++; + } + + hdb_iterator_reset(&conn_info->track_db); + while (hdb_iterator_next(&conn_info->track_db, + (void*)&track, &track_handle) == 0) { + handles_open++; + } + + if (handles_open) { + ret = CS_ERR_BUSY; + goto reply_send; + } + + switch (req_lib_cmap_set_current_map->map) { + case CMAP_SETMAP_DEFAULT: + conn_info->map_fns = icmap_map; + break; + case CMAP_SETMAP_STATS: + conn_info->map_fns = stats_map; + break; + default: + ret = CS_ERR_NOT_EXIST; + break; + } + +reply_send: + res.size = sizeof(res); + res.id = MESSAGE_RES_CMAP_SET_CURRENT_MAP; + res.error = ret; + + api->ipc_response_send(conn, &res, sizeof(res)); +} + +static cs_error_t cmap_mcast_send(enum cmap_mcast_reason reason, int argc, char 
*argv[]) +{ + int i; + size_t value_len; + icmap_value_types_t value_type; + cs_error_t err; + size_t item_len; + size_t msg_len = 0; + struct req_exec_cmap_mcast req_exec_cmap_mcast; + struct req_exec_cmap_mcast_item *item = NULL; + struct iovec req_exec_cmap_iovec[MAX_REQ_EXEC_CMAP_MCAST_ITEMS + 1]; + + ENTER(); + + if (argc > MAX_REQ_EXEC_CMAP_MCAST_ITEMS) { + return (CS_ERR_TOO_MANY_GROUPS); + } + + memset(req_exec_cmap_iovec, 0, sizeof(req_exec_cmap_iovec)); + + for (i = 0; i < argc; i++) { + err = icmap_get(argv[i], NULL, &value_len, &value_type); + if (err != CS_OK && err != CS_ERR_NOT_EXIST) { + goto free_mem; + } + if (err == CS_ERR_NOT_EXIST) { + value_type = ICMAP_VALUETYPE_NOT_EXIST; + value_len = 0; + } + + item_len = MAR_ALIGN_UP(sizeof(*item) + value_len, 8); + + item = malloc(item_len); + if (item == NULL) { + goto free_mem; + } + memset(item, 0, item_len); + + item->value_type = value_type; + item->value_len = value_len; + item->key_name.length = strlen(argv[i]); + + assert(strlen(argv[i]) < sizeof(item->key_name.value)); + + strcpy((char *)item->key_name.value, argv[i]); + + if (value_type != ICMAP_VALUETYPE_NOT_EXIST) { + err = icmap_get(argv[i], item->value, &value_len, &value_type); + if (err != CS_OK) { + goto free_mem; + } + } + + req_exec_cmap_iovec[i + 1].iov_base = item; + req_exec_cmap_iovec[i + 1].iov_len = item_len; + msg_len += item_len; + + qb_log(LOG_TRACE, "Item %u - type %u, len %zu", i, item->value_type, item->value_len); + + item = NULL; + } + + memset(&req_exec_cmap_mcast, 0, sizeof(req_exec_cmap_mcast)); + req_exec_cmap_mcast.header.size = sizeof(req_exec_cmap_mcast) + msg_len; + req_exec_cmap_mcast.reason = reason; + req_exec_cmap_mcast.no_items = argc; + req_exec_cmap_iovec[0].iov_base = &req_exec_cmap_mcast; + req_exec_cmap_iovec[0].iov_len = sizeof(req_exec_cmap_mcast); + + qb_log(LOG_TRACE, "Sending %u items (%u iovec) for reason %u", argc, argc + 1, reason); + err = (api->totem_mcast(req_exec_cmap_iovec, argc + 1, 
TOTEM_AGREED) == 0 ? CS_OK : CS_ERR_MESSAGE_ERROR); + +free_mem: + for (i = 0; i < argc; i++) { + free(req_exec_cmap_iovec[i + 1].iov_base); + } + + free(item); + + LEAVE(); + return (err); +} + +static struct req_exec_cmap_mcast_item *cmap_mcast_item_find( + const void *message, + char *key) +{ + const struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; + int i; + const char *p; + struct req_exec_cmap_mcast_item *item; + mar_uint16_t key_name_len; + + p = (const char *)message + sizeof(*req_exec_cmap_mcast); + + for (i = 0; i < req_exec_cmap_mcast->no_items; i++) { + item = (struct req_exec_cmap_mcast_item *)p; + + key_name_len = item->key_name.length; + if (strlen(key) == key_name_len && strcmp((char *)item->key_name.value, key) == 0) { + return (item); + } + + p += MAR_ALIGN_UP(sizeof(*item) + item->value_len, 8); + } + + return (NULL); +} + +static void message_handler_req_exec_cmap_mcast_reason_sync_nv( + enum cmap_mcast_reason reason, + const void *message, + unsigned int nodeid) +{ + char member_config_version[ICMAP_KEYNAME_MAXLEN]; + uint64_t config_version = 0; + struct req_exec_cmap_mcast_item *item; + mar_size_t value_len; + + ENTER(); + + item = cmap_mcast_item_find(message, (char *)"totem.config_version"); + if (item != NULL) { + value_len = item->value_len; + + if (item->value_type == ICMAP_VALUETYPE_NOT_EXIST) { + config_version = 0; + } + + if (item->value_type == ICMAP_VALUETYPE_UINT64) { + memcpy(&config_version, item->value, value_len); + } + } + + qb_log(LOG_TRACE, "Received config version %"PRIu64" from node " CS_PRI_NODE_ID, config_version, nodeid); + + if (nodeid != api->totem_nodeid_get() && + config_version > cmap_highest_config_version_received) { + cmap_highest_config_version_received = config_version; + } + + snprintf(member_config_version, ICMAP_KEYNAME_MAXLEN, + "runtime.members.%u.config_version", nodeid); + icmap_set_uint64(member_config_version, config_version); + + LEAVE(); +} + +static void 
message_handler_req_exec_cmap_mcast( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; + + ENTER(); + + switch (req_exec_cmap_mcast->reason) { + case CMAP_MCAST_REASON_SYNC: + message_handler_req_exec_cmap_mcast_reason_sync_nv(req_exec_cmap_mcast->reason, + message, nodeid); + + break; + case CMAP_MCAST_REASON_NEW_CONFIG_VERSION: + message_handler_req_exec_cmap_mcast_reason_sync_nv(req_exec_cmap_mcast->reason, + message, nodeid); + + break; + default: + qb_log(LOG_TRACE, "Received mcast with unknown reason %u", req_exec_cmap_mcast->reason); + }; + + LEAVE(); +} + +static void exec_cmap_mcast_endian_convert(void *message) +{ + struct req_exec_cmap_mcast *req_exec_cmap_mcast = message; + const char *p; + int i; + struct req_exec_cmap_mcast_item *item; + uint16_t u16; + uint32_t u32; + uint64_t u64; + float flt; + double dbl; + + swab_coroipc_request_header_t(&req_exec_cmap_mcast->header); + + p = (const char *)message + sizeof(*req_exec_cmap_mcast); + + for (i = 0; i < req_exec_cmap_mcast->no_items; i++) { + item = (struct req_exec_cmap_mcast_item *)p; + + swab_mar_uint16_t(&item->key_name.length); + swab_mar_size_t(&item->value_len); + + switch (item->value_type) { + case ICMAP_VALUETYPE_INT16: + case ICMAP_VALUETYPE_UINT16: + memcpy(&u16, item->value, sizeof(u16)); + u16 = swab16(u16); + memcpy(item->value, &u16, sizeof(u16)); + break; + case ICMAP_VALUETYPE_INT32: + case ICMAP_VALUETYPE_UINT32: + memcpy(&u32, item->value, sizeof(u32)); + u32 = swab32(u32); + memcpy(item->value, &u32, sizeof(u32)); + break; + case ICMAP_VALUETYPE_INT64: + case ICMAP_VALUETYPE_UINT64: + memcpy(&u64, item->value, sizeof(u64)); + u64 = swab64(u64); + memcpy(item->value, &u64, sizeof(u64)); + break; + case ICMAP_VALUETYPE_FLOAT: + memcpy(&flt, item->value, sizeof(flt)); + swabflt(&flt); + memcpy(item->value, &flt, sizeof(flt)); + break; + case ICMAP_VALUETYPE_DOUBLE: + memcpy(&dbl, item->value, sizeof(dbl)); + 
swabdbl(&dbl); + memcpy(item->value, &dbl, sizeof(dbl)); + break; + } + + p += MAR_ALIGN_UP(sizeof(*item) + item->value_len, 8); + } +} diff --git a/exec/coroparse.c b/exec/coroparse.c new file mode 100644 index 0000000..b614780 --- /dev/null +++ b/exec/coroparse.c @@ -0,0 +1,1697 @@ +/* + * Copyright (c) 2006-2022 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Patrick Caulfield (pcaulfie@redhat.com) + * Jan Friesse (jfriesse@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/*
 * Section context handed to the parser callback.
 *
 * parse_section() keeps one value per nesting level and passes a pointer to
 * it to the callback (parser_cb_f). On PARSER_CB_SECTION_START the callback
 * receives a copy, which then becomes the state of the nested section only.
 * main_config_parser_cb() switches on the state when handling PARSER_CB_ITEM
 * to decide which keys get validated or converted to a typed value.
 *
 * The per-value notes below describe the item handling visible in
 * main_config_parser_cb(); values without a note are not handled in the part
 * of that function reviewed here (presumably they map to the config section
 * their name suggests - confirm against the SECTION_START handling).
 */
enum main_cp_cb_data_state {
	MAIN_CP_CB_DATA_STATE_NORMAL,		/* set on PARSER_CB_START; no typed item handling */
	MAIN_CP_CB_DATA_STATE_TOTEM,		/* totem.* numeric keys converted, enumerated values validated */
	MAIN_CP_CB_DATA_STATE_INTERFACE,	/* totem.interface.* values collected in struct main_cp_cb_data */
	MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS,
	MAIN_CP_CB_DATA_STATE_UIDGID,
	MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON,
	MAIN_CP_CB_DATA_STATE_MEMBER,
	MAIN_CP_CB_DATA_STATE_QUORUM,		/* quorum.* vote/timeout keys as uint32, flags as uint8 */
	MAIN_CP_CB_DATA_STATE_QDEVICE,		/* quorum.device.* timeouts/votes as uint32, master_wins as uint8 */
	MAIN_CP_CB_DATA_STATE_NODELIST,
	MAIN_CP_CB_DATA_STATE_NODELIST_NODE,
	MAIN_CP_CB_DATA_STATE_PLOAD,		/* pload.count and pload.size stored as uint32 */
	MAIN_CP_CB_DATA_STATE_SYSTEM,		/* system.* string options validated against allowed values */
	MAIN_CP_CB_DATA_STATE_RESOURCES,
	MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM,
	MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS,
	MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED,
	MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED
};
+typedef int (*parser_cb_f)(const char *path, + char *key, + char *value, + enum main_cp_cb_data_state *state, + enum parser_cb_type type, + const char **error_string, + icmap_map_t config_map, + void *user_data); + +struct key_value_list_item { + char *key; + char *value; + struct qb_list_head list; +}; + +struct main_cp_cb_data { + int linknumber; + char *bindnetaddr; + char *mcastaddr; + char *broadcast; + int mcastport; + int ttl; + int knet_link_priority; + int knet_ping_interval; + int knet_ping_timeout; + int knet_ping_precision; + int knet_pong_count; + int knet_pmtud_interval; + unsigned int knet_mtu; + char *knet_transport; + + struct qb_list_head logger_subsys_items_head; + char *subsys; + char *logging_daemon_name; + struct qb_list_head member_items_head; + + int node_number; +}; + +static int read_config_file_into_icmap( + const char **error_string, icmap_map_t config_map); +static char error_string_response[512]; + +static int uid_determine (const char *req_user) +{ + int pw_uid = 0; + struct passwd passwd; + struct passwd* pwdptr = &passwd; + struct passwd* temp_pwd_pt; + char *pwdbuffer; + int pwdlinelen, rc; + long int id; + char *ep; + + id = strtol(req_user, &ep, 10); + if (*req_user != '\0' && *ep == '\0' && id >= 0 && id <= UINT_MAX) { + return (id); + } + + pwdlinelen = sysconf (_SC_GETPW_R_SIZE_MAX); + + if (pwdlinelen == -1) { + pwdlinelen = 256; + } + + pwdbuffer = malloc (pwdlinelen); + + while ((rc = getpwnam_r (req_user, pwdptr, pwdbuffer, pwdlinelen, &temp_pwd_pt)) == ERANGE) { + char *n; + + pwdlinelen *= 2; + if (pwdlinelen <= 32678) { + n = realloc (pwdbuffer, pwdlinelen); + if (n != NULL) { + pwdbuffer = n; + continue; + } + } + } + if (rc != 0) { + free (pwdbuffer); + sprintf (error_string_response, "getpwnam_r(): %s", strerror(rc)); + return (-1); + } + if (temp_pwd_pt == NULL) { + free (pwdbuffer); + sprintf (error_string_response, + "The '%s' user is not found in /etc/passwd, please read the documentation.", + req_user); + 
return (-1); + } + pw_uid = passwd.pw_uid; + free (pwdbuffer); + + return pw_uid; +} + +static int gid_determine (const char *req_group) +{ + int corosync_gid = 0; + struct group group; + struct group * grpptr = &group; + struct group * temp_grp_pt; + char *grpbuffer; + int grplinelen, rc; + long int id; + char *ep; + + id = strtol(req_group, &ep, 10); + if (*req_group != '\0' && *ep == '\0' && id >= 0 && id <= UINT_MAX) { + return (id); + } + + grplinelen = sysconf (_SC_GETGR_R_SIZE_MAX); + + if (grplinelen == -1) { + grplinelen = 256; + } + + grpbuffer = malloc (grplinelen); + + while ((rc = getgrnam_r (req_group, grpptr, grpbuffer, grplinelen, &temp_grp_pt)) == ERANGE) { + char *n; + + grplinelen *= 2; + if (grplinelen <= 32678) { + n = realloc (grpbuffer, grplinelen); + if (n != NULL) { + grpbuffer = n; + continue; + } + } + } + if (rc != 0) { + free (grpbuffer); + sprintf (error_string_response, "getgrnam_r(): %s", strerror(rc)); + return (-1); + } + if (temp_grp_pt == NULL) { + free (grpbuffer); + sprintf (error_string_response, + "The '%s' group is not found in /etc/group, please read the documentation.", + req_group); + return (-1); + } + corosync_gid = group.gr_gid; + free (grpbuffer); + + return corosync_gid; +} +static char *strchr_rs (const char *haystack, int byte) +{ + const char *end_address = strchr (haystack, byte); + if (end_address) { + end_address += 1; /* skip past { or = */ + + while (*end_address == ' ' || *end_address == '\t' || (unsigned char)*end_address == 0xA0) + end_address++; + } + + return ((char *) end_address); +} + +int coroparse_configparse (icmap_map_t config_map, const char **error_string) +{ + if (read_config_file_into_icmap(error_string, config_map)) { + return -1; + } + + return 0; +} + +static char *remove_whitespace(char *string, int remove_colon_and_brace) +{ + char *start; + char *end; + + start = string; + while (*start == ' ' || *start == '\t' || (unsigned char)*start == 0xA0) + start++; + + end = 
/*
 * Parse one section of the configuration file, recursing for nested sections.
 *
 * Reads lines from fp until the closing '}' of the current section (or end of
 * file for the top level, where path is ""). Every line is classified, in
 * this order, as
 *   - empty or comment (first non-blank character is '#'): skipped,
 *   - "name {": start of a nested section, parsed recursively,
 *   - "key: value": an item,
 *   - "}": end of the current section,
 * and anything else is a parse error. Because of that order a line that
 * contains '{' is always taken as a section start, even if it also contains
 * ':'.
 *
 * Parameters:
 *   fp            open config file, positioned at the first line of the section
 *   fname         file name, used only in error messages
 *   line_no       running line counter, incremented for every line read
 *   path          dotted cmap path of the current section ("" at top level)
 *   error_string  receives a pointer to the error message on failure
 *   depth         nesting depth, 0 at top level
 *   state         parser state of this section (see enum main_cp_cb_data_state)
 *   parser_cb     callback invoked for start/end of parsing, section
 *                 start/end and every item; a zero return aborts parsing
 *                 (its result is not checked for PARSER_CB_START/END)
 *   config_map    passed through to parser_cb
 *   user_data     passed through to parser_cb
 *
 * Returns 0 on success, -1 on error with *error_string set. Messages built
 * here live in a static buffer, so they are overwritten by the next failing
 * call.
 */
static int parse_section(FILE *fp,
			 const char *fname,
			 int *line_no,
			 char *path,
			 const char **error_string,
			 int depth,
			 enum main_cp_cb_data_state state,
			 parser_cb_f parser_cb,
			 icmap_map_t config_map,
			 void *user_data)
{
	char line[512];
	int i;
	char *loc;
	int ignore_line;
	char new_keyname[ICMAP_KEYNAME_MAXLEN];
	static char formated_err[384];
	const char *tmp_error_string;

	/* Top-level invocation: tell the callback that parsing starts */
	if (strcmp(path, "") == 0) {
		parser_cb("", NULL, NULL, &state, PARSER_CB_START, error_string, config_map, user_data);
	}

	tmp_error_string = NULL;

	while (fgets (line, sizeof (line), fp)) {
		(*line_no)++;

		if (strlen(line) > 0) {
			/*
			 * Check if complete line was read. Use feof to handle files
			 * without ending \n at the end of the file
			 */
			if ((line[strlen(line) - 1] != '\n') && !feof(fp)) {
				tmp_error_string = "Line too long";
				goto parse_error;
			}

			/* Strip the line terminator, accepting both "\n" and "\r\n" */
			if (line[strlen(line) - 1] == '\n')
				line[strlen(line) - 1] = '\0';
			if (strlen (line) > 0 && line[strlen(line) - 1] == '\r')
				line[strlen(line) - 1] = '\0';
		}
		/*
		 * Clear out white space and tabs
		 * (trailing only; byte 0xA0 is treated as blank as well)
		 */
		for (i = strlen (line) - 1; i > -1; i--) {
			if (line[i] == '\t' || line[i] == ' ' || (unsigned char)line[i] == 0xA0) {
				line[i] = '\0';
			} else {
				break;
			}
		}

		/*
		 * The line is ignored unless its first non-blank character exists
		 * and is not '#'.
		 */
		ignore_line = 1;
		for (i = 0; i < strlen (line); i++) {
			if (line[i] != '\t' && line[i] != ' ' && (unsigned char)line[i] != 0xA0) {
				if (line[i] != '#')
					ignore_line = 0;

				break;
			}
		}
		/*
		 * Clear out comments and empty lines
		 */
		if (ignore_line) {
			continue;
		}

		/* New section ? */
		if ((loc = strchr_rs (line, '{'))) {
			char *section;
			char *after_section;
			enum main_cp_cb_data_state newstate;

			/*
			 * loc points past '{' and any blanks following it. Overwriting
			 * loc[-1] ends the section name there; when no blanks followed,
			 * that byte is the '{' itself.
			 *
			 * NOTE(review): for a line consisting only of "{" this makes
			 * line an empty string, and remove_whitespace() then evaluates
			 * start + strlen(start) - 1, one byte before line - confirm.
			 */
			*(loc-1) = '\0';
			section = remove_whitespace(line, 1);
			after_section = remove_whitespace(loc, 0);

			if (strcmp(section, "") == 0) {
				tmp_error_string = "Missing section name before opening bracket '{'";
				goto parse_error;
			}

			if (strcmp(after_section, "") != 0) {
				tmp_error_string = "Extra characters after opening bracket '{'";
				goto parse_error;
			}

			/* +1 for the '.' separator; keeps strcpy/strcat below in bounds */
			if (strlen(path) + strlen(section) + 1 >= ICMAP_KEYNAME_MAXLEN) {
				tmp_error_string = "Start of section makes total cmap path too long";
				goto parse_error;
			}
			strcpy(new_keyname, path);
			if (strcmp(path, "") != 0) {
				strcat(new_keyname, ".");
			}
			strcat(new_keyname, section);

			/* Only use the new state for items further down the stack */
			newstate = state;
			if (!parser_cb(new_keyname, NULL, NULL, &newstate, PARSER_CB_SECTION_START,
			    &tmp_error_string, config_map, user_data)) {
				goto parse_error;
			}

			/* The nested call has already filled in *error_string on failure */
			if (parse_section(fp, fname, line_no, new_keyname, error_string, depth + 1, newstate,
			    parser_cb, config_map, user_data))
				return -1;

			continue ;
		}

		/* New key/value */
		if ((loc = strchr_rs (line, ':'))) {
			char *key;
			char *value;

			/* Split at the first ':' - key before it, value after it */
			*(loc-1) = '\0';
			key = remove_whitespace(line, 1);
			value = remove_whitespace(loc, 0);

			if (strlen(path) + strlen(key) + 1 >= ICMAP_KEYNAME_MAXLEN) {
				tmp_error_string = "New key makes total cmap path too long";
				goto parse_error;
			}
			strcpy(new_keyname, path);
			if (strcmp(path, "") != 0) {
				strcat(new_keyname, ".");
			}
			strcat(new_keyname, key);

			if (!parser_cb(new_keyname, key, value, &state, PARSER_CB_ITEM, &tmp_error_string,
			    config_map, user_data)) {
				goto parse_error;
			}

			continue ;
		}

		if (strchr_rs (line, '}')) {
			char *trimmed_line;
			trimmed_line = remove_whitespace(line, 0);

			/* The closing brace has to stand alone on its line */
			if (strcmp(trimmed_line, "}") != 0) {
				tmp_error_string = "Extra characters before or after closing bracket '}'";
				goto parse_error;
			}

			if (depth == 0) {
				tmp_error_string = "Unexpected closing brace";

				goto parse_error;
			}

			if (!parser_cb(path, NULL, NULL, &state, PARSER_CB_SECTION_END, &tmp_error_string,
			    config_map, user_data)) {
				goto parse_error;
			}

			return 0;
		}

		/*
		 * Line is not opening section, ending section or value -> error
		 */
		tmp_error_string = "Line is not opening or closing section or key value";
		goto parse_error;
	}

	/* End of file reached: only acceptable at top level */
	if (strcmp(path, "") != 0) {
		tmp_error_string = "Missing closing brace";
		goto parse_error;
	}

	if (strcmp(path, "") == 0) {
		parser_cb("", NULL, NULL, &state, PARSER_CB_END, error_string, config_map, user_data);
	}

	return 0;

parse_error:
	/*
	 * Prefix the message with file name and line number.
	 *
	 * NOTE(review): *line_no is an int but is printed with %u, and the
	 * callback is relied upon to set tmp_error_string whenever it returns
	 * zero - otherwise NULL is passed for %s.
	 */
	if (snprintf(formated_err, sizeof(formated_err), "parser error: %s:%u: %s", fname, *line_no,
	    tmp_error_string) >= sizeof(formated_err)) {
		*error_string = "Can't format parser error message";
	} else {
		*error_string = formated_err;
	}

	return -1;
}
/*
 * Convert the decimal string str to an unsigned long long.
 *
 * The whole string has to be a number: empty input, trailing characters and
 * values that do not fit are rejected. Input with a minus sign is rejected as
 * well - strtoull() itself would accept it and return the negated value
 * wrapped into the unsigned range (e.g. "-1" -> ULLONG_MAX).
 *
 * Returns 0 on success and stores the value in *res; returns -1 on failure
 * and leaves *res untouched.
 */
static int str_to_ull(const char *str, unsigned long long int *res)
{
	unsigned long long int val;
	char *endptr;

	/*
	 * Since the complete string must be consumed, a '-' can only ever be
	 * a leading sign, so its presence anywhere means "negative".
	 */
	if (strchr(str, '-') != NULL) {
		return (-1);
	}

	errno = 0;

	val = strtoull(str, &endptr, 10);
	if (errno == ERANGE) {
		return (-1);
	}

	if (endptr == str) {
		return (-1);
	}

	if (*endptr != '\0') {
		return (-1);
	}

	*res = val;
	return (0);
}
Should be ", error_string) == 1) { + return (0); + } else { + return (-1); + } +} + +static int main_config_parser_cb(const char *path, + char *key, + char *value, + enum main_cp_cb_data_state *state, + enum parser_cb_type type, + const char **error_string, + icmap_map_t config_map, + void *user_data) +{ + int ii; + long long int val; + long long int min_val, max_val; + icmap_value_types_t val_type = ICMAP_VALUETYPE_BINARY; + unsigned long long int ull; + int add_as_string; + char key_name[ICMAP_KEYNAME_MAXLEN + 1]; + static char formated_err[256]; + struct main_cp_cb_data *data = (struct main_cp_cb_data *)user_data; + struct key_value_list_item *kv_item; + struct qb_list_head *iter, *tmp_iter; + int uid, gid; + cs_error_t cs_err; + + cs_err = CS_OK; + + /* + * Formally this check is not needed because length is checked by parse_section + */ + if (strlen(path) >= sizeof(key_name)) { + if (snprintf(formated_err, sizeof(formated_err), + "Can't store path \"%s\" into key_name", path) >= sizeof(formated_err)) { + *error_string = "Can't format path into key_name error message"; + } else { + *error_string = formated_err; + } + return (0); + } + /* + * Key_name is used in atoi_error/icmap_set_error, but many of icmap_set* + * are using path, so initialize key_name to valid value + */ + strncpy(key_name, path, sizeof(key_name) - 1); + + switch (type) { + case PARSER_CB_START: + memset(data, 0, sizeof(struct main_cp_cb_data)); + *state = MAIN_CP_CB_DATA_STATE_NORMAL; + break; + case PARSER_CB_END: + break; + case PARSER_CB_ITEM: + add_as_string = 1; + + switch (*state) { + case MAIN_CP_CB_DATA_STATE_NORMAL: + break; + case MAIN_CP_CB_DATA_STATE_PLOAD: + if ((strcmp(path, "pload.count") == 0) || + (strcmp(path, "pload.size") == 0)) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint32_r(config_map, path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + break; + 
case MAIN_CP_CB_DATA_STATE_QUORUM: + if ((strcmp(path, "quorum.expected_votes") == 0) || + (strcmp(path, "quorum.votes") == 0) || + (strcmp(path, "quorum.last_man_standing_window") == 0) || + (strcmp(path, "quorum.leaving_timeout") == 0)) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint32_r(config_map, path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + + if ((strcmp(path, "quorum.two_node") == 0) || + (strcmp(path, "quorum.expected_votes_tracking") == 0) || + (strcmp(path, "quorum.allow_downscale") == 0) || + (strcmp(path, "quorum.wait_for_all") == 0) || + (strcmp(path, "quorum.auto_tie_breaker") == 0) || + (strcmp(path, "quorum.last_man_standing") == 0)) { + val_type = ICMAP_VALUETYPE_UINT8; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint8_r(config_map, path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + break; + case MAIN_CP_CB_DATA_STATE_QDEVICE: + if ((strcmp(path, "quorum.device.timeout") == 0) || + (strcmp(path, "quorum.device.sync_timeout") == 0) || + (strcmp(path, "quorum.device.votes") == 0)) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint32_r(config_map, path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + if ((strcmp(path, "quorum.device.master_wins") == 0)) { + val_type = ICMAP_VALUETYPE_UINT8; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint8_r(config_map, path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + break; + case MAIN_CP_CB_DATA_STATE_TOTEM: + if ((strcmp(path, "totem.version") == 0) || + (strcmp(path, "totem.nodeid") == 0) || + (strcmp(path, "totem.threads") == 0) || + (strcmp(path, "totem.token") == 0) || + (strcmp(path, "totem.token_coefficient") == 0) 
|| + (strcmp(path, "totem.token_retransmit") == 0) || + (strcmp(path, "totem.token_warning") == 0) || + (strcmp(path, "totem.hold") == 0) || + (strcmp(path, "totem.token_retransmits_before_loss_const") == 0) || + (strcmp(path, "totem.join") == 0) || + (strcmp(path, "totem.send_join") == 0) || + (strcmp(path, "totem.consensus") == 0) || + (strcmp(path, "totem.merge") == 0) || + (strcmp(path, "totem.downcheck") == 0) || + (strcmp(path, "totem.fail_recv_const") == 0) || + (strcmp(path, "totem.seqno_unchanged_const") == 0) || + (strcmp(path, "totem.rrp_token_expired_timeout") == 0) || + (strcmp(path, "totem.rrp_problem_count_timeout") == 0) || + (strcmp(path, "totem.rrp_problem_count_threshold") == 0) || + (strcmp(path, "totem.rrp_problem_count_mcast_threshold") == 0) || + (strcmp(path, "totem.rrp_autorecovery_check_timeout") == 0) || + (strcmp(path, "totem.heartbeat_failures_allowed") == 0) || + (strcmp(path, "totem.max_network_delay") == 0) || + (strcmp(path, "totem.window_size") == 0) || + (strcmp(path, "totem.max_messages") == 0) || + (strcmp(path, "totem.miss_count_const") == 0) || + (strcmp(path, "totem.knet_pmtud_interval") == 0) || + (strcmp(path, "totem.knet_mtu") == 0) || + (strcmp(path, "totem.knet_compression_threshold") == 0) || + (strcmp(path, "totem.netmtu") == 0)) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint32_r(config_map,path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + if (strcmp(path, "totem.knet_compression_level") == 0) { + val_type = ICMAP_VALUETYPE_INT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_int32_r(config_map, path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + if (strcmp(path, "totem.config_version") == 0) { + if (str_to_ull(value, &ull) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint64_r(config_map, path, ull)) != CS_OK) { 
+ goto icmap_set_error; + } + add_as_string = 0; + } + if (strcmp(path, "totem.ip_version") == 0) { + if ((strcmp(value, "ipv4") != 0) && + (strcmp(value, "ipv6") != 0) && + (strcmp(value, "ipv6-4") != 0) && + (strcmp(value, "ipv4-6") != 0)) { + *error_string = "Invalid ip_version type"; + + return (0); + } + } + if (strcmp(path, "totem.crypto_model") == 0) { + if (handle_crypto_model(value, error_string) != 0) { + return (0); + } + } + + if (strcmp(path, "totem.crypto_cipher") == 0) { + if ((strcmp(value, "none") != 0) && + (strcmp(value, "aes256") != 0) && + (strcmp(value, "aes192") != 0) && + (strcmp(value, "aes128") != 0)) { + *error_string = "Invalid cipher type. " + "Should be none, aes256, aes192 or aes128"; + + return (0); + } + } + if (strcmp(path, "totem.crypto_hash") == 0) { + if ((strcmp(value, "none") != 0) && + (strcmp(value, "md5") != 0) && + (strcmp(value, "sha1") != 0) && + (strcmp(value, "sha256") != 0) && + (strcmp(value, "sha384") != 0) && + (strcmp(value, "sha512") != 0)) { + *error_string = "Invalid hash type. 
" + "Should be none, md5, sha1, sha256, sha384 or sha512"; + + return (0); + } + } + + if (strcmp(path, "totem.knet_compression_model") == 0) { + if (handle_compress_model(value, error_string) != 0) { + return (0); + } + } + + break; + + case MAIN_CP_CB_DATA_STATE_SYSTEM: + if (strcmp(path, "system.qb_ipc_type") == 0) { + if ((strcmp(value, "native") != 0) && + (strcmp(value, "shm") != 0) && + (strcmp(value, "socket") != 0)) { + *error_string = "Invalid system.qb_ipc_type"; + + return (0); + } + } + if (strcmp(path, "system.sched_rr") == 0) { + if ((strcmp(value, "yes") != 0) && + (strcmp(value, "no") != 0)) { + *error_string = "Invalid system.sched_rr value"; + + return (0); + } + } + if (strcmp(path, "system.move_to_root_cgroup") == 0) { + if ((strcmp(value, "yes") != 0) && + (strcmp(value, "no") != 0) && + (strcmp(value, "auto") != 0)) { + *error_string = "Invalid system.move_to_root_cgroup"; + + return (0); + } + } + if (strcmp(path, "system.allow_knet_handle_fallback") == 0) { + if ((strcmp(value, "yes") != 0) && + (strcmp(value, "no") != 0)) { + *error_string = "Invalid system.allow_knet_handle_fallback"; + + return (0); + } + } + break; + + case MAIN_CP_CB_DATA_STATE_INTERFACE: + if (strcmp(path, "totem.interface.linknumber") == 0) { + val_type = ICMAP_VALUETYPE_UINT8; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + + data->linknumber = val; + add_as_string = 0; + } + if (strcmp(path, "totem.interface.bindnetaddr") == 0) { + data->bindnetaddr = strdup(value); + add_as_string = 0; + } + if (strcmp(path, "totem.interface.mcastaddr") == 0) { + data->mcastaddr = strdup(value); + add_as_string = 0; + } + if (strcmp(path, "totem.interface.broadcast") == 0) { + data->broadcast = strdup(value); + add_as_string = 0; + } + if (strcmp(path, "totem.interface.mcastport") == 0) { + val_type = ICMAP_VALUETYPE_UINT16; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + data->mcastport = val; + add_as_string = 0; + } + if 
(strcmp(path, "totem.interface.ttl") == 0) { + val_type = ICMAP_VALUETYPE_UINT8; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + data->ttl = val; + add_as_string = 0; + } + if (strcmp(path, "totem.interface.knet_link_priority") == 0) { + val_type = ICMAP_VALUETYPE_UINT8; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + data->knet_link_priority = val; + add_as_string = 0; + } + if (strcmp(path, "totem.interface.knet_ping_interval") == 0) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + data->knet_ping_interval = val; + add_as_string = 0; + } + if (strcmp(path, "totem.interface.knet_ping_timeout") == 0) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + data->knet_ping_timeout = val; + add_as_string = 0; + } + if (strcmp(path, "totem.interface.knet_ping_precision") == 0) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + data->knet_ping_precision = val; + add_as_string = 0; + } + if (strcmp(path, "totem.interface.knet_pong_count") == 0) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + data->knet_pong_count = val; + add_as_string = 0; + } + if (strcmp(path, "totem.interface.knet_transport") == 0) { + val_type = ICMAP_VALUETYPE_STRING; + data->knet_transport = strdup(value); + add_as_string = 0; + } + break; + case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: + if (strcmp(key, "subsys") == 0) { + data->subsys = strdup(value); + if (data->subsys == NULL) { + *error_string = "Can't alloc memory"; + + return (0); + } + } else { + kv_item = malloc(sizeof(*kv_item)); + if (kv_item == NULL) { + *error_string = "Can't alloc memory"; + + return (0); + } + memset(kv_item, 0, sizeof(*kv_item)); + + kv_item->key = strdup(key); + kv_item->value = strdup(value); + if (kv_item->key == NULL || 
kv_item->value == NULL) { + free(kv_item); + *error_string = "Can't alloc memory"; + + return (0); + } + qb_list_init(&kv_item->list); + qb_list_add(&kv_item->list, &data->logger_subsys_items_head); + } + add_as_string = 0; + break; + case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: + if (strcmp(key, "subsys") == 0) { + data->subsys = strdup(value); + if (data->subsys == NULL) { + *error_string = "Can't alloc memory"; + + return (0); + } + } else if (strcmp(key, "name") == 0) { + data->logging_daemon_name = strdup(value); + if (data->logging_daemon_name == NULL) { + *error_string = "Can't alloc memory"; + + return (0); + } + } else { + kv_item = malloc(sizeof(*kv_item)); + if (kv_item == NULL) { + *error_string = "Can't alloc memory"; + + return (0); + } + memset(kv_item, 0, sizeof(*kv_item)); + + kv_item->key = strdup(key); + kv_item->value = strdup(value); + if (kv_item->key == NULL || kv_item->value == NULL) { + free(kv_item); + *error_string = "Can't alloc memory"; + + return (0); + } + qb_list_init(&kv_item->list); + qb_list_add(&kv_item->list, &data->logger_subsys_items_head); + } + add_as_string = 0; + break; + case MAIN_CP_CB_DATA_STATE_UIDGID: + if (strcmp(key, "uid") == 0) { + uid = uid_determine(value); + if (uid == -1) { + *error_string = error_string_response; + return (0); + } + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u", + uid); + if ((cs_err = icmap_set_uint8_r(config_map, key_name, 1)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } else if (strcmp(key, "gid") == 0) { + gid = gid_determine(value); + if (gid == -1) { + *error_string = error_string_response; + return (0); + } + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u", + gid); + if ((cs_err = icmap_set_uint8_r(config_map, key_name, 1)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } else { + *error_string = "uidgid: Only uid and gid are allowed items"; + return (0); + } + break; + case MAIN_CP_CB_DATA_STATE_MEMBER: + if 
(strcmp(key, "memberaddr") != 0) { + *error_string = "Only memberaddr is allowed in member section"; + + return (0); + } + + kv_item = malloc(sizeof(*kv_item)); + if (kv_item == NULL) { + *error_string = "Can't alloc memory"; + + return (0); + } + memset(kv_item, 0, sizeof(*kv_item)); + + kv_item->key = strdup(key); + kv_item->value = strdup(value); + if (kv_item->key == NULL || kv_item->value == NULL) { + free(kv_item); + *error_string = "Can't alloc memory"; + + return (0); + } + qb_list_init(&kv_item->list); + qb_list_add(&kv_item->list, &data->member_items_head); + add_as_string = 0; + break; + case MAIN_CP_CB_DATA_STATE_NODELIST: + break; + case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.%s", data->node_number, key); + if ((strcmp(key, "nodeid") == 0) || + (strcmp(key, "quorum_votes") == 0)) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + + if ((cs_err = icmap_set_uint32_r(config_map, key_name, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + + if (add_as_string) { + if ((cs_err = icmap_set_string_r(config_map, key_name, value)) != CS_OK) { + goto icmap_set_error; + }; + add_as_string = 0; + } + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES: + if (strcmp(key, "watchdog_timeout") == 0) { + val_type = ICMAP_VALUETYPE_UINT32; + if (safe_atoq(value, &val, val_type) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint32_r(config_map,path, val)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM: + case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED: + if (strcmp(key, "poll_period") == 0) { + if (str_to_ull(value, &ull) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint64_r(config_map,path, ull)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS: + case 
MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED: + if (strcmp(key, "poll_period") == 0) { + if (str_to_ull(value, &ull) != 0) { + goto atoi_error; + } + if ((cs_err = icmap_set_uint64_r(config_map,path, ull)) != CS_OK) { + goto icmap_set_error; + } + add_as_string = 0; + } + break; + } + + if (add_as_string) { + if ((cs_err = icmap_set_string_r(config_map, path, value)) != CS_OK) { + goto icmap_set_error; + } + } + break; + case PARSER_CB_SECTION_START: + if (strcmp(path, "totem.interface") == 0) { + *state = MAIN_CP_CB_DATA_STATE_INTERFACE; + data->linknumber = 0; + data->mcastport = -1; + data->ttl = -1; + data->knet_link_priority = -1; + data->knet_ping_interval = -1; + data->knet_ping_timeout = -1; + data->knet_ping_precision = -1; + data->knet_pong_count = -1; + data->knet_transport = NULL; + qb_list_init(&data->member_items_head); + }; + if (strcmp(path, "totem") == 0) { + *state = MAIN_CP_CB_DATA_STATE_TOTEM; + }; + if (strcmp(path, "system") == 0) { + *state = MAIN_CP_CB_DATA_STATE_SYSTEM; + } + if (strcmp(path, "logging.logger_subsys") == 0) { + *state = MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS; + qb_list_init(&data->logger_subsys_items_head); + data->subsys = NULL; + } + if (strcmp(path, "logging.logging_daemon") == 0) { + *state = MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON; + qb_list_init(&data->logger_subsys_items_head); + data->subsys = NULL; + data->logging_daemon_name = NULL; + } + if (strcmp(path, "uidgid") == 0) { + *state = MAIN_CP_CB_DATA_STATE_UIDGID; + } + if (strcmp(path, "totem.interface.member") == 0) { + *state = MAIN_CP_CB_DATA_STATE_MEMBER; + } + if (strcmp(path, "quorum") == 0) { + *state = MAIN_CP_CB_DATA_STATE_QUORUM; + } + if (strcmp(path, "quorum.device") == 0) { + *state = MAIN_CP_CB_DATA_STATE_QDEVICE; + } + if (strcmp(path, "nodelist") == 0) { + *state = MAIN_CP_CB_DATA_STATE_NODELIST; + data->node_number = 0; + } + if (strcmp(path, "nodelist.node") == 0) { + *state = MAIN_CP_CB_DATA_STATE_NODELIST_NODE; + } + if (strcmp(path, "resources") 
== 0) { + *state = MAIN_CP_CB_DATA_STATE_RESOURCES; + } + if (strcmp(path, "resources.system") == 0) { + *state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM; + } + if (strcmp(path, "resources.system.memory_used") == 0) { + *state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED; + } + if (strcmp(path, "resources.process") == 0) { + *state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS; + } + if (strcmp(path, "resources.process.memory_used") == 0) { + *state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED; + } + break; + case PARSER_CB_SECTION_END: + switch (*state) { + case MAIN_CP_CB_DATA_STATE_INTERFACE: + /* + * Create new interface section + */ + if (data->bindnetaddr != NULL) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", + data->linknumber); + cs_err = icmap_set_string_r(config_map, key_name, data->bindnetaddr); + + free(data->bindnetaddr); + data->bindnetaddr = NULL; + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + } + + if (data->mcastaddr != NULL) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", + data->linknumber); + cs_err = icmap_set_string_r(config_map, key_name, data->mcastaddr); + + free(data->mcastaddr); + data->mcastaddr = NULL; + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + } + + if (data->broadcast != NULL) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", + data->linknumber); + cs_err = icmap_set_string_r(config_map, key_name, data->broadcast); + + free(data->broadcast); + data->broadcast = NULL; + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + } + + if (data->mcastport > -1) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", + data->linknumber); + if ((cs_err = icmap_set_uint16_r(config_map, key_name, + data->mcastport)) != CS_OK) { + goto icmap_set_error; + } + } + + if (data->ttl > -1) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", + data->linknumber); + if ((cs_err = 
icmap_set_uint8_r(config_map, key_name, data->ttl)) != CS_OK) { + goto icmap_set_error; + } + } + if (data->knet_link_priority > -1) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_link_priority", + data->linknumber); + if ((cs_err = icmap_set_uint8_r(config_map, key_name, + data->knet_link_priority)) != CS_OK) { + goto icmap_set_error; + } + } + if (data->knet_ping_interval > -1) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_interval", + data->linknumber); + if ((cs_err = icmap_set_uint32_r(config_map, key_name, + data->knet_ping_interval)) != CS_OK) { + goto icmap_set_error; + } + } + if (data->knet_ping_timeout > -1) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_timeout", + data->linknumber); + if ((cs_err = icmap_set_uint32_r(config_map, key_name, + data->knet_ping_timeout)) != CS_OK) { + goto icmap_set_error; + } + } + if (data->knet_ping_precision > -1) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_precision", + data->linknumber); + if ((cs_err = icmap_set_uint32_r(config_map, key_name, + data->knet_ping_precision)) != CS_OK) { + goto icmap_set_error; + } + } + if (data->knet_pong_count > -1) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_pong_count", + data->linknumber); + if ((cs_err = icmap_set_uint32_r(config_map, key_name, + data->knet_pong_count)) != CS_OK) { + goto icmap_set_error; + } + } + if (data->knet_transport) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_transport", + data->linknumber); + cs_err = icmap_set_string_r(config_map, key_name, data->knet_transport); + free(data->knet_transport); + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + } + + ii = 0; + + qb_list_for_each_safe(iter, tmp_iter, &(data->member_items_head)) { + kv_item = qb_list_entry(iter, struct key_value_list_item, list); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.%u", + data->linknumber, 
ii); + cs_err = icmap_set_string_r(config_map, key_name, kv_item->value); + + free(kv_item->value); + free(kv_item->key); + free(kv_item); + ii++; + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + } + + break; + case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: + if (data->subsys == NULL) { + *error_string = "No subsys key in logger_subsys directive"; + + return (0); + } + + qb_list_for_each_safe(iter, tmp_iter, &(data->logger_subsys_items_head)) { + kv_item = qb_list_entry(iter, struct key_value_list_item, list); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s", + data->subsys, kv_item->key); + cs_err = icmap_set_string_r(config_map, key_name, kv_item->value); + + free(kv_item->value); + free(kv_item->key); + free(kv_item); + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + } + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", + data->subsys); + cs_err = icmap_set_string_r(config_map, key_name, data->subsys); + + free(data->subsys); + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + break; + case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: + if (data->logging_daemon_name == NULL) { + *error_string = "No name key in logging_daemon directive"; + + return (0); + } + + qb_list_for_each_safe(iter, tmp_iter, &(data->logger_subsys_items_head)) { + kv_item = qb_list_entry(iter, struct key_value_list_item, list); + + if (data->subsys == NULL) { + if (strcmp(data->logging_daemon_name, "corosync") == 0) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, + "logging.%s", + kv_item->key); + } else { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, + "logging.logging_daemon.%s.%s", + data->logging_daemon_name, kv_item->key); + } + } else { + if (strcmp(data->logging_daemon_name, "corosync") == 0) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, + "logging.logger_subsys.%s.%s", + data->subsys, + kv_item->key); + } else { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, + "logging.logging_daemon.%s.%s.%s", + data->logging_daemon_name, 
data->subsys, + kv_item->key); + } + } + cs_err = icmap_set_string_r(config_map, key_name, kv_item->value); + + free(kv_item->value); + free(kv_item->key); + free(kv_item); + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + } + + if (data->subsys == NULL) { + if (strcmp(data->logging_daemon_name, "corosync") != 0) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.name", + data->logging_daemon_name); + cs_err = icmap_set_string_r(config_map, key_name, data->logging_daemon_name); + } + } else { + if (strcmp(data->logging_daemon_name, "corosync") == 0) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", + data->subsys); + cs_err = icmap_set_string_r(config_map, key_name, data->subsys); + + } else { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.subsys", + data->logging_daemon_name, data->subsys); + cs_err = icmap_set_string_r(config_map, key_name, data->subsys); + + if (cs_err != CS_OK) { + free(data->subsys); + free(data->logging_daemon_name); + + goto icmap_set_error; + } + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.name", + data->logging_daemon_name, data->subsys); + cs_err = icmap_set_string_r(config_map, key_name, data->logging_daemon_name); + } + } + + free(data->subsys); + free(data->logging_daemon_name); + + if (cs_err != CS_OK) { + goto icmap_set_error; + } + break; + case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: + data->node_number++; + break; + case MAIN_CP_CB_DATA_STATE_NORMAL: + case MAIN_CP_CB_DATA_STATE_PLOAD: + case MAIN_CP_CB_DATA_STATE_UIDGID: + case MAIN_CP_CB_DATA_STATE_MEMBER: + case MAIN_CP_CB_DATA_STATE_QUORUM: + case MAIN_CP_CB_DATA_STATE_QDEVICE: + case MAIN_CP_CB_DATA_STATE_NODELIST: + case MAIN_CP_CB_DATA_STATE_TOTEM: + case MAIN_CP_CB_DATA_STATE_SYSTEM: + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES: + *state = MAIN_CP_CB_DATA_STATE_NORMAL; + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM: + *state = 
MAIN_CP_CB_DATA_STATE_RESOURCES; + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED: + *state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM; + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS: + *state = MAIN_CP_CB_DATA_STATE_RESOURCES; + break; + case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED: + *state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS; + break; + } + break; + } + + return (1); + +atoi_error: + min_val = max_val = 0; + /* + * This is really assert, because developer ether doesn't set val_type correctly or + * we've got here after some nasty memory overwrite + */ + assert(safe_atoq_range(val_type, &min_val, &max_val) == 0); + + if (snprintf(formated_err, sizeof(formated_err), + "Value of key \"%s\" is expected to be integer in range (%lld..%lld), but \"%s\" was given", + key_name, min_val, max_val, value) >= sizeof(formated_err)) { + *error_string = "Can't format parser error message"; + } else { + *error_string = formated_err; + } + + return (0); + +icmap_set_error: + if (snprintf(formated_err, sizeof(formated_err), + "Can't store key \"%s\" into icmap, returned error is %s", + key_name, cs_strerror(cs_err)) >= sizeof(formated_err)) { + *error_string = "Can't format parser error message"; + } else { + *error_string = formated_err; + } + + return (0); +} + +static int uidgid_config_parser_cb(const char *path, + char *key, + char *value, + enum main_cp_cb_data_state *state, + enum parser_cb_type type, + const char **error_string, + icmap_map_t config_map, + void *user_data) +{ + char key_name[ICMAP_KEYNAME_MAXLEN]; + int uid, gid; + static char formated_err[256]; + cs_error_t cs_err; + + switch (type) { + case PARSER_CB_START: + break; + case PARSER_CB_END: + break; + case PARSER_CB_ITEM: + if (strcmp(path, "uidgid.uid") == 0) { + uid = uid_determine(value); + if (uid == -1) { + *error_string = error_string_response; + return (0); + } + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u", + uid); + if ((cs_err = 
icmap_set_uint8_r(config_map, key_name, 1)) != CS_OK) { + goto icmap_set_error; + } + } else if (strcmp(path, "uidgid.gid") == 0) { + gid = gid_determine(value); + if (gid == -1) { + *error_string = error_string_response; + return (0); + } + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u", + gid); + if ((cs_err = icmap_set_uint8_r(config_map, key_name, 1)) != CS_OK) { + goto icmap_set_error; + } + } else { + *error_string = "uidgid: Only uid and gid are allowed items"; + return (0); + } + break; + case PARSER_CB_SECTION_START: + if (strcmp(path, "uidgid") != 0) { + *error_string = "uidgid: Can't add subsection different than uidgid"; + return (0); + }; + break; + case PARSER_CB_SECTION_END: + break; + } + + return (1); + +icmap_set_error: + if (snprintf(formated_err, sizeof(formated_err), + "Can't store key \"%s\" into icmap, returned error is %s", + key_name, cs_strerror(cs_err)) >= sizeof(formated_err)) { + *error_string = "Can't format parser error message"; + } else { + *error_string = formated_err; + } + + return (0); +} + +static int read_uidgid_files_into_icmap( + const char **error_string, + icmap_map_t config_map) +{ + FILE *fp; + char *dirname_res; + DIR *dp; + struct dirent *dirent; + char filename[PATH_MAX + FILENAME_MAX + 1]; + char uidgid_dirname[PATH_MAX + FILENAME_MAX + 1]; + int res = 0; + struct stat stat_buf; + enum main_cp_cb_data_state state = MAIN_CP_CB_DATA_STATE_NORMAL; + char key_name[ICMAP_KEYNAME_MAXLEN]; + int line_no; + + /* + * Build uidgid directory based on corosync.conf file location + */ + res = snprintf(filename, sizeof(filename), "%s", + corosync_get_config_file()); + if (res >= sizeof(filename)) { + *error_string = "uidgid.d path too long"; + + return (-1); + } + + dirname_res = dirname(filename); + + res = snprintf(uidgid_dirname, sizeof(uidgid_dirname), "%s/%s", + dirname_res, "uidgid.d"); + if (res >= sizeof(uidgid_dirname)) { + *error_string = "uidgid.d path too long"; + + return (-1); + } + + dp = opendir 
(uidgid_dirname); + + if (dp == NULL) + return 0; + + for (dirent = readdir(dp); + dirent != NULL; + dirent = readdir(dp)) { + + res = snprintf(filename, sizeof (filename), "%s/%s", uidgid_dirname, dirent->d_name); + if (res >= sizeof(filename)) { + res = -1; + *error_string = "uidgid.d dirname path too long"; + + goto error_exit; + } + res = stat (filename, &stat_buf); + if (res == 0 && S_ISREG(stat_buf.st_mode)) { + + fp = fopen (filename, "r"); + if (fp == NULL) continue; + + key_name[0] = 0; + + line_no = 0; + res = parse_section(fp, filename, &line_no, key_name, error_string, 0, state, + uidgid_config_parser_cb, config_map, NULL); + + fclose (fp); + + if (res != 0) { + goto error_exit; + } + } + } + +error_exit: + closedir(dp); + + return res; +} + +/* Read config file and load into icmap */ +static int read_config_file_into_icmap( + const char **error_string, + icmap_map_t config_map) +{ + FILE *fp; + const char *filename; + char *error_reason = error_string_response; + int res; + char key_name[ICMAP_KEYNAME_MAXLEN]; + struct main_cp_cb_data data; + enum main_cp_cb_data_state state = MAIN_CP_CB_DATA_STATE_NORMAL; + int line_no; + + filename = corosync_get_config_file(); + + fp = fopen (filename, "r"); + if (fp == NULL) { + char error_str[100]; + const char *error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); + snprintf (error_reason, sizeof(error_string_response), + "Can't read file %s: %s", + filename, error_ptr); + *error_string = error_reason; + return -1; + } + + key_name[0] = 0; + + line_no = 0; + res = parse_section(fp, filename, &line_no, key_name, error_string, 0, state, + main_config_parser_cb, config_map, &data); + + fclose(fp); + + if (res == 0) { + res = read_uidgid_files_into_icmap(error_string, config_map); + } + + if (res == 0) { + snprintf (error_reason, sizeof(error_string_response), + "Successfully read main configuration file '%s'.", filename); + *error_string = error_reason; + } + + return res; +} diff --git a/exec/cpg.c 
b/exec/cpg.c new file mode 100644 index 0000000..0439d14 --- /dev/null +++ b/exec/cpg.c @@ -0,0 +1,2344 @@ +/* + * Copyright (c) 2006-2019 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Christine Caulfield (ccaulfie@redhat.com) + * Author: Jan Friesse (jfriesse@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <netinet/in.h> +#include <sys/uio.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <assert.h> +#include <arpa/inet.h> +#include <sys/mman.h> + +#include <qb/qblist.h> +#include <qb/qbmap.h> + +#include <corosync/corotypes.h> +#include <qb/qbipc_common.h> +#include <corosync/corodefs.h> +#include <corosync/logsys.h> +#include <corosync/coroapi.h> + +#include <corosync/cpg.h> +#include <corosync/ipc_cpg.h> + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +#include "service.h" + +LOGSYS_DECLARE_SUBSYS ("CPG"); + +#define GROUP_HASH_SIZE 32 + +/* Identifiers of the CPG exec messages. NOTE(review): the values presumably index cpg_exec_engine[], whose first slots are annotated with these names - confirm. */ enum cpg_message_req_types { + MESSAGE_REQ_EXEC_CPG_PROCJOIN = 0, + MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1, + MESSAGE_REQ_EXEC_CPG_JOINLIST = 2, + MESSAGE_REQ_EXEC_CPG_MCAST = 3, + MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4, + MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5, + MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST = 6, +}; + +/* List node holding one address/size pair; struct cpg_pd keeps a list head named zcb_mapped_list_head. NOTE(review): presumably describes one mapped zero-copy buffer - confirm. */ struct zcb_mapped { + struct qb_list_head list; + void *addr; + size_t size; +}; +/* + * state exec deliver + * match group name, pid -> if matched deliver for YES: + * XXX indicates impossible state + * + * join leave mcast + * UNJOINED XXX XXX NO + * LEAVE_STARTED XXX YES(unjoined_enter) YES + * JOIN_STARTED YES(join_started_enter) XXX NO + * JOIN_COMPLETED XXX NO YES + * + * join_started_enter + * set JOIN_COMPLETED + * add entry to process_info list + * unjoined_enter + * set UNJOINED + * delete entry from process_info list + * + * + * library accept join error codes + * UNJOINED YES(CS_OK) set JOIN_STARTED + * LEAVE_STARTED NO(CS_ERR_BUSY) + * JOIN_STARTED NO(CS_ERR_EXIST) + * JOIN_COMPLETED NO(CS_ERR_EXIST) + * + * library accept leave error codes + * UNJOINED NO(CS_ERR_NOT_EXIST) + * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) + * JOIN_STARTED 
NO(CS_ERR_BUSY) + * JOIN_COMPLETED YES(CS_OK) set LEAVE_STARTED + * + * library accept mcast + * UNJOINED NO(CS_ERR_NOT_EXIST) + * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) + * JOIN_STARTED YES(CS_OK) + * JOIN_COMPLETED YES(CS_OK) + */ +enum cpd_state { + CPD_STATE_UNJOINED, + CPD_STATE_LEAVE_STARTED, + CPD_STATE_JOIN_STARTED, + CPD_STATE_JOIN_COMPLETED +}; + +/* Type of my_sync_state, which is initialised to CPGSYNC_DOWNLIST. */ enum cpg_sync_state { + CPGSYNC_DOWNLIST, + CPGSYNC_JOINLIST +}; + +static struct qb_list_head joinlist_messages_head; + +/* Record tying a conn pointer to a group name, pid and cpd_state (see the state table above), with list heads for iteration instances and zcb_mapped entries. NOTE(review): presumably one instance per library connection - confirm. */ struct cpg_pd { + void *conn; + mar_cpg_name_t group_name; + uint32_t pid; + enum cpd_state cpd_state; + unsigned int flags; + int initial_totem_conf_sent; + uint64_t transition_counter; /* These two are used when sending fragmented messages */ + uint64_t initial_transition_counter; + struct qb_list_head list; + struct qb_list_head iteration_instance_list_head; + struct qb_list_head zcb_mapped_list_head; +}; + +struct cpg_iteration_instance { + hdb_handle_t handle; + struct qb_list_head list; + struct qb_list_head items_list_head; /* List of process_info */ + struct qb_list_head *current_pointer; +}; + +DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL); + +QB_LIST_DECLARE (cpg_pd_list_head); + +static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; + +static unsigned int my_member_list_entries; + +static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX]; + +static unsigned int my_old_member_list_entries = 0; + +static struct corosync_api_v1 *api = NULL; + +static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST; + +static mar_cpg_ring_id_t last_sync_ring_id; + +/* One (nodeid, pid, group) record; the state table above refers to entries being added to and deleted from the process_info list. */ struct process_info { + unsigned int nodeid; + uint32_t pid; + mar_cpg_name_t group; + struct qb_list_head list; /* on the group_info members list */ +}; +QB_LIST_DECLARE (process_info_list_head); + +struct join_list_entry { + uint32_t pid; + mar_cpg_name_t group_name; +}; + +/* A group name plus room for CPG_MEMBERS_MAX addresses. NOTE(review): join_list_entries presumably counts the used join_list slots - confirm. */ struct join_list_confchg_data { + mar_cpg_name_t cpg_group; + mar_cpg_address_t join_list[CPG_MEMBERS_MAX]; + int join_list_entries; +}; + +/* + * Service 
Interfaces required by service_message_handler struct + */ +static char *cpg_exec_init_fn (struct corosync_api_v1 *); + +static int cpg_lib_init_fn (void *conn); + +static int cpg_lib_exit_fn (void *conn); + +static void message_handler_req_exec_cpg_procjoin ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cpg_procleave ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cpg_joinlist ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cpg_mcast ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cpg_partial_mcast ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cpg_downlist_old ( + const void *message, + unsigned int nodeid); + +static void message_handler_req_exec_cpg_downlist ( + const void *message, + unsigned int nodeid); + +static void exec_cpg_procjoin_endian_convert (void *msg); + +static void exec_cpg_joinlist_endian_convert (void *msg); + +static void exec_cpg_mcast_endian_convert (void *msg); + +static void exec_cpg_partial_mcast_endian_convert (void *msg); + +static void exec_cpg_downlist_endian_convert_old (void *msg); + +static void exec_cpg_downlist_endian_convert (void *msg); + +static void message_handler_req_lib_cpg_join (void *conn, const void *message); + +static void message_handler_req_lib_cpg_leave (void *conn, const void *message); + +static void message_handler_req_lib_cpg_finalize (void *conn, const void *message); + +static void message_handler_req_lib_cpg_mcast (void *conn, const void *message); + +static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message); + +static void message_handler_req_lib_cpg_membership (void *conn, + const void *message); + +static void message_handler_req_lib_cpg_local_get (void *conn, + const void *message); + +static void message_handler_req_lib_cpg_iteration_initialize ( + void *conn, + const 
void *message); + +static void message_handler_req_lib_cpg_iteration_next ( + void *conn, + const void *message); + +static void message_handler_req_lib_cpg_iteration_finalize ( + void *conn, + const void *message); + +static void message_handler_req_lib_cpg_zc_alloc ( + void *conn, + const void *message); + +static void message_handler_req_lib_cpg_zc_free ( + void *conn, + const void *message); + +static void message_handler_req_lib_cpg_zc_execute ( + void *conn, + const void *message); + +static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason); + +static int cpg_exec_send_downlist(void); + +static int cpg_exec_send_joinlist(void); + +static void downlist_inform_clients (void); + +static void joinlist_inform_clients (void); + +static void joinlist_messages_delete (void); + +static void cpg_sync_init ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + +static int cpg_sync_process (void); + +static void cpg_sync_activate (void); + +static void cpg_sync_abort (void); + +static void do_proc_join( + const mar_cpg_name_t *name, + uint32_t pid, + unsigned int nodeid, + int reason, + qb_map_t *group_notify_map); + +static void do_proc_leave( + const mar_cpg_name_t *name, + uint32_t pid, + unsigned int nodeid, + int reason); + +static int notify_lib_totem_membership ( + void *conn, + int member_list_entries, + const unsigned int *member_list); + +static inline int zcb_all_free ( + struct cpg_pd *cpd); + +static char *cpg_print_group_name ( + const mar_cpg_name_t *group); + +/* + * Library Handler Definition + * + * Each entry pairs a library request handler with its flow-control + * setting; the comment opening an entry gives its slot number in the + * table. NOTE(review): the slot number presumably has to equal the + * request id sent by the library - confirm before reordering. + */ +static struct corosync_lib_handler cpg_lib_engine[] = +{ + { /* 0 - MESSAGE_REQ_CPG_JOIN */ + .lib_handler_fn = message_handler_req_lib_cpg_join, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 1 - MESSAGE_REQ_CPG_LEAVE */ + .lib_handler_fn = message_handler_req_lib_cpg_leave, + 
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 2 - MESSAGE_REQ_CPG_MCAST */ + .lib_handler_fn = message_handler_req_lib_cpg_mcast, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 3 - MESSAGE_REQ_CPG_MEMBERSHIP */ + .lib_handler_fn = message_handler_req_lib_cpg_membership, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 4 - MESSAGE_REQ_CPG_LOCAL_GET */ + .lib_handler_fn = message_handler_req_lib_cpg_local_get, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 5 - MESSAGE_REQ_CPG_ITERATIONINITIALIZE */ + .lib_handler_fn = message_handler_req_lib_cpg_iteration_initialize, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 6 - MESSAGE_REQ_CPG_ITERATIONNEXT */ + .lib_handler_fn = message_handler_req_lib_cpg_iteration_next, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 7 - MESSAGE_REQ_CPG_ITERATIONFINALIZE */ + .lib_handler_fn = message_handler_req_lib_cpg_iteration_finalize, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 8 - MESSAGE_REQ_CPG_FINALIZE */ + .lib_handler_fn = message_handler_req_lib_cpg_finalize, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 9 - zc_alloc handler */ + .lib_handler_fn = message_handler_req_lib_cpg_zc_alloc, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 10 - zc_free handler */ + .lib_handler_fn = message_handler_req_lib_cpg_zc_free, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 11 - zc_execute handler */ + .lib_handler_fn = message_handler_req_lib_cpg_zc_execute, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + { /* 12 - partial_mcast handler */ + .lib_handler_fn = message_handler_req_lib_cpg_partial_mcast, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, + +}; + +static struct corosync_exec_handler cpg_exec_engine[] = +{ + { /* 0 - MESSAGE_REQ_EXEC_CPG_PROCJOIN */ + .exec_handler_fn = message_handler_req_exec_cpg_procjoin, + .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert + }, + { /* 1 - MESSAGE_REQ_EXEC_CPG_PROCLEAVE */ + .exec_handler_fn = 
message_handler_req_exec_cpg_procleave, + .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert + }, + { /* 2 - MESSAGE_REQ_EXEC_CPG_JOINLIST */ + .exec_handler_fn = message_handler_req_exec_cpg_joinlist, + .exec_endian_convert_fn = exec_cpg_joinlist_endian_convert + }, + { /* 3 - MESSAGE_REQ_EXEC_CPG_MCAST */ + .exec_handler_fn = message_handler_req_exec_cpg_mcast, + .exec_endian_convert_fn = exec_cpg_mcast_endian_convert + }, + { /* 4 - MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD */ + .exec_handler_fn = message_handler_req_exec_cpg_downlist_old, + .exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old + }, + { /* 5 - MESSAGE_REQ_EXEC_CPG_DOWNLIST */ + .exec_handler_fn = message_handler_req_exec_cpg_downlist, + .exec_endian_convert_fn = exec_cpg_downlist_endian_convert + }, + { /* 6 - MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST */ + .exec_handler_fn = message_handler_req_exec_cpg_partial_mcast, + .exec_endian_convert_fn = exec_cpg_partial_mcast_endian_convert + }, +}; + +struct corosync_service_engine cpg_service_engine = { + .name = "corosync cluster closed process group service v1.01", + .id = CPG_SERVICE, + .priority = 1, + .private_data_size = sizeof (struct cpg_pd), + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, + .allow_inquorate = CS_LIB_ALLOW_INQUORATE, + .lib_init_fn = cpg_lib_init_fn, + .lib_exit_fn = cpg_lib_exit_fn, + .lib_engine = cpg_lib_engine, + .lib_engine_count = sizeof (cpg_lib_engine) / sizeof (struct corosync_lib_handler), + .exec_init_fn = cpg_exec_init_fn, + .exec_dump_fn = NULL, + .exec_engine = cpg_exec_engine, + .exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler), + .sync_init = cpg_sync_init, + .sync_process = cpg_sync_process, + .sync_activate = cpg_sync_activate, + .sync_abort = cpg_sync_abort +}; + +struct corosync_service_engine *cpg_get_service_engine_ver0 (void) +{ + return (&cpg_service_engine); +} + +struct req_exec_cpg_procjoin { + struct qb_ipc_request_header header __attribute__((aligned(8))); + 
mar_cpg_name_t group_name __attribute__((aligned(8))); + mar_uint32_t pid __attribute__((aligned(8))); + mar_uint32_t reason __attribute__((aligned(8))); +}; + +struct req_exec_cpg_mcast { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_cpg_name_t group_name __attribute__((aligned(8))); + mar_uint32_t msglen __attribute__((aligned(8))); + mar_uint32_t pid __attribute__((aligned(8))); + mar_message_source_t source __attribute__((aligned(8))); + mar_uint8_t message[] __attribute__((aligned(8))); +}; + +struct req_exec_cpg_partial_mcast { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_cpg_name_t group_name __attribute__((aligned(8))); + mar_uint32_t msglen __attribute__((aligned(8))); + mar_uint32_t fraglen __attribute__((aligned(8))); + mar_uint32_t pid __attribute__((aligned(8))); + mar_uint32_t type __attribute__((aligned(8))); + mar_message_source_t source __attribute__((aligned(8))); + mar_uint8_t message[] __attribute__((aligned(8))); +}; + +struct req_exec_cpg_downlist_old { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_uint32_t left_nodes __attribute__((aligned(8))); + mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); +}; + +struct req_exec_cpg_downlist { + struct qb_ipc_request_header header __attribute__((aligned(8))); + /* merge decisions */ + mar_uint32_t old_members __attribute__((aligned(8))); + /* downlist below */ + mar_uint32_t left_nodes __attribute__((aligned(8))); + mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); +}; + +struct joinlist_msg { + mar_uint32_t sender_nodeid; + uint32_t pid; + mar_cpg_name_t group_name; + struct qb_list_head list; +}; + +static struct req_exec_cpg_downlist g_req_exec_cpg_downlist; + +/* + * Function print group name. 
It's not reentrant + */ +static char *cpg_print_group_name(const mar_cpg_name_t *group) +{ + static char res[CPG_MAX_NAME_LENGTH * 4 + 1]; + int dest_pos = 0; + char c; + int i; + + for (i = 0; i < group->length; i++) { + c = group->value[i]; + + if (c >= ' ' && c < 0x7f && c != '\\') { + res[dest_pos++] = c; + } else { + if (c == '\\') { + res[dest_pos++] = '\\'; + res[dest_pos++] = '\\'; + } else { + snprintf(res + dest_pos, sizeof(res) - dest_pos, "\\x%02X", c); + dest_pos += 4; + } + } + } + res[dest_pos] = 0; + + return (res); +} + +static void cpg_sync_init ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id) +{ + int entries; + int i, j; + int found; + + my_sync_state = CPGSYNC_DOWNLIST; + + memcpy (my_member_list, member_list, member_list_entries * + sizeof (unsigned int)); + my_member_list_entries = member_list_entries; + + last_sync_ring_id.nodeid = ring_id->nodeid; + last_sync_ring_id.seq = ring_id->seq; + + entries = 0; + /* + * Determine list of nodeids for downlist message + */ + for (i = 0; i < my_old_member_list_entries; i++) { + found = 0; + for (j = 0; j < trans_list_entries; j++) { + if (my_old_member_list[i] == trans_list[j]) { + found = 1; + break; + } + } + if (found == 0) { + g_req_exec_cpg_downlist.nodeids[entries++] = + my_old_member_list[i]; + } + } + g_req_exec_cpg_downlist.left_nodes = entries; +} + +static int cpg_sync_process (void) +{ + int res = -1; + + if (my_sync_state == CPGSYNC_DOWNLIST) { + res = cpg_exec_send_downlist(); + if (res == -1) { + return (-1); + } + my_sync_state = CPGSYNC_JOINLIST; + } + if (my_sync_state == CPGSYNC_JOINLIST) { + res = cpg_exec_send_joinlist(); + } + return (res); +} + +static void cpg_sync_activate (void) +{ + memcpy (my_old_member_list, my_member_list, + my_member_list_entries * sizeof (unsigned int)); + my_old_member_list_entries = my_member_list_entries; + + downlist_inform_clients 
(); + + joinlist_inform_clients (); + + joinlist_messages_delete (); + + notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); +} + +static void cpg_sync_abort (void) +{ + + joinlist_messages_delete (); +} + +static int notify_lib_totem_membership ( + void *conn, + int member_list_entries, + const unsigned int *member_list) +{ + struct qb_list_head *iter; + char *buf; + int size; + struct res_lib_cpg_totem_confchg_callback *res; + + size = sizeof(struct res_lib_cpg_totem_confchg_callback) + + sizeof(mar_uint32_t) * (member_list_entries); + buf = alloca(size); + if (!buf) + return CS_ERR_LIBRARY; + + res = (struct res_lib_cpg_totem_confchg_callback *)buf; + res->member_list_entries = member_list_entries; + res->header.size = size; + res->header.id = MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK; + res->header.error = CS_OK; + + memcpy (&res->ring_id, &last_sync_ring_id, sizeof (mar_cpg_ring_id_t)); + memcpy (res->member_list, member_list, res->member_list_entries * sizeof (mar_uint32_t)); + + if (conn == NULL) { + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpg_pd = qb_list_entry (iter, struct cpg_pd, list); + api->ipc_dispatch_send (cpg_pd->conn, buf, size); + } + } else { + api->ipc_dispatch_send (conn, buf, size); + } + + return CS_OK; +} + +/* + * Helper function for notify_lib_joinlist which prepares member_list using + * process_info_list with removed left_list items. + * member_list_entries - When not NULL it contains number of member_list entries + * member_list - When not NULL it is used as pointer to start of preallocated + * array of members. Pointer is adjusted to the end of array on + * exit. 
+ */ +static void notify_lib_joinlist_fill_member_list( + const mar_cpg_name_t *group_name, + int left_list_entries, + const mar_cpg_address_t *left_list, + int *member_list_entries, + mar_cpg_address_t **member_list) +{ + struct qb_list_head *iter; + int i; + + if (member_list_entries != NULL) { + *member_list_entries = 0; + } + + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); + + if (mar_name_compare (&pi->group, group_name) == 0) { + int in_left_list = 0; + + for (i = 0; i < left_list_entries; i++) { + if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { + in_left_list = 1; + break ; + } + } + + if (!in_left_list) { + if (member_list_entries != NULL) { + (*member_list_entries)++; + } + + if (member_list != NULL) { + (*member_list)->nodeid = pi->nodeid; + (*member_list)->pid = pi->pid; + (*member_list)->reason = CPG_REASON_UNDEFINED; + (*member_list)++; + } + } + } + } +} + +static int notify_lib_joinlist( + const mar_cpg_name_t *group_name, + int joined_list_entries, + mar_cpg_address_t *joined_list, + int left_list_entries, + mar_cpg_address_t *left_list, + int id) +{ + int size; + char *buf; + struct qb_list_head *iter; + int member_list_entries; + struct res_lib_cpg_confchg_callback *res; + mar_cpg_address_t *retgi; + int i; + + /* + * Find size of member_list (use process_info_list but remove items in left_list) + */ + notify_lib_joinlist_fill_member_list(group_name, left_list_entries, left_list, + &member_list_entries, NULL); + + size = sizeof(struct res_lib_cpg_confchg_callback) + + sizeof(mar_cpg_address_t) * (member_list_entries + left_list_entries + joined_list_entries); + buf = alloca(size); + if (!buf) + return CS_ERR_LIBRARY; + + res = (struct res_lib_cpg_confchg_callback *)buf; + res->joined_list_entries = joined_list_entries; + res->left_list_entries = left_list_entries; + res->member_list_entries = member_list_entries; + retgi = res->member_list; + 
res->header.size = size; + res->header.id = id; + res->header.error = CS_OK; + memcpy(&res->group_name, group_name, sizeof(mar_cpg_name_t)); + + /* + * Fill res->memberlist. Use process_info_list but remove items in left_list. + */ + notify_lib_joinlist_fill_member_list(group_name, left_list_entries, left_list, + NULL, &retgi); + + /* + * Fill res->left_list + */ + if (left_list_entries) { + memcpy (retgi, left_list, left_list_entries * sizeof(mar_cpg_address_t)); + retgi += left_list_entries; + } + + if (joined_list_entries) { + /* + * Fill res->joined_list + */ + memcpy (retgi, joined_list, joined_list_entries * sizeof(mar_cpg_address_t)); + retgi += joined_list_entries; + + /* + * Update cpd_state for all local joined processes in group + */ + for (i = 0; i < joined_list_entries; i++) { + if (joined_list[i].nodeid == api->totem_nodeid_get()) { + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd = qb_list_entry (iter, struct cpg_pd, list); + if (joined_list[i].pid == cpd->pid && + mar_name_compare (&cpd->group_name, group_name) == 0) { + cpd->cpd_state = CPD_STATE_JOIN_COMPLETED; + } + } + } + } + } + + /* + * Send notification to all ipc clients joined in group_name + */ + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd = qb_list_entry (iter, struct cpg_pd, list); + if (mar_name_compare (&cpd->group_name, group_name) == 0) { + if (cpd->cpd_state == CPD_STATE_JOIN_COMPLETED || + cpd->cpd_state == CPD_STATE_LEAVE_STARTED) { + + api->ipc_dispatch_send (cpd->conn, buf, size); + cpd->transition_counter++; + } + } + } + + if (left_list_entries) { + /* + * Zero internal cpd state for all local processes leaving group + * (this loop is not strictly needed because left_list always either + * contains exactly one process running on local node or more items + * but none of them is running on local node) + */ + for (i = 0; i < joined_list_entries; i++) { + if (left_list[i].nodeid == api->totem_nodeid_get() && + left_list[i].reason == 
CONFCHG_CPG_REASON_LEAVE) { + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd = qb_list_entry (iter, struct cpg_pd, list); + if (left_list[i].pid == cpd->pid && + mar_name_compare (&cpd->group_name, group_name) == 0) { + cpd->pid = 0; + memset (&cpd->group_name, 0, sizeof(cpd->group_name)); + cpd->cpd_state = CPD_STATE_UNJOINED; + } + } + } + } + } + + /* + * Traverse thru cpds and send totem membership for cpd, where it is not send yet + */ + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd = qb_list_entry (iter, struct cpg_pd, list); + + if ((cpd->flags & CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF) && (cpd->initial_totem_conf_sent == 0)) { + cpd->initial_totem_conf_sent = 1; + + notify_lib_totem_membership (cpd->conn, my_old_member_list_entries, my_old_member_list); + } + } + + return CS_OK; +} + +static void downlist_log(const char *msg, struct req_exec_cpg_downlist *dl) +{ + log_printf (LOG_DEBUG, + "%s: members(old:%d left:%d)", + msg, + dl->old_members, + dl->left_nodes); +} + +static void downlist_inform_clients (void) +{ + struct qb_list_head *iter, *tmp_iter; + struct process_info *left_pi; + qb_map_t *group_map; + struct cpg_name cpg_group; + mar_cpg_name_t group; + struct confchg_data{ + struct cpg_name cpg_group; + mar_cpg_address_t left_list[CPG_MEMBERS_MAX]; + int left_list_entries; + struct qb_list_head list; + } *pcd; + qb_map_iter_t *miter; + int i, size; + + downlist_log("my downlist", &g_req_exec_cpg_downlist); + + group_map = qb_skiplist_create(); + + /* + * only the cpg groups included in left nodes should receive + * confchg event, so we will collect these cpg groups and + * relative left_lists here. 
+ */ + qb_list_for_each_safe(iter, tmp_iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry(iter, struct process_info, list); + + left_pi = NULL; + for (i = 0; i < g_req_exec_cpg_downlist.left_nodes; i++) { + + if (pi->nodeid == g_req_exec_cpg_downlist.nodeids[i]) { + left_pi = pi; + break; + } + } + + if (left_pi) { + marshall_from_mar_cpg_name_t(&cpg_group, &left_pi->group); + cpg_group.value[cpg_group.length] = 0; + + pcd = (struct confchg_data *)qb_map_get(group_map, cpg_group.value); + if (pcd == NULL) { + pcd = (struct confchg_data *)calloc(1, sizeof(struct confchg_data)); + memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name)); + qb_map_put(group_map, pcd->cpg_group.value, pcd); + } + size = pcd->left_list_entries; + pcd->left_list[size].nodeid = left_pi->nodeid; + pcd->left_list[size].pid = left_pi->pid; + pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN; + pcd->left_list_entries++; + qb_list_del (&left_pi->list); + free (left_pi); + } + } + + /* send only one confchg event per cpg group */ + miter = qb_map_iter_create(group_map); + while (qb_map_iter_next(miter, (void **)&pcd)) { + marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group); + + log_printf (LOG_DEBUG, "left_list_entries:%d", pcd->left_list_entries); + for (i=0; i<pcd->left_list_entries; i++) { + log_printf (LOG_DEBUG, "left_list[%d] group:%s, ip:%s, pid:%d", + i, cpg_print_group_name(&group), + (char*)api->totem_ifaces_print(pcd->left_list[i].nodeid), + pcd->left_list[i].pid); + } + + /* send confchg event */ + notify_lib_joinlist(&group, + 0, NULL, + pcd->left_list_entries, + pcd->left_list, + MESSAGE_RES_CPG_CONFCHG_CALLBACK); + + free(pcd); + } + qb_map_iter_free(miter); + qb_map_destroy(group_map); +} + +/* + * Remove processes that might have left the group while we were suspended. 
+ */ +static void joinlist_remove_zombie_pi_entries (void) +{ + struct qb_list_head *pi_iter, *tmp_iter; + struct qb_list_head *jl_iter; + struct process_info *pi; + struct joinlist_msg *stored_msg; + int found; + + qb_list_for_each_safe(pi_iter, tmp_iter, &process_info_list_head) { + pi = qb_list_entry (pi_iter, struct process_info, list); + + /* + * Ignore local node + */ + if (pi->nodeid == api->totem_nodeid_get()) { + continue ; + } + + /* + * Try to find message in joinlist messages + */ + found = 0; + qb_list_for_each(jl_iter, &joinlist_messages_head) { + stored_msg = qb_list_entry(jl_iter, struct joinlist_msg, list); + + if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { + continue ; + } + + if (pi->nodeid == stored_msg->sender_nodeid && + pi->pid == stored_msg->pid && + mar_name_compare (&pi->group, &stored_msg->group_name) == 0) { + found = 1; + break ; + } + } + + if (!found) { + do_proc_leave(&pi->group, pi->pid, pi->nodeid, CONFCHG_CPG_REASON_PROCDOWN); + } + } +} + +static void joinlist_inform_clients (void) +{ + struct joinlist_msg *stored_msg; + struct qb_list_head *iter; + unsigned int i; + qb_map_t *group_notify_map; + qb_map_iter_t *miter; + struct join_list_confchg_data *jld; + + group_notify_map = qb_skiplist_create(); + + i = 0; + qb_list_for_each(iter, &joinlist_messages_head) { + stored_msg = qb_list_entry(iter, struct joinlist_msg, list); + + log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%s, ip:%s, pid:%d", + i++, cpg_print_group_name(&stored_msg->group_name), + (char*)api->totem_ifaces_print(stored_msg->sender_nodeid), + stored_msg->pid); + + /* Ignore our own messages */ + if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { + continue ; + } + + do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid, + CONFCHG_CPG_REASON_NODEUP, group_notify_map); + } + + miter = qb_map_iter_create(group_notify_map); + while (qb_map_iter_next(miter, (void **)&jld)) { + notify_lib_joinlist(&jld->cpg_group, + 
jld->join_list_entries, jld->join_list, + 0, NULL, + MESSAGE_RES_CPG_CONFCHG_CALLBACK); + free(jld); + } + qb_map_iter_free(miter); + qb_map_destroy(group_notify_map); + + joinlist_remove_zombie_pi_entries (); +} + +static void joinlist_messages_delete (void) +{ + struct joinlist_msg *stored_msg; + struct qb_list_head *iter, *tmp_iter; + + qb_list_for_each_safe(iter, tmp_iter, &joinlist_messages_head) { + stored_msg = qb_list_entry(iter, struct joinlist_msg, list); + qb_list_del (&stored_msg->list); + free (stored_msg); + } + qb_list_init (&joinlist_messages_head); +} + +static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) +{ + qb_list_init (&joinlist_messages_head); + api = corosync_api; + return (NULL); +} + +static void cpg_iteration_instance_finalize (struct cpg_iteration_instance *cpg_iteration_instance) +{ + struct qb_list_head *iter, *tmp_iter; + struct process_info *pi; + + qb_list_for_each_safe(iter, tmp_iter, &(cpg_iteration_instance->items_list_head)) { + pi = qb_list_entry (iter, struct process_info, list); + qb_list_del (&pi->list); + free (pi); + } + + qb_list_del (&cpg_iteration_instance->list); + hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); +} + +static void cpg_pd_finalize (struct cpg_pd *cpd) +{ + struct qb_list_head *iter, *tmp_iter; + struct cpg_iteration_instance *cpii; + + zcb_all_free(cpd); + qb_list_for_each_safe(iter, tmp_iter, &(cpd->iteration_instance_list_head)) { + cpii = qb_list_entry (iter, struct cpg_iteration_instance, list); + + cpg_iteration_instance_finalize (cpii); + } + + qb_list_del (&cpd->list); +} + +static int cpg_lib_exit_fn (void *conn) +{ + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + + log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p", conn); + + if (cpd->group_name.length > 0 && cpd->cpd_state != CPD_STATE_LEAVE_STARTED) { + cpg_node_joinleave_send (cpd->pid, &cpd->group_name, + MESSAGE_REQ_EXEC_CPG_PROCLEAVE, 
CONFCHG_CPG_REASON_PROCDOWN); + } + + cpg_pd_finalize (cpd); + + api->ipc_refcnt_dec (conn); + return (0); +} + +static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason) +{ + struct req_exec_cpg_procjoin req_exec_cpg_procjoin; + struct iovec req_exec_cpg_iovec; + int result; + + memset(&req_exec_cpg_procjoin, 0, sizeof(req_exec_cpg_procjoin)); + + memcpy(&req_exec_cpg_procjoin.group_name, group_name, sizeof(mar_cpg_name_t)); + req_exec_cpg_procjoin.pid = pid; + req_exec_cpg_procjoin.reason = reason; + + req_exec_cpg_procjoin.header.size = sizeof(req_exec_cpg_procjoin); + req_exec_cpg_procjoin.header.id = SERVICE_ID_MAKE(CPG_SERVICE, fn); + + req_exec_cpg_iovec.iov_base = (char *)&req_exec_cpg_procjoin; + req_exec_cpg_iovec.iov_len = sizeof(req_exec_cpg_procjoin); + + result = api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED); + + return (result); +} + +/* Can byteswap join & leave messages */ +static void exec_cpg_procjoin_endian_convert (void *msg) +{ + struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = msg; + + req_exec_cpg_procjoin->pid = swab32(req_exec_cpg_procjoin->pid); + swab_mar_cpg_name_t (&req_exec_cpg_procjoin->group_name); + req_exec_cpg_procjoin->reason = swab32(req_exec_cpg_procjoin->reason); +} + +static void exec_cpg_joinlist_endian_convert (void *msg_v) +{ + char *msg = msg_v; + struct qb_ipc_response_header *res = (struct qb_ipc_response_header *)msg; + struct join_list_entry *jle = (struct join_list_entry *)(msg + sizeof(struct qb_ipc_response_header)); + + swab_mar_int32_t (&res->size); + + while ((const char*)jle < msg + res->size) { + jle->pid = swab32(jle->pid); + swab_mar_cpg_name_t (&jle->group_name); + jle++; + } +} + +static void exec_cpg_downlist_endian_convert_old (void *msg) +{ +} + +static void exec_cpg_downlist_endian_convert (void *msg) +{ + struct req_exec_cpg_downlist *req_exec_cpg_downlist = msg; + unsigned int i; + + req_exec_cpg_downlist->left_nodes = 
swab32(req_exec_cpg_downlist->left_nodes); + req_exec_cpg_downlist->old_members = swab32(req_exec_cpg_downlist->old_members); + + for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) { + req_exec_cpg_downlist->nodeids[i] = swab32(req_exec_cpg_downlist->nodeids[i]); + } +} + + +static void exec_cpg_mcast_endian_convert (void *msg) +{ + struct req_exec_cpg_mcast *req_exec_cpg_mcast = msg; + + swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); + swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); + req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); + req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); + swab_mar_message_source_t (&req_exec_cpg_mcast->source); +} + +static void exec_cpg_partial_mcast_endian_convert (void *msg) +{ + struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = msg; + + swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); + swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); + req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); + req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); + req_exec_cpg_mcast->fraglen = swab32(req_exec_cpg_mcast->fraglen); + req_exec_cpg_mcast->type = swab32(req_exec_cpg_mcast->type); + swab_mar_message_source_t (&req_exec_cpg_mcast->source); +} + +static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) { + struct qb_list_head *iter; + + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); + + if (pi->pid == pid && pi->nodeid == nodeid && + mar_name_compare (&pi->group, group_name) == 0) { + return pi; + } + } + + return NULL; +} + +static void do_proc_join( + const mar_cpg_name_t *name, + uint32_t pid, + unsigned int nodeid, + int reason, + qb_map_t *group_notify_map) +{ + struct process_info *pi; + struct process_info *pi_entry; + mar_cpg_address_t notify_info; + struct qb_list_head *list; + struct qb_list_head 
*list_to_add = NULL; + int size; + + if (process_info_find (name, pid, nodeid) != NULL) { + return ; + } + pi = malloc (sizeof (struct process_info)); + if (!pi) { + log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); + return; + } + pi->nodeid = nodeid; + pi->pid = pid; + memcpy(&pi->group, name, sizeof(*name)); + qb_list_init(&pi->list); + + /* + * Insert new process in sorted order so synchronization works properly + */ + list_to_add = &process_info_list_head; + qb_list_for_each(list, &process_info_list_head) { + pi_entry = qb_list_entry(list, struct process_info, list); + if (pi_entry->nodeid > pi->nodeid || + (pi_entry->nodeid == pi->nodeid && pi_entry->pid > pi->pid)) { + + break; + } + list_to_add = list; + } + qb_list_add (&pi->list, list_to_add); + + notify_info.pid = pi->pid; + notify_info.nodeid = nodeid; + notify_info.reason = reason; + + if (group_notify_map == NULL) { + notify_lib_joinlist(&pi->group, + 1, ¬ify_info, + 0, NULL, + MESSAGE_RES_CPG_CONFCHG_CALLBACK); + } else { + struct join_list_confchg_data *jld = qb_map_get(group_notify_map, pi->group.value); + if (jld == NULL) { + jld = (struct join_list_confchg_data *)calloc(1, sizeof(struct join_list_confchg_data)); + memcpy(&jld->cpg_group, &pi->group, sizeof(mar_cpg_name_t)); + qb_map_put(group_notify_map, jld->cpg_group.value, jld); + } + size = jld->join_list_entries; + jld->join_list[size].nodeid = notify_info.nodeid; + jld->join_list[size].pid = notify_info.pid; + jld->join_list[size].reason = notify_info.reason; + jld->join_list_entries++; + } +} + +static void do_proc_leave( + const mar_cpg_name_t *name, + uint32_t pid, + unsigned int nodeid, + int reason) +{ + struct process_info *pi; + struct qb_list_head *iter, *tmp_iter; + mar_cpg_address_t notify_info; + + notify_info.pid = pid; + notify_info.nodeid = nodeid; + notify_info.reason = reason; + + notify_lib_joinlist(name, + 0, NULL, + 1, ¬ify_info, + MESSAGE_RES_CPG_CONFCHG_CALLBACK); + + 
qb_list_for_each_safe(iter, tmp_iter, &process_info_list_head) { + pi = qb_list_entry(iter, struct process_info, list); + + if (pi->pid == pid && pi->nodeid == nodeid && + mar_name_compare (&pi->group, name)==0) { + qb_list_del (&pi->list); + free (pi); + } + } +} + +static void message_handler_req_exec_cpg_downlist_old ( + const void *message, + unsigned int nodeid) +{ + log_printf (LOGSYS_LEVEL_DEBUG, "downlist OLD from node " CS_PRI_NODE_ID, + nodeid); +} + +static void message_handler_req_exec_cpg_downlist( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message; + + log_printf (LOGSYS_LEVEL_DEBUG, "downlist left_list: %d received", + req_exec_cpg_downlist->left_nodes); +} + + +static void message_handler_req_exec_cpg_procjoin ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; + + log_printf(LOGSYS_LEVEL_DEBUG, "got procjoin message from cluster node " CS_PRI_NODE_ID " (%s) for pid %u", + nodeid, + api->totem_ifaces_print(nodeid), + (unsigned int)req_exec_cpg_procjoin->pid); + + do_proc_join (&req_exec_cpg_procjoin->group_name, + req_exec_cpg_procjoin->pid, nodeid, + CONFCHG_CPG_REASON_JOIN, NULL); +} + +static void message_handler_req_exec_cpg_procleave ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; + + log_printf(LOGSYS_LEVEL_DEBUG, "got procleave message from cluster node " CS_PRI_NODE_ID " (%s) for pid %u", + nodeid, + api->totem_ifaces_print(nodeid), + (unsigned int)req_exec_cpg_procjoin->pid); + + do_proc_leave (&req_exec_cpg_procjoin->group_name, + req_exec_cpg_procjoin->pid, nodeid, + req_exec_cpg_procjoin->reason); +} + + +/* Got a proclist from another node */ +static void message_handler_req_exec_cpg_joinlist ( + const void *message_v, + unsigned int nodeid) +{ + const char *message = message_v; + const struct qb_ipc_response_header *res = 
(const struct qb_ipc_response_header *)message; + const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header)); + struct joinlist_msg *stored_msg; + + log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node " CS_PRI_NODE_ID, + nodeid); + + while ((const char*)jle < message + res->size) { + stored_msg = malloc (sizeof (struct joinlist_msg)); + memset(stored_msg, 0, sizeof (struct joinlist_msg)); + stored_msg->sender_nodeid = nodeid; + stored_msg->pid = jle->pid; + memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t)); + qb_list_init (&stored_msg->list); + qb_list_add (&stored_msg->list, &joinlist_messages_head); + jle++; + } +} + +static void message_handler_req_exec_cpg_mcast ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cpg_mcast *req_exec_cpg_mcast = message; + struct res_lib_cpg_deliver_callback res_lib_cpg_mcast; + int msglen = req_exec_cpg_mcast->msglen; + struct qb_list_head *iter, *pi_iter, *tmp_iter; + struct cpg_pd *cpd; + struct iovec iovec[2]; + int known_node = 0; + + res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_DELIVER_CALLBACK; + res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; + res_lib_cpg_mcast.msglen = msglen; + res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; + res_lib_cpg_mcast.nodeid = nodeid; + + memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, + sizeof(mar_cpg_name_t)); + iovec[0].iov_base = (void *)&res_lib_cpg_mcast; + iovec[0].iov_len = sizeof (res_lib_cpg_mcast); + + iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); + iovec[1].iov_len = msglen; + + qb_list_for_each_safe(iter, tmp_iter, &cpg_pd_list_head) { + cpd = qb_list_entry(iter, struct cpg_pd, list); + if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) + && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { + + if (!known_node) { + /* Try to find, 
if we know the node */ + qb_list_for_each(pi_iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (pi_iter, struct process_info, list); + + if (pi->nodeid == nodeid && + mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { + known_node = 1; + break; + } + } + } + + if (!known_node) { + log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); + return ; + } + + api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); + } + } +} + +static void message_handler_req_exec_cpg_partial_mcast ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = message; + struct res_lib_cpg_partial_deliver_callback res_lib_cpg_mcast; + int msglen = req_exec_cpg_mcast->fraglen; + struct qb_list_head *iter, *pi_iter, *tmp_iter; + struct cpg_pd *cpd; + struct iovec iovec[2]; + int known_node = 0; + + log_printf(LOGSYS_LEVEL_DEBUG, "Got fragmented message from node " CS_PRI_NODE_ID ", size = %d bytes\n", nodeid, msglen); + + res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK; + res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; + res_lib_cpg_mcast.fraglen = msglen; + res_lib_cpg_mcast.msglen = req_exec_cpg_mcast->msglen; + res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; + res_lib_cpg_mcast.type = req_exec_cpg_mcast->type; + res_lib_cpg_mcast.nodeid = nodeid; + + memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, + sizeof(mar_cpg_name_t)); + iovec[0].iov_base = (void *)&res_lib_cpg_mcast; + iovec[0].iov_len = sizeof (res_lib_cpg_mcast); + + iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); + iovec[1].iov_len = msglen; + + qb_list_for_each_safe(iter, tmp_iter, &cpg_pd_list_head) { + cpd = qb_list_entry(iter, struct cpg_pd, list); + + if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) + && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { 
+ + if (!known_node) { + /* Try to find, if we know the node */ + qb_list_for_each(pi_iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (pi_iter, struct process_info, list); + + if (pi->nodeid == nodeid && + mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { + known_node = 1; + break; + } + } + } + + if (!known_node) { + log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); + return ; + } + + api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); + } + } +} + + +static int cpg_exec_send_downlist(void) +{ + struct iovec iov; + + g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST); + g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist); + + g_req_exec_cpg_downlist.old_members = my_old_member_list_entries; + + iov.iov_base = (void *)&g_req_exec_cpg_downlist; + iov.iov_len = g_req_exec_cpg_downlist.header.size; + + return (api->totem_mcast (&iov, 1, TOTEM_AGREED)); +} + +static int cpg_exec_send_joinlist(void) +{ + int count = 0; + struct qb_list_head *iter; + struct qb_ipc_response_header *res; + char *buf; + size_t buf_size; + struct join_list_entry *jle; + struct iovec req_exec_cpg_iovec; + + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); + + if (pi->nodeid == api->totem_nodeid_get ()) { + count++; + } + } + + /* Nothing to send */ + if (!count) + return 0; + + buf_size = sizeof(struct qb_ipc_response_header) + sizeof(struct join_list_entry) * count; + buf = alloca(buf_size); + if (!buf) { + log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate joinlist buffer"); + return -1; + } + memset(buf, 0, buf_size); + + jle = (struct join_list_entry *)(buf + sizeof(struct qb_ipc_response_header)); + res = (struct qb_ipc_response_header *)buf; + + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, 
list); + + if (pi->nodeid == api->totem_nodeid_get ()) { + memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t)); + jle->pid = pi->pid; + jle++; + } + } + + res->id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_JOINLIST); + res->size = sizeof(struct qb_ipc_response_header)+sizeof(struct join_list_entry) * count; + + req_exec_cpg_iovec.iov_base = buf; + req_exec_cpg_iovec.iov_len = res->size; + + return (api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED)); +} + +static int cpg_lib_init_fn (void *conn) +{ + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + memset (cpd, 0, sizeof(struct cpg_pd)); + cpd->conn = conn; + qb_list_add (&cpd->list, &cpg_pd_list_head); + + qb_list_init (&cpd->iteration_instance_list_head); + qb_list_init (&cpd->zcb_mapped_list_head); + + api->ipc_refcnt_inc (conn); + log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p, cpd=%p", conn, cpd); + return (0); +} + +/* Join message from the library */ +static void message_handler_req_lib_cpg_join (void *conn, const void *message) +{ + const struct req_lib_cpg_join *req_lib_cpg_join = message; + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + struct res_lib_cpg_join res_lib_cpg_join; + cs_error_t error = CS_OK; + struct qb_list_head *iter; + + /* Test, if we don't have same pid and group name joined */ + qb_list_for_each(iter, &cpg_pd_list_head) { + struct cpg_pd *cpd_item = qb_list_entry (iter, struct cpg_pd, list); + + if (cpd_item->pid == req_lib_cpg_join->pid && + mar_name_compare(&req_lib_cpg_join->group_name, &cpd_item->group_name) == 0) { + + /* We have same pid and group name joined -> return error */ + error = CS_ERR_EXIST; + goto response_send; + } + } + + /* + * Same check must be done in process info list, because there may be not yet delivered + * leave of client. 
+ */ + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); + + if (pi->nodeid == api->totem_nodeid_get () && pi->pid == req_lib_cpg_join->pid && + mar_name_compare(&req_lib_cpg_join->group_name, &pi->group) == 0) { + /* We have same pid and group name joined -> return error */ + error = CS_ERR_TRY_AGAIN; + goto response_send; + } + } + + if (req_lib_cpg_join->group_name.length > CPG_MAX_NAME_LENGTH) { + error = CS_ERR_NAME_TOO_LONG; + goto response_send; + } + + switch (cpd->cpd_state) { + case CPD_STATE_UNJOINED: + error = CS_OK; + cpd->cpd_state = CPD_STATE_JOIN_STARTED; + cpd->pid = req_lib_cpg_join->pid; + cpd->flags = req_lib_cpg_join->flags; + memcpy (&cpd->group_name, &req_lib_cpg_join->group_name, + sizeof (cpd->group_name)); + + cpg_node_joinleave_send (req_lib_cpg_join->pid, + &req_lib_cpg_join->group_name, + MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN); + break; + case CPD_STATE_LEAVE_STARTED: + error = CS_ERR_BUSY; + break; + case CPD_STATE_JOIN_STARTED: + error = CS_ERR_EXIST; + break; + case CPD_STATE_JOIN_COMPLETED: + error = CS_ERR_EXIST; + break; + } + +response_send: + res_lib_cpg_join.header.size = sizeof(res_lib_cpg_join); + res_lib_cpg_join.header.id = MESSAGE_RES_CPG_JOIN; + res_lib_cpg_join.header.error = error; + api->ipc_response_send (conn, &res_lib_cpg_join, sizeof(res_lib_cpg_join)); +} + +/* Leave message from the library */ +static void message_handler_req_lib_cpg_leave (void *conn, const void *message) +{ + struct res_lib_cpg_leave res_lib_cpg_leave; + cs_error_t error = CS_OK; + struct req_lib_cpg_leave *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message; + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + + log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p", conn); + + switch (cpd->cpd_state) { + case CPD_STATE_UNJOINED: + error = CS_ERR_NOT_EXIST; + break; + case CPD_STATE_LEAVE_STARTED: + error = 
CS_ERR_NOT_EXIST; + break; + case CPD_STATE_JOIN_STARTED: + error = CS_ERR_BUSY; + break; + case CPD_STATE_JOIN_COMPLETED: + error = CS_OK; + cpd->cpd_state = CPD_STATE_LEAVE_STARTED; + cpg_node_joinleave_send (req_lib_cpg_leave->pid, + &req_lib_cpg_leave->group_name, + MESSAGE_REQ_EXEC_CPG_PROCLEAVE, + CONFCHG_CPG_REASON_LEAVE); + break; + } + + /* send return */ + res_lib_cpg_leave.header.size = sizeof(res_lib_cpg_leave); + res_lib_cpg_leave.header.id = MESSAGE_RES_CPG_LEAVE; + res_lib_cpg_leave.header.error = error; + api->ipc_response_send(conn, &res_lib_cpg_leave, sizeof(res_lib_cpg_leave)); +} + +/* Finalize message from library */ +static void message_handler_req_lib_cpg_finalize ( + void *conn, + const void *message) +{ + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + struct res_lib_cpg_finalize res_lib_cpg_finalize; + cs_error_t error = CS_OK; + + log_printf (LOGSYS_LEVEL_DEBUG, "cpg finalize for conn=%p", conn); + + /* + * We will just remove cpd from list. 
After this call, connection will be + * closed on lib side, and cpg_lib_exit_fn will be called + */ + qb_list_del (&cpd->list); + qb_list_init (&cpd->list); + + res_lib_cpg_finalize.header.size = sizeof (res_lib_cpg_finalize); + res_lib_cpg_finalize.header.id = MESSAGE_RES_CPG_FINALIZE; + res_lib_cpg_finalize.header.error = error; + + api->ipc_response_send (conn, &res_lib_cpg_finalize, + sizeof (res_lib_cpg_finalize)); +} + +static int +memory_map ( + const char *path, + size_t bytes, + void **buf) +{ + int32_t fd; + void *addr; + int32_t res; + + fd = open (path, O_RDWR, 0600); + + unlink (path); + + if (fd == -1) { + return (-1); + } + + res = ftruncate (fd, bytes); + if (res == -1) { + goto error_close_unlink; + } + + addr = mmap (NULL, bytes, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + + if (addr == MAP_FAILED) { + goto error_close_unlink; + } +#ifdef MADV_NOSYNC + madvise(addr, bytes, MADV_NOSYNC); +#endif + + res = close (fd); + if (res) { + munmap (addr, bytes); + return (-1); + } + *buf = addr; + return (0); + +error_close_unlink: + close (fd); + unlink(path); + return -1; +} + +static inline int zcb_alloc ( + struct cpg_pd *cpd, + const char *path_to_file, + size_t size, + void **addr) +{ + struct zcb_mapped *zcb_mapped; + unsigned int res; + + zcb_mapped = malloc (sizeof (struct zcb_mapped)); + if (zcb_mapped == NULL) { + return (-1); + } + + res = memory_map ( + path_to_file, + size, + addr); + if (res == -1) { + free (zcb_mapped); + return (-1); + } + + qb_list_init (&zcb_mapped->list); + zcb_mapped->addr = *addr; + zcb_mapped->size = size; + qb_list_add_tail (&zcb_mapped->list, &cpd->zcb_mapped_list_head); + return (0); +} + + +static inline int zcb_free (struct zcb_mapped *zcb_mapped) +{ + unsigned int res; + + res = munmap (zcb_mapped->addr, zcb_mapped->size); + qb_list_del (&zcb_mapped->list); + free (zcb_mapped); + return (res); +} + +static inline int zcb_by_addr_free (struct cpg_pd *cpd, void *addr) +{ + struct qb_list_head *list, *tmp_iter; 
+ struct zcb_mapped *zcb_mapped; + unsigned int res = 0; + + qb_list_for_each_safe(list, tmp_iter, &(cpd->zcb_mapped_list_head)) { + zcb_mapped = qb_list_entry (list, struct zcb_mapped, list); + + if (zcb_mapped->addr == addr) { + res = zcb_free (zcb_mapped); + break; + } + + } + return (res); +} + +static inline int zcb_all_free ( + struct cpg_pd *cpd) +{ + struct qb_list_head *list, *tmp_iter; + struct zcb_mapped *zcb_mapped; + + qb_list_for_each_safe(list, tmp_iter, &(cpd->zcb_mapped_list_head)) { + zcb_mapped = qb_list_entry (list, struct zcb_mapped, list); + + zcb_free (zcb_mapped); + } + return (0); +} + +union u { + uint64_t server_addr; + void *server_ptr; +}; + +static uint64_t void2serveraddr (void *server_ptr) +{ + union u u; + + u.server_ptr = server_ptr; + return (u.server_addr); +} + +static void *serveraddr2void (uint64_t server_addr) +{ + union u u; + + u.server_addr = server_addr; + return (u.server_ptr); +}; + +static void message_handler_req_lib_cpg_zc_alloc ( + void *conn, + const void *message) +{ + mar_req_coroipcc_zc_alloc_t *hdr = (mar_req_coroipcc_zc_alloc_t *)message; + struct qb_ipc_response_header res_header; + void *addr = NULL; + struct coroipcs_zc_header *zc_header; + unsigned int res; + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + + log_printf(LOGSYS_LEVEL_DEBUG, "path: %s", hdr->path_to_file); + + res = zcb_alloc (cpd, hdr->path_to_file, hdr->map_size, + &addr); + assert(res == 0); + + zc_header = (struct coroipcs_zc_header *)addr; + zc_header->server_address = void2serveraddr(addr); + + res_header.size = sizeof (struct qb_ipc_response_header); + res_header.id = 0; + api->ipc_response_send (conn, + &res_header, + res_header.size); +} + +static void message_handler_req_lib_cpg_zc_free ( + void *conn, + const void *message) +{ + mar_req_coroipcc_zc_free_t *hdr = (mar_req_coroipcc_zc_free_t *)message; + struct qb_ipc_response_header res_header; + void *addr = NULL; + struct cpg_pd *cpd = (struct cpg_pd 
*)api->ipc_private_data_get (conn); + + log_printf(LOGSYS_LEVEL_DEBUG, " free'ing"); + + addr = serveraddr2void (hdr->server_address); + + zcb_by_addr_free (cpd, addr); + + res_header.size = sizeof (struct qb_ipc_response_header); + res_header.id = 0; + api->ipc_response_send ( + conn, &res_header, + res_header.size); +} + +/* Fragmented mcast message from the library */ +static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message) +{ + const struct req_lib_cpg_partial_mcast *req_lib_cpg_mcast = message; + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + mar_cpg_name_t group_name = cpd->group_name; + + struct iovec req_exec_cpg_iovec[2]; + struct req_exec_cpg_partial_mcast req_exec_cpg_mcast; + struct res_lib_cpg_partial_send res_lib_cpg_partial_send; + int msglen = req_lib_cpg_mcast->fraglen; + int result; + cs_error_t error = CS_ERR_NOT_EXIST; + + log_printf(LOGSYS_LEVEL_TRACE, "got fragmented mcast request on %p", conn); + log_printf(LOGSYS_LEVEL_DEBUG, "Sending fragmented message size = %d bytes\n", msglen); + + switch (cpd->cpd_state) { + case CPD_STATE_UNJOINED: + error = CS_ERR_NOT_EXIST; + break; + case CPD_STATE_LEAVE_STARTED: + error = CS_ERR_NOT_EXIST; + break; + case CPD_STATE_JOIN_STARTED: + error = CS_OK; + break; + case CPD_STATE_JOIN_COMPLETED: + error = CS_OK; + break; + } + + res_lib_cpg_partial_send.header.size = sizeof(res_lib_cpg_partial_send); + res_lib_cpg_partial_send.header.id = MESSAGE_RES_CPG_PARTIAL_SEND; + + if (req_lib_cpg_mcast->type == LIBCPG_PARTIAL_FIRST) { + cpd->initial_transition_counter = cpd->transition_counter; + } + if (cpd->transition_counter != cpd->initial_transition_counter) { + error = CS_ERR_INTERRUPT; + } + + if (error == CS_OK) { + req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; + req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, + MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST); + req_exec_cpg_mcast.pid = cpd->pid; + req_exec_cpg_mcast.msglen = 
req_lib_cpg_mcast->msglen; + req_exec_cpg_mcast.type = req_lib_cpg_mcast->type; + req_exec_cpg_mcast.fraglen = req_lib_cpg_mcast->fraglen; + api->ipc_source_set (&req_exec_cpg_mcast.source, conn); + memcpy(&req_exec_cpg_mcast.group_name, &group_name, + sizeof(mar_cpg_name_t)); + + req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; + req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); + req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; + req_exec_cpg_iovec[1].iov_len = msglen; + + result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); + assert(result == 0); + } else { + log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d", + conn, group_name.value, cpd->cpd_state, error); + } + + res_lib_cpg_partial_send.header.error = error; + api->ipc_response_send (conn, &res_lib_cpg_partial_send, + sizeof (res_lib_cpg_partial_send)); +} + +/* Mcast message from the library */ +static void message_handler_req_lib_cpg_mcast (void *conn, const void *message) +{ + const struct req_lib_cpg_mcast *req_lib_cpg_mcast = message; + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + mar_cpg_name_t group_name = cpd->group_name; + + struct iovec req_exec_cpg_iovec[2]; + struct req_exec_cpg_mcast req_exec_cpg_mcast; + int msglen = req_lib_cpg_mcast->msglen; + int result; + cs_error_t error = CS_ERR_NOT_EXIST; + + log_printf(LOGSYS_LEVEL_TRACE, "got mcast request on %p", conn); + + switch (cpd->cpd_state) { + case CPD_STATE_UNJOINED: + error = CS_ERR_NOT_EXIST; + break; + case CPD_STATE_LEAVE_STARTED: + error = CS_ERR_NOT_EXIST; + break; + case CPD_STATE_JOIN_STARTED: + error = CS_OK; + break; + case CPD_STATE_JOIN_COMPLETED: + error = CS_OK; + break; + } + + if (error == CS_OK) { + memset(&req_exec_cpg_mcast, 0, sizeof(req_exec_cpg_mcast)); + + req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; + req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, + 
MESSAGE_REQ_EXEC_CPG_MCAST); + req_exec_cpg_mcast.pid = cpd->pid; + req_exec_cpg_mcast.msglen = msglen; + api->ipc_source_set (&req_exec_cpg_mcast.source, conn); + memcpy(&req_exec_cpg_mcast.group_name, &group_name, + sizeof(mar_cpg_name_t)); + + req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; + req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); + req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; + req_exec_cpg_iovec[1].iov_len = msglen; + + result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); + assert(result == 0); + } else { + log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d", + conn, group_name.value, cpd->cpd_state, error); + } +} + +static void message_handler_req_lib_cpg_zc_execute ( + void *conn, + const void *message) +{ + mar_req_coroipcc_zc_execute_t *hdr = (mar_req_coroipcc_zc_execute_t *)message; + struct qb_ipc_request_header *header; + struct res_lib_cpg_mcast res_lib_cpg_mcast; + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + struct iovec req_exec_cpg_iovec[2]; + struct req_exec_cpg_mcast req_exec_cpg_mcast; + struct req_lib_cpg_mcast *req_lib_cpg_mcast; + int result; + cs_error_t error = CS_ERR_NOT_EXIST; + + log_printf(LOGSYS_LEVEL_TRACE, "got ZC mcast request on %p", conn); + + header = (struct qb_ipc_request_header *)(((char *)serveraddr2void(hdr->server_address) + sizeof (struct coroipcs_zc_header))); + req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)header; + + switch (cpd->cpd_state) { + case CPD_STATE_UNJOINED: + error = CS_ERR_NOT_EXIST; + break; + case CPD_STATE_LEAVE_STARTED: + error = CS_ERR_NOT_EXIST; + break; + case CPD_STATE_JOIN_STARTED: + error = CS_OK; + break; + case CPD_STATE_JOIN_COMPLETED: + error = CS_OK; + break; + } + + res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast); + res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST; + if (error == CS_OK) { + req_exec_cpg_mcast.header.size = 
sizeof(req_exec_cpg_mcast) + req_lib_cpg_mcast->msglen; + req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, + MESSAGE_REQ_EXEC_CPG_MCAST); + req_exec_cpg_mcast.pid = cpd->pid; + req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; + api->ipc_source_set (&req_exec_cpg_mcast.source, conn); + memcpy(&req_exec_cpg_mcast.group_name, &cpd->group_name, + sizeof(mar_cpg_name_t)); + + req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; + req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); + req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast); + req_exec_cpg_iovec[1].iov_len = req_exec_cpg_mcast.msglen; + + result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); + if (result == 0) { + res_lib_cpg_mcast.header.error = CS_OK; + } else { + res_lib_cpg_mcast.header.error = CS_ERR_TRY_AGAIN; + } + } else { + res_lib_cpg_mcast.header.error = error; + } + + api->ipc_response_send (conn, &res_lib_cpg_mcast, + sizeof (res_lib_cpg_mcast)); + +} + +static void message_handler_req_lib_cpg_membership (void *conn, + const void *message) +{ + struct req_lib_cpg_membership_get *req_lib_cpg_membership_get = + (struct req_lib_cpg_membership_get *)message; + struct res_lib_cpg_membership_get res_lib_cpg_membership_get; + struct qb_list_head *iter; + int member_count = 0; + + res_lib_cpg_membership_get.header.id = MESSAGE_RES_CPG_MEMBERSHIP; + res_lib_cpg_membership_get.header.error = CS_OK; + res_lib_cpg_membership_get.header.size = + sizeof (struct res_lib_cpg_membership_get); + + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); + if (mar_name_compare (&pi->group, &req_lib_cpg_membership_get->group_name) == 0) { + res_lib_cpg_membership_get.member_list[member_count].nodeid = pi->nodeid; + res_lib_cpg_membership_get.member_list[member_count].pid = pi->pid; + member_count += 1; + } + } + res_lib_cpg_membership_get.member_count = member_count; + + 
api->ipc_response_send (conn, &res_lib_cpg_membership_get, + sizeof (res_lib_cpg_membership_get)); +} + +static void message_handler_req_lib_cpg_local_get (void *conn, + const void *message) +{ + struct res_lib_cpg_local_get res_lib_cpg_local_get; + + res_lib_cpg_local_get.header.size = sizeof (res_lib_cpg_local_get); + res_lib_cpg_local_get.header.id = MESSAGE_RES_CPG_LOCAL_GET; + res_lib_cpg_local_get.header.error = CS_OK; + res_lib_cpg_local_get.local_nodeid = api->totem_nodeid_get (); + + api->ipc_response_send (conn, &res_lib_cpg_local_get, + sizeof (res_lib_cpg_local_get)); +} + +static void message_handler_req_lib_cpg_iteration_initialize ( + void *conn, + const void *message) +{ + const struct req_lib_cpg_iterationinitialize *req_lib_cpg_iterationinitialize = message; + struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); + hdb_handle_t cpg_iteration_handle = 0; + struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize; + struct qb_list_head *iter, *iter2; + struct cpg_iteration_instance *cpg_iteration_instance; + cs_error_t error = CS_OK; + int res; + + log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration initialize"); + + /* Because between calling this function and *next can be some operations which will + * change list, we must do full copy. 
+ */ + + /* + * Create new iteration instance + */ + res = hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance), + &cpg_iteration_handle); + + if (res != 0) { + error = CS_ERR_NO_MEMORY; + goto response_send; + } + + res = hdb_handle_get (&cpg_iteration_handle_t_db, cpg_iteration_handle, (void *)&cpg_iteration_instance); + + if (res != 0) { + error = CS_ERR_BAD_HANDLE; + goto error_destroy; + } + + qb_list_init (&cpg_iteration_instance->items_list_head); + cpg_iteration_instance->handle = cpg_iteration_handle; + + /* + * Create copy of process_info list "grouped by" group name + */ + qb_list_for_each(iter, &process_info_list_head) { + struct process_info *pi = qb_list_entry (iter, struct process_info, list); + struct process_info *new_pi; + + if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { + /* + * Try to find processed group name in our list new list + */ + int found = 0; + + qb_list_for_each(iter2, &(cpg_iteration_instance->items_list_head)) { + struct process_info *pi2 = qb_list_entry (iter2, struct process_info, list); + + if (mar_name_compare (&pi2->group, &pi->group) == 0) { + found = 1; + break; + } + } + + if (found) { + /* + * We have this name in list -> don't add + */ + continue ; + } + } else if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_ONE_GROUP) { + /* + * Test pi group name with request + */ + if (mar_name_compare (&pi->group, &req_lib_cpg_iterationinitialize->group_name) != 0) + /* + * Not same -> don't add + */ + continue ; + } + + new_pi = malloc (sizeof (struct process_info)); + if (!new_pi) { + log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); + + error = CS_ERR_NO_MEMORY; + + goto error_put_destroy; + } + + memcpy (new_pi, pi, sizeof (struct process_info)); + qb_list_init (&new_pi->list); + + if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { + /* + * pid and nodeid -> undefined + */ + new_pi->pid = 
new_pi->nodeid = 0; + } + + /* + * We will return list "grouped" by "group name", so try to find right place to add + */ + qb_list_for_each(iter2, &(cpg_iteration_instance->items_list_head)) { + struct process_info *pi2 = qb_list_entry (iter2, struct process_info, list); + + if (mar_name_compare (&pi2->group, &pi->group) == 0) { + break; + } + } + + qb_list_add (&new_pi->list, iter2); + } + + /* + * Now we have a full "grouped by" copy of process_info list + */ + + /* + * Add instance to current cpd list + */ + qb_list_init (&cpg_iteration_instance->list); + qb_list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head); + + cpg_iteration_instance->current_pointer = &cpg_iteration_instance->items_list_head; + +error_put_destroy: + hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_handle); +error_destroy: + if (error != CS_OK) { + hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_handle); + } + +response_send: + res_lib_cpg_iterationinitialize.header.size = sizeof (res_lib_cpg_iterationinitialize); + res_lib_cpg_iterationinitialize.header.id = MESSAGE_RES_CPG_ITERATIONINITIALIZE; + res_lib_cpg_iterationinitialize.header.error = error; + res_lib_cpg_iterationinitialize.iteration_handle = cpg_iteration_handle; + + api->ipc_response_send (conn, &res_lib_cpg_iterationinitialize, + sizeof (res_lib_cpg_iterationinitialize)); +} + +static void message_handler_req_lib_cpg_iteration_next ( + void *conn, + const void *message) +{ + const struct req_lib_cpg_iterationnext *req_lib_cpg_iterationnext = message; + struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext; + struct cpg_iteration_instance *cpg_iteration_instance; + cs_error_t error = CS_OK; + int res; + struct process_info *pi; + + log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration next"); + + res = hdb_handle_get (&cpg_iteration_handle_t_db, + req_lib_cpg_iterationnext->iteration_handle, + (void *)&cpg_iteration_instance); + + if (res != 0) { + error = CS_ERR_LIBRARY; + goto 
error_exit; + } + + assert (cpg_iteration_instance); + + cpg_iteration_instance->current_pointer = cpg_iteration_instance->current_pointer->next; + + if (cpg_iteration_instance->current_pointer == &cpg_iteration_instance->items_list_head) { + error = CS_ERR_NO_SECTIONS; + goto error_put; + } + + pi = qb_list_entry (cpg_iteration_instance->current_pointer, struct process_info, list); + + /* + * Copy iteration data + */ + res_lib_cpg_iterationnext.description.nodeid = pi->nodeid; + res_lib_cpg_iterationnext.description.pid = pi->pid; + memcpy (&res_lib_cpg_iterationnext.description.group, + &pi->group, + sizeof (mar_cpg_name_t)); + +error_put: + hdb_handle_put (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle); +error_exit: + res_lib_cpg_iterationnext.header.size = sizeof (res_lib_cpg_iterationnext); + res_lib_cpg_iterationnext.header.id = MESSAGE_RES_CPG_ITERATIONNEXT; + res_lib_cpg_iterationnext.header.error = error; + + api->ipc_response_send (conn, &res_lib_cpg_iterationnext, + sizeof (res_lib_cpg_iterationnext)); +} + +static void message_handler_req_lib_cpg_iteration_finalize ( + void *conn, + const void *message) +{ + const struct req_lib_cpg_iterationfinalize *req_lib_cpg_iterationfinalize = message; + struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize; + struct cpg_iteration_instance *cpg_iteration_instance; + cs_error_t error = CS_OK; + int res; + + log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration finalize"); + + res = hdb_handle_get (&cpg_iteration_handle_t_db, + req_lib_cpg_iterationfinalize->iteration_handle, + (void *)&cpg_iteration_instance); + + if (res != 0) { + error = CS_ERR_LIBRARY; + goto error_exit; + } + + assert (cpg_iteration_instance); + + cpg_iteration_instance_finalize (cpg_iteration_instance); + hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); + +error_exit: + res_lib_cpg_iterationfinalize.header.size = sizeof (res_lib_cpg_iterationfinalize); + 
res_lib_cpg_iterationfinalize.header.id = MESSAGE_RES_CPG_ITERATIONFINALIZE; + res_lib_cpg_iterationfinalize.header.error = error; + + api->ipc_response_send (conn, &res_lib_cpg_iterationfinalize, + sizeof (res_lib_cpg_iterationfinalize)); +} diff --git a/exec/cs_queue.h b/exec/cs_queue.h new file mode 100644 index 0000000..3dd7233 --- /dev/null +++ b/exec/cs_queue.h @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2002-2004 MontaVista Software, Inc. + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/*
 * Fixed-size ring buffer of equally sized items.
 *
 * 'head' is the slot the next item is written to and 'tail' is the slot
 * just before the oldest item. One slot is always kept free, so a queue
 * created with N slots holds at most N - 1 items. 'usedhw' is the highest
 * value 'used' has reached. When 'threaded_mode_enabled' is non-zero every
 * operation takes 'mutex' while it reads or updates the queue state.
 */
struct cs_queue {
	int head;
	int tail;
	int used;
	int usedhw;
	size_t size;
	void *items;
	size_t size_per_item;
	int iterator;
	pthread_mutex_t mutex;
	int threaded_mode_enabled;
};

/*
 * Initialise a queue with cs_queue_items slots of size_per_item bytes.
 *
 * Returns 0 on success, -ENOMEM when the item storage cannot be allocated
 * (including when the total size overflows size_t), or the negated error
 * of pthread_mutex_init when the mutex cannot be created.
 */
static inline int cs_queue_init (struct cs_queue *cs_queue, size_t cs_queue_items, size_t size_per_item, int threaded_mode_enabled) {
	int res;

	cs_queue->head = 0;
	cs_queue->tail = cs_queue_items - 1;
	cs_queue->used = 0;
	cs_queue->usedhw = 0;
	cs_queue->size = cs_queue_items;
	cs_queue->size_per_item = size_per_item;
	cs_queue->iterator = 0;
	cs_queue->threaded_mode_enabled = threaded_mode_enabled;

	/* calloc zeroes the storage and rejects a count * size overflow */
	cs_queue->items = calloc (cs_queue_items, size_per_item);
	if (cs_queue->items == NULL) {
		return (-ENOMEM);
	}
	if (cs_queue->threaded_mode_enabled) {
		res = pthread_mutex_init (&cs_queue->mutex, NULL);
		if (res != 0) {
			free (cs_queue->items);
			cs_queue->items = NULL;
			return (-res);
		}
	}
	return (0);
}

/* Empty the queue and zero its storage; always returns 0 */
static inline int cs_queue_reinit (struct cs_queue *cs_queue)
{
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}
	cs_queue->head = 0;
	cs_queue->tail = cs_queue->size - 1;
	cs_queue->used = 0;
	cs_queue->usedhw = 0;

	memset (cs_queue->items, 0, cs_queue->size * cs_queue->size_per_item);
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
	return (0);
}

/* Destroy the mutex (threaded mode only) and release the item storage */
static inline void cs_queue_free (struct cs_queue *cs_queue) {
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_destroy (&cs_queue->mutex);
	}
	free (cs_queue->items);
}

/* Non-zero when size - 1 items are stored, i.e. no further item fits */
static inline int cs_queue_is_full (struct cs_queue *cs_queue) {
	int full;

	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}
	full = ((cs_queue->size - 1) == cs_queue->used);
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
	return (full);
}

/* Non-zero when no item is stored */
static inline int cs_queue_is_empty (struct cs_queue *cs_queue) {
	int empty;

	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}
	empty = (cs_queue->used == 0);
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
	return (empty);
}

/*
 * Append a copy of *item (size_per_item bytes) to the queue.
 * The queue must not be full; this is asserted before anything is written.
 */
static inline void cs_queue_item_add (struct cs_queue *cs_queue, void *item)
{
	char *cs_queue_item;
	int cs_queue_position;

	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}

	/* head == tail means the queue is full */
	assert (cs_queue->tail != cs_queue->head);

	cs_queue_position = cs_queue->head;
	cs_queue_item = cs_queue->items;
	cs_queue_item += cs_queue_position * cs_queue->size_per_item;
	memcpy (cs_queue_item, item, cs_queue->size_per_item);

	cs_queue->head = (cs_queue->head + 1) % cs_queue->size;
	cs_queue->used++;
	if (cs_queue->used > cs_queue->usedhw) {
		cs_queue->usedhw = cs_queue->used;
	}
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
}

/*
 * Return a pointer to the slot of the oldest item without removing it.
 * No emptiness check is made; the pointer refers to queue storage.
 */
static inline void *cs_queue_item_get (struct cs_queue *cs_queue)
{
	char *cs_queue_item;
	int cs_queue_position;

	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}
	cs_queue_position = (cs_queue->tail + 1) % cs_queue->size;
	cs_queue_item = cs_queue->items;
	cs_queue_item += cs_queue_position * cs_queue->size_per_item;
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
	return ((void *)cs_queue_item);
}

/* Remove the oldest item; the queue must not be empty (asserted) */
static inline void cs_queue_item_remove (struct cs_queue *cs_queue) {
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}
	cs_queue->tail = (cs_queue->tail + 1) % cs_queue->size;

	/* tail catching up with head means an empty queue was popped */
	assert (cs_queue->tail != cs_queue->head);

	cs_queue->used--;
	assert (cs_queue->used >= 0);
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
}

/*
 * Remove the rel_count oldest items.
 *
 * rel_count must be between 0 and the number of stored items; this is
 * asserted before the queue is modified. Checking the count directly
 * catches every over-removal and lets a zero count pass on a full queue,
 * where head == tail is the normal state.
 */
static inline void cs_queue_items_remove (struct cs_queue *cs_queue, int rel_count)
{
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}

	assert (rel_count >= 0 && rel_count <= cs_queue->used);

	cs_queue->tail = (cs_queue->tail + rel_count) % cs_queue->size;
	cs_queue->used -= rel_count;
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
}


/* Position the iterator on the oldest item */
static inline void cs_queue_item_iterator_init (struct cs_queue *cs_queue)
{
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}
	cs_queue->iterator = (cs_queue->tail + 1) % cs_queue->size;
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
}

/*
 * Return a pointer to the slot the iterator is on, or NULL when the
 * iterator has reached the head of the queue.
 */
static inline void *cs_queue_item_iterator_get (struct cs_queue *cs_queue)
{
	char *cs_queue_item = NULL;
	int cs_queue_position;

	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_lock (&cs_queue->mutex);
	}
	if (cs_queue->iterator != cs_queue->head) {
		cs_queue_position = (cs_queue->iterator) % cs_queue->size;
		cs_queue_item = cs_queue->items;
		cs_queue_item += cs_queue_position * cs_queue->size_per_item;
	}
	if (cs_queue->threaded_mode_enabled) {
		pthread_mutex_unlock (&cs_queue->mutex);
	}
	return ((void *)cs_queue_item);
}
pthread_mutex_lock (&cs_queue->mutex); + } + cs_queue->iterator = (cs_queue->iterator + 1) % cs_queue->size; + + next_res = cs_queue->iterator == cs_queue->head; + if (cs_queue->threaded_mode_enabled) { + pthread_mutex_unlock (&cs_queue->mutex); + } + return (next_res); +} + +static inline void cs_queue_avail (struct cs_queue *cs_queue, int *avail) +{ + if (cs_queue->threaded_mode_enabled) { + pthread_mutex_lock (&cs_queue->mutex); + } + *avail = cs_queue->size - cs_queue->used - 2; + assert (*avail >= 0); + if (cs_queue->threaded_mode_enabled) { + pthread_mutex_unlock (&cs_queue->mutex); + } +} + +static inline int cs_queue_used (struct cs_queue *cs_queue) { + int used; + + if (cs_queue->threaded_mode_enabled) { + pthread_mutex_lock (&cs_queue->mutex); + } + used = cs_queue->used; + if (cs_queue->threaded_mode_enabled) { + pthread_mutex_unlock (&cs_queue->mutex); + } + + return (used); +} + +static inline int cs_queue_usedhw (struct cs_queue *cs_queue) { + int usedhw; + + if (cs_queue->threaded_mode_enabled) { + pthread_mutex_lock (&cs_queue->mutex); + } + + usedhw = cs_queue->usedhw; + + if (cs_queue->threaded_mode_enabled) { + pthread_mutex_unlock (&cs_queue->mutex); + } + + return (usedhw); +} + +#endif /* CS_QUEUE_H_DEFINED */ diff --git a/exec/fsm.h b/exec/fsm.h new file mode 100644 index 0000000..87efd7d --- /dev/null +++ b/exec/fsm.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2010-2012 Red Hat + * + * All rights reserved. + * + * Author: Angus Salkeld <asalkeld@redhat.com> + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef FSM_H_DEFINED +#define FSM_H_DEFINED + +#include <sys/time.h> +#include <corosync/corotypes.h> +#include "util.h" + +struct cs_fsm; +struct cs_fsm_entry; +typedef void (*cs_fsm_event_action_fn)(struct cs_fsm* fsm, int32_t event, void * data); +typedef const char * (*cs_fsm_state_to_str_fn)(struct cs_fsm* fsm, int32_t state); +typedef const char * (*cs_fsm_event_to_str_fn)(struct cs_fsm* fsm, int32_t event); + +typedef void (*cs_fsm_cb)(struct cs_fsm *fsm, int cb_event, int32_t curr_state, + int32_t next_state, int32_t fsm_event, void *data); + +#define CS_FSM_NEXT_STATE_SIZE 32 + +#define CS_FSM_STATE_NONE -1 + +#define CS_FSM_CB_EVENT_PROCESS_NF 0 +#define CS_FSM_CB_EVENT_STATE_SET 1 +#define CS_FSM_CB_EVENT_STATE_SET_NF 2 + +struct cs_fsm_entry { + int32_t curr_state; + int32_t event; + cs_fsm_event_action_fn handler_fn; + int32_t next_states[CS_FSM_NEXT_STATE_SIZE]; +}; + +struct cs_fsm { + const char *name; + int32_t curr_state; + int32_t curr_entry; + size_t entries; + struct cs_fsm_entry *table; + cs_fsm_state_to_str_fn state_to_str; + cs_fsm_event_to_str_fn event_to_str; +}; + +/* + * the table entry is defined by the state + event (curr_entry). + * so cs_fsm_process() sets the entry and cs_fsm_state_set() + * sets the new state. 
+ */ +static inline void cs_fsm_process (struct cs_fsm *fsm, int32_t new_event, void * data, cs_fsm_cb cb) +{ + int32_t i; + + for (i = 0; i < fsm->entries; i++) { + if (fsm->table[i].event == new_event && + fsm->table[i].curr_state == fsm->curr_state) { + + assert (fsm->table[i].handler_fn != NULL); + /* set current entry */ + fsm->curr_entry = i; + fsm->table[i].handler_fn (fsm, new_event, data); + return; + } + } + + if (cb != NULL) { + cb(fsm, CS_FSM_CB_EVENT_PROCESS_NF, fsm->curr_state, CS_FSM_STATE_NONE, new_event, data); + } +} + +static inline void cs_fsm_state_set (struct cs_fsm* fsm, int32_t next_state, void* data, cs_fsm_cb cb) +{ + int i; + struct cs_fsm_entry *entry = &fsm->table[fsm->curr_entry]; + + if (fsm->curr_state == next_state) { + return; + } + /* + * confirm that "next_state" is in the current entry's next list + */ + for (i = 0; i < CS_FSM_NEXT_STATE_SIZE; i++) { + if (entry->next_states[i] < 0) { + break; + } + if (entry->next_states[i] == next_state) { + if (cb != NULL) { + cb(fsm, CS_FSM_CB_EVENT_STATE_SET, fsm->curr_state, next_state, entry->event, data); + } + fsm->curr_state = next_state; + return; + } + } + if (cb != NULL) { + cb(fsm, CS_FSM_CB_EVENT_STATE_SET_NF, fsm->curr_state, next_state, entry->event, data); + } +} + +#endif /* FSM_H_DEFINED */ + + diff --git a/exec/icmap.c b/exec/icmap.c new file mode 100644 index 0000000..4aeabab --- /dev/null +++ b/exec/icmap.c @@ -0,0 +1,1330 @@ +/* + * Copyright (c) 2011 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Jan Friesse (jfriesse@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the Red Hat, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <string.h> +#include <stdio.h> + +#include <corosync/corotypes.h> + +#include <qb/qbdefs.h> +#include <qb/qblist.h> +#include <corosync/icmap.h> + +#define ICMAP_MAX_VALUE_LEN (16*1024) + +struct icmap_item { + char *key_name; + icmap_value_types_t type; + size_t value_len; + char value[]; +}; + +struct icmap_map { + qb_map_t *qb_map; +}; + +static icmap_map_t icmap_global_map; + +struct icmap_track { + char *key_name; + int32_t track_type; + icmap_notify_fn_t notify_fn; + void *user_data; + struct qb_list_head list; +}; + +struct icmap_ro_access_item { + char *key_name; + int prefix; + struct qb_list_head list; +}; + +QB_LIST_DECLARE (icmap_ro_access_item_list_head); +QB_LIST_DECLARE (icmap_track_list_head); + +/* + * Static functions declarations + */ + +/* + * Check if key_name is valid icmap key name. Returns 0 on success, and -1 on fail + */ +static int icmap_check_key_name(const char *key_name); + +/* + * Check that value with given type has correct length value_len. Returns 0 on success, + * and -1 on fail + */ +static int icmap_check_value_len(const void *value, size_t value_len, icmap_value_types_t type); + +/* + * Checks if item has same value as value with value_len and given type. Returns 0 if not, otherwise !0. + */ +static int icmap_item_eq(const struct icmap_item *item, const void *value, size_t value_len, icmap_value_types_t type); + +/* + * Checks if given character is valid in key name. Returns 0 if not, otherwise !0. + */ +static int icmap_is_valid_name_char(char c); + +/* + * Helper for getting integer and float value with given type for key key_name and store it in value. + */ +static cs_error_t icmap_get_int_r( + const icmap_map_t map, + const char *key_name, + void *value, + icmap_value_types_t type); + +/* + * Return raw item value data. Internal function used by icmap_get_r which does most + * of arguments validity checks but doesn't copy data (it returns raw item data + * pointer). 
It's not very safe tho it's static. + */ +static cs_error_t icmap_get_ref_r( + const icmap_map_t map, + const char *key_name, + void **value, + size_t *value_len, + icmap_value_types_t *type); + +/* + * Function implementation + */ +int32_t icmap_tt_to_qbtt(int32_t track_type) +{ + int32_t res = 0; + + if (track_type & ICMAP_TRACK_DELETE) { + res |= QB_MAP_NOTIFY_DELETED; + } + + if (track_type & ICMAP_TRACK_MODIFY) { + res |= QB_MAP_NOTIFY_REPLACED; + } + + if (track_type & ICMAP_TRACK_ADD) { + res |= QB_MAP_NOTIFY_INSERTED; + } + + if (track_type & ICMAP_TRACK_PREFIX) { + res |= QB_MAP_NOTIFY_RECURSIVE; + } + + return (res); +} + +int32_t icmap_qbtt_to_tt(int32_t track_type) +{ + int32_t res = 0; + + if (track_type & QB_MAP_NOTIFY_DELETED) { + res |= ICMAP_TRACK_DELETE; + } + + if (track_type & QB_MAP_NOTIFY_REPLACED) { + res |= ICMAP_TRACK_MODIFY; + } + + if (track_type & QB_MAP_NOTIFY_INSERTED) { + res |= ICMAP_TRACK_ADD; + } + + if (track_type & QB_MAP_NOTIFY_RECURSIVE) { + res |= ICMAP_TRACK_PREFIX; + } + + return (res); +} + +static void icmap_map_free_cb(uint32_t event, + char* key, void* old_value, + void* value, void* user_data) +{ + struct icmap_item *item = (struct icmap_item *)old_value; + + /* + * value == old_value -> fast_adjust_int was used, don't free data + */ + if (item != NULL && value != old_value) { + free(item->key_name); + free(item); + } +} + +cs_error_t icmap_init_r(icmap_map_t *result) +{ + int32_t err; + + *result = malloc(sizeof(struct icmap_map)); + if (*result == NULL) { + return (CS_ERR_NO_MEMORY); + } + + (*result)->qb_map = qb_trie_create(); + if ((*result)->qb_map == NULL) { + free(*result); + return (CS_ERR_INIT); + } + + err = qb_map_notify_add((*result)->qb_map, NULL, icmap_map_free_cb, QB_MAP_NOTIFY_FREE, NULL); + + return (qb_to_cs_error(err)); +} + +cs_error_t icmap_init(void) +{ + return (icmap_init_r(&icmap_global_map)); +} + +static void icmap_set_ro_access_free(void) +{ + struct qb_list_head *iter, *tmp_iter; + struct 
icmap_ro_access_item *icmap_ro_ai; + + qb_list_for_each_safe(iter, tmp_iter, &icmap_ro_access_item_list_head) { + icmap_ro_ai = qb_list_entry(iter, struct icmap_ro_access_item, list); + qb_list_del(&icmap_ro_ai->list); + free(icmap_ro_ai->key_name); + free(icmap_ro_ai); + } +} + +static void icmap_del_all_track(void) +{ + struct qb_list_head *iter, *tmp_iter; + struct icmap_track *icmap_track; + + qb_list_for_each_safe(iter, tmp_iter, &icmap_track_list_head) { + icmap_track = qb_list_entry(iter, struct icmap_track, list); + + icmap_track_delete(icmap_track); + } +} + +void icmap_fini_r(const icmap_map_t map) +{ + + qb_map_destroy(map->qb_map); + free(map); + + return; +} + +void icmap_fini(void) +{ + + icmap_del_all_track(); + /* + * catch 22 warning: + * We need to drop this notify but we can't because it calls icmap_map_free_cb + * while destroying the tree to free icmap_item(s). + * -> qb_map_notify_del_2(icmap_map, NULL, icmap_map_free_cb, QB_MAP_NOTIFY_FREE, NULL); + * and we cannot call it after map_destroy. joy! :) + */ + icmap_fini_r(icmap_global_map); + icmap_set_ro_access_free(); + + return ; +} + +icmap_map_t icmap_get_global_map(void) +{ + + return (icmap_global_map); +} + +static int icmap_is_valid_name_char(char c) +{ + return ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '.' 
|| c == '_' || c == '-' || c == '/' || c == ':'); +} + +void icmap_convert_name_to_valid_name(char *key_name) +{ + int i; + + for (i = 0; i < strlen(key_name); i++) { + if (!icmap_is_valid_name_char(key_name[i])) { + key_name[i] = '_'; + } + } +} + +static int icmap_check_key_name(const char *key_name) +{ + int i; + + if ((strlen(key_name) < ICMAP_KEYNAME_MINLEN) || strlen(key_name) > ICMAP_KEYNAME_MAXLEN) { + return (-1); + } + + for (i = 0; i < strlen(key_name); i++) { + if (!icmap_is_valid_name_char(key_name[i])) { + return (-1); + } + } + + return (0); +} + +size_t icmap_get_valuetype_len(icmap_value_types_t type) +{ + size_t res = 0; + + switch (type) { + case ICMAP_VALUETYPE_INT8: res = sizeof(int8_t); break; + case ICMAP_VALUETYPE_UINT8: res = sizeof(uint8_t); break; + case ICMAP_VALUETYPE_INT16: res = sizeof(int16_t); break; + case ICMAP_VALUETYPE_UINT16: res = sizeof(uint16_t); break; + case ICMAP_VALUETYPE_INT32: res = sizeof(int32_t); break; + case ICMAP_VALUETYPE_UINT32: res = sizeof(uint32_t); break; + case ICMAP_VALUETYPE_INT64: res = sizeof(int64_t); break; + case ICMAP_VALUETYPE_UINT64: res = sizeof(uint64_t); break; + case ICMAP_VALUETYPE_FLOAT: res = sizeof(float); break; + case ICMAP_VALUETYPE_DOUBLE: res = sizeof(double); break; + case ICMAP_VALUETYPE_STRING: + case ICMAP_VALUETYPE_BINARY: + res = 0; + break; + } + + return (res); +} + +static int icmap_check_value_len(const void *value, size_t value_len, icmap_value_types_t type) +{ + + if (value_len > ICMAP_MAX_VALUE_LEN) { + return (-1); + } + + if (type != ICMAP_VALUETYPE_STRING && type != ICMAP_VALUETYPE_BINARY) { + if (icmap_get_valuetype_len(type) == value_len) { + return (0); + } else { + return (-1); + } + } + + if (type == ICMAP_VALUETYPE_STRING) { + /* + * value_len can be shorter then real string length, but never + * longer (+ 1 is because of 0 at the end of string) + */ + if (value_len > strlen((const char *)value) + 1) { + return (-1); + } else { + return (0); + } + } + + return 
(0); +} + +static int icmap_item_eq(const struct icmap_item *item, const void *value, size_t value_len, icmap_value_types_t type) +{ + size_t ptr_len; + + if (item->type != type) { + return (0); + } + + if (item->type == ICMAP_VALUETYPE_STRING) { + ptr_len = strlen((const char *)value); + if (ptr_len > value_len) { + ptr_len = value_len; + } + ptr_len++; + } else { + ptr_len = value_len; + } + + if (item->value_len == ptr_len) { + return (memcmp(item->value, value, value_len) == 0); + }; + + return (0); +} + +int icmap_key_value_eq( + const icmap_map_t map1, + const char *key_name1, + const icmap_map_t map2, + const char *key_name2) +{ + struct icmap_item *item1, *item2; + + if (map1 == NULL || key_name1 == NULL || map2 == NULL || key_name2 == NULL) { + return (0); + } + + item1 = qb_map_get(map1->qb_map, key_name1); + item2 = qb_map_get(map2->qb_map, key_name2); + + if (item1 == NULL || item2 == NULL) { + return (0); + } + + return (icmap_item_eq(item1, item2->value, item2->value_len, item2->type)); +} + +cs_error_t icmap_set_r( + const icmap_map_t map, + const char *key_name, + const void *value, + size_t value_len, + icmap_value_types_t type) +{ + struct icmap_item *item; + struct icmap_item *new_item; + size_t new_value_len; + size_t new_item_size; + + if (value == NULL || key_name == NULL) { + return (CS_ERR_INVALID_PARAM); + } + + if (icmap_check_value_len(value, value_len, type) != 0) { + return (CS_ERR_INVALID_PARAM); + } + + item = qb_map_get(map->qb_map, key_name); + if (item != NULL) { + /* + * Check that key is really changed + */ + if (icmap_item_eq(item, value, value_len, type)) { + return (CS_OK); + } + } else { + if (icmap_check_key_name(key_name) != 0) { + return (CS_ERR_NAME_TOO_LONG); + } + } + + if (type == ICMAP_VALUETYPE_BINARY || type == ICMAP_VALUETYPE_STRING) { + if (type == ICMAP_VALUETYPE_STRING) { + new_value_len = strlen((const char *)value); + if (new_value_len > value_len) { + new_value_len = value_len; + } + new_value_len++; + } else 
{ + new_value_len = value_len; + } + } else { + new_value_len = icmap_get_valuetype_len(type); + } + + new_item_size = sizeof(struct icmap_item) + new_value_len; + new_item = malloc(new_item_size); + if (new_item == NULL) { + return (CS_ERR_NO_MEMORY); + } + memset(new_item, 0, new_item_size); + + if (item == NULL) { + new_item->key_name = strdup(key_name); + if (new_item->key_name == NULL) { + free(new_item); + return (CS_ERR_NO_MEMORY); + } + } else { + new_item->key_name = item->key_name; + item->key_name = NULL; + } + + new_item->type = type; + new_item->value_len = new_value_len; + + memcpy(new_item->value, value, new_value_len); + + if (new_item->type == ICMAP_VALUETYPE_STRING) { + ((char *)new_item->value)[new_value_len - 1] = 0; + } + + qb_map_put(map->qb_map, new_item->key_name, new_item); + + return (CS_OK); +} + +cs_error_t icmap_set( + const char *key_name, + const void *value, + size_t value_len, + icmap_value_types_t type) +{ + + return (icmap_set_r(icmap_global_map, key_name, value, value_len, type)); +} + +cs_error_t icmap_set_int8_r(const icmap_map_t map, const char *key_name, int8_t value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT8)); +} + +cs_error_t icmap_set_uint8_r(const icmap_map_t map, const char *key_name, uint8_t value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT8)); +} + +cs_error_t icmap_set_int16_r(const icmap_map_t map, const char *key_name, int16_t value) +{ + + return (icmap_set_r(map,key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT16)); +} + +cs_error_t icmap_set_uint16_r(const icmap_map_t map, const char *key_name, uint16_t value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT16)); +} + +cs_error_t icmap_set_int32_r(const icmap_map_t map, const char *key_name, int32_t value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT32)); +} + +cs_error_t icmap_set_uint32_r(const 
icmap_map_t map, const char *key_name, uint32_t value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT32)); +} + +cs_error_t icmap_set_int64_r(const icmap_map_t map, const char *key_name, int64_t value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_INT64)); +} + +cs_error_t icmap_set_uint64_r(const icmap_map_t map, const char *key_name, uint64_t value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_UINT64)); +} + +cs_error_t icmap_set_float_r(const icmap_map_t map, const char *key_name, float value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_FLOAT)); +} + +cs_error_t icmap_set_double_r(const icmap_map_t map, const char *key_name, double value) +{ + + return (icmap_set_r(map, key_name, &value, sizeof(value), ICMAP_VALUETYPE_DOUBLE)); +} + +cs_error_t icmap_set_string_r(const icmap_map_t map, const char *key_name, const char *value) +{ + + if (value == NULL) { + return (CS_ERR_INVALID_PARAM); + } + + return (icmap_set_r(map, key_name, value, strlen(value), ICMAP_VALUETYPE_STRING)); +} + +cs_error_t icmap_set_int8(const char *key_name, int8_t value) +{ + + return (icmap_set_int8_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_uint8(const char *key_name, uint8_t value) +{ + + return (icmap_set_uint8_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_int16(const char *key_name, int16_t value) +{ + + return (icmap_set_int16_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_uint16(const char *key_name, uint16_t value) +{ + + return (icmap_set_uint16_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_int32(const char *key_name, int32_t value) +{ + + return (icmap_set_int32_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_uint32(const char *key_name, uint32_t value) +{ + + return (icmap_set_uint32_r(icmap_global_map, key_name, value)); +} + +cs_error_t 
icmap_set_int64(const char *key_name, int64_t value) +{ + + return (icmap_set_int64_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_uint64(const char *key_name, uint64_t value) +{ + + return (icmap_set_uint64_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_float(const char *key_name, float value) +{ + + return (icmap_set_float_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_double(const char *key_name, double value) +{ + + return (icmap_set_double_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_set_string(const char *key_name, const char *value) +{ + + return (icmap_set_string_r(icmap_global_map, key_name, value)); +} + +cs_error_t icmap_delete_r(const icmap_map_t map, const char *key_name) +{ + struct icmap_item *item; + + if (key_name == NULL) { + return (CS_ERR_INVALID_PARAM); + } + + item = qb_map_get(map->qb_map, key_name); + if (item == NULL) { + return (CS_ERR_NOT_EXIST); + } + + if (qb_map_rm(map->qb_map, item->key_name) != QB_TRUE) { + return (CS_ERR_NOT_EXIST); + } + + return (CS_OK); +} + +cs_error_t icmap_delete(const char *key_name) +{ + + return (icmap_delete_r(icmap_global_map, key_name)); +} + +static cs_error_t icmap_get_ref_r( + const icmap_map_t map, + const char *key_name, + void **value, + size_t *value_len, + icmap_value_types_t *type) +{ + struct icmap_item *item; + + if (key_name == NULL) { + return (CS_ERR_INVALID_PARAM); + } + + item = qb_map_get(map->qb_map, key_name); + if (item == NULL) { + return (CS_ERR_NOT_EXIST); + } + + if (type != NULL) { + *type = item->type; + } + + if (value_len != NULL) { + *value_len = item->value_len; + } + + if (value != NULL) { + *value = item->value; + } + + return (CS_OK); +} + +cs_error_t icmap_get_r( + const icmap_map_t map, + const char *key_name, + void *value, + size_t *value_len, + icmap_value_types_t *type) +{ + cs_error_t res; + void *tmp_value; + size_t tmp_value_len; + + res = icmap_get_ref_r(map, key_name, &tmp_value, 
&tmp_value_len, type); + if (res != CS_OK) { + return (res); + } + + if (value == NULL) { + if (value_len != NULL) { + *value_len = tmp_value_len; + } + } else { + if (value_len == NULL || *value_len < tmp_value_len) { + return (CS_ERR_INVALID_PARAM); + } + + *value_len = tmp_value_len; + + memcpy(value, tmp_value, tmp_value_len); + } + + return (CS_OK); +} + +cs_error_t icmap_get( + const char *key_name, + void *value, + size_t *value_len, + icmap_value_types_t *type) +{ + + return (icmap_get_r(icmap_global_map, key_name, value, value_len, type)); +} + +cs_error_t icmap_get_string_r(icmap_map_t map, const char *key_name, char **str) +{ + cs_error_t res; + size_t str_len; + icmap_value_types_t type; + + res = icmap_get_r(map, key_name, NULL, &str_len, &type); + if (res != CS_OK || type != ICMAP_VALUETYPE_STRING) { + if (res == CS_OK) { + res = CS_ERR_INVALID_PARAM; + } + + goto return_error; + } + + *str = malloc(str_len); + if (*str == NULL) { + res = CS_ERR_NO_MEMORY; + + goto return_error; + } + + res = icmap_get_r(map, key_name, *str, &str_len, &type); + if (res != CS_OK) { + free(*str); + goto return_error; + } + + return (CS_OK); + +return_error: + return (res); +} + +static cs_error_t icmap_get_int_r( + const icmap_map_t map, + const char *key_name, + void *value, + icmap_value_types_t type) +{ + char key_value[16]; + size_t key_size; + cs_error_t err; + icmap_value_types_t key_type; + + key_size = sizeof(key_value); + memset(key_value, 0, key_size); + + err = icmap_get_r(map, key_name, key_value, &key_size, &key_type); + if (err != CS_OK) + return (err); + + if (key_type != type) { + return (CS_ERR_INVALID_PARAM); + } + + memcpy(value, key_value, icmap_get_valuetype_len(key_type)); + + return (CS_OK); +} + +cs_error_t icmap_get_int8_r(const icmap_map_t map, const char *key_name, int8_t *i8) +{ + + return (icmap_get_int_r(map, key_name, i8, ICMAP_VALUETYPE_INT8)); +} + +cs_error_t icmap_get_uint8_r(const icmap_map_t map, const char *key_name, uint8_t *u8) +{ 
+ + return (icmap_get_int_r(map, key_name, u8, ICMAP_VALUETYPE_UINT8)); +} + +cs_error_t icmap_get_int16_r(const icmap_map_t map, const char *key_name, int16_t *i16) +{ + + return (icmap_get_int_r(map, key_name, i16, ICMAP_VALUETYPE_INT16)); +} + +cs_error_t icmap_get_uint16_r(const icmap_map_t map, const char *key_name, uint16_t *u16) +{ + + return (icmap_get_int_r(map, key_name, u16, ICMAP_VALUETYPE_UINT16)); +} + +cs_error_t icmap_get_int32_r(const icmap_map_t map, const char *key_name, int32_t *i32) +{ + + return (icmap_get_int_r(map, key_name, i32, ICMAP_VALUETYPE_INT32)); +} + +cs_error_t icmap_get_uint32_r(const icmap_map_t map, const char *key_name, uint32_t *u32) +{ + + return (icmap_get_int_r(map, key_name, u32, ICMAP_VALUETYPE_UINT32)); +} + +cs_error_t icmap_get_int64_r(const icmap_map_t map, const char *key_name, int64_t *i64) +{ + + return(icmap_get_int_r(map, key_name, i64, ICMAP_VALUETYPE_INT64)); +} + +cs_error_t icmap_get_uint64_r(const icmap_map_t map, const char *key_name, uint64_t *u64) +{ + + return (icmap_get_int_r(map, key_name, u64, ICMAP_VALUETYPE_UINT64)); +} + +cs_error_t icmap_get_float_r(const icmap_map_t map, const char *key_name, float *flt) +{ + + return (icmap_get_int_r(map, key_name, flt, ICMAP_VALUETYPE_FLOAT)); +} + +cs_error_t icmap_get_double_r(const icmap_map_t map, const char *key_name, double *dbl) +{ + + return (icmap_get_int_r(map, key_name, dbl, ICMAP_VALUETYPE_DOUBLE)); +} + +cs_error_t icmap_get_string(const char *key_name, char **str) +{ + + return (icmap_get_string_r(icmap_global_map, key_name, str)); +} + +cs_error_t icmap_get_int8(const char *key_name, int8_t *i8) +{ + + return (icmap_get_int8_r(icmap_global_map, key_name, i8)); +} + +cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8) +{ + + return (icmap_get_uint8_r(icmap_global_map, key_name, u8)); +} + +cs_error_t icmap_get_int16(const char *key_name, int16_t *i16) +{ + + return (icmap_get_int16_r(icmap_global_map, key_name, i16)); +} + +cs_error_t 
icmap_get_uint16(const char *key_name, uint16_t *u16) +{ + + return (icmap_get_uint16_r(icmap_global_map, key_name, u16)); +} + +cs_error_t icmap_get_int32(const char *key_name, int32_t *i32) +{ + + return (icmap_get_int32_r(icmap_global_map, key_name, i32)); +} + +cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32) +{ + + return (icmap_get_uint32_r(icmap_global_map, key_name, u32)); +} + +cs_error_t icmap_get_int64(const char *key_name, int64_t *i64) +{ + + return(icmap_get_int64_r(icmap_global_map, key_name, i64)); +} + +cs_error_t icmap_get_uint64(const char *key_name, uint64_t *u64) +{ + + return (icmap_get_uint64_r(icmap_global_map, key_name, u64)); +} + +cs_error_t icmap_get_float(const char *key_name, float *flt) +{ + + return (icmap_get_float_r(icmap_global_map, key_name, flt)); +} + +cs_error_t icmap_get_double(const char *key_name, double *dbl) +{ + + return (icmap_get_double_r(icmap_global_map, key_name, dbl)); +} + +cs_error_t icmap_adjust_int_r( + const icmap_map_t map, + const char *key_name, + int32_t step) +{ + struct icmap_item *item; + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + cs_error_t err = CS_OK; + + if (key_name == NULL) { + return (CS_ERR_INVALID_PARAM); + } + + item = qb_map_get(map->qb_map, key_name); + if (item == NULL) { + return (CS_ERR_NOT_EXIST); + } + + switch (item->type) { + case ICMAP_VALUETYPE_INT8: + case ICMAP_VALUETYPE_UINT8: + memcpy(&u8, item->value, sizeof(u8)); + u8 += step; + err = icmap_set(key_name, &u8, sizeof(u8), item->type); + break; + case ICMAP_VALUETYPE_INT16: + case ICMAP_VALUETYPE_UINT16: + memcpy(&u16, item->value, sizeof(u16)); + u16 += step; + err = icmap_set(key_name, &u16, sizeof(u16), item->type); + break; + case ICMAP_VALUETYPE_INT32: + case ICMAP_VALUETYPE_UINT32: + memcpy(&u32, item->value, sizeof(u32)); + u32 += step; + err = icmap_set(key_name, &u32, sizeof(u32), item->type); + break; + case ICMAP_VALUETYPE_INT64: + case ICMAP_VALUETYPE_UINT64: + memcpy(&u64, 
item->value, sizeof(u64)); + u64 += step; + err = icmap_set(key_name, &u64, sizeof(u64), item->type); + break; + case ICMAP_VALUETYPE_FLOAT: + case ICMAP_VALUETYPE_DOUBLE: + case ICMAP_VALUETYPE_STRING: + case ICMAP_VALUETYPE_BINARY: + err = CS_ERR_INVALID_PARAM; + break; + } + + return (err); +} + +cs_error_t icmap_adjust_int( + const char *key_name, + int32_t step) +{ + + return (icmap_adjust_int_r(icmap_global_map, key_name, step)); +} + +cs_error_t icmap_fast_adjust_int_r( + const icmap_map_t map, + const char *key_name, + int32_t step) +{ + struct icmap_item *item; + cs_error_t err = CS_OK; + + if (key_name == NULL) { + return (CS_ERR_INVALID_PARAM); + } + + item = qb_map_get(map->qb_map, key_name); + if (item == NULL) { + return (CS_ERR_NOT_EXIST); + } + + switch (item->type) { + case ICMAP_VALUETYPE_INT8: + case ICMAP_VALUETYPE_UINT8: + *(uint8_t *)item->value += step; + break; + case ICMAP_VALUETYPE_INT16: + case ICMAP_VALUETYPE_UINT16: + *(uint16_t *)item->value += step; + break; + case ICMAP_VALUETYPE_INT32: + case ICMAP_VALUETYPE_UINT32: + *(uint32_t *)item->value += step; + break; + case ICMAP_VALUETYPE_INT64: + case ICMAP_VALUETYPE_UINT64: + *(uint64_t *)item->value += step; + break; + case ICMAP_VALUETYPE_FLOAT: + case ICMAP_VALUETYPE_DOUBLE: + case ICMAP_VALUETYPE_STRING: + case ICMAP_VALUETYPE_BINARY: + err = CS_ERR_INVALID_PARAM; + break; + } + + if (err == CS_OK) { + qb_map_put(map->qb_map, item->key_name, item); + } + + return (err); +} + +cs_error_t icmap_fast_adjust_int( + const char *key_name, + int32_t step) +{ + + return (icmap_fast_adjust_int_r(icmap_global_map, key_name, step)); +} + +cs_error_t icmap_inc_r(const icmap_map_t map, const char *key_name) +{ + return (icmap_adjust_int_r(map, key_name, 1)); +} + +cs_error_t icmap_inc(const char *key_name) +{ + return (icmap_inc_r(icmap_global_map, key_name)); +} + +cs_error_t icmap_dec_r(const icmap_map_t map, const char *key_name) +{ + return (icmap_adjust_int_r(map, key_name, -1)); +} + 
+cs_error_t icmap_dec(const char *key_name) +{ + return (icmap_dec_r(icmap_global_map, key_name)); +} + +cs_error_t icmap_fast_inc_r(const icmap_map_t map, const char *key_name) +{ + return (icmap_fast_adjust_int_r(map, key_name, 1)); +} + +cs_error_t icmap_fast_inc(const char *key_name) +{ + return (icmap_fast_inc_r(icmap_global_map, key_name)); +} + +cs_error_t icmap_fast_dec_r(const icmap_map_t map, const char *key_name) +{ + return (icmap_fast_adjust_int_r(map, key_name, -1)); +} + +cs_error_t icmap_fast_dec(const char *key_name) +{ + return (icmap_fast_dec_r(icmap_global_map, key_name)); +} + +icmap_iter_t icmap_iter_init_r(const icmap_map_t map, const char *prefix) +{ + return (qb_map_pref_iter_create(map->qb_map, prefix)); +} + +icmap_iter_t icmap_iter_init(const char *prefix) +{ + return (icmap_iter_init_r(icmap_global_map, prefix)); +} + + +const char *icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type) +{ + struct icmap_item *item; + const char *res; + + res = qb_map_iter_next(iter, (void **)&item); + if (res == NULL) { + return (res); + } + + if (value_len != NULL) { + *value_len = item->value_len; + } + + if (type != NULL) { + *type = item->type; + } + + return (res); +} + +void icmap_iter_finalize(icmap_iter_t iter) +{ + qb_map_iter_free(iter); +} + +static void icmap_notify_fn(uint32_t event, char *key, void *old_value, void *value, void *user_data) +{ + icmap_track_t icmap_track = (icmap_track_t)user_data; + struct icmap_item *new_item = (struct icmap_item *)value; + struct icmap_item *old_item = (struct icmap_item *)old_value; + struct icmap_notify_value new_val; + struct icmap_notify_value old_val; + + if (value == NULL && old_value == NULL) { + return ; + } + + if (new_item != NULL) { + new_val.type = new_item->type; + new_val.len = new_item->value_len; + new_val.data = new_item->value; + } else { + memset(&new_val, 0, sizeof(new_val)); + } + + /* + * old_item == new_item if fast functions are used -> don't fill old 
value + */ + if (old_item != NULL && old_item != new_item) { + old_val.type = old_item->type; + old_val.len = old_item->value_len; + old_val.data = old_item->value; + } else { + memset(&old_val, 0, sizeof(old_val)); + } + + icmap_track->notify_fn(icmap_qbtt_to_tt(event), + key, + new_val, + old_val, + icmap_track->user_data); +} + +cs_error_t icmap_track_add( + const char *key_name, + int32_t track_type, + icmap_notify_fn_t notify_fn, + void *user_data, + icmap_track_t *icmap_track) +{ + int32_t err; + + if (notify_fn == NULL || icmap_track == NULL) { + return (CS_ERR_INVALID_PARAM); + } + + if ((track_type & ~(ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX)) != 0) { + return (CS_ERR_INVALID_PARAM); + } + + *icmap_track = malloc(sizeof(**icmap_track)); + if (*icmap_track == NULL) { + return (CS_ERR_NO_MEMORY); + } + memset(*icmap_track, 0, sizeof(**icmap_track)); + + if (key_name != NULL) { + (*icmap_track)->key_name = strdup(key_name); + }; + + (*icmap_track)->track_type = track_type; + (*icmap_track)->notify_fn = notify_fn; + (*icmap_track)->user_data = user_data; + + if ((err = qb_map_notify_add(icmap_global_map->qb_map, (*icmap_track)->key_name, icmap_notify_fn, + icmap_tt_to_qbtt(track_type), *icmap_track)) != 0) { + free((*icmap_track)->key_name); + free(*icmap_track); + + return (qb_to_cs_error(err)); + } + + qb_list_init(&(*icmap_track)->list); + qb_list_add (&(*icmap_track)->list, &icmap_track_list_head); + + return (CS_OK); +} + +cs_error_t icmap_track_delete(icmap_track_t icmap_track) +{ + int32_t err; + + if ((err = qb_map_notify_del_2(icmap_global_map->qb_map, icmap_track->key_name, + icmap_notify_fn, icmap_tt_to_qbtt(icmap_track->track_type), icmap_track)) != 0) { + return (qb_to_cs_error(err)); + } + + qb_list_del(&icmap_track->list); + free(icmap_track->key_name); + free(icmap_track); + + return (CS_OK); +} + +void *icmap_track_get_user_data(icmap_track_t icmap_track) +{ + return (icmap_track->user_data); +} + 
+cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access) +{ + struct qb_list_head *iter, *tmp_iter; + struct icmap_ro_access_item *icmap_ro_ai; + + qb_list_for_each_safe(iter, tmp_iter, &icmap_ro_access_item_list_head) { + icmap_ro_ai = qb_list_entry(iter, struct icmap_ro_access_item, list); + + if (icmap_ro_ai->prefix == prefix && strcmp(key_name, icmap_ro_ai->key_name) == 0) { + /* + * We found item + */ + if (ro_access) { + return (CS_ERR_EXIST); + } else { + qb_list_del(&icmap_ro_ai->list); + free(icmap_ro_ai->key_name); + free(icmap_ro_ai); + + return (CS_OK); + } + } + } + + if (!ro_access) { + return (CS_ERR_NOT_EXIST); + } + + icmap_ro_ai = malloc(sizeof(*icmap_ro_ai)); + if (icmap_ro_ai == NULL) { + return (CS_ERR_NO_MEMORY); + } + + memset(icmap_ro_ai, 0, sizeof(*icmap_ro_ai)); + icmap_ro_ai->key_name = strdup(key_name); + if (icmap_ro_ai->key_name == NULL) { + free(icmap_ro_ai); + return (CS_ERR_NO_MEMORY); + } + + icmap_ro_ai->prefix = prefix; + qb_list_init(&icmap_ro_ai->list); + qb_list_add (&icmap_ro_ai->list, &icmap_ro_access_item_list_head); + + return (CS_OK); +} + +int icmap_is_key_ro(const char *key_name) +{ + struct qb_list_head *iter; + struct icmap_ro_access_item *icmap_ro_ai; + + qb_list_for_each(iter, &icmap_ro_access_item_list_head) { + icmap_ro_ai = qb_list_entry(iter, struct icmap_ro_access_item, list); + + if (icmap_ro_ai->prefix) { + if (strlen(icmap_ro_ai->key_name) > strlen(key_name)) + continue; + + if (strncmp(icmap_ro_ai->key_name, key_name, strlen(icmap_ro_ai->key_name)) == 0) { + return (CS_TRUE); + } + } else { + if (strcmp(icmap_ro_ai->key_name, key_name) == 0) { + return (CS_TRUE); + } + } + } + + return (CS_FALSE); + +} + +cs_error_t icmap_copy_map(icmap_map_t dst_map, const icmap_map_t src_map) +{ + icmap_iter_t iter; + size_t value_len; + icmap_value_types_t value_type; + const char *key_name; + cs_error_t err; + void *value; + + iter = icmap_iter_init_r(src_map, NULL); + if (iter == NULL) { + 
return (CS_ERR_NO_MEMORY); + } + + err = CS_OK; + + while ((key_name = icmap_iter_next(iter, &value_len, &value_type)) != NULL) { + err = icmap_get_ref_r(src_map, key_name, &value, &value_len, &value_type); + if (err != CS_OK) { + goto exit_iter_finalize; + } + + err = icmap_set_r(dst_map, key_name, value, value_len, value_type); + if (err != CS_OK) { + goto exit_iter_finalize; + } + } + +exit_iter_finalize: + icmap_iter_finalize(iter); + + return (err); +} diff --git a/exec/ipc_glue.c b/exec/ipc_glue.c new file mode 100644 index 0000000..aa86e5c --- /dev/null +++ b/exec/ipc_glue.c @@ -0,0 +1,829 @@ +/* + * Copyright (c) 2010-2017 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Angus Salkeld <asalkeld@redhat.com> + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of Red Hat, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <assert.h> +#include <sys/uio.h> +#include <string.h> + +#include <qb/qbdefs.h> +#include <qb/qblist.h> +#include <qb/qbutil.h> +#include <qb/qbloop.h> +#include <qb/qbipcs.h> + +#include <corosync/swab.h> +#include <corosync/corotypes.h> +#include <corosync/corodefs.h> +#include <corosync/totem/totempg.h> +#include <corosync/logsys.h> +#include <corosync/icmap.h> + +#include "sync.h" +#include "timer.h" +#include "main.h" +#include "util.h" +#include "apidef.h" +#include "service.h" +#include "ipcs_stats.h" +#include "stats.h" + +LOGSYS_DECLARE_SUBSYS ("MAIN"); + +static struct corosync_api_v1 *api = NULL; +static int32_t ipc_not_enough_fds_left = 0; +static int32_t ipc_fc_is_quorate; /* boolean */ +static int32_t ipc_fc_totem_queue_level; /* percentage used */ +static int32_t ipc_fc_sync_in_process; /* boolean */ +static int32_t ipc_allow_connections = 0; /* boolean */ + +#define CS_IPCS_MAPPER_SERV_NAME 256 + +struct cs_ipcs_mapper { + int32_t id; + qb_ipcs_service_t *inst; + char name[CS_IPCS_MAPPER_SERV_NAME]; +}; + +struct outq_item { + void *msg; + size_t mlen; + struct qb_list_head list; +}; + +static struct cs_ipcs_mapper ipcs_mapper[SERVICES_COUNT_MAX]; + +static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn); +static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, 
int32_t events, + void *data, qb_ipcs_dispatch_fn_t fn); +static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, + void *data, qb_ipcs_dispatch_fn_t fn); +static int32_t cs_ipcs_dispatch_del(int32_t fd); +static void outq_flush (void *data); + + +static struct qb_ipcs_poll_handlers corosync_poll_funcs = { + .job_add = cs_ipcs_job_add, + .dispatch_add = cs_ipcs_dispatch_add, + .dispatch_mod = cs_ipcs_dispatch_mod, + .dispatch_del = cs_ipcs_dispatch_del, +}; + +static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid); +static void cs_ipcs_connection_created(qb_ipcs_connection_t *c); +static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c, + void *data, size_t size); +static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c); +static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c); + +static struct qb_ipcs_service_handlers corosync_service_funcs = { + .connection_accept = cs_ipcs_connection_accept, + .connection_created = cs_ipcs_connection_created, + .msg_process = cs_ipcs_msg_process, + .connection_closed = cs_ipcs_connection_closed, + .connection_destroyed = cs_ipcs_connection_destroyed, +}; + +static struct ipcs_global_stats global_stats; + +static const char* cs_ipcs_serv_short_name(int32_t service_id) +{ + const char *name; + switch (service_id) { + case CFG_SERVICE: + name = "cfg"; + break; + case CPG_SERVICE: + name = "cpg"; + break; + case QUORUM_SERVICE: + name = "quorum"; + break; + case PLOAD_SERVICE: + name = "pload"; + break; + case VOTEQUORUM_SERVICE: + name = "votequorum"; + break; + case MON_SERVICE: + name = "mon"; + break; + case WD_SERVICE: + name = "wd"; + break; + case CMAP_SERVICE: + name = "cmap"; + break; + default: + name = NULL; + break; + } + return name; +} + +void cs_ipc_allow_connections(int32_t allow) +{ + ipc_allow_connections = allow; +} + +int32_t cs_ipcs_service_destroy(int32_t service_id) +{ + if (ipcs_mapper[service_id].inst) { + 
qb_ipcs_destroy(ipcs_mapper[service_id].inst); + ipcs_mapper[service_id].inst = NULL; + } + return 0; +} + +static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid) +{ + int32_t service = qb_ipcs_service_id_get(c); + uint8_t u8; + char key_name[ICMAP_KEYNAME_MAXLEN]; + + if (!ipc_allow_connections) { + log_printf(LOGSYS_LEVEL_DEBUG, "Denied connection, corosync is not ready"); + return -EAGAIN; + } + + if (corosync_service[service] == NULL || + ipcs_mapper[service].inst == NULL) { + return -ENOSYS; + } + + if (ipc_not_enough_fds_left) { + return -EMFILE; + } + + if (euid == 0 || egid == 0) { + return 0; + } + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.uid.%u", euid); + if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) + return 0; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u", euid); + if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) + return 0; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.gid.%u", egid); + if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) + return 0; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u", egid); + if (icmap_get_uint8(key_name, &u8) == CS_OK && u8 == 1) + return 0; + + log_printf(LOGSYS_LEVEL_ERROR, "Denied connection attempt from %d:%d", euid, egid); + + return -EACCES; +} + +static char * pid_to_name (pid_t pid, char *out_name, size_t name_len) +{ + char *name; + char *rest; + FILE *fp; + char fname[32]; + char buf[256]; + + snprintf (fname, 32, "/proc/%d/stat", pid); + fp = fopen (fname, "r"); + if (!fp) { + return NULL; + } + + if (fgets (buf, sizeof (buf), fp) == NULL) { + fclose (fp); + return NULL; + } + fclose (fp); + + name = strrchr (buf, '('); + if (!name) { + return NULL; + } + + /* move past the bracket */ + name++; + + rest = strrchr (buf, ')'); + + if (rest == NULL || rest[1] != ' ') { + return NULL; + } + + *rest = '\0'; + /* move past the NULL and space */ + rest += 2; + + /* copy the name */ + strncpy (out_name, 
name, name_len - 1); + out_name[name_len - 1] = '\0'; + return out_name; +} + +static void cs_ipcs_connection_created(qb_ipcs_connection_t *c) +{ + int32_t service = 0; + struct cs_ipcs_conn_context *context; + struct qb_ipcs_connection_stats stats; + size_t size = sizeof(struct cs_ipcs_conn_context); + + log_printf(LOG_DEBUG, "connection created"); + + service = qb_ipcs_service_id_get(c); + + size += corosync_service[service]->private_data_size; + context = calloc(1, size); + if (context == NULL) { + qb_ipcs_disconnect(c); + return; + } + + qb_list_init(&context->outq_head); + context->queuing = QB_FALSE; + context->queued = 0; + context->sent = 0; + + qb_ipcs_context_set(c, context); + + if (corosync_service[service]->lib_init_fn(c) != 0) { + log_printf(LOG_ERR, "lib_init_fn failed, disconnecting"); + qb_ipcs_disconnect(c); + return; + } + + qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); + + if (!pid_to_name (stats.client_pid, context->proc_name, sizeof(context->proc_name))) { + context->proc_name[0] = '\0'; + } + stats_ipcs_add_connection(service, stats.client_pid, c); + global_stats.active++; +} + +void cs_ipc_refcnt_inc(void *conn) +{ + qb_ipcs_connection_ref(conn); +} + +void cs_ipc_refcnt_dec(void *conn) +{ + qb_ipcs_connection_unref(conn); +} + +void *cs_ipcs_private_data_get(void *conn) +{ + struct cs_ipcs_conn_context *cnx; + cnx = qb_ipcs_context_get(conn); + return &cnx->data[0]; +} + +static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c) +{ + struct cs_ipcs_conn_context *context; + struct qb_list_head *list, *tmp_iter; + struct outq_item *outq_item; + + log_printf(LOG_DEBUG, "%s() ", __func__); + + context = qb_ipcs_context_get(c); + if (context) { + qb_list_for_each_safe(list, tmp_iter, &(context->outq_head)) { + outq_item = qb_list_entry (list, struct outq_item, list); + + qb_list_del (list); + free (outq_item->msg); + free (outq_item); + } + free(context); + } +} + +static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c) 
+{ + int32_t res = 0; + int32_t service = qb_ipcs_service_id_get(c); + struct qb_ipcs_connection_stats stats; + + log_printf(LOG_DEBUG, "%s() ", __func__); + res = corosync_service[service]->lib_exit_fn(c); + if (res != 0) { + return res; + } + + qb_loop_job_del(cs_poll_handle_get(), QB_LOOP_HIGH, c, outq_flush); + + qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); + + stats_ipcs_del_connection(service, stats.client_pid, c); + + global_stats.active--; + global_stats.closed++; + return 0; +} + +int cs_ipcs_response_iov_send (void *conn, + const struct iovec *iov, + unsigned int iov_len) +{ + int32_t rc = qb_ipcs_response_sendv(conn, iov, iov_len); + if (rc >= 0) { + return 0; + } + return rc; +} + +int cs_ipcs_response_send(void *conn, const void *msg, size_t mlen) +{ + int32_t rc = qb_ipcs_response_send(conn, msg, mlen); + if (rc >= 0) { + return 0; + } + return rc; +} + +static void outq_flush (void *data) +{ + qb_ipcs_connection_t *conn = data; + struct qb_list_head *list, *tmp_iter; + struct outq_item *outq_item; + int32_t rc; + struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn); + + qb_list_for_each_safe(list, tmp_iter, &(context->outq_head)) { + outq_item = qb_list_entry (list, struct outq_item, list); + + rc = qb_ipcs_event_send(conn, outq_item->msg, outq_item->mlen); + if (rc < 0 && rc != -EAGAIN) { + errno = -rc; + qb_perror(LOG_ERR, "qb_ipcs_event_send"); + return; + } else if (rc == -EAGAIN) { + break; + } + assert(rc == outq_item->mlen); + context->sent++; + context->queued--; + + qb_list_del (list); + free (outq_item->msg); + free (outq_item); + } + if (qb_list_empty (&context->outq_head)) { + context->queuing = QB_FALSE; + log_printf(LOGSYS_LEVEL_INFO, "Q empty, queued:%d sent:%d.", + context->queued, context->sent); + context->queued = 0; + context->sent = 0; + } else { + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush); + } +} + +static void msg_send_or_queue(qb_ipcs_connection_t *conn, const struct iovec *iov, 
uint32_t iov_len) +{ + int32_t rc = 0; + int32_t i; + int32_t bytes_msg = 0; + struct outq_item *outq_item; + char *write_buf = 0; + struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn); + + for (i = 0; i < iov_len; i++) { + bytes_msg += iov[i].iov_len; + } + + if (!context->queuing) { + assert(qb_list_empty (&context->outq_head)); + rc = qb_ipcs_event_sendv(conn, iov, iov_len); + if (rc == bytes_msg) { + context->sent++; + return; + } + if (rc == -EAGAIN) { + context->queued = 0; + context->sent = 0; + context->queuing = QB_TRUE; + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush); + } else { + log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d, expected %d!", rc, bytes_msg); + return; + } + } + outq_item = malloc (sizeof (struct outq_item)); + if (outq_item == NULL) { + qb_ipcs_disconnect(conn); + return; + } + outq_item->msg = malloc (bytes_msg); + if (outq_item->msg == NULL) { + free (outq_item); + qb_ipcs_disconnect(conn); + return; + } + + write_buf = outq_item->msg; + for (i = 0; i < iov_len; i++) { + memcpy (write_buf, iov[i].iov_base, iov[i].iov_len); + write_buf += iov[i].iov_len; + } + outq_item->mlen = bytes_msg; + qb_list_init (&outq_item->list); + qb_list_add_tail (&outq_item->list, &context->outq_head); + context->queued++; +} + +int cs_ipcs_dispatch_send(void *conn, const void *msg, size_t mlen) +{ + struct iovec iov; + iov.iov_base = (void *)msg; + iov.iov_len = mlen; + msg_send_or_queue (conn, &iov, 1); + return 0; +} + +int cs_ipcs_dispatch_iov_send (void *conn, + const struct iovec *iov, + unsigned int iov_len) +{ + msg_send_or_queue(conn, iov, iov_len); + return 0; +} + +static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c, + void *data, size_t size) +{ + struct qb_ipc_response_header response; + struct qb_ipc_request_header *request_pt = (struct qb_ipc_request_header *)data; + int32_t service = qb_ipcs_service_id_get(c); + int32_t send_ok = 0; + int32_t is_async_call = QB_FALSE; + ssize_t res = -1; + 
int sending_allowed_private_data; + struct cs_ipcs_conn_context *cnx; + + send_ok = corosync_sending_allowed (service, + request_pt->id, + request_pt, + &sending_allowed_private_data); + + is_async_call = (service == CPG_SERVICE && request_pt->id == 2); + + /* + * This happens when the message contains some kind of invalid + * parameter, such as an invalid size + */ + if (send_ok == -EINVAL) { + response.size = sizeof (response); + response.id = 0; + response.error = CS_ERR_INVALID_PARAM; + + cnx = qb_ipcs_context_get(c); + if (cnx) { + cnx->invalid_request++; + } + + if (is_async_call) { + log_printf(LOGSYS_LEVEL_INFO, "*** %s() invalid message! size:%d error:%d", + __func__, response.size, response.error); + } else { + qb_ipcs_response_send (c, + &response, + sizeof (response)); + } + res = -EINVAL; + } else if (send_ok < 0) { + cnx = qb_ipcs_context_get(c); + if (cnx) { + cnx->overload++; + } + if (!is_async_call) { + /* + * Overload, tell library to retry + */ + response.size = sizeof (response); + response.id = 0; + response.error = CS_ERR_TRY_AGAIN; + qb_ipcs_response_send (c, + &response, + sizeof (response)); + } else { + log_printf(LOGSYS_LEVEL_WARNING, + "*** %s() (%d:%d - %d) %s!", + __func__, service, request_pt->id, + is_async_call, strerror(-send_ok)); + } + res = -ENOBUFS; + } + + if (send_ok >= 0) { + corosync_service[service]->lib_engine[request_pt->id].lib_handler_fn(c, request_pt); + res = 0; + } + corosync_sending_allowed_release (&sending_allowed_private_data); + return res; +} + + +static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn) +{ + return qb_loop_job_add(cs_poll_handle_get(), p, data, fn); +} + +static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events, + void *data, qb_ipcs_dispatch_fn_t fn) +{ + return qb_loop_poll_add(cs_poll_handle_get(), p, fd, events, data, fn); +} + +static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, + 
void *data, qb_ipcs_dispatch_fn_t fn) +{ + return qb_loop_poll_mod(cs_poll_handle_get(), p, fd, events, data, fn); +} + +static int32_t cs_ipcs_dispatch_del(int32_t fd) +{ + return qb_loop_poll_del(cs_poll_handle_get(), fd); +} + +static void cs_ipcs_low_fds_event(int32_t not_enough, int32_t fds_available) +{ + ipc_not_enough_fds_left = not_enough; + if (not_enough) { + log_printf(LOGSYS_LEVEL_WARNING, "refusing new connections (fds_available:%d)", + fds_available); + } else { + log_printf(LOGSYS_LEVEL_NOTICE, "allowing new connections (fds_available:%d)", + fds_available); + + } +} + +int32_t cs_ipcs_q_level_get(void) +{ + return ipc_fc_totem_queue_level; +} + +static qb_loop_timer_handle ipcs_check_for_flow_control_timer; +static void cs_ipcs_check_for_flow_control(void) +{ + int32_t i; + int32_t fc_enabled; + + for (i = 0; i < SERVICES_COUNT_MAX; i++) { + if (corosync_service[i] == NULL || ipcs_mapper[i].inst == NULL) { + continue; + } + fc_enabled = QB_IPCS_RATE_OFF; + if (ipc_fc_is_quorate == 1 || + corosync_service[i]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { + /* + * we are quorate + * now check flow control + */ + if (ipc_fc_totem_queue_level != TOTEM_Q_LEVEL_CRITICAL && + ipc_fc_sync_in_process == 0) { + fc_enabled = QB_FALSE; + } else if (ipc_fc_totem_queue_level != TOTEM_Q_LEVEL_CRITICAL && + i == VOTEQUORUM_SERVICE) { + /* + * Allow message processing for votequorum service even + * in sync phase + */ + fc_enabled = QB_FALSE; + } else { + fc_enabled = QB_IPCS_RATE_OFF_2; + } + } + if (fc_enabled) { + qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, fc_enabled); + + qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, + NULL, corosync_recheck_the_q_level, &ipcs_check_for_flow_control_timer); + } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_LOW) { + qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_FAST); + } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_GOOD) { + 
qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_NORMAL); + } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_HIGH) { + qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_SLOW); + } + } +} + +static void cs_ipcs_fc_quorum_changed(int quorate, void *context) +{ + ipc_fc_is_quorate = quorate; + cs_ipcs_check_for_flow_control(); +} + +static void cs_ipcs_totem_queue_level_changed(enum totem_q_level level) +{ + ipc_fc_totem_queue_level = level; + cs_ipcs_check_for_flow_control(); +} + +void cs_ipcs_sync_state_changed(int32_t sync_in_process) +{ + ipc_fc_sync_in_process = sync_in_process; + cs_ipcs_check_for_flow_control(); +} + +void cs_ipcs_get_global_stats(struct ipcs_global_stats *ipcs_stats) +{ + memcpy(ipcs_stats, &global_stats, sizeof(global_stats)); +} + +cs_error_t cs_ipcs_get_conn_stats(int service_id, uint32_t pid, void *conn_ptr, struct ipcs_conn_stats *ipcs_stats) +{ + struct cs_ipcs_conn_context *cnx; + qb_ipcs_connection_t *c, *prev; + int found = 0; + + if (corosync_service[service_id] == NULL || ipcs_mapper[service_id].inst == NULL) { + return CS_ERR_NOT_EXIST; + } + + qb_ipcs_stats_get(ipcs_mapper[service_id].inst, &ipcs_stats->srv, QB_FALSE); + + for (c = qb_ipcs_connection_first_get(ipcs_mapper[service_id].inst); + c; + prev = c, c = qb_ipcs_connection_next_get(ipcs_mapper[service_id].inst, prev), qb_ipcs_connection_unref(prev)) { + + cnx = qb_ipcs_context_get(c); + if (cnx == NULL) continue; + if (c != conn_ptr) continue; + + qb_ipcs_connection_stats_get(c, &ipcs_stats->conn, QB_FALSE); + if (ipcs_stats->conn.client_pid != pid) { + continue; + } + found = 1; + memcpy(&ipcs_stats->cnx, cnx, sizeof(struct cs_ipcs_conn_context)); + } + if (!found) { + return CS_ERR_NOT_EXIST; + } + + return CS_OK; +} + +void cs_ipcs_clear_stats() +{ + struct cs_ipcs_conn_context *cnx; + struct ipcs_conn_stats ipcs_stats; + qb_ipcs_connection_t *c, *prev; + int service_id; + + /* Global stats are easy */ + memset(&global_stats, 0, 
sizeof(global_stats)); + + for (service_id = 0; service_id < SERVICES_COUNT_MAX; service_id++) { + if (!ipcs_mapper[service_id].inst) { + continue; + } + + for (c = qb_ipcs_connection_first_get(ipcs_mapper[service_id].inst); + c; + prev = c, c = qb_ipcs_connection_next_get(ipcs_mapper[service_id].inst, prev), qb_ipcs_connection_unref(prev)) { + /* Get stats with 'clear_after_read' set */ + qb_ipcs_connection_stats_get(c, &ipcs_stats.conn, QB_TRUE); + + /* Our own stats */ + cnx = qb_ipcs_context_get(c); + if (cnx == NULL) continue; + cnx->invalid_request = 0; + cnx->overload = 0; + cnx->sent = 0; + + } + } +} + +static enum qb_ipc_type cs_get_ipc_type (void) +{ + char *str; + int found = 0; + enum qb_ipc_type ret = QB_IPC_NATIVE; + + if (icmap_get_string("system.qb_ipc_type", &str) != CS_OK) { + log_printf(LOGSYS_LEVEL_DEBUG, "No configured system.qb_ipc_type. Using native ipc"); + return QB_IPC_NATIVE; + } + + if (strcmp(str, "native") == 0) { + ret = QB_IPC_NATIVE; + found = 1; + } + + if (strcmp(str, "shm") == 0) { + ret = QB_IPC_SHM; + found = 1; + } + + if (strcmp(str, "socket") == 0) { + ret = QB_IPC_SOCKET; + found = 1; + } + + if (found) { + log_printf(LOGSYS_LEVEL_DEBUG, "Using %s ipc", str); + } else { + log_printf(LOGSYS_LEVEL_DEBUG, "Unknown ipc type %s", str); + } + + free(str); + + return ret; +} + +const char *cs_ipcs_service_init(struct corosync_service_engine *service) +{ + const char *serv_short_name; + + serv_short_name = cs_ipcs_serv_short_name(service->id); + + if (service->lib_engine_count == 0) { + log_printf (LOGSYS_LEVEL_DEBUG, + "NOT Initializing IPC on %s [%d]", + serv_short_name, + service->id); + return NULL; + } + + if (strlen(serv_short_name) >= CS_IPCS_MAPPER_SERV_NAME) { + log_printf (LOGSYS_LEVEL_ERROR, "service name %s is too long", serv_short_name); + return "qb_ipcs_run error"; + } + + ipcs_mapper[service->id].id = service->id; + strcpy(ipcs_mapper[service->id].name, serv_short_name); + log_printf (LOGSYS_LEVEL_DEBUG, + 
"Initializing IPC on %s [%d]", + ipcs_mapper[service->id].name, + ipcs_mapper[service->id].id); + ipcs_mapper[service->id].inst = qb_ipcs_create(ipcs_mapper[service->id].name, + ipcs_mapper[service->id].id, + cs_get_ipc_type(), + &corosync_service_funcs); + assert(ipcs_mapper[service->id].inst); + qb_ipcs_poll_handlers_set(ipcs_mapper[service->id].inst, + &corosync_poll_funcs); + if (qb_ipcs_run(ipcs_mapper[service->id].inst) != 0) { + log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize IPC"); + return "qb_ipcs_run error"; + } + + return NULL; +} + +void cs_ipcs_init(void) +{ + api = apidef_get (); + + qb_loop_poll_low_fds_event_set(cs_poll_handle_get(), cs_ipcs_low_fds_event); + + api->quorum_register_callback (cs_ipcs_fc_quorum_changed, NULL); + totempg_queue_level_register_callback (cs_ipcs_totem_queue_level_changed); + + global_stats.active = 0; + global_stats.closed = 0; +} diff --git a/exec/ipcs_stats.h b/exec/ipcs_stats.h new file mode 100644 index 0000000..a8047b3 --- /dev/null +++ b/exec/ipcs_stats.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 Red Hat, Inc. + * + * All rights reserved. + * + * Authors: Christine Caulfield (ccaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +struct cs_ipcs_conn_context { + struct qb_list_head outq_head; + int32_t queuing; + uint32_t queued; + uint64_t invalid_request; + uint64_t overload; + uint32_t sent; + char proc_name[32]; + char data[1]; +}; + +struct ipcs_global_stats +{ + uint64_t active; + uint64_t closed; +}; + +struct ipcs_conn_stats +{ + struct qb_ipcs_stats srv; + struct qb_ipcs_connection_stats conn; + struct cs_ipcs_conn_context cnx; +}; + +cs_error_t cs_ipcs_get_conn_stats(int service_id, uint32_t pid, void *conn_ptr, struct ipcs_conn_stats *ipcs_stats); +void cs_ipcs_get_global_stats(struct ipcs_global_stats *ipcs_stats); +void cs_ipcs_clear_stats(void); diff --git a/exec/logconfig.c b/exec/logconfig.c new file mode 100644 index 0000000..350d8a9 --- /dev/null +++ b/exec/logconfig.c @@ -0,0 +1,757 @@ +/* + * Copyright (c) 2002-2005 MontaVista Software, Inc. + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * Jan Friesse (jfriesse@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include <corosync/totem/totem.h> +#include <corosync/logsys.h> +#ifdef LOGCONFIG_USE_ICMAP +#include <corosync/icmap.h> +#define MAP_KEYNAME_MAXLEN ICMAP_KEYNAME_MAXLEN +#define map_get_string(key_name, value) icmap_get_string(key_name, value) +#else +#include <corosync/cmap.h> +static cmap_handle_t cmap_handle; +static const char *main_logfile; +#define MAP_KEYNAME_MAXLEN CMAP_KEYNAME_MAXLEN +#define map_get_string(key_name, value) cmap_get_string(cmap_handle, key_name, value) +#endif + +#include "util.h" +#include "logconfig.h" +#include "totemknet.h" + +static char error_string_response[512]; + +/** + * insert_into_buffer + * @target_buffer: a buffer where to write results + * @bufferlen: tell us the size of the buffer to avoid overflows + * @entry: entry that needs to be added to the buffer + * @after: can either be NULL or set to a string. + * if NULL, @entry is prependend to logsys_format_get buffer. + * if set, @entry is added immediately after @after. + * + * Since the function is specific to logsys_format_get handling, it is implicit + * that source is logsys_format_get(); + * + * In case of failure, target_buffer could be left dirty. So don't trust + * any data leftover in it. + * + * Searching for "after" assumes that there is only entry of "after" + * in the source. Afterall we control the string here and for logging format + * it makes little to no sense to have duplicate format entries. + * + * Returns: 0 on success, -1 on failure + **/ +static int insert_into_buffer( + char *target_buffer, + size_t bufferlen, + const char *entry, + const char *after) +{ + const char *current_format = NULL; + + current_format = logsys_format_get(); + + /* if the entry is already in the format we don't add it again */ + if (strstr(current_format, entry) != NULL) { + return -1; + } + + /* if there is no "after", simply prepend the requested entry + * otherwise go for beautiful string manipulation.... 
</sarcasm> */ + if (!after) { + if (snprintf(target_buffer, bufferlen - 1, "%s%s", + entry, + current_format) >= bufferlen - 1) { + return -1; + } + } else { + const char *afterpos; + size_t afterlen; + size_t templen; + + /* check if after is contained in the format + * and afterlen has a meaning or return an error */ + afterpos = strstr(current_format, after); + afterlen = strlen(after); + if ((!afterpos) || (!afterlen)) { + return -1; + } + + templen = afterpos - current_format + afterlen; + if (snprintf(target_buffer, templen + 1, "%s", current_format) + >= bufferlen - 1) { + return -1; + } + if (snprintf(target_buffer + templen, bufferlen - ( templen + 1 ), + "%s%s", entry, current_format + templen) + >= bufferlen - ( templen + 1 )) { + return -1; + } + } + return 0; +} + +/* + * format set is global specific option that + * doesn't apply at system/subsystem level. + */ +static int corosync_main_config_format_set ( + const char **error_string) +{ + const char *error_reason; + char new_format_buffer[PATH_MAX]; + char *value = NULL; + int err = 0; + char timestamp_str_to_add[8]; + + if (map_get_string("logging.fileline", &value) == CS_OK) { + if (strcmp (value, "on") == 0) { + if (!insert_into_buffer(new_format_buffer, + sizeof(new_format_buffer), + " %f:%l", "g]")) { + err = logsys_format_set(new_format_buffer); + } else + if (!insert_into_buffer(new_format_buffer, + sizeof(new_format_buffer), + "%f:%l", NULL)) { + err = logsys_format_set(new_format_buffer); + } + } else + if (strcmp (value, "off") == 0) { + /* nothing to do here */ + } else { + error_reason = "unknown value for fileline"; + free(value); + goto parse_error; + } + + free(value); + } + + if (err) { + error_reason = "not enough memory to set logging format buffer"; + goto parse_error; + } + + if (map_get_string("logging.function_name", &value) == CS_OK) { + if (strcmp (value, "on") == 0) { + if (!insert_into_buffer(new_format_buffer, + sizeof(new_format_buffer), + "%n:", "f:")) { + err = 
logsys_format_set(new_format_buffer); + } else + if (!insert_into_buffer(new_format_buffer, + sizeof(new_format_buffer), + " %n", "g]")) { + err = logsys_format_set(new_format_buffer); + } + } else + if (strcmp (value, "off") == 0) { + /* nothing to do here */ + } else { + error_reason = "unknown value for function_name"; + free(value); + goto parse_error; + } + + free(value); + } + + if (err) { + error_reason = "not enough memory to set logging format buffer"; + goto parse_error; + } + + memset(timestamp_str_to_add, 0, sizeof(timestamp_str_to_add)); + + if (map_get_string("logging.timestamp", &value) == CS_OK) { + if (strcmp (value, "on") == 0) { + strcpy(timestamp_str_to_add, "%t"); +#ifdef QB_FEATURE_LOG_HIRES_TIMESTAMPS + } else if (strcmp (value, "hires") == 0) { + strcpy(timestamp_str_to_add, "%T"); +#endif + } else if (strcmp (value, "off") == 0) { + /* nothing to do here */ + } else { + error_reason = "unknown value for timestamp"; + free(value); + goto parse_error; + } + + free(value); + } else { + /* + * Display hires timestamp by default, otherwise standard timestamp + */ +#ifdef QB_FEATURE_LOG_HIRES_TIMESTAMPS + strcpy(timestamp_str_to_add, "%T"); +#else + strcpy(timestamp_str_to_add, "%t"); +#endif + } + + if(strcmp(timestamp_str_to_add, "") != 0) { + strcat(timestamp_str_to_add, " "); + if (insert_into_buffer(new_format_buffer, sizeof(new_format_buffer), + timestamp_str_to_add, NULL) == 0) { + err = logsys_format_set(new_format_buffer); + } + } + + if (err) { + error_reason = "not enough memory to set logging format buffer"; + goto parse_error; + } + + return (0); + +parse_error: + *error_string = error_reason; + + return (-1); +} + +/* + * blackbox is another global specific option that + * doesn't apply at system/subsystem level. 
+ */ +static int corosync_main_config_blackbox_set ( + const char **error_string) +{ + const char *error_reason; + char *value = NULL; + + if (map_get_string("logging.blackbox", &value) == CS_OK) { + if (strcmp (value, "on") == 0) { + (void)logsys_blackbox_set(QB_TRUE); + } else if (strcmp (value, "off") == 0) { + (void)logsys_blackbox_set(QB_FALSE); + } else { + error_reason = "unknown value for blackbox"; + free(value); + goto parse_error; + } + + free(value); + } else { + (void)logsys_blackbox_set(QB_TRUE); + } + + return (0); + +parse_error: + *error_string = error_reason; + + return (-1); +} + +static int corosync_main_config_log_destination_set ( + const char *path, + const char *key, + const char *subsys, + const char **error_string, + unsigned int mode_mask, + char deprecated, + char default_value, + const char *replacement) +{ + static char formatted_error_reason[128]; + char *value = NULL; + unsigned int mode; + char key_name[MAP_KEYNAME_MAXLEN]; + + snprintf(key_name, MAP_KEYNAME_MAXLEN, "%s.%s", path, key); + if (map_get_string(key_name, &value) == CS_OK) { + if (deprecated) { + log_printf(LOGSYS_LEVEL_WARNING, + "Warning: the %s config parameter has been obsoleted." 
+ " See corosync.conf man page %s directive.", + key, replacement); + } + + mode = logsys_config_mode_get (subsys); + + if (strcmp (value, "yes") == 0 || strcmp (value, "on") == 0) { + mode |= mode_mask; + if (logsys_config_mode_set(subsys, mode) < 0) { + sprintf (formatted_error_reason, "unable to set mode %s", key); + goto parse_error; + } + } else + if (strcmp (value, "no") == 0 || strcmp (value, "off") == 0) { + mode &= ~mode_mask; + if (logsys_config_mode_set(subsys, mode) < 0) { + sprintf (formatted_error_reason, "unable to unset mode %s", key); + goto parse_error; + } + } else { + sprintf (formatted_error_reason, "unknown value for %s", key); + goto parse_error; + } + } + /* Set to default if we are the top-level logger */ + else if (!subsys && !deprecated) { + + mode = logsys_config_mode_get (subsys); + if (default_value) { + mode |= mode_mask; + } + else { + mode &= ~mode_mask; + } + if (logsys_config_mode_set(subsys, mode) < 0) { + sprintf (formatted_error_reason, "unable to change mode %s", key); + goto parse_error; + } + } + + free(value); + return (0); + +parse_error: + *error_string = formatted_error_reason; + free(value); + return (-1); +} + +static int corosync_main_config_set ( + const char *path, + const char *subsys, + const char **error_string) +{ + const char *error_reason = error_string_response; + char *value = NULL; + int mode; + char key_name[MAP_KEYNAME_MAXLEN]; + + /* + * this bit abuses the internal logsys exported API + * to guarantee that all configured subsystems are + * initialized too. 
+ * + * using this approach avoids some headaches caused + * by IPC and TOTEM that have a special logging + * handling requirements + */ + if (subsys != NULL) { + if (_logsys_subsys_create(subsys, NULL) < 0) { + error_reason = "unable to create new logging subsystem"; + goto parse_error; + } + } + + mode = logsys_config_mode_get(subsys); + if (mode < 0) { + error_reason = "unable to get mode"; + goto parse_error; + } + + if (corosync_main_config_log_destination_set (path, "to_stderr", subsys, &error_reason, + LOGSYS_MODE_OUTPUT_STDERR, 0, 1, NULL) != 0) + goto parse_error; + + if (corosync_main_config_log_destination_set (path, "to_syslog", subsys, &error_reason, + LOGSYS_MODE_OUTPUT_SYSLOG, 0, 1, NULL) != 0) + goto parse_error; + + snprintf(key_name, MAP_KEYNAME_MAXLEN, "%s.%s", path, "syslog_facility"); + if (map_get_string(key_name, &value) == CS_OK) { + int syslog_facility; + + syslog_facility = qb_log_facility2int(value); + if (syslog_facility < 0) { + error_reason = "unknown syslog facility specified"; + goto parse_error; + } + if (logsys_config_syslog_facility_set(subsys, + syslog_facility) < 0) { + error_reason = "unable to set syslog facility"; + goto parse_error; + } + + free(value); + } + else { + /* Set default here in case of a reload */ + if (logsys_config_syslog_facility_set(subsys, + qb_log_facility2int("daemon")) < 0) { + error_reason = "unable to set syslog facility"; + goto parse_error; + } + } + + snprintf(key_name, MAP_KEYNAME_MAXLEN, "%s.%s", path, "syslog_level"); + if (map_get_string(key_name, &value) == CS_OK) { + int syslog_priority; + + log_printf(LOGSYS_LEVEL_WARNING, + "Warning: the syslog_level config parameter has been obsoleted." 
+ " See corosync.conf man page syslog_priority directive."); + + syslog_priority = logsys_priority_id_get(value); + free(value); + + if (syslog_priority < 0) { + error_reason = "unknown syslog level specified"; + goto parse_error; + } + if (logsys_config_syslog_priority_set(subsys, + syslog_priority) < 0) { + error_reason = "unable to set syslog level"; + goto parse_error; + } + } + + snprintf(key_name, MAP_KEYNAME_MAXLEN, "%s.%s", path, "syslog_priority"); + if (map_get_string(key_name, &value) == CS_OK) { + int syslog_priority; + + syslog_priority = logsys_priority_id_get(value); + free(value); + if (syslog_priority < 0) { + error_reason = "unknown syslog priority specified"; + goto parse_error; + } + if (logsys_config_syslog_priority_set(subsys, + syslog_priority) < 0) { + error_reason = "unable to set syslog priority"; + goto parse_error; + } + } + else if(strcmp(key_name, "logging.syslog_priority") == 0){ + if (logsys_config_syslog_priority_set(subsys, + logsys_priority_id_get("info")) < 0) { + error_reason = "unable to set syslog level"; + goto parse_error; + } + } + +#ifdef LOGCONFIG_USE_ICMAP + snprintf(key_name, MAP_KEYNAME_MAXLEN, "%s.%s", path, "logfile"); + if (map_get_string(key_name, &value) == CS_OK) { + if (logsys_config_file_set (subsys, &error_reason, value) < 0) { + goto parse_error; + } + free(value); + } +#else + if (!subsys) { + if (logsys_config_file_set (subsys, &error_reason, main_logfile) < 0) { + goto parse_error; + } + } +#endif + + if (corosync_main_config_log_destination_set (path, "to_file", subsys, &error_reason, + LOGSYS_MODE_OUTPUT_FILE, 1, 0, "to_logfile") != 0) + goto parse_error; + + if (corosync_main_config_log_destination_set (path, "to_logfile", subsys, &error_reason, + LOGSYS_MODE_OUTPUT_FILE, 0, 0, NULL) != 0) + goto parse_error; + + snprintf(key_name, MAP_KEYNAME_MAXLEN, "%s.%s", path, "logfile_priority"); + if (map_get_string(key_name, &value) == CS_OK) { + int logfile_priority; + + logfile_priority = 
logsys_priority_id_get(value); + free(value); + if (logfile_priority < 0) { + error_reason = "unknown logfile priority specified"; + goto parse_error; + } + if (logsys_config_logfile_priority_set(subsys, + logfile_priority) < 0) { + error_reason = "unable to set logfile priority"; + goto parse_error; + } + } + else if(strcmp(key_name,"logging.logfile_priority") == 0){ + if (logsys_config_logfile_priority_set(subsys, + logsys_priority_id_get("info")) < 0) { + error_reason = "unable to set syslog level"; + goto parse_error; + } + } + + snprintf(key_name, MAP_KEYNAME_MAXLEN, "%s.%s", path, "debug"); + if (map_get_string(key_name, &value) == CS_OK) { + if (strcmp (value, "trace") == 0) { + if (logsys_config_debug_set (subsys, LOGSYS_DEBUG_TRACE) < 0) { + error_reason = "unable to set debug trace"; + free(value); + goto parse_error; + } + } else + if (strcmp (value, "on") == 0) { + if (logsys_config_debug_set (subsys, LOGSYS_DEBUG_ON) < 0) { + error_reason = "unable to set debug on"; + free(value); + goto parse_error; + } + } else + if (strcmp (value, "off") == 0) { + if (logsys_config_debug_set (subsys, LOGSYS_DEBUG_OFF) < 0) { + error_reason = "unable to set debug off"; + free(value); + goto parse_error; + } + } else { + error_reason = "unknown value for debug"; + free(value); + goto parse_error; + } + free(value); + } + else { + if (logsys_config_debug_set (subsys, LOGSYS_DEBUG_OFF) < 0) { + error_reason = "unable to set debug off"; + goto parse_error; + } + } + + return (0); + +parse_error: + *error_string = error_reason; + + return (-1); +} + +static int corosync_main_config_read_logging ( + const char **error_string) +{ + const char *error_reason; +#ifdef LOGCONFIG_USE_ICMAP + icmap_iter_t iter; + const char *key_name; +#else + cmap_iter_handle_t iter; + char key_name[CMAP_KEYNAME_MAXLEN]; +#endif + char key_subsys[MAP_KEYNAME_MAXLEN]; + char key_item[MAP_KEYNAME_MAXLEN]; + int res; + + /* format set is supported only for toplevel */ + if 
(corosync_main_config_format_set(&error_reason) < 0) { + goto parse_error; + } + + if (corosync_main_config_blackbox_set(&error_reason) < 0) { + goto parse_error; + } + + if (corosync_main_config_set ("logging", NULL, &error_reason) < 0) { + goto parse_error; + } + + /* + * we will need 2 of these to compensate for new logging + * config format + */ +#ifdef LOGCONFIG_USE_ICMAP + iter = icmap_iter_init("logging.logger_subsys."); + while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) { +#else + cmap_iter_init(cmap_handle, "logging.logger_subsys.", &iter); + while ((cmap_iter_next(cmap_handle, iter, key_name, NULL, NULL)) == CS_OK) { +#endif + res = sscanf(key_name, "logging.logger_subsys.%[^.].%s", key_subsys, key_item); + + if (res != 2) { + continue ; + } + + if (strcmp(key_item, "subsys") != 0) { + continue ; + } + + if (snprintf(key_item, MAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s", + key_subsys) >= MAP_KEYNAME_MAXLEN) { + /* + * This should never happen + */ + error_reason = "Can't snprintf logger_subsys key_item"; + goto parse_error; + } + + if (corosync_main_config_set(key_item, key_subsys, &error_reason) < 0) { + goto parse_error; + } + } +#ifdef LOGCONFIG_USE_ICMAP + icmap_iter_finalize(iter); +#else + cmap_iter_finalize(cmap_handle, iter); +#endif + + logsys_config_apply(); + + /* Reconfigure knet logging */ + totemknet_configure_log_level(); + return 0; + +parse_error: + *error_string = error_reason; + + return (-1); +} + +#ifdef LOGCONFIG_USE_ICMAP +static void main_logging_notify( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +#else +static void main_logging_notify( + cmap_handle_t cmap_handle_unused, + cmap_handle_t cmap_track_handle_unused, + int32_t event, + const char *key_name, + struct cmap_notify_value new_val, + struct cmap_notify_value old_val, + void *user_data) +#endif +{ + const char *error_string; + static int reload_in_progress = 0; + + /* 
If a full reload happens then suspend updates for individual keys until + * it's all completed + */ + if (strcmp(key_name, "config.reload_in_progress") == 0) { + if (*(uint8_t *)new_val.data == 1) { + reload_in_progress = 1; + } else { + reload_in_progress = 0; + } + } + if (reload_in_progress) { + log_printf(LOGSYS_LEVEL_DEBUG, "Ignoring key change, reload in progress. %s\n", key_name); + return; + } + + /* + * Reload the logsys configuration + */ + if (logsys_format_set(NULL) == -1) { + fprintf (stderr, "Unable to setup logging format.\n"); + } + corosync_main_config_read_logging(&error_string); +} + +#ifdef LOGCONFIG_USE_ICMAP +static void add_logsys_config_notification(void) +{ + icmap_track_t icmap_track = NULL; + + icmap_track_add("logging.", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, + main_logging_notify, + NULL, + &icmap_track); + + icmap_track_add("config.reload_in_progress", + ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY, + main_logging_notify, + NULL, + &icmap_track); +} +#else +static void add_logsys_config_notification(void) +{ + cmap_track_handle_t cmap_track; + + cmap_track_add(cmap_handle, "logging.", + CMAP_TRACK_ADD | CMAP_TRACK_DELETE | CMAP_TRACK_MODIFY | CMAP_TRACK_PREFIX, + main_logging_notify, + NULL, + &cmap_track); + + cmap_track_add(cmap_handle, "config.reload_in_progress", + CMAP_TRACK_ADD | CMAP_TRACK_MODIFY, + main_logging_notify, + NULL, + &cmap_track); +} +#endif + +int corosync_log_config_read ( +#ifndef LOGCONFIG_USE_ICMAP + cmap_handle_t cmap_h, + const char *default_logfile, +#endif + const char **error_string) +{ + const char *error_reason = error_string_response; + +#ifndef LOGCONFIG_USE_ICMAP + if (!cmap_h) { + error_reason = "No cmap handle"; + return (-1); + } + if (!default_logfile) { + error_reason = "No default logfile"; + return (-1); + } + cmap_handle = cmap_h; + main_logfile = default_logfile; +#endif + + if (corosync_main_config_read_logging(error_string) < 0) { + error_reason = 
*error_string; + goto parse_error; + } + + add_logsys_config_notification(); + + return 0; + +parse_error: + snprintf (error_string_response, sizeof(error_string_response), + "parse error in config: %s.\n", + error_reason); + + *error_string = error_string_response; + return (-1); +} diff --git a/exec/logconfig.h b/exec/logconfig.h new file mode 100644 index 0000000..14305ad --- /dev/null +++ b/exec/logconfig.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2002-2005 MontaVista Software, Inc. + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef LOGCONFIG_H_DEFINED +#define LOGCONFIG_H_DEFINED + +#include <corosync/logsys.h> +#include <corosync/coroapi.h> +#include <corosync/cmap.h> + +/** + * All service handlers + * + * One record per service: its name, a version number and a handle. + * NOTE(review): nothing in the visible logging code references this + * struct or MAX_DYNAMIC_SERVICES - confirm they are still needed here. + */ +struct dynamic_service { + char *name; + unsigned int ver; + unsigned int handle; +}; +#define MAX_DYNAMIC_SERVICES 128 + +#ifdef LOGCONFIG_USE_ICMAP +extern int corosync_log_config_read ( + const char **error_string); +#else +extern int corosync_log_config_read ( + cmap_handle_t cmap_h, + const char *default_logfile, + const char **error_string); +#endif + +#endif /* LOGCONFIG_H_DEFINED */ diff --git a/exec/logsys.c b/exec/logsys.c new file mode 100644 index 0000000..30a4ee6 --- /dev/null +++ b/exec/logsys.c @@ -0,0 +1,952 @@ +/* + * Copyright (c) 2002-2004 MontaVista Software, Inc. + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * Author: Steven Dake (sdake@redhat.com) + * Author: Lon Hohberger (lhh@redhat.com) + * Author: Fabio M. Di Nitto (fdinitto@redhat.com) + * + * All rights reserved. + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdint.h> +#include <ctype.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include <qb/qbdefs.h> +#include <qb/qbutil.h> +#include <qb/qblog.h> + +#include <corosync/logsys.h> + +/* + * syslog prioritynames, facility names to value mapping + * Some C libraries build this in to their headers, but it is non-portable + * so logsys supplies its own version. 
+ */ +struct syslog_names { + const char *c_name; + int c_val; +}; + +static struct syslog_names prioritynames[] = +{ + { "alert", LOG_ALERT }, + { "crit", LOG_CRIT }, + { "debug", LOG_DEBUG }, + { "emerg", LOG_EMERG }, + { "err", LOG_ERR }, + { "error", LOG_ERR }, + { "info", LOG_INFO }, + { "notice", LOG_NOTICE }, + { "warning", LOG_WARNING }, + { NULL, -1 } +}; + +#define MAX_FILES_PER_SUBSYS 32 +#ifdef HAVE_SMALL_MEMORY_FOOTPRINT +#define IPC_LOGSYS_SIZE 8192*64 +#else +#define IPC_LOGSYS_SIZE 8192*1024 +#endif + +/* + * need unlogical order to preserve 64bit alignment + */ +struct logsys_logger { + char subsys[LOGSYS_MAX_SUBSYS_NAMELEN]; /* subsystem name */ + char *logfile; /* log to file */ + unsigned int mode; /* subsystem mode */ + unsigned int debug; /* debug on|off|trace */ + int syslog_priority; /* priority */ + int logfile_priority; /* priority to file */ + int init_status; /* internal field to handle init queues + for subsystems */ + int32_t target_id; + char *files[MAX_FILES_PER_SUBSYS]; + int32_t file_idx; + int32_t dirty; +}; + +/* values for logsys_logger init_status */ +#define LOGSYS_LOGGER_INIT_DONE 0 +#define LOGSYS_LOGGER_NEEDS_INIT 1 + +static int logsys_system_needs_init = LOGSYS_LOGGER_NEEDS_INIT; + +static struct logsys_logger logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT + 1]; + +static pthread_mutex_t logsys_config_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode); +static void _logsys_config_apply_per_file(int32_t s, const char *filename); +static void _logsys_config_apply_per_subsys(int32_t s); +static void _logsys_subsys_filename_add (int32_t s, const char *filename); +static void logsys_file_format_get(char* file_format, int buf_len); + +static char *format_buffer=NULL; + +static int logsys_thread_started = 0; + +static int logsys_blackbox_enabled = 1; + +static int _logsys_config_subsys_get_unlocked (const char *subsys) +{ + unsigned int i; + + if (!subsys) { + 
return LOGSYS_MAX_SUBSYS_COUNT; + } + + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + if (strcmp (logsys_loggers[i].subsys, subsys) == 0) { + return i; + } + } + + return (-1); +} + + +/* + * we need a version that can work when somebody else is already + * holding a config mutex lock or we will never get out of here + */ +static int logsys_config_file_set_unlocked ( + int subsysid, + const char **error_string, + const char *file) +{ + static char error_string_response[512]; + int i; + char file_format[128]; + + if (logsys_loggers[subsysid].target_id > 0) { + int32_t f; + for (f = 0; f < logsys_loggers[subsysid].file_idx; f++) { + qb_log_filter_ctl(logsys_loggers[subsysid].target_id, + QB_LOG_FILTER_REMOVE, + QB_LOG_FILTER_FILE, + logsys_loggers[subsysid].files[f], + LOG_TRACE); + } + } + + logsys_loggers[subsysid].dirty = QB_TRUE; + if (file == NULL) { + return (0); + } + + if (logsys_loggers[subsysid].target_id > 0 && + logsys_loggers[subsysid].logfile != NULL && + strcmp(file, logsys_loggers[subsysid].logfile) == 0) { + return (0); + } + + if (strlen(file) >= PATH_MAX) { + snprintf (error_string_response, + sizeof(error_string_response), + "%s: logfile name exceed maximum system filename length", + logsys_loggers[subsysid].subsys); + *error_string = error_string_response; + return (-1); + } + + if (logsys_loggers[subsysid].logfile != NULL) { + free(logsys_loggers[subsysid].logfile); + logsys_loggers[subsysid].logfile = NULL; + } + + logsys_loggers[subsysid].logfile = strdup(file); + + if (logsys_loggers[subsysid].logfile == NULL) { + snprintf (error_string_response, + sizeof(error_string_response), + "Unable to allocate memory for logfile '%s'", + file); + *error_string = error_string_response; + return (-1); + } + + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + if ((logsys_loggers[i].logfile != NULL) && + (strcmp (logsys_loggers[i].logfile, file) == 0) && + (i != subsysid)) { + /* we have found another subsys with this config file + * so add a filter 
+ */ + logsys_loggers[subsysid].target_id = logsys_loggers[i].target_id; + return (0); + } + } + + if (logsys_loggers[subsysid].target_id > 0) { + int num_using_current = 0; + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + if (logsys_loggers[subsysid].target_id == + logsys_loggers[i].target_id) { + num_using_current++; + } + } + if (num_using_current == 1) { + /* no one else is using this close it */ + qb_log_file_close(logsys_loggers[subsysid].target_id); + } + } + + logsys_loggers[subsysid].target_id = qb_log_file_open(file); + if (logsys_loggers[subsysid].target_id < 0) { + int err = -logsys_loggers[subsysid].target_id; + char error_str[LOGSYS_MAX_PERROR_MSG_LEN]; + const char *error_ptr; + error_ptr = qb_strerror_r(err, error_str, sizeof(error_str)); + + free(logsys_loggers[subsysid].logfile); + logsys_loggers[subsysid].logfile = NULL; + snprintf (error_string_response, + sizeof(error_string_response), + "Can't open logfile '%s' for reason: %s (%d)", + file, error_ptr, err); + *error_string = error_string_response; + return (-1); + } + logsys_file_format_get(file_format, 128); + qb_log_format_set(logsys_loggers[subsysid].target_id, file_format); + + qb_log_ctl(logsys_loggers[subsysid].target_id, + QB_LOG_CONF_ENABLED, + (logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_FILE)); + if (logsys_thread_started) { + qb_log_ctl(logsys_loggers[subsysid].target_id, QB_LOG_CONF_THREADED, QB_TRUE); + } + + return (0); +} + +static void logsys_subsys_init ( + const char *subsys, + int subsysid) +{ + if (logsys_system_needs_init == LOGSYS_LOGGER_NEEDS_INIT) { + logsys_loggers[subsysid].init_status = + LOGSYS_LOGGER_NEEDS_INIT; + } else { + logsys_loggers[subsysid].mode = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode; + logsys_loggers[subsysid].debug = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].debug; + logsys_loggers[subsysid].syslog_priority = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].syslog_priority; + logsys_loggers[subsysid].logfile_priority = 
logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].logfile_priority; + logsys_loggers[subsysid].init_status = LOGSYS_LOGGER_INIT_DONE; + } + strncpy (logsys_loggers[subsysid].subsys, subsys, + sizeof (logsys_loggers[subsysid].subsys)); + logsys_loggers[subsysid].subsys[ + sizeof (logsys_loggers[subsysid].subsys) - 1] = '\0'; + logsys_loggers[subsysid].file_idx = 0; +} + +static const char *_logsys_tags_stringify(uint32_t tags) +{ + if (tags == QB_LOG_TAG_LIBQB_MSG) { + return "QB"; + } else { + return logsys_loggers[tags].subsys; + } +} + +void logsys_system_fini (void) +{ + int i; + int f; + for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { + free(logsys_loggers[i].logfile); + for (f = 0; f < logsys_loggers[i].file_idx; f++) { + free(logsys_loggers[i].files[f]); + } + } + + qb_log_fini (); +} + +/* + * Internal API - exported + */ + +int _logsys_system_setup( + const char *mainsystem, + unsigned int mode, + int syslog_facility, + int syslog_priority) +{ + int i; + int32_t fidx; + char tempsubsys[LOGSYS_MAX_SUBSYS_NAMELEN]; + + if ((mainsystem == NULL) || + (strlen(mainsystem) >= LOGSYS_MAX_SUBSYS_NAMELEN)) { + return -1; + } + + /* + * Setup libqb as a subsys + */ + i = _logsys_subsys_create ("QB", "array.c,log.c,log_syslog.c,log_blackbox.c,log_format.c," + "log_file.c,log_dcs.c,log_thread.c,ipc_shm.c,ipcs.c,ipc_us.c,loop.c," + "loop_poll_epoll.c,loop_job.c,loop_poll_poll.c,loop_poll_kqueue.c," + "loop_timerlist.c,loop_poll.c,ringbuffer.c,ringbuffer_helper.c,trie.c," + "map.c,skiplist.c,rpl_sem.c,hdb.c,unix.c,hashtable.c,strlcpy.c,ipc_socket.c," + "strchrnul.c,ipc_setup.c,strlcat.c"); + if (i < 0) { + return -1; + } + + /* + * name clash + * _logsys_subsys_filename_add (i, "util.c"); + */ + + /* + * This file (logsys.c) is not exactly QB. We need tag for logsys.c if flightrecorder init + * fails, and QB seems to be closest. 
+ */ + _logsys_subsys_filename_add (i, "logsys.c"); + + i = LOGSYS_MAX_SUBSYS_COUNT; + + pthread_mutex_lock (&logsys_config_mutex); + + snprintf(logsys_loggers[i].subsys, + LOGSYS_MAX_SUBSYS_NAMELEN, + "%s", mainsystem); + + logsys_loggers[i].mode = mode; + logsys_loggers[i].debug = LOGSYS_DEBUG_OFF; + logsys_loggers[i].file_idx = 0; + logsys_loggers[i].logfile_priority = syslog_priority; + logsys_loggers[i].syslog_priority = syslog_priority; + + qb_log_init(mainsystem, syslog_facility, syslog_priority); + if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_STDERR) { + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); + } else { + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); + } + if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_SYSLOG) { + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); + } else { + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); + } + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_PRIORITY_BUMP, LOG_INFO - LOG_DEBUG); + + qb_log_filter_ctl(QB_LOG_BLACKBOX, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, "*", LOG_TRACE); + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, IPC_LOGSYS_SIZE); + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_THREADED, QB_FALSE); + + /* + * Blackbox is disabled at the init and enabled later based + * on config (logging.blackbox) value. 
+ */ + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); + + if (logsys_format_set(NULL) == -1) { + pthread_mutex_unlock (&logsys_config_mutex); + + return -1; + } + + qb_log_tags_stringify_fn_set(_logsys_tags_stringify); + + logsys_loggers[i].init_status = LOGSYS_LOGGER_INIT_DONE; + logsys_system_needs_init = LOGSYS_LOGGER_INIT_DONE; + + for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { + if ((strcmp (logsys_loggers[i].subsys, "") != 0) && + (logsys_loggers[i].init_status == + LOGSYS_LOGGER_NEEDS_INIT)) { + fidx = logsys_loggers[i].file_idx; + strncpy (tempsubsys, logsys_loggers[i].subsys, + sizeof (tempsubsys)); + tempsubsys[sizeof (tempsubsys) - 1] = '\0'; + logsys_subsys_init(tempsubsys, i); + logsys_loggers[i].file_idx = fidx; + _logsys_config_mode_set_unlocked(i, logsys_loggers[i].mode); + _logsys_config_apply_per_subsys(i); + } + } + + pthread_mutex_unlock (&logsys_config_mutex); + + return (0); +} + + +/* + * Remember that source file filename belongs to subsystem s. + * A NULL filename, or one that is already registered for s, is ignored. + * Once logsys is initialized (logsys_system_needs_init is + * LOGSYS_LOGGER_INIT_DONE) the per-file configuration is applied at once. + */ +static void _logsys_subsys_filename_add (int32_t s, const char *filename) +{ + int i; + char *filename_copy; + + if (filename == NULL) { + return; + } + assert(logsys_loggers[s].file_idx < MAX_FILES_PER_SUBSYS); + assert(logsys_loggers[s].file_idx >= 0); + + for (i = 0; i < logsys_loggers[s].file_idx; i++) { + if (strcmp(logsys_loggers[s].files[i], filename) == 0) { + return; + } + } + + /* + * Never store a NULL entry: the duplicate check above calls strcmp() + * on every slot below file_idx, so a failed strdup() must not be + * recorded. + */ + filename_copy = strdup(filename); + if (filename_copy == NULL) { + return; + } + logsys_loggers[s].files[logsys_loggers[s].file_idx++] = filename_copy; + + if (logsys_system_needs_init == LOGSYS_LOGGER_INIT_DONE) { + _logsys_config_apply_per_file(s, filename); + } +} + +int _logsys_subsys_create (const char *subsys, const char *filename) +{ + int i; + + if ((subsys == NULL) || + (strlen(subsys) >= LOGSYS_MAX_SUBSYS_NAMELEN)) { + return -1; + } + + pthread_mutex_lock (&logsys_config_mutex); + + i = _logsys_config_subsys_get_unlocked (subsys); + if ((i > -1) && (i < LOGSYS_MAX_SUBSYS_COUNT)) { + _logsys_subsys_filename_add(i, filename); + pthread_mutex_unlock (&logsys_config_mutex); + return i; + } + + for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { + if (strcmp 
(logsys_loggers[i].subsys, "") == 0) { + logsys_subsys_init(subsys, i); + _logsys_subsys_filename_add(i, filename); + break; + } + } + + if (i >= LOGSYS_MAX_SUBSYS_COUNT) { + i = -1; + } + + pthread_mutex_unlock (&logsys_config_mutex); + return i; +} + +int _logsys_config_subsys_get (const char *subsys) +{ + unsigned int i; + + pthread_mutex_lock (&logsys_config_mutex); + + i = _logsys_config_subsys_get_unlocked (subsys); + + pthread_mutex_unlock (&logsys_config_mutex); + + return i; +} + +static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode) +{ + if ( logsys_loggers[subsysid].mode == new_mode) { + return 0; + } + if (logsys_loggers[subsysid].target_id > 0) { + qb_log_ctl(logsys_loggers[subsysid].target_id, + QB_LOG_CONF_ENABLED, + (new_mode & LOGSYS_MODE_OUTPUT_FILE)); + } + + if (subsysid == LOGSYS_MAX_SUBSYS_COUNT) { + qb_log_ctl(QB_LOG_STDERR, + QB_LOG_CONF_ENABLED, + (new_mode & LOGSYS_MODE_OUTPUT_STDERR)); + qb_log_ctl(QB_LOG_SYSLOG, + QB_LOG_CONF_ENABLED, + (new_mode & LOGSYS_MODE_OUTPUT_SYSLOG)); + } + logsys_loggers[subsysid].mode = new_mode; + return 0; +} + +int logsys_config_mode_set (const char *subsys, unsigned int mode) +{ + int i; + + pthread_mutex_lock (&logsys_config_mutex); + if (subsys != NULL) { + i = _logsys_config_subsys_get_unlocked (subsys); + if (i >= 0) { + i = _logsys_config_mode_set_unlocked(i, mode); + } + } else { + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + _logsys_config_mode_set_unlocked(i, mode); + } + i = 0; + } + + pthread_mutex_unlock (&logsys_config_mutex); + + return i; +} + +unsigned int logsys_config_mode_get (const char *subsys) +{ + int i; + + i = _logsys_config_subsys_get (subsys); + if (i < 0) { + return i; + } + + return logsys_loggers[i].mode; +} + +int logsys_config_file_set ( + const char *subsys, + const char **error_string, + const char *file) +{ + int i; + int res; + + pthread_mutex_lock (&logsys_config_mutex); + + if (subsys != NULL) { + i = 
_logsys_config_subsys_get_unlocked (subsys); + if (i < 0) { + res = i; + } else { + res = logsys_config_file_set_unlocked(i, error_string, file); + } + } else { + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + res = logsys_config_file_set_unlocked(i, error_string, file); + if (res < 0) { + break; + } + } + } + + pthread_mutex_unlock (&logsys_config_mutex); + return res; +} + +/* + * Build the log file format in file_format (a buffer of buf_len bytes) + * from format_buffer: a %t or %T found in format_buffer is kept as the + * leading timestamp, then "[%P] %H %N" is added, followed by whatever + * comes after that timestamp specifier (or the whole format_buffer when + * there is none). Text that does not fit is truncated. + */ +static void +logsys_file_format_get(char* file_format, int buf_len) +{ + char *format_buffer_start; + char *str_pos; + + file_format[0] = '\0'; + + format_buffer_start = format_buffer; + + if ((str_pos = strstr(format_buffer, "%t"))) { + strcpy(file_format, "%t "); + format_buffer_start = str_pos + 2; + } + + if ((str_pos = strstr(format_buffer, "%T"))) { + strcpy(file_format, "%T "); + format_buffer_start = str_pos + 2; + } + + strcat(file_format, "[%P] %H %N"); + /* + * strncat() appends up to n characters and then a terminating NUL, + * so one byte of the remaining space has to be reserved for the NUL. + */ + strncat(file_format, format_buffer_start, buf_len - strlen(file_format) - 1); +} + +int logsys_format_set (const char *format) +{ + int i; + int c; + int w; + int reminder; + char syslog_format[128]; + char file_format[128]; + + if (format_buffer) { + free(format_buffer); + format_buffer = NULL; + } + + format_buffer = strdup(format ? format : "%7p [%6g] %b"); + if (format_buffer == NULL) { + return -1; + } + + qb_log_format_set(QB_LOG_STDERR, format_buffer); + + logsys_file_format_get(file_format, 128); + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + if (logsys_loggers[i].target_id > 0) { + qb_log_format_set(logsys_loggers[i].target_id, file_format); + } + } + + /* + * This just goes through and remove %t, %T and %p from + * the format string for syslog. 
+ */ + w = 0; + memset(syslog_format, '\0', sizeof(syslog_format)); + /* + * Stop one byte before the end of syslog_format so the NUL written by + * the memset() above always terminates it, even for an over-long format. + */ + for (c = 0; c < strlen(format_buffer) && + (size_t)w < sizeof(syslog_format) - 1; c++) { + if (format_buffer[c] == '%') { + reminder = c; + for (c++; c < strlen(format_buffer); c++) { + /* ctype functions need a value representable as unsigned char */ + if (isdigit((unsigned char)format_buffer[c])) { + continue; + } + if (format_buffer[c] == 't' || + format_buffer[c] == 'p' || + format_buffer[c] == 'T') { + c++; + } else { + c = reminder; + } + break; + } + } + syslog_format[w] = format_buffer[c]; + w++; + } + qb_log_format_set(QB_LOG_SYSLOG, syslog_format); + + return 0; +} + +char *logsys_format_get (void) +{ + return format_buffer; +} + +int logsys_config_syslog_facility_set ( + const char *subsys, + unsigned int facility) +{ + return qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, facility); +} + +int logsys_config_syslog_priority_set ( + const char *subsys, + unsigned int priority) +{ + int i; + + pthread_mutex_lock (&logsys_config_mutex); + if (subsys != NULL) { + i = _logsys_config_subsys_get_unlocked (subsys); + if (i >= 0) { + logsys_loggers[i].syslog_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; + + i = 0; + } + } else { + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + logsys_loggers[i].syslog_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; + } + i = 0; + } + pthread_mutex_unlock (&logsys_config_mutex); + + return i; +} + +int logsys_config_logfile_priority_set ( + const char *subsys, + unsigned int priority) +{ + int i; + + pthread_mutex_lock (&logsys_config_mutex); + if (subsys != NULL) { + i = _logsys_config_subsys_get_unlocked (subsys); + if (i >= 0) { + logsys_loggers[i].logfile_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; + i = 0; + } + } else { + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + logsys_loggers[i].logfile_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; + } + i = 0; + } + pthread_mutex_unlock (&logsys_config_mutex); + + return i; +} + + +static void _logsys_config_apply_per_file(int32_t s, const char *filename) +{ + uint32_t 
syslog_priority = logsys_loggers[s].syslog_priority; + uint32_t logfile_priority = logsys_loggers[s].logfile_priority; + + qb_log_filter_ctl(s, QB_LOG_TAG_SET, QB_LOG_FILTER_FILE, + filename, LOG_TRACE); + + qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_REMOVE, + QB_LOG_FILTER_FILE, filename, LOG_TRACE); + qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_REMOVE, + QB_LOG_FILTER_FILE, filename, LOG_TRACE); + if (logsys_loggers[s].target_id > 0) { + qb_log_filter_ctl(logsys_loggers[s].target_id, + QB_LOG_FILTER_REMOVE, + QB_LOG_FILTER_FILE, filename, LOG_TRACE); + } + + if (logsys_loggers[s].debug != LOGSYS_DEBUG_OFF) { + switch (logsys_loggers[s].debug) { + case LOGSYS_DEBUG_ON: + syslog_priority = LOG_DEBUG; + logfile_priority = LOG_DEBUG; + break; + case LOGSYS_DEBUG_TRACE: + syslog_priority = LOG_TRACE; + logfile_priority = LOG_TRACE; + break; + default: + assert(0); + } + } + qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, filename, + syslog_priority); + qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, filename, + logfile_priority); + if (logsys_loggers[s].target_id > 0) { + qb_log_filter_ctl(logsys_loggers[s].target_id, + QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, filename, + logfile_priority); + } +} + +static void _logsys_config_apply_per_subsys(int32_t s) +{ + int32_t f; + for (f = 0; f < logsys_loggers[s].file_idx; f++) { + _logsys_config_apply_per_file(s, logsys_loggers[s].files[f]); + } + if (logsys_loggers[s].target_id > 0) { + qb_log_ctl(logsys_loggers[s].target_id, + QB_LOG_CONF_ENABLED, + (logsys_loggers[s].mode & LOGSYS_MODE_OUTPUT_FILE)); + } + logsys_loggers[s].dirty = QB_FALSE; +} + +static void _logsys_config_apply_blackbox(void) { + int blackbox_enable_res; + + blackbox_enable_res = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, logsys_blackbox_enabled); + + if (blackbox_enable_res < 0) { + LOGSYS_PERROR (-blackbox_enable_res, LOGSYS_LEVEL_WARNING, + "Unable to initialize log flight recorder. 
"\ + "The most common cause of this error is " \ + "not enough space on /dev/shm. Corosync will continue work, " \ + "but blackbox will not be available"); + } +} + +void logsys_config_apply(void) +{ + int32_t s; + + _logsys_config_apply_blackbox(); + + for (s = 0; s <= LOGSYS_MAX_SUBSYS_COUNT; s++) { + if (strcmp(logsys_loggers[s].subsys, "") == 0) { + continue; + } + _logsys_config_apply_per_subsys(s); + } +} + +extern int logsys_config_debug_get ( + const char *subsys) +{ + int debug_level = logsys_loggers[0].debug; + int i; + + if (subsys != NULL) { + pthread_mutex_lock (&logsys_config_mutex); + i = _logsys_config_subsys_get_unlocked (subsys); + if (i >= 0) { + debug_level = logsys_loggers[i].debug; + } + pthread_mutex_unlock (&logsys_config_mutex); + } + return debug_level; +} + +int logsys_config_debug_set ( + const char *subsys, + unsigned int debug) +{ + int i; + + pthread_mutex_lock (&logsys_config_mutex); + if (subsys != NULL) { + i = _logsys_config_subsys_get_unlocked (subsys); + if (i >= 0) { + logsys_loggers[i].dirty = QB_TRUE; + logsys_loggers[i].debug = debug; + i = 0; + } + } else { + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + logsys_loggers[i].debug = debug; + logsys_loggers[i].dirty = QB_TRUE; + } + i = 0; + } + pthread_mutex_unlock (&logsys_config_mutex); + + return i; +} + +int logsys_priority_id_get (const char *name) +{ + unsigned int i; + + for (i = 0; prioritynames[i].c_name != NULL; i++) { + if (strcasecmp(name, prioritynames[i].c_name) == 0) { + return (prioritynames[i].c_val); + } + } + return (-1); +} + +int logsys_thread_start (void) +{ + int i; + int err; + + err = qb_log_thread_start(); + if (err != 0) { + return (err); + } + + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_THREADED, QB_TRUE); + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + if (logsys_loggers[i].target_id > 0) { + qb_log_ctl(logsys_loggers[i].target_id, QB_LOG_CONF_THREADED, QB_TRUE); + } + } + + logsys_thread_started = 1; + + return (0); +} + +void 
logsys_blackbox_set(int enable) +{ + + pthread_mutex_lock (&logsys_config_mutex); + + logsys_blackbox_enabled = enable; + + pthread_mutex_unlock (&logsys_config_mutex); +} + +/* + * To set correct pid to qb blackbox filename after tty dettach (fork) we have to + * close (this function) and (if needed) reopen blackbox (logsys_blackbox_postfork function). + */ +void logsys_blackbox_prefork(void) +{ + + (void)qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); +} + +void logsys_blackbox_postfork(void) +{ + + _logsys_config_apply_blackbox(); +} + +cs_error_t logsys_reopen_log_files(void) +{ + cs_error_t res; + +#ifdef HAVE_QB_LOG_FILE_REOPEN + int i, j; + int num_using_current; + int32_t rc; + + res = CS_OK; + + pthread_mutex_lock (&logsys_config_mutex); + + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + if (logsys_loggers[i].target_id <= 0 || logsys_loggers[i].logfile == NULL) { + continue ; + } + + num_using_current = 0; + for (j = 0; j <= i; j++) { + if (logsys_loggers[i].target_id == logsys_loggers[j].target_id) { + num_using_current++; + } + } + if (num_using_current == 1) { + /* + * First instance of target file. Reopen it. + */ + rc = qb_log_file_reopen(logsys_loggers[i].target_id, NULL); + if (rc != 0) { + LOGSYS_PERROR (-rc, LOGSYS_LEVEL_WARNING, + "Unable to reopen log file %s", logsys_loggers[i].logfile); + res = qb_to_cs_error(rc); + } + } + } + + pthread_mutex_unlock (&logsys_config_mutex); +#else + res = CS_ERR_NOT_SUPPORTED; +#endif + + return (res); +} diff --git a/exec/main.c b/exec/main.c new file mode 100644 index 0000000..977aaf5 --- /dev/null +++ b/exec/main.c @@ -0,0 +1,1666 @@ +/* + * Copyright (c) 2002-2006 MontaVista Software, Inc. + * Copyright (c) 2006-2021 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \mainpage Corosync + * + * This is the doxygen generated developer documentation for the Corosync + * project. For more information about Corosync, please see the project + * web site, <a href="http://www.corosync.org">corosync.org</a>. 
+ * + * \section license License + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <pthread.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/file.h> +#include <sys/poll.h> +#include <sys/uio.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <signal.h> +#include <sched.h> +#include <time.h> +#include <semaphore.h> +#include <string.h> + +#ifdef HAVE_LIBSYSTEMD +#include <systemd/sd-daemon.h> +#endif + +#include <qb/qbdefs.h> +#include <qb/qblog.h> +#include <qb/qbloop.h> +#include <qb/qbutil.h> +#include <qb/qbipcs.h> + +#include <corosync/swab.h> +#include <corosync/corotypes.h> +#include <corosync/corodefs.h> +#include <corosync/totem/totempg.h> +#include <corosync/logsys.h> +#include <corosync/icmap.h> + +#include "quorum.h" +#include "totemsrp.h" +#include "logconfig.h" +#include "totemconfig.h" +#include "main.h" +#include "sync.h" +#include "timer.h" +#include "util.h" +#include "apidef.h" +#include "service.h" +#include "schedwrk.h" +#include "ipcs_stats.h" +#include "stats.h" + +#ifdef HAVE_SMALL_MEMORY_FOOTPRINT +#define IPC_LOGSYS_SIZE 1024*64 +#else +#define IPC_LOGSYS_SIZE 8192*128 +#endif + +/* + * LibQB adds default "*" syslog filter so we have to set syslog_priority as low + * as possible so filters applied later in _logsys_config_apply_per_file takes + * effect. 
+ */ +LOGSYS_DECLARE_SYSTEM ("corosync", + LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG, + LOG_DAEMON, + LOG_EMERG); + +LOGSYS_DECLARE_SUBSYS ("MAIN"); + +#define SERVER_BACKLOG 5 + +static int sched_priority = 0; + +static unsigned int service_count = 32; + +static struct totem_logging_configuration totem_logging_configuration; + +static struct corosync_api_v1 *api = NULL; + +static int sync_in_process = 1; + +static qb_loop_t *corosync_poll_handle; + +struct sched_param global_sched_param; + +static corosync_timer_handle_t corosync_stats_timer_handle; + +static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; + +static char corosync_config_file[PATH_MAX + 1] = COROSYSCONFDIR "/corosync.conf"; + +static int lockfile_fd = -1; + +enum move_to_root_cgroup_mode { + MOVE_TO_ROOT_CGROUP_MODE_OFF = 0, + MOVE_TO_ROOT_CGROUP_MODE_ON = 1, + MOVE_TO_ROOT_CGROUP_MODE_AUTO = 2, +}; + +qb_loop_t *cs_poll_handle_get (void) +{ + return (corosync_poll_handle); +} + +int cs_poll_dispatch_add (qb_loop_t * handle, + int fd, + int events, + void *data, + + int (*dispatch_fn) (int fd, + int revents, + void *data)) +{ + return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, + dispatch_fn); +} + +int cs_poll_dispatch_delete(qb_loop_t * handle, int fd) +{ + return qb_loop_poll_del(handle, fd); +} + +void corosync_state_dump (void) +{ + int i; + + for (i = 0; i < SERVICES_COUNT_MAX; i++) { + if (corosync_service[i] && corosync_service[i]->exec_dump_fn) { + corosync_service[i]->exec_dump_fn (); + } + } +} + +const char *corosync_get_config_file(void) +{ + + return (corosync_config_file); +} + +static void corosync_blackbox_write_to_file (void) +{ + char fname[PATH_MAX]; + char fdata_fname[PATH_MAX]; + char time_str[PATH_MAX]; + struct tm cur_time_tm; + time_t cur_time_t; + ssize_t res; + + cur_time_t = time(NULL); + localtime_r(&cur_time_t, &cur_time_tm); + + strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm); + if (snprintf(fname, PATH_MAX, 
"%s/fdata-%s-%lld", + get_state_dir(), + time_str, + (long long int)getpid()) >= PATH_MAX) { + log_printf(LOGSYS_LEVEL_ERROR, "Can't snprintf blackbox file name"); + return ; + } + + if ((res = qb_log_blackbox_write_to_file(fname)) < 0) { + LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file"); + return ; + } + snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_state_dir()); + unlink(fdata_fname); + if (symlink(fname, fdata_fname) == -1) { + log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'", + fname, fdata_fname); + } +} + +static void unlink_all_completed (void) +{ + api->timer_delete (corosync_stats_timer_handle); + qb_loop_stop (corosync_poll_handle); + icmap_fini(); +} + +void corosync_shutdown_request (void) +{ + corosync_service_unlink_all (api, unlink_all_completed); +} + +static int32_t sig_diag_handler (int num, void *data) +{ + corosync_state_dump (); + return 0; +} + +static int32_t sig_exit_handler (int num, void *data) +{ + log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal"); + corosync_service_unlink_all (api, unlink_all_completed); + return 0; +} + +static void sigsegv_handler (int num) +{ + (void)signal (num, SIG_DFL); + corosync_blackbox_write_to_file (); + qb_log_fini(); + raise (num); +} + +#define LOCALHOST_IP inet_addr("127.0.0.1") + +static void *corosync_group_handle; + +static struct totempg_group corosync_group = { + .group = "a", + .group_len = 1 +}; + +static void serialize_lock (void) +{ +} + +static void serialize_unlock (void) +{ +} + +static void corosync_sync_completed (void) +{ + log_printf (LOGSYS_LEVEL_NOTICE, + "Completed service synchronization, ready to provide service."); + sync_in_process = 0; + + cs_ipcs_sync_state_changed(sync_in_process); + cs_ipc_allow_connections(1); + /* + * Inform totem to start using new message queue again + */ + totempg_trans_ack(); + +#ifdef HAVE_LIBSYSTEMD + sd_notify (0, "READY=1"); +#endif +} + +static int 
corosync_sync_callbacks_retrieve ( + int service_id, + struct sync_callbacks *callbacks) +{ + if (corosync_service[service_id] == NULL) { + return (-1); + } + + if (callbacks == NULL) { + return (0); + } + + callbacks->name = corosync_service[service_id]->name; + + callbacks->sync_init = corosync_service[service_id]->sync_init; + callbacks->sync_process = corosync_service[service_id]->sync_process; + callbacks->sync_activate = corosync_service[service_id]->sync_activate; + callbacks->sync_abort = corosync_service[service_id]->sync_abort; + return (0); +} + +static struct memb_ring_id corosync_ring_id; + +static void member_object_joined (unsigned int nodeid) +{ + char member_ip[ICMAP_KEYNAME_MAXLEN]; + char member_join_count[ICMAP_KEYNAME_MAXLEN]; + char member_status[ICMAP_KEYNAME_MAXLEN]; + + snprintf(member_ip, ICMAP_KEYNAME_MAXLEN, + "runtime.members.%u.ip", nodeid); + snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN, + "runtime.members.%u.join_count", nodeid); + snprintf(member_status, ICMAP_KEYNAME_MAXLEN, + "runtime.members.%u.status", nodeid); + + if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) { + icmap_inc(member_join_count); + icmap_set_string(member_status, "joined"); + } else { + icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid)); + icmap_set_uint32(member_join_count, 1); + icmap_set_string(member_status, "joined"); + } + + log_printf (LOGSYS_LEVEL_DEBUG, + "Member joined: %s", api->totem_ifaces_print (nodeid)); +} + +static void member_object_left (unsigned int nodeid) +{ + char member_status[ICMAP_KEYNAME_MAXLEN]; + + snprintf(member_status, ICMAP_KEYNAME_MAXLEN, + "runtime.members.%u.status", nodeid); + icmap_set_string(member_status, "left"); + + log_printf (LOGSYS_LEVEL_DEBUG, + "Member left: %s", api->totem_ifaces_print (nodeid)); +} + +static void confchg_fn ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t 
left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id) +{ + int i; + int abort_activate = 0; + + if (sync_in_process == 1) { + abort_activate = 1; + } + sync_in_process = 1; + cs_ipcs_sync_state_changed(sync_in_process); + memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id)); + + for (i = 0; i < left_list_entries; i++) { + member_object_left (left_list[i]); + } + for (i = 0; i < joined_list_entries; i++) { + member_object_joined (joined_list[i]); + } + /* + * Call configuration change for all services + */ + for (i = 0; i < service_count; i++) { + if (corosync_service[i] && corosync_service[i]->confchg_fn) { + corosync_service[i]->confchg_fn (configuration_type, + member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries, ring_id); + } + } + + if (abort_activate) { + sync_abort (); + } + if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) { + sync_save_transitional (member_list, member_list_entries, ring_id); + } + if (configuration_type == TOTEM_CONFIGURATION_REGULAR) { + sync_start (member_list, member_list_entries, ring_id); + } +} + +static void priv_drop (void) +{ + return; /* TODO: we are still not dropping privs */ +} + +static void corosync_tty_detach (void) +{ + int devnull; + + /* + * Disconnect from TTY if this is not a debug run + */ + + switch (fork ()) { + case -1: + corosync_exit_error (COROSYNC_DONE_FORK); + break; + case 0: + /* + * child which is disconnected, run this process + */ + break; + default: + exit (0); + break; + } + + /* Create new session */ + (void)setsid(); + + /* + * Map stdin/out/err to /dev/null. 
+ */ + devnull = open("/dev/null", O_RDWR); + if (devnull == -1) { + corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); + } + + if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0 + || dup2(devnull, 2) < 0) { + close(devnull); + corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); + } + close(devnull); +} + +static void corosync_mlockall (void) +{ + int res; + struct rlimit rlimit; + + rlimit.rlim_cur = RLIM_INFINITY; + rlimit.rlim_max = RLIM_INFINITY; + +#ifndef RLIMIT_MEMLOCK +#define RLIMIT_MEMLOCK RLIMIT_VMEM +#endif + + res = setrlimit (RLIMIT_MEMLOCK, &rlimit); + if (res == -1) { + LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, + "Could not increase RLIMIT_MEMLOCK, not locking memory"); + return; + } + + res = mlockall (MCL_CURRENT | MCL_FUTURE); + if (res == -1) { + LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, + "Could not lock memory of service to avoid page faults"); + }; +} + + +static void corosync_totem_stats_updater (void *data) +{ + totempg_stats_t * stats; + uint32_t total_mtt_rx_token; + uint32_t total_backlog_calc; + uint32_t total_token_holdtime; + int t, prev; + int32_t token_count; + const char *cstr; + + stats = api->totem_get_stats(); + + + stats->srp->firewall_enabled_or_nic_failure = stats->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0; + + if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER || + stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) { + cstr = ""; + + if (stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) { + cstr = "number of multicast sendmsg failures is above threshold"; + } + + if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER) { + cstr = "totem is continuously in gather state"; + } + + log_printf (LOGSYS_LEVEL_WARNING, + "Totem is unable to form a cluster because of an " + "operating system or network fault (reason: %s). 
The most common " + "cause of this message is that the local firewall is " + "configured improperly.", cstr); + stats->srp->firewall_enabled_or_nic_failure = 1; + } else { + stats->srp->firewall_enabled_or_nic_failure = 0; + } + + total_mtt_rx_token = 0; + total_token_holdtime = 0; + total_backlog_calc = 0; + token_count = 0; + t = stats->srp->latest_token; + while (1) { + if (t == 0) + prev = TOTEM_TOKEN_STATS_MAX - 1; + else + prev = t - 1; + if (prev == stats->srp->earliest_token) + break; + /* if tx == 0, then dropped token (not ours) */ + if (stats->srp->token[t].tx != 0 || + (stats->srp->token[t].rx - stats->srp->token[prev].rx) > 0 ) { + total_mtt_rx_token += (stats->srp->token[t].rx - stats->srp->token[prev].rx); + total_token_holdtime += (stats->srp->token[t].tx - stats->srp->token[t].rx); + total_backlog_calc += stats->srp->token[t].backlog_calc; + token_count++; + } + t = prev; + } + if (token_count) { + stats->srp->mtt_rx_token = (total_mtt_rx_token / token_count); + stats->srp->avg_token_workload = (total_token_holdtime / token_count); + stats->srp->avg_backlog_calc = (total_backlog_calc / token_count); + } + + stats->srp->time_since_token_last_received = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC - + stats->srp->token[stats->srp->latest_token].rx; + + stats_trigger_trackers(); + + api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, + corosync_totem_stats_updater, + &corosync_stats_timer_handle); +} + +static void corosync_totem_stats_init (void) +{ + /* start stats timer */ + api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, + corosync_totem_stats_updater, + &corosync_stats_timer_handle); +} + +static void deliver_fn ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required) +{ + const struct qb_ipc_request_header *header; + int32_t service; + int32_t fn_id; + uint32_t id; + + header = msg; + if (endian_conversion_required) { + id = swab32 (header->id); + } else { + id = header->id; + 
} + + /* + * Call the proper executive handler + */ + service = id >> 16; + fn_id = id & 0xffff; + + /* + * The service id is taken from the received message header, so check + * it before it is used as an index into corosync_service[] (the table + * is walked up to SERVICES_COUNT_MAX in corosync_state_dump). + */ + if (service >= SERVICES_COUNT_MAX || !corosync_service[service]) { + return; + } + if (fn_id >= corosync_service[service]->exec_engine_count) { + log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)", + fn_id, service, corosync_service[service]->exec_engine_count); + return; + } + + icmap_fast_inc(service_stats_rx[service][fn_id]); + + if (endian_conversion_required) { + assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL); + corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn + ((void *)msg); + } + + corosync_service[service]->exec_engine[fn_id].exec_handler_fn + (msg, nodeid); +} + +int main_mcast ( + const struct iovec *iovec, + unsigned int iov_len, + unsigned int guarantee) +{ + const struct qb_ipc_request_header *req = iovec->iov_base; + int32_t service; + int32_t fn_id; + + service = req->id >> 16; + fn_id = req->id & 0xffff; + + if (corosync_service[service]) { + icmap_fast_inc(service_stats_tx[service][fn_id]); + } + + return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee)); +} + +static void corosync_ring_id_create_or_load ( + struct memb_ring_id *memb_ring_id, + unsigned int nodeid) +{ + int fd; + int res = 0; + char filename[PATH_MAX]; + + snprintf (filename, sizeof(filename), "%s/ringid_%u", + get_state_dir(), nodeid); + fd = open (filename, O_RDONLY); + /* + * If file can be opened and read, read the ring id + */ + if (fd != -1) { + res = read (fd, &memb_ring_id->seq, sizeof (uint64_t)); + close (fd); + } + /* + * If file could not be opened or read, create a new ring id + */ + if ((fd == -1) || (res != sizeof (uint64_t))) { + memb_ring_id->seq = 0; + fd = creat (filename, 0600); + if (fd != -1) { + res = write (fd, &memb_ring_id->seq, sizeof (uint64_t)); + close (fd); + if (res == -1) { + LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, + "Couldn't write ringid file '%s'", 
filename); + + corosync_exit_error (COROSYNC_DONE_STORE_RINGID); + } + } else { + LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, + "Couldn't create ringid file '%s'", filename); + + corosync_exit_error (COROSYNC_DONE_STORE_RINGID); + } + } + + memb_ring_id->rep = nodeid; +} + +static void corosync_ring_id_store ( + const struct memb_ring_id *memb_ring_id, + unsigned int nodeid) +{ + char filename[PATH_MAX]; + int fd; + int res; + + snprintf (filename, sizeof(filename), "%s/ringid_%u", + get_state_dir(), nodeid); + + fd = creat (filename, 0600); + if (fd == -1) { + LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, + "Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage", + memb_ring_id->seq); + + corosync_exit_error (COROSYNC_DONE_STORE_RINGID); + } + log_printf (LOGSYS_LEVEL_DEBUG, + "Storing new sequence id for ring " CS_PRI_RING_ID_SEQ, memb_ring_id->seq); + res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq)); + close (fd); + if (res != sizeof(memb_ring_id->seq)) { + LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, + "Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage", + memb_ring_id->seq); + + corosync_exit_error (COROSYNC_DONE_STORE_RINGID); + } +} + +static qb_loop_timer_handle recheck_the_q_level_timer; +void corosync_recheck_the_q_level(void *data) +{ + totempg_check_q_level(corosync_group_handle); + if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) { + qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, + NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer); + } +} + +struct sending_allowed_private_data_struct { + int reserved_msgs; +}; + + +int corosync_sending_allowed ( + unsigned int service, + unsigned int id, + const void *msg, + void *sending_allowed_private_data) +{ + struct sending_allowed_private_data_struct *pd = + (struct sending_allowed_private_data_struct *)sending_allowed_private_data; + struct iovec reserve_iovec; + struct qb_ipc_request_header *header = (struct qb_ipc_request_header 
*)msg; + int sending_allowed; + + reserve_iovec.iov_base = (char *)header; + reserve_iovec.iov_len = header->size; + + pd->reserved_msgs = totempg_groups_joined_reserve ( + corosync_group_handle, + &reserve_iovec, 1); + if (pd->reserved_msgs == -1) { + return -EINVAL; + } + + /* Message ID out of range */ + if (id >= corosync_service[service]->lib_engine_count) { + return -EINVAL; + } + + sending_allowed = QB_FALSE; + if (corosync_quorum_is_quorate() == 1 || + corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { + // we are quorate + // now check flow control + if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) { + sending_allowed = QB_TRUE; + } else if (pd->reserved_msgs && sync_in_process == 0) { + sending_allowed = QB_TRUE; + } else if (pd->reserved_msgs == 0) { + return -ENOBUFS; + } else /* (sync_in_process) */ { + return -EINPROGRESS; + } + } else { + return -EHOSTUNREACH; + } + + return (sending_allowed); +} + +void corosync_sending_allowed_release (void *sending_allowed_private_data) +{ + struct sending_allowed_private_data_struct *pd = + (struct sending_allowed_private_data_struct *)sending_allowed_private_data; + + if (pd->reserved_msgs == -1) { + return; + } + totempg_groups_joined_release (pd->reserved_msgs); +} + +int message_source_is_local (const mar_message_source_t *source) +{ + int ret = 0; + + assert (source != NULL); + if (source->nodeid == totempg_my_nodeid_get ()) { + ret = 1; + } + return ret; +} + +void message_source_set ( + mar_message_source_t *source, + void *conn) +{ + assert ((source != NULL) && (conn != NULL)); + memset (source, 0, sizeof (mar_message_source_t)); + source->nodeid = totempg_my_nodeid_get (); + source->conn = conn; +} + +struct scheduler_pause_timeout_data { + struct totem_config *totem_config; + qb_loop_timer_handle handle; + unsigned long long tv_prev; + unsigned long long max_tv_diff; +}; + +static void timer_function_scheduler_timeout (void *data) +{ + 
struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data; + unsigned long long tv_current; + unsigned long long tv_diff; + uint64_t schedmiss_event_tstamp; + + tv_current = qb_util_nano_current_get (); + + if (timeout_data->tv_prev == 0) { + /* + * Initial call -> just pretent everything is ok + */ + timeout_data->tv_prev = tv_current; + timeout_data->max_tv_diff = 0; + } + + tv_diff = tv_current - timeout_data->tv_prev; + timeout_data->tv_prev = tv_current; + + if (tv_diff > timeout_data->max_tv_diff) { + schedmiss_event_tstamp = qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC; + + log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled (@%" PRIu64 ") for %0.4f ms " + "(threshold is %0.4f ms). Consider token timeout increase.", + schedmiss_event_tstamp, + (float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC); + + stats_add_schedmiss_event(schedmiss_event_tstamp, (float)tv_diff / QB_TIME_NS_IN_MSEC); + } + + /* + * Set next threshold, because token_timeout can change + */ + timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8; + qb_loop_timer_add (corosync_poll_handle, + QB_LOOP_MED, + timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3, + timeout_data, + timer_function_scheduler_timeout, + &timeout_data->handle); +} + + +/* + * Set main pid RR scheduler. 
+ * silent: don't log sched_get_priority_max and sched_setscheduler errors + * Returns: 0 - success, -1 failure, -2 platform doesn't support SCHED_RR + */ +static int corosync_set_rr_scheduler (int silent) +{ + int ret_val = 0; + +#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER) + int res; + + sched_priority = sched_get_priority_max (SCHED_RR); + if (sched_priority != -1) { + global_sched_param.sched_priority = sched_priority; + res = sched_setscheduler (0, SCHED_RR, &global_sched_param); + if (res == -1) { + if (!silent) { + LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, + "Could not set SCHED_RR at priority %d", + global_sched_param.sched_priority); + } + + global_sched_param.sched_priority = 0; +#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET + qb_log_thread_priority_set (SCHED_OTHER, 0); +#endif + ret_val = -1; + } else { + + /* + * Turn on SCHED_RR in logsys system + */ +#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET + res = qb_log_thread_priority_set (SCHED_RR, sched_priority); +#else + res = -1; +#endif + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, + "Could not set logsys thread priority." + " Can't continue because of priority inversions."); + corosync_exit_error (COROSYNC_DONE_LOGSETUP); + } + } + } else { + if (!silent) { + LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, + "Could not get maximum scheduler priority"); + } + sched_priority = 0; + ret_val = -1; + } +#else + log_printf(LOGSYS_LEVEL_WARNING, + "The Platform is missing process priority setting features. 
Leaving at default."); + ret_val = -2; +#endif + + return (ret_val); +} + + +/* The basename man page contains scary warnings about + thread-safety and portability, hence this */ +static const char *corosync_basename(const char *file_name) +{ + char *base; + base = strrchr (file_name, '/'); + if (base) { + return base + 1; + } + + return file_name; +} + +static void +_logsys_log_printf(int level, int subsys, + const char *function_name, + const char *file_name, + int file_line, + const char *format, + ...) __attribute__((format(printf, 6, 7))); + +static void +_logsys_log_printf(int level, int subsys, + const char *function_name, + const char *file_name, + int file_line, + const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + qb_log_from_external_source_va(function_name, corosync_basename(file_name), + format, level, file_line, + subsys, ap); + va_end(ap); +} + +static void fplay_key_change_notify_fn ( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) { + fprintf(stderr,"Writetofile\n"); + corosync_blackbox_write_to_file (); + } + if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) { + fprintf(stderr,"statefump\n"); + corosync_state_dump (); + } +} + +static void corosync_fplay_control_init (void) +{ + icmap_track_t track = NULL; + + icmap_set_string("runtime.blackbox.dump_flight_data", "no"); + icmap_set_string("runtime.blackbox.dump_state", "no"); + + icmap_track_add("runtime.blackbox.dump_flight_data", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, + fplay_key_change_notify_fn, + NULL, &track); + icmap_track_add("runtime.blackbox.dump_state", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, + fplay_key_change_notify_fn, + NULL, &track); +} + +static void force_gather_notify_fn( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct 
icmap_notify_value old_val, + void *user_data) +{ + char *key_val = NULL; + + /* + * key_val is ours to free only when the lookup returned CS_OK, so it is + * released right here instead of on a shared exit path that could be + * reached with the pointer never having been set + */ + if (icmap_get_string(key_name, &key_val) == CS_OK) { + int already_reset = (strcmp(key_val, "no") == 0); + + free(key_val); + if (already_reset) { + return; + } + } + + icmap_set_string("runtime.force_gather", "no"); + + if (strcmp(key_name, "runtime.force_gather") == 0) { + log_printf(LOGSYS_LEVEL_ERROR, "Forcing into GATHER state\n"); + totempg_force_gather(); + } +} + +static void corosync_force_gather_init (void) +{ + icmap_track_t track = NULL; + + icmap_set_string("runtime.force_gather", "no"); + + icmap_track_add("runtime.force_gather", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, + force_gather_notify_fn, + NULL, &track); +} + +/* + * Set RO flag for keys, which either doesn't make sense to change by user (statistic) + * or which when changed are not reflected by runtime (totem.crypto_cipher, ...). + * + * Also some RO keys cannot be determined in this stage, so they are set later in + * other functions (like nodelist.local_node_pos, ...) + */ +static void set_icmap_ro_keys_flag (void) +{ + /* + * Set RO flag for all keys of internal configuration and runtime statistics + */ + icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE); + icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE); + icmap_set_ro_access("runtime.config.", CS_TRUE, CS_TRUE); + icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE); + icmap_set_ro_access("uidgid.config.", CS_TRUE, CS_TRUE); + icmap_set_ro_access("system.", CS_TRUE, CS_TRUE); + icmap_set_ro_access("nodelist.", CS_TRUE, CS_TRUE); + + /* + * Set RO flag for concrete keys of configuration which can't be changed + * during runtime + */ + icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.crypto_model", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.keyfile", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.key", CS_FALSE, CS_TRUE); + 
icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.ip_version", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.transport", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.cluster_name", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.threads", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.version", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.nodeid", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.clear_node_high_bit", CS_FALSE, CS_TRUE); + icmap_set_ro_access("config.reload_in_progress", CS_FALSE, CS_TRUE); + icmap_set_ro_access("config.totemconfig_reload_in_progress", CS_FALSE, CS_TRUE); +} + +static void main_service_ready (void) +{ + int res; + + /* + * This must occur after totempg is initialized because "this_ip" must be set + */ + res = corosync_service_defaults_link_and_init (api); + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services"); + corosync_exit_error (COROSYNC_DONE_INIT_SERVICES); + } + cs_ipcs_init(); + corosync_totem_stats_init (); + corosync_fplay_control_init (); + corosync_force_gather_init (); + + sync_init ( + corosync_sync_callbacks_retrieve, + corosync_sync_completed); +} + +static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid) +{ + struct flock lock; + enum e_corosync_done err; + char pid_s[17]; + int fd_flag; + + err = COROSYNC_DONE_EXIT; + + lockfile_fd = open (lockfile, O_WRONLY | O_CREAT, 0640); + if (lockfile_fd == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file."); + return (COROSYNC_DONE_ACQUIRE_LOCK); + } + +retry_fcntl: + lock.l_type = F_WRLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = 0; + if (fcntl (lockfile_fd, F_SETLK, &lock) == -1) { + switch (errno) { + case EINTR: + goto retry_fcntl; + break; + case EAGAIN: + case EACCES: + log_printf 
(LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running."); + err = COROSYNC_DONE_ALREADY_RUNNING; + goto error_close; + break; + default: + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s", + strerror(errno)); + err = COROSYNC_DONE_ACQUIRE_LOCK; + goto error_close; + break; + } + } + + if (ftruncate (lockfile_fd, 0) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s", + strerror (errno)); + err = COROSYNC_DONE_ACQUIRE_LOCK; + goto error_close_unlink; + } + + memset (pid_s, 0, sizeof (pid_s)); + snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); + +retry_write: + if (write (lockfile_fd, pid_s, strlen (pid_s)) != strlen (pid_s)) { + if (errno == EINTR) { + goto retry_write; + } else { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. " + "Error was %s", strerror (errno)); + err = COROSYNC_DONE_ACQUIRE_LOCK; + goto error_close_unlink; + } + } + + if ((fd_flag = fcntl (lockfile_fd, F_GETFD, 0)) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " + "Error was %s", strerror (errno)); + err = COROSYNC_DONE_ACQUIRE_LOCK; + goto error_close_unlink; + } + fd_flag |= FD_CLOEXEC; + if (fcntl (lockfile_fd, F_SETFD, fd_flag) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. " + "Error was %s", strerror (errno)); + err = COROSYNC_DONE_ACQUIRE_LOCK; + goto error_close_unlink; + } + + return (err); + +error_close_unlink: + unlink (lockfile); +error_close: + close (lockfile_fd); + + return (err); +} + +static int corosync_move_to_root_cgroup(void) { + FILE *f; + int res = -1; + const char *cgroup_task_fname = NULL; + + /* + * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now + * using systemd and systemd uses hardcoded path of cgroup mount point. 
+ * + * This feature is expected to be removed as soon as systemd gets support + * for managing RT configuration. + */ + f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt"); + if (f == NULL) { + /* + * Try cgroup v2 + */ + f = fopen("/sys/fs/cgroup/cgroup.procs", "rt"); + if (f == NULL) { + log_printf(LOG_DEBUG, "cpu.rt_runtime_us or cgroup.procs doesn't exist -> " + "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED"); + + res = 0; + goto exit_res; + } else { + log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v2 root cgroup"); + + cgroup_task_fname = "/sys/fs/cgroup/cgroup.procs"; + } + } else { + log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v1 root cgroup"); + + cgroup_task_fname = "/sys/fs/cgroup/cpu/tasks"; + } + (void)fclose(f); + + f = fopen(cgroup_task_fname, "w"); + if (f == NULL) { + log_printf(LOGSYS_LEVEL_WARNING, "Can't open cgroups tasks file for writing"); + + goto exit_res; + } + + if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) { + log_printf(LOGSYS_LEVEL_WARNING, "Can't write corosync pid into cgroups tasks file"); + + goto close_and_exit_res; + } + + /* + * Pid was accepted by stdio, so report success unless the flush done by + * fclose below fails + */ + res = 0; + +close_and_exit_res: + if (fclose(f) != 0) { + log_printf(LOGSYS_LEVEL_WARNING, "Can't close cgroups tasks file"); + + res = -1; + goto exit_res; + } + +exit_res: + return (res); +} + +static void show_version_info_crypto(void) +{ + const char *error_string; + const char *list_str; + + if (util_is_valid_knet_crypto_model(NULL, &list_str, 1, "", &error_string) != -1) { + printf("Available crypto models: %s\n", list_str); + } else { + perror(error_string); + } +} + +static void show_version_info_compress(void) +{ + const char *error_string; + const char *list_str; + + if (util_is_valid_knet_compress_model(NULL, &list_str, 1, "", &error_string) != -1) { + printf("Available compression models: %s\n", list_str); + } else { + perror(error_string); + } +} + +static void show_version_info(void) +{ + + printf ("Corosync Cluster Engine, version '%s'\n", VERSION); + printf ("Copyright 
(c) 2006-2021 Red Hat, Inc.\n"); + + printf ("\nBuilt-in features:" PACKAGE_FEATURES "\n"); + + show_version_info_crypto(); + show_version_info_compress(); +} + +int main (int argc, char **argv, char **envp) +{ + const char *error_string; + struct totem_config totem_config; + int res, ch; + int background, sched_rr, prio, testonly; + enum move_to_root_cgroup_mode move_to_root_cgroup; + enum e_corosync_done flock_err; + uint64_t totem_config_warnings; + struct scheduler_pause_timeout_data scheduler_pause_timeout_data; + long int tmpli; + char *ep; + char *tmp_str; + int log_subsys_id_totem; + int silent; + + /* default configuration + */ + background = 1; + testonly = 0; + + while ((ch = getopt (argc, argv, "c:ftv")) != EOF) { + + switch (ch) { + case 'c': + res = snprintf(corosync_config_file, sizeof(corosync_config_file), "%s", optarg); + if (res >= sizeof(corosync_config_file)) { + fprintf (stderr, "Config file path too long.\n"); + syslog (LOGSYS_LEVEL_ERROR, "Config file path too long."); + + logsys_system_fini(); + return EXIT_FAILURE; + } + break; + case 'f': + background = 0; + break; + case 't': + testonly = 1; + break; + case 'v': + show_version_info(); + logsys_system_fini(); + return EXIT_SUCCESS; + + break; + default: + fprintf(stderr, \ + "usage:\n"\ + " -c : Corosync config file path.\n"\ + " -f : Start application in foreground.\n"\ + " -t : Test configuration and exit.\n"\ + " -v : Display version, git revision and some useful information about Corosync and exit.\n"); + logsys_system_fini(); + return EXIT_FAILURE; + } + } + + + /* + * Other signals are registered later via qb_loop_signal_add + */ + (void)signal (SIGSEGV, sigsegv_handler); + (void)signal (SIGABRT, sigsegv_handler); +#if MSG_NOSIGNAL != 0 + (void)signal (SIGPIPE, SIG_IGN); +#endif + + if (icmap_init() != CS_OK) { + fprintf (stderr, "Corosync Executive couldn't initialize configuration component.\n"); + syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration 
component."); + corosync_exit_error (COROSYNC_DONE_ICMAP); + } + set_icmap_ro_keys_flag(); + + /* + * Initialize the corosync_api_v1 definition + */ + api = apidef_get (); + + res = coroparse_configparse(icmap_get_global_map(), &error_string); + if (res == -1) { + /* + * Logsys can't log properly at this early stage, and we need to get this message out + * + */ + fprintf (stderr, "%s\n", error_string); + syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); + corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); + } + + if (stats_map_init(api) != CS_OK) { + fprintf (stderr, "Corosync Executive couldn't initialize statistics component.\n"); + syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize statistics component."); + corosync_exit_error (COROSYNC_DONE_STATS); + } + + res = corosync_log_config_read (&error_string); + if (res == -1) { + /* + * if we are here, we _must_ flush the logsys queue + * and try to inform that we couldn't read the config. + * this is a desperate attempt before certain death + * and there is no guarantee that we can print to stderr + * nor that logsys is sending the messages where we expect. + */ + log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); + fprintf(stderr, "%s", error_string); + syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); + corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); + } + + if (!testonly) { + log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine %s starting up", VERSION); + log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES ""); + } + + /* + * Create totem logsys subsys before totem_config_read so log functions can be used + */ + log_subsys_id_totem = _logsys_subsys_create("TOTEM", "totem," + "totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c," + "totempg.c,totemudp.c,totemudpu.c,totemnet.c,totemknet.c"); + + res = chdir(get_state_dir()); + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to state directory %s. 
%s", get_state_dir(), strerror(errno)); + corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); + } + + res = totem_config_read (&totem_config, &error_string, &totem_config_warnings); + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); + corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); + } + + if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) { + log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored."); + } + + if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) { + log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated."); + } + + if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_SET) { + log_printf (LOGSYS_LEVEL_WARNING, "nodeid in totem section is deprecated and ignored. " + "Nodelist (or autogenerated) nodeid is going to be used."); + } + + if (totem_config_warnings & TOTEM_CONFIG_BINDNETADDR_NODELIST_SET) { + log_printf (LOGSYS_LEVEL_WARNING, "interface section bindnetaddr is used together with nodelist. 
" + "Nodelist one is going to be used."); + } + + if (totem_config_warnings != 0) { + log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist."); + } + + res = totem_config_validate (&totem_config, &error_string); + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); + corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); + } + + if (testonly) { + corosync_exit_error (COROSYNC_DONE_EXIT); + } + + + move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_AUTO; + if (icmap_get_string("system.move_to_root_cgroup", &tmp_str) == CS_OK) { + /* + * Validity of move_to_root_cgroup values checked in coroparse.c + */ + if (strcmp(tmp_str, "yes") == 0) { + move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_ON; + } else if (strcmp(tmp_str, "no") == 0) { + move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_OFF; + } + free(tmp_str); + } + + + sched_rr = 1; + if (icmap_get_string("system.sched_rr", &tmp_str) == CS_OK) { + if (strcmp(tmp_str, "yes") != 0) { + sched_rr = 0; + } + free(tmp_str); + } + + prio = 0; + if (icmap_get_string("system.priority", &tmp_str) == CS_OK) { + if (strcmp(tmp_str, "max") == 0) { + prio = INT_MIN; + } else if (strcmp(tmp_str, "min") == 0) { + prio = INT_MAX; + } else { + errno = 0; + + tmpli = strtol(tmp_str, &ep, 10); + if (errno != 0 || *ep != '\0' || tmpli > INT_MAX || tmpli < INT_MIN) { + log_printf (LOGSYS_LEVEL_ERROR, "Priority value %s is invalid", tmp_str); + corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); + } + + prio = tmpli; + } + + free(tmp_str); + } + + if (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_ON) { + /* + * Try to move corosync into root cpu cgroup. Failure is not fatal and + * error is deliberately ignored. 
+ */ + (void)corosync_move_to_root_cgroup(); + } + + /* + * Set round robin realtime scheduling with priority 99 + */ + if (sched_rr) { + silent = (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO); + res = corosync_set_rr_scheduler (silent); + + if (res == -1 && move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO) { + /* + * Try to move process to root cgroup and try set priority again + */ + (void)corosync_move_to_root_cgroup(); + + res = corosync_set_rr_scheduler (0); + } + + if (res != 0) { + prio = INT_MIN; + } else { + prio = 0; + } + } + + if (prio != 0) { + if (setpriority(PRIO_PGRP, 0, prio) != 0) { + LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, + "Could not set priority %d", prio); + } + } + + totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load; + totem_config.totem_memb_ring_id_store = corosync_ring_id_store; + + totem_config.totem_logging_configuration = totem_logging_configuration; + totem_config.totem_logging_configuration.log_subsys_id = log_subsys_id_totem; + + totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING; + totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR; + totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING; + totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE; + totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG; + totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE; + totem_config.totem_logging_configuration.log_printf = _logsys_log_printf; + + logsys_config_apply(); + + /* + * Now we are fully initialized. 
+ */ + if (background) { + logsys_blackbox_prefork(); + + corosync_tty_detach (); + + logsys_blackbox_postfork(); + + log_printf (LOGSYS_LEVEL_DEBUG, "Corosync TTY detached"); + } + + /* + * Lock all memory to avoid page faults which may interrupt + * application healthchecking + */ + corosync_mlockall (); + + corosync_poll_handle = qb_loop_create (); + + memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data)); + scheduler_pause_timeout_data.totem_config = &totem_config; + timer_function_scheduler_timeout (&scheduler_pause_timeout_data); + + qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, + SIGUSR2, NULL, sig_diag_handler, NULL); + qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, + SIGINT, NULL, sig_exit_handler, NULL); + qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, + SIGQUIT, NULL, sig_exit_handler, NULL); + qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, + SIGTERM, NULL, sig_exit_handler, NULL); + + if (logsys_thread_start() != 0) { + log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread"); + corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); + } + + if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) { + corosync_exit_error (flock_err); + } + + /* + * if totempg_initialize doesn't have root priveleges, it cannot + * bind to a specific interface. 
This only matters if + * there is more then one interface in a system, so + * in this case, only a warning is printed + */ + /* + * Join multicast group and setup delivery + * and configuration change functions + */ + if (totempg_initialize ( + corosync_poll_handle, + &totem_config) != 0) { + + log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize TOTEM layer"); + corosync_exit_error (COROSYNC_DONE_FATAL_ERR); + } + + totempg_service_ready_register ( + main_service_ready); + + totempg_groups_initialize ( + &corosync_group_handle, + deliver_fn, + confchg_fn); + + totempg_groups_join ( + corosync_group_handle, + &corosync_group, + 1); + + /* + * Drop root privleges to user 'corosync' + * TODO: Don't really need full root capabilities; + * needed capabilities are: + * CAP_NET_RAW (bindtodevice) + * CAP_SYS_NICE (setscheduler) + * CAP_IPC_LOCK (mlockall) + */ + priv_drop (); + + schedwrk_init ( + serialize_lock, + serialize_unlock); + + /* + * Start main processing loop + */ + qb_loop_run (corosync_poll_handle); + + /* + * Exit was requested + */ + totempg_finalize (); + + /* + * free the loop resources + */ + qb_loop_destroy (corosync_poll_handle); + + /* + * free up the icmap + */ + + /* + * Remove pid lock file + */ + close (lockfile_fd); + unlink (corosync_lock_file); + + corosync_exit_error (COROSYNC_DONE_EXIT); + + return EXIT_SUCCESS; +} diff --git a/exec/main.h b/exec/main.h new file mode 100644 index 0000000..ee86ad9 --- /dev/null +++ b/exec/main.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2002-2006 MontaVista Software, Inc. + * Copyright (c) 2006-2018 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * @file + * + * @warning DO NOT USE SYMBOLS IN THIS FILE + */ + +#ifndef MAIN_H_DEFINED +#define MAIN_H_DEFINED + +#define TRUE 1 +#define FALSE 0 +#include <corosync/corotypes.h> +#include <corosync/hdb.h> +#include <qb/qbloop.h> +#include <corosync/totem/totempg.h> +#include <corosync/icmap.h> +#include <corosync/coroapi.h> + +extern unsigned long long *(*main_clm_get_by_nodeid) (unsigned int node_id); + +extern int main_mcast ( + const struct iovec *iovec, + unsigned int iov_len, + unsigned int guarantee); + +extern void message_source_set (mar_message_source_t *source, void *conn); + +extern int message_source_is_local (const mar_message_source_t *source); + +extern void corosync_shutdown_request (void); + +extern void corosync_state_dump (void); + +extern qb_loop_t *cs_poll_handle_get (void); + +extern int cs_poll_dispatch_add (qb_loop_t * handle, + int fd, + int events, + void *data, + + int (*dispatch_fn) (int fd, + int revents, + void *data)); + +extern int cs_poll_dispatch_delete ( + qb_loop_t * handle, + int fd); + + +extern int corosync_sending_allowed ( + unsigned int service, + unsigned int id, + const void *msg, + void *sending_allowed_private_data); + +extern void corosync_sending_allowed_release (void *sending_allowed_private_data); + +extern void corosync_recheck_the_q_level(void *data); + +extern void cs_ipcs_init(void); + +extern const char *cs_ipcs_service_init(struct corosync_service_engine *service); + +extern void cs_ipcs_stats_update(void); + +extern int32_t cs_ipcs_service_destroy(int32_t service_id); + +extern int32_t cs_ipcs_q_level_get(void); + +extern int cs_ipcs_dispatch_send(void *conn, const void *msg, size_t mlen); +extern int cs_ipcs_dispatch_iov_send (void *conn, + const struct iovec *iov, + unsigned int iov_len); + +extern int cs_ipcs_response_send(void *conn, const void *msg, size_t mlen); +extern int cs_ipcs_response_iov_send (void *conn, + const struct iovec *iov, + unsigned int iov_len); + +extern void 
cs_ipcs_sync_state_changed(int32_t sync_in_process); + +extern void *cs_ipcs_private_data_get(void *conn); + +extern void cs_ipc_refcnt_inc(void *conn); + +extern void cs_ipc_refcnt_dec(void *conn); + +extern void cs_ipc_allow_connections(int32_t allow); + +extern int coroparse_configparse (icmap_map_t config_map, const char **error_string); + +extern const char *corosync_get_config_file(void); + +#endif /* MAIN_H_DEFINED */ diff --git a/exec/mon.c b/exec/mon.c new file mode 100644 index 0000000..3f71fb5 --- /dev/null +++ b/exec/mon.c @@ -0,0 +1,511 @@ +/* + * Copyright (c) 2010-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Angus Salkeld <asalkeld@redhat.com> + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <unistd.h> +#include <statgrab.h> + +#include <corosync/corotypes.h> +#include <corosync/corodefs.h> +#include <corosync/coroapi.h> +#include <qb/qblist.h> +#include <corosync/logsys.h> +#include <corosync/icmap.h> +#include "fsm.h" + +#include "service.h" + +LOGSYS_DECLARE_SUBSYS ("MON"); + +/* + * Service Interfaces required by service_message_handler struct + */ +static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api); + +static struct corosync_api_v1 *api; +#define MON_DEFAULT_PERIOD 3000 +#define MON_MIN_PERIOD 500 +#define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC) + +struct corosync_service_engine mon_service_engine = { + .name = "corosync resource monitoring service", + .id = MON_SERVICE, + .priority = 1, + .private_data_size = 0, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, + .lib_init_fn = NULL, + .lib_exit_fn = NULL, + .lib_engine = NULL, + .lib_engine_count = 0, + .exec_engine = NULL, + .exec_engine_count = 0, + .confchg_fn = NULL, + .exec_init_fn = mon_exec_init_fn, + .exec_dump_fn = NULL +}; + +static QB_LIST_DECLARE (confchg_notify); + + +struct resource_instance { + const char *icmap_path; + const char *name; + corosync_timer_handle_t timer_handle; + void (*update_stats_fn) (void *data); + struct cs_fsm fsm; + uint64_t period; + icmap_value_types_t max_type; + union { + int32_t int32; + double dbl; + } max; +}; + +static void mem_update_stats_fn (void *data); 
+static void load_update_stats_fn (void *data); + +static struct resource_instance memory_used_inst = { + .name = "memory_used", + .icmap_path = "resources.system.memory_used.", + .update_stats_fn = mem_update_stats_fn, + .max_type = ICMAP_VALUETYPE_INT32, + .max.int32 = INT32_MAX, + .period = MON_DEFAULT_PERIOD, +}; + +static struct resource_instance load_15min_inst = { + .name = "load_15min", + .icmap_path = "resources.system.load_15min.", + .update_stats_fn = load_update_stats_fn, + .max_type = ICMAP_VALUETYPE_DOUBLE, + .max.dbl = INT32_MAX, + .period = MON_DEFAULT_PERIOD, +}; + + +/* + * F S M + */ +static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data); +static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data); + +const char * mon_running_str = "running"; +const char * mon_failed_str = "failed"; +const char * mon_failure_str = "failure"; +const char * mon_stopped_str = "stopped"; +const char * mon_config_changed_str = "config_changed"; + +enum mon_resource_state { + MON_S_STOPPED, + MON_S_RUNNING, + MON_S_FAILED +}; +enum mon_resource_event { + MON_E_CONFIG_CHANGED, + MON_E_FAILURE +}; + +struct cs_fsm_entry mon_fsm_table[] = { + { MON_S_STOPPED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_STOPPED, MON_S_RUNNING, -1} }, + { MON_S_STOPPED, MON_E_FAILURE, NULL, {-1} }, + { MON_S_RUNNING, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} }, + { MON_S_RUNNING, MON_E_FAILURE, mon_resource_failed, {MON_S_FAILED, -1} }, + { MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} }, + { MON_S_FAILED, MON_E_FAILURE, NULL, {-1} }, +}; + +struct corosync_service_engine *mon_get_service_engine_ver0 (void) +{ + return (&mon_service_engine); +} + +static const char * mon_res_state_to_str(struct cs_fsm* fsm, + int32_t state) +{ + switch (state) { + case MON_S_STOPPED: + return mon_stopped_str; + break; + case MON_S_RUNNING: + return mon_running_str; + break; + 
case MON_S_FAILED: + return mon_failed_str; + break; + } + return NULL; +} + +static const char * mon_res_event_to_str(struct cs_fsm* fsm, + int32_t event) +{ + switch (event) { + case MON_E_CONFIG_CHANGED: + return mon_config_changed_str; + break; + case MON_E_FAILURE: + return mon_failure_str; + break; + } + return NULL; +} + +static void mon_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state, + int32_t next_state, int32_t fsm_event, void *data) +{ + switch (cb_event) { + case CS_FSM_CB_EVENT_PROCESS_NF: + log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"", + fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state)); + corosync_exit_error(COROSYNC_DONE_FATAL_ERR); + break; + case CS_FSM_CB_EVENT_STATE_SET: + log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"", + fsm->name, + fsm->event_to_str(fsm, fsm_event), + fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), + fsm->state_to_str(fsm, next_state)); + break; + case CS_FSM_CB_EVENT_STATE_SET_NF: + log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")", + fsm->name, + fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), + fsm->state_to_str(fsm, next_state), + fsm->event_to_str(fsm, fsm_event)); + corosync_exit_error(COROSYNC_DONE_FATAL_ERR); + break; + default: + log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Can't find callback event!"); + corosync_exit_error(COROSYNC_DONE_FATAL_ERR); + break; + } +} + +static void mon_fsm_state_set (struct cs_fsm* fsm, + enum mon_resource_state next_state, struct resource_instance* inst) +{ + enum mon_resource_state prev_state = fsm->curr_state; + const char *state_str; + char key_name[ICMAP_KEYNAME_MAXLEN]; + + ENTER(); + + cs_fsm_state_set(fsm, next_state, inst, mon_fsm_cb); + + if (prev_state == fsm->curr_state) { + return; + } + state_str = mon_res_state_to_str(fsm, fsm->curr_state); + + snprintf(key_name, 
ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state"); + icmap_set_string(key_name, state_str); +} + + +static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + char *tmp_str; + uint64_t tmp_value; + char key_name[ICMAP_KEYNAME_MAXLEN]; + int run_updater; + int scanf_res = 0; + int32_t i32; + double dbl; + + ENTER(); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period"); + if (icmap_get_string(key_name, &tmp_str) == CS_OK) { + scanf_res = sscanf(tmp_str, "%"PRIu64, &tmp_value); + if (scanf_res != 1) { + log_printf (LOGSYS_LEVEL_WARNING, + "Could NOT use poll_period: %s (not uint64 type) for resource %s", + tmp_str, inst->name); + } + free(tmp_str); + + if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) { + log_printf (LOGSYS_LEVEL_DEBUG, + "poll_period changing from:%"PRIu64" to %"PRIu64".", + inst->period, tmp_value); + inst->period = tmp_value; + } else { + log_printf (LOGSYS_LEVEL_WARNING, + "Could NOT use poll_period:%"PRIu64" ms for resource %s", + tmp_value, inst->name); + } + } + + if (inst->timer_handle) { + api->timer_delete(inst->timer_handle); + inst->timer_handle = 0; + } + + run_updater = 0; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "max"); + + if (icmap_get_string(key_name, &tmp_str) == CS_OK) { + if (inst->max_type == ICMAP_VALUETYPE_INT32) { + if (sscanf(tmp_str, "%"PRId32, &i32) != 1) { + inst->max.int32 = INT32_MAX; + + mon_fsm_state_set (fsm, MON_S_STOPPED, inst); + } else { + inst->max.int32 = i32; + run_updater = 1; + } + } + if (inst->max_type == ICMAP_VALUETYPE_DOUBLE) { + if (sscanf(tmp_str, "%lf", &dbl) != 1) { + inst->max.dbl = INT32_MAX; + + mon_fsm_state_set (fsm, MON_S_STOPPED, inst); + } else { + inst->max.dbl = dbl; + run_updater = 1; + } + } + free(tmp_str); + } + + if (run_updater) { + mon_fsm_state_set (fsm, MON_S_RUNNING, inst); + /* + * run the updater, 
incase the period has shortened + * and to start the timer. + */ + inst->update_stats_fn (inst); + } +} + +void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + ENTER(); + mon_fsm_state_set (fsm, MON_S_FAILED, inst); +} + +static int32_t percent_mem_used_get(void) +{ + sg_mem_stats *mem_stats; + sg_swap_stats *swap_stats; + long long total, freemem; + +#ifdef HAVE_LIBSTATGRAB_GE_090 + mem_stats = sg_get_mem_stats(NULL); + swap_stats = sg_get_swap_stats(NULL); +#else + mem_stats = sg_get_mem_stats(); + swap_stats = sg_get_swap_stats(); +#endif + + if (mem_stats == NULL || swap_stats == NULL) { + log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s", + sg_str_error(sg_get_error())); + return -1; + } + total = mem_stats->total + swap_stats->total; + freemem = mem_stats->free + swap_stats->free; + return ((total - freemem) * 100) / total; +} + +static void mem_update_stats_fn (void *data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + int32_t new_value; + uint64_t timestamp; + char key_name[ICMAP_KEYNAME_MAXLEN]; + + new_value = percent_mem_used_get(); + if (new_value > 0) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current"); + icmap_set_uint32(key_name, new_value); + + timestamp = cs_timestamp_get(); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated"); + icmap_set_uint64(key_name, timestamp); + + if (new_value > inst->max.int32 && inst->fsm.curr_state != MON_S_FAILED) { + cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb); + } + } + api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS, + inst, inst->update_stats_fn, &inst->timer_handle); +} + +static double min15_loadavg_get(void) +{ + sg_load_stats *load_stats; + +#ifdef HAVE_LIBSTATGRAB_GE_090 + load_stats = sg_get_load_stats (NULL); +#else + load_stats = sg_get_load_stats (); +#endif + if (load_stats == 
NULL) { + log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s", + sg_str_error (sg_get_error())); + return -1; + } + return load_stats->min15; +} + +static void load_update_stats_fn (void *data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + uint64_t timestamp; + char key_name[ICMAP_KEYNAME_MAXLEN]; + double min15 = min15_loadavg_get(); + + if (min15 > 0) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current"); + icmap_set_double(key_name, min15); + + timestamp = cs_timestamp_get(); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated"); + icmap_set_uint64(key_name, timestamp); + + if (min15 > inst->max.dbl && inst->fsm.curr_state != MON_S_FAILED) { + cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb); + } + } + + api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS, + inst, inst->update_stats_fn, &inst->timer_handle); +} + +static void mon_key_changed_cb ( + int32_t event, + const char *key_name, + struct icmap_notify_value new_value, + struct icmap_notify_value old_value, + void *user_data) +{ + struct resource_instance* inst = (struct resource_instance*)user_data; + char *last_key_part; + + if (event == ICMAP_TRACK_DELETE && inst) { + log_printf (LOGSYS_LEVEL_WARNING, + "resource \"%s\" deleted from cmap!", + inst->name); + + cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb); + } + + if (event == ICMAP_TRACK_MODIFY) { + last_key_part = strrchr(key_name, '.'); + if (last_key_part == NULL) + return ; + + last_key_part++; + if (strcmp(last_key_part, "max") == 0 || + strcmp(last_key_part, "poll_period") == 0) { + ENTER(); + cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb); + } + } +} + +static void mon_instance_init (struct resource_instance* inst) +{ + uint64_t tmp_value; + char key_name[ICMAP_KEYNAME_MAXLEN]; + icmap_track_t icmap_track = NULL; + char *tmp_str; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", 
inst->icmap_path, "current"); + if (inst->max_type == ICMAP_VALUETYPE_INT32) { + icmap_set_int32(key_name, 0); + } else { + icmap_set_double(key_name, 0); + } + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated"); + icmap_set_uint64(key_name, 0); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state"); + icmap_set_string(key_name, mon_stopped_str); + + inst->fsm.name = inst->name; + inst->fsm.curr_entry = 0; + inst->fsm.curr_state = MON_S_STOPPED; + inst->fsm.table = mon_fsm_table; + inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry); + inst->fsm.state_to_str = mon_res_state_to_str; + inst->fsm.event_to_str = mon_res_event_to_str; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period"); + if (icmap_get_string(key_name, &tmp_str) != CS_OK || + sscanf(tmp_str, "%"PRIu64, &tmp_value) != 1) { + icmap_set_uint64(key_name, inst->period); + } + else { + if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) { + inst->period = tmp_value; + } else { + log_printf (LOGSYS_LEVEL_WARNING, + "Could NOT use poll_period:%"PRIu64" ms for resource %s", + tmp_value, inst->name); + } + free(tmp_str); + } + cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb); + + icmap_track_add(inst->icmap_path, + ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX, + mon_key_changed_cb, inst, &icmap_track); +} + +static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api) +{ +#ifdef HAVE_LIBSTATGRAB_GE_090 + sg_init(1); +#else + sg_init(); +#endif + + api = corosync_api; + + mon_instance_init (&memory_used_inst); + mon_instance_init (&load_15min_inst); + + return NULL; +} + + diff --git a/exec/pload.c b/exec/pload.c new file mode 100644 index 0000000..206338d --- /dev/null +++ b/exec/pload.c @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2008-2012 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Authors: Steven Dake (sdake@redhat.com) + * Fabio M. Di Nitto (fdinitto@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <qb/qblist.h> +#include <qb/qbutil.h> +#include <qb/qbipc_common.h> + +#include <corosync/swab.h> +#include <corosync/corodefs.h> +#include <corosync/coroapi.h> +#include <corosync/icmap.h> +#include <corosync/logsys.h> + +#include "service.h" +#include "util.h" + +LOGSYS_DECLARE_SUBSYS ("PLOAD"); + +/* + * Service Interfaces required by service_message_handler struct + */ +static struct corosync_api_v1 *api; + +static char *pload_exec_init_fn (struct corosync_api_v1 *corosync_api); + +/* + * on wire / network bits + */ +enum pload_exec_message_req_types { + MESSAGE_REQ_EXEC_PLOAD_START = 0, + MESSAGE_REQ_EXEC_PLOAD_MCAST = 1 +}; + +struct req_exec_pload_start { + struct qb_ipc_request_header header; + uint32_t msg_count; + uint32_t msg_size; +}; + +struct req_exec_pload_mcast { + struct qb_ipc_request_header header; +}; + +static void message_handler_req_exec_pload_start (const void *msg, + unsigned int nodeid); +static void req_exec_pload_start_endian_convert (void *msg); + +static void message_handler_req_exec_pload_mcast (const void *msg, + unsigned int nodeid); +static void req_exec_pload_mcast_endian_convert (void *msg); + +static struct corosync_exec_handler pload_exec_engine[] = +{ + { + .exec_handler_fn = message_handler_req_exec_pload_start, + .exec_endian_convert_fn = req_exec_pload_start_endian_convert + }, + { + .exec_handler_fn = message_handler_req_exec_pload_mcast, + .exec_endian_convert_fn = req_exec_pload_mcast_endian_convert + } +}; + +/* + * internal bits and pieces + */ + +/* + * really unused buffer but we need to give something to iovec + */ +static char *buffer = NULL; + +/* + * wanted/size come from config + * sent/delivered track the runtime status + */ +static uint32_t msgs_wanted = 0; +static uint32_t msg_size = 0; +static uint32_t msgs_sent = 0; +static uint32_t msgs_delivered = 0; + +/* + * bit flip to track if we are running or not and avoid multiple instances + */ +static uint8_t pload_started 
= 0; + +/* + * handle for scheduler + */ +static hdb_handle_t start_mcasting_handle; + +/* + * timing/profiling + */ +static unsigned long long int tv1; +static unsigned long long int tv2; +static unsigned long long int tv_elapsed; + +/* + * Service engine hooks + */ +struct corosync_service_engine pload_service_engine = { + .name = "corosync profile loading service", + .id = PLOAD_SERVICE, + .priority = 1, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, + .exec_engine = pload_exec_engine, + .exec_engine_count = sizeof (pload_exec_engine) / sizeof (struct corosync_exec_handler), + .exec_init_fn = pload_exec_init_fn +}; + +struct corosync_service_engine *pload_get_service_engine_ver0 (void) +{ + return (&pload_service_engine); +} + +/* + * internal use only functions + */ + +/* + * not all architectures / OSes define timersub in sys/time.h or time.h + */ + +#ifndef timersub +#warning Using internal timersub definition. Check your include header files +#define timersub(a, b, result) \ +do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ +} while (0) +#endif /* timersub */ + +/* + * tell all cluster nodes to start mcasting + */ +static void pload_send_start (uint32_t count, uint32_t size) +{ + struct req_exec_pload_start req_exec_pload_start; + struct iovec iov; + + req_exec_pload_start.header.id = SERVICE_ID_MAKE (PLOAD_SERVICE, MESSAGE_REQ_EXEC_PLOAD_START); + req_exec_pload_start.msg_count = count; + req_exec_pload_start.msg_size = size; + iov.iov_base = (void *)&req_exec_pload_start; + iov.iov_len = sizeof (struct req_exec_pload_start); + + api->totem_mcast (&iov, 1, TOTEM_AGREED); +} + +/* + * send N empty data messages of size X + */ +static int pload_send_message (const void *arg) +{ + struct req_exec_pload_mcast req_exec_pload_mcast; + struct iovec iov[2]; + unsigned int res; + unsigned int iov_len = 1; + + 
req_exec_pload_mcast.header.id = SERVICE_ID_MAKE (PLOAD_SERVICE, MESSAGE_REQ_EXEC_PLOAD_MCAST); + req_exec_pload_mcast.header.size = sizeof (struct req_exec_pload_mcast) + msg_size; + + iov[0].iov_base = (void *)&req_exec_pload_mcast; + iov[0].iov_len = sizeof (struct req_exec_pload_mcast); + if (msg_size > sizeof (req_exec_pload_mcast)) { + iov[1].iov_base = &buffer; + iov[1].iov_len = msg_size - sizeof (req_exec_pload_mcast); + iov_len = 2; + } + + do { + res = api->totem_mcast (iov, iov_len, TOTEM_AGREED); + if (res == -1) { + break; + } else { + msgs_sent++; + } + } while (msgs_sent < msgs_wanted); + + if (msgs_sent == msgs_wanted) { + return (0); + } else { + return (-1); + } +} + +/* + * hook into icmap to read config at runtime + * we do NOT start by default, ever! + */ +static void pload_read_config( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + uint32_t pload_count = 1500000; + uint32_t pload_size = 300; + char *pload_start = NULL; + + icmap_get_uint32("pload.count", &pload_count); + icmap_get_uint32("pload.size", &pload_size); + + if (pload_size > MESSAGE_SIZE_MAX) { + pload_size = MESSAGE_SIZE_MAX; + log_printf(LOGSYS_LEVEL_WARNING, "pload size limited to %u", pload_size); + } + + if ((!pload_started) && + (icmap_get_string("pload.start", &pload_start) == CS_OK)) { + if (!strcmp(pload_start, + "i_totally_understand_pload_will_crash_my_cluster_and_kill_corosync_on_exit")) { + buffer = malloc(pload_size); + if (buffer) { + log_printf(LOGSYS_LEVEL_WARNING, "Starting pload!"); + pload_send_start(pload_count, pload_size); + } else { + log_printf(LOGSYS_LEVEL_WARNING, + "Unable to allocate pload buffer!"); + } + } + free(pload_start); + } +} + +/* + * exec functions + */ +static char *pload_exec_init_fn (struct corosync_api_v1 *corosync_api) +{ + icmap_track_t pload_track = NULL; + + api = corosync_api; + + /* + * track changes to pload config and start only on demand 
+ */ + if (icmap_track_add("pload.", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, + pload_read_config, + NULL, + &pload_track) != CS_OK) { + return (char *)"Unable to setup pload config tracking!\n"; + } + + return NULL; +} + +/* + * network messages/onwire handlers + */ + +static void req_exec_pload_start_endian_convert (void *msg) +{ + struct req_exec_pload_start *req_exec_pload_start = msg; + + req_exec_pload_start->msg_count = swab32(req_exec_pload_start->msg_count); + req_exec_pload_start->msg_size = swab32(req_exec_pload_start->msg_size); +} + +static void message_handler_req_exec_pload_start ( + const void *msg, + unsigned int nodeid) +{ + const struct req_exec_pload_start *req_exec_pload_start = msg; + + /* + * don't start multiple instances + */ + if (pload_started) { + return; + } + + pload_started = 1; + + msgs_wanted = req_exec_pload_start->msg_count; + msg_size = req_exec_pload_start->msg_size; + + api->schedwrk_create ( + &start_mcasting_handle, + pload_send_message, + &start_mcasting_handle); +} + +static void req_exec_pload_mcast_endian_convert (void *msg) +{ +} + +static void message_handler_req_exec_pload_mcast ( + const void *msg, + unsigned int nodeid) +{ + char log_buffer[1024]; + + if (msgs_delivered == 0) { + tv1 = qb_util_nano_current_get (); + } + msgs_delivered += 1; + if (msgs_delivered == msgs_wanted) { + tv2 = qb_util_nano_current_get (); + tv_elapsed = tv2 - tv1; + sprintf (log_buffer, "%5d Writes %d bytes per write %7.3f seconds runtime, %9.3f TP/S, %9.3f MB/S.", + msgs_delivered, + msg_size, + (tv_elapsed / 1000000000.0), + ((float)msgs_delivered) / (tv_elapsed / 1000000000.0), + (((float)msgs_delivered) * ((float)msg_size) / + (tv_elapsed / 1000000000.0)) / (1024.0 * 1024.0)); + log_printf (LOGSYS_LEVEL_NOTICE, "%s", log_buffer); + log_printf (LOGSYS_LEVEL_WARNING, "Stopping corosync the hard way"); + if (buffer) { + free(buffer); + buffer = NULL; + } + exit(COROSYNC_DONE_PLOAD); + } +} diff 
--git a/exec/quorum.c b/exec/quorum.c new file mode 100644 index 0000000..323a15f --- /dev/null +++ b/exec/quorum.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2008-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Christine Caulfield (ccaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <netinet/in.h> +#include <sys/uio.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <arpa/inet.h> + +#include <corosync/corotypes.h> +#include <corosync/swab.h> +#include <corosync/totem/totempg.h> +#include <corosync/totem/totem.h> +#include <corosync/logsys.h> + +#include "quorum.h" +#include "main.h" +#include "vsf.h" + +LOGSYS_DECLARE_SUBSYS ("QUORUM"); + +static struct quorum_callin_functions *corosync_quorum_fns = NULL; + +int corosync_quorum_is_quorate (void) +{ + if (corosync_quorum_fns) { + return corosync_quorum_fns->quorate(); + } + else { + return 1; + } +} + +int corosync_quorum_register_callback (quorum_callback_fn_t fn, void *context) +{ + if (corosync_quorum_fns) { + return corosync_quorum_fns->register_callback(fn, context); + } + else { + return 0; + } +} + +int corosync_quorum_unregister_callback (quorum_callback_fn_t fn, void *context) +{ + if (corosync_quorum_fns) { + return corosync_quorum_fns->unregister_callback(fn, context); + } + else { + return 0; + } +} + +int corosync_quorum_initialize (struct quorum_callin_functions *fns) +{ + if (corosync_quorum_fns) + return -1; + + corosync_quorum_fns = fns; + return 0; +} + +int quorum_none(void) +{ + if (corosync_quorum_fns) + return 0; + else + return 1; +} diff --git a/exec/quorum.h b/exec/quorum.h new file mode 100644 index 0000000..bd99ee5 --- /dev/null +++ b/exec/quorum.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2008-2012 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Christine Caulfield (ccaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the Red Hat, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef QUORUM_H_DEFINED +#define QUORUM_H_DEFINED + +struct memb_ring_id; + +typedef void (*quorum_callback_fn_t) (int quorate, void *context); + +typedef void (*quorum_set_quorate_fn_t) (const unsigned int *view_list, + size_t view_list_entries, + int quorate, struct memb_ring_id *); + +struct quorum_callin_functions +{ + int (*quorate) (void); + int (*register_callback) (quorum_callback_fn_t, void*); + int (*unregister_callback) (quorum_callback_fn_t, void*); +}; + +extern int corosync_quorum_is_quorate (void); + +extern int corosync_quorum_register_callback (quorum_callback_fn_t fn, void *context); + +extern int corosync_quorum_unregister_callback (quorum_callback_fn_t fn, void *context); + +extern int corosync_quorum_initialize (struct quorum_callin_functions *fns); + + +extern int quorum_none(void); + + +#endif /* QUORUM_H_DEFINED */ diff --git a/exec/schedwrk.c b/exec/schedwrk.c new file mode 100644 index 0000000..3f5d424 --- /dev/null +++ b/exec/schedwrk.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2009-2010 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> +#include <corosync/totem/totempg.h> +#include <corosync/hdb.h> +#include "schedwrk.h" + +static void (*serialize_lock) (void); +static void (*serialize_unlock) (void); + +DECLARE_HDB_DATABASE (schedwrk_instance_database,NULL); + +struct schedwrk_instance { + int (*schedwrk_fn) (const void *); + const void *context; + void *callback_handle; + int lock; +}; + +static int schedwrk_do (enum totem_callback_token_type type, const void *context) +{ + hdb_handle_t handle = *((hdb_handle_t *)context); + struct schedwrk_instance *instance; + int res; + + res = hdb_handle_get (&schedwrk_instance_database, + handle, + (void *)&instance); + if (res != 0) { + goto error_exit; + } + + if (instance->lock) + serialize_lock (); + + res = instance->schedwrk_fn (instance->context); + + if (instance->lock) + serialize_unlock (); + + if (res == 0) { + hdb_handle_destroy (&schedwrk_instance_database, handle); + } + hdb_handle_put (&schedwrk_instance_database, handle); + return (res); + +error_exit: + return (-1); +} + +void schedwrk_init ( + void (*serialize_lock_fn) (void), + void (*serialize_unlock_fn) (void)) +{ + serialize_lock = serialize_lock_fn; + serialize_unlock = 
serialize_unlock_fn; +} + +static int schedwrk_internal_create ( + hdb_handle_t *handle, + int (schedwrk_fn) (const void *), + const void *context, + int lock) +{ + struct schedwrk_instance *instance; + int res; + + res = hdb_handle_create (&schedwrk_instance_database, + sizeof (struct schedwrk_instance), handle); + if (res != 0) { + goto error_exit; + } + res = hdb_handle_get (&schedwrk_instance_database, *handle, + (void *)&instance); + if (res != 0) { + goto error_destroy; + } + + totempg_callback_token_create ( + &instance->callback_handle, + TOTEM_CALLBACK_TOKEN_SENT, + 1, + schedwrk_do, + handle); + + instance->schedwrk_fn = schedwrk_fn; + instance->context = context; + instance->lock = lock; + + hdb_handle_put (&schedwrk_instance_database, *handle); + + return (0); + +error_destroy: + hdb_handle_destroy (&schedwrk_instance_database, *handle); + +error_exit: + return (-1); +} + +/* + * handle pointer is internally used by totempg_callback_token_create. To make schedwrk work, + * handle must be pointer to ether heap or .text or static memory (not stack) which is not + * changed by caller. + */ +int schedwrk_create ( + hdb_handle_t *handle, + int (schedwrk_fn) (const void *), + const void *context) +{ + return schedwrk_internal_create (handle, schedwrk_fn, context, 1); +} + +int schedwrk_create_nolock ( + hdb_handle_t *handle, + int (schedwrk_fn) (const void *), + const void *context) +{ + return schedwrk_internal_create (handle, schedwrk_fn, context, 0); +} + +void schedwrk_destroy (hdb_handle_t handle) +{ + hdb_handle_destroy (&schedwrk_instance_database, handle); +} diff --git a/exec/schedwrk.h b/exec/schedwrk.h new file mode 100644 index 0000000..32ea646 --- /dev/null +++ b/exec/schedwrk.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2009-2010 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef SCHEDWRK_H_DEFINED +#define SCHEDWRK_H_DEFINED + +extern void schedwrk_init ( + void (*serialize_lock_fn) (void), + void (*serialize_unlock_fn) (void)); + +extern int schedwrk_create ( + hdb_handle_t *handle, + int (schedwrk_fn) (const void *), + const void *context); + +extern int schedwrk_create_nolock ( + hdb_handle_t *handle, + int (schedwrk_fn) (const void *), + const void *context); + +extern void schedwrk_destroy (hdb_handle_t handle); + +#endif /* SCHEDWRK_H_DEFINED */ diff --git a/exec/service.c b/exec/service.c new file mode 100644 index 0000000..fdd16d9 --- /dev/null +++ b/exec/service.c @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2006 MontaVista Software, Inc. + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdlib.h> +#include <string.h> + +#include <corosync/swab.h> +#include <corosync/totem/totem.h> + +#include <corosync/corotypes.h> +#include "util.h" +#include <corosync/logsys.h> +#include <corosync/icmap.h> + +#include "timer.h" +#include <corosync/totem/totempg.h> +#include <corosync/totem/totemip.h> +#include "main.h" +#include "service.h" + +#include <qb/qbipcs.h> +#include <qb/qbloop.h> + +LOGSYS_DECLARE_SUBSYS ("SERV"); + +static struct default_service default_services[] = { + { + .name = "corosync_cmap", + .ver = 0, + .loader = cmap_get_service_engine_ver0 + }, + { + .name = "corosync_cfg", + .ver = 0, + .loader = cfg_get_service_engine_ver0 + }, + { + .name = "corosync_cpg", + .ver = 0, + .loader = cpg_get_service_engine_ver0 + }, + { + .name = "corosync_pload", + .ver = 0, + .loader = pload_get_service_engine_ver0 + }, +#ifdef HAVE_MONITORING + { + .name = "corosync_mon", + .ver = 0, + .loader = mon_get_service_engine_ver0 + }, +#endif +#ifdef HAVE_WATCHDOG + { + .name = "corosync_wd", + .ver = 0, + .loader = wd_get_service_engine_ver0 + }, +#endif + { + .name = "corosync_quorum", + .ver = 0, + .loader = vsf_quorum_get_service_engine_ver0 + }, +}; + +/* + * service exit and unlink schedwrk handler data structure + */ +struct seus_handler_data { + int service_engine; + struct corosync_api_v1 *api; +}; + +struct corosync_service_engine *corosync_service[SERVICES_COUNT_MAX]; + +const 
char *service_stats_rx[SERVICES_COUNT_MAX][SERVICE_HANDLER_MAXIMUM_COUNT]; +const char *service_stats_tx[SERVICES_COUNT_MAX][SERVICE_HANDLER_MAXIMUM_COUNT]; + +static void (*service_unlink_all_complete) (void) = NULL; + +char *corosync_service_link_and_init ( + struct corosync_api_v1 *corosync_api, + struct default_service *service) +{ + struct corosync_service_engine *service_engine; + int fn; + char *name_sufix; + char key_name[ICMAP_KEYNAME_MAXLEN]; + char *init_result; + + /* + * Initialize service + */ + service_engine = service->loader(); + + corosync_service[service_engine->id] = service_engine; + + if (service_engine->config_init_fn) { + service_engine->config_init_fn (corosync_api); + } + + if (service_engine->exec_init_fn) { + init_result = service_engine->exec_init_fn (corosync_api); + if (init_result) { + return (init_result); + } + } + + /* + * Store service in cmap db + */ + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "internal_configuration.service.%u.name", service_engine->id); + icmap_set_string(key_name, service->name); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "internal_configuration.service.%u.ver", service_engine->id); + icmap_set_uint32(key_name, service->ver); + + name_sufix = strrchr (service->name, '_'); + if (name_sufix) + name_sufix++; + else + name_sufix = (char*)service->name; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.services.%s.service_id", name_sufix); + icmap_set_uint16(key_name, service_engine->id); + + for (fn = 0; fn < service_engine->exec_engine_count; fn++) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.services.%s.%d.tx", name_sufix, fn); + icmap_set_uint64(key_name, 0); + service_stats_tx[service_engine->id][fn] = strdup(key_name); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.services.%s.%d.rx", name_sufix, fn); + icmap_set_uint64(key_name, 0); + service_stats_rx[service_engine->id][fn] = strdup(key_name); + } + + log_printf (LOGSYS_LEVEL_NOTICE, + "Service engine loaded: %s [%d]", 
service_engine->name, service_engine->id); + init_result = (char *)cs_ipcs_service_init(service_engine); + if (init_result != NULL) { + return (init_result); + } + + return NULL; +} + +static int service_priority_max(void) +{ + int lpc = 0, max = 0; + for(; lpc < SERVICES_COUNT_MAX; lpc++) { + if(corosync_service[lpc] != NULL && corosync_service[lpc]->priority > max) { + max = corosync_service[lpc]->priority; + } + } + return max; +} + +/* + * use the force + */ +static unsigned int +corosync_service_unlink_and_exit_priority ( + struct corosync_api_v1 *corosync_api, + int lowest_priority, + int *current_priority, + int *current_service_engine) +{ + unsigned short service_id; + int res; + + for(; *current_priority >= lowest_priority; *current_priority = *current_priority - 1) { + for(*current_service_engine = 0; + *current_service_engine < SERVICES_COUNT_MAX; + *current_service_engine = *current_service_engine + 1) { + + if(corosync_service[*current_service_engine] == NULL || + corosync_service[*current_service_engine]->priority != *current_priority) { + continue; + } + + /* + * find service handle and unload it if possible. + * + * If the service engine's exec_exit_fn returns -1 indicating + * it was busy, this function returns -1 and can be called again + * at a later time (usually via the schedwrk api). 
+ */ + service_id = corosync_service[*current_service_engine]->id; + + if (corosync_service[service_id]->exec_exit_fn) { + res = corosync_service[service_id]->exec_exit_fn (); + if (res == -1) { + return (-1); + } + } + + /* + * Exit all ipc connections dependent on this service + */ + cs_ipcs_service_destroy (*current_service_engine); + + log_printf(LOGSYS_LEVEL_NOTICE, + "Service engine unloaded: %s", + corosync_service[*current_service_engine]->name); + + corosync_service[*current_service_engine] = NULL; + + /* + * Call should call this function again + */ + return (1); + } + } + /* + * We finish unlink of all services -> no need to call this function again + */ + return (0); +} + +static unsigned int service_unlink_and_exit ( + struct corosync_api_v1 *corosync_api, + const char *service_name, + unsigned int service_ver) +{ + unsigned short service_id; + char *name_sufix; + int res; + const char *iter_key_name; + icmap_iter_t iter; + char key_name[ICMAP_KEYNAME_MAXLEN]; + unsigned int found_service_ver; + char *found_service_name; + int service_found; + + name_sufix = strrchr (service_name, '_'); + if (name_sufix) + name_sufix++; + else + name_sufix = (char*)service_name; + + + service_found = 0; + found_service_name = NULL; + iter = icmap_iter_init("internal_configuration.service."); + while ((iter_key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(iter_key_name, "internal_configuration.service.%hu.%s", &service_id, key_name); + if (res != 2) { + continue; + } + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "internal_configuration.service.%hu.name", service_id); + if (icmap_get_string(key_name, &found_service_name) != CS_OK) { + continue; + } + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "internal_configuration.service.%u.ver", service_id); + if (icmap_get_uint32(key_name, &found_service_ver) != CS_OK) { + free(found_service_name); + continue; + } + + if (service_ver == found_service_ver && strcmp(found_service_name, service_name) == 0) { + 
free(found_service_name); + service_found = 1; + break; + } + free(found_service_name); + } + icmap_iter_finalize(iter); + + if (service_found && service_id < SERVICES_COUNT_MAX + && corosync_service[service_id] != NULL) { + + if (corosync_service[service_id]->exec_exit_fn) { + res = corosync_service[service_id]->exec_exit_fn (); + if (res == -1) { + return (-1); + } + } + + log_printf(LOGSYS_LEVEL_NOTICE, + "Service engine unloaded: %s", + corosync_service[service_id]->name); + + corosync_service[service_id] = NULL; + + cs_ipcs_service_destroy (service_id); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "internal_configuration.service.%u.handle", service_id); + icmap_delete(key_name); + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "internal_configuration.service.%u.name", service_id); + icmap_delete(key_name); + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "internal_configuration.service.%u.ver", service_id); + icmap_delete(key_name); + } + + return (0); +} + +/* + * Links default services into the executive + */ +unsigned int corosync_service_defaults_link_and_init (struct corosync_api_v1 *corosync_api) +{ + unsigned int i; + char *error; + + for (i = 0; + i < sizeof (default_services) / sizeof (struct default_service); i++) { + + default_services[i].loader(); + error = corosync_service_link_and_init ( + corosync_api, + &default_services[i]); + if (error) { + log_printf(LOGSYS_LEVEL_ERROR, + "Service engine '%s' failed to load for reason '%s'", + default_services[i].name, + error); + corosync_exit_error (COROSYNC_DONE_SERVICE_ENGINE_INIT); + } + } + + return (0); +} + +static void service_exit_schedwrk_handler (void *data) { + int res; + static int current_priority = 0; + static int current_service_engine = 0; + static int called = 0; + struct seus_handler_data *cb_data = (struct seus_handler_data *)data; + struct corosync_api_v1 *api = (struct corosync_api_v1 *)cb_data->api; + + if (called == 0) { + log_printf(LOGSYS_LEVEL_NOTICE, + "Unloading all Corosync service 
engines."); + current_priority = service_priority_max (); + called = 1; + } + + res = corosync_service_unlink_and_exit_priority ( + api, + 0, + &current_priority, + &current_service_engine); + if (res == 0) { + service_unlink_all_complete(); + return; + } + + qb_loop_job_add(cs_poll_handle_get(), + QB_LOOP_HIGH, + data, + service_exit_schedwrk_handler); +} + +void corosync_service_unlink_all ( + struct corosync_api_v1 *api, + void (*unlink_all_complete) (void)) +{ + static int called = 0; + static struct seus_handler_data cb_data; + + assert (api); + + service_unlink_all_complete = unlink_all_complete; + + if (called) { + return; + } + if (called == 0) { + called = 1; + } + + cb_data.api = api; + + qb_loop_job_add(cs_poll_handle_get(), + QB_LOOP_HIGH, + &cb_data, + service_exit_schedwrk_handler); +} + +struct service_unlink_and_exit_data { + hdb_handle_t handle; + struct corosync_api_v1 *api; + const char *name; + unsigned int ver; +}; + +static void service_unlink_and_exit_schedwrk_handler (void *data) +{ + struct service_unlink_and_exit_data *service_unlink_and_exit_data = + data; + int res; + + res = service_unlink_and_exit ( + service_unlink_and_exit_data->api, + service_unlink_and_exit_data->name, + service_unlink_and_exit_data->ver); + + if (res == 0) { + free (service_unlink_and_exit_data); + } else { + qb_loop_job_add(cs_poll_handle_get(), + QB_LOOP_HIGH, + data, + service_unlink_and_exit_schedwrk_handler); + } +} + +typedef int (*schedwrk_cast) (const void *); + +unsigned int corosync_service_unlink_and_exit ( + struct corosync_api_v1 *api, + const char *service_name, + unsigned int service_ver) +{ + struct service_unlink_and_exit_data *service_unlink_and_exit_data; + + assert (api); + service_unlink_and_exit_data = malloc (sizeof (struct service_unlink_and_exit_data)); + service_unlink_and_exit_data->api = api; + service_unlink_and_exit_data->name = strdup (service_name); + service_unlink_and_exit_data->ver = service_ver; + + qb_loop_job_add(cs_poll_handle_get(), + 
QB_LOOP_HIGH, + service_unlink_and_exit_data, + service_unlink_and_exit_schedwrk_handler); + return (0); +} diff --git a/exec/service.h b/exec/service.h new file mode 100644 index 0000000..12fd751 --- /dev/null +++ b/exec/service.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2002-2006 MontaVista Software, Inc. + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef COROSYNC_SERVICE_H_DEFINED +#define COROSYNC_SERVICE_H_DEFINED + +#include <corosync/hdb.h> + +struct corosync_api_v1; + +struct default_service { + const char *name; + int ver; + struct corosync_service_engine *(*loader)(void); +}; + +/** + * Link and initialize a service + */ +char *corosync_service_link_and_init ( + struct corosync_api_v1 *objdb, + struct default_service *service_engine); + +/** + * Unlink and exit a service + */ +extern unsigned int corosync_service_unlink_and_exit ( + struct corosync_api_v1 *objdb, + const char *service_name, + unsigned int service_ver); + +/** + * Unlink and exit all corosync services + */ +extern void corosync_service_unlink_all ( + struct corosync_api_v1 *api, + void (*unlink_all_complete) (void)); + +/** + * Load all of the default services + */ +extern unsigned int corosync_service_defaults_link_and_init ( + struct corosync_api_v1 *objdb); + +extern struct corosync_service_engine *corosync_service[]; + +extern const char *service_stats_rx[SERVICES_COUNT_MAX][SERVICE_HANDLER_MAXIMUM_COUNT]; +extern const char *service_stats_tx[SERVICES_COUNT_MAX][SERVICE_HANDLER_MAXIMUM_COUNT]; + +struct corosync_service_engine *votequorum_get_service_engine_ver0 (void); +struct corosync_service_engine *vsf_quorum_get_service_engine_ver0 (void); +struct corosync_service_engine *quorum_get_service_handler_ver0 (void); +struct corosync_service_engine *pload_get_service_engine_ver0 (void); +struct 
corosync_service_engine *cfg_get_service_engine_ver0 (void); +struct corosync_service_engine *cpg_get_service_engine_ver0 (void); +struct corosync_service_engine *mon_get_service_engine_ver0 (void); +struct corosync_service_engine *wd_get_service_engine_ver0 (void); +struct corosync_service_engine *cmap_get_service_engine_ver0 (void); + +#endif /* SERVICE_H_DEFINED */ diff --git a/exec/stats.c b/exec/stats.c new file mode 100644 index 0000000..d9fd115 --- /dev/null +++ b/exec/stats.c @@ -0,0 +1,784 @@ +/* + * Copyright (c) 2017-2020 Red Hat, Inc. + * + * All rights reserved. + * + * Authors: Christine Caulfield (ccaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdint.h> +#include <stddef.h> +#include <unistd.h> +#include <libknet.h> + +#include <qb/qblist.h> +#include <qb/qbipcs.h> +#include <qb/qbipc_common.h> + +#include <corosync/corodefs.h> +#include <corosync/coroapi.h> +#include <corosync/logsys.h> +#include <corosync/icmap.h> +#include <corosync/totem/totemstats.h> + +#include "util.h" +#include "ipcs_stats.h" +#include "stats.h" + +LOGSYS_DECLARE_SUBSYS ("STATS"); + +static qb_map_t *stats_map; + +/* Structure of an element in the schedmiss array */ +struct schedmiss_entry { + uint64_t timestamp; + float delay; +}; +#define MAX_SCHEDMISS_EVENTS 10 +static struct schedmiss_entry schedmiss_event[MAX_SCHEDMISS_EVENTS]; +static unsigned int highest_schedmiss_event; + +#define SCHEDMISS_PREFIX "stats.schedmiss" + +/* Convert iterator number to text and a stats pointer */ +struct cs_stats_conv { + enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG, STAT_SCHEDMISS} type; + const char *name; + const size_t offset; + const icmap_value_types_t value_type; +}; + +struct cs_stats_conv cs_pg_stats[] = { + { STAT_PG, "msg_queue_avail", offsetof(totempg_stats_t, msg_queue_avail), ICMAP_VALUETYPE_UINT32}, + { STAT_PG, "msg_reserved", offsetof(totempg_stats_t, msg_reserved), ICMAP_VALUETYPE_UINT32}, +}; +struct cs_stats_conv cs_srp_stats[] = { + { STAT_SRP, 
"orf_token_tx", offsetof(totemsrp_stats_t, orf_token_tx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "orf_token_rx", offsetof(totemsrp_stats_t, orf_token_rx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "memb_merge_detect_tx", offsetof(totemsrp_stats_t, memb_merge_detect_tx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "memb_merge_detect_rx", offsetof(totemsrp_stats_t, memb_merge_detect_rx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "memb_join_tx", offsetof(totemsrp_stats_t, memb_join_tx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "memb_join_rx", offsetof(totemsrp_stats_t, memb_join_rx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "mcast_tx", offsetof(totemsrp_stats_t, mcast_tx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "mcast_retx", offsetof(totemsrp_stats_t, mcast_retx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "mcast_rx", offsetof(totemsrp_stats_t, mcast_rx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "memb_commit_token_tx", offsetof(totemsrp_stats_t, memb_commit_token_tx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "memb_commit_token_rx", offsetof(totemsrp_stats_t, memb_commit_token_rx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "token_hold_cancel_tx", offsetof(totemsrp_stats_t, token_hold_cancel_tx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "token_hold_cancel_rx", offsetof(totemsrp_stats_t, token_hold_cancel_rx), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "operational_entered", offsetof(totemsrp_stats_t, operational_entered), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "operational_token_lost", offsetof(totemsrp_stats_t, operational_token_lost), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "gather_entered", offsetof(totemsrp_stats_t, gather_entered), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "gather_token_lost", offsetof(totemsrp_stats_t, gather_token_lost), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "commit_entered", offsetof(totemsrp_stats_t, commit_entered), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "commit_token_lost", offsetof(totemsrp_stats_t, commit_token_lost), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, 
"recovery_entered", offsetof(totemsrp_stats_t, recovery_entered), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "recovery_token_lost", offsetof(totemsrp_stats_t, recovery_token_lost), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "consensus_timeouts", offsetof(totemsrp_stats_t, consensus_timeouts), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "rx_msg_dropped", offsetof(totemsrp_stats_t, rx_msg_dropped), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "time_since_token_last_received", offsetof(totemsrp_stats_t, time_since_token_last_received), ICMAP_VALUETYPE_UINT64}, + { STAT_SRP, "continuous_gather", offsetof(totemsrp_stats_t, continuous_gather), ICMAP_VALUETYPE_UINT32}, + { STAT_SRP, "continuous_sendmsg_failures", offsetof(totemsrp_stats_t, continuous_sendmsg_failures), ICMAP_VALUETYPE_UINT32}, + { STAT_SRP, "firewall_enabled_or_nic_failure", offsetof(totemsrp_stats_t, firewall_enabled_or_nic_failure), ICMAP_VALUETYPE_UINT8}, + { STAT_SRP, "mtt_rx_token", offsetof(totemsrp_stats_t, mtt_rx_token), ICMAP_VALUETYPE_UINT32}, + { STAT_SRP, "avg_token_workload", offsetof(totemsrp_stats_t, avg_token_workload), ICMAP_VALUETYPE_UINT32}, + { STAT_SRP, "avg_backlog_calc", offsetof(totemsrp_stats_t, avg_backlog_calc), ICMAP_VALUETYPE_UINT32}, +}; + +struct cs_stats_conv cs_knet_stats[] = { + { STAT_KNET, "enabled", offsetof(struct knet_link_status, enabled), ICMAP_VALUETYPE_UINT8}, + { STAT_KNET, "connected", offsetof(struct knet_link_status, connected), ICMAP_VALUETYPE_UINT8}, + { STAT_KNET, "mtu", offsetof(struct knet_link_status, mtu), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_data_packets", offsetof(struct knet_link_status, stats.tx_data_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_data_packets", offsetof(struct knet_link_status, stats.rx_data_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_data_bytes", offsetof(struct knet_link_status, stats.tx_data_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_data_bytes", offsetof(struct knet_link_status, stats.rx_data_bytes), 
ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_ping_packets", offsetof(struct knet_link_status, stats.tx_ping_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_ping_packets", offsetof(struct knet_link_status, stats.rx_ping_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_ping_bytes", offsetof(struct knet_link_status, stats.tx_ping_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_ping_bytes", offsetof(struct knet_link_status, stats.rx_ping_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_pong_packets", offsetof(struct knet_link_status, stats.tx_pong_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_pong_packets", offsetof(struct knet_link_status, stats.rx_pong_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_pong_bytes", offsetof(struct knet_link_status, stats.tx_pong_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_pong_bytes", offsetof(struct knet_link_status, stats.rx_pong_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_pmtu_packets", offsetof(struct knet_link_status, stats.tx_pmtu_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_pmtu_packets", offsetof(struct knet_link_status, stats.rx_pmtu_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_pmtu_bytes", offsetof(struct knet_link_status, stats.tx_pmtu_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_pmtu_bytes", offsetof(struct knet_link_status, stats.rx_pmtu_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_total_packets", offsetof(struct knet_link_status, stats.tx_total_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_total_packets", offsetof(struct knet_link_status, stats.rx_total_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_total_bytes", offsetof(struct knet_link_status, stats.tx_total_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "rx_total_bytes", offsetof(struct knet_link_status, stats.rx_total_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_total_errors", offsetof(struct knet_link_status, stats.tx_total_errors), ICMAP_VALUETYPE_UINT64}, + { 
STAT_KNET, "rx_total_retries", offsetof(struct knet_link_status, stats.tx_total_retries), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET, "tx_pmtu_errors", offsetof(struct knet_link_status, stats.tx_pmtu_errors), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_pmtu_retries", offsetof(struct knet_link_status, stats.tx_pmtu_retries), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_ping_errors", offsetof(struct knet_link_status, stats.tx_ping_errors), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_ping_retries", offsetof(struct knet_link_status, stats.tx_ping_retries), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_pong_errors", offsetof(struct knet_link_status, stats.tx_pong_errors), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_pong_retries", offsetof(struct knet_link_status, stats.tx_pong_retries), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_data_errors", offsetof(struct knet_link_status, stats.tx_data_errors), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "tx_data_retries", offsetof(struct knet_link_status, stats.tx_data_retries), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "latency_min", offsetof(struct knet_link_status, stats.latency_min), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "latency_max", offsetof(struct knet_link_status, stats.latency_max), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "latency_ave", offsetof(struct knet_link_status, stats.latency_ave), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "latency_samples", offsetof(struct knet_link_status, stats.latency_samples), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "down_count", offsetof(struct knet_link_status, stats.down_count), ICMAP_VALUETYPE_UINT32}, + { STAT_KNET, "up_count", offsetof(struct knet_link_status, stats.up_count), ICMAP_VALUETYPE_UINT32}, +}; +struct cs_stats_conv cs_knet_handle_stats[] = { + { STAT_KNET_HANDLE, "tx_uncompressed_packets", offsetof(struct knet_handle_stats, tx_uncompressed_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_compressed_packets", offsetof(struct knet_handle_stats, tx_compressed_packets), 
ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_compressed_original_bytes", offsetof(struct knet_handle_stats, tx_compressed_original_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_compressed_size_bytes", offsetof(struct knet_handle_stats, tx_compressed_size_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_compress_time_min", offsetof(struct knet_handle_stats, tx_compress_time_min), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_compress_time_max", offsetof(struct knet_handle_stats, tx_compress_time_max), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_compress_time_ave", offsetof(struct knet_handle_stats, tx_compress_time_ave), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_compressed_packets", offsetof(struct knet_handle_stats, rx_compressed_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_compressed_original_bytes", offsetof(struct knet_handle_stats, rx_compressed_original_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_compressed_size_bytes", offsetof(struct knet_handle_stats, rx_compressed_size_bytes), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_compress_time_min", offsetof(struct knet_handle_stats, rx_compress_time_min), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_compress_time_max", offsetof(struct knet_handle_stats, rx_compress_time_max), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_compress_time_ave", offsetof(struct knet_handle_stats, rx_compress_time_ave), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_crypt_time_min", offsetof(struct knet_handle_stats, tx_crypt_time_min), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_crypt_time_max", offsetof(struct knet_handle_stats, tx_crypt_time_max), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_crypt_time_ave", offsetof(struct knet_handle_stats, tx_crypt_time_ave), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "tx_crypt_byte_overhead", offsetof(struct knet_handle_stats, tx_crypt_byte_overhead), ICMAP_VALUETYPE_UINT64}, + { 
STAT_KNET_HANDLE, "tx_crypt_packets", offsetof(struct knet_handle_stats, tx_crypt_packets), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_crypt_time_min", offsetof(struct knet_handle_stats, rx_crypt_time_min), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_crypt_time_max", offsetof(struct knet_handle_stats, rx_crypt_time_max), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_crypt_time_ave", offsetof(struct knet_handle_stats, rx_crypt_time_ave), ICMAP_VALUETYPE_UINT64}, + { STAT_KNET_HANDLE, "rx_crypt_packets", offsetof(struct knet_handle_stats, rx_crypt_packets), ICMAP_VALUETYPE_UINT64}, +}; + +struct cs_stats_conv cs_ipcs_conn_stats[] = { + { STAT_IPCSC, "queueing", offsetof(struct ipcs_conn_stats, cnx.queuing), ICMAP_VALUETYPE_INT32}, + { STAT_IPCSC, "queued", offsetof(struct ipcs_conn_stats, cnx.queued), ICMAP_VALUETYPE_UINT32}, + { STAT_IPCSC, "invalid_request", offsetof(struct ipcs_conn_stats, cnx.invalid_request), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSC, "overload", offsetof(struct ipcs_conn_stats, cnx.overload), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSC, "sent", offsetof(struct ipcs_conn_stats, cnx.sent), ICMAP_VALUETYPE_UINT32}, + { STAT_IPCSC, "procname", offsetof(struct ipcs_conn_stats, cnx.proc_name), ICMAP_VALUETYPE_STRING}, + { STAT_IPCSC, "requests", offsetof(struct ipcs_conn_stats, conn.requests), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSC, "responses", offsetof(struct ipcs_conn_stats, conn.responses), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSC, "dispatched", offsetof(struct ipcs_conn_stats, conn.events), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSC, "send_retries", offsetof(struct ipcs_conn_stats, conn.send_retries), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSC, "recv_retries", offsetof(struct ipcs_conn_stats, conn.recv_retries), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSC, "flow_control", offsetof(struct ipcs_conn_stats, conn.flow_control_state), ICMAP_VALUETYPE_UINT32}, + { STAT_IPCSC, "flow_control_count", offsetof(struct ipcs_conn_stats, 
conn.flow_control_count), ICMAP_VALUETYPE_UINT64}, +}; +struct cs_stats_conv cs_ipcs_global_stats[] = { + { STAT_IPCSG, "global.active", offsetof(struct ipcs_global_stats, active), ICMAP_VALUETYPE_UINT64}, + { STAT_IPCSG, "global.closed", offsetof(struct ipcs_global_stats, closed), ICMAP_VALUETYPE_UINT64}, +}; +struct cs_stats_conv cs_schedmiss_stats[] = { + { STAT_SCHEDMISS, "timestamp", offsetof(struct schedmiss_entry, timestamp), ICMAP_VALUETYPE_UINT64}, + { STAT_SCHEDMISS, "delay", offsetof(struct schedmiss_entry, delay), ICMAP_VALUETYPE_FLOAT}, +}; + +#define NUM_PG_STATS (sizeof(cs_pg_stats) / sizeof(struct cs_stats_conv)) +#define NUM_SRP_STATS (sizeof(cs_srp_stats) / sizeof(struct cs_stats_conv)) +#define NUM_KNET_STATS (sizeof(cs_knet_stats) / sizeof(struct cs_stats_conv)) +#define NUM_KNET_HANDLE_STATS (sizeof(cs_knet_handle_stats) / sizeof(struct cs_stats_conv)) +#define NUM_IPCSC_STATS (sizeof(cs_ipcs_conn_stats) / sizeof(struct cs_stats_conv)) +#define NUM_IPCSG_STATS (sizeof(cs_ipcs_global_stats) / sizeof(struct cs_stats_conv)) + +/* What goes in the trie */ +struct stats_item { + char *key_name; + struct cs_stats_conv * cs_conv; +}; + +/* One of these per tracker */ +struct cs_stats_tracker +{ + char *key_name; + void *user_data; + int32_t events; + icmap_notify_fn_t notify_fn; + uint64_t old_value; + struct qb_list_head list; +}; +QB_LIST_DECLARE (stats_tracker_list_head); +static const struct corosync_api_v1 *api; + +static void stats_map_set_value(struct cs_stats_conv *conv, + void *stat_array, + void *value, + size_t *value_len, + icmap_value_types_t *type) +{ + if (value_len) { + *value_len = icmap_get_valuetype_len(conv->value_type); + } + if (type) { + *type = conv->value_type; + if ((*type == ICMAP_VALUETYPE_STRING) && value_len && stat_array) { + *value_len = strlen((char *)(stat_array) + conv->offset)+1; + } + } + if (value) { + assert(value_len != NULL); + + memcpy(value, (char *)(stat_array) + conv->offset, *value_len); + } +} + +static 
void stats_add_entry(const char *key, struct cs_stats_conv *cs_conv) +{ + struct stats_item *item = malloc(sizeof(struct stats_item)); + + if (item) { + item->cs_conv = cs_conv; + item->key_name = strdup(key); + qb_map_put(stats_map, item->key_name, item); + } +} +static void stats_rm_entry(const char *key) +{ + struct stats_item *item = qb_map_get(stats_map, key); + + if (item) { + qb_map_rm(stats_map, item->key_name); + /* Structures freed in callback below */ + } +} + +static void stats_map_free_cb(uint32_t event, + char* key, void* old_value, + void* value, void* user_data) +{ + struct stats_item *item = (struct stats_item *)old_value; + + if (item) { + free(item->key_name); + free(item); + } +} + +cs_error_t stats_map_init(const struct corosync_api_v1 *corosync_api) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + int32_t err; + + api = corosync_api; + + stats_map = qb_trie_create(); + if (!stats_map) { + return CS_ERR_INIT; + } + + /* Populate the static portions of the trie */ + for (i = 0; i<NUM_PG_STATS; i++) { + sprintf(param, "stats.pg.%s", cs_pg_stats[i].name); + stats_add_entry(param, &cs_pg_stats[i]); + } + for (i = 0; i<NUM_SRP_STATS; i++) { + sprintf(param, "stats.srp.%s", cs_srp_stats[i].name); + stats_add_entry(param, &cs_srp_stats[i]); + } + for (i = 0; i<NUM_IPCSG_STATS; i++) { + sprintf(param, "stats.ipcs.%s", cs_ipcs_global_stats[i].name); + stats_add_entry(param, &cs_ipcs_global_stats[i]); + } + + /* KNET, IPCS & SCHEDMISS stats are added when appropriate */ + + + /* Call us when we can free things */ + err = qb_map_notify_add(stats_map, NULL, stats_map_free_cb, QB_MAP_NOTIFY_FREE, NULL); + + return (qb_to_cs_error(err)); +} + +cs_error_t stats_map_get(const char *key_name, + void *value, + size_t *value_len, + icmap_value_types_t *type) +{ + struct cs_stats_conv *statinfo; + struct stats_item *item; + totempg_stats_t *pg_stats; + struct knet_link_status link_status; + struct ipcs_conn_stats ipcs_conn_stats; + struct ipcs_global_stats 
ipcs_global_stats; + struct knet_handle_stats knet_handle_stats; + int res; + int nodeid; + int link_no; + int service_id; + uint32_t pid; + unsigned int sm_event; + char *sm_type; + void *conn_ptr; + + item = qb_map_get(stats_map, key_name); + if (!item) { + return CS_ERR_NOT_EXIST; + } + + statinfo = item->cs_conv; + switch (statinfo->type) { + case STAT_PG: + pg_stats = api->totem_get_stats(); + stats_map_set_value(statinfo, pg_stats, value, value_len, type); + break; + case STAT_SRP: + pg_stats = api->totem_get_stats(); + stats_map_set_value(statinfo, pg_stats->srp, value, value_len, type); + break; + case STAT_KNET_HANDLE: + res = totemknet_handle_get_stats(&knet_handle_stats); + if (res != CS_OK) { + return res; + } + stats_map_set_value(statinfo, &knet_handle_stats, value, value_len, type); + break; + case STAT_KNET: + if (sscanf(key_name, "stats.knet.node%d.link%d", &nodeid, &link_no) != 2) { + return CS_ERR_NOT_EXIST; + } + + /* Validate node & link IDs */ + if (nodeid <= 0 || nodeid > KNET_MAX_HOST || + link_no < 0 || link_no > KNET_MAX_LINK) { + return CS_ERR_NOT_EXIST; + } + + /* Always get the latest stats */ + res = totemknet_link_get_status((knet_node_id_t)nodeid, (uint8_t)link_no, &link_status); + if (res != CS_OK) { + return CS_ERR_LIBRARY; + } + stats_map_set_value(statinfo, &link_status, value, value_len, type); + break; + case STAT_IPCSC: + if (sscanf(key_name, "stats.ipcs.service%d.%d.%p", &service_id, &pid, &conn_ptr) != 3) { + return CS_ERR_NOT_EXIST; + } + res = cs_ipcs_get_conn_stats(service_id, pid, conn_ptr, &ipcs_conn_stats); + if (res != CS_OK) { + return res; + } + stats_map_set_value(statinfo, &ipcs_conn_stats, value, value_len, type); + break; + case STAT_IPCSG: + cs_ipcs_get_global_stats(&ipcs_global_stats); + stats_map_set_value(statinfo, &ipcs_global_stats, value, value_len, type); + break; + case STAT_SCHEDMISS: + if (sscanf(key_name, SCHEDMISS_PREFIX ".%d", &sm_event) != 1) { + return CS_ERR_NOT_EXIST; + } + + sm_type = 
strrchr(key_name, '.'); + if (sm_type == NULL) { + return CS_ERR_NOT_EXIST; + } + sm_type++; + + if (strcmp(sm_type, "timestamp") == 0) { + memcpy(value, &schedmiss_event[sm_event].timestamp, sizeof(uint64_t)); + *value_len = sizeof(uint64_t); + *type = ICMAP_VALUETYPE_UINT64; + } + if (strcmp(sm_type, "delay") == 0) { + memcpy(value, &schedmiss_event[sm_event].delay, sizeof(float)); + *value_len = sizeof(float); + *type = ICMAP_VALUETYPE_FLOAT; + } + break; + default: + return CS_ERR_LIBRARY; + } + return CS_OK; +} + +static void schedmiss_clear_stats(void) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + + for (i=0; i<MAX_SCHEDMISS_EVENTS; i++) { + if (i < highest_schedmiss_event) { + sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", i); + stats_rm_entry(param); + sprintf(param, SCHEDMISS_PREFIX ".%i.delay", i); + stats_rm_entry(param); + } + schedmiss_event[i].timestamp = (uint64_t)0LL; + schedmiss_event[i].delay = 0.0f; + } + highest_schedmiss_event = 0; +} + +/* Called from main.c */ +void stats_add_schedmiss_event(uint64_t timestamp, float delay) +{ + char param[ICMAP_KEYNAME_MAXLEN]; + int i; + + /* Move 'em all up */ + for (i=MAX_SCHEDMISS_EVENTS-2; i>=0; i--) { + schedmiss_event[i+1].timestamp = schedmiss_event[i].timestamp; + schedmiss_event[i+1].delay = schedmiss_event[i].delay; + } + + /* New entries are always at the front */ + schedmiss_event[0].timestamp = timestamp; + schedmiss_event[0].delay = delay; + + /* If we've not run off the end then add an entry in the trie for the new 'end' one */ + if (highest_schedmiss_event < MAX_SCHEDMISS_EVENTS) { + sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", highest_schedmiss_event); + stats_add_entry(param, &cs_schedmiss_stats[0]); + sprintf(param, SCHEDMISS_PREFIX ".%i.delay", highest_schedmiss_event); + stats_add_entry(param, &cs_schedmiss_stats[1]); + highest_schedmiss_event++; + } + /* Notifications get sent by the stats_updater */ +} + +#define STATS_CLEAR "stats.clear." 
+#define STATS_CLEAR_KNET "stats.clear.knet" +#define STATS_CLEAR_IPC "stats.clear.ipc" +#define STATS_CLEAR_TOTEM "stats.clear.totem" +#define STATS_CLEAR_ALL "stats.clear.all" +#define STATS_CLEAR_SCHEDMISS "stats.clear.schedmiss" + +cs_error_t stats_map_set(const char *key_name, + const void *value, + size_t value_len, + icmap_value_types_t type) +{ + int cleared = 0; + + if (strncmp(key_name, STATS_CLEAR_KNET, strlen(STATS_CLEAR_KNET)) == 0) { + totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT); + cleared = 1; + } + if (strncmp(key_name, STATS_CLEAR_IPC, strlen(STATS_CLEAR_IPC)) == 0) { + cs_ipcs_clear_stats(); + cleared = 1; + } + if (strncmp(key_name, STATS_CLEAR_TOTEM, strlen(STATS_CLEAR_TOTEM)) == 0) { + totempg_stats_clear(TOTEMPG_STATS_CLEAR_TOTEM); + cleared = 1; + } + if (strncmp(key_name, STATS_CLEAR_SCHEDMISS, strlen(STATS_CLEAR_SCHEDMISS)) == 0) { + schedmiss_clear_stats(); + cleared = 1; + } + if (strncmp(key_name, STATS_CLEAR_ALL, strlen(STATS_CLEAR_ALL)) == 0) { + totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT | TOTEMPG_STATS_CLEAR_TOTEM); + cs_ipcs_clear_stats(); + schedmiss_clear_stats(); + cleared = 1; + } + if (!cleared) { + return CS_ERR_NOT_SUPPORTED; + } + return CS_OK; +} + +cs_error_t stats_map_adjust_int(const char *key_name, int32_t step) +{ + return CS_ERR_NOT_SUPPORTED; +} + +cs_error_t stats_map_delete(const char *key_name) +{ + return CS_ERR_NOT_SUPPORTED; +} + +int stats_map_is_key_ro(const char *key_name) +{ + /* It's all read-only apart from the 'clear' destinations */ + if (strncmp(key_name, STATS_CLEAR, strlen(STATS_CLEAR)) == 0) { + return 0; + } else { + return 1; + } +} + +icmap_iter_t stats_map_iter_init(const char *prefix) +{ + return (qb_map_pref_iter_create(stats_map, prefix)); +} + + +const char *stats_map_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type) +{ + const char *res; + struct stats_item *item; + + res = qb_map_iter_next(iter, (void **)&item); + if (res == NULL) { + return (res); + 
} + stats_map_set_value(item->cs_conv, NULL, NULL, value_len, type); + + return res; +} + +void stats_map_iter_finalize(icmap_iter_t iter) +{ + qb_map_iter_free(iter); +} + + +void stats_trigger_trackers() +{ + struct cs_stats_tracker *tracker; + struct qb_list_head *iter; + cs_error_t res; + size_t value_len; + icmap_value_types_t type; + uint64_t value; + struct icmap_notify_value new_val; + struct icmap_notify_value old_val; + + qb_list_for_each(iter, &stats_tracker_list_head) { + + tracker = qb_list_entry(iter, struct cs_stats_tracker, list); + if (tracker->events & ICMAP_TRACK_PREFIX || !tracker->key_name ) { + continue; + } + + res = stats_map_get(tracker->key_name, + &value, &value_len, &type); + + /* Check if it has changed */ + if ((res == CS_OK) && (memcmp(&value, &tracker->old_value, value_len) != 0)) { + + old_val.type = new_val.type = type; + old_val.len = new_val.len = value_len; + old_val.data = new_val.data = &value; + + tracker->notify_fn(ICMAP_TRACK_MODIFY, tracker->key_name, + old_val, new_val, tracker->user_data); + + memcpy(&tracker->old_value, &value, value_len); + } + } +} + + +/* Callback from libqb when a key is added/removed */ +static void stats_map_notify_fn(uint32_t event, char *key, void *old_value, void *value, void *user_data) +{ + struct cs_stats_tracker *tracker = user_data; + struct icmap_notify_value new_val; + struct icmap_notify_value old_val; + char new_value[64]; + + if (value == NULL && old_value == NULL) { + return ; + } + + /* Ignore schedmiss trackers as the values are read from the circular buffer */ + if (strncmp(key, SCHEDMISS_PREFIX, strlen(SCHEDMISS_PREFIX)) == 0 ) { + return ; + } + + new_val.data = new_value; + if (stats_map_get(key, + &new_value, + &new_val.len, + &new_val.type) != CS_OK) { + log_printf(LOGSYS_LEVEL_WARNING, "get value of notified key %s failed", key); + return ; + } + + /* We don't know what the old value was + but as this only tracks ADD & DELETE I'm not worried + about it */ + memcpy(&old_val, 
&new_val, sizeof(new_val)); + + tracker->notify_fn(icmap_qbtt_to_tt(event), + key, + new_val, + old_val, + tracker->user_data); + +} + +cs_error_t stats_map_track_add(const char *key_name, + int32_t track_type, + icmap_notify_fn_t notify_fn, + void *user_data, + icmap_track_t *icmap_track) +{ + struct cs_stats_tracker *tracker; + size_t value_len; + icmap_value_types_t type; + cs_error_t err; + + /* We can track adding or deleting a key under a prefix */ + if ((track_type & ICMAP_TRACK_PREFIX) && + (!(track_type & ICMAP_TRACK_DELETE) || + !(track_type & ICMAP_TRACK_ADD))) { + return CS_ERR_NOT_SUPPORTED; + } + + tracker = malloc(sizeof(struct cs_stats_tracker)); + if (!tracker) { + return CS_ERR_NO_MEMORY; + } + + tracker->notify_fn = notify_fn; + tracker->user_data = user_data; + tracker->events = track_type; + if (key_name) { + tracker->key_name = strdup(key_name); + if (!tracker->key_name) { + free(tracker); + return CS_ERR_NO_MEMORY; + } + /* Get initial value */ + if (stats_map_get(tracker->key_name, + &tracker->old_value, &value_len, &type) != CS_OK) { + tracker->old_value = 0ULL; + } + } else { + tracker->key_name = NULL; + tracker->old_value = 0ULL; + } + + /* Add/delete trackers can use the qb_map tracking */ + if ((track_type & ICMAP_TRACK_ADD) || + (track_type & ICMAP_TRACK_DELETE)) { + err = qb_map_notify_add(stats_map, tracker->key_name, + stats_map_notify_fn, + icmap_tt_to_qbtt(track_type), + tracker); + if (err != 0) { + log_printf(LOGSYS_LEVEL_ERROR, "creating stats tracker %s failed. 
%d\n", tracker->key_name, err); + free(tracker->key_name); + free(tracker); + return (qb_to_cs_error(err)); + } + } + + qb_list_add (&tracker->list, &stats_tracker_list_head); + + *icmap_track = (icmap_track_t)tracker; + return CS_OK; +} + +cs_error_t stats_map_track_delete(icmap_track_t icmap_track) +{ + struct cs_stats_tracker *tracker = (struct cs_stats_tracker *)icmap_track; + int err; + + if ((tracker->events & ICMAP_TRACK_ADD) || + (tracker->events & ICMAP_TRACK_DELETE)) { + err = qb_map_notify_del_2(stats_map, + tracker->key_name, stats_map_notify_fn, + icmap_tt_to_qbtt(tracker->events), tracker); + if (err) { + log_printf(LOGSYS_LEVEL_ERROR, "deleting tracker %s failed. %d\n", tracker->key_name, err); + } + } + + qb_list_del(&tracker->list); + free(tracker->key_name); + free(tracker); + + return CS_OK; +} + +void *stats_map_track_get_user_data(icmap_track_t icmap_track) +{ + struct cs_stats_tracker *tracker = (struct cs_stats_tracker *)icmap_track; + + return tracker->user_data; +} + +/* Called from totemknet to add/remove keys from our map */ +void stats_knet_add_member(knet_node_id_t nodeid, uint8_t link_no) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + + for (i = 0; i<NUM_KNET_STATS; i++) { + sprintf(param, "stats.knet.node%d.link%d.%s", nodeid, link_no, cs_knet_stats[i].name); + stats_add_entry(param, &cs_knet_stats[i]); + } +} +void stats_knet_del_member(knet_node_id_t nodeid, uint8_t link_no) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + + for (i = 0; i<NUM_KNET_STATS; i++) { + sprintf(param, "stats.knet.node%d.link%d.%s", nodeid, link_no, cs_knet_stats[i].name); + stats_rm_entry(param); + } +} + +/* This is separated out from stats_map_init() because we don't know whether + knet is in use until much later in the startup */ +void stats_knet_add_handle(void) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + + for (i = 0; i<NUM_KNET_HANDLE_STATS; i++) { + sprintf(param, "stats.knet.handle.%s", cs_knet_handle_stats[i].name); + 
stats_add_entry(param, &cs_knet_handle_stats[i]); + } +} + +/* Called from ipc_glue to add/remove keys from our map */ +void stats_ipcs_add_connection(int service_id, uint32_t pid, void *ptr) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + + for (i = 0; i<NUM_IPCSC_STATS; i++) { + sprintf(param, "stats.ipcs.service%d.%d.%p.%s", service_id, pid, ptr, cs_ipcs_conn_stats[i].name); + stats_add_entry(param, &cs_ipcs_conn_stats[i]); + } +} +void stats_ipcs_del_connection(int service_id, uint32_t pid, void *ptr) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + + for (i = 0; i<NUM_IPCSC_STATS; i++) { + sprintf(param, "stats.ipcs.service%d.%d.%p.%s", service_id, pid, ptr, cs_ipcs_conn_stats[i].name); + stats_rm_entry(param); + } +} diff --git a/exec/stats.h b/exec/stats.h new file mode 100644 index 0000000..eac9e7c --- /dev/null +++ b/exec/stats.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 Red Hat, Inc. + * + * All rights reserved. + * + * Authors: Christine Caulfield (ccaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +cs_error_t stats_map_init(const struct corosync_api_v1 *api); + +cs_error_t stats_map_get(const char *key_name, + void *value, + size_t *value_len, + icmap_value_types_t *type); + +cs_error_t stats_map_set(const char *key_name, + const void *value, + size_t value_len, + icmap_value_types_t type); + +cs_error_t stats_map_adjust_int(const char *key_name, int32_t step); + +cs_error_t stats_map_delete(const char *key_name); + +int stats_map_is_key_ro(const char *key_name); + +icmap_iter_t stats_map_iter_init(const char *prefix); +const char *stats_map_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type); +void stats_map_iter_finalize(icmap_iter_t iter); + +cs_error_t stats_map_track_add(const char *key_name, + int32_t track_type, + icmap_notify_fn_t notify_fn, + void *user_data, + icmap_track_t *icmap_track); + +cs_error_t stats_map_track_delete(icmap_track_t icmap_track); +void *stats_map_track_get_user_data(icmap_track_t icmap_track); + +void stats_trigger_trackers(void); + + +void stats_ipcs_add_connection(int service_id, uint32_t pid, void *ptr); +void stats_ipcs_del_connection(int service_id, uint32_t pid, void *ptr); +cs_error_t 
cs_ipcs_get_conn_stats(int service_id, uint32_t pid, void *conn_ptr, struct ipcs_conn_stats *ipcs_stats); + +void stats_add_schedmiss_event(uint64_t, float delay); diff --git a/exec/sync.c b/exec/sync.c new file mode 100644 index 0000000..962f341 --- /dev/null +++ b/exec/sync.c @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2009-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <config.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <netinet/in.h> +#include <sys/uio.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <arpa/inet.h> + +#include <corosync/corotypes.h> +#include <corosync/swab.h> +#include <corosync/totem/totempg.h> +#include <corosync/totem/totem.h> +#include <corosync/logsys.h> +#include <qb/qbipc_common.h> +#include "schedwrk.h" +#include "quorum.h" +#include "sync.h" +#include "main.h" + +LOGSYS_DECLARE_SUBSYS ("SYNC"); + +#define MESSAGE_REQ_SYNC_BARRIER 0 +#define MESSAGE_REQ_SYNC_SERVICE_BUILD 1 + +enum sync_process_state { + PROCESS, + ACTIVATE +}; + +enum sync_state { + SYNC_SERVICELIST_BUILD, + SYNC_PROCESS, + SYNC_BARRIER +}; + +struct service_entry { + int service_id; + void (*sync_init) ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + void (*sync_abort) (void); + int (*sync_process) (void); + void (*sync_activate) (void); + enum sync_process_state state; + char name[128]; +}; + +struct processor_entry { + int nodeid; + int received; +}; + +struct req_exec_service_build_message { + struct qb_ipc_request_header header __attribute__((aligned(8))); + struct memb_ring_id ring_id __attribute__((aligned(8))); + 
int service_list_entries __attribute__((aligned(8))); + int service_list[128] __attribute__((aligned(8))); +}; + +struct req_exec_barrier_message { + struct qb_ipc_request_header header __attribute__((aligned(8))); + struct memb_ring_id ring_id __attribute__((aligned(8))); +}; + +static enum sync_state my_state = SYNC_BARRIER; + +static struct memb_ring_id my_ring_id; + +static int my_processing_idx = 0; + +static hdb_handle_t my_schedwrk_handle; + +static struct processor_entry my_processor_list[PROCESSOR_COUNT_MAX]; + +static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; + +static unsigned int my_trans_list[PROCESSOR_COUNT_MAX]; + +static size_t my_member_list_entries = 0; + +static size_t my_trans_list_entries = 0; + +static int my_processor_list_entries = 0; + +static struct service_entry my_service_list[SERVICES_COUNT_MAX]; + +static int my_service_list_entries = 0; + +static void (*sync_synchronization_completed) (void); + +static void sync_deliver_fn ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required); + +static int schedwrk_processor (const void *context); + +static void sync_process_enter (void); + +static void sync_process_call_init (void); + +static struct totempg_group sync_group = { + .group = "sync", + .group_len = 4 +}; + +static void *sync_group_handle; + +int (*my_sync_callbacks_retrieve) ( + int service_id, + struct sync_callbacks *callbacks); + +int sync_init ( + int (*sync_callbacks_retrieve) ( + int service_id, + struct sync_callbacks *callbacks), + void (*synchronization_completed) (void)) +{ + unsigned int res; + + res = totempg_groups_initialize ( + &sync_group_handle, + sync_deliver_fn, + NULL); + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, + "Couldn't initialize groups interface."); + return (-1); + } + + res = totempg_groups_join ( + sync_group_handle, + &sync_group, + 1); + if (res == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Couldn't join group."); + return (-1); + } + + 
sync_synchronization_completed = synchronization_completed; + my_sync_callbacks_retrieve = sync_callbacks_retrieve; + + return (0); +} + +static void sync_barrier_handler (unsigned int nodeid, const void *msg) +{ + const struct req_exec_barrier_message *req_exec_barrier_message = msg; + int i; + int barrier_reached = 1; + + if (memcmp (&my_ring_id, &req_exec_barrier_message->ring_id, + sizeof (struct memb_ring_id)) != 0) { + + log_printf (LOGSYS_LEVEL_DEBUG, "barrier for old ring - discarding"); + return; + } + for (i = 0; i < my_processor_list_entries; i++) { + if (my_processor_list[i].nodeid == nodeid) { + my_processor_list[i].received = 1; + } + } + for (i = 0; i < my_processor_list_entries; i++) { + if (my_processor_list[i].received == 0) { + barrier_reached = 0; + } + } + if (barrier_reached) { + log_printf (LOGSYS_LEVEL_DEBUG, "Committing synchronization for %s", + my_service_list[my_processing_idx].name); + my_service_list[my_processing_idx].state = ACTIVATE; + + if (my_sync_callbacks_retrieve(my_service_list[my_processing_idx].service_id, NULL) != -1) { + my_service_list[my_processing_idx].sync_activate (); + } + + my_processing_idx += 1; + if (my_service_list_entries == my_processing_idx) { + sync_synchronization_completed (); + } else { + sync_process_enter (); + } + } +} + +static void dummy_sync_abort (void) +{ +} + +static int dummy_sync_process (void) +{ + return (0); +} + +static void dummy_sync_activate (void) +{ +} + +static int service_entry_compare (const void *a, const void *b) +{ + const struct service_entry *service_entry_a = a; + const struct service_entry *service_entry_b = b; + + return (service_entry_a->service_id > service_entry_b->service_id); +} + +static void sync_service_build_handler (unsigned int nodeid, const void *msg) +{ + const struct req_exec_service_build_message *req_exec_service_build_message = msg; + int i, j; + int barrier_reached = 1; + int found; + int qsort_trigger = 0; + + if (memcmp (&my_ring_id, 
&req_exec_service_build_message->ring_id, + sizeof (struct memb_ring_id)) != 0) { + log_printf (LOGSYS_LEVEL_DEBUG, "service build for old ring - discarding"); + return; + } + for (i = 0; i < req_exec_service_build_message->service_list_entries; i++) { + + found = 0; + for (j = 0; j < my_service_list_entries; j++) { + if (req_exec_service_build_message->service_list[i] == + my_service_list[j].service_id) { + found = 1; + break; + } + } + if (found == 0) { + my_service_list[my_service_list_entries].state = PROCESS; + my_service_list[my_service_list_entries].service_id = + req_exec_service_build_message->service_list[i]; + sprintf (my_service_list[my_service_list_entries].name, + "Unknown External Service (id = %d)\n", + req_exec_service_build_message->service_list[i]); + my_service_list[my_service_list_entries].sync_init = + NULL; + my_service_list[my_service_list_entries].sync_abort = + dummy_sync_abort; + my_service_list[my_service_list_entries].sync_process = + dummy_sync_process; + my_service_list[my_service_list_entries].sync_activate = + dummy_sync_activate; + my_service_list_entries += 1; + + qsort_trigger = 1; + } + } + if (qsort_trigger) { + qsort (my_service_list, my_service_list_entries, + sizeof (struct service_entry), service_entry_compare); + } + for (i = 0; i < my_processor_list_entries; i++) { + if (my_processor_list[i].nodeid == nodeid) { + my_processor_list[i].received = 1; + } + } + for (i = 0; i < my_processor_list_entries; i++) { + if (my_processor_list[i].received == 0) { + barrier_reached = 0; + } + } + if (barrier_reached) { + log_printf (LOGSYS_LEVEL_DEBUG, "enter sync process"); + sync_process_enter (); + } +} + +static void sync_deliver_fn ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required) +{ + struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg; + + switch (header->id) { + case MESSAGE_REQ_SYNC_BARRIER: + sync_barrier_handler (nodeid, msg); + break; + case 
MESSAGE_REQ_SYNC_SERVICE_BUILD: + sync_service_build_handler (nodeid, msg); + break; + } +} + +static void barrier_message_transmit (void) +{ + struct iovec iovec; + struct req_exec_barrier_message req_exec_barrier_message; + + memset(&req_exec_barrier_message, 0, sizeof(req_exec_barrier_message)); + + req_exec_barrier_message.header.size = sizeof (struct req_exec_barrier_message); + req_exec_barrier_message.header.id = MESSAGE_REQ_SYNC_BARRIER; + + memcpy (&req_exec_barrier_message.ring_id, &my_ring_id, + sizeof (struct memb_ring_id)); + + iovec.iov_base = (char *)&req_exec_barrier_message; + iovec.iov_len = sizeof (req_exec_barrier_message); + + (void)totempg_groups_mcast_joined (sync_group_handle, + &iovec, 1, TOTEMPG_AGREED); +} + +static void service_build_message_transmit (struct req_exec_service_build_message *service_build_message) +{ + struct iovec iovec; + + service_build_message->header.size = sizeof (struct req_exec_service_build_message); + service_build_message->header.id = MESSAGE_REQ_SYNC_SERVICE_BUILD; + + memcpy (&service_build_message->ring_id, &my_ring_id, + sizeof (struct memb_ring_id)); + + iovec.iov_base = (void *)service_build_message; + iovec.iov_len = sizeof (struct req_exec_service_build_message); + + (void)totempg_groups_mcast_joined (sync_group_handle, + &iovec, 1, TOTEMPG_AGREED); +} + +static void sync_barrier_enter (void) +{ + my_state = SYNC_BARRIER; + barrier_message_transmit (); +} + +static void sync_process_call_init (void) +{ + unsigned int old_trans_list[PROCESSOR_COUNT_MAX]; + size_t old_trans_list_entries = 0; + int o, m; + int i; + + memcpy (old_trans_list, my_trans_list, my_trans_list_entries * + sizeof (unsigned int)); + old_trans_list_entries = my_trans_list_entries; + + my_trans_list_entries = 0; + for (o = 0; o < old_trans_list_entries; o++) { + for (m = 0; m < my_member_list_entries; m++) { + if (old_trans_list[o] == my_member_list[m]) { + my_trans_list[my_trans_list_entries] = my_member_list[m]; + 
my_trans_list_entries++; + break; + } + } + } + + for (i = 0; i < my_service_list_entries; i++) { + if (my_sync_callbacks_retrieve(my_service_list[i].service_id, NULL) != -1) { + my_service_list[i].sync_init (my_trans_list, + my_trans_list_entries, my_member_list, + my_member_list_entries, + &my_ring_id); + } + } +} + +static void sync_process_enter (void) +{ + int i; + + my_state = SYNC_PROCESS; + + /* + * No sync services + */ + if (my_service_list_entries == 0) { + my_state = SYNC_SERVICELIST_BUILD; + sync_synchronization_completed (); + return; + } + for (i = 0; i < my_processor_list_entries; i++) { + my_processor_list[i].received = 0; + } + + schedwrk_create (&my_schedwrk_handle, + schedwrk_processor, + NULL); +} + +static void sync_servicelist_build_enter ( + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id) +{ + struct req_exec_service_build_message service_build; + int i; + int res; + struct sync_callbacks sync_callbacks; + + memset(&service_build, 0, sizeof(service_build)); + + my_state = SYNC_SERVICELIST_BUILD; + for (i = 0; i < member_list_entries; i++) { + my_processor_list[i].nodeid = member_list[i]; + my_processor_list[i].received = 0; + } + my_processor_list_entries = member_list_entries; + + memcpy (my_member_list, member_list, + member_list_entries * sizeof (unsigned int)); + my_member_list_entries = member_list_entries; + + my_processing_idx = 0; + + memset(my_service_list, 0, sizeof (struct service_entry) * SERVICES_COUNT_MAX); + my_service_list_entries = 0; + + for (i = 0; i < SERVICES_COUNT_MAX; i++) { + res = my_sync_callbacks_retrieve (i, &sync_callbacks); + if (res == -1) { + continue; + } + if (sync_callbacks.sync_init == NULL) { + continue; + } + my_service_list[my_service_list_entries].state = PROCESS; + my_service_list[my_service_list_entries].service_id = i; + + assert(strlen(sync_callbacks.name) < sizeof(my_service_list[my_service_list_entries].name)); + + strcpy 
(my_service_list[my_service_list_entries].name, + sync_callbacks.name); + my_service_list[my_service_list_entries].sync_init = sync_callbacks.sync_init; + my_service_list[my_service_list_entries].sync_process = sync_callbacks.sync_process; + my_service_list[my_service_list_entries].sync_abort = sync_callbacks.sync_abort; + my_service_list[my_service_list_entries].sync_activate = sync_callbacks.sync_activate; + my_service_list_entries += 1; + } + + for (i = 0; i < my_service_list_entries; i++) { + service_build.service_list[i] = + my_service_list[i].service_id; + } + service_build.service_list_entries = my_service_list_entries; + + service_build_message_transmit (&service_build); + + log_printf (LOGSYS_LEVEL_DEBUG, "call init for locally known services"); + sync_process_call_init (); +} + +static int schedwrk_processor (const void *context) +{ + int res = 0; + + if (my_service_list[my_processing_idx].state == PROCESS) { + if (my_sync_callbacks_retrieve(my_service_list[my_processing_idx].service_id, NULL) != -1) { + res = my_service_list[my_processing_idx].sync_process (); + } else { + res = 0; + } + if (res == 0) { + sync_barrier_enter(); + } else { + return (-1); + } + } + return (0); +} + +void sync_start ( + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id) +{ + ENTER(); + memcpy (&my_ring_id, ring_id, sizeof (struct memb_ring_id)); + + sync_servicelist_build_enter (member_list, member_list_entries, + ring_id); +} + +void sync_save_transitional ( + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id) +{ + ENTER(); + memcpy (my_trans_list, member_list, member_list_entries * + sizeof (unsigned int)); + my_trans_list_entries = member_list_entries; +} + +void sync_abort (void) +{ + ENTER(); + if (my_state == SYNC_PROCESS) { + schedwrk_destroy (my_schedwrk_handle); + if (my_sync_callbacks_retrieve(my_service_list[my_processing_idx].service_id, NULL) != -1) { + 
my_service_list[my_processing_idx].sync_abort (); + } + } + + /* this will cause any "old" barrier messages from causing + * problems. + */ + memset (&my_ring_id, 0, sizeof (struct memb_ring_id)); +} diff --git a/exec/sync.h b/exec/sync.h new file mode 100644 index 0000000..ee2f076 --- /dev/null +++ b/exec/sync.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2009-2010 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SYNC_H_DEFINED +#define SYNC_H_DEFINED + +struct sync_callbacks { + void (*sync_init) ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + int (*sync_process) (void); + void (*sync_activate) (void); + void (*sync_abort) (void); + const char *name; +}; + +extern int sync_init ( + int (*sync_callbacks_retrieve) ( + int service_id, + struct sync_callbacks *callbacks), + void (*synchronization_completed) (void)); + +extern void sync_start ( + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + +extern void sync_save_transitional ( + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + +extern void sync_abort (void); + +extern void sync_memb_list_determine (const struct memb_ring_id *ring_id); + +extern void sync_memb_list_abort (void); + +#endif /* SYNC_H_DEFINED */ diff --git a/exec/timer.c b/exec/timer.c new file mode 100644 index 0000000..27383bd --- /dev/null +++ b/exec/timer.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2002-2006 MontaVista Software, Inc. + * Copyright (c) 2006-2010 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include "timer.h" +#include "main.h" +#include <qb/qbdefs.h> +#include <qb/qbutil.h> + +int corosync_timer_add_absolute ( + unsigned long long nanosec_from_epoch, + void *data, + void (*timer_fn) (void *data), + corosync_timer_handle_t *handle) +{ + uint64_t expire_time = nanosec_from_epoch - qb_util_nano_current_get(); + return qb_loop_timer_add(cs_poll_handle_get(), + QB_LOOP_MED, + expire_time, + data, + timer_fn, + handle); +} + +int corosync_timer_add_duration ( + unsigned long long nanosec_duration, + void *data, + void (*timer_fn) (void *data), + corosync_timer_handle_t *handle) +{ + return qb_loop_timer_add(cs_poll_handle_get(), + QB_LOOP_MED, + nanosec_duration, + data, + timer_fn, + handle); +} + +void corosync_timer_delete ( + corosync_timer_handle_t th) +{ + qb_loop_timer_del(cs_poll_handle_get(), th); +} + +unsigned long long corosync_timer_expire_time_get ( + corosync_timer_handle_t th) +{ + uint64_t expire; + + if (th == 0) { + return (0); + } + + expire = qb_loop_timer_expire_time_get(cs_poll_handle_get(), th); + + return (expire); +} + +unsigned long long cs_timer_time_get (void) +{ + return qb_util_nano_from_epoch_get(); +} + diff --git a/exec/timer.h b/exec/timer.h new file mode 100644 index 0000000..9d23868 --- /dev/null +++ b/exec/timer.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef TIMER_H_DEFINED +#define TIMER_H_DEFINED + +#include <stdint.h> +#include <time.h> +#include <qb/qbloop.h> + +#ifndef TIMER_HANDLE_T +typedef qb_loop_timer_handle corosync_timer_handle_t; +#define TIMER_HANDLE_T 1 +#endif + +extern int corosync_timer_add_duration ( + unsigned long long nanosec_duration, + void *data, + void (*timer_fn) (void *data), + corosync_timer_handle_t *handle); + +extern int corosync_timer_add_absolute ( + unsigned long long nanoseconds_from_epoch, + void *data, + void (*timer_fn) (void *data), + corosync_timer_handle_t *handle); + +extern void corosync_timer_delete (corosync_timer_handle_t handle); + +extern unsigned long long corosync_timer_expire_time_get (corosync_timer_handle_t handle); + +extern unsigned long long cs_timer_time_get (void); + +#endif /* TIMER_H_DEFINED */ diff --git a/exec/totemconfig.c b/exec/totemconfig.c new file mode 100644 index 0000000..a6394a2 --- /dev/null +++ b/exec/totemconfig.c @@ -0,0 +1,2454 @@ +/* + * Copyright (c) 2002-2005 MontaVista Software, Inc. + * Copyright (c) 2006-2022 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * Jan Friesse (jfriesse@redhat.com) + * Chrissie Caulfield (ccaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <ifaddrs.h> +#include <netdb.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/param.h> +#include <sys/utsname.h> + +#include <corosync/swab.h> +#include <qb/qblist.h> +#include <qb/qbdefs.h> +#include <libknet.h> +#include <corosync/totem/totem.h> +#include <corosync/config.h> +#include <corosync/logsys.h> +#include <corosync/icmap.h> + +#include "util.h" +#include "totemconfig.h" + +#define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4 +#define TOKEN_TIMEOUT 3000 +#define TOKEN_WARNING 75 +#define TOKEN_COEFFICIENT 650 +#define JOIN_TIMEOUT 50 +#define MERGE_TIMEOUT 200 +#define DOWNCHECK_TIMEOUT 1000 +#define FAIL_TO_RECV_CONST 2500 +#define SEQNO_UNCHANGED_CONST 30 +#define MINIMUM_TIMEOUT (int)(1000/HZ)*3 +#define MINIMUM_TIMEOUT_HOLD (int)(MINIMUM_TIMEOUT * 0.8 - (1000/HZ)) 
+#define MAX_NETWORK_DELAY 50 +#define WINDOW_SIZE 50 +#define MAX_MESSAGES 17 +#define MISS_COUNT_CONST 5 +#define BLOCK_UNLISTED_IPS 1 +#define CANCEL_TOKEN_HOLD_ON_RETRANSMIT 0 +/* This constant is not used for knet */ +#define UDP_NETMTU 1500 + +/* Currently all but PONG_COUNT match the defaults in libknet.h */ +#define KNET_PING_INTERVAL 1000 +#define KNET_PING_TIMEOUT 2000 +#define KNET_PING_PRECISION 2048 +#define KNET_PONG_COUNT 2 +#define KNET_PMTUD_INTERVAL 30 +#define KNET_MTU 0 +#define KNET_DEFAULT_TRANSPORT KNET_TRANSPORT_UDP + +#define DEFAULT_PORT 5405 + +static char error_string_response[768]; + +static void add_totem_config_notification(struct totem_config *totem_config); + +static void *totem_get_param_by_name(struct totem_config *totem_config, const char *param_name) +{ + if (strcmp(param_name, "totem.token") == 0) + return &totem_config->token_timeout; + if (strcmp(param_name, "totem.token_warning") == 0) + return &totem_config->token_warning; + if (strcmp(param_name, "totem.token_retransmit") == 0) + return &totem_config->token_retransmit_timeout; + if (strcmp(param_name, "totem.hold") == 0) + return &totem_config->token_hold_timeout; + if (strcmp(param_name, "totem.token_retransmits_before_loss_const") == 0) + return &totem_config->token_retransmits_before_loss_const; + if (strcmp(param_name, "totem.join") == 0) + return &totem_config->join_timeout; + if (strcmp(param_name, "totem.send_join") == 0) + return &totem_config->send_join_timeout; + if (strcmp(param_name, "totem.consensus") == 0) + return &totem_config->consensus_timeout; + if (strcmp(param_name, "totem.merge") == 0) + return &totem_config->merge_timeout; + if (strcmp(param_name, "totem.downcheck") == 0) + return &totem_config->downcheck_timeout; + if (strcmp(param_name, "totem.fail_recv_const") == 0) + return &totem_config->fail_to_recv_const; + if (strcmp(param_name, "totem.seqno_unchanged_const") == 0) + return &totem_config->seqno_unchanged_const; + if (strcmp(param_name, 
"totem.heartbeat_failures_allowed") == 0) + return &totem_config->heartbeat_failures_allowed; + if (strcmp(param_name, "totem.max_network_delay") == 0) + return &totem_config->max_network_delay; + if (strcmp(param_name, "totem.window_size") == 0) + return &totem_config->window_size; + if (strcmp(param_name, "totem.max_messages") == 0) + return &totem_config->max_messages; + if (strcmp(param_name, "totem.miss_count_const") == 0) + return &totem_config->miss_count_const; + if (strcmp(param_name, "totem.knet_pmtud_interval") == 0) + return &totem_config->knet_pmtud_interval; + if (strcmp(param_name, "totem.knet_mtu") == 0) + return &totem_config->knet_mtu; + if (strcmp(param_name, "totem.knet_compression_threshold") == 0) + return &totem_config->knet_compression_threshold; + if (strcmp(param_name, "totem.knet_compression_level") == 0) + return &totem_config->knet_compression_level; + if (strcmp(param_name, "totem.knet_compression_model") == 0) + return totem_config->knet_compression_model; + if (strcmp(param_name, "totem.block_unlisted_ips") == 0) + return &totem_config->block_unlisted_ips; + if (strcmp(param_name, "totem.cancel_token_hold_on_retransmit") == 0) + return &totem_config->cancel_token_hold_on_retransmit; + + return NULL; +} + +/* + * Read key_name from icmap. If key is not found or key_name == delete_key or if allow_zero is false + * and readed value is zero, default value is used and stored into totem_config. 
+ */ +static void totem_volatile_config_set_uint32_value (struct totem_config *totem_config, icmap_map_t map, + const char *key_name, const char *deleted_key, unsigned int default_value, + int allow_zero_value) +{ + char runtime_key_name[ICMAP_KEYNAME_MAXLEN]; + + if (icmap_get_uint32_r(map, key_name, totem_get_param_by_name(totem_config, key_name)) != CS_OK || + (deleted_key != NULL && strcmp(deleted_key, key_name) == 0) || + (!allow_zero_value && *(uint32_t *)totem_get_param_by_name(totem_config, key_name) == 0)) { + *(uint32_t *)totem_get_param_by_name(totem_config, key_name) = default_value; + } + + /* + * Store totem_config value to cmap runtime section + */ + if (strlen("runtime.config.") + strlen(key_name) >= ICMAP_KEYNAME_MAXLEN) { + /* + * This shouldn't happen + */ + return ; + } + + strcpy(runtime_key_name, "runtime.config."); + strcat(runtime_key_name, key_name); + + icmap_set_uint32_r(map, runtime_key_name, *(uint32_t *)totem_get_param_by_name(totem_config, key_name)); +} + +static void totem_volatile_config_set_int32_value (struct totem_config *totem_config, icmap_map_t map, + const char *key_name, const char *deleted_key, int default_value, + int allow_zero_value) +{ + char runtime_key_name[ICMAP_KEYNAME_MAXLEN]; + + if (icmap_get_int32_r(map, key_name, totem_get_param_by_name(totem_config, key_name)) != CS_OK || + (deleted_key != NULL && strcmp(deleted_key, key_name) == 0) || + (!allow_zero_value && *(int32_t *)totem_get_param_by_name(totem_config, key_name) == 0)) { + *(int32_t *)totem_get_param_by_name(totem_config, key_name) = default_value; + } + + /* + * Store totem_config value to cmap runtime section + */ + if (strlen("runtime.config.") + strlen(key_name) >= ICMAP_KEYNAME_MAXLEN) { + /* + * This shouldn't happen + */ + return ; + } + + strcpy(runtime_key_name, "runtime.config."); + strcat(runtime_key_name, key_name); + + icmap_set_int32_r(map, runtime_key_name, *(int32_t *)totem_get_param_by_name(totem_config, key_name)); +} + +static void 
totem_volatile_config_set_string_value (struct totem_config *totem_config, icmap_map_t map, + const char *key_name, const char *deleted_key, const char *default_value) +{ + char runtime_key_name[ICMAP_KEYNAME_MAXLEN]; + int res; + char *new_config_value; + const void *config_value; + + config_value = totem_get_param_by_name(totem_config, key_name); + + res = icmap_get_string_r(map, key_name, (char **)&new_config_value); + if (res != CS_OK || + (deleted_key != NULL && strcmp(deleted_key, key_name) == 0)) { + + /* Slightly pointless use of strncpy but it keeps coverity happy */ + strncpy((char *)config_value, default_value, CONFIG_STRING_LEN_MAX); + } else { + strncpy((char *)config_value, new_config_value, CONFIG_STRING_LEN_MAX); + } + if (res == CS_OK) { + free(new_config_value); + } + + /* + * Store totem_config value to cmap runtime section + */ + if (strlen("runtime.config.") + strlen(key_name) >= ICMAP_KEYNAME_MAXLEN) { + /* + * This shouldn't happen + */ + return ; + } + + strcpy(runtime_key_name, "runtime.config."); + strcat(runtime_key_name, key_name); + + (void)icmap_set_string_r(map, runtime_key_name, (char *)config_value); +} + +/* + * Read string value stored in key_name from icmap, use it as a boolean (yes/no) type, convert it + * to integer value (1/0) and store into totem_config. + * + * If key is not found or key_name == delete_key default value is used + * and stored into totem_config. + */ +static void totem_volatile_config_set_boolean_value (struct totem_config *totem_config, icmap_map_t map, + const char *key_name, const char *deleted_key, unsigned int default_value) +{ + char runtime_key_name[ICMAP_KEYNAME_MAXLEN]; + char *str; + int val; + + str = NULL; + val = default_value; + + if ((deleted_key != NULL && strcmp(deleted_key, key_name) == 0) || + (icmap_get_string_r(map, key_name, &str) != CS_OK)) { + /* + * Do nothing. str is NULL (icmap_get_string ether not called or + * not changed str). 
+ */ + } else { + if (strcmp(str, "yes") == 0) { + val = 1; + } else if (strcmp(str, "no") == 0) { + val = 0; + } + free(str); + } + + /* + * Store totem_config value to cmap runtime section + */ + if (strlen("runtime.config.") + strlen(key_name) >= ICMAP_KEYNAME_MAXLEN) { + /* + * This shouldn't happen + */ + return ; + } + + strcpy(runtime_key_name, "runtime.config."); + strcat(runtime_key_name, key_name); + + *(uint32_t *)totem_get_param_by_name(totem_config, key_name) = val; + + icmap_set_uint32_r(map, runtime_key_name, val); +} + +/* + * Read and validate config values from cmap and store them into totem_config. If key doesn't exists, + * default value is stored. deleted_key is name of key beeing processed by delete operation + * from cmap. It is considered as non existing even if it can be read. Can be NULL. + */ +void totem_volatile_config_read (struct totem_config *totem_config, icmap_map_t temp_map, const char *deleted_key) +{ + uint32_t u32; + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.token_retransmits_before_loss_const", deleted_key, + TOKEN_RETRANSMITS_BEFORE_LOSS_CONST, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.token", deleted_key, TOKEN_TIMEOUT, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.token_warning", deleted_key, TOKEN_WARNING, 1); + + if (totem_config->interfaces[0].member_count > 2) { + u32 = TOKEN_COEFFICIENT; + icmap_get_uint32_r(temp_map, "totem.token_coefficient", &u32); + totem_config->token_timeout += (totem_config->interfaces[0].member_count - 2) * u32; + + /* + * Store totem_config value to cmap runtime section + */ + icmap_set_uint32_r(temp_map, "runtime.config.totem.token", totem_config->token_timeout); + } + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.max_network_delay", deleted_key, MAX_NETWORK_DELAY, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.window_size", deleted_key, 
WINDOW_SIZE, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.max_messages", deleted_key, MAX_MESSAGES, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.miss_count_const", deleted_key, MISS_COUNT_CONST, 0); + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.knet_pmtud_interval", deleted_key, KNET_PMTUD_INTERVAL, 0); + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.knet_mtu", deleted_key, KNET_MTU, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.token_retransmit", deleted_key, + (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)), 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.hold", deleted_key, + (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)), 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.join", deleted_key, JOIN_TIMEOUT, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.consensus", deleted_key, + (int)(float)(1.2 * totem_config->token_timeout), 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.merge", deleted_key, MERGE_TIMEOUT, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.downcheck", deleted_key, DOWNCHECK_TIMEOUT, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.fail_recv_const", deleted_key, FAIL_TO_RECV_CONST, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.seqno_unchanged_const", deleted_key, + SEQNO_UNCHANGED_CONST, 0); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.send_join", deleted_key, 0, 1); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.heartbeat_failures_allowed", deleted_key, 0, 1); + + totem_volatile_config_set_uint32_value(totem_config, temp_map, "totem.knet_compression_threshold", deleted_key, 0, 
1); + + totem_volatile_config_set_int32_value(totem_config, temp_map, "totem.knet_compression_level", deleted_key, 0, 1); + + totem_volatile_config_set_string_value(totem_config, temp_map, "totem.knet_compression_model", deleted_key, "none"); + + totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.block_unlisted_ips", deleted_key, + BLOCK_UNLISTED_IPS); + + totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.cancel_token_hold_on_retransmit", + deleted_key, CANCEL_TOKEN_HOLD_ON_RETRANSMIT); +} + +int totem_volatile_config_validate ( + struct totem_config *totem_config, + icmap_map_t temp_map, + const char **error_string) +{ + /* Static just to keep them off the stack */ + static char local_error_reason[512]; + static char addr_str_buf[INET6_ADDRSTRLEN]; + const char *error_reason = local_error_reason; + char name_key[ICMAP_KEYNAME_MAXLEN]; + char *name_str; + int i, j, num_configured, members; + uint32_t tmp_config_value; + + if (totem_config->max_network_delay < MINIMUM_TIMEOUT) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The max_network_delay parameter (%d ms) may not be less than (%d ms).", + totem_config->max_network_delay, MINIMUM_TIMEOUT); + goto parse_error; + } + + if (totem_config->token_timeout < MINIMUM_TIMEOUT) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The token timeout parameter (%d ms) may not be less than (%d ms).", + totem_config->token_timeout, MINIMUM_TIMEOUT); + goto parse_error; + } + + if (totem_config->token_warning > 100 || totem_config->token_warning < 0) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The token warning parameter (%d%%) must be between 0 (disabled) and 100.", + totem_config->token_warning); + goto parse_error; + } + + if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) { + if (icmap_get_uint32_r(temp_map, "totem.token_retransmit", &tmp_config_value) == CS_OK) { + snprintf (local_error_reason, 
sizeof(local_error_reason), + "The token retransmit timeout parameter (%d ms) may not be less than (%d ms).", + totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT); + goto parse_error; + } else { + snprintf (local_error_reason, sizeof(local_error_reason), + "Not appropriate token or token_retransmits_before_loss_const value set"); + goto parse_error; + } + } + + if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT_HOLD) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The token hold timeout parameter (%d ms) may not be less than (%d ms).", + totem_config->token_hold_timeout, MINIMUM_TIMEOUT_HOLD); + goto parse_error; + } + + if (totem_config->join_timeout < MINIMUM_TIMEOUT) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The join timeout parameter (%d ms) may not be less than (%d ms).", + totem_config->join_timeout, MINIMUM_TIMEOUT); + goto parse_error; + } + + if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The consensus timeout parameter (%d ms) may not be less than (%d ms).", + totem_config->consensus_timeout, MINIMUM_TIMEOUT); + goto parse_error; + } + + if (totem_config->consensus_timeout < totem_config->join_timeout) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The consensus timeout parameter (%d ms) may not be less than join timeout (%d ms).", + totem_config->consensus_timeout, totem_config->join_timeout); + goto parse_error; + } + + if (totem_config->merge_timeout < MINIMUM_TIMEOUT) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The merge timeout parameter (%d ms) may not be less than (%d ms).", + totem_config->merge_timeout, MINIMUM_TIMEOUT); + goto parse_error; + } + + if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The downcheck timeout parameter (%d ms) may not be less than (%d ms).", + totem_config->downcheck_timeout, 
MINIMUM_TIMEOUT); + goto parse_error; + } + + /* Check that we have nodelist 'name' if there is more than one link */ + num_configured = 0; + members = -1; + for (i = 0; i < INTERFACE_MAX; i++) { + if (totem_config->interfaces[i].configured) { + if (num_configured == 0) { + members = totem_config->interfaces[i].member_count; + } + num_configured++; + } + } + + if (num_configured > 1) { + /* + * This assert is here just to make compiler happy + */ + assert(members != -1); + for (i=0; i < members; i++) { + snprintf(name_key, sizeof(name_key), "nodelist.node.%d.name", i); + + if (icmap_get_string_r(temp_map, name_key, &name_str) != CS_OK) { + snprintf (local_error_reason, sizeof(local_error_reason), + "for a multi-link configuration, all nodes must have a 'name' attribute"); + goto parse_error; + } + + free(name_str); + } + + for (i=0; i < INTERFACE_MAX; i++) { + if (!totem_config->interfaces[i].configured) { + continue; + } + if (totem_config->interfaces[i].member_count != members) { + snprintf (local_error_reason, sizeof(local_error_reason), + "Not all nodes have the same number of links"); + goto parse_error; + } + } + } + + /* Verify that all nodes on the same link have the same IP family */ + for (i=0; i < INTERFACE_MAX; i++) { + for (j=1; j<totem_config->interfaces[i].member_count; j++) { + if (totem_config->interfaces[i].configured) { + if (totem_config->interfaces[i].member_list[j].family != + totem_config->interfaces[i].member_list[0].family) { + memcpy(addr_str_buf, + totemip_print(&(totem_config->interfaces[i].member_list[j])), + sizeof(addr_str_buf)); + + snprintf (local_error_reason, sizeof(local_error_reason), + "Nodes for link %d have different IP families " + "(compared %s with %s)", i, + addr_str_buf, + totemip_print(&(totem_config->interfaces[i].member_list[0]))); + goto parse_error; + } + } + } + } + + return 0; + +parse_error: + snprintf (error_string_response, sizeof(error_string_response), + "parse error in config: %s\n", error_reason); + 
*error_string = error_string_response; + return (-1); + +} + +static int totem_get_crypto(struct totem_config *totem_config, icmap_map_t map, const char **error_string) +{ + char *str; + const char *tmp_cipher; + const char *tmp_hash; + const char *tmp_model; + char *crypto_model_str; + int res = 0; + + tmp_hash = "none"; + tmp_cipher = "none"; + tmp_model = "none"; + + crypto_model_str = NULL; + if (icmap_get_string_r(map, "totem.crypto_model", &crypto_model_str) == CS_OK) { + tmp_model = crypto_model_str; + } else { + tmp_model = "nss"; + } + + if (icmap_get_string_r(map, "totem.secauth", &str) == CS_OK) { + if (strcmp(str, "on") == 0) { + tmp_cipher = "aes256"; + tmp_hash = "sha256"; + } + free(str); + } + + if (icmap_get_string_r(map, "totem.crypto_cipher", &str) == CS_OK) { + if (strcmp(str, "none") == 0) { + tmp_cipher = "none"; + } + if (strcmp(str, "aes256") == 0) { + tmp_cipher = "aes256"; + } + if (strcmp(str, "aes192") == 0) { + tmp_cipher = "aes192"; + } + if (strcmp(str, "aes128") == 0) { + tmp_cipher = "aes128"; + } + free(str); + } + + if (icmap_get_string_r(map, "totem.crypto_hash", &str) == CS_OK) { + if (strcmp(str, "none") == 0) { + tmp_hash = "none"; + } + if (strcmp(str, "md5") == 0) { + tmp_hash = "md5"; + } + if (strcmp(str, "sha1") == 0) { + tmp_hash = "sha1"; + } + if (strcmp(str, "sha256") == 0) { + tmp_hash = "sha256"; + } + if (strcmp(str, "sha384") == 0) { + tmp_hash = "sha384"; + } + if (strcmp(str, "sha512") == 0) { + tmp_hash = "sha512"; + } + free(str); + } + + if ((strcmp(tmp_cipher, "none") != 0) && + (strcmp(tmp_hash, "none") == 0)) { + *error_string = "crypto_cipher requires crypto_hash with value other than none"; + res = -1; + + goto out_free_crypto_model_str; + } + + if (strcmp(tmp_model, "none") == 0) { + /* + * Shouldn't happen because it is handled by coroparse + */ + *error_string = "invalid crypto_model"; + res = -1; + + goto out_free_crypto_model_str; + } + + if (strcmp(tmp_cipher, totem_config->crypto_cipher_type) || + 
strcmp(tmp_hash, totem_config->crypto_hash_type) || + strcmp(tmp_model, totem_config->crypto_model)) { + totem_config->crypto_changed = 1; + } + + strncpy(totem_config->crypto_cipher_type, tmp_cipher, CONFIG_STRING_LEN_MAX - 1); + totem_config->crypto_cipher_type[CONFIG_STRING_LEN_MAX - 1] = '\0'; + + strncpy(totem_config->crypto_hash_type, tmp_hash, CONFIG_STRING_LEN_MAX - 1); + totem_config->crypto_hash_type[CONFIG_STRING_LEN_MAX - 1] = '\0'; + + strncpy(totem_config->crypto_model, tmp_model, CONFIG_STRING_LEN_MAX - 1); + totem_config->crypto_model[CONFIG_STRING_LEN_MAX - 1] = '\0'; + +out_free_crypto_model_str: + free(crypto_model_str); + + return (res); +} + +static int nodelist_byname(icmap_map_t map, const char *find_name, int strip_domain) +{ + icmap_iter_t iter; + const char *iter_key; + char name_str[ICMAP_KEYNAME_MAXLEN]; + int res = 0; + unsigned int node_pos; + char *name; + unsigned int namelen; + + iter = icmap_iter_init_r(map, "nodelist.node."); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, name_str); + if (res != 2) { + continue; + } + /* ring0_addr is allowed as a fallback */ + if (strcmp(name_str, "name") && strcmp(name_str, "ring0_addr")) { + continue; + } + if (icmap_get_string_r(map, iter_key, &name) != CS_OK) { + continue; + } + namelen = strlen(name); + + if (strip_domain) { + char *dot; + dot = strchr(name, '.'); + if (dot) { + namelen = dot - name; + } + } + if (strncmp(find_name, name, namelen) == 0 && + strlen(find_name) == namelen) { + icmap_iter_finalize(iter); + return node_pos; + } + } + icmap_iter_finalize(iter); + return -1; +} + +/* Compare two addresses - only address part (sin_addr/sin6_addr) is checked */ +static int ipaddr_equal(const struct sockaddr *addr1, const struct sockaddr *addr2) +{ + int addrlen = 0; + const void *addr1p, *addr2p; + + if (addr1->sa_family != addr2->sa_family) + return 0; + + switch (addr1->sa_family) { + case AF_INET: + 
addrlen = sizeof(struct in_addr); + addr1p = &((struct sockaddr_in *)addr1)->sin_addr; + addr2p = &((struct sockaddr_in *)addr2)->sin_addr; + break; + case AF_INET6: + addrlen = sizeof(struct in6_addr); + addr1p = &((struct sockaddr_in6 *)addr1)->sin6_addr; + addr2p = &((struct sockaddr_in6 *)addr2)->sin6_addr; + break; + default: + assert(0); + } + + return (memcmp(addr1p, addr2p, addrlen) == 0); +} + + +/* Finds the local node and returns its position in the nodelist. + * Uses nodelist.local_node_pos as a cache to save effort + */ +static int find_local_node(icmap_map_t map, int use_cache) +{ + char nodename2[PATH_MAX]; + char name_str[ICMAP_KEYNAME_MAXLEN]; + icmap_iter_t iter; + const char *iter_key; + unsigned int cached_pos; + char *dot = NULL; + const char *node; + struct ifaddrs *ifa, *ifa_list; + struct sockaddr *sa; + int found = 0; + int node_pos = -1; + int res; + struct utsname utsname; + + /* Check for cached value first */ + if (use_cache) { + if (icmap_get_uint32("nodelist.local_node_pos", &cached_pos) == CS_OK) { + return cached_pos; + } + } + + res = uname(&utsname); + if (res < 0) { + return -1; + } + node = utsname.nodename; + + /* 1. Exact match */ + node_pos = nodelist_byname(map, node, 0); + if (node_pos > -1) { + found = 1; + goto ret_found; + } + + /* 2. Try to match with increasingly more + * specific versions of it + */ + strcpy(nodename2, node); + dot = strrchr(nodename2, '.'); + while (dot) { + *dot = '\0'; + + node_pos = nodelist_byname(map, nodename2, 0); + if (node_pos > -1) { + found = 1; + goto ret_found; + } + dot = strrchr(nodename2, '.'); + } + + node_pos = nodelist_byname(map, nodename2, 1); + if (node_pos > -1) { + found = 1; + goto ret_found; + } + + /* + * The corosync.conf name may not be related to uname at all, + * they may match a hostname on some network interface. 
+ */ + if (getifaddrs(&ifa_list)) + return -1; + + for (ifa = ifa_list; ifa; ifa = ifa->ifa_next) { + socklen_t salen = 0; + + /* Restore this */ + strcpy(nodename2, node); + sa = ifa->ifa_addr; + if (!sa) { + continue; + } + if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) { + continue; + } + + if (sa->sa_family == AF_INET) { + salen = sizeof(struct sockaddr_in); + } + if (sa->sa_family == AF_INET6) { + salen = sizeof(struct sockaddr_in6); + } + + if (getnameinfo(sa, salen, + nodename2, sizeof(nodename2), + NULL, 0, 0) == 0) { + + node_pos = nodelist_byname(map, nodename2, 0); + if (node_pos > -1) { + found = 1; + goto out; + } + + /* Truncate this name and try again */ + dot = strchr(nodename2, '.'); + if (dot) { + *dot = '\0'; + + node_pos = nodelist_byname(map, nodename2, 0); + if (node_pos > -1) { + found = 1; + goto out; + } + } + } + + /* See if it's the IP address that's in corosync.conf */ + if (getnameinfo(sa, sizeof(*sa), + nodename2, sizeof(nodename2), + NULL, 0, NI_NUMERICHOST)) + continue; + + node_pos = nodelist_byname(map, nodename2, 0); + if (node_pos > -1) { + found = 1; + goto out; + } + } + + out: + if (found) { + freeifaddrs(ifa_list); + goto ret_found; + } + + /* + * This section covers the usecase where the nodename specified in cluster.conf + * is an alias specified in /etc/hosts. For example: + * <ipaddr> hostname alias1 alias2 + * and <clusternode name="alias2"> + * the above calls use uname and getnameinfo does not return aliases. + * here we take the name specified in cluster.conf, resolve it to an address + * and then compare against all known local ip addresses. + * if we have a match, we found our nodename. In theory this chunk of code + * could replace all the checks above, but let's avoid any possible regressions + * and use it as last. 
+ */ + + iter = icmap_iter_init_r(map, "nodelist.node."); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + char *dbnodename = NULL; + struct addrinfo hints; + struct addrinfo *result = NULL, *rp = NULL; + + res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, name_str); + if (res != 2) { + continue; + } + /* 'ring0_addr' is allowed as a fallback, but 'name' will be found first + * because the names are in alpha order. + */ + if (strcmp(name_str, "name") && strcmp(name_str, "ring0_addr")) { + continue; + } + if (icmap_get_string_r(map, iter_key, &dbnodename) != CS_OK) { + continue; + } + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_flags = 0; + hints.ai_protocol = IPPROTO_UDP; + + if (getaddrinfo(dbnodename, NULL, &hints, &result)) { + continue; + } + + for (rp = result; rp != NULL; rp = rp->ai_next) { + for (ifa = ifa_list; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr && + ipaddr_equal(rp->ai_addr, ifa->ifa_addr)) { + freeaddrinfo(result); + found = 1; + goto out2; + } + } + } + + freeaddrinfo(result); + } +out2: + icmap_iter_finalize(iter); + freeifaddrs(ifa_list); + +ret_found: + if (found) { + res = icmap_set_uint32_r(map, "nodelist.local_node_pos", node_pos); + } + + return node_pos; +} + +static enum totem_ip_version_enum totem_config_get_ip_version(struct totem_config *totem_config) +{ + enum totem_ip_version_enum res; + char *str; + + res = TOTEM_IP_VERSION_6_4; + + if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) { + res = TOTEM_IP_VERSION_4; + } + + if (icmap_get_string("totem.ip_version", &str) == CS_OK) { + if (strcmp(str, "ipv4") == 0) { + res = TOTEM_IP_VERSION_4; + } + if (strcmp(str, "ipv6") == 0) { + res = TOTEM_IP_VERSION_6; + } + if (strcmp(str, "ipv6-4") == 0) { + res = TOTEM_IP_VERSION_6_4; + } + if (strcmp(str, "ipv4-6") == 0) { + res = TOTEM_IP_VERSION_4_6; + } + free(str); + } + + return (res); +} + +static uint16_t 
generate_cluster_id (const char *cluster_name) +{ + int i; + int value = 0; + + for (i = 0; i < strlen(cluster_name); i++) { + value <<= 1; + value += cluster_name[i]; + } + + return (value & 0xFFFF); +} + +static int get_cluster_mcast_addr ( + const char *cluster_name, + unsigned int linknumber, + enum totem_ip_version_enum ip_version, + struct totem_ip_address *res) +{ + uint16_t clusterid; + char addr[INET6_ADDRSTRLEN + 1]; + int err; + + if (cluster_name == NULL) { + return (-1); + } + + clusterid = generate_cluster_id(cluster_name) + linknumber; + memset (res, 0, sizeof(*res)); + + switch (ip_version) { + case TOTEM_IP_VERSION_4: + case TOTEM_IP_VERSION_4_6: + snprintf(addr, sizeof(addr), "239.192.%d.%d", clusterid >> 8, clusterid % 0xFF); + break; + case TOTEM_IP_VERSION_6: + case TOTEM_IP_VERSION_6_4: + snprintf(addr, sizeof(addr), "ff15::%x", clusterid); + break; + default: + /* + * Unknown family + */ + return (-1); + } + + err = totemip_parse (res, addr, ip_version); + + return (err); +} + +static unsigned int generate_nodeid( + struct totem_config *totem_config, + char *addr) +{ + unsigned int nodeid; + struct totem_ip_address totemip; + + /* AF_INET hard-coded here because auto-generated nodeids + are only for IPv4 */ + if (totemip_parse(&totemip, addr, TOTEM_IP_VERSION_4) != 0) + return -1; + + memcpy (&nodeid, &totemip.addr, sizeof (unsigned int)); + +#if __BYTE_ORDER == __LITTLE_ENDIAN + nodeid = swab32 (nodeid); +#endif + + if (totem_config->clear_node_high_bit) { + nodeid &= 0x7FFFFFFF; + } + return nodeid; +} + +static int check_for_duplicate_nodeids( + struct totem_config *totem_config, + const char **error_string) +{ + icmap_iter_t iter; + icmap_iter_t subiter; + const char *iter_key; + int res = 0; + int retval = 0; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + char *ring0_addr=NULL; + char *ring0_addr1=NULL; + unsigned int node_pos; + unsigned int node_pos1; + unsigned int last_node_pos = -1; + unsigned int nodeid; + unsigned int nodeid1; + int 
autogenerated; + + iter = icmap_iter_init("nodelist.node."); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); + if (res != 2) { + continue; + } + + /* + * This relies on the fact the icmap keys are always returned in order + * so all of the keys for a node will be grouped together. We're basically + * just running the code below once for each node. + */ + if (last_node_pos == node_pos) { + continue; + } + last_node_pos = node_pos; + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos); + autogenerated = 0; + + /* Generated nodeids are only allowed for UDP/UDPU so ring0_addr is valid here */ + if (icmap_get_uint32(tmp_key, &nodeid) != CS_OK) { + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); + if (icmap_get_string(tmp_key, &ring0_addr) != CS_OK) { + continue; + } + + /* Generate nodeid so we can check that auto-generated nodeids don't clash either */ + nodeid = generate_nodeid(totem_config, ring0_addr); + if (nodeid == -1) { + continue; + } + autogenerated = 1; + } + + node_pos1 = 0; + subiter = icmap_iter_init("nodelist.node."); + while (((iter_key = icmap_iter_next(subiter, NULL, NULL)) != NULL) && (node_pos1 < node_pos)) { + res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos1, tmp_key); + if ((res != 2) || (node_pos1 >= node_pos)) { + continue; + } + + if (strcmp(tmp_key, "nodeid") != 0) { + continue; + } + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos1); + if (icmap_get_uint32(tmp_key, &nodeid1) != CS_OK) { + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos1); + if (icmap_get_string(tmp_key, &ring0_addr1) != CS_OK) { + continue; + } + nodeid1 = generate_nodeid(totem_config, ring0_addr1); + if (nodeid1 == -1) { + continue; + } + } + + if (nodeid == nodeid1) { + retval = -1; + snprintf (error_string_response, sizeof(error_string_response), + 
"Nodeid %u%s%s%s appears twice in corosync.conf", nodeid, + autogenerated?"(autogenerated from ":"", + autogenerated?ring0_addr:"", + autogenerated?")":""); + *error_string = error_string_response; + break; + } + } + icmap_iter_finalize(subiter); + } + icmap_iter_finalize(iter); + return retval; +} + + +/* + * This needs to be done last of all. It would be nice to do it when reading the + * interface params, but the totem params need to have them to be read first. We + * need both, so this is a way round that circular dependancy. + */ +static void calc_knet_ping_timers(struct totem_config *totem_config) +{ + char runtime_key_name[ICMAP_KEYNAME_MAXLEN]; + int interface; + + for (interface = 0; interface < INTERFACE_MAX; interface++) { + + if (totem_config->interfaces[interface].configured) { + if (!totem_config->interfaces[interface].knet_pong_count) { + totem_config->interfaces[interface].knet_pong_count = KNET_PONG_COUNT; + } + if (!totem_config->interfaces[interface].knet_ping_timeout) { + totem_config->interfaces[interface].knet_ping_timeout = + totem_config->token_timeout / totem_config->interfaces[interface].knet_pong_count; + } + snprintf(runtime_key_name, sizeof(runtime_key_name), + "runtime.config.totem.interface.%d.knet_ping_timeout", interface); + icmap_set_uint32(runtime_key_name, totem_config->interfaces[interface].knet_ping_timeout); + + if (!totem_config->interfaces[interface].knet_ping_interval) { + totem_config->interfaces[interface].knet_ping_interval = + totem_config->token_timeout / (totem_config->interfaces[interface].knet_pong_count * 2); + } + snprintf(runtime_key_name, sizeof(runtime_key_name), + "runtime.config.totem.interface.%d.knet_ping_interval", interface); + icmap_set_uint32(runtime_key_name, totem_config->interfaces[interface].knet_ping_interval); + } + } +} + +/* + * Compute difference between two set of totem interface arrays and commit it. 
+ * set1 and set2 + * are changed so for same ring, ip existing in both set1 and set2 are cleared + * (set to 0), and ips which are only in set1 or set2 remains untouched. + * totempg_node_add/remove is called. + */ +static int compute_and_set_totempg_interfaces(struct totem_interface *set1, + struct totem_interface *set2) +{ + int ring_no, set1_pos, set2_pos; + struct totem_ip_address empty_ip_address; + int res = 0; + + memset(&empty_ip_address, 0, sizeof(empty_ip_address)); + + for (ring_no = 0; ring_no < INTERFACE_MAX; ring_no++) { + if (!set1[ring_no].configured && !set2[ring_no].configured) { + continue; + } + + for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) { + for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) { + /* + * For current ring_no remove all set1 items existing + * in set2 + */ + if (memcmp(&set1[ring_no].member_list[set1_pos], + &set2[ring_no].member_list[set2_pos], + sizeof(struct totem_ip_address)) == 0) { + memset(&set1[ring_no].member_list[set1_pos], 0, + sizeof(struct totem_ip_address)); + memset(&set2[ring_no].member_list[set2_pos], 0, + sizeof(struct totem_ip_address)); + } + } + } + } + + for (ring_no = 0; ring_no < INTERFACE_MAX; ring_no++) { + for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) { + /* + * All items which remain in set1 and don't exist in set2 any more + * have to be removed. + */ + if (memcmp(&set1[ring_no].member_list[set1_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) { + log_printf(LOGSYS_LEVEL_DEBUG, + "removing dynamic member %s for ring %u", + totemip_print(&set1[ring_no].member_list[set1_pos]), + ring_no); + + totempg_member_remove(&set1[ring_no].member_list[set1_pos], ring_no); + } + } + if (!set2[ring_no].configured) { + continue; + } + for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) { + /* + * All items which remain in set2 and don't exist in set1 are new nodes + * and have to be added. 
+ */ + if (memcmp(&set2[ring_no].member_list[set2_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) { + log_printf(LOGSYS_LEVEL_DEBUG, + "adding dynamic member %s for ring %u", + totemip_print(&set2[ring_no].member_list[set2_pos]), + ring_no); + + if (totempg_member_add(&set2[ring_no].member_list[set2_pos], ring_no)) { + res = -1; + } + } + } + } + return res; +} + +/* + * Configure parameters for links + */ +static void configure_link_params(struct totem_config *totem_config, icmap_map_t map) +{ + int i; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + char *addr_string; + int err; + int local_node_pos = find_local_node(map, 0); + + for (i = 0; i<INTERFACE_MAX; i++) { + if (!totem_config->interfaces[i].configured) { + continue; + } + + log_printf(LOGSYS_LEVEL_DEBUG, "Configuring link %d params\n", i); + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring%u_addr", local_node_pos, i); + if (icmap_get_string_r(map, tmp_key, &addr_string) != CS_OK) { + continue; + } + + err = totemip_parse(&totem_config->interfaces[i].local_ip, addr_string, totem_config->ip_version); + if (err != 0) { + continue; + } + totem_config->interfaces[i].local_ip.nodeid = totem_config->node_id; + + /* In case this is a new link, fill in the defaults if there was no interface{} section for it */ + if (!totem_config->interfaces[i].knet_link_priority) + totem_config->interfaces[i].knet_link_priority = 1; + + /* knet_ping_interval & knet_ping_timeout are set later once we know all the other params */ + if (!totem_config->interfaces[i].knet_ping_precision) + totem_config->interfaces[i].knet_ping_precision = KNET_PING_PRECISION; + if (!totem_config->interfaces[i].knet_pong_count) + totem_config->interfaces[i].knet_pong_count = KNET_PONG_COUNT; + if (!totem_config->interfaces[i].knet_transport) + totem_config->interfaces[i].knet_transport = KNET_TRANSPORT_UDP; + if (!totem_config->interfaces[i].ip_port) + totem_config->interfaces[i].ip_port = DEFAULT_PORT + i; + } +} + + +static void 
configure_totem_links(struct totem_config *totem_config, icmap_map_t map) +{ + int i; + + for (i = 0; i<INTERFACE_MAX; i++) { + if (!totem_config->interfaces[i].configured) { + continue; + } + + log_printf(LOGSYS_LEVEL_INFO, "Configuring link %d\n", i); + + totempg_iface_set(&totem_config->interfaces[i].local_ip, totem_config->interfaces[i].ip_port, i); + } +} + +/* Check for differences in config that can't be done on-the-fly and print an error */ +static int check_things_have_not_changed(struct totem_config *totem_config, const char **error_string) +{ + int i,j,k; + const char *ip_str; + char addr_buf[INET6_ADDRSTRLEN]; + int changed = 0; + + for (i = 0; i<INTERFACE_MAX; i++) { + if (totem_config->interfaces[i].configured && + totem_config->orig_interfaces[i].configured) { + if (totem_config->interfaces[i].knet_transport != + totem_config->orig_interfaces[i].knet_transport) { + log_printf(LOGSYS_LEVEL_ERROR, + "New config has different knet transport for link %d. Internal value was NOT changed.\n", i); + changed = 1; + } + + /* Check each nodeid in the new configuration and make sure its IP address on this link has not changed */ + for (j=0; j < totem_config->interfaces[i].member_count; j++) { + for (k=0; k < totem_config->orig_interfaces[i].member_count; k++) { + + if (totem_config->interfaces[i].member_list[j].nodeid == + totem_config->orig_interfaces[i].member_list[k].nodeid) { + + /* Found our nodeid - check the IP address */ + if (memcmp(&totem_config->interfaces[i].member_list[j], + &totem_config->orig_interfaces[i].member_list[k], + sizeof(struct totem_ip_address))) { + + ip_str = totemip_print(&totem_config->orig_interfaces[i].member_list[k]); + + /* if ip_str is NULL then the old address was invalid and is allowed to change */ + if (ip_str) { + strncpy(addr_buf, ip_str, sizeof(addr_buf)); + addr_buf[sizeof(addr_buf) - 1] = '\0'; + log_printf(LOGSYS_LEVEL_ERROR, + "new config has different address for link %d (addr changed from %s to %s). 
Internal value was NOT changed.\n", + i, addr_buf, totemip_print(&totem_config->interfaces[i].member_list[j])); + changed = 1; + } + } + } + } + } + } + } + + if (changed) { + snprintf (error_string_response, sizeof(error_string_response), + "To reconfigure an interface it must be deleted and recreated. A working interface needs to be available to corosync at all times"); + *error_string = error_string_response; + return -1; + } + return 0; +} + + +static int put_nodelist_members_to_config(struct totem_config *totem_config, icmap_map_t map, + int reload, const char **error_string) +{ + icmap_iter_t iter, iter2; + const char *iter_key, *iter_key2; + int res = 0; + unsigned int node_pos; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + char tmp_key2[ICMAP_KEYNAME_MAXLEN]; + char *node_addr_str; + int member_count; + unsigned int linknumber = 0; + int i, j; + int last_node_pos = -1; + + /* Clear out nodelist so we can put the new one in if needed */ + for (i = 0; i < INTERFACE_MAX; i++) { + for (j = 0; j < PROCESSOR_COUNT_MAX; j++) { + memset(&totem_config->interfaces[i].member_list[j], 0, sizeof(struct totem_ip_address)); + } + totem_config->interfaces[i].member_count = 0; + } + + iter = icmap_iter_init_r(map, "nodelist.node."); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); + if (res != 2) { + continue; + } + /* If it's the same as the last node_pos then skip it */ + if (node_pos == last_node_pos) { + continue; + } + last_node_pos = node_pos; + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); + iter2 = icmap_iter_init_r(map, tmp_key); + while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) { + unsigned int nodeid; + char *str; + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos); + if (icmap_get_uint32_r(map, tmp_key, &nodeid) != CS_OK) { + nodeid = 0; + } + + res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, 
&linknumber, tmp_key2); + if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { + continue; + } + if (linknumber >= INTERFACE_MAX) { + snprintf (error_string_response, sizeof(error_string_response), + "parse error in config: interface ring number %u is bigger than allowed maximum %u\n", + linknumber, INTERFACE_MAX - 1); + *error_string = error_string_response; + + icmap_iter_finalize(iter2); + icmap_iter_finalize(iter); + return (-1); + } + + if (icmap_get_string_r(map, iter_key2, &node_addr_str) != CS_OK) { + continue; + } + + /* Generate nodeids if they are not provided and transport is UDP/U */ + if (!nodeid && + (totem_config->transport_number == TOTEM_TRANSPORT_UDP || + totem_config->transport_number == TOTEM_TRANSPORT_UDPU)) { + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); + if (icmap_get_string_r(map, tmp_key, &str) == CS_OK) { + nodeid = generate_nodeid(totem_config, str); + if (nodeid == -1) { + sprintf(error_string_response, + "An IPV6 network requires that a node ID be specified " + "for address '%s'.", node_addr_str); + *error_string = error_string_response; + free(str); + + return (-1); + } + + log_printf(LOGSYS_LEVEL_DEBUG, + "Generated nodeid = " CS_PRI_NODE_ID " for %s", nodeid, str); + + free(str); + /* + * Put nodeid back to nodelist to make cfgtool work + */ + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos); + /* + * Not critical + */ + (void)icmap_set_uint32_r(map, tmp_key, nodeid); + } + } + + if (!nodeid && totem_config->transport_number == TOTEM_TRANSPORT_KNET) { + sprintf(error_string_response, + "Knet requires an explicit nodeid to be specified " + "for address '%s'.", node_addr_str); + *error_string = error_string_response; + + return (-1); + } + + if (totem_config->transport_number == TOTEM_TRANSPORT_KNET && nodeid >= KNET_MAX_HOST) { + sprintf(error_string_response, + "Knet requires nodeid to be less than %u " + "for address '%s'.", KNET_MAX_HOST, node_addr_str); + 
*error_string = error_string_response; + + return (-1); + } + + member_count = totem_config->interfaces[linknumber].member_count; + res = totemip_parse(&totem_config->interfaces[linknumber].member_list[member_count], + node_addr_str, totem_config->ip_version); + if (res == 0) { + totem_config->interfaces[linknumber].member_list[member_count].nodeid = nodeid; + totem_config->interfaces[linknumber].member_count++; + totem_config->interfaces[linknumber].configured = 1; + } else { + sprintf(error_string_response, "failed to parse node address '%s'\n", node_addr_str); + *error_string = error_string_response; + + memset(&totem_config->interfaces[linknumber].member_list[member_count], 0, + sizeof(struct totem_ip_address)); + + free(node_addr_str); + icmap_iter_finalize(iter2); + icmap_iter_finalize(iter); + return -1; + } + + free(node_addr_str); + } + + icmap_iter_finalize(iter2); + } + + icmap_iter_finalize(iter); + + configure_link_params(totem_config, map); + if (reload) { + log_printf(LOGSYS_LEVEL_DEBUG, "About to reconfigure links from nodelist.\n"); + + if (check_things_have_not_changed(totem_config, error_string) == -1) { + return -1; + } + } + return 0; +} + +static void config_convert_nodelist_to_interface(icmap_map_t map, struct totem_config *totem_config) +{ + int res = 0; + int node_pos; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + char tmp_key2[ICMAP_KEYNAME_MAXLEN]; + char *node_addr_str; + unsigned int linknumber = 0; + icmap_iter_t iter; + const char *iter_key; + + node_pos = find_local_node(map, 1); + if (node_pos > -1) { + /* + * We found node, so create interface section + */ + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); + iter = icmap_iter_init_r(map, tmp_key); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(iter_key, "nodelist.node.%u.ring%u%s", &node_pos, &linknumber, tmp_key2); + if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { + continue ; + } + + if (icmap_get_string_r(map, iter_key, 
&node_addr_str) != CS_OK) { + continue; + } + + snprintf(tmp_key2, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", linknumber); + icmap_set_string_r(map, tmp_key2, node_addr_str); + free(node_addr_str); + } + icmap_iter_finalize(iter); + } +} + +static int get_interface_params(struct totem_config *totem_config, icmap_map_t map, + const char **error_string, uint64_t *warnings, + int reload) +{ + int res = 0; + unsigned int linknumber = 0; + int member_count = 0; + int i; + icmap_iter_t iter, member_iter; + const char *iter_key; + const char *member_iter_key; + char linknumber_key[ICMAP_KEYNAME_MAXLEN]; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + uint8_t u8; + uint32_t u32; + char *str; + char *cluster_name = NULL; + enum totem_ip_version_enum tmp_ip_version = TOTEM_IP_VERSION_4; + int ret = 0; + + if (reload) { + for (i=0; i<INTERFACE_MAX; i++) { + /* + * Set back to defaults things that might have been configured and + * now have been taken out of corosync.conf. These won't be caught by the + * code below which only looks at interface{} sections that actually exist. 
+ */ + totem_config->interfaces[i].configured = 0; + totem_config->interfaces[i].knet_ping_timeout = 0; + totem_config->interfaces[i].knet_ping_interval = 0; + totem_config->interfaces[i].knet_ping_precision = KNET_PING_PRECISION; + totem_config->interfaces[i].knet_pong_count = KNET_PONG_COUNT; + } + } + if (icmap_get_string_r(map, "totem.cluster_name", &cluster_name) != CS_OK) { + cluster_name = NULL; + } + + iter = icmap_iter_init_r(map, "totem.interface."); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(iter_key, "totem.interface.%[^.].%s", linknumber_key, tmp_key); + if (res != 2) { + continue; + } + + if (strcmp(tmp_key, "bindnetaddr") != 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) { + continue; + } + + member_count = 0; + linknumber = atoi(linknumber_key); + + if (linknumber >= INTERFACE_MAX) { + snprintf (error_string_response, sizeof(error_string_response), + "parse error in config: interface ring number %u is bigger than allowed maximum %u\n", + linknumber, INTERFACE_MAX - 1); + + *error_string = error_string_response; + ret = -1; + goto out; + } + + /* These things are only valid for the initial read */ + if (!reload) { + /* + * Get the bind net address + */ + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", linknumber); + + if (icmap_get_string_r(map, tmp_key, &str) == CS_OK) { + res = totemip_parse (&totem_config->interfaces[linknumber].bindnet, str, + totem_config->ip_version); + + if (res) { + sprintf(error_string_response, "failed to parse bindnet address '%s'\n", str); + *error_string = error_string_response; + free(str); + + ret = -1; + goto out; + } + + free(str); + } + + /* + * Get interface multicast address + */ + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", linknumber); + if (icmap_get_string_r(map, tmp_key, &str) == CS_OK) { + res = totemip_parse (&totem_config->interfaces[linknumber].mcast_addr, str, + totem_config->ip_version); + + if 
(res) { + sprintf(error_string_response, "failed to parse mcast address '%s'\n", str); + *error_string = error_string_response; + free(str); + + ret = -1; + goto out; + } + + free(str); + } else if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) { + /* + * User not specified address -> autogenerate one from cluster_name key + * (if available). Return code is intentionally ignored, because + * udpu doesn't need mcastaddr and validity of mcastaddr for udp is + * checked later anyway. + */ + + if (totem_config->interfaces[0].bindnet.family == AF_INET) { + tmp_ip_version = TOTEM_IP_VERSION_4; + } else if (totem_config->interfaces[0].bindnet.family == AF_INET6) { + tmp_ip_version = TOTEM_IP_VERSION_6; + } + + (void)get_cluster_mcast_addr (cluster_name, + linknumber, + tmp_ip_version, + &totem_config->interfaces[linknumber].mcast_addr); + } + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", linknumber); + if (icmap_get_string(tmp_key, &str) == CS_OK) { + if (strcmp (str, "yes") == 0) { + totem_config->broadcast_use = 1; + } + free(str); + } + } + + /* These things are only valid for the initial read OR a newly-defined link */ + if (!reload || (totem_config->interfaces[linknumber].configured == 0)) { + + /* + * Get mcast port + */ + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", linknumber); + if (icmap_get_uint16_r(map, tmp_key, &totem_config->interfaces[linknumber].ip_port) != CS_OK) { + if (totem_config->broadcast_use) { + totem_config->interfaces[linknumber].ip_port = DEFAULT_PORT + (2 * linknumber); + } else { + totem_config->interfaces[linknumber].ip_port = DEFAULT_PORT + linknumber; + } + } + + /* + * Get the TTL + */ + totem_config->interfaces[linknumber].ttl = 1; + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", linknumber); + + if (icmap_get_uint8_r(map, tmp_key, &u8) == CS_OK) { + totem_config->interfaces[linknumber].ttl = u8; + } + + 
totem_config->interfaces[linknumber].knet_transport = KNET_DEFAULT_TRANSPORT; + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_transport", linknumber); + if (icmap_get_string_r(map, tmp_key, &str) == CS_OK) { + if (strcmp(str, "sctp") == 0) { + totem_config->interfaces[linknumber].knet_transport = KNET_TRANSPORT_SCTP; + } + else if (strcmp(str, "udp") == 0) { + totem_config->interfaces[linknumber].knet_transport = KNET_TRANSPORT_UDP; + } + else { + *error_string = "Unrecognised knet_transport. expected 'udp' or 'sctp'"; + ret = -1; + goto out; + } + } + } + totem_config->interfaces[linknumber].configured = 1; + + /* + * Get the knet link params + */ + totem_config->interfaces[linknumber].knet_link_priority = 1; + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_link_priority", linknumber); + + if (icmap_get_uint8_r(map, tmp_key, &u8) == CS_OK) { + totem_config->interfaces[linknumber].knet_link_priority = u8; + } + + totem_config->interfaces[linknumber].knet_ping_interval = 0; /* real default applied later */ + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_interval", linknumber); + if (icmap_get_uint32_r(map, tmp_key, &u32) == CS_OK) { + totem_config->interfaces[linknumber].knet_ping_interval = u32; + } + totem_config->interfaces[linknumber].knet_ping_timeout = 0; /* real default applied later */ + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_timeout", linknumber); + if (icmap_get_uint32_r(map, tmp_key, &u32) == CS_OK) { + totem_config->interfaces[linknumber].knet_ping_timeout = u32; + } + totem_config->interfaces[linknumber].knet_ping_precision = KNET_PING_PRECISION; + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_precision", linknumber); + if (icmap_get_uint32_r(map, tmp_key, &u32) == CS_OK) { + totem_config->interfaces[linknumber].knet_ping_precision = u32; + } + totem_config->interfaces[linknumber].knet_pong_count = KNET_PONG_COUNT; + snprintf(tmp_key, 
ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_pong_count", linknumber); + if (icmap_get_uint32_r(map, tmp_key, &u32) == CS_OK) { + totem_config->interfaces[linknumber].knet_pong_count = u32; + } + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", linknumber); + member_iter = icmap_iter_init_r(map, tmp_key); + while ((member_iter_key = icmap_iter_next(member_iter, NULL, NULL)) != NULL) { + if (member_count == 0) { + if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { + free(str); + *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_IGNORED; + break; + } else { + *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED; + } + } + + if (icmap_get_string_r(map, member_iter_key, &str) == CS_OK) { + res = totemip_parse (&totem_config->interfaces[linknumber].member_list[member_count++], + str, totem_config->ip_version); + if (res) { + sprintf(error_string_response, "failed to parse node address '%s'\n", str); + *error_string = error_string_response; + + icmap_iter_finalize(member_iter); + free(str); + ret = -1; + goto out; + } + + free(str); + } + } + icmap_iter_finalize(member_iter); + + totem_config->interfaces[linknumber].member_count = member_count; + + } + +out: + icmap_iter_finalize(iter); + free(cluster_name); + + return (ret); +} + +extern int totem_config_read ( + struct totem_config *totem_config, + const char **error_string, + uint64_t *warnings) +{ + int res = 0; + char *str, *ring0_addr_str; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + uint16_t u16; + int i; + int local_node_pos; + uint32_t u32; + + *warnings = 0; + + memset (totem_config, 0, sizeof (struct totem_config)); + totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); + if (totem_config->interfaces == 0) { + *error_string = "Out of memory trying to allocate ethernet interface storage area"; + return -1; + } + + totem_config->transport_number = TOTEM_TRANSPORT_KNET; + if (icmap_get_string("totem.transport", &str) == CS_OK) { + if (strcmp (str, 
"udpu") == 0) { + totem_config->transport_number = TOTEM_TRANSPORT_UDPU; + } else if (strcmp (str, "udp") == 0) { + totem_config->transport_number = TOTEM_TRANSPORT_UDP; + } else if (strcmp (str, "knet") == 0) { + totem_config->transport_number = TOTEM_TRANSPORT_KNET; + } else { + *error_string = "Invalid transport type. Should be udpu, udp or knet"; + free(str); + return -1; + } + + free(str); + } + + memset (totem_config->interfaces, 0, + sizeof (struct totem_interface) * INTERFACE_MAX); + + strcpy (totem_config->link_mode, "passive"); + + icmap_get_uint32("totem.version", (uint32_t *)&totem_config->version); + + /* initial crypto load */ + if (totem_get_crypto(totem_config, icmap_get_global_map(), error_string) != 0) { + return -1; + } + if (totem_config_keyread(totem_config, icmap_get_global_map(), error_string) != 0) { + return -1; + } + totem_config->crypto_index = 1; + totem_config->crypto_changed = 0; + + if (icmap_get_string("totem.link_mode", &str) == CS_OK) { + if (strlen(str) >= TOTEM_LINK_MODE_BYTES) { + *error_string = "totem.link_mode is too long"; + free(str); + + return -1; + } + strcpy (totem_config->link_mode, str); + free(str); + } + + if (icmap_get_uint32("totem.nodeid", &u32) == CS_OK) { + *warnings |= TOTEM_CONFIG_WARNING_TOTEM_NODEID_SET; + } + + totem_config->clear_node_high_bit = 0; + if (icmap_get_string("totem.clear_node_high_bit", &str) == CS_OK) { + if (strcmp (str, "yes") == 0) { + totem_config->clear_node_high_bit = 1; + } + free(str); + } + + icmap_get_uint32("totem.threads", &totem_config->threads); + + icmap_get_uint32("totem.netmtu", &totem_config->net_mtu); + + totem_config->ip_version = totem_config_get_ip_version(totem_config); + + if (icmap_get_string("totem.interface.0.bindnetaddr", &str) != CS_OK) { + /* + * We were not able to find ring 0 bindnet addr. 
Try to use nodelist informations + */ + config_convert_nodelist_to_interface(icmap_get_global_map(), totem_config); + } else { + if (icmap_get_string("nodelist.node.0.ring0_addr", &ring0_addr_str) == CS_OK) { + /* + * Both bindnetaddr and ring0_addr are set. + * Log warning information, and use nodelist instead + */ + *warnings |= TOTEM_CONFIG_BINDNETADDR_NODELIST_SET; + + config_convert_nodelist_to_interface(icmap_get_global_map(), totem_config); + + free(ring0_addr_str); + } + + free(str); + } + + /* + * Broadcast option is global but set in interface section, + * so reset before processing interfaces. + */ + totem_config->broadcast_use = 0; + + res = get_interface_params(totem_config, icmap_get_global_map(), error_string, warnings, 0); + if (res < 0) { + return res; + } + + /* + * Use broadcast is global, so if set, make sure to fill mcast addr correctly + * broadcast is only supported for UDP so just do interface 0; + */ + if (totem_config->broadcast_use) { + totemip_parse (&totem_config->interfaces[0].mcast_addr, + "255.255.255.255", TOTEM_IP_VERSION_4); + } + + + /* + * Store automatically generated items back to icmap only for UDP + */ + if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) { + for (i = 0; i < INTERFACE_MAX; i++) { + if (!totem_config->interfaces[i].configured) { + continue; + } + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", i); + if (icmap_get_string(tmp_key, &str) == CS_OK) { + free(str); + } else { + str = (char *)totemip_print(&totem_config->interfaces[i].mcast_addr); + icmap_set_string(tmp_key, str); + } + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", i); + if (icmap_get_uint16(tmp_key, &u16) != CS_OK) { + icmap_set_uint16(tmp_key, totem_config->interfaces[i].ip_port); + } + } + } + + /* + * Check existence of nodelist + */ + if ((icmap_get_string("nodelist.node.0.name", &str) == CS_OK) || + (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK)) { + free(str); + /* + 
* find local node + */ + local_node_pos = find_local_node(icmap_get_global_map(), 1); + if (local_node_pos != -1) { + + assert(totem_config->node_id == 0); + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", local_node_pos); + (void)icmap_get_uint32(tmp_key, &totem_config->node_id); + + + if ((totem_config->transport_number == TOTEM_TRANSPORT_KNET) && (!totem_config->node_id)) { + *error_string = "Knet requires an explicit nodeid for the local node"; + return -1; + } + + if ((totem_config->transport_number == TOTEM_TRANSPORT_UDP || + totem_config->transport_number == TOTEM_TRANSPORT_UDPU) && (!totem_config->node_id)) { + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", local_node_pos); + icmap_get_string(tmp_key, &str); + + totem_config->node_id = generate_nodeid(totem_config, str); + if (totem_config->node_id == -1) { + *error_string = "An IPV6 network requires that a node ID be specified"; + + free(str); + return (-1); + } + + totem_config->interfaces[0].member_list[local_node_pos].nodeid = totem_config->node_id; + + free(str); + } + + /* Users must not change this */ + icmap_set_ro_access("nodelist.local_node_pos", 0, 1); + } + + if (put_nodelist_members_to_config(totem_config, icmap_get_global_map(), 0, error_string)) { + return -1; + } + } + + /* + * Get things that might change in the future (and can depend on totem_config->interfaces); + */ + totem_volatile_config_read(totem_config, icmap_get_global_map(), NULL); + + calc_knet_ping_timers(totem_config); + + /* This is now done in the totemknet interface callback */ + /* configure_totem_links(totem_config, icmap_get_global_map()); */ + + add_totem_config_notification(totem_config); + + return 0; +} + + +int totem_config_validate ( + struct totem_config *totem_config, + const char **error_string) +{ + static char local_error_reason[512]; + char parse_error[512]; + const char *error_reason = local_error_reason; + int i; + uint32_t u32; + int num_configured = 0; + 
unsigned int interface_max = INTERFACE_MAX; + + for (i = 0; i < INTERFACE_MAX; i++) { + if (totem_config->interfaces[i].configured) { + num_configured++; + } + } + if (num_configured == 0) { + error_reason = "No interfaces defined"; + goto parse_error; + } + + /* Check we found a local node name */ + if (icmap_get_uint32("nodelist.local_node_pos", &u32) != CS_OK) { + error_reason = "No valid name found for local host"; + goto parse_error; + } + + for (i = 0; i < INTERFACE_MAX; i++) { + /* + * Some error checking of parsed data to make sure its valid + */ + + struct totem_ip_address null_addr; + + if (!totem_config->interfaces[i].configured) { + continue; + } + + memset (&null_addr, 0, sizeof (struct totem_ip_address)); + + if ((totem_config->transport_number == TOTEM_TRANSPORT_UDP) && + memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, + sizeof (struct totem_ip_address)) == 0) { + snprintf (local_error_reason, sizeof(local_error_reason), + "No multicast address specified for interface %u", i); + goto parse_error; + } + + if (totem_config->interfaces[i].ip_port == 0) { + snprintf (local_error_reason, sizeof(local_error_reason), + "No multicast port specified for interface %u", i); + goto parse_error; + } + + if (totem_config->interfaces[i].ttl > 255) { + snprintf (local_error_reason, sizeof(local_error_reason), + "Invalid TTL (should be 0..255) for interface %u", i); + goto parse_error; + } + if (totem_config->transport_number != TOTEM_TRANSPORT_UDP && + totem_config->interfaces[i].ttl != 1) { + snprintf (local_error_reason, sizeof(local_error_reason), + "Can only set ttl on multicast transport types for interface %u", i); + goto parse_error; + } + if (totem_config->interfaces[i].knet_link_priority > 255) { + snprintf (local_error_reason, sizeof(local_error_reason), + "Invalid link priority (should be 0..255) for interface %u", i); + goto parse_error; + } + if (totem_config->transport_number != TOTEM_TRANSPORT_KNET && + 
totem_config->interfaces[i].knet_link_priority != 1) { + snprintf (local_error_reason, sizeof(local_error_reason), + "Can only set link priority on knet transport type for interface %u", i); + goto parse_error; + } + + if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 && + totem_config->node_id == 0) { + snprintf (local_error_reason, sizeof(local_error_reason), + "An IPV6 network requires that a node ID be specified for interface %u", i); + goto parse_error; + } + + if (totem_config->broadcast_use == 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) { + if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { + snprintf (local_error_reason, sizeof(local_error_reason), + "Multicast address family does not match bind address family for interface %u", i); + goto parse_error; + } + + if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) { + snprintf (local_error_reason, sizeof(local_error_reason), + "mcastaddr is not a correct multicast address for interface %u", i); + goto parse_error; + } + } + } + + if (totem_config->version != 2) { + error_reason = "This totem parser can only parse version 2 configurations."; + goto parse_error; + } + + if (totem_volatile_config_validate(totem_config, icmap_get_global_map(), error_string) == -1) { + return (-1); + } + + if (check_for_duplicate_nodeids(totem_config, error_string) == -1) { + return (-1); + } + + /* + * KNET Link values validation + */ + if (strcmp (totem_config->link_mode, "active") && + strcmp (totem_config->link_mode, "rr") && + strcmp (totem_config->link_mode, "passive")) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The Knet link mode \"%s\" specified is invalid. 
It must be active, passive or rr.\n", totem_config->link_mode); + goto parse_error; + } + + /* Only Knet does multiple interfaces */ + if (totem_config->transport_number != TOTEM_TRANSPORT_KNET) { + interface_max = 1; + } + + if (interface_max < num_configured) { + snprintf (parse_error, sizeof(parse_error), + "%d is too many configured interfaces for non-Knet transport.", + num_configured); + error_reason = parse_error; + goto parse_error; + } + + /* Only knet allows crypto */ + if (totem_config->transport_number != TOTEM_TRANSPORT_KNET) { + if ((strcmp(totem_config->crypto_cipher_type, "none") != 0) || + (strcmp(totem_config->crypto_hash_type, "none") != 0)) { + + snprintf (parse_error, sizeof(parse_error), + "crypto_cipher & crypto_hash are only valid for the Knet transport."); + error_reason = parse_error; + goto parse_error; + } + } + + if (totem_config->net_mtu == 0) { + if (totem_config->transport_number == TOTEM_TRANSPORT_KNET) { + totem_config->net_mtu = KNET_MAX_PACKET_SIZE; + } + else { + totem_config->net_mtu = UDP_NETMTU; + } + } + + return 0; + +parse_error: + snprintf (error_string_response, sizeof(error_string_response), + "parse error in config: %s\n", error_reason); + *error_string = error_string_response; + return (-1); + +} + +static int read_keyfile ( + const char *key_location, + struct totem_config *totem_config, + const char **error_string) +{ + int fd; + int res; + int saved_errno; + char error_str[100]; + const char *error_ptr; + + fd = open (key_location, O_RDONLY); + if (fd == -1) { + error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); + snprintf (error_string_response, sizeof(error_string_response), + "Could not open %s: %s\n", + key_location, error_ptr); + goto parse_error; + } + + res = read (fd, totem_config->private_key, TOTEM_PRIVATE_KEY_LEN_MAX); + saved_errno = errno; + close (fd); + + if (res == -1) { + error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str)); + snprintf (error_string_response, 
sizeof(error_string_response), + "Could not read %s: %s\n", + key_location, error_ptr); + goto parse_error; + } + + if (res < TOTEM_PRIVATE_KEY_LEN_MIN) { + snprintf (error_string_response, sizeof(error_string_response), + "Could only read %d bits of minimum %u bits from %s.\n", + res * 8, TOTEM_PRIVATE_KEY_LEN_MIN * 8, key_location); + goto parse_error; + } + + totem_config->private_key_len = res; + + return 0; + +parse_error: + *error_string = error_string_response; + return (-1); +} + +int totem_config_keyread ( + struct totem_config *totem_config, + icmap_map_t map, + const char **error_string) +{ + int got_key = 0; + char *key_location = NULL; + int res; + size_t key_len; + char old_key[TOTEM_PRIVATE_KEY_LEN_MAX]; + size_t old_key_len; + + /* Take a copy so we can see if it has changed */ + memcpy(old_key, totem_config->private_key, sizeof(totem_config->private_key)); + old_key_len = totem_config->private_key_len; + + memset (totem_config->private_key, 0, sizeof(totem_config->private_key)); + totem_config->private_key_len = 0; + + if (strcmp(totem_config->crypto_cipher_type, "none") == 0 && + strcmp(totem_config->crypto_hash_type, "none") == 0) { + return (0); + } + + /* cmap may store the location of the key file */ + if (icmap_get_string_r(map, "totem.keyfile", &key_location) == CS_OK) { + res = read_keyfile(key_location, totem_config, error_string); + free(key_location); + if (res) { + goto key_error; + } + got_key = 1; + } else { /* Or the key itself may be in the cmap */ + if (icmap_get_r(map, "totem.key", NULL, &key_len, NULL) == CS_OK) { + if (key_len > sizeof(totem_config->private_key)) { + sprintf(error_string_response, "key is too long"); + goto key_error; + } + if (key_len < TOTEM_PRIVATE_KEY_LEN_MIN) { + sprintf(error_string_response, "key is too short"); + goto key_error; + } + if (icmap_get_r(map, "totem.key", totem_config->private_key, &key_len, NULL) == CS_OK) { + totem_config->private_key_len = key_len; + got_key = 1; + } else { + 
sprintf(error_string_response, "can't load private key"); + goto key_error; + } + } + } + + /* In desperation we read the default filename */ + if (!got_key) { + res = read_keyfile(COROSYSCONFDIR "/authkey", totem_config, error_string); + if (res) + goto key_error; + } + + if (old_key_len != totem_config->private_key_len || + memcmp(old_key, totem_config->private_key, sizeof(totem_config->private_key))) { + totem_config->crypto_changed = 1; + } + + return (0); + +key_error: + *error_string = error_string_response; + return (-1); + +} + +int totem_reread_crypto_config(struct totem_config *totem_config, icmap_map_t map, const char **error_string) +{ + if (totem_get_crypto(totem_config, map, error_string) != 0) { + return -1; + } + if (totem_config_keyread(totem_config, map, error_string) != 0) { + return -1; + } + return 0; +} + +static void debug_dump_totem_config(const struct totem_config *totem_config) +{ + + log_printf(LOGSYS_LEVEL_DEBUG, "Token Timeout (%d ms) retransmit timeout (%d ms)", + totem_config->token_timeout, totem_config->token_retransmit_timeout); + if (totem_config->token_warning) { + uint32_t token_warning_ms = totem_config->token_warning * totem_config->token_timeout / 100; + log_printf(LOGSYS_LEVEL_DEBUG, "Token warning every %d ms (%d%% of Token Timeout)", + token_warning_ms, totem_config->token_warning); + if (token_warning_ms < totem_config->token_retransmit_timeout) + log_printf (LOGSYS_LEVEL_DEBUG, + "The token warning interval (%d ms) is less than the token retransmit timeout (%d ms) " + "which can lead to spurious token warnings. 
Consider increasing the token_warning parameter.", + token_warning_ms, totem_config->token_retransmit_timeout); + + } else + log_printf(LOGSYS_LEVEL_DEBUG, "Token warnings disabled"); + log_printf(LOGSYS_LEVEL_DEBUG, "token hold (%d ms) retransmits before loss (%d retrans)", + totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const); + log_printf(LOGSYS_LEVEL_DEBUG, "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)", + totem_config->join_timeout, totem_config->send_join_timeout, totem_config->consensus_timeout, + totem_config->merge_timeout); + log_printf(LOGSYS_LEVEL_DEBUG, "downcheck (%d ms) fail to recv const (%d msgs)", + totem_config->downcheck_timeout, totem_config->fail_to_recv_const); + log_printf(LOGSYS_LEVEL_DEBUG, + "seqno unchanged const (%d rotations) Maximum network MTU %d", + totem_config->seqno_unchanged_const, totem_config->net_mtu); + log_printf(LOGSYS_LEVEL_DEBUG, + "window size per rotation (%d messages) maximum messages per rotation (%d messages)", + totem_config->window_size, totem_config->max_messages); + log_printf(LOGSYS_LEVEL_DEBUG, "missed count const (%d messages)", totem_config->miss_count_const); + log_printf(LOGSYS_LEVEL_DEBUG, "heartbeat_failures_allowed (%d)", + totem_config->heartbeat_failures_allowed); + log_printf(LOGSYS_LEVEL_DEBUG, "max_network_delay (%d ms)", totem_config->max_network_delay); +} + + +static void totem_change_notify( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + struct totem_config *totem_config = (struct totem_config *)user_data; + uint32_t *param; + uint8_t reloading; + const char *deleted_key = NULL; + const char *error_string; + + /* + * If a full reload is in progress then don't do anything until it's done and + * can reconfigure it all atomically + */ + if (icmap_get_uint8("config.reload_in_progress", &reloading) == CS_OK && reloading) + return; + + param = 
totem_get_param_by_name((struct totem_config *)user_data, key_name); + /* + * Process change only if changed key is found in totem_config (-> param is not NULL) + * or for special key token_coefficient. token_coefficient key is not stored in + * totem_config, but it is used for computation of token timeout. + */ + if (!param && strcmp(key_name, "totem.token_coefficient") != 0) + return; + + /* + * Values other than UINT32 are not supported, or needed (yet) + */ + switch (event) { + case ICMAP_TRACK_DELETE: + deleted_key = key_name; + break; + case ICMAP_TRACK_ADD: + case ICMAP_TRACK_MODIFY: + deleted_key = NULL; + break; + default: + break; + } + + totem_volatile_config_read (totem_config, icmap_get_global_map(), deleted_key); + log_printf(LOGSYS_LEVEL_DEBUG, "Totem related config key changed. Dumping actual totem config."); + debug_dump_totem_config(totem_config); + if (totem_volatile_config_validate(totem_config, icmap_get_global_map(), &error_string) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); + /* + * TODO: Consider corosync exit and/or load defaults for volatile + * values. For now, log error seems to be enough + */ + } +} + + +int totemconfig_configure_new_params( + struct totem_config *totem_config, + icmap_map_t map, + const char **error_string) +{ + uint64_t warnings = 0LL; + + get_interface_params(totem_config, map, error_string, &warnings, 1); + if (put_nodelist_members_to_config (totem_config, map, 1, error_string)) { + return -1; + } + + calc_knet_ping_timers(totem_config); + + log_printf(LOGSYS_LEVEL_DEBUG, "Configuration reloaded. 
Dumping actual totem config."); + debug_dump_totem_config(totem_config); + + /* Reinstate the local_node_pos */ + (void)find_local_node(map, 0); + + return 0; +} + +int totemconfig_commit_new_params( + struct totem_config *totem_config, + icmap_map_t map) +{ + int res; + struct totem_interface *new_interfaces = NULL; + + new_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); + assert(new_interfaces != NULL); + memcpy(new_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX); + + /* Set link parameters including local_ip */ + configure_totem_links(totem_config, map); + + /* Add & remove nodes & link properties */ + res = compute_and_set_totempg_interfaces(totem_config->orig_interfaces, new_interfaces); + + /* Does basic global params (like compression) */ + totempg_reconfigure(); + + free(new_interfaces); + return res; /* On a reload this is ignored */ +} + +static void add_totem_config_notification(struct totem_config *totem_config) +{ + icmap_track_t icmap_track; + + icmap_track_add("totem.", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, + totem_change_notify, + totem_config, + &icmap_track); +} diff --git a/exec/totemconfig.h b/exec/totemconfig.h new file mode 100644 index 0000000..a0b2e10 --- /dev/null +++ b/exec/totemconfig.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef TOTEMCONFIG_H_DEFINED +#define TOTEMCONFIG_H_DEFINED + +#include <netinet/in.h> +#include <corosync/corotypes.h> +#include <qb/qbloop.h> +#include <corosync/totem/totempg.h> + +#include "totemsrp.h" + +#define TOTEM_CONFIG_WARNING_MEMBERS_IGNORED (1<<1) +#define TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED (1<<2) +#define TOTEM_CONFIG_WARNING_TOTEM_NODEID_SET (1<<3) +#define TOTEM_CONFIG_BINDNETADDR_NODELIST_SET (1<<4) + +extern int totem_config_read ( + struct totem_config *totem_config, + const char **error_string, + uint64_t *warnings); + +extern int totem_config_validate ( + struct totem_config *totem_config, + const char **error_string); + +extern int totem_config_keyread ( + struct totem_config *totem_config, + icmap_map_t map, + const char **error_string); + +extern int totem_config_find_local_addr_in_nodelist( + struct totem_config *totem_config, + const char *ipaddr_key_prefix, + unsigned int *node_pos); + +extern void totem_volatile_config_read( + struct totem_config *totem_config, + icmap_map_t temp_map, + const char *deleted_key); + +extern int totem_reread_crypto_config( + struct totem_config *totem_config, + icmap_map_t map, + const char **error_string); + +extern int totem_volatile_config_validate( + struct totem_config *totem_config, + icmap_map_t temp_map, + const char **error_string); + +extern int totemconfig_configure_new_params( + struct totem_config *totem_config, + icmap_map_t map, + const char **error_string); + +extern int totemconfig_commit_new_params( + struct totem_config *totem_config, + icmap_map_t map); + +#endif /* TOTEMCONFIG_H_DEFINED */ diff --git a/exec/totemip.c b/exec/totemip.c new file mode 100644 index 0000000..2bf7b08 --- /dev/null +++ b/exec/totemip.c @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2005-2020 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Patrick Caulfield (pcaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* IPv4/6 abstraction */ + +#include <config.h> + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <net/if.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <assert.h> +#include <stdlib.h> +#include <unistd.h> +#include <ifaddrs.h> + +#include <corosync/totem/totemip.h> +#include <corosync/logsys.h> +#include <corosync/swab.h> + +#define LOCALHOST_IPV4 "127.0.0.1" +#define LOCALHOST_IPV6 "::1" + +#define NETLINK_BUFSIZE 16384 + +#ifdef SO_NOSIGPIPE +void totemip_nosigpipe(int s) +{ + int on = 1; + setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, (void *)&on, sizeof(on)); +} +#endif + +/* Compare two addresses */ +int totemip_equal(const struct totem_ip_address *addr1, + const struct totem_ip_address *addr2) +{ + int addrlen = 0; + + if (addr1->family != addr2->family) + return 0; + + if (addr1->family == AF_INET) { + addrlen = sizeof(struct in_addr); + } + if (addr1->family == AF_INET6) { + addrlen = sizeof(struct in6_addr); + } + assert(addrlen); + + if (memcmp(addr1->addr, addr2->addr, addrlen) == 0) + return 1; + else + return 0; + +} + +int totemip_sa_equal(const struct totem_ip_address *totem_ip, + const struct sockaddr *sa) +{ + int res; + + res = 0; + + if (totem_ip->family != sa->sa_family) { + return (res); + } + + switch (totem_ip->family) { + case AF_INET: + res = (memcmp(totem_ip->addr, + &((const struct sockaddr_in *)sa)->sin_addr, sizeof(struct in_addr)) == 0); + break; + case AF_INET6: + res = (memcmp(totem_ip->addr, + &((const struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr)) == 0); + break; + default: + assert(0); + } + + return (res); +} + +/* Copy a totem_ip_address */ +void totemip_copy(struct totem_ip_address *addr1, + const struct totem_ip_address *addr2) +{ + memcpy(addr1, addr2, sizeof(struct totem_ip_address)); +} + +/* + * Multicast address range is 224.0.0.0 to 239.255.255.255 this + * translates to 
the first 4 bits == 1110 (0xE). + * http://en.wikipedia.org/wiki/Multicast_address + */ +int32_t totemip_is_mcast(struct totem_ip_address *ip_addr) +{ + uint32_t addr = 0; + + memcpy (&addr, ip_addr->addr, sizeof (uint32_t)); + + if (ip_addr->family == AF_INET) { + addr = ntohl(addr); + if ((addr >> 28) != 0xE) { + return -1; + } + } + return 0; +} + +/* For sorting etc. params are void * for qsort's benefit */ +int totemip_compare(const void *a, const void *b) +{ + int i; + const struct totem_ip_address *totemip_a = (const struct totem_ip_address *)a; + const struct totem_ip_address *totemip_b = (const struct totem_ip_address *)b; + struct in_addr ipv4_a1; + struct in_addr ipv4_a2; + struct in6_addr ipv6_a1; + struct in6_addr ipv6_a2; + unsigned short family; + + /* + * Use memcpy to align since totem_ip_address is unaligned on various archs + */ + memcpy (&family, &totemip_a->family, sizeof (unsigned short)); + + if (family == AF_INET) { + memcpy (&ipv4_a1, totemip_a->addr, sizeof (struct in_addr)); + memcpy (&ipv4_a2, totemip_b->addr, sizeof (struct in_addr)); + if (ipv4_a1.s_addr == ipv4_a2.s_addr) { + return (0); + } + if (htonl(ipv4_a1.s_addr) < htonl(ipv4_a2.s_addr)) { + return -1; + } else { + return +1; + } + } else + if (family == AF_INET6) { + /* + * We can only compare 8 bits at time for portability reasons + */ + memcpy (&ipv6_a1, totemip_a->addr, sizeof (struct in6_addr)); + memcpy (&ipv6_a2, totemip_b->addr, sizeof (struct in6_addr)); + for (i = 0; i < 16; i++) { + int res = ipv6_a1.s6_addr[i] - + ipv6_a2.s6_addr[i]; + if (res) { + return res; + } + } + return 0; + } else { + /* + * Family not set, should be! 
+ */ + assert (0); + } + return 0; +} + +/* Build a localhost totem_ip_address */ +int totemip_localhost(int family, struct totem_ip_address *localhost) +{ + const char *addr_text; + + memset (localhost, 0, sizeof (struct totem_ip_address)); + + if (family == AF_INET) { + addr_text = LOCALHOST_IPV4; + if (inet_pton(family, addr_text, (char *)&localhost->nodeid) <= 0) { + return -1; + } + } else { + addr_text = LOCALHOST_IPV6; + } + + if (inet_pton(family, addr_text, (char *)localhost->addr) <= 0) + return -1; + + localhost->family = family; + + return 0; +} + +int totemip_localhost_check(const struct totem_ip_address *addr) +{ + struct totem_ip_address localhost; + + if (totemip_localhost(addr->family, &localhost)) + return 0; + return totemip_equal(addr, &localhost); +} + +const char *totemip_sa_print(const struct sockaddr *sa) +{ + static char buf[INET6_ADDRSTRLEN]; + + buf[0] = 0; + + switch (sa->sa_family) { + case AF_INET: + inet_ntop(sa->sa_family, &((struct sockaddr_in *)(sa))->sin_addr, buf, + INET6_ADDRSTRLEN); + break; + case AF_INET6: + inet_ntop(sa->sa_family, &((struct sockaddr_in6 *)(sa))->sin6_addr, buf, + INET6_ADDRSTRLEN); + break; + default: + return (NULL); + } + + return (buf); +} + +const char *totemip_print(const struct totem_ip_address *addr) +{ + static char buf[INET6_ADDRSTRLEN]; + + return (inet_ntop(addr->family, addr->addr, buf, sizeof(buf))); +} + +/* Make a totem_ip_address into a usable sockaddr_storage */ +int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, + uint16_t port, struct sockaddr_storage *saddr, int *addrlen) +{ + int ret = -1; + + if (ip_addr->family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)saddr; + + memset(sin, 0, sizeof(struct sockaddr_in)); +#ifdef HAVE_SOCK_SIN_LEN + sin->sin_len = sizeof(struct sockaddr_in); +#endif + sin->sin_family = ip_addr->family; + sin->sin_port = ntohs(port); + memcpy(&sin->sin_addr, ip_addr->addr, sizeof(struct in_addr)); + *addrlen = sizeof(struct 
sockaddr_in); + ret = 0; + } + + if (ip_addr->family == AF_INET6) { + struct sockaddr_in6 *sin = (struct sockaddr_in6 *)saddr; + + memset(sin, 0, sizeof(struct sockaddr_in6)); +#ifdef HAVE_SOCK_SIN6_LEN + sin->sin6_len = sizeof(struct sockaddr_in6); +#endif + sin->sin6_family = ip_addr->family; + sin->sin6_port = ntohs(port); + sin->sin6_scope_id = 2; + memcpy(&sin->sin6_addr, ip_addr->addr, sizeof(struct in6_addr)); + + *addrlen = sizeof(struct sockaddr_in6); + ret = 0; + } + + return ret; +} + +/* + * Converts an address string string into a totem_ip_address. ip_version enum + * defines order. + */ +int totemip_parse(struct totem_ip_address *totemip, const char *addr, + enum totem_ip_version_enum ip_version) +{ + struct addrinfo *ainfo; + struct addrinfo *ainfo_iter; + struct addrinfo *ainfo_ipv4; + struct addrinfo *ainfo_ipv6; + struct addrinfo *ainfo_final; + struct addrinfo ahints; + struct sockaddr_in *sa; + struct sockaddr_in6 *sa6; + int ret; + int debug_ip_family; + int ai_family; + + memset(&ahints, 0, sizeof(ahints)); + ahints.ai_socktype = SOCK_DGRAM; + ahints.ai_protocol = IPPROTO_UDP; + + ai_family = AF_UNSPEC; + debug_ip_family = 0; + + switch (ip_version) { + case TOTEM_IP_VERSION_4: + ai_family = AF_INET; + debug_ip_family = 4; + break; + case TOTEM_IP_VERSION_6: + ai_family = AF_INET6; + debug_ip_family = 6; + break; + case TOTEM_IP_VERSION_6_4: + case TOTEM_IP_VERSION_4_6: + /* + * ai_family and debug_ip_family are already set correctly + */ + break; + } + + ahints.ai_family = ai_family; + + ret = getaddrinfo(addr, NULL, &ahints, &ainfo); + + if (ret == 0 && ai_family == AF_UNSPEC) { + ainfo_ipv4 = ainfo_ipv6 = NULL; + + /* + * Walk thru results and store first AF_INET and AF_INET6 + */ + for (ainfo_iter = ainfo; ainfo_iter != NULL; ainfo_iter = ainfo_iter->ai_next) { + if (ainfo_iter->ai_family == AF_INET && ainfo_ipv4 == NULL) { + ainfo_ipv4 = ainfo_iter; + } + + if (ainfo_iter->ai_family == AF_INET6 && ainfo_ipv6 == NULL) { + ainfo_ipv6 = 
ainfo_iter; + } + } + + if (ip_version == TOTEM_IP_VERSION_6_4) { + if (ainfo_ipv6 != NULL) { + ainfo_final = ainfo_ipv6; + } else { + ainfo_final = ainfo_ipv4; + } + } else { + if (ainfo_ipv4 != NULL) { + ainfo_final = ainfo_ipv4; + } else { + ainfo_final = ainfo_ipv6; + } + } + } else if (ret == 0) { + ainfo_final = ainfo; + } else { + ainfo_final = NULL; + } + + if (ainfo_final == NULL) { + if (ret == 0) { + freeaddrinfo(ainfo); + } + + if (debug_ip_family == 0) { + log_printf(LOGSYS_LEVEL_DEBUG, "totemip_parse: IP address of %s not resolvable", + addr); + } else { + log_printf(LOGSYS_LEVEL_DEBUG, "totemip_parse: IPv%u address of %s not resolvable", + debug_ip_family, addr); + } + + return (-1); + } + + totemip->family = ainfo_final->ai_family; + if (ainfo_final->ai_family == AF_INET) { + sa = (struct sockaddr_in *)ainfo_final->ai_addr; + memcpy(totemip->addr, &sa->sin_addr, sizeof(struct in_addr)); + debug_ip_family = 4; + } else { + sa6 = (struct sockaddr_in6 *)ainfo_final->ai_addr; + memcpy(totemip->addr, &sa6->sin6_addr, sizeof(struct in6_addr)); + debug_ip_family = 6; + } + + log_printf(LOGSYS_LEVEL_DEBUG, "totemip_parse: IPv%u address of %s resolved as %s", + debug_ip_family, addr, totemip_print(totemip)); + + freeaddrinfo(ainfo); + + return (0); +} + +/* Make a sockaddr_* into a totem_ip_address */ +int totemip_sockaddr_to_totemip_convert(const struct sockaddr_storage *saddr, + struct totem_ip_address *ip_addr) +{ + int ret = -1; + + ip_addr->family = saddr->ss_family; + ip_addr->nodeid = 0; + + if (saddr->ss_family == AF_INET) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)saddr; + + memcpy(ip_addr->addr, &sin->sin_addr, sizeof(struct in_addr)); + ret = 0; + } + + if (saddr->ss_family == AF_INET6) { + const struct sockaddr_in6 *sin + = (const struct sockaddr_in6 *)saddr; + + memcpy(ip_addr->addr, &sin->sin6_addr, sizeof(struct in6_addr)); + + ret = 0; + } + return ret; +} + +int totemip_getifaddrs(struct qb_list_head *addrs) +{ + struct 
ifaddrs *ifap, *ifa; + struct totem_ip_if_address *if_addr; + + if (getifaddrs(&ifap) != 0) + return (-1); + + qb_list_init(addrs); + + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL || ifa->ifa_netmask == NULL) + continue ; + + if ((ifa->ifa_addr->sa_family != AF_INET && ifa->ifa_addr->sa_family != AF_INET6) || + (ifa->ifa_netmask->sa_family != AF_INET && ifa->ifa_netmask->sa_family != AF_INET6 && + ifa->ifa_netmask->sa_family != 0)) + continue ; + + if (ifa->ifa_netmask->sa_family == 0) { + ifa->ifa_netmask->sa_family = ifa->ifa_addr->sa_family; + } + + if_addr = malloc(sizeof(struct totem_ip_if_address)); + if (if_addr == NULL) { + goto error_free_ifaddrs; + } + + qb_list_init(&if_addr->list); + + memset(if_addr, 0, sizeof(struct totem_ip_if_address)); + + if_addr->interface_up = ifa->ifa_flags & IFF_UP; + if_addr->interface_num = if_nametoindex(ifa->ifa_name); + if_addr->name = strdup(ifa->ifa_name); + if (if_addr->name == NULL) { + goto error_free_addr; + } + + if (totemip_sockaddr_to_totemip_convert((const struct sockaddr_storage *)ifa->ifa_addr, + &if_addr->ip_addr) == -1) { + goto error_free_addr_name; + } + + if (totemip_sockaddr_to_totemip_convert((const struct sockaddr_storage *)ifa->ifa_netmask, + &if_addr->mask_addr) == -1) { + goto error_free_addr_name; + } + + qb_list_add_tail(&if_addr->list, addrs); + } + + freeifaddrs(ifap); + + return (0); + +error_free_addr_name: + free(if_addr->name); + +error_free_addr: + free(if_addr); + +error_free_ifaddrs: + totemip_freeifaddrs(addrs); + freeifaddrs(ifap); + return (-1); +} + +void totemip_freeifaddrs(struct qb_list_head *addrs) +{ + struct totem_ip_if_address *if_addr; + struct qb_list_head *list, *tmp_iter; + + qb_list_for_each_safe(list, tmp_iter, addrs) { + if_addr = qb_list_entry(list, struct totem_ip_if_address, list); + + free(if_addr->name); + qb_list_del(&if_addr->list); + free(if_addr); + } + qb_list_init(addrs); +} + +int totemip_iface_check(struct totem_ip_address 
*bindnet, + struct totem_ip_address *boundto, + int *interface_up, + int *interface_num, + int mask_high_bit) +{ + struct qb_list_head addrs; + struct qb_list_head *list; + struct totem_ip_if_address *if_addr; + struct totem_ip_address bn_netaddr, if_netaddr; + socklen_t addr_len; + socklen_t si; + int res = -1; + int exact_match_found = 0; + int net_match_found = 0; + + *interface_up = 0; + *interface_num = 0; + + if (totemip_getifaddrs(&addrs) == -1) { + return (-1); + } + + qb_list_for_each(list, &addrs) { + if_addr = qb_list_entry(list, struct totem_ip_if_address, list); + + if (bindnet->family != if_addr->ip_addr.family) + continue ; + + addr_len = 0; + + switch (bindnet->family) { + case AF_INET: + addr_len = sizeof(struct in_addr); + break; + case AF_INET6: + addr_len = sizeof(struct in6_addr); + break; + } + + if (addr_len == 0) + continue ; + + totemip_copy(&bn_netaddr, bindnet); + totemip_copy(&if_netaddr, &if_addr->ip_addr); + + if (totemip_equal(&bn_netaddr, &if_netaddr)) { + exact_match_found = 1; + } + + for (si = 0; si < addr_len; si++) { + bn_netaddr.addr[si] = bn_netaddr.addr[si] & if_addr->mask_addr.addr[si]; + if_netaddr.addr[si] = if_netaddr.addr[si] & if_addr->mask_addr.addr[si]; + } + + if (exact_match_found || (!net_match_found && totemip_equal(&bn_netaddr, &if_netaddr))) { + totemip_copy(boundto, &if_addr->ip_addr); + boundto->nodeid = bindnet->nodeid; + *interface_up = if_addr->interface_up; + *interface_num = if_addr->interface_num; + + net_match_found = 1; + res = 0; + + if (exact_match_found) { + goto finished; + } + } + } + +finished: + totemip_freeifaddrs(&addrs); + return (res); +} + +#define TOTEMIP_UDP_HEADER_SIZE 8 +#define TOTEMIP_IPV4_HEADER_SIZE 20 +#define TOTEMIP_IPV6_HEADER_SIZE 40 + +size_t totemip_udpip_header_size(int family) +{ + size_t header_size; + + header_size = 0; + + switch (family) { + case AF_INET: + header_size = TOTEMIP_UDP_HEADER_SIZE + TOTEMIP_IPV4_HEADER_SIZE; + break; + case AF_INET6: + header_size = 
TOTEMIP_UDP_HEADER_SIZE + TOTEMIP_IPV6_HEADER_SIZE; + break; + } + + return (header_size); +} diff --git a/exec/totemknet.c b/exec/totemknet.c new file mode 100644 index 0000000..f280a09 --- /dev/null +++ b/exec/totemknet.c @@ -0,0 +1,2306 @@ +/* + * Copyright (c) 2016-2022 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Christine Caulfield (ccaulfie@redhat.com) + + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <assert.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <netdb.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <sys/param.h> +#include <netinet/in.h> +#include <net/ethernet.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <pthread.h> +#include <sched.h> +#include <time.h> +#include <sys/time.h> +#include <sys/poll.h> +#include <sys/uio.h> +#include <limits.h> + +#include <qb/qbdefs.h> +#include <qb/qbloop.h> +#ifdef HAVE_LIBNOZZLE +#include <libgen.h> +#include <libnozzle.h> +#endif + +#include <corosync/sq.h> +#include <corosync/swab.h> +#include <corosync/logsys.h> +#include <corosync/icmap.h> +#include <corosync/totem/totemip.h> +#include "totemknet.h" + +#include "main.h" +#include "util.h" + +#include <libknet.h> +#include <corosync/totem/totemstats.h> + +#ifndef MSG_NOSIGNAL +#define MSG_NOSIGNAL 0 +#endif + +#ifdef HAVE_LIBNOZZLE +static int setup_nozzle(void *knet_context); +#endif + +/* Should match that used by cfg */ +#define CFG_INTERFACE_STATUS_MAX_LEN 512 + +struct totemknet_instance { + struct crypto_instance *crypto_inst; + + qb_loop_t *poll_handle; + + knet_handle_t knet_handle; + + int link_mode; + + void *context; + + int (*totemknet_deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from); + + int (*totemknet_iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int link_no); + + void (*totemknet_mtu_changed) ( + void *context, + int net_mtu); + + void (*totemknet_target_set_completed) (void *context); + + /* + * Function and data used to log messages + */ + int totemknet_log_level_security; + + int totemknet_log_level_error; + + int totemknet_log_level_warning; + + int totemknet_log_level_notice; + + int totemknet_log_level_debug; + + int 
totemknet_subsys_id; + + int knet_subsys_id; + + void (*totemknet_log_printf) ( + int level, + int subsys, + const char *function, + const char *file, + int line, + const char *format, + ...)__attribute__((format(printf, 6, 7))); + + void *knet_context; + + char iov_buffer[KNET_MAX_PACKET_SIZE]; + + char *link_status[INTERFACE_MAX]; + + struct totem_ip_address my_ids[INTERFACE_MAX]; + + uint16_t ip_port[INTERFACE_MAX]; + + int our_nodeid; + + int loopback_link; + + struct totem_config *totem_config; + + struct totem_ip_address token_target; + + qb_loop_timer_handle timer_netif_check_timeout; + + qb_loop_timer_handle timer_merge_detect_timeout; + + int send_merge_detect_message; + + unsigned int merge_detect_messages_sent_before_timeout; + + int logpipes[2]; + int knet_fd; + + pthread_mutex_t log_mutex; +#ifdef HAVE_LIBNOZZLE + char *nozzle_name; + char *nozzle_ipaddr; + char *nozzle_prefix; + char *nozzle_macaddr; + nozzle_t nozzle_handle; +#endif +}; + +/* Awkward. But needed to get stats from knet */ +struct totemknet_instance *global_instance; + +struct work_item { + const void *msg; + unsigned int msg_len; + struct totemknet_instance *instance; +}; + +int totemknet_member_list_rebind_ip ( + void *knet_context); + + +static int totemknet_configure_compression ( + struct totemknet_instance *instance, + struct totem_config *totem_config); + +static void totemknet_start_merge_detect_timeout( + void *knet_context); + +static void totemknet_stop_merge_detect_timeout( + void *knet_context); + +static void log_flush_messages ( + void *knet_context); + +static void totemknet_instance_initialize (struct totemknet_instance *instance) +{ + int res; + + memset (instance, 0, sizeof (struct totemknet_instance)); + res = pthread_mutex_init(&instance->log_mutex, NULL); + /* + * There is not too much else what can be done. + */ + assert(res == 0); +} + +#define knet_log_printf_lock(level, subsys, function, file, line, format, args...) 
\ +do { \ + (void)pthread_mutex_lock(&instance->log_mutex); \ + instance->totemknet_log_printf ( \ + level, subsys, function, file, line, \ + (const char *)format, ##args); \ + (void)pthread_mutex_unlock(&instance->log_mutex); \ +} while (0); + +#define knet_log_printf(level, format, args...) \ +do { \ + knet_log_printf_lock ( \ + level, instance->totemknet_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ +} while (0); + +#define libknet_log_printf(level, format, args...) \ +do { \ + knet_log_printf_lock ( \ + level, instance->knet_subsys_id, \ + __FUNCTION__, "libknet.h", __LINE__, \ + (const char *)format, ##args); \ +} while (0); + +#define KNET_LOGSYS_PERROR(err_num, level, fmt, args...) \ +do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + instance->totemknet_log_printf ( \ + level, instance->totemknet_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + fmt ": %s (%d)", ##args, _error_ptr, err_num); \ + } while(0) + + +#ifdef HAVE_LIBNOZZLE +static inline int is_ether_addr_multicast(const uint8_t *addr) +{ + return (addr[0] & 0x01); +} +static inline int is_ether_addr_zero(const uint8_t *addr) +{ + return (!addr[0] && !addr[1] && !addr[2] && !addr[3] && !addr[4] && !addr[5]); +} + +static int ether_host_filter_fn(void *private_data, + const unsigned char *outdata, + ssize_t outdata_len, + uint8_t tx_rx, + knet_node_id_t this_host_id, + knet_node_id_t src_host_id, + int8_t *channel, + knet_node_id_t *dst_host_ids, + size_t *dst_host_ids_entries) +{ + struct ether_header *eth_h = (struct ether_header *)outdata; + uint8_t *dst_mac = (uint8_t *)eth_h->ether_dhost; + uint16_t dst_host_id; + + if (is_ether_addr_zero(dst_mac)) + return -1; + + if (is_ether_addr_multicast(dst_mac)) { + return 1; + } + + memmove(&dst_host_id, &dst_mac[4], 2); + + dst_host_ids[0] = ntohs(dst_host_id); + *dst_host_ids_entries = 1; + + return 0; +} +#endif + 
+static int dst_host_filter_callback_fn(void *private_data, + const unsigned char *outdata, + ssize_t outdata_len, + uint8_t tx_rx, + knet_node_id_t this_host_id, + knet_node_id_t src_host_id, + int8_t *channel, + knet_node_id_t *dst_host_ids, + size_t *dst_host_ids_entries) +{ + struct totem_message_header *header = (struct totem_message_header *)outdata; + int res; + +#ifdef HAVE_LIBNOZZLE + if (*channel != 0) { + return ether_host_filter_fn(private_data, + outdata, outdata_len, + tx_rx, + this_host_id, src_host_id, + channel, + dst_host_ids, + dst_host_ids_entries); + } +#endif + if (header->target_nodeid) { + dst_host_ids[0] = header->target_nodeid; + *dst_host_ids_entries = 1; + res = 0; /* unicast message */ + } + else { + *dst_host_ids_entries = 0; + res = 1; /* multicast message */ + } + return res; +} + +static void socket_error_callback_fn(void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)private_data; + + knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet socket ERROR notification called: txrx=%d, error=%d, errorno=%d", tx_rx, error, errorno); + if ((error == -1 && errorno != EAGAIN) || (error == 0)) { + knet_handle_remove_datafd(instance->knet_handle, datafd); + } +} + +static void host_change_callback_fn(void *private_data, knet_node_id_t host_id, uint8_t reachable, uint8_t remote, uint8_t external) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)private_data; + + // TODO: what? if anything. + knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet host change callback. 
nodeid: " CS_PRI_NODE_ID " reachable: %d", host_id, reachable); +} + +static void pmtu_change_callback_fn(void *private_data, unsigned int data_mtu) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)private_data; + knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet pMTU change: %d", data_mtu); + + /* We don't need to tell corosync the actual knet MTU */ +// instance->totemknet_mtu_changed(instance->context, data_mtu); +} + +int totemknet_crypto_set ( + void *knet_context, + const char *cipher_type, + const char *hash_type) +{ + return (0); +} + + +static inline void ucast_sendmsg ( + struct totemknet_instance *instance, + struct totem_ip_address *system_to, + const void *msg, + unsigned int msg_len) +{ + int res = 0; + struct totem_message_header *header = (struct totem_message_header *)msg; + struct msghdr msg_ucast; + struct iovec iovec; + + header->target_nodeid = system_to->nodeid; + + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + + /* + * Build unicast message + */ + memset(&msg_ucast, 0, sizeof(msg_ucast)); + msg_ucast.msg_iov = (void *)&iovec; + msg_ucast.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_ucast.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_ucast.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_ucast.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_ucast.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_ucast.msg_accrightslen = 0; +#endif + + /* + * Transmit unicast message + * An error here is recovered by totemsrp + */ + + res = sendmsg (instance->knet_fd, &msg_ucast, MSG_NOSIGNAL); + if (res < 0) { + KNET_LOGSYS_PERROR (errno, instance->totemknet_log_level_debug, + "sendmsg(ucast) failed (non-critical)"); + } +} + +static inline void mcast_sendmsg ( + struct totemknet_instance *instance, + const void *msg, + unsigned int msg_len, + int only_active) +{ + int res; + struct totem_message_header *header = (struct totem_message_header *)msg; + struct msghdr 
msg_mcast; + struct iovec iovec; + + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + + header->target_nodeid = 0; + + /* + * Build multicast message + */ + memset(&msg_mcast, 0, sizeof(msg_mcast)); + msg_mcast.msg_iov = (void *)&iovec; + msg_mcast.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_mcast.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_mcast.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_mcast.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_mcast.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_mcast.msg_accrightslen = 0; +#endif + + +// log_printf (LOGSYS_LEVEL_DEBUG, "totemknet: mcast_sendmsg. only_active=%d, len=%d", only_active, msg_len); + + res = sendmsg (instance->knet_fd, &msg_mcast, MSG_NOSIGNAL); + if (res < msg_len) { + knet_log_printf (LOGSYS_LEVEL_DEBUG, "totemknet: mcast_send sendmsg returned %d", res); + } + + if (!only_active || instance->send_merge_detect_message) { + /* + * Current message was sent to all nodes + */ + instance->merge_detect_messages_sent_before_timeout++; + instance->send_merge_detect_message = 0; + } +} + +static int node_compare(const void *aptr, const void *bptr) +{ + uint16_t a,b; + + a = *(uint16_t *)aptr; + b = *(uint16_t *)bptr; + + return a > b; +} + +#ifndef OWN_INDEX_NONE +#define OWN_INDEX_NONE -1 +#endif + +int totemknet_nodestatus_get ( + void *knet_context, + unsigned int nodeid, + struct totem_node_status *node_status) +{ + int i; + int res = 0; + struct knet_link_status link_status; + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + struct knet_host_status knet_host_status; + uint8_t link_list[KNET_MAX_LINK]; + size_t num_links; + + if (!instance->knet_handle) { + return CS_ERR_NOT_EXIST; /* Not using knet */ + } + + if (!node_status) { + return CS_ERR_INVALID_PARAM; + } + + res = knet_host_get_status(instance->knet_handle, + nodeid, + &knet_host_status); + if (res) { + knet_log_printf 
(LOGSYS_LEVEL_WARNING, "knet_handle_get_host_status(%d) failed: %d", nodeid, res); + return (-1); + } + node_status->nodeid = nodeid; + node_status->reachable = knet_host_status.reachable; + node_status->remote = knet_host_status.remote; + node_status->external = knet_host_status.external; + +#ifdef HAVE_KNET_ONWIRE_VER + res = knet_handle_get_onwire_ver(instance->knet_handle, + nodeid, + &node_status->onwire_min, + &node_status->onwire_max, + &node_status->onwire_ver); + if (res) { + knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_handle_get_onwire_ver(%d) failed: %d", nodeid, res); + return (-1); + } +#endif + /* Get link info */ + res = knet_link_get_link_list(instance->knet_handle, + nodeid, link_list, &num_links); + if (res) { + knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_link_get_link_list(%d) failed: %d", nodeid, res); + return (-1); + } + + /* node_status[] has been zeroed for us in totempg.c */ + for (i=0; i < num_links; i++) { + if (!instance->totem_config->interfaces[link_list[i]].configured) { + continue; + } + res = knet_link_get_status(instance->knet_handle, + nodeid, + link_list[i], + &link_status, + sizeof(link_status)); + if (res == 0) { + node_status->link_status[link_list[i]].enabled = link_status.enabled; + node_status->link_status[link_list[i]].connected = link_status.connected; + node_status->link_status[link_list[i]].dynconnected = link_status.dynconnected; + node_status->link_status[link_list[i]].mtu = link_status.mtu; + memcpy(node_status->link_status[link_list[i]].src_ipaddr, link_status.src_ipaddr, KNET_MAX_HOST_LEN); + memcpy(node_status->link_status[link_list[i]].dst_ipaddr, link_status.dst_ipaddr, KNET_MAX_HOST_LEN); + } else { + knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_link_get_link_status(%d, %d) failed: %d", nodeid, link_list[i], res); + } + } + return res; +} + + + +int totemknet_ifaces_get (void *knet_context, + char ***status, + unsigned int *iface_count) +{ + struct totemknet_instance *instance = (struct totemknet_instance 
*)knet_context; + struct knet_link_status link_status; + knet_node_id_t host_list[KNET_MAX_HOST]; + uint8_t link_list[KNET_MAX_LINK]; + size_t num_hosts; + size_t num_links; + size_t link_idx; + int i,j; + char *ptr; + int res = 0; + + /* + * Don't do the whole 'link_info' bit if the caller just wants + * a count of interfaces. + */ + if (status) { + int own_idx = OWN_INDEX_NONE; + + res = knet_host_get_host_list(instance->knet_handle, + host_list, &num_hosts); + if (res) { + return (-1); + } + qsort(host_list, num_hosts, sizeof(uint16_t), node_compare); + + for (j=0; j<num_hosts; j++) { + if (host_list[j] == instance->our_nodeid) { + own_idx = j; + break; + } + } + + for (i=0; i<INTERFACE_MAX; i++) { + memset(instance->link_status[i], 'd', CFG_INTERFACE_STATUS_MAX_LEN-1); + if (own_idx != OWN_INDEX_NONE) { + instance->link_status[i][own_idx] = 'n'; + } + instance->link_status[i][num_hosts] = '\0'; + } + + /* This is all a bit "inside-out" because "status" is a set of strings per link + * and knet orders things by host + */ + for (j=0; j<num_hosts; j++) { + if (own_idx != OWN_INDEX_NONE && j == own_idx) { + continue ; + } + + res = knet_link_get_link_list(instance->knet_handle, + host_list[j], link_list, &num_links); + if (res) { + return (-1); + } + + link_idx = 0; + for (i=0; i < num_links; i++) { + /* + * Skip over links that are unconfigured to corosync. This is basically + * link0 if corosync isn't using it for comms, as we will still + * have it set up for loopback. 
+ */ + if (!instance->totem_config->interfaces[link_list[i]].configured) { + continue; + } + ptr = instance->link_status[link_idx++]; + + res = knet_link_get_status(instance->knet_handle, + host_list[j], + link_list[i], + &link_status, + sizeof(link_status)); + if (res == 0) { + ptr[j] = '0' + (link_status.enabled | + link_status.connected<<1 | + link_status.dynconnected<<2); + } + else { + knet_log_printf (LOGSYS_LEVEL_ERROR, + "totemknet_ifaces_get: Cannot get link status: %s", strerror(errno)); + ptr[j] = '?'; + } + } + } + *status = instance->link_status; + } + + *iface_count = INTERFACE_MAX; + + return (res); +} + +int totemknet_finalize ( + void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res = 0; + int i,j; + static knet_node_id_t nodes[KNET_MAX_HOST]; /* static to save stack */ + uint8_t links[KNET_MAX_LINK]; + size_t num_nodes; + size_t num_links; + + knet_log_printf(LOG_DEBUG, "totemknet: finalize"); + + qb_loop_poll_del (instance->poll_handle, instance->logpipes[0]); + qb_loop_poll_del (instance->poll_handle, instance->knet_fd); + + /* + * Disable forwarding to make knet flush send queue. This ensures that the LEAVE message will be sent. + */ + res = knet_handle_setfwd(instance->knet_handle, 0); + if (res) { + knet_log_printf (LOGSYS_LEVEL_CRIT, "totemknet: knet_handle_setfwd failed: %s", strerror(errno)); + } + + res = knet_host_get_host_list(instance->knet_handle, nodes, &num_nodes); + if (res) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Cannot get knet node list for shutdown: %s", strerror(errno)); + /* Crash out anyway */ + goto finalise_error; + } + + /* Tidily shut down all nodes & links. 
*/ + for (i=0; i<num_nodes; i++) { + + res = knet_link_get_link_list(instance->knet_handle, nodes[i], links, &num_links); + if (res) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Cannot get knet link list for node " CS_PRI_NODE_ID ": %s", nodes[i], strerror(errno)); + goto finalise_error; + } + for (j=0; j<num_links; j++) { + res = knet_link_set_enable(instance->knet_handle, nodes[i], links[j], 0); + if (res) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "totemknet: knet_link_set_enable(node " CS_PRI_NODE_ID ", link %d) failed: %s", nodes[i], links[j], strerror(errno)); + } + res = knet_link_clear_config(instance->knet_handle, nodes[i], links[j]); + if (res) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "totemknet: knet_link_clear_config(node " CS_PRI_NODE_ID ", link %d) failed: %s", nodes[i], links[j], strerror(errno)); + } + } + res = knet_host_remove(instance->knet_handle, nodes[i]); + if (res) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "totemknet: knet_host_remove(node " CS_PRI_NODE_ID ") failed: %s", nodes[i], strerror(errno)); + } + } + +finalise_error: + res = knet_handle_free(instance->knet_handle); + if (res) { + knet_log_printf (LOGSYS_LEVEL_CRIT, "totemknet: knet_handle_free failed: %s", strerror(errno)); + } + + totemknet_stop_merge_detect_timeout(instance); + + log_flush_messages(instance); + + /* + * Error is deliberately ignored + */ + (void)pthread_mutex_destroy(&instance->log_mutex); + + return (res); +} + +static int log_deliver_fn ( + int fd, + int revents, + void *data) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)data; + char buffer[sizeof(struct knet_log_msg)*4]; + char *bufptr = buffer; + int done = 0; + int len; + + len = read(fd, buffer, sizeof(buffer)); + while (done < len) { + struct knet_log_msg *msg = (struct knet_log_msg *)bufptr; + switch (msg->msglevel) { + case KNET_LOG_ERR: + libknet_log_printf (LOGSYS_LEVEL_ERROR, "%s: %s", + knet_log_get_subsystem_name(msg->subsystem), + msg->msg); + break; + case KNET_LOG_WARN: + 
libknet_log_printf (LOGSYS_LEVEL_WARNING, "%s: %s", + knet_log_get_subsystem_name(msg->subsystem), + msg->msg); + break; + case KNET_LOG_INFO: + libknet_log_printf (LOGSYS_LEVEL_INFO, "%s: %s", + knet_log_get_subsystem_name(msg->subsystem), + msg->msg); + break; + case KNET_LOG_DEBUG: + libknet_log_printf (LOGSYS_LEVEL_DEBUG, "%s: %s", + knet_log_get_subsystem_name(msg->subsystem), + msg->msg); + break; +#ifdef KNET_LOG_TRACE + case KNET_LOG_TRACE: + libknet_log_printf (LOGSYS_LEVEL_TRACE, "%s: %s", + knet_log_get_subsystem_name(msg->subsystem), + msg->msg); + break; +#endif + } + bufptr += sizeof(struct knet_log_msg); + done += sizeof(struct knet_log_msg); + } + return 0; +} + +static int data_deliver_fn ( + int fd, + int revents, + void *data) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)data; + struct msghdr msg_hdr; + struct iovec iov_recv; + struct sockaddr_storage system_from; + ssize_t msg_len; + int truncated_packet; + + iov_recv.iov_base = instance->iov_buffer; + iov_recv.iov_len = KNET_MAX_PACKET_SIZE; + + msg_hdr.msg_name = &system_from; + msg_hdr.msg_namelen = sizeof (struct sockaddr_storage); + msg_hdr.msg_iov = &iov_recv; + msg_hdr.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_hdr.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_hdr.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_hdr.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_hdr.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_hdr.msg_accrightslen = 0; +#endif + + msg_len = recvmsg (fd, &msg_hdr, MSG_NOSIGNAL | MSG_DONTWAIT); + if (msg_len <= 0) { + return (0); + } + + truncated_packet = 0; + +#ifdef HAVE_MSGHDR_FLAGS + if (msg_hdr.msg_flags & MSG_TRUNC) { + truncated_packet = 1; + } +#else + /* + * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that + * if bytes_received == KNET_MAX_PACKET_SIZE then packet is truncated + */ + if (bytes_received == KNET_MAX_PACKET_SIZE) { + 
truncated_packet = 1; + } +#endif + + if (truncated_packet) { + knet_log_printf(instance->totemknet_log_level_error, + "Received too big message. This may be because something bad is happening" + "on the network (attack?), or you tried join more nodes than corosync is" + "compiled with (%u) or bug in the code (bad estimation of " + "the KNET_MAX_PACKET_SIZE). Dropping packet.", PROCESSOR_COUNT_MAX); + return (0); + } + + /* + * Handle incoming message + */ + instance->totemknet_deliver_fn ( + instance->context, + instance->iov_buffer, + msg_len, + &system_from); + + return (0); +} + +static void timer_function_netif_check_timeout ( + void *data) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)data; + int i; + int res = 0; + + for (i=0; i < INTERFACE_MAX; i++) { + if (!instance->totem_config->interfaces[i].configured) { + continue; + } + res = instance->totemknet_iface_change_fn (instance->context, + &instance->my_ids[i], + i); + } + if (res != 0) { + /* This is only called at startup, so we can quit here. 
+ Refresh takes a different path */ + corosync_exit_error(COROSYNC_DONE_MAINCONFIGREAD); + } +} + +static void knet_set_access_list_config(struct totemknet_instance *instance) +{ +#ifdef HAVE_KNET_ACCESS_LIST + uint32_t value; + cs_error_t err; + + value = instance->totem_config->block_unlisted_ips; + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet_enable access list: %d", value); + + err = knet_handle_enable_access_lists(instance->knet_handle, value); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_access_lists failed"); + } +#endif +} + +void totemknet_configure_log_level() +{ + int logsys_log_mode; + int knet_log_mode = KNET_LOG_INFO; + uint8_t s; + int err; + + if (!global_instance || !global_instance->knet_handle) { + return; + } + + /* Reconfigure logging level */ + logsys_log_mode = logsys_config_debug_get("KNET"); + + switch (logsys_log_mode) { + case LOGSYS_DEBUG_OFF: + knet_log_mode = KNET_LOG_INFO; + break; + case LOGSYS_DEBUG_ON: + knet_log_mode = KNET_LOG_DEBUG; + break; + case LOGSYS_DEBUG_TRACE: +#ifdef KNET_LOG_TRACE + knet_log_mode = KNET_LOG_TRACE; +#else + knet_log_mode = KNET_LOG_DEBUG; +#endif + break; + } + log_printf (LOGSYS_LEVEL_DEBUG, "totemknet setting log level %s", knet_log_get_loglevel_name(knet_log_mode)); + err = 0; + for (s = 0; s<KNET_MAX_SUBSYSTEMS; s++) { + err = knet_log_set_loglevel(global_instance->knet_handle, s, knet_log_mode); + } + + /* If one fails, they all fail. 
no point in issuing KNET_MAX_SUBSYSTEMS errors */ + if (err) { + log_printf (LOGSYS_LEVEL_ERROR, "totemknet failed to set log level: %s", strerror(errno)); + } +} + + +/* NOTE: this relies on the fact that totem_reload_notify() is called first */ +static void totemknet_refresh_config( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + uint8_t reloading; + int after_reload; + uint32_t link_no; + size_t num_nodes; + knet_node_id_t host_ids[KNET_MAX_HOST]; + int i; + int err; + struct totemknet_instance *instance = (struct totemknet_instance *)user_data; + + ENTER(); + + /* + * If a full reload is in progress then don't do anything until it's done and + * can reconfigure it all atomically + */ + if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) { + return; + } + + after_reload = (strcmp(key_name, "config.totemconfig_reload_in_progress") == 0); + + knet_set_access_list_config(instance); + + if (strcmp(key_name, "totem.knet_pmtud_interval") == 0 || after_reload) { + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet_pmtud_interval now %u", + instance->totem_config->knet_pmtud_interval); + err = knet_handle_pmtud_setfreq(instance->knet_handle, instance->totem_config->knet_pmtud_interval); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_pmtud_setfreq failed"); + } + } + + if (strcmp(key_name, "totem.knet_mtu") == 0 || after_reload) { + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet_mtu now %u", instance->totem_config->knet_mtu); + err = knet_handle_pmtud_set(instance->knet_handle, instance->totem_config->knet_mtu); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_pmtud failed"); + } + } + + /* Configure link parameters for each node */ + err = knet_host_get_host_list(instance->knet_handle, host_ids, &num_nodes); + if (err != 0) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, 
"knet_host_get_host_list failed"); + } + + for (i=0; i<num_nodes; i++) { + for (link_no = 0; link_no < INTERFACE_MAX; link_no++) { + if (host_ids[i] == instance->our_nodeid || !instance->totem_config->interfaces[link_no].configured) { + continue; + } + + err = knet_link_set_ping_timers(instance->knet_handle, host_ids[i], link_no, + instance->totem_config->interfaces[link_no].knet_ping_interval, + instance->totem_config->interfaces[link_no].knet_ping_timeout, + instance->totem_config->interfaces[link_no].knet_ping_precision); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_ping_timers for node " CS_PRI_NODE_ID " link %d failed", host_ids[i], link_no); + } + err = knet_link_set_pong_count(instance->knet_handle, host_ids[i], link_no, + instance->totem_config->interfaces[link_no].knet_pong_count); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_pong_count for node " CS_PRI_NODE_ID " link %d failed",host_ids[i], link_no); + } + err = knet_link_set_priority(instance->knet_handle, host_ids[i], link_no, + instance->totem_config->interfaces[link_no].knet_link_priority); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_priority for node " CS_PRI_NODE_ID " link %d failed", host_ids[i], link_no); + } + + } + } + + /* Log levels get reconfigured from logconfig.c as that happens last in the reload */ + LEAVE(); +} + +static void totemknet_add_config_notifications(struct totemknet_instance *instance) +{ + icmap_track_t icmap_track_totem = NULL; + icmap_track_t icmap_track_reload = NULL; + + ENTER(); + + icmap_track_add("totem.", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, + totemknet_refresh_config, + instance, + &icmap_track_totem); + + icmap_track_add("config.totemconfig_reload_in_progress", + ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY, + totemknet_refresh_config, + instance, + &icmap_track_reload); + + LEAVE(); +} + +static int totemknet_is_crypto_enabled(const 
struct totemknet_instance *instance) +{ + + return (!(strcmp(instance->totem_config->crypto_cipher_type, "none") == 0 && + strcmp(instance->totem_config->crypto_hash_type, "none") == 0)); + +} + +static int totemknet_set_knet_crypto(struct totemknet_instance *instance) +{ + struct knet_handle_crypto_cfg crypto_cfg; + int res; + + /* These have already been validated */ + memcpy(crypto_cfg.crypto_model, instance->totem_config->crypto_model, sizeof(crypto_cfg.crypto_model)); + memcpy(crypto_cfg.crypto_cipher_type, instance->totem_config->crypto_cipher_type, sizeof(crypto_cfg.crypto_model)); + memcpy(crypto_cfg.crypto_hash_type, instance->totem_config->crypto_hash_type, sizeof(crypto_cfg.crypto_model)); + memcpy(crypto_cfg.private_key, instance->totem_config->private_key, instance->totem_config->private_key_len); + crypto_cfg.private_key_len = instance->totem_config->private_key_len; + +#ifdef HAVE_KNET_CRYPTO_RECONF + + knet_log_printf(LOGSYS_LEVEL_DEBUG, "Configuring crypto %s/%s/%s on index %d", + crypto_cfg.crypto_model, + crypto_cfg.crypto_cipher_type, + crypto_cfg.crypto_hash_type, + instance->totem_config->crypto_index + ); + + /* If crypto is being disabled we need to explicitly allow cleartext traffic in knet */ + if (!totemknet_is_crypto_enabled(instance)) { + res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_ALLOW_CLEAR_TRAFFIC); + if (res) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_rx_clear_traffic(ALLOW) failed %s", strerror(errno)); + } + } + + /* use_config will be called later when all nodes are synced */ + res = knet_handle_crypto_set_config(instance->knet_handle, &crypto_cfg, instance->totem_config->crypto_index); + if (res == -1) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config (index %d) failed: %s", instance->totem_config->crypto_index, strerror(errno)); + goto exit_error; + } + if (res == -2) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config (index %d) 
failed: -2", instance->totem_config->crypto_index); + goto exit_error; + } +#else + knet_log_printf(LOGSYS_LEVEL_DEBUG, "Configuring crypto %s/%s/%s", + crypto_cfg.crypto_model, + crypto_cfg.crypto_cipher_type, + crypto_cfg.crypto_hash_type + ); + + res = knet_handle_crypto(instance->knet_handle, &crypto_cfg); + if (res == -1) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto failed: %s", strerror(errno)); + goto exit_error; + } + if (res == -2) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto failed: -2"); + goto exit_error; + } +#endif + + +exit_error: + return res; +} + +/* + * Create an instance + */ +int totemknet_initialize ( + qb_loop_t *poll_handle, + void **knet_context, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int link_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)) +{ + struct totemknet_instance *instance; + char *tmp_str; + int8_t channel=0; + int allow_knet_handle_fallback=0; + int res; + int i; + + instance = malloc (sizeof (struct totemknet_instance)); + if (instance == NULL) { + return (-1); + } + + totemknet_instance_initialize (instance); + + instance->totem_config = totem_config; + + /* + * Configure logging + */ + instance->totemknet_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; + instance->totemknet_log_level_error = totem_config->totem_logging_configuration.log_level_error; + instance->totemknet_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; + instance->totemknet_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; + instance->totemknet_log_level_debug = 
totem_config->totem_logging_configuration.log_level_debug; + instance->totemknet_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; + instance->totemknet_log_printf = totem_config->totem_logging_configuration.log_printf; + + instance->knet_subsys_id = _logsys_subsys_create("KNET", "libknet.h"); + + /* + * Initialize local variables for totemknet + */ + + instance->our_nodeid = instance->totem_config->node_id; + + for (i=0; i< INTERFACE_MAX; i++) { + totemip_copy(&instance->my_ids[i], &totem_config->interfaces[i].bindnet); + instance->my_ids[i].nodeid = instance->our_nodeid; + instance->ip_port[i] = totem_config->interfaces[i].ip_port; + + /* Needed for totemsrp */ + totem_config->interfaces[i].boundto.nodeid = instance->our_nodeid; + } + + instance->poll_handle = poll_handle; + + instance->context = context; + instance->totemknet_deliver_fn = deliver_fn; + + instance->totemknet_iface_change_fn = iface_change_fn; + + instance->totemknet_mtu_changed = mtu_changed; + + instance->totemknet_target_set_completed = target_set_completed; + + instance->loopback_link = 0; + + res = pipe(instance->logpipes); + if (res == -1) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_CRIT, "failed to create pipe for instance->logpipes"); + goto exit_error; + } + if (fcntl(instance->logpipes[0], F_SETFL, O_NONBLOCK) == -1 || + fcntl(instance->logpipes[1], F_SETFL, O_NONBLOCK) == -1) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_CRIT, "failed to set O_NONBLOCK flag for instance->logpipes"); + goto exit_error; + } + + if (icmap_get_string("system.allow_knet_handle_fallback", &tmp_str) == CS_OK) { + if (strcmp(tmp_str, "yes") == 0) { + allow_knet_handle_fallback = 1; + } + free(tmp_str); + } + +#if defined(KNET_API_VER) && (KNET_API_VER == 2) + instance->knet_handle = knet_handle_new(instance->totem_config->node_id, instance->logpipes[1], KNET_LOG_DEBUG, KNET_HANDLE_FLAG_PRIVILEGED); +#else + instance->knet_handle = knet_handle_new(instance->totem_config->node_id, 
instance->logpipes[1], KNET_LOG_DEBUG); +#endif + + if (allow_knet_handle_fallback && !instance->knet_handle && errno == ENAMETOOLONG) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_new failed, trying unprivileged"); +#if defined(KNET_API_VER) && (KNET_API_VER == 2) + instance->knet_handle = knet_handle_new(instance->totem_config->node_id, instance->logpipes[1], KNET_LOG_DEBUG, 0); +#else + instance->knet_handle = knet_handle_new_ex(instance->totem_config->node_id, instance->logpipes[1], KNET_LOG_DEBUG, 0); +#endif + } + + if (!instance->knet_handle) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_CRIT, "knet_handle_new failed"); + goto exit_error; + } + + knet_set_access_list_config(instance); + + res = knet_handle_pmtud_setfreq(instance->knet_handle, instance->totem_config->knet_pmtud_interval); + if (res) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_pmtud_setfreq failed"); + } + res = knet_handle_pmtud_set(instance->knet_handle, instance->totem_config->knet_mtu); + if (res) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_pmtud_set failed"); + } + res = knet_handle_enable_filter(instance->knet_handle, instance, dst_host_filter_callback_fn); + if (res) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_filter failed"); + } + res = knet_handle_enable_sock_notify(instance->knet_handle, instance, socket_error_callback_fn); + if (res) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_sock_notify failed"); + } + res = knet_host_enable_status_change_notify(instance->knet_handle, instance, host_change_callback_fn); + if (res) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_host_enable_status_change_notify failed"); + } + res = knet_handle_enable_pmtud_notify(instance->knet_handle, instance, pmtu_change_callback_fn); + if (res) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_pmtud_notify failed"); + } + global_instance = instance; + + /* Setup 
knet logging level */ + totemknet_configure_log_level(); + + /* Get an fd into knet */ + instance->knet_fd = 0; + res = knet_handle_add_datafd(instance->knet_handle, &instance->knet_fd, &channel); + if (res) { + knet_log_printf(LOG_DEBUG, "knet_handle_add_datafd failed: %s", strerror(errno)); + goto exit_error; + } + + /* Enable crypto if requested */ +#ifdef HAVE_KNET_CRYPTO_RECONF + if (totemknet_is_crypto_enabled(instance)) { + res = totemknet_set_knet_crypto(instance); + if (res == 0) { + res = knet_handle_crypto_use_config(instance->knet_handle, totem_config->crypto_index); + if (res) { + knet_log_printf(LOG_DEBUG, "knet_handle_crypto_use_config failed: %s", strerror(errno)); + goto exit_error; + } + } else { + knet_log_printf(LOG_DEBUG, "Failed to set up knet crypto"); + goto exit_error; + } + res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC); + if (res) { + knet_log_printf(LOG_DEBUG, "knet_handle_crypto_rx_clear_traffic (DISALLOW) failed: %s", strerror(errno)); + goto exit_error; + } + + } else { + res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_ALLOW_CLEAR_TRAFFIC); + if (res) { + knet_log_printf(LOG_DEBUG, "knet_handle_crypto_rx_clear_traffic (ALLOW) failed: %s", strerror(errno)); + goto exit_error; + } + } +#else + if (totemknet_is_crypto_enabled(instance)) { + res = totemknet_set_knet_crypto(instance); + if (res) { + knet_log_printf(LOG_DEBUG, "Failed to set up knet crypto"); + goto exit_error; + } + } +#endif + + /* Set up compression */ + if (strcmp(totem_config->knet_compression_model, "none") != 0) { + /* Not fatal, but will log */ + (void)totemknet_configure_compression(instance, totem_config); + } + + knet_handle_setfwd(instance->knet_handle, 1); + + instance->link_mode = KNET_LINK_POLICY_PASSIVE; + if (strcmp(instance->totem_config->link_mode, "active")==0) { + instance->link_mode = KNET_LINK_POLICY_ACTIVE; + } + if (strcmp(instance->totem_config->link_mode, 
"rr")==0) { + instance->link_mode = KNET_LINK_POLICY_RR; + } + + for (i=0; i<INTERFACE_MAX; i++) { + instance->link_status[i] = malloc(CFG_INTERFACE_STATUS_MAX_LEN); + if (!instance->link_status[i]) { + goto exit_error; + } + } + + qb_loop_poll_add (instance->poll_handle, + QB_LOOP_MED, + instance->logpipes[0], + POLLIN, instance, log_deliver_fn); + + qb_loop_poll_add (instance->poll_handle, + QB_LOOP_HIGH, + instance->knet_fd, + POLLIN, instance, data_deliver_fn); + + /* + * Upper layer isn't ready to receive message because it hasn't + * initialized yet. Add short timer to check the interfaces. + */ + qb_loop_timer_add (instance->poll_handle, + QB_LOOP_MED, + 100*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + totemknet_start_merge_detect_timeout(instance); + + /* Start listening for config changes */ + totemknet_add_config_notifications(instance); + + /* Add stats keys to icmap */ + stats_knet_add_handle(); + + knet_log_printf (LOGSYS_LEVEL_INFO, "totemknet initialized"); + *knet_context = instance; + + return (0); + +exit_error: + log_flush_messages(instance); + free(instance); + return (-1); +} + +void *totemknet_buffer_alloc (void) +{ + /* Need to have space for a message AND a struct mcast in case of encapsulated messages */ + return malloc(KNET_MAX_PACKET_SIZE + 512); +} + +void totemknet_buffer_release (void *ptr) +{ + return free (ptr); +} + +int totemknet_processor_count_set ( + void *knet_context, + int processor_count) +{ + return (0); +} + +int totemknet_recv_flush (void *knet_context) +{ + return (0); +} + +int totemknet_send_flush (void *knet_context) +{ + return (0); +} + +int totemknet_token_send ( + void *knet_context, + const void *msg, + unsigned int msg_len) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res = 0; + + ucast_sendmsg (instance, &instance->token_target, msg, msg_len); + + return (res); +} +int 
totemknet_mcast_flush_send ( + void *knet_context, + const void *msg, + unsigned int msg_len) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len, 0); + + return (res); +} + +int totemknet_mcast_noflush_send ( + void *knet_context, + const void *msg, + unsigned int msg_len) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len, 1); + + return (res); +} + + +extern int totemknet_iface_check (void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res = 0; + + knet_log_printf(LOG_DEBUG, "totemknet: iface_check"); + + return (res); +} + +extern void totemknet_net_mtu_adjust (void *knet_context, struct totem_config *totem_config) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + + knet_log_printf(LOG_DEBUG, "totemknet: Returning MTU of %d", totem_config->net_mtu); +} + +int totemknet_token_target_set ( + void *knet_context, + unsigned int nodeid) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res = 0; + + instance->token_target.nodeid = nodeid; + + instance->totemknet_target_set_completed (instance->context); + + return (res); +} + +extern int totemknet_recv_mcast_empty ( + void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + unsigned int res; + struct sockaddr_storage system_from; + struct msghdr msg_hdr; + struct iovec iov_recv; + struct pollfd ufd; + int nfds; + int msg_processed = 0; + + iov_recv.iov_base = instance->iov_buffer; + iov_recv.iov_len = KNET_MAX_PACKET_SIZE; + + msg_hdr.msg_name = &system_from; + msg_hdr.msg_namelen = sizeof (struct sockaddr_storage); + msg_hdr.msg_iov = &iov_recv; + msg_hdr.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_hdr.msg_control = 0; +#endif +#ifdef 
HAVE_MSGHDR_CONTROLLEN + msg_hdr.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_hdr.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_msg_hdr.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_msg_hdr.msg_accrightslen = 0; +#endif + + do { + ufd.fd = instance->knet_fd; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + res = recvmsg (instance->knet_fd, &msg_hdr, MSG_NOSIGNAL | MSG_DONTWAIT); + if (res != -1) { + msg_processed = 1; + } else { + msg_processed = -1; + } + } + } while (nfds == 1); + + return (msg_processed); +} + +int totemknet_iface_set (void *knet_context, + const struct totem_ip_address *local_addr, + unsigned short ip_port, + unsigned int iface_no) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + + totemip_copy(&instance->my_ids[iface_no], local_addr); + + knet_log_printf(LOG_INFO, "Configured link number %d: local addr: %s, port=%d", iface_no, totemip_print(local_addr), ip_port); + + instance->ip_port[iface_no] = ip_port; + + return 0; +} + + +int totemknet_member_add ( + void *knet_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int link_no) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int err; + int port = instance->ip_port[link_no]; + struct sockaddr_storage remote_ss; + struct sockaddr_storage local_ss; + int addrlen; + int i; + int host_found = 0; + knet_node_id_t host_ids[KNET_MAX_HOST]; + size_t num_host_ids; + + /* Only create 1 loopback link and use link 0 */ + if (member->nodeid == instance->our_nodeid) { + if (!instance->loopback_link) { + link_no = 0; + instance->loopback_link = 1; + } else { + /* Already done */ + return 0; + } + } + + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_add: " CS_PRI_NODE_ID " (%s), link=%d", member->nodeid, totemip_print(member), link_no); + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: local: " 
CS_PRI_NODE_ID " (%s)", local->nodeid, totemip_print(local)); + + + /* Only add the host if it doesn't already exist in knet */ + err = knet_host_get_host_list(instance->knet_handle, host_ids, &num_host_ids); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_host_get_host_list"); + return -1; + } + for (i=0; i<num_host_ids; i++) { + if (host_ids[i] == member->nodeid) { + host_found = 1; + } + } + + if (!host_found) { + err = knet_host_add(instance->knet_handle, member->nodeid); + if (err != 0 && errno != EEXIST) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_host_add"); + return -1; + } + } else { + knet_log_printf (LOGSYS_LEVEL_DEBUG, "nodeid " CS_PRI_NODE_ID " already added", member->nodeid); + } + + + if (err == 0) { + if (knet_host_set_policy(instance->knet_handle, member->nodeid, instance->link_mode)) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_set_policy failed"); + return -1; + } + } + + memset(&local_ss, 0, sizeof(local_ss)); + memset(&remote_ss, 0, sizeof(remote_ss)); + /* Casts to remove const */ + totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)member, port, &remote_ss, &addrlen); + totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)local, port, &local_ss, &addrlen); + + if (member->nodeid == instance->our_nodeid) { + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: loopback link is %d\n", link_no); + + err = knet_link_set_config(instance->knet_handle, member->nodeid, link_no, + KNET_TRANSPORT_LOOPBACK, + &local_ss, &remote_ss, KNET_LINK_FLAG_TRAFFICHIPRIO); + } + else { + err = knet_link_set_config(instance->knet_handle, member->nodeid, link_no, + instance->totem_config->interfaces[link_no].knet_transport, + &local_ss, &remote_ss, KNET_LINK_FLAG_TRAFFICHIPRIO); + } + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_config failed"); + return -1; + } + + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_add: Setting link prio to %d", + 
instance->totem_config->interfaces[link_no].knet_link_priority); + + err = knet_link_set_priority(instance->knet_handle, member->nodeid, link_no, + instance->totem_config->interfaces[link_no].knet_link_priority); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_priority for nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no); + } + + /* + * Ping timeouts may be 0 here for a newly added interface (on a reload), + * so we leave this till later, it will get done in totemknet_refresh_config. + * For the initial startup, we are all preset and ready to go from here. + */ + if (instance->totem_config->interfaces[link_no].knet_ping_interval != 0) { + err = knet_link_set_ping_timers(instance->knet_handle, member->nodeid, link_no, + instance->totem_config->interfaces[link_no].knet_ping_interval, + instance->totem_config->interfaces[link_no].knet_ping_timeout, + instance->totem_config->interfaces[link_no].knet_ping_precision); + if (err) { + /* Flush logs before reporting this error so that the knet message prints before ours */ + int saved_errno = errno; + log_flush_messages(instance); + errno = saved_errno; + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_ping_timers for nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no); + return -1; + } + err = knet_link_set_pong_count(instance->knet_handle, member->nodeid, link_no, + instance->totem_config->interfaces[link_no].knet_pong_count); + if (err) { + /* Flush logs before reporting this error so that the knet message prints before ours */ + int saved_errno = errno; + log_flush_messages(instance); + errno = saved_errno; + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_pong_count for nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no); + return -1; + } + } + + err = knet_link_set_enable(instance->knet_handle, member->nodeid, link_no, 1); + if (err) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_enable for 
nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no); + return -1; + } + + /* register stats */ + stats_knet_add_member(member->nodeid, link_no); + return (0); +} + +int totemknet_member_remove ( + void *knet_context, + const struct totem_ip_address *token_target, + int link_no) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res; + uint8_t link_list[KNET_MAX_LINK]; + size_t num_links; + + knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_remove: " CS_PRI_NODE_ID ", link=%d", token_target->nodeid, link_no); + + /* Don't remove the link with the loopback on it until we shut down */ + if (token_target->nodeid == instance->our_nodeid) { + return 0; + } + + /* Tidy stats */ + stats_knet_del_member(token_target->nodeid, link_no); + + /* Remove the link first */ + res = knet_link_set_enable(instance->knet_handle, token_target->nodeid, link_no, 0); + if (res != 0) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set enable(off) for nodeid " CS_PRI_NODE_ID ", link %d failed", token_target->nodeid, link_no); + return res; + } + + res = knet_link_clear_config(instance->knet_handle, token_target->nodeid, link_no); + if (res != 0) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_clear_config for nodeid " CS_PRI_NODE_ID ", link %d failed", token_target->nodeid, link_no); + return res; + } + + /* If this is the last link, then remove the node */ + res = knet_link_get_link_list(instance->knet_handle, + token_target->nodeid, link_list, &num_links); + if (res) { + return (0); /* not really failure */ + } + + if (num_links == 0) { + res = knet_host_remove(instance->knet_handle, token_target->nodeid); + } + return res; +} + +int totemknet_member_list_rebind_ip ( + void *knet_context) +{ + return (0); +} + + +static int totemknet_configure_compression ( + struct totemknet_instance *instance, + struct totem_config *totem_config) +{ + struct knet_handle_compress_cfg compress_cfg; + int res = 0; + + 
assert(strlen(totem_config->knet_compression_model) < sizeof(compress_cfg.compress_model)); + strcpy(compress_cfg.compress_model, totem_config->knet_compression_model); + + compress_cfg.compress_threshold = totem_config->knet_compression_threshold; + compress_cfg.compress_level = totem_config->knet_compression_level; + + res = knet_handle_compress(instance->knet_handle, &compress_cfg); + if (res) { + KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_handle_compress failed"); + } + return res; +} + +int totemknet_reconfigure ( + void *knet_context, + struct totem_config *totem_config) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res = 0; + + (void)totemknet_configure_compression(instance, totem_config); + +#ifdef HAVE_LIBNOZZLE + /* Set up nozzle device(s). Return code is ignored, because inability + * configure nozzle is not fatal problem, errors are logged and + * there is not much else we can do */ + (void)setup_nozzle(instance); +#endif + + if (totem_config->crypto_changed) { + /* Flip crypto_index */ + totem_config->crypto_index = 3-totem_config->crypto_index; + res = totemknet_set_knet_crypto(instance); + + knet_log_printf(LOG_INFO, "kronosnet crypto reconfigured on index %d: %s/%s/%s", totem_config->crypto_index, + totem_config->crypto_model, + totem_config->crypto_cipher_type, + totem_config->crypto_hash_type); + } + return (res); +} + + +int totemknet_crypto_reconfigure_phase ( + void *knet_context, + struct totem_config *totem_config, + cfg_message_crypto_reconfig_phase_t phase) +{ +#ifdef HAVE_KNET_CRYPTO_RECONF + int res; + int config_to_use; + int config_to_clear; + struct knet_handle_crypto_cfg crypto_cfg; + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + + knet_log_printf(LOGSYS_LEVEL_DEBUG, "totemknet_crypto_reconfigure_phase %d, index=%d\n", phase, totem_config->crypto_index); + + switch (phase) { + case CRYPTO_RECONFIG_PHASE_ACTIVATE: + config_to_use = 
totem_config->crypto_index; + if (!totemknet_is_crypto_enabled(instance)) { + config_to_use = 0; /* we are clearing it */ + } + + /* Enable the new config on this node */ + res = knet_handle_crypto_use_config(instance->knet_handle, config_to_use); + if (res == -1) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_use_config %d failed: %s", config_to_use, strerror(errno)); + } + break; + + case CRYPTO_RECONFIG_PHASE_CLEANUP: + /* + * All nodes should now have the new config. clear the old one out + * OR disable crypto entirely if that's what the new config insists on. + */ + config_to_clear = 3-totem_config->crypto_index; + knet_log_printf(LOGSYS_LEVEL_DEBUG, "Clearing old knet crypto config %d\n", config_to_clear); + + strcpy(crypto_cfg.crypto_model, "none"); + strcpy(crypto_cfg.crypto_cipher_type, "none"); + strcpy(crypto_cfg.crypto_hash_type, "none"); + res = knet_handle_crypto_set_config(instance->knet_handle, &crypto_cfg, config_to_clear); + if (res == -1) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config to clear index %d failed: %s", config_to_clear, strerror(errno)); + } + if (res == -2) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config to clear index %d failed: -2", config_to_clear); + } + + /* If crypto is enabled then disable all cleartext reception */ + if (totemknet_is_crypto_enabled(instance)) { + res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC); + if (res) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_rx_clear_traffic(DISALLOW) failed %s", strerror(errno)); + } + } + } +#endif + return 0; +} + +void totemknet_stats_clear ( + void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + + (void) knet_handle_clear_stats(instance->knet_handle, KNET_CLEARSTATS_HANDLE_AND_LINK); +} + +/* For the stats module */ +int totemknet_link_get_status ( + knet_node_id_t node, uint8_t link_no, + struct 
knet_link_status *status) +{ + int res; + int ret = CS_OK; + + /* We are probably not using knet */ + if (!global_instance) { + return CS_ERR_NOT_EXIST; + } + + if (link_no >= INTERFACE_MAX) { + return CS_ERR_NOT_EXIST; /* Invalid link number */ + } + + res = knet_link_get_status(global_instance->knet_handle, node, link_no, status, sizeof(struct knet_link_status)); + if (res) { + switch (errno) { + case EINVAL: + ret = CS_ERR_INVALID_PARAM; + break; + case EBUSY: + ret = CS_ERR_BUSY; + break; + case EDEADLK: + ret = CS_ERR_TRY_AGAIN; + break; + default: + ret = CS_ERR_LIBRARY; + break; + } + } + + return (ret); +} + +int totemknet_handle_get_stats ( + struct knet_handle_stats *stats) +{ + int res; + + /* We are probably not using knet */ + if (!global_instance) { + return CS_ERR_NOT_EXIST; + } + + res = knet_handle_get_stats(global_instance->knet_handle, stats, sizeof(struct knet_handle_stats)); + if (res != 0) { + return (qb_to_cs_error(-errno)); + } + + return CS_OK; +} + +static void timer_function_merge_detect_timeout ( + void *data) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)data; + + if (instance->merge_detect_messages_sent_before_timeout == 0) { + instance->send_merge_detect_message = 1; + } + + instance->merge_detect_messages_sent_before_timeout = 0; + + totemknet_start_merge_detect_timeout(instance); +} + +static void totemknet_start_merge_detect_timeout( + void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + + qb_loop_timer_add(instance->poll_handle, + QB_LOOP_MED, + instance->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_merge_detect_timeout, + &instance->timer_merge_detect_timeout); + +} + +static void totemknet_stop_merge_detect_timeout( + void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + + qb_loop_timer_del(instance->poll_handle, + 
instance->timer_merge_detect_timeout); +} + +static void log_flush_messages (void *knet_context) +{ + struct pollfd pfd; + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int cont; + + cont = 1; + + while (cont) { + pfd.fd = instance->logpipes[0]; + pfd.events = POLLIN; + pfd.revents = 0; + + if ((poll(&pfd, 1, 0) > 0) && + (pfd.revents & POLLIN) && + (log_deliver_fn(instance->logpipes[0], POLLIN, instance) == 0)) { + cont = 1; + } else { + cont = 0; + } + } +} + + +#ifdef HAVE_LIBNOZZLE +#define NOZZLE_NAME "nozzle.name" +#define NOZZLE_IPADDR "nozzle.ipaddr" +#define NOZZLE_PREFIX "nozzle.ipprefix" +#define NOZZLE_MACADDR "nozzle.macaddr" + +#define NOZZLE_CHANNEL 1 + + +static char *get_nozzle_script_dir(void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + char filename[PATH_MAX + FILENAME_MAX + 1]; + static char updown_dirname[PATH_MAX + FILENAME_MAX + 1]; + int res; + const char *dirname_res; + + /* + * Build script directory based on corosync.conf file location + */ + res = snprintf(filename, sizeof(filename), "%s", + corosync_get_config_file()); + if (res >= sizeof(filename)) { + knet_log_printf (LOGSYS_LEVEL_DEBUG, "nozzle up/down path too long"); + return NULL; + } + + dirname_res = dirname(filename); + + res = snprintf(updown_dirname, sizeof(updown_dirname), "%s/%s", + dirname_res, "updown.d"); + if (res >= sizeof(updown_dirname)) { + knet_log_printf (LOGSYS_LEVEL_DEBUG, "nozzle up/down path too long"); + return NULL; + } + return updown_dirname; +} + +/* + * Deliberately doesn't return the status as caller doesn't care. 
+ * The result will be logged though + */ +static void run_nozzle_script(struct totemknet_instance *instance, int type, const char *typename) +{ + int res; + char *exec_string; + + res = nozzle_run_updown(instance->nozzle_handle, type, &exec_string); + if (res == -1 && errno != ENOENT) { + knet_log_printf (LOGSYS_LEVEL_INFO, "exec nozzle %s script failed: %s", typename, strerror(errno)); + } else if (res == -2) { + knet_log_printf (LOGSYS_LEVEL_INFO, "nozzle %s script failed", typename); + knet_log_printf (LOGSYS_LEVEL_INFO, "%s", exec_string); + } +} + +/* + * Reparse IP address to add in our node ID + * IPv6 addresses must end in '::' + * IPv4 addresses must just be valid + * '/xx' lengths are optional for IPv6, mandatory for IPv4 + * + * Returns the modified IP address as a string to pass into libnozzle + */ +static int reparse_nozzle_ip_address(struct totemknet_instance *instance, + const char *input_addr, + const char *prefix, int nodeid, + char *output_addr, size_t output_len) +{ + char *coloncolon; + int bits; + int max_prefix = 64; + uint32_t nodeid_mask; + uint32_t addr_mask; + uint32_t masked_nodeid; + struct in_addr *addr; + struct totem_ip_address totemip; + + coloncolon = strstr(input_addr, "::"); + if (!coloncolon) { + max_prefix = 30; + } + + bits = atoi(prefix); + if (bits < 8 || bits > max_prefix) { + knet_log_printf(LOGSYS_LEVEL_ERROR, "nozzle IP address prefix must be >= 8 and <= %d (got %d)", max_prefix, bits); + return -1; + } + + /* IPv6 is easy */ + if (coloncolon) { + memcpy(output_addr, input_addr, coloncolon-input_addr); + sprintf(output_addr + (coloncolon-input_addr), "::%x", nodeid); + return 0; + } + + /* For IPv4 we need to parse the address into binary, mask off the required bits, + * add in the masked_nodeid and 'print' it out again + */ + nodeid_mask = UINT32_MAX & ((1<<(32 - bits)) - 1); + addr_mask = UINT32_MAX ^ nodeid_mask; + masked_nodeid = nodeid & nodeid_mask; + + if (totemip_parse(&totemip, input_addr, AF_INET)) { + 
knet_log_printf(LOGSYS_LEVEL_ERROR, "Failed to parse IPv4 nozzle IP address"); + return -1; + } + addr = (struct in_addr *)&totemip.addr; + addr->s_addr &= htonl(addr_mask); + addr->s_addr |= htonl(masked_nodeid); + + inet_ntop(AF_INET, addr, output_addr, output_len); + return 0; +} + +static int create_nozzle_device(void *knet_context, const char *name, + const char *ipaddr, const char *prefix, + const char *macaddr) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + char device_name[IFNAMSIZ+1]; + size_t size = IFNAMSIZ; + int8_t channel = NOZZLE_CHANNEL; + nozzle_t nozzle_dev; + int nozzle_fd; + int res; + char *updown_dir; + char parsed_ipaddr[INET6_ADDRSTRLEN]; + char mac[19]; + + memset(device_name, 0, size); + memset(&mac, 0, sizeof(mac)); + strncpy(device_name, name, size); + + updown_dir = get_nozzle_script_dir(knet_context); + knet_log_printf (LOGSYS_LEVEL_INFO, "nozzle script dir is %s", updown_dir); + + nozzle_dev = nozzle_open(device_name, size, updown_dir); + if (!nozzle_dev) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to init nozzle device %s: %s", device_name, strerror(errno)); + return -1; + } + instance->nozzle_handle = nozzle_dev; + + if (nozzle_set_mac(nozzle_dev, macaddr) < 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to add set nozzle MAC to %s: %s", mac, strerror(errno)); + goto out_clean; + } + + if (reparse_nozzle_ip_address(instance, ipaddr, prefix, instance->our_nodeid, parsed_ipaddr, sizeof(parsed_ipaddr))) { + /* Prints its own errors */ + goto out_clean; + } + knet_log_printf (LOGSYS_LEVEL_INFO, "Local nozzle IP address is %s / %d", parsed_ipaddr, atoi(prefix)); + if (nozzle_add_ip(nozzle_dev, parsed_ipaddr, prefix) < 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to add set nozzle IP addr to %s/%s: %s", parsed_ipaddr, prefix, strerror(errno)); + goto out_clean; + } + + nozzle_fd = nozzle_get_fd(nozzle_dev); + knet_log_printf (LOGSYS_LEVEL_INFO, "Opened '%s' on fd %d", device_name, 
nozzle_fd); + + res = knet_handle_add_datafd(instance->knet_handle, &nozzle_fd, &channel); + if (res != 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to add nozzle FD to knet: %s", strerror(errno)); + goto out_clean; + } + + run_nozzle_script(instance, NOZZLE_PREUP, "pre-up"); + + res = nozzle_set_up(nozzle_dev); + if (res != 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to set nozzle interface UP: %s", strerror(errno)); + goto out_clean; + } + run_nozzle_script(instance, NOZZLE_UP, "up"); + + return 0; + +out_clean: + nozzle_close(nozzle_dev); + return -1; +} + +static int remove_nozzle_device(void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + int res; + int datafd; + + res = knet_handle_get_datafd(instance->knet_handle, NOZZLE_CHANNEL, &datafd); + if (res != 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't find datafd for channel %d: %s", NOZZLE_CHANNEL, strerror(errno)); + return -1; + } + + res = knet_handle_remove_datafd(instance->knet_handle, datafd); + if (res != 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't remove datafd for nozzle channel %d: %s", NOZZLE_CHANNEL, strerror(errno)); + return -1; + } + + run_nozzle_script(instance, NOZZLE_DOWN, "pre-down"); + res = nozzle_set_down(instance->nozzle_handle); + if (res != 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't set nozzle device down: %s", strerror(errno)); + return -1; + } + run_nozzle_script(instance, NOZZLE_POSTDOWN, "post-down"); + + res = nozzle_close(instance->nozzle_handle); + if (res != 0) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't close nozzle device: %s", strerror(errno)); + return -1; + } + knet_log_printf (LOGSYS_LEVEL_INFO, "Removed nozzle device"); + return 0; +} + +static void free_nozzle(struct totemknet_instance *instance) +{ + free(instance->nozzle_name); + free(instance->nozzle_ipaddr); + free(instance->nozzle_prefix); + free(instance->nozzle_macaddr); + + instance->nozzle_name = instance->nozzle_ipaddr 
= instance->nozzle_prefix = + instance->nozzle_macaddr = NULL; +} + +static int setup_nozzle(void *knet_context) +{ + struct totemknet_instance *instance = (struct totemknet_instance *)knet_context; + char *ipaddr_str = NULL; + char *name_str = NULL; + char *prefix_str = NULL; + char *macaddr_str = NULL; + char mac[32]; + int name_res; + int macaddr_res; + int res = -1; + + /* + * Return value ignored on purpose. icmap_get_string changes + * ipaddr_str/prefix_str only on success. + */ + (void)icmap_get_string(NOZZLE_IPADDR, &ipaddr_str); + (void)icmap_get_string(NOZZLE_PREFIX, &prefix_str); + macaddr_res = icmap_get_string(NOZZLE_MACADDR, &macaddr_str); + name_res = icmap_get_string(NOZZLE_NAME, &name_str); + + /* Is is being removed? */ + if (name_res == CS_ERR_NOT_EXIST && instance->nozzle_handle) { + remove_nozzle_device(instance); + free_nozzle(instance); + goto out_free; + } + + if (!name_str) { + /* no nozzle */ + goto out_free; + } + + if (!ipaddr_str) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "No IP address supplied for Nozzle device"); + goto out_free; + } + + if (!prefix_str) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "No prefix supplied for Nozzle IP address"); + goto out_free; + } + + if (macaddr_str && strlen(macaddr_str) != 17) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "macaddr for nozzle device is not in the correct format '%s'", macaddr_str); + goto out_free; + } + if (!macaddr_str) { + macaddr_str = (char*)"54:54:01:00:00:00"; + } + + if (instance->nozzle_name && + (strcmp(name_str, instance->nozzle_name) == 0) && + (strcmp(ipaddr_str, instance->nozzle_ipaddr) == 0) && + (strcmp(prefix_str, instance->nozzle_prefix) == 0) && + (instance->nozzle_macaddr == NULL || + strcmp(macaddr_str, instance->nozzle_macaddr) == 0)) { + /* Nothing has changed */ + knet_log_printf (LOGSYS_LEVEL_DEBUG, "Nozzle device info not changed"); + goto out_free; + } + + /* Add nodeid into MAC address */ + memcpy(mac, macaddr_str, 12); + snprintf(mac+12, sizeof(mac) - 13, 
"%02x:%02x", + instance->our_nodeid >> 8, + instance->our_nodeid & 0xFF); + knet_log_printf (LOGSYS_LEVEL_INFO, "Local nozzle MAC address is %s", mac); + + if (name_res == CS_OK && name_str) { + /* Reconfigure */ + if (instance->nozzle_name) { + remove_nozzle_device(instance); + free_nozzle(instance); + } + + res = create_nozzle_device(knet_context, name_str, ipaddr_str, prefix_str, + mac); + + instance->nozzle_name = strdup(name_str); + instance->nozzle_ipaddr = strdup(ipaddr_str); + instance->nozzle_prefix = strdup(prefix_str); + instance->nozzle_macaddr = strdup(macaddr_str); + if (!instance->nozzle_name || !instance->nozzle_ipaddr || + !instance->nozzle_prefix) { + knet_log_printf (LOGSYS_LEVEL_ERROR, "strdup failed in nozzle allocation"); + /* + * This 'free' will cause a complete reconfigure of the device next time we reload + * but will also let the the current device keep working until then. + * remove_nozzle() only needs the, statically-allocated, nozzle_handle + */ + free_nozzle(instance); + } + } + +out_free: + free(name_str); + free(ipaddr_str); + free(prefix_str); + if (macaddr_res == CS_OK) { + free(macaddr_str); + } + + return res; +} +#endif // HAVE_LIBNOZZLE diff --git a/exec/totemknet.h b/exec/totemknet.h new file mode 100644 index 0000000..4d4f61e --- /dev/null +++ b/exec/totemknet.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef TOTEMKNET_H_DEFINED +#define TOTEMKNET_H_DEFINED + +#include <sys/types.h> +#include <sys/socket.h> +#include <qb/qbloop.h> + +#include <corosync/totem/totem.h> + +/** + * Create an instance + */ +extern int totemknet_initialize ( + qb_loop_t *poll_handle, + void **knet_context, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)); + +extern void *totemknet_buffer_alloc (void); + +extern void totemknet_buffer_release (void *ptr); + +extern int totemknet_processor_count_set ( + void *knet_context, + int processor_count); + +extern int totemknet_token_send ( + void *knet_context, + const void *msg, + unsigned int msg_len); + +extern int totemknet_mcast_flush_send ( + void *knet_context, + const void *msg, + unsigned int msg_len); + +extern int totemknet_mcast_noflush_send ( + void *knet_context, + const void *msg, + unsigned int msg_len); + +extern int totemknet_recv_flush (void *knet_context); + +extern int totemknet_send_flush (void *knet_context); + +extern int totemknet_iface_check (void *knet_context); + +extern int totemknet_finalize (void *knet_context); + +extern void totemknet_net_mtu_adjust (void *knet_context, struct totem_config *totem_config); + +extern int totemknet_nodestatus_get (void *knet_context, unsigned int nodeid, + struct totem_node_status *node_status); + +extern int totemknet_ifaces_get (void *net_context, + char ***status, + unsigned int *iface_count); + +extern int totemknet_iface_set (void *net_context, + const struct totem_ip_address *local_addr, + unsigned short ip_port, + unsigned int iface_no); + +extern int totemknet_token_target_set ( + 
void *knet_context, + unsigned int nodeid); + +extern int totemknet_crypto_set ( + void *knet_context, + const char *cipher_type, + const char *hash_type); + +extern int totemknet_recv_mcast_empty ( + void *knet_context); + +extern int totemknet_member_add ( + void *knet_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no); + +extern int totemknet_member_remove ( + void *knet_context, + const struct totem_ip_address *member, + int ring_no); + +extern int totemknet_member_set_active ( + void *knet_context, + const struct totem_ip_address *member_ip, + int active); + +extern int totemknet_reconfigure ( + void *knet_context, + struct totem_config *totem_config); + +extern int totemknet_crypto_reconfigure_phase ( + void *knet_context, + struct totem_config *totem_config, + cfg_message_crypto_reconfig_phase_t phase); + +extern void totemknet_stats_clear ( + void *knet_context); + +extern void totemknet_configure_log_level (void); + +#endif /* TOTEMKNET_H_DEFINED */ diff --git a/exec/totemnet.c b/exec/totemnet.c new file mode 100644 index 0000000..58992e6 --- /dev/null +++ b/exec/totemnet.c @@ -0,0 +1,628 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2018 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <assert.h> + +#include <totemudp.h> +#include <totemudpu.h> +#include <totemknet.h> +#include <totemnet.h> +#include <qb/qbloop.h> + +#define LOGSYS_UTILS_ONLY 1 +#include <corosync/logsys.h> + +struct transport { + const char *name; + + int (*initialize) ( + qb_loop_t *loop_pt, + void **transport_instance, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)); + + void *(*buffer_alloc) (void); + + void (*buffer_release) (void *ptr); + + int (*processor_count_set) ( + void *transport_context, + int processor_count); + + int (*token_send) ( + void *transport_context, + const void *msg, + unsigned int 
msg_len); + + int (*mcast_flush_send) ( + void *transport_context, + const void *msg, + unsigned int msg_len); + + + int (*mcast_noflush_send) ( + void *transport_context, + const void *msg, + unsigned int msg_len); + + int (*recv_flush) (void *transport_context); + + int (*send_flush) (void *transport_context); + + int (*iface_check) (void *transport_context); + + int (*finalize) (void *transport_context); + + void (*net_mtu_adjust) (void *transport_context, struct totem_config *totem_config); + + const char *(*iface_print) (void *transport_context); + + int (*ifaces_get) ( + void *transport_context, + char ***status, + unsigned int *iface_count); + + int (*nodestatus_get) ( + void *transport_context, + unsigned int nodeid, + struct totem_node_status *node_status); + + int (*token_target_set) ( + void *transport_context, + unsigned int nodeid); + + int (*crypto_set) ( + void *transport_context, + const char *cipher_type, + const char *hash_type); + + int (*recv_mcast_empty) ( + void *transport_context); + + int (*iface_set) ( + void *transport_context, + const struct totem_ip_address *local, + unsigned short ip_port, + unsigned int ring_no); + + int (*member_add) ( + void *transport_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no); + + int (*member_remove) ( + void *transport_context, + const struct totem_ip_address *member, + int ring_no); + + int (*member_set_active) ( + void *transport_context, + const struct totem_ip_address *member, + int active); + + int (*reconfigure) ( + void *net_context, + struct totem_config *totem_config); + + int (*crypto_reconfigure_phase) ( + void *net_context, + struct totem_config *totem_config, + cfg_message_crypto_reconfig_phase_t phase); + + void (*stats_clear) ( + void *net_context); +}; + +struct transport transport_entries[] = { + { + .name = "UDP/IP Multicast", + .initialize = totemudp_initialize, + .buffer_alloc = totemudp_buffer_alloc, + .buffer_release = 
totemudp_buffer_release, + .processor_count_set = totemudp_processor_count_set, + .token_send = totemudp_token_send, + .mcast_flush_send = totemudp_mcast_flush_send, + .mcast_noflush_send = totemudp_mcast_noflush_send, + .recv_flush = totemudp_recv_flush, + .send_flush = totemudp_send_flush, + .iface_set = totemudp_iface_set, + .iface_check = totemudp_iface_check, + .finalize = totemudp_finalize, + .net_mtu_adjust = totemudp_net_mtu_adjust, + .ifaces_get = totemudp_ifaces_get, + .nodestatus_get = totemudp_nodestatus_get, + .token_target_set = totemudp_token_target_set, + .crypto_set = totemudp_crypto_set, + .recv_mcast_empty = totemudp_recv_mcast_empty, + .member_add = totemudp_member_add, + .member_remove = totemudp_member_remove, + .reconfigure = totemudp_reconfigure, + .crypto_reconfigure_phase = NULL + }, + { + .name = "UDP/IP Unicast", + .initialize = totemudpu_initialize, + .buffer_alloc = totemudpu_buffer_alloc, + .buffer_release = totemudpu_buffer_release, + .processor_count_set = totemudpu_processor_count_set, + .token_send = totemudpu_token_send, + .mcast_flush_send = totemudpu_mcast_flush_send, + .mcast_noflush_send = totemudpu_mcast_noflush_send, + .recv_flush = totemudpu_recv_flush, + .send_flush = totemudpu_send_flush, + .iface_set = totemudpu_iface_set, + .iface_check = totemudpu_iface_check, + .finalize = totemudpu_finalize, + .net_mtu_adjust = totemudpu_net_mtu_adjust, + .ifaces_get = totemudpu_ifaces_get, + .nodestatus_get = totemudpu_nodestatus_get, + .token_target_set = totemudpu_token_target_set, + .crypto_set = totemudpu_crypto_set, + .recv_mcast_empty = totemudpu_recv_mcast_empty, + .member_add = totemudpu_member_add, + .member_remove = totemudpu_member_remove, + .reconfigure = totemudpu_reconfigure, + .crypto_reconfigure_phase = NULL + }, + { + .name = "Kronosnet", + .initialize = totemknet_initialize, + .buffer_alloc = totemknet_buffer_alloc, + .buffer_release = totemknet_buffer_release, + .processor_count_set = 
totemknet_processor_count_set, + .token_send = totemknet_token_send, + .mcast_flush_send = totemknet_mcast_flush_send, + .mcast_noflush_send = totemknet_mcast_noflush_send, + .recv_flush = totemknet_recv_flush, + .send_flush = totemknet_send_flush, + .iface_set = totemknet_iface_set, + .iface_check = totemknet_iface_check, + .finalize = totemknet_finalize, + .net_mtu_adjust = totemknet_net_mtu_adjust, + .ifaces_get = totemknet_ifaces_get, + .nodestatus_get = totemknet_nodestatus_get, + .token_target_set = totemknet_token_target_set, + .crypto_set = totemknet_crypto_set, + .recv_mcast_empty = totemknet_recv_mcast_empty, + .member_add = totemknet_member_add, + .member_remove = totemknet_member_remove, + .reconfigure = totemknet_reconfigure, + .crypto_reconfigure_phase = totemknet_crypto_reconfigure_phase, + .stats_clear = totemknet_stats_clear + } +}; + +struct totemnet_instance { + void *transport_context; + + struct transport *transport; + void (*totemnet_log_printf) ( + int level, + int subsys, + const char *function, + const char *file, + int line, + const char *format, + ...)__attribute__((format(printf, 6, 7))); + + int totemnet_subsys_id; +}; + +#define log_printf(level, format, args...) 
\ +do { \ + instance->totemnet_log_printf ( \ + level, \ + instance->totemnet_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ +} while (0); + +static void totemnet_instance_initialize ( + struct totemnet_instance *instance, + struct totem_config *config) +{ + int transport; + + instance->totemnet_log_printf = config->totem_logging_configuration.log_printf; + instance->totemnet_subsys_id = config->totem_logging_configuration.log_subsys_id; + + + transport = config->transport_number; + + log_printf (LOGSYS_LEVEL_NOTICE, + "Initializing transport (%s).", transport_entries[transport].name); + + instance->transport = &transport_entries[transport]; +} + +int totemnet_crypto_set ( + void *net_context, + const char *cipher_type, + const char *hash_type) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->crypto_set (instance->transport_context, + cipher_type, hash_type); + + return res; +} + +int totemnet_finalize ( + void *net_context) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->finalize (instance->transport_context); + + return (res); +} + +int totemnet_initialize ( + qb_loop_t *loop_pt, + void **net_context, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)) +{ + struct totemnet_instance *instance; + unsigned int res; + + instance = malloc (sizeof (struct totemnet_instance)); + if (instance == NULL) { + return (-1); + } + totemnet_instance_initialize (instance, totem_config); + + res = 
instance->transport->initialize (loop_pt, + &instance->transport_context, totem_config, stats, + context, deliver_fn, iface_change_fn, mtu_changed, target_set_completed); + + if (res == -1) { + goto error_destroy; + } + + *net_context = instance; + return (0); + +error_destroy: + free (instance); + return (-1); +} + +void *totemnet_buffer_alloc (void *net_context) +{ + struct totemnet_instance *instance = net_context; + assert (instance != NULL); + assert (instance->transport != NULL); + return instance->transport->buffer_alloc(); +} + +void totemnet_buffer_release (void *net_context, void *ptr) +{ + struct totemnet_instance *instance = net_context; + assert (instance != NULL); + assert (instance->transport != NULL); + instance->transport->buffer_release (ptr); +} + +int totemnet_processor_count_set ( + void *net_context, + int processor_count) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->processor_count_set (instance->transport_context, processor_count); + return (res); +} + +int totemnet_recv_flush (void *net_context) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->recv_flush (instance->transport_context); + + return (res); +} + +int totemnet_send_flush (void *net_context) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->send_flush (instance->transport_context); + + return (res); +} + +int totemnet_token_send ( + void *net_context, + const void *msg, + unsigned int msg_len) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->token_send (instance->transport_context, msg, msg_len); + + return (res); +} +int totemnet_mcast_flush_send ( + void *net_context, + const void *msg, + unsigned int msg_len) +{ + struct totemnet_instance *instance = (struct 
totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->mcast_flush_send (instance->transport_context, msg, msg_len); + + return (res); +} + +int totemnet_mcast_noflush_send ( + void *net_context, + const void *msg, + unsigned int msg_len) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->mcast_noflush_send (instance->transport_context, msg, msg_len); + + return (res); +} + +extern int totemnet_iface_check (void *net_context) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + res = instance->transport->iface_check (instance->transport_context); + + return (res); +} + +extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res = 0; + + instance->transport->net_mtu_adjust (instance->transport_context, totem_config); + return (res); +} + +int totemnet_iface_set (void *net_context, + const struct totem_ip_address *interface_addr, + unsigned short ip_port, + unsigned int iface_no) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + int res; + + res = instance->transport->iface_set (instance->transport_context, interface_addr, ip_port, iface_no); + + return (res); +} + +extern int totemnet_nodestatus_get ( + void *net_context, + unsigned int nodeid, + struct totem_node_status *node_status) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res; + + res = instance->transport->nodestatus_get (instance->transport_context, nodeid, node_status); + + return (res); +} + +int totemnet_ifaces_get ( + void *net_context, + char ***status, + unsigned int *iface_count) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res; + + res = instance->transport->ifaces_get 
(instance->transport_context, status, iface_count); + + return (res); +} + +int totemnet_token_target_set ( + void *net_context, + unsigned int nodeid) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res; + + res = instance->transport->token_target_set (instance->transport_context, nodeid); + + return (res); +} + +extern int totemnet_recv_mcast_empty ( + void *net_context) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res; + + res = instance->transport->recv_mcast_empty (instance->transport_context); + + return (res); +} + +extern int totemnet_member_add ( + void *net_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res = 0; + + if (instance->transport->member_add) { + res = instance->transport->member_add ( + instance->transport_context, + local, + member, + ring_no); + } + + return (res); +} + +extern int totemnet_member_remove ( + void *net_context, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res = 0; + + if (instance->transport->member_remove) { + res = instance->transport->member_remove ( + instance->transport_context, + member, + ring_no); + } + + return (res); +} + +int totemnet_member_set_active ( + void *net_context, + const struct totem_ip_address *member, + int active) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res = 0; + + if (instance->transport->member_set_active) { + res = instance->transport->member_set_active ( + instance->transport_context, + member, + active); + } + + return (res); +} + +int totemnet_reconfigure ( + void *net_context, + struct totem_config *totem_config) +{ + struct totemnet_instance *instance = (struct 
totemnet_instance *)net_context; + unsigned int res = 0; + + res = instance->transport->reconfigure ( + instance->transport_context, + totem_config); + + return (res); +} + +int totemnet_crypto_reconfigure_phase ( + void *net_context, + struct totem_config *totem_config, + cfg_message_crypto_reconfig_phase_t phase) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res = 0; + + if (instance->transport->crypto_reconfigure_phase) { + res = instance->transport->crypto_reconfigure_phase ( + instance->transport_context, + totem_config, phase); + } + return (res); +} + +void totemnet_stats_clear ( + void *net_context) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + + if (instance->transport->stats_clear) { + instance->transport->stats_clear ( + instance->transport_context); + } +} diff --git a/exec/totemnet.h b/exec/totemnet.h new file mode 100644 index 0000000..e71d9e0 --- /dev/null +++ b/exec/totemnet.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2007, 2009 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Totem Network interface - also does encryption/decryption + * + * depends on poll abstraction, POSIX, IPV4 + */ + +#ifndef TOTEMNET_H_DEFINED +#define TOTEMNET_H_DEFINED + +#include <sys/types.h> +#include <sys/socket.h> + +#include <corosync/totem/totem.h> + +#define TOTEMNET_NOFLUSH 0 +#define TOTEMNET_FLUSH 1 + +/** + * Create an instance + */ +extern int totemnet_initialize ( + qb_loop_t *poll_handle, + void **net_context, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int iface_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)); + +extern void *totemnet_buffer_alloc (void *net_context); + +extern void totemnet_buffer_release (void *net_context, void *ptr); + +extern int totemnet_processor_count_set ( + void *net_context, + int processor_count); + +extern int totemnet_token_send ( + void *net_context, + const void 
*msg, + unsigned int msg_len); + +extern int totemnet_mcast_flush_send ( + void *net_context, + const void *msg, + unsigned int msg_len); + +extern int totemnet_mcast_noflush_send ( + void *net_context, + const void *msg, + unsigned int msg_len); + +extern int totemnet_recv_flush (void *net_context); + +extern int totemnet_send_flush (void *net_context); + +extern int totemnet_iface_set (void *net_context, + const struct totem_ip_address *interface_addr, + unsigned short ip_port, + unsigned int iface_no); + +extern int totemnet_iface_check (void *net_context); + +extern int totemnet_finalize (void *net_context); + +extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config); + +extern int totemnet_reconfigure (void *net_context, struct totem_config *totem_config); + +extern int totemnet_crypto_reconfigure_phase (void *net_context, struct totem_config *totem_config, cfg_message_crypto_reconfig_phase_t phase); + +extern void totemnet_stats_clear (void *net_context); + +extern const char *totemnet_iface_print (void *net_context); + +extern int totemnet_nodestatus_get ( + void *net_context, + unsigned int nodeid, + struct totem_node_status *node_status); + +extern int totemnet_ifaces_get ( + void *net_context, + char ***status, + unsigned int *iface_count); + +extern int totemnet_token_target_set ( + void *net_context, + unsigned int target_nodeid); + +extern int totemnet_crypto_set ( + void *net_context, + const char *cipher_type, + const char *hash_type); + +extern int totemnet_recv_mcast_empty ( + void *net_context); + +extern int totemnet_member_add ( + void *net_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no); + +extern int totemnet_member_remove ( + void *net_context, + const struct totem_ip_address *member, + int ring_no); + +extern int totemnet_member_set_active ( + void *net_context, + const struct totem_ip_address *member, + int active); + +#endif /* TOTEMNET_H_DEFINED */ 
diff --git a/exec/totempg.c b/exec/totempg.c new file mode 100644 index 0000000..c30c077 --- /dev/null +++ b/exec/totempg.c @@ -0,0 +1,1620 @@ +/* + * Copyright (c) 2003-2005 MontaVista Software, Inc. + * Copyright (c) 2005 OSDL. + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * Author: Mark Haverkamp (markh@osdl.org) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * FRAGMENTATION AND PACKING ALGORITHM: + * + * Assemble the entire message into one buffer + * if full fragment + * store fragment into lengths list + * for each full fragment + * multicast fragment + * set length and fragment fields of pg message + * store remaining multicast into head of fragmentation data and set lens field + * + * If a message exceeds the maximum packet size allowed by the totem + * single ring protocol, the protocol could lose forward progress. + * Statically calculating the allowed data amount doesn't work because + * the amount of data allowed depends on the number of fragments in + * each message. In this implementation, the maximum fragment size + * is dynamically calculated for each fragment added to the message. + + * It is possible for a message to be two bytes short of the maximum + * packet size. This occurs when a message or collection of + * messages + the mcast header + the lens are two bytes short of the + * end of the packet. Since another len field consumes two bytes, the + * len field would consume the rest of the packet without room for data. + * + * One optimization would be to forgo the final len field and determine + * it from the size of the udp datagram. Then this condition would no + * longer occur. 
+ */ + +/* + * ASSEMBLY AND UNPACKING ALGORITHM: + * + * copy incoming packet into assembly data buffer indexed by current + * location of end of fragment + * + * if not fragmented + * deliver all messages in assembly data buffer + * else + * if msg_count > 1 and fragmented + * deliver all messages except last message in assembly data buffer + * copy last fragmented section to start of assembly data buffer + * else + * if msg_count = 1 and fragmented + * do nothing + * + */ + +#include <config.h> + +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/uio.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <pthread.h> +#include <errno.h> +#include <limits.h> + +#include <corosync/swab.h> +#include <qb/qblist.h> +#include <qb/qbloop.h> +#include <qb/qbipcs.h> +#include <corosync/totem/totempg.h> +#define LOGSYS_UTILS_ONLY 1 +#include <corosync/logsys.h> + +#include "util.h" +#include "totemsrp.h" + +struct totempg_mcast_header { + short version; + short type; +}; + +#if !(defined(__i386__) || defined(__x86_64__)) +/* + * Need align on architectures different then i386 or x86_64 + */ +#define TOTEMPG_NEED_ALIGN 1 +#endif + +/* + * totempg_mcast structure + * + * header: Identify the mcast. + * fragmented: Set if this message continues into next message + * continuation: Set if this message is a continuation from last message + * msg_count Indicates how many packed messages are contained + * in the mcast. + * Also, the size of each packed message and the messages themselves are + * appended to the end of this structure when sent. 
+ */ +struct totempg_mcast { + struct totempg_mcast_header header; + unsigned char fragmented; + unsigned char continuation; + unsigned short msg_count; + /* + * short msg_len[msg_count]; + */ + /* + * data for messages + */ +}; + +/* + * Maximum packet size for totem pg messages + */ +#define TOTEMPG_PACKET_SIZE (totempg_totem_config->net_mtu - \ + sizeof (struct totempg_mcast)) + +/* + * Local variables used for packing small messages + */ +static unsigned short mcast_packed_msg_lens[FRAME_SIZE_MAX]; + +static int mcast_packed_msg_count = 0; + +static int totempg_reserved = 1; + +static unsigned int totempg_size_limit; + +static totem_queue_level_changed_fn totem_queue_level_changed = NULL; + +static uint32_t totempg_threaded_mode = 0; + +static void *totemsrp_context; + +/* + * Function and data used to log messages + */ +static int totempg_log_level_security; +static int totempg_log_level_error; +static int totempg_log_level_warning; +static int totempg_log_level_notice; +static int totempg_log_level_debug; +static int totempg_subsys_id; +static void (*totempg_log_printf) ( + int level, + int subsys, + const char *function, + const char *file, + int line, + const char *format, ...) 
__attribute__((format(printf, 6, 7))); + +struct totem_config *totempg_totem_config; + +static totempg_stats_t totempg_stats; + +enum throw_away_mode { + THROW_AWAY_INACTIVE, + THROW_AWAY_ACTIVE +}; + +struct assembly { + unsigned int nodeid; + unsigned char data[MESSAGE_SIZE_MAX+KNET_MAX_PACKET_SIZE]; + int index; + unsigned char last_frag_num; + enum throw_away_mode throw_away_mode; + struct qb_list_head list; +}; + +static void assembly_deref (struct assembly *assembly); + +static int callback_token_received_fn (enum totem_callback_token_type type, + const void *data); + +QB_LIST_DECLARE(assembly_list_inuse); + +/* + * Free list is used both for transitional and operational assemblies + */ +QB_LIST_DECLARE(assembly_list_free); + +QB_LIST_DECLARE(assembly_list_inuse_trans); + +QB_LIST_DECLARE(totempg_groups_list); + +/* + * Staging buffer for packed messages. Messages are staged in this buffer + * before sending. Multiple messages may fit which cuts down on the + * number of mcasts sent. If a message doesn't completely fit, then + * the mcast header has a fragment bit set that says that there are more + * data to follow. fragment_size is an index into the buffer. It indicates + * the size of message data and where to place new message data. + * fragment_contuation indicates whether the first packed message in + * the buffer is a continuation of a previously packed fragment. 
+ */ +static unsigned char *fragmentation_data; + +static int fragment_size = 0; + +static int fragment_continuation = 0; + +static int totempg_waiting_transack = 0; + +struct totempg_group_instance { + void (*deliver_fn) ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required); + + void (*confchg_fn) ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id); + + struct totempg_group *groups; + + int groups_cnt; + int32_t q_level; + + struct qb_list_head list; +}; + +static unsigned char next_fragment = 1; + +static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER; + +static pthread_mutex_t callback_token_mutex = PTHREAD_MUTEX_INITIALIZER; + +static pthread_mutex_t mcast_msg_mutex = PTHREAD_MUTEX_INITIALIZER; + +#define log_printf(level, format, args...) 
\ +do { \ + totempg_log_printf(level, \ + totempg_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + format, ##args); \ +} while (0); + +static int msg_count_send_ok (int msg_count); + +static int byte_count_send_ok (int byte_count); + +static void totempg_waiting_trans_ack_cb (int waiting_trans_ack) +{ + log_printf(LOG_DEBUG, "waiting_trans_ack changed to %u", waiting_trans_ack); + totempg_waiting_transack = waiting_trans_ack; +} + +static struct assembly *assembly_ref (unsigned int nodeid) +{ + struct assembly *assembly; + struct qb_list_head *list; + struct qb_list_head *active_assembly_list_inuse; + + if (totempg_waiting_transack) { + active_assembly_list_inuse = &assembly_list_inuse_trans; + } else { + active_assembly_list_inuse = &assembly_list_inuse; + } + + /* + * Search inuse list for node id and return assembly buffer if found + */ + qb_list_for_each(list, active_assembly_list_inuse) { + assembly = qb_list_entry (list, struct assembly, list); + + if (nodeid == assembly->nodeid) { + return (assembly); + } + } + + /* + * Nothing found in inuse list get one from free list if available + */ + if (qb_list_empty (&assembly_list_free) == 0) { + assembly = qb_list_first_entry (&assembly_list_free, struct assembly, list); + qb_list_del (&assembly->list); + qb_list_add (&assembly->list, active_assembly_list_inuse); + assembly->nodeid = nodeid; + assembly->index = 0; + assembly->last_frag_num = 0; + assembly->throw_away_mode = THROW_AWAY_INACTIVE; + return (assembly); + } + + /* + * Nothing available in inuse or free list, so allocate a new one + */ + assembly = malloc (sizeof (struct assembly)); + /* + * TODO handle memory allocation failure here + */ + assert (assembly); + assembly->nodeid = nodeid; + assembly->data[0] = 0; + assembly->index = 0; + assembly->last_frag_num = 0; + assembly->throw_away_mode = THROW_AWAY_INACTIVE; + qb_list_init (&assembly->list); + qb_list_add (&assembly->list, active_assembly_list_inuse); + + return (assembly); +} + +static void 
assembly_deref (struct assembly *assembly) +{ + qb_list_del (&assembly->list); + qb_list_add (&assembly->list, &assembly_list_free); +} + +static void assembly_deref_from_normal_and_trans (int nodeid) +{ + int j; + struct qb_list_head *list, *tmp_iter; + struct qb_list_head *active_assembly_list_inuse; + struct assembly *assembly; + + for (j = 0; j < 2; j++) { + if (j == 0) { + active_assembly_list_inuse = &assembly_list_inuse; + } else { + active_assembly_list_inuse = &assembly_list_inuse_trans; + } + + qb_list_for_each_safe(list, tmp_iter, active_assembly_list_inuse) { + assembly = qb_list_entry (list, struct assembly, list); + + if (nodeid == assembly->nodeid) { + qb_list_del (&assembly->list); + qb_list_add (&assembly->list, &assembly_list_free); + } + } + } + +} + +static inline void app_confchg_fn ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id) +{ + int i; + struct totempg_group_instance *instance; + struct qb_list_head *list; + + /* + * For every leaving processor, add to free list + * This also has the side effect of clearing out the dataset + * In the leaving processor's assembly buffer. 
+ */ + for (i = 0; i < left_list_entries; i++) { + assembly_deref_from_normal_and_trans (left_list[i]); + } + + qb_list_for_each(list, &totempg_groups_list) { + instance = qb_list_entry (list, struct totempg_group_instance, list); + + if (instance->confchg_fn) { + instance->confchg_fn ( + configuration_type, + member_list, + member_list_entries, + left_list, + left_list_entries, + joined_list, + joined_list_entries, + ring_id); + } + } +} + +static inline void group_endian_convert ( + void *msg, + int msg_len) +{ + unsigned short *group_len; + int i; + char *aligned_msg; + +#ifdef TOTEMPG_NEED_ALIGN + /* + * Align data structure for not i386 or x86_64 + */ + if ((size_t)msg % sizeof(char *) != 0) { + aligned_msg = alloca(msg_len); + memcpy(aligned_msg, msg, msg_len); + } else { + aligned_msg = msg; + } +#else + aligned_msg = msg; +#endif + + group_len = (unsigned short *)aligned_msg; + group_len[0] = swab16(group_len[0]); + for (i = 1; i < group_len[0] + 1; i++) { + group_len[i] = swab16(group_len[i]); + } + + if (aligned_msg != msg) { + memcpy(msg, aligned_msg, msg_len); + } +} + +static inline int group_matches ( + struct iovec *iovec, + unsigned int iov_len, + struct totempg_group *groups_b, + unsigned int group_b_cnt, + unsigned int *adjust_iovec) +{ + unsigned short *group_len; + char *group_name; + int i; + int j; +#ifdef TOTEMPG_NEED_ALIGN + struct iovec iovec_aligned = { NULL, 0 }; +#endif + + assert (iov_len == 1); + +#ifdef TOTEMPG_NEED_ALIGN + /* + * Align data structure for not i386 or x86_64 + */ + if ((size_t)iovec->iov_base % sizeof(char *) != 0) { + iovec_aligned.iov_base = alloca(iovec->iov_len); + memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len); + iovec_aligned.iov_len = iovec->iov_len; + iovec = &iovec_aligned; + } +#endif + + group_len = (unsigned short *)iovec->iov_base; + group_name = ((char *)iovec->iov_base) + + sizeof (unsigned short) * (group_len[0] + 1); + + + /* + * Calculate amount to adjust the iovec by before 
delivering to app + */ + *adjust_iovec = sizeof (unsigned short) * (group_len[0] + 1); + for (i = 1; i < group_len[0] + 1; i++) { + *adjust_iovec += group_len[i]; + } + + /* + * Determine if this message should be delivered to this instance + */ + for (i = 1; i < group_len[0] + 1; i++) { + for (j = 0; j < group_b_cnt; j++) { + if ((group_len[i] == groups_b[j].group_len) && + (memcmp (groups_b[j].group, group_name, group_len[i]) == 0)) { + return (1); + } + } + group_name += group_len[i]; + } + return (0); +} + + +static inline void app_deliver_fn ( + unsigned int nodeid, + void *msg, + unsigned int msg_len, + int endian_conversion_required) +{ + struct totempg_group_instance *instance; + struct iovec stripped_iovec; + unsigned int adjust_iovec; + struct iovec *iovec; + struct qb_list_head *list; + + struct iovec aligned_iovec = { NULL, 0 }; + + if (endian_conversion_required) { + group_endian_convert (msg, msg_len); + } + + /* + * TODO: segmentation/assembly need to be redesigned to provide aligned access + * in all cases to avoid memory copies on non386 archs. 
Probably broke backwars + * compatibility + */ + +#ifdef TOTEMPG_NEED_ALIGN + /* + * Align data structure for not i386 or x86_64 + */ + aligned_iovec.iov_base = alloca(msg_len); + aligned_iovec.iov_len = msg_len; + memcpy(aligned_iovec.iov_base, msg, msg_len); +#else + aligned_iovec.iov_base = msg; + aligned_iovec.iov_len = msg_len; +#endif + + iovec = &aligned_iovec; + + qb_list_for_each(list, &totempg_groups_list) { + instance = qb_list_entry (list, struct totempg_group_instance, list); + if (group_matches (iovec, 1, instance->groups, instance->groups_cnt, &adjust_iovec)) { + stripped_iovec.iov_len = iovec->iov_len - adjust_iovec; + stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec; + +#ifdef TOTEMPG_NEED_ALIGN + /* + * Align data structure for not i386 or x86_64 + */ + if ((uintptr_t)((char *)iovec->iov_base + adjust_iovec) % (sizeof(char *)) != 0) { + /* + * Deal with misalignment + */ + stripped_iovec.iov_base = + alloca (stripped_iovec.iov_len); + memcpy (stripped_iovec.iov_base, + (char *)iovec->iov_base + adjust_iovec, + stripped_iovec.iov_len); + } +#endif + instance->deliver_fn ( + nodeid, + stripped_iovec.iov_base, + stripped_iovec.iov_len, + endian_conversion_required); + } + } +} + +static void totempg_confchg_fn ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id) +{ +// TODO optimize this + app_confchg_fn (configuration_type, + member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries, + ring_id); +} + +static void totempg_deliver_fn ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required) +{ + struct totempg_mcast *mcast; + unsigned short *msg_lens; + int i; + struct assembly *assembly; + char header[FRAME_SIZE_MAX]; + int 
msg_count; + int continuation; + int start; + const char *data; + int datasize; + struct iovec iov_delv; + size_t expected_msg_len; + + assembly = assembly_ref (nodeid); + assert (assembly); + + if (msg_len < sizeof(struct totempg_mcast)) { + log_printf(LOG_WARNING, + "Message (totempg_mcast) received from node " CS_PRI_NODE_ID " is too short... Ignoring.", nodeid); + + return ; + } + + /* + * Assemble the header into one block of data and + * assemble the packet contents into one block of data to simplify delivery + */ + + mcast = (struct totempg_mcast *)msg; + if (endian_conversion_required) { + mcast->msg_count = swab16 (mcast->msg_count); + } + + msg_count = mcast->msg_count; + datasize = sizeof (struct totempg_mcast) + + msg_count * sizeof (unsigned short); + + if (msg_len < datasize) { + log_printf(LOG_WARNING, + "Message (totempg_mcast datasize) received from node " CS_PRI_NODE_ID + " is too short... Ignoring.", nodeid); + + return ; + } + + memcpy (header, msg, datasize); + data = msg; + + msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast)); + expected_msg_len = datasize; + for (i = 0; i < mcast->msg_count; i++) { + if (endian_conversion_required) { + msg_lens[i] = swab16 (msg_lens[i]); + } + + expected_msg_len += msg_lens[i]; + } + + if (msg_len != expected_msg_len) { + log_printf(LOG_WARNING, + "Message (totempg_mcast) received from node " CS_PRI_NODE_ID + " doesn't have expected length of %zu (has %u) bytes... Ignoring.", + nodeid, expected_msg_len, msg_len); + + return ; + } + + assert((assembly->index+msg_len) < sizeof(assembly->data)); + memcpy (&assembly->data[assembly->index], &data[datasize], + msg_len - datasize); + + /* + * If the last message in the buffer is a fragment, then we + * can't deliver it. We'll first deliver the full messages + * then adjust the assembly buffer so we can add the rest of the + * fragment when it arrives. + */ + msg_count = mcast->fragmented ? 
mcast->msg_count - 1 : mcast->msg_count; + continuation = mcast->continuation; + iov_delv.iov_base = (void *)&assembly->data[0]; + iov_delv.iov_len = assembly->index + msg_lens[0]; + + /* + * Make sure that if this message is a continuation, that it + * matches the sequence number of the previous fragment. + * Also, if the first packed message is a continuation + * of a previous message, but the assembly buffer + * is empty, then we need to discard it since we can't + * assemble a complete message. Likewise, if this message isn't a + * continuation and the assembly buffer is empty, we have to discard + * the continued message. + */ + start = 0; + + if (assembly->throw_away_mode == THROW_AWAY_ACTIVE) { + /* Throw away the first msg block */ + if (mcast->fragmented == 0 || mcast->fragmented == 1) { + assembly->throw_away_mode = THROW_AWAY_INACTIVE; + + assembly->index += msg_lens[0]; + iov_delv.iov_base = (void *)&assembly->data[assembly->index]; + iov_delv.iov_len = msg_lens[1]; + start = 1; + } + } else + if (assembly->throw_away_mode == THROW_AWAY_INACTIVE) { + if (continuation == assembly->last_frag_num) { + assembly->last_frag_num = mcast->fragmented; + for (i = start; i < msg_count; i++) { + app_deliver_fn(nodeid, iov_delv.iov_base, iov_delv.iov_len, + endian_conversion_required); + assembly->index += msg_lens[i]; + iov_delv.iov_base = (void *)&assembly->data[assembly->index]; + if (i < (msg_count - 1)) { + iov_delv.iov_len = msg_lens[i + 1]; + } + } + } else { + log_printf (LOG_DEBUG, "fragmented continuation %u is not equal to assembly last_frag_num %u", + continuation, assembly->last_frag_num); + assembly->throw_away_mode = THROW_AWAY_ACTIVE; + } + } + + if (mcast->fragmented == 0) { + /* + * End of messages, dereference assembly struct + */ + assembly->last_frag_num = 0; + assembly->index = 0; + assembly_deref (assembly); + } else { + /* + * Message is fragmented, keep around assembly list + */ + if (mcast->msg_count > 1) { + memmove (&assembly->data[0], + 
&assembly->data[assembly->index], + msg_lens[msg_count]); + + assembly->index = 0; + } + assembly->index += msg_lens[msg_count]; + } +} + +/* + * Totem Process Group Abstraction + * depends on poll abstraction, POSIX, IPV4 + */ + +void *callback_token_received_handle; + +int callback_token_received_fn (enum totem_callback_token_type type, + const void *data) +{ + struct totempg_mcast mcast; + struct iovec iovecs[3]; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&mcast_msg_mutex); + } + if (mcast_packed_msg_count == 0) { + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&mcast_msg_mutex); + } + return (0); + } + if (totemsrp_avail(totemsrp_context) == 0) { + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&mcast_msg_mutex); + } + return (0); + } + mcast.header.version = 0; + mcast.header.type = 0; + mcast.fragmented = 0; + + /* + * Was the first message in this buffer a continuation of a + * fragmented message? + */ + mcast.continuation = fragment_continuation; + fragment_continuation = 0; + + mcast.msg_count = mcast_packed_msg_count; + + iovecs[0].iov_base = (void *)&mcast; + iovecs[0].iov_len = sizeof (struct totempg_mcast); + iovecs[1].iov_base = (void *)mcast_packed_msg_lens; + iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short); + iovecs[2].iov_base = (void *)&fragmentation_data[0]; + iovecs[2].iov_len = fragment_size; + (void)totemsrp_mcast (totemsrp_context, iovecs, 3, 0); + + mcast_packed_msg_count = 0; + fragment_size = 0; + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&mcast_msg_mutex); + } + return (0); +} + +/* + * Initialize the totem process group abstraction + */ +int totempg_initialize ( + qb_loop_t *poll_handle, + struct totem_config *totem_config) +{ + int res; + + totempg_totem_config = totem_config; + totempg_log_level_security = totem_config->totem_logging_configuration.log_level_security; + totempg_log_level_error = totem_config->totem_logging_configuration.log_level_error; + 
totempg_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; + totempg_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; + totempg_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; + totempg_log_printf = totem_config->totem_logging_configuration.log_printf; + totempg_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; + + fragmentation_data = malloc (TOTEMPG_PACKET_SIZE); + if (fragmentation_data == 0) { + return (-1); + } + + totemsrp_net_mtu_adjust (totem_config); + + res = totemsrp_initialize ( + poll_handle, + &totemsrp_context, + totem_config, + &totempg_stats, + totempg_deliver_fn, + totempg_confchg_fn, + totempg_waiting_trans_ack_cb); + + if (res == -1) { + goto error_exit; + } + + totemsrp_callback_token_create ( + totemsrp_context, + &callback_token_received_handle, + TOTEM_CALLBACK_TOKEN_RECEIVED, + 0, + callback_token_received_fn, + 0); + + totempg_size_limit = (totemsrp_avail(totemsrp_context) - 1) * + (totempg_totem_config->net_mtu - + sizeof (struct totempg_mcast) - 16); + + qb_list_init (&totempg_groups_list); + +error_exit: + return (res); +} + +void totempg_finalize (void) +{ + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + } + totemsrp_finalize (totemsrp_context); + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } +} + +/* + * Multicast a message + */ +static int mcast_msg ( + struct iovec *iovec_in, + unsigned int iov_len, + int guarantee) +{ + int res = 0; + struct totempg_mcast mcast; + struct iovec iovecs[3]; + struct iovec iovec[64]; + int i; + int dest, src; + int max_packet_size = 0; + int copy_len = 0; + int copy_base = 0; + int total_size = 0; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&mcast_msg_mutex); + } + totemsrp_event_signal (totemsrp_context, TOTEM_EVENT_NEW_MSG, 1); + + /* + * Remove zero length iovectors from the list + */ + assert (iov_len < 64); + for 
(dest = 0, src = 0; src < iov_len; src++) { + if (iovec_in[src].iov_len) { + memcpy (&iovec[dest++], &iovec_in[src], + sizeof (struct iovec)); + } + } + iov_len = dest; + + max_packet_size = TOTEMPG_PACKET_SIZE - + (sizeof (unsigned short) * (mcast_packed_msg_count + 1)); + + mcast_packed_msg_lens[mcast_packed_msg_count] = 0; + + /* + * Check if we would overwrite new message queue + */ + for (i = 0; i < iov_len; i++) { + total_size += iovec[i].iov_len; + } + + if (byte_count_send_ok (total_size + sizeof(unsigned short) * + (mcast_packed_msg_count)) == 0) { + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&mcast_msg_mutex); + } + return(-1); + } + + memset(&mcast, 0, sizeof(mcast)); + + mcast.header.version = 0; + for (i = 0; i < iov_len; ) { + mcast.fragmented = 0; + mcast.continuation = fragment_continuation; + copy_len = iovec[i].iov_len - copy_base; + + /* + * If it all fits with room left over, copy it in. + * We need to leave at least sizeof(short) + 1 bytes in the + * fragment_buffer on exit so that max_packet_size + fragment_size + * doesn't exceed the size of the fragment_buffer on the next call. + */ + if ((iovec[i].iov_len + fragment_size) < + (max_packet_size - sizeof (unsigned short))) { + + memcpy (&fragmentation_data[fragment_size], + (char *)iovec[i].iov_base + copy_base, copy_len); + fragment_size += copy_len; + mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; + next_fragment = 1; + copy_len = 0; + copy_base = 0; + i++; + continue; + + /* + * If it just fits or is too big, then send out what fits. 
+ */ + } else { + unsigned char *data_ptr; + + copy_len = min(copy_len, max_packet_size - fragment_size); + if( copy_len == max_packet_size ) + data_ptr = (unsigned char *)iovec[i].iov_base + copy_base; + else { + data_ptr = fragmentation_data; + } + + memcpy (&fragmentation_data[fragment_size], + (unsigned char *)iovec[i].iov_base + copy_base, copy_len); + mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; + + /* + * if we're not on the last iovec or the iovec is too large to + * fit, then indicate a fragment. This also means that the next + * message will have the continuation of this one. + */ + if ((i < (iov_len - 1)) || + ((copy_base + copy_len) < iovec[i].iov_len)) { + if (!next_fragment) { + next_fragment++; + } + fragment_continuation = next_fragment; + mcast.fragmented = next_fragment++; + assert(fragment_continuation != 0); + assert(mcast.fragmented != 0); + } else { + fragment_continuation = 0; + } + + /* + * assemble the message and send it + */ + mcast.msg_count = ++mcast_packed_msg_count; + iovecs[0].iov_base = (void *)&mcast; + iovecs[0].iov_len = sizeof(struct totempg_mcast); + iovecs[1].iov_base = (void *)mcast_packed_msg_lens; + iovecs[1].iov_len = mcast_packed_msg_count * + sizeof(unsigned short); + iovecs[2].iov_base = (void *)data_ptr; + iovecs[2].iov_len = fragment_size + copy_len; + assert (totemsrp_avail(totemsrp_context) > 0); + res = totemsrp_mcast (totemsrp_context, iovecs, 3, guarantee); + if (res == -1) { + goto error_exit; + } + + /* + * Recalculate counts and indexes for the next. + */ + mcast_packed_msg_lens[0] = 0; + mcast_packed_msg_count = 0; + fragment_size = 0; + max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof(unsigned short)); + + /* + * If the iovec all fit, go to the next iovec + */ + if ((copy_base + copy_len) == iovec[i].iov_len) { + copy_len = 0; + copy_base = 0; + i++; + + /* + * Continue with the rest of the current iovec. 
+ */ + } else { + copy_base += copy_len; + } + } + } + + /* + * Bump only if we added message data. This may be zero if + * the last buffer just fit into the fragmentation_data buffer + * and we were at the last iovec. + */ + if (mcast_packed_msg_lens[mcast_packed_msg_count]) { + mcast_packed_msg_count++; + } + +error_exit: + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&mcast_msg_mutex); + } + return (res); +} + +/* + * Determine if a message of msg_size could be queued + */ +static int msg_count_send_ok ( + int msg_count) +{ + int avail = 0; + + avail = totemsrp_avail (totemsrp_context); + totempg_stats.msg_queue_avail = avail; + + return ((avail - totempg_reserved) > msg_count); +} + +static int byte_count_send_ok ( + int byte_count) +{ + unsigned int msg_count = 0; + int avail = 0; + + avail = totemsrp_avail (totemsrp_context); + + msg_count = (byte_count / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1; + + return (avail >= msg_count); +} + +static int send_reserve ( + int msg_size) +{ + unsigned int msg_count = 0; + + msg_count = (msg_size / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1; + totempg_reserved += msg_count; + totempg_stats.msg_reserved = totempg_reserved; + + return (msg_count); +} + +static void send_release ( + int msg_count) +{ + totempg_reserved -= msg_count; + totempg_stats.msg_reserved = totempg_reserved; +} + +#ifndef HAVE_SMALL_MEMORY_FOOTPRINT +#undef MESSAGE_QUEUE_MAX +#define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totempg_totem_config->net_mtu) +#endif /* HAVE_SMALL_MEMORY_FOOTPRINT */ + +static uint32_t q_level_precent_used(void) +{ + return (100 - (((totemsrp_avail(totemsrp_context) - totempg_reserved) * 100) / MESSAGE_QUEUE_MAX)); +} + +int totempg_callback_token_create ( + void **handle_out, + enum totem_callback_token_type type, + int delete, + int (*callback_fn) (enum totem_callback_token_type type, const void *), + const void *data) +{ + unsigned int res; + 
if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&callback_token_mutex); + } + res = totemsrp_callback_token_create (totemsrp_context, handle_out, type, delete, + callback_fn, data); + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&callback_token_mutex); + } + return (res); +} + +void totempg_callback_token_destroy ( + void *handle_out) +{ + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&callback_token_mutex); + } + totemsrp_callback_token_destroy (totemsrp_context, handle_out); + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&callback_token_mutex); + } +} + +/* + * vi: set autoindent tabstop=4 shiftwidth=4 : + */ + +int totempg_groups_initialize ( + void **totempg_groups_instance, + + void (*deliver_fn) ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required), + + void (*confchg_fn) ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id)) +{ + struct totempg_group_instance *instance; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + } + + instance = malloc (sizeof (struct totempg_group_instance)); + if (instance == NULL) { + goto error_exit; + } + + instance->deliver_fn = deliver_fn; + instance->confchg_fn = confchg_fn; + instance->groups = 0; + instance->groups_cnt = 0; + instance->q_level = QB_LOOP_MED; + qb_list_init (&instance->list); + qb_list_add (&instance->list, &totempg_groups_list); + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } + *totempg_groups_instance = instance; + return (0); + +error_exit: + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } + return (-1); +} + +int totempg_groups_join ( + void *totempg_groups_instance, + const struct totempg_group 
*groups, + size_t group_cnt) +{ + struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; + struct totempg_group *new_groups; + int res = 0; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + } + + new_groups = realloc (instance->groups, + sizeof (struct totempg_group) * + (instance->groups_cnt + group_cnt)); + if (new_groups == 0) { + res = -1; + goto error_exit; + } + memcpy (&new_groups[instance->groups_cnt], + groups, group_cnt * sizeof (struct totempg_group)); + instance->groups = new_groups; + instance->groups_cnt += group_cnt; + +error_exit: + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } + return (res); +} + +int totempg_groups_leave ( + void *totempg_groups_instance, + const struct totempg_group *groups, + size_t group_cnt) +{ + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + } + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } + return (0); +} + +#define MAX_IOVECS_FROM_APP 32 +#define MAX_GROUPS_PER_MSG 32 + +int totempg_groups_mcast_joined ( + void *totempg_groups_instance, + const struct iovec *iovec, + unsigned int iov_len, + int guarantee) +{ + struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; + unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; + struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; + int i; + unsigned int res; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + } + + /* + * Build group_len structure and the iovec_mcast structure + */ + group_len[0] = instance->groups_cnt; + for (i = 0; i < instance->groups_cnt; i++) { + group_len[i + 1] = instance->groups[i].group_len; + iovec_mcast[i + 1].iov_len = instance->groups[i].group_len; + iovec_mcast[i + 1].iov_base = (void *) instance->groups[i].group; + } + iovec_mcast[0].iov_len = (instance->groups_cnt + 1) * sizeof (unsigned 
short); + iovec_mcast[0].iov_base = group_len; + for (i = 0; i < iov_len; i++) { + iovec_mcast[i + instance->groups_cnt + 1].iov_len = iovec[i].iov_len; + iovec_mcast[i + instance->groups_cnt + 1].iov_base = iovec[i].iov_base; + } + + res = mcast_msg (iovec_mcast, iov_len + instance->groups_cnt + 1, guarantee); + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } + + return (res); +} + +static void check_q_level( + void *totempg_groups_instance) +{ + struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; + int32_t old_level = instance->q_level; + int32_t percent_used = q_level_precent_used(); + + if (percent_used >= 75 && instance->q_level != TOTEM_Q_LEVEL_CRITICAL) { + instance->q_level = TOTEM_Q_LEVEL_CRITICAL; + } else if (percent_used < 30 && instance->q_level != TOTEM_Q_LEVEL_LOW) { + instance->q_level = TOTEM_Q_LEVEL_LOW; + } else if (percent_used > 40 && percent_used < 50 && instance->q_level != TOTEM_Q_LEVEL_GOOD) { + instance->q_level = TOTEM_Q_LEVEL_GOOD; + } else if (percent_used > 60 && percent_used < 70 && instance->q_level != TOTEM_Q_LEVEL_HIGH) { + instance->q_level = TOTEM_Q_LEVEL_HIGH; + } + if (totem_queue_level_changed && old_level != instance->q_level) { + totem_queue_level_changed(instance->q_level); + } +} + +void totempg_check_q_level( + void *totempg_groups_instance) +{ + struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; + + check_q_level(instance); +} + +int totempg_groups_joined_reserve ( + void *totempg_groups_instance, + const struct iovec *iovec, + unsigned int iov_len) +{ + struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance; + unsigned int size = 0; + unsigned int i; + unsigned int reserved = 0; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + pthread_mutex_lock (&mcast_msg_mutex); + } + + for (i = 0; i < instance->groups_cnt; i++) { + 
size += instance->groups[i].group_len; + } + for (i = 0; i < iov_len; i++) { + size += iovec[i].iov_len; + } + + if (size >= totempg_size_limit) { + reserved = -1; + goto error_exit; + } + + if (byte_count_send_ok (size)) { + reserved = send_reserve (size); + } else { + reserved = 0; + } + +error_exit: + check_q_level(instance); + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&mcast_msg_mutex); + pthread_mutex_unlock (&totempg_mutex); + } + return (reserved); +} + + +int totempg_groups_joined_release (int msg_count) +{ + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + pthread_mutex_lock (&mcast_msg_mutex); + } + send_release (msg_count); + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&mcast_msg_mutex); + pthread_mutex_unlock (&totempg_mutex); + } + return 0; +} + +int totempg_groups_mcast_groups ( + void *totempg_groups_instance, + int guarantee, + const struct totempg_group *groups, + size_t groups_cnt, + const struct iovec *iovec, + unsigned int iov_len) +{ + unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; + struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; + int i; + unsigned int res; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + } + + /* + * Build group_len structure and the iovec_mcast structure + */ + group_len[0] = groups_cnt; + for (i = 0; i < groups_cnt; i++) { + group_len[i + 1] = groups[i].group_len; + iovec_mcast[i + 1].iov_len = groups[i].group_len; + iovec_mcast[i + 1].iov_base = (void *) groups[i].group; + } + iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short); + iovec_mcast[0].iov_base = group_len; + for (i = 0; i < iov_len; i++) { + iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len; + iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base; + } + + res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee); + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } + return 
(res); +} + +/* + * Returns -1 if error, 0 if can't send, 1 if can send the message + */ +int totempg_groups_send_ok_groups ( + void *totempg_groups_instance, + const struct totempg_group *groups, + size_t groups_cnt, + const struct iovec *iovec, + unsigned int iov_len) +{ + unsigned int size = 0; + unsigned int i; + unsigned int res; + + if (totempg_threaded_mode == 1) { + pthread_mutex_lock (&totempg_mutex); + } + + for (i = 0; i < groups_cnt; i++) { + size += groups[i].group_len; + } + for (i = 0; i < iov_len; i++) { + size += iovec[i].iov_len; + } + + res = msg_count_send_ok (size); + + if (totempg_threaded_mode == 1) { + pthread_mutex_unlock (&totempg_mutex); + } + return (res); +} + +int totempg_iface_set ( + struct totem_ip_address *interface_addr, + unsigned short ip_port, + unsigned int iface_no) +{ + int res; + + res = totemsrp_iface_set ( + totemsrp_context, + interface_addr, + ip_port, + iface_no); + + return (res); +} + +int totempg_nodestatus_get (unsigned int nodeid, + struct totem_node_status *node_status) +{ + memset(node_status, 0, sizeof(struct totem_node_status)); + return totemsrp_nodestatus_get (totemsrp_context, nodeid, node_status); +} + +int totempg_ifaces_get ( + unsigned int nodeid, + unsigned int *interface_id, + struct totem_ip_address *interfaces, + unsigned int interfaces_size, + char ***status, + unsigned int *iface_count) +{ + int res; + + res = totemsrp_ifaces_get ( + totemsrp_context, + nodeid, + interface_id, + interfaces, + interfaces_size, + status, + iface_count); + + return (res); +} + +void totempg_event_signal (enum totem_event_type type, int value) +{ + totemsrp_event_signal (totemsrp_context, type, value); +} + +void* totempg_get_stats (void) +{ + return &totempg_stats; +} + +int totempg_crypto_set ( + const char *cipher_type, + const char *hash_type) +{ + int res; + + res = totemsrp_crypto_set (totemsrp_context, cipher_type, hash_type); + + return (res); +} + +#define ONE_IFACE_LEN 63 +const char *totempg_ifaces_print 
(unsigned int nodeid) +{ + static char iface_string[256 * INTERFACE_MAX]; + char one_iface[ONE_IFACE_LEN+1]; + struct totem_ip_address interfaces[INTERFACE_MAX]; + unsigned int iface_count; + unsigned int iface_ids[INTERFACE_MAX]; + unsigned int i; + int res; + + iface_string[0] = '\0'; + + res = totempg_ifaces_get (nodeid, iface_ids, interfaces, INTERFACE_MAX, NULL, &iface_count); + if (res == -1) { + return ("no interface found for nodeid"); + } + + res = totempg_ifaces_get (nodeid, iface_ids, interfaces, INTERFACE_MAX, NULL, &iface_count); + + for (i = 0; i < iface_count; i++) { + if (!interfaces[i].family) { + continue; + } + snprintf (one_iface, ONE_IFACE_LEN, + "r(%d) ip(%s) ", + i, totemip_print (&interfaces[i])); + strcat (iface_string, one_iface); + } + return (iface_string); +} + +unsigned int totempg_my_nodeid_get (void) +{ + return (totemsrp_my_nodeid_get(totemsrp_context)); +} + +int totempg_my_family_get (void) +{ + return (totemsrp_my_family_get(totemsrp_context)); +} +extern void totempg_service_ready_register ( + void (*totem_service_ready) (void)) +{ + totemsrp_service_ready_register (totemsrp_context, totem_service_ready); +} + +void totempg_queue_level_register_callback (totem_queue_level_changed_fn fn) +{ + totem_queue_level_changed = fn; +} + +extern int totempg_member_add ( + const struct totem_ip_address *member, + int ring_no) +{ + return totemsrp_member_add (totemsrp_context, member, ring_no); +} + +extern int totempg_member_remove ( + const struct totem_ip_address *member, + int ring_no) +{ + return totemsrp_member_remove (totemsrp_context, member, ring_no); +} + +extern int totempg_reconfigure (void) +{ + return totemsrp_reconfigure (totemsrp_context, totempg_totem_config); +} + +extern int totempg_crypto_reconfigure_phase (cfg_message_crypto_reconfig_phase_t phase) +{ + return totemsrp_crypto_reconfigure_phase (totemsrp_context, totempg_totem_config, phase); +} + +extern void totempg_stats_clear (int flags) +{ + if (flags & 
TOTEMPG_STATS_CLEAR_TOTEM) { + totempg_stats.msg_reserved = 0; + totempg_stats.msg_queue_avail = 0; + } + return totemsrp_stats_clear (totemsrp_context, flags); +} + +void totempg_threaded_mode_enable (void) +{ + totempg_threaded_mode = 1; + totemsrp_threaded_mode_enable (totemsrp_context); +} + +void totempg_trans_ack (void) +{ + totemsrp_trans_ack (totemsrp_context); +} + +void totempg_force_gather (void) +{ + totemsrp_force_gather(totemsrp_context); +} + +/* Assumes ->orig_interfaces is already allocated */ +void totempg_get_config(struct totem_config *config) +{ + struct totem_interface *temp_if = config->orig_interfaces; + + memcpy(config, totempg_totem_config, sizeof(struct totem_config)); + config->orig_interfaces = temp_if; + memcpy(config->orig_interfaces, totempg_totem_config->interfaces, sizeof(struct totem_interface) * INTERFACE_MAX); + config->interfaces = NULL; +} + +void totempg_put_config(struct totem_config *config) +{ + struct totem_interface *temp_if = totempg_totem_config->interfaces; + + /* Preseve the existing interfaces[] array as transports might have pointers saved */ + memcpy(totempg_totem_config->interfaces, config->interfaces, sizeof(struct totem_interface) * INTERFACE_MAX); + memcpy(totempg_totem_config, config, sizeof(struct totem_config)); + totempg_totem_config->interfaces = temp_if; +} diff --git a/exec/totemsrp.c b/exec/totemsrp.c new file mode 100644 index 0000000..63a47c1 --- /dev/null +++ b/exec/totemsrp.c @@ -0,0 +1,5252 @@ +/* + * Copyright (c) 2003-2006 MontaVista Software, Inc. + * Copyright (c) 2006-2018 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * The first version of this code was based upon Yair Amir's PhD thesis: + * http://www.cs.jhu.edu/~yairamir/phd.ps) (ch4,5). 
+ * + * The current version of totemsrp implements the Totem protocol specified in: + * http://citeseer.ist.psu.edu/amir95totem.html + * + * The deviations from the above published protocols are: + * - token hold mode where token doesn't rotate on unused ring - reduces cpu + * usage on 1.6ghz xeon from 35% to less than .1 % as measured by top + */ + +#include <config.h> + +#include <assert.h> +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <netdb.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <sys/param.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <sched.h> +#include <time.h> +#include <sys/time.h> +#include <sys/poll.h> +#include <sys/uio.h> +#include <limits.h> + +#include <qb/qblist.h> +#include <qb/qbdefs.h> +#include <qb/qbutil.h> +#include <qb/qbloop.h> + +#include <corosync/swab.h> +#include <corosync/sq.h> + +#define LOGSYS_UTILS_ONLY 1 +#include <corosync/logsys.h> + +#include "totemsrp.h" +#include "totemnet.h" + +#include "icmap.h" +#include "totemconfig.h" + +#include "cs_queue.h" + +#define LOCALHOST_IP inet_addr("127.0.0.1") +#define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */ +#define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 16384 messages to be queued */ +#define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */ +#define MAXIOVS 5 +#define RETRANSMIT_ENTRIES_MAX 30 +#define TOKEN_SIZE_MAX 64000 /* bytes */ +#define LEAVE_DUMMY_NODEID 0 + +/* + * SRP address. + */ +struct srp_addr { + unsigned int nodeid; +}; + +/* + * Rollover handling: + * SEQNO_START_MSG is the starting sequence number after a new configuration + * This should remain zero, unless testing overflow in which case + * 0x7ffff000 and 0xfffff000 are good starting values. 
+ * + * SEQNO_START_TOKEN is the starting sequence number after a new configuration + * for a token. This should remain zero, unless testing overflow in which + * case 0x7fffff00 or 0xffffff00 are good starting values. + */ +#define SEQNO_START_MSG 0x0 +#define SEQNO_START_TOKEN 0x0 + +/* + * These can be used to test different rollover points + * #define SEQNO_START_MSG 0xfffffe00 + * #define SEQNO_START_TOKEN 0xfffffe00 + */ + +/* + * These can be used to test the error recovery algorithms + * #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30 + * #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30 + * #define TEST_DROP_MCAST_PERCENTAGE 50 + * #define TEST_RECOVERY_MSG_COUNT 300 + */ + +/* + * we compare incoming messages to determine if their endian is + * different - if so convert them + * + * do not change + */ +#define ENDIAN_LOCAL 0xff22 + +enum message_type { + MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */ + MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */ + MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */ + MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */ + MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */ + MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */ +}; + +enum encapsulation_type { + MESSAGE_ENCAPSULATED = 1, + MESSAGE_NOT_ENCAPSULATED = 2 +}; + +/* + * New membership algorithm local variables + */ +struct consensus_list_item { + struct srp_addr addr; + int set; +}; + + +struct token_callback_instance { + struct qb_list_head list; + int (*callback_fn) (enum totem_callback_token_type type, const void *); + enum totem_callback_token_type callback_type; + int delete; + void *data; +}; + + +struct totemsrp_socket { + int mcast; + int token; +}; + +struct mcast { + struct totem_message_header header; + struct srp_addr system_from; + unsigned int seq; + int this_seqno; + struct memb_ring_id ring_id; + unsigned int node_id; + int guarantee; +} 
__attribute__((packed)); + + +struct rtr_item { + struct memb_ring_id ring_id; + unsigned int seq; +}__attribute__((packed)); + + +struct orf_token { + struct totem_message_header header; + unsigned int seq; + unsigned int token_seq; + unsigned int aru; + unsigned int aru_addr; + struct memb_ring_id ring_id; + unsigned int backlog; + unsigned int fcc; + int retrans_flg; + int rtr_list_entries; + struct rtr_item rtr_list[0]; +}__attribute__((packed)); + + +struct memb_join { + struct totem_message_header header; + struct srp_addr system_from; + unsigned int proc_list_entries; + unsigned int failed_list_entries; + unsigned long long ring_seq; + unsigned char end_of_memb_join[0]; +/* + * These parts of the data structure are dynamic: + * struct srp_addr proc_list[]; + * struct srp_addr failed_list[]; + */ +} __attribute__((packed)); + + +struct memb_merge_detect { + struct totem_message_header header; + struct srp_addr system_from; + struct memb_ring_id ring_id; +} __attribute__((packed)); + + +struct token_hold_cancel { + struct totem_message_header header; + struct memb_ring_id ring_id; +} __attribute__((packed)); + + +struct memb_commit_token_memb_entry { + struct memb_ring_id ring_id; + unsigned int aru; + unsigned int high_delivered; + unsigned int received_flg; +}__attribute__((packed)); + + +struct memb_commit_token { + struct totem_message_header header; + unsigned int token_seq; + struct memb_ring_id ring_id; + unsigned int retrans_flg; + int memb_index; + int addr_entries; + unsigned char end_of_commit_token[0]; +/* + * These parts of the data structure are dynamic: + * + * struct srp_addr addr[PROCESSOR_COUNT_MAX]; + * struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX]; + */ +}__attribute__((packed)); + +struct message_item { + struct mcast *mcast; + unsigned int msg_len; +}; + +struct sort_queue_item { + struct mcast *mcast; + unsigned int msg_len; +}; + +enum memb_state { + MEMB_STATE_OPERATIONAL = 1, + MEMB_STATE_GATHER = 2, + 
MEMB_STATE_COMMIT = 3, + MEMB_STATE_RECOVERY = 4 +}; + +struct totemsrp_instance { + int iface_changes; + + int failed_to_recv; + + /* + * Flow control mcasts and remcasts on last and current orf_token + */ + int fcc_remcast_last; + + int fcc_mcast_last; + + int fcc_remcast_current; + + struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX]; + + int consensus_list_entries; + + int lowest_active_if; + + struct srp_addr my_id; + + struct totem_ip_address my_addrs[INTERFACE_MAX]; + + struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX]; + + struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX]; + + struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX]; + + struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX]; + + struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX]; + + struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX]; + + struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX]; + + unsigned int my_leave_memb_list[PROCESSOR_COUNT_MAX]; + + int my_proc_list_entries; + + int my_failed_list_entries; + + int my_new_memb_entries; + + int my_trans_memb_entries; + + int my_memb_entries; + + int my_deliver_memb_entries; + + int my_left_memb_entries; + + int my_leave_memb_entries; + + struct memb_ring_id my_ring_id; + + struct memb_ring_id my_old_ring_id; + + int my_aru_count; + + int my_merge_detect_timeout_outstanding; + + unsigned int my_last_aru; + + int my_seq_unchanged; + + int my_received_flg; + + unsigned int my_high_seq_received; + + unsigned int my_install_seq; + + int my_rotation_counter; + + int my_set_retrans_flg; + + int my_retrans_flg_count; + + unsigned int my_high_ring_delivered; + + int heartbeat_timeout; + + /* + * Queues used to order, deliver, and recover messages + */ + struct cs_queue new_message_queue; + + struct cs_queue new_message_queue_trans; + + struct cs_queue retrans_message_queue; + + struct sq regular_sort_queue; + + struct sq recovery_sort_queue; + + /* + * Received up to and including + */ + unsigned int my_aru; + + unsigned int 
my_high_delivered; + + struct qb_list_head token_callback_received_listhead; + + struct qb_list_head token_callback_sent_listhead; + + char orf_token_retransmit[TOKEN_SIZE_MAX]; + + int orf_token_retransmit_size; + + unsigned int my_token_seq; + + /* + * Timers + */ + qb_loop_timer_handle timer_pause_timeout; + + qb_loop_timer_handle timer_orf_token_timeout; + + qb_loop_timer_handle timer_orf_token_warning; + + qb_loop_timer_handle timer_orf_token_retransmit_timeout; + + qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout; + + qb_loop_timer_handle timer_merge_detect_timeout; + + qb_loop_timer_handle memb_timer_state_gather_join_timeout; + + qb_loop_timer_handle memb_timer_state_gather_consensus_timeout; + + qb_loop_timer_handle memb_timer_state_commit_timeout; + + qb_loop_timer_handle timer_heartbeat_timeout; + + /* + * Function and data used to log messages + */ + int totemsrp_log_level_security; + + int totemsrp_log_level_error; + + int totemsrp_log_level_warning; + + int totemsrp_log_level_notice; + + int totemsrp_log_level_debug; + + int totemsrp_log_level_trace; + + int totemsrp_subsys_id; + + void (*totemsrp_log_printf) ( + int level, + int subsys, + const char *function, + const char *file, + int line, + const char *format, ...)__attribute__((format(printf, 6, 7)));; + + enum memb_state memb_state; + +//TODO struct srp_addr next_memb; + + qb_loop_t *totemsrp_poll_handle; + + struct totem_ip_address mcast_address; + + void (*totemsrp_deliver_fn) ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required); + + void (*totemsrp_confchg_fn) ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id); + + void (*totemsrp_service_ready_fn) (void); + + void (*totemsrp_waiting_trans_ack_cb_fn) ( + int 
waiting_trans_ack); + + void (*memb_ring_id_create_or_load) ( + struct memb_ring_id *memb_ring_id, + unsigned int nodeid); + + void (*memb_ring_id_store) ( + const struct memb_ring_id *memb_ring_id, + unsigned int nodeid); + + int global_seqno; + + int my_token_held; + + unsigned long long token_ring_id_seq; + + unsigned int last_released; + + unsigned int set_aru; + + int old_ring_state_saved; + + int old_ring_state_aru; + + unsigned int old_ring_state_high_seq_received; + + unsigned int my_last_seq; + + struct timeval tv_old; + + void *totemnet_context; + + struct totem_config *totem_config; + + unsigned int use_heartbeat; + + unsigned int my_trc; + + unsigned int my_pbl; + + unsigned int my_cbl; + + uint64_t pause_timestamp; + + struct memb_commit_token *commit_token; + + totemsrp_stats_t stats; + + uint32_t orf_token_discard; + + uint32_t originated_orf_token; + + uint32_t threaded_mode_enabled; + + uint32_t waiting_trans_ack; + + int flushing; + + void * token_recv_event_handle; + void * token_sent_event_handle; + char commit_token_storage[40000]; +}; + +struct message_handlers { + int count; + int (*handler_functions[6]) ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed); +}; + +enum gather_state_from { + TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT = 0, + TOTEMSRP_GSFROM_GATHER_MISSING1 = 1, + TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE = 2, + TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED = 3, + TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE = 4, + TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE = 5, + TOTEMSRP_GSFROM_FAILED_TO_RECEIVE = 6, + TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE = 7, + TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE = 8, + TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE = 9, + TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE = 10, + TOTEMSRP_GSFROM_MERGE_DURING_JOIN = 11, + TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE = 12, + 
TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE = 13, + TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY = 14, + TOTEMSRP_GSFROM_INTERFACE_CHANGE = 15, + TOTEMSRP_GSFROM_MAX = TOTEMSRP_GSFROM_INTERFACE_CHANGE, +}; + +const char* gather_state_from_desc [] = { + [TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT] = "consensus timeout", + [TOTEMSRP_GSFROM_GATHER_MISSING1] = "MISSING", + [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE] = "The token was lost in the OPERATIONAL state.", + [TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED] = "The consensus timeout expired.", + [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE] = "The token was lost in the COMMIT state.", + [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE] = "The token was lost in the RECOVERY state.", + [TOTEMSRP_GSFROM_FAILED_TO_RECEIVE] = "failed to receive", + [TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE] = "foreign message in operational state", + [TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE] = "foreign message in gather state", + [TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE] = "merge during operational state", + [TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE] = "merge during gather state", + [TOTEMSRP_GSFROM_MERGE_DURING_JOIN] = "merge during join", + [TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE] = "join during operational state", + [TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE] = "join during commit state", + [TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY] = "join during recovery", + [TOTEMSRP_GSFROM_INTERFACE_CHANGE] = "interface change", +}; + +/* + * forward decls + */ +static int message_handler_orf_token ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed); + +static int message_handler_mcast ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed); + +static int message_handler_memb_merge_detect ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int 
endian_conversion_needed); + +static int message_handler_memb_join ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed); + +static int message_handler_memb_commit_token ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed); + +static int message_handler_token_hold_cancel ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed); + +static void totemsrp_instance_initialize (struct totemsrp_instance *instance); + +static void srp_addr_to_nodeid ( + struct totemsrp_instance *instance, + unsigned int *nodeid_out, + struct srp_addr *srp_addr_in, + unsigned int entries); + +static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b); + +static void memb_leave_message_send (struct totemsrp_instance *instance); + +static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type); +static void memb_state_gather_enter (struct totemsrp_instance *instance, enum gather_state_from gather_from); +static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip, unsigned int end_point); +static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken, + int fcc_mcasts_allowed); +static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru); + +static void memb_ring_id_set (struct totemsrp_instance *instance, + const struct memb_ring_id *ring_id); +static void target_set_completed (void *context); +static void memb_state_commit_token_update (struct totemsrp_instance *instance); +static void memb_state_commit_token_target_set (struct totemsrp_instance *instance); +static int memb_state_commit_token_send (struct totemsrp_instance *instance); +static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token); +static void 
memb_state_commit_token_create (struct totemsrp_instance *instance); +static int token_hold_cancel_send (struct totemsrp_instance *instance); +static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out); +static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out); +static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out); +static void mcast_endian_convert (const struct mcast *in, struct mcast *out); +static void memb_merge_detect_endian_convert ( + const struct memb_merge_detect *in, + struct memb_merge_detect *out); +static struct srp_addr srp_addr_endian_convert (struct srp_addr in); +static void timer_function_orf_token_timeout (void *data); +static void timer_function_orf_token_warning (void *data); +static void timer_function_pause_timeout (void *data); +static void timer_function_heartbeat_timeout (void *data); +static void timer_function_token_retransmit_timeout (void *data); +static void timer_function_token_hold_retransmit_timeout (void *data); +static void timer_function_merge_detect_timeout (void *data); +static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance); +static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr); +static const char* gsfrom_to_msg(enum gather_state_from gsfrom); + +int main_deliver_fn ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from); + +int main_iface_change_fn ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int iface_no); + +struct message_handlers totemsrp_message_handlers = { + 6, + { + message_handler_orf_token, /* MESSAGE_TYPE_ORF_TOKEN */ + message_handler_mcast, /* MESSAGE_TYPE_MCAST */ + message_handler_memb_merge_detect, /* MESSAGE_TYPE_MEMB_MERGE_DETECT */ + message_handler_memb_join, /* MESSAGE_TYPE_MEMB_JOIN */ + message_handler_memb_commit_token, /* 
MESSAGE_TYPE_MEMB_COMMIT_TOKEN */ + message_handler_token_hold_cancel /* MESSAGE_TYPE_TOKEN_HOLD_CANCEL */ + } +}; + +#define log_printf(level, format, args...) \ +do { \ + instance->totemsrp_log_printf ( \ + level, instance->totemsrp_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + format, ##args); \ +} while (0); +#define LOGSYS_PERROR(err_num, level, fmt, args...) \ +do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + instance->totemsrp_log_printf ( \ + level, instance->totemsrp_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ + } while(0) + +static const char* gsfrom_to_msg(enum gather_state_from gsfrom) +{ + if (gsfrom <= TOTEMSRP_GSFROM_MAX) { + return gather_state_from_desc[gsfrom]; + } + else { + return "UNKNOWN"; + } +} + +static void totemsrp_instance_initialize (struct totemsrp_instance *instance) +{ + memset (instance, 0, sizeof (struct totemsrp_instance)); + + qb_list_init (&instance->token_callback_received_listhead); + + qb_list_init (&instance->token_callback_sent_listhead); + + instance->my_received_flg = 1; + + instance->my_token_seq = SEQNO_START_TOKEN - 1; + + instance->memb_state = MEMB_STATE_OPERATIONAL; + + instance->set_aru = -1; + + instance->my_aru = SEQNO_START_MSG; + + instance->my_high_seq_received = SEQNO_START_MSG; + + instance->my_high_delivered = SEQNO_START_MSG; + + instance->orf_token_discard = 0; + + instance->originated_orf_token = 0; + + instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage; + + instance->waiting_trans_ack = 1; +} + +static int pause_flush (struct totemsrp_instance *instance) +{ + uint64_t now_msec; + uint64_t timestamp_msec; + int res = 0; + + now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC); + timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC; + + if ((now_msec - timestamp_msec) > 
(instance->totem_config->token_timeout / 2)) { + log_printf (instance->totemsrp_log_level_notice, + "Process pause detected for %d ms, flushing membership messages.", (unsigned int)(now_msec - timestamp_msec)); + /* + * -1 indicates an error from recvmsg + */ + do { + res = totemnet_recv_mcast_empty (instance->totemnet_context); + } while (res == -1); + } + return (res); +} + +static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance; + uint32_t time_now; + unsigned long long nano_secs = qb_util_nano_current_get (); + + time_now = (nano_secs / QB_TIME_NS_IN_MSEC); + + if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) { + /* incr latest token the index */ + if (instance->stats.latest_token == (TOTEM_TOKEN_STATS_MAX - 1)) + instance->stats.latest_token = 0; + else + instance->stats.latest_token++; + + if (instance->stats.earliest_token == instance->stats.latest_token) { + /* we have filled up the array, start overwriting */ + if (instance->stats.earliest_token == (TOTEM_TOKEN_STATS_MAX - 1)) + instance->stats.earliest_token = 0; + else + instance->stats.earliest_token++; + + instance->stats.token[instance->stats.earliest_token].rx = 0; + instance->stats.token[instance->stats.earliest_token].tx = 0; + instance->stats.token[instance->stats.earliest_token].backlog_calc = 0; + } + + instance->stats.token[instance->stats.latest_token].rx = time_now; + instance->stats.token[instance->stats.latest_token].tx = 0; /* in case we drop the token */ + } else { + instance->stats.token[instance->stats.latest_token].tx = time_now; + } + return 0; +} + +static void totempg_mtu_changed(void *context, int net_mtu) +{ + struct totemsrp_instance *instance = context; + + instance->totem_config->net_mtu = net_mtu - 2 * sizeof (struct mcast); + + log_printf (instance->totemsrp_log_level_debug, + "Net MTU changed to %d, new value is %d", + net_mtu, 
instance->totem_config->net_mtu); +} + +/* + * Exported interfaces + */ +int totemsrp_initialize ( + qb_loop_t *poll_handle, + void **srp_context, + struct totem_config *totem_config, + totempg_stats_t *stats, + + void (*deliver_fn) ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required), + + void (*confchg_fn) ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id), + void (*waiting_trans_ack_cb_fn) ( + int waiting_trans_ack)) +{ + struct totemsrp_instance *instance; + int res; + + instance = malloc (sizeof (struct totemsrp_instance)); + if (instance == NULL) { + goto error_exit; + } + + totemsrp_instance_initialize (instance); + + instance->totemsrp_waiting_trans_ack_cb_fn = waiting_trans_ack_cb_fn; + instance->totemsrp_waiting_trans_ack_cb_fn (1); + + stats->srp = &instance->stats; + instance->stats.latest_token = 0; + instance->stats.earliest_token = 0; + + instance->totem_config = totem_config; + + /* + * Configure logging + */ + instance->totemsrp_log_level_security = totem_config->totem_logging_configuration.log_level_security; + instance->totemsrp_log_level_error = totem_config->totem_logging_configuration.log_level_error; + instance->totemsrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; + instance->totemsrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; + instance->totemsrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; + instance->totemsrp_log_level_trace = totem_config->totem_logging_configuration.log_level_trace; + instance->totemsrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; + instance->totemsrp_log_printf = totem_config->totem_logging_configuration.log_printf; + + /* 
+ * Configure totem store and load functions + */ + instance->memb_ring_id_create_or_load = totem_config->totem_memb_ring_id_create_or_load; + instance->memb_ring_id_store = totem_config->totem_memb_ring_id_store; + + /* + * Initialize local variables for totemsrp + */ + totemip_copy (&instance->mcast_address, &totem_config->interfaces[instance->lowest_active_if].mcast_addr); + + /* + * Display totem configuration + */ + log_printf (instance->totemsrp_log_level_debug, + "Token Timeout (%d ms) retransmit timeout (%d ms)", + totem_config->token_timeout, totem_config->token_retransmit_timeout); + if (totem_config->token_warning) { + uint32_t token_warning_ms = totem_config->token_warning * totem_config->token_timeout / 100; + log_printf(instance->totemsrp_log_level_debug, + "Token warning every %d ms (%d%% of Token Timeout)", + token_warning_ms, totem_config->token_warning); + if (token_warning_ms < totem_config->token_retransmit_timeout) + log_printf (LOGSYS_LEVEL_DEBUG, + "The token warning interval (%d ms) is less than the token retransmit timeout (%d ms) " + "which can lead to spurious token warnings. 
Consider increasing the token_warning parameter.", + token_warning_ms, totem_config->token_retransmit_timeout); + } else { + log_printf(instance->totemsrp_log_level_debug, + "Token warnings disabled"); + } + log_printf (instance->totemsrp_log_level_debug, + "token hold (%d ms) retransmits before loss (%d retrans)", + totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const); + log_printf (instance->totemsrp_log_level_debug, + "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)", + totem_config->join_timeout, + totem_config->send_join_timeout, + totem_config->consensus_timeout, + + totem_config->merge_timeout); + log_printf (instance->totemsrp_log_level_debug, + "downcheck (%d ms) fail to recv const (%d msgs)", + totem_config->downcheck_timeout, totem_config->fail_to_recv_const); + log_printf (instance->totemsrp_log_level_debug, + "seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu); + + log_printf (instance->totemsrp_log_level_debug, + "window size per rotation (%d messages) maximum messages per rotation (%d messages)", + totem_config->window_size, totem_config->max_messages); + + log_printf (instance->totemsrp_log_level_debug, + "missed count const (%d messages)", + totem_config->miss_count_const); + + log_printf (instance->totemsrp_log_level_debug, + "send threads (%d threads)", totem_config->threads); + + log_printf (instance->totemsrp_log_level_debug, + "heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed); + log_printf (instance->totemsrp_log_level_debug, + "max_network_delay (%d ms)", totem_config->max_network_delay); + + + cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX, + sizeof (struct message_item), instance->threaded_mode_enabled); + + sq_init (&instance->regular_sort_queue, + QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0); + + sq_init (&instance->recovery_sort_queue, + 
QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0); + + instance->totemsrp_poll_handle = poll_handle; + + instance->totemsrp_deliver_fn = deliver_fn; + + instance->totemsrp_confchg_fn = confchg_fn; + instance->use_heartbeat = 1; + + timer_function_pause_timeout (instance); + + if ( totem_config->heartbeat_failures_allowed == 0 ) { + log_printf (instance->totemsrp_log_level_debug, + "HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0"); + instance->use_heartbeat = 0; + } + + if (instance->use_heartbeat) { + instance->heartbeat_timeout + = (totem_config->heartbeat_failures_allowed) * totem_config->token_retransmit_timeout + + totem_config->max_network_delay; + + if (instance->heartbeat_timeout >= totem_config->token_timeout) { + log_printf (instance->totemsrp_log_level_debug, + "total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)", + instance->heartbeat_timeout, + totem_config->token_timeout); + log_printf (instance->totemsrp_log_level_debug, + "heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay"); + log_printf (instance->totemsrp_log_level_debug, + "heartbeat timeout should be less than the token timeout. 
Heartbeat is disabled!!"); + instance->use_heartbeat = 0; + } + else { + log_printf (instance->totemsrp_log_level_debug, + "total heartbeat_timeout (%d ms)", instance->heartbeat_timeout); + } + } + + res = totemnet_initialize ( + poll_handle, + &instance->totemnet_context, + totem_config, + stats->srp, + instance, + main_deliver_fn, + main_iface_change_fn, + totempg_mtu_changed, + target_set_completed); + if (res == -1) { + goto error_exit; + } + + instance->my_id.nodeid = instance->totem_config->interfaces[instance->lowest_active_if].boundto.nodeid; + + /* + * Must have net_mtu adjusted by totemnet_initialize first + */ + cs_queue_init (&instance->new_message_queue, + MESSAGE_QUEUE_MAX, + sizeof (struct message_item), instance->threaded_mode_enabled); + + cs_queue_init (&instance->new_message_queue_trans, + MESSAGE_QUEUE_MAX, + sizeof (struct message_item), instance->threaded_mode_enabled); + + totemsrp_callback_token_create (instance, + &instance->token_recv_event_handle, + TOTEM_CALLBACK_TOKEN_RECEIVED, + 0, + token_event_stats_collector, + instance); + totemsrp_callback_token_create (instance, + &instance->token_sent_event_handle, + TOTEM_CALLBACK_TOKEN_SENT, + 0, + token_event_stats_collector, + instance); + *srp_context = instance; + return (0); + +error_exit: + return (-1); +} + +void totemsrp_finalize ( + void *srp_context) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + + memb_leave_message_send (instance); + totemnet_finalize (instance->totemnet_context); + cs_queue_free (&instance->new_message_queue); + cs_queue_free (&instance->new_message_queue_trans); + cs_queue_free (&instance->retrans_message_queue); + sq_free (&instance->regular_sort_queue); + sq_free (&instance->recovery_sort_queue); + free (instance); +} + +int totemsrp_nodestatus_get ( + void *srp_context, + unsigned int nodeid, + struct totem_node_status *node_status) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + int i; 
+ + node_status->version = TOTEM_NODE_STATUS_STRUCTURE_VERSION; + + /* Fill in 'reachable' here as the lower level UDP[u] layers don't know */ + for (i = 0; i < instance->my_proc_list_entries; i++) { + if (instance->my_proc_list[i].nodeid == nodeid) { + node_status->reachable = 1; + } + } + + return totemnet_nodestatus_get(instance->totemnet_context, nodeid, node_status); +} + + +/* + * Return configured interfaces. interfaces is array of totem_ip addresses allocated by caller, + * with interaces_size number of items. iface_count is final number of interfaces filled by this + * function. + * + * Function returns 0 on success, otherwise if interfaces array is not big enough, -2 is returned, + * and if interface was not found, -1 is returned. + */ +int totemsrp_ifaces_get ( + void *srp_context, + unsigned int nodeid, + unsigned int *interface_id, + struct totem_ip_address *interfaces, + unsigned int interfaces_size, + char ***status, + unsigned int *iface_count) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + struct totem_ip_address *iface_ptr = interfaces; + int res = 0; + int i,n; + int num_ifs = 0; + + memset(interfaces, 0, sizeof(struct totem_ip_address) * interfaces_size); + *iface_count = INTERFACE_MAX; + + for (i=0; i<INTERFACE_MAX; i++) { + for (n=0; n < instance->totem_config->interfaces[i].member_count; n++) { + if (instance->totem_config->interfaces[i].configured && + instance->totem_config->interfaces[i].member_list[n].nodeid == nodeid) { + memcpy(iface_ptr, &instance->totem_config->interfaces[i].member_list[n], sizeof(struct totem_ip_address)); + interface_id[num_ifs] = i; + iface_ptr++; + if (++num_ifs > interfaces_size) { + res = -2; + break; + } + } + } + } + + totemnet_ifaces_get(instance->totemnet_context, status, iface_count); + *iface_count = num_ifs; + return (res); +} + +int totemsrp_crypto_set ( + void *srp_context, + const char *cipher_type, + const char *hash_type) +{ + struct totemsrp_instance *instance = 
(struct totemsrp_instance *)srp_context; + int res; + + res = totemnet_crypto_set(instance->totemnet_context, cipher_type, hash_type); + + return (res); +} + + +unsigned int totemsrp_my_nodeid_get ( + void *srp_context) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + unsigned int res; + + res = instance->my_id.nodeid; + + return (res); +} + +int totemsrp_my_family_get ( + void *srp_context) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + int res; + + res = instance->totem_config->interfaces[instance->lowest_active_if].boundto.family; + + return (res); +} + + +/* + * Set operations for use by the membership algorithm + */ +static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b) +{ + if (a->nodeid == b->nodeid) { + return 1; + } + return 0; +} + +static void srp_addr_to_nodeid ( + struct totemsrp_instance *instance, + unsigned int *nodeid_out, + struct srp_addr *srp_addr_in, + unsigned int entries) +{ + unsigned int i; + + for (i = 0; i < entries; i++) { + nodeid_out[i] = srp_addr_in[i].nodeid; + } +} + +static struct srp_addr srp_addr_endian_convert (struct srp_addr in) +{ + struct srp_addr res; + + res.nodeid = swab32 (in.nodeid); + + return (res); +} + +static void memb_consensus_reset (struct totemsrp_instance *instance) +{ + instance->consensus_list_entries = 0; +} + +static void memb_set_subtract ( + struct srp_addr *out_list, int *out_list_entries, + struct srp_addr *one_list, int one_list_entries, + struct srp_addr *two_list, int two_list_entries) +{ + int found = 0; + int i; + int j; + + *out_list_entries = 0; + + for (i = 0; i < one_list_entries; i++) { + for (j = 0; j < two_list_entries; j++) { + if (srp_addr_equal (&one_list[i], &two_list[j])) { + found = 1; + break; + } + } + if (found == 0) { + out_list[*out_list_entries] = one_list[i]; + *out_list_entries = *out_list_entries + 1; + } + found = 0; + } +} + +/* + * Set consensus for a specific processor + */ 
+static void memb_consensus_set ( + struct totemsrp_instance *instance, + const struct srp_addr *addr) +{ + int found = 0; + int i; + + for (i = 0; i < instance->consensus_list_entries; i++) { + if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) { + found = 1; + break; /* found entry */ + } + } + instance->consensus_list[i].addr = *addr; + instance->consensus_list[i].set = 1; + if (found == 0) { + instance->consensus_list_entries++; + } + return; +} + +/* + * Is consensus set for a specific processor + */ +static int memb_consensus_isset ( + struct totemsrp_instance *instance, + const struct srp_addr *addr) +{ + int i; + + for (i = 0; i < instance->consensus_list_entries; i++) { + if (srp_addr_equal (addr, &instance->consensus_list[i].addr)) { + return (instance->consensus_list[i].set); + } + } + return (0); +} + +/* + * Is consensus agreed upon based upon consensus database + */ +static int memb_consensus_agreed ( + struct totemsrp_instance *instance) +{ + struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; + int token_memb_entries = 0; + int agreed = 1; + int i; + + memb_set_subtract (token_memb, &token_memb_entries, + instance->my_proc_list, instance->my_proc_list_entries, + instance->my_failed_list, instance->my_failed_list_entries); + + for (i = 0; i < token_memb_entries; i++) { + if (memb_consensus_isset (instance, &token_memb[i]) == 0) { + agreed = 0; + break; + } + } + + if (agreed && instance->failed_to_recv == 1) { + /* + * Both nodes agreed on our failure. We don't care how many proc list items left because we + * will create single ring anyway. 
+ */ + + return (agreed); + } + + assert (token_memb_entries >= 1); + + return (agreed); +} + +static void memb_consensus_notset ( + struct totemsrp_instance *instance, + struct srp_addr *no_consensus_list, + int *no_consensus_list_entries, + struct srp_addr *comparison_list, + int comparison_list_entries) +{ + int i; + + *no_consensus_list_entries = 0; + + for (i = 0; i < instance->my_proc_list_entries; i++) { + if (memb_consensus_isset (instance, &instance->my_proc_list[i]) == 0) { + no_consensus_list[*no_consensus_list_entries] = instance->my_proc_list[i]; + *no_consensus_list_entries = *no_consensus_list_entries + 1; + } + } +} + +/* + * Is set1 equal to set2 Entries can be in different orders + */ +static int memb_set_equal ( + struct srp_addr *set1, int set1_entries, + struct srp_addr *set2, int set2_entries) +{ + int i; + int j; + + int found = 0; + + if (set1_entries != set2_entries) { + return (0); + } + for (i = 0; i < set2_entries; i++) { + for (j = 0; j < set1_entries; j++) { + if (srp_addr_equal (&set1[j], &set2[i])) { + found = 1; + break; + } + } + if (found == 0) { + return (0); + } + found = 0; + } + return (1); +} + +/* + * Is subset fully contained in fullset + */ +static int memb_set_subset ( + const struct srp_addr *subset, int subset_entries, + const struct srp_addr *fullset, int fullset_entries) +{ + int i; + int j; + int found = 0; + + if (subset_entries > fullset_entries) { + return (0); + } + for (i = 0; i < subset_entries; i++) { + for (j = 0; j < fullset_entries; j++) { + if (srp_addr_equal (&subset[i], &fullset[j])) { + found = 1; + } + } + if (found == 0) { + return (0); + } + found = 0; + } + return (1); +} +/* + * merge subset into fullset taking care not to add duplicates + */ +static void memb_set_merge ( + const struct srp_addr *subset, int subset_entries, + struct srp_addr *fullset, int *fullset_entries) +{ + int found = 0; + int i; + int j; + + for (i = 0; i < subset_entries; i++) { + for (j = 0; j < *fullset_entries; j++) { + 
if (srp_addr_equal (&fullset[j], &subset[i])) { + found = 1; + break; + } + } + if (found == 0) { + fullset[*fullset_entries] = subset[i]; + *fullset_entries = *fullset_entries + 1; + } + found = 0; + } + return; +} + +static void memb_set_and_with_ring_id ( + struct srp_addr *set1, + struct memb_ring_id *set1_ring_ids, + int set1_entries, + struct srp_addr *set2, + int set2_entries, + struct memb_ring_id *old_ring_id, + struct srp_addr *and, + int *and_entries) +{ + int i; + int j; + int found = 0; + + *and_entries = 0; + + for (i = 0; i < set2_entries; i++) { + for (j = 0; j < set1_entries; j++) { + if (srp_addr_equal (&set1[j], &set2[i])) { + if (memcmp (&set1_ring_ids[j], old_ring_id, sizeof (struct memb_ring_id)) == 0) { + found = 1; + } + break; + } + } + if (found) { + and[*and_entries] = set1[j]; + *and_entries = *and_entries + 1; + } + found = 0; + } + return; +} + +static void memb_set_log( + struct totemsrp_instance *instance, + int level, + const char *string, + struct srp_addr *list, + int list_entries) +{ + char int_buf[32]; + char list_str[512]; + int i; + + memset(list_str, 0, sizeof(list_str)); + + for (i = 0; i < list_entries; i++) { + if (i == 0) { + snprintf(int_buf, sizeof(int_buf), CS_PRI_NODE_ID, list[i].nodeid); + } else { + snprintf(int_buf, sizeof(int_buf), "," CS_PRI_NODE_ID, list[i].nodeid); + } + + if (strlen(list_str) + strlen(int_buf) >= sizeof(list_str)) { + break ; + } + strcat(list_str, int_buf); + } + + log_printf(level, "List '%s' contains %d entries: %s", string, list_entries, list_str); +} + +static void my_leave_memb_clear( + struct totemsrp_instance *instance) +{ + memset(instance->my_leave_memb_list, 0, sizeof(instance->my_leave_memb_list)); + instance->my_leave_memb_entries = 0; +} + +static unsigned int my_leave_memb_match( + struct totemsrp_instance *instance, + unsigned int nodeid) +{ + int i; + unsigned int ret = 0; + + for (i = 0; i < instance->my_leave_memb_entries; i++){ + if (instance->my_leave_memb_list[i] == 
nodeid){ + ret = nodeid; + break; + } + } + return ret; +} + +static void my_leave_memb_set( + struct totemsrp_instance *instance, + unsigned int nodeid) +{ + int i, found = 0; + for (i = 0; i < instance->my_leave_memb_entries; i++){ + if (instance->my_leave_memb_list[i] == nodeid){ + found = 1; + break; + } + } + if (found == 1) { + return; + } + if (instance->my_leave_memb_entries < (PROCESSOR_COUNT_MAX - 1)) { + instance->my_leave_memb_list[instance->my_leave_memb_entries] = nodeid; + instance->my_leave_memb_entries++; + } else { + log_printf (instance->totemsrp_log_level_warning, + "Cannot set LEAVE nodeid=" CS_PRI_NODE_ID, nodeid); + } +} + + +static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance) +{ + assert (instance != NULL); + return totemnet_buffer_alloc (instance->totemnet_context); +} + +static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr) +{ + assert (instance != NULL); + totemnet_buffer_release (instance->totemnet_context, ptr); +} + +static void reset_token_retransmit_timeout (struct totemsrp_instance *instance) +{ + int32_t res; + + qb_loop_timer_del (instance->totemsrp_poll_handle, + instance->timer_orf_token_retransmit_timeout); + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_token_retransmit_timeout, + &instance->timer_orf_token_retransmit_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "reset_token_retransmit_timeout - qb_loop_timer_add error : %d", res); + } + +} + +static void start_merge_detect_timeout (struct totemsrp_instance *instance) +{ + int32_t res; + + if (instance->my_merge_detect_timeout_outstanding == 0) { + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_merge_detect_timeout, + 
&instance->timer_merge_detect_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "start_merge_detect_timeout - qb_loop_timer_add error : %d", res); + } + + instance->my_merge_detect_timeout_outstanding = 1; + } +} + +static void cancel_merge_detect_timeout (struct totemsrp_instance *instance) +{ + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_merge_detect_timeout); + instance->my_merge_detect_timeout_outstanding = 0; +} + +/* + * ring_state_* is used to save and restore the sort queue + * state when a recovery operation fails (and enters gather) + */ +static void old_ring_state_save (struct totemsrp_instance *instance) +{ + if (instance->old_ring_state_saved == 0) { + instance->old_ring_state_saved = 1; + memcpy (&instance->my_old_ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)); + instance->old_ring_state_aru = instance->my_aru; + instance->old_ring_state_high_seq_received = instance->my_high_seq_received; + log_printf (instance->totemsrp_log_level_debug, + "Saving state aru %x high seq received %x", + instance->my_aru, instance->my_high_seq_received); + } +} + +static void old_ring_state_restore (struct totemsrp_instance *instance) +{ + instance->my_aru = instance->old_ring_state_aru; + instance->my_high_seq_received = instance->old_ring_state_high_seq_received; + log_printf (instance->totemsrp_log_level_debug, + "Restoring instance->my_aru %x my high seq received %x", + instance->my_aru, instance->my_high_seq_received); +} + +static void old_ring_state_reset (struct totemsrp_instance *instance) +{ + log_printf (instance->totemsrp_log_level_debug, + "Resetting old ring state"); + instance->old_ring_state_saved = 0; +} + +static void reset_pause_timeout (struct totemsrp_instance *instance) +{ + int32_t res; + + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_pause_timeout); + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + 
instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5, + (void *)instance, + timer_function_pause_timeout, + &instance->timer_pause_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "reset_pause_timeout - qb_loop_timer_add error : %d", res); + } +} + +static void reset_token_warning (struct totemsrp_instance *instance) { + int32_t res; + + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_warning); + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->token_warning * instance->totem_config->token_timeout / 100 * QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_orf_token_warning, + &instance->timer_orf_token_warning); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "reset_token_warning - qb_loop_timer_add error : %d", res); + } +} + +static void reset_token_timeout (struct totemsrp_instance *instance) { + int32_t res; + + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->token_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_orf_token_timeout, + &instance->timer_orf_token_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "reset_token_timeout - qb_loop_timer_add error : %d", res); + } + + if (instance->totem_config->token_warning) + reset_token_warning(instance); +} + +static void reset_heartbeat_timeout (struct totemsrp_instance *instance) { + int32_t res; + + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->heartbeat_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_heartbeat_timeout, + &instance->timer_heartbeat_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "reset_heartbeat_timeout - 
qb_loop_timer_add error : %d", res); + } +} + + +static void cancel_token_warning (struct totemsrp_instance *instance) { + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_warning); +} + +static void cancel_token_timeout (struct totemsrp_instance *instance) { + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); + + if (instance->totem_config->token_warning) + cancel_token_warning(instance); +} + +static void cancel_heartbeat_timeout (struct totemsrp_instance *instance) { + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); +} + +static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance) +{ + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout); +} + +static void start_token_hold_retransmit_timeout (struct totemsrp_instance *instance) +{ + int32_t res; + + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->token_hold_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_token_hold_retransmit_timeout, + &instance->timer_orf_token_hold_retransmit_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "start_token_hold_retransmit_timeout - qb_loop_timer_add error : %d", res); + } +} + +static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance) +{ + qb_loop_timer_del (instance->totemsrp_poll_handle, + instance->timer_orf_token_hold_retransmit_timeout); +} + +static void memb_state_consensus_timeout_expired ( + struct totemsrp_instance *instance) +{ + struct srp_addr no_consensus_list[PROCESSOR_COUNT_MAX]; + int no_consensus_list_entries; + + instance->stats.consensus_timeouts++; + if (memb_consensus_agreed (instance)) { + memb_consensus_reset (instance); + + memb_consensus_set (instance, &instance->my_id); + + reset_token_timeout (instance); // REVIEWED + } else { + memb_consensus_notset ( + instance, 
+ no_consensus_list, + &no_consensus_list_entries, + instance->my_proc_list, + instance->my_proc_list_entries); + + memb_set_merge (no_consensus_list, no_consensus_list_entries, + instance->my_failed_list, &instance->my_failed_list_entries); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT); + } +} + +static void memb_join_message_send (struct totemsrp_instance *instance); + +static void memb_merge_detect_transmit (struct totemsrp_instance *instance); + +/* + * Timers used for various states of the membership algorithm + */ +static void timer_function_pause_timeout (void *data) +{ + struct totemsrp_instance *instance = data; + + instance->pause_timestamp = qb_util_nano_current_get (); + reset_pause_timeout (instance); +} + +static void memb_recovery_state_token_loss (struct totemsrp_instance *instance) +{ + old_ring_state_restore (instance); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE); + instance->stats.recovery_token_lost++; +} + +static void timer_function_orf_token_warning (void *data) +{ + struct totemsrp_instance *instance = data; + uint64_t tv_diff; + + /* need to protect against the case where token_warning is set to 0 dynamically */ + if (instance->totem_config->token_warning) { + tv_diff = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC - + instance->stats.token[instance->stats.latest_token].rx; + log_printf (instance->totemsrp_log_level_notice, + "Token has not been received in %d ms ", (unsigned int) tv_diff); + reset_token_warning(instance); + } else { + cancel_token_warning(instance); + } +} + +static void timer_function_orf_token_timeout (void *data) +{ + struct totemsrp_instance *instance = data; + + switch (instance->memb_state) { + case MEMB_STATE_OPERATIONAL: + log_printf (instance->totemsrp_log_level_debug, + "The token was lost in the OPERATIONAL state."); + log_printf (instance->totemsrp_log_level_notice, + "A processor failed, forming new configuration:" + " token timed 
out (%ums), waiting %ums for consensus.", + instance->totem_config->token_timeout, + instance->totem_config->consensus_timeout); + totemnet_iface_check (instance->totemnet_context); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE); + instance->stats.operational_token_lost++; + break; + + case MEMB_STATE_GATHER: + log_printf (instance->totemsrp_log_level_debug, + "The consensus timeout expired (%ums).", + instance->totem_config->consensus_timeout); + memb_state_consensus_timeout_expired (instance); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED); + instance->stats.gather_token_lost++; + break; + + case MEMB_STATE_COMMIT: + log_printf (instance->totemsrp_log_level_debug, + "The token was lost in the COMMIT state."); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE); + instance->stats.commit_token_lost++; + break; + + case MEMB_STATE_RECOVERY: + log_printf (instance->totemsrp_log_level_debug, + "The token was lost in the RECOVERY state."); + memb_recovery_state_token_loss (instance); + instance->orf_token_discard = 1; + break; + } +} + +static void timer_function_heartbeat_timeout (void *data) +{ + struct totemsrp_instance *instance = data; + log_printf (instance->totemsrp_log_level_debug, + "HeartBeat Timer expired Invoking token loss mechanism in state %d ", instance->memb_state); + timer_function_orf_token_timeout(data); +} + +static void memb_timer_function_state_gather (void *data) +{ + struct totemsrp_instance *instance = data; + int32_t res; + + switch (instance->memb_state) { + case MEMB_STATE_OPERATIONAL: + case MEMB_STATE_RECOVERY: + assert (0); /* this should never happen */ + break; + case MEMB_STATE_GATHER: + case MEMB_STATE_COMMIT: + memb_join_message_send (instance); + + /* + * Restart the join timeout + `*/ + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); + + res = 
qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + memb_timer_function_state_gather, + &instance->memb_timer_state_gather_join_timeout); + + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "memb_timer_function_state_gather - qb_loop_timer_add error : %d", res); + } + break; + } +} + +static void memb_timer_function_gather_consensus_timeout (void *data) +{ + struct totemsrp_instance *instance = data; + memb_state_consensus_timeout_expired (instance); +} + +static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance) +{ + unsigned int i; + struct sort_queue_item *recovery_message_item; + struct sort_queue_item regular_message_item; + unsigned int range = 0; + int res; + void *ptr; + struct mcast *mcast; + + log_printf (instance->totemsrp_log_level_debug, + "recovery to regular %x-%x", SEQNO_START_MSG + 1, instance->my_aru); + + range = instance->my_aru - SEQNO_START_MSG; + /* + * Move messages from recovery to regular sort queue + */ +// todo should i be initialized to 0 or 1 ? 
+ for (i = 1; i <= range; i++) { + res = sq_item_get (&instance->recovery_sort_queue, + i + SEQNO_START_MSG, &ptr); + if (res != 0) { + continue; + } + recovery_message_item = ptr; + + /* + * Convert recovery message into regular message + */ + mcast = recovery_message_item->mcast; + if (mcast->header.encapsulated == MESSAGE_ENCAPSULATED) { + /* + * Message is a recovery message encapsulated + * in a new ring message + */ + regular_message_item.mcast = + (struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast)); + regular_message_item.msg_len = + recovery_message_item->msg_len - sizeof (struct mcast); + mcast = regular_message_item.mcast; + } else { + /* + * TODO this case shouldn't happen + */ + continue; + } + + log_printf (instance->totemsrp_log_level_debug, + "comparing if ring id is for this processors old ring seqno " CS_PRI_RING_ID_SEQ, + (uint64_t)mcast->seq); + + /* + * Only add this message to the regular sort + * queue if it was originated with the same ring + * id as the previous ring + */ + if (memcmp (&instance->my_old_ring_id, &mcast->ring_id, + sizeof (struct memb_ring_id)) == 0) { + + res = sq_item_inuse (&instance->regular_sort_queue, mcast->seq); + if (res == 0) { + sq_item_add (&instance->regular_sort_queue, + ®ular_message_item, mcast->seq); + if (sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) { + instance->old_ring_state_high_seq_received = mcast->seq; + } + } + } else { + log_printf (instance->totemsrp_log_level_debug, + "-not adding msg with seq no " CS_PRI_RING_ID_SEQ, (uint64_t)mcast->seq); + } + } +} + +/* + * Change states in the state machine of the membership algorithm + */ +static void memb_state_operational_enter (struct totemsrp_instance *instance) +{ + struct srp_addr joined_list[PROCESSOR_COUNT_MAX]; + int joined_list_entries = 0; + unsigned int aru_save; + unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX]; + unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX]; + unsigned 
int new_memb_list_totemip[PROCESSOR_COUNT_MAX]; + unsigned int left_list[PROCESSOR_COUNT_MAX]; + unsigned int i; + unsigned int res; + char left_node_msg[1024]; + char joined_node_msg[1024]; + char failed_node_msg[1024]; + + instance->originated_orf_token = 0; + + memb_consensus_reset (instance); + + old_ring_state_reset (instance); + + deliver_messages_from_recovery_to_regular (instance); + + log_printf (instance->totemsrp_log_level_trace, + "Delivering to app %x to %x", + instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received); + + aru_save = instance->my_aru; + instance->my_aru = instance->old_ring_state_aru; + + messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received); + + /* + * Calculate joined and left list + */ + memb_set_subtract (instance->my_left_memb_list, + &instance->my_left_memb_entries, + instance->my_memb_list, instance->my_memb_entries, + instance->my_trans_memb_list, instance->my_trans_memb_entries); + + memb_set_subtract (joined_list, &joined_list_entries, + instance->my_new_memb_list, instance->my_new_memb_entries, + instance->my_trans_memb_list, instance->my_trans_memb_entries); + + /* + * Install new membership + */ + instance->my_memb_entries = instance->my_new_memb_entries; + memcpy (&instance->my_memb_list, instance->my_new_memb_list, + sizeof (struct srp_addr) * instance->my_memb_entries); + instance->last_released = 0; + instance->my_set_retrans_flg = 0; + + /* + * Deliver transitional configuration to application + */ + srp_addr_to_nodeid (instance, left_list, instance->my_left_memb_list, + instance->my_left_memb_entries); + srp_addr_to_nodeid (instance, trans_memb_list_totemip, + instance->my_trans_memb_list, instance->my_trans_memb_entries); + instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL, + trans_memb_list_totemip, instance->my_trans_memb_entries, + left_list, instance->my_left_memb_entries, + 0, 0, &instance->my_ring_id); + /* + * Switch new totemsrp messages queue. 
Messages sent from now on are stored + * in different queue so synchronization messages are delivered first. Totempg + * buffers will be switched later. + */ + instance->waiting_trans_ack = 1; + +// TODO we need to filter to ensure we only deliver those +// messages which are part of instance->my_deliver_memb + messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received); + + /* + * Switch totempg buffers. This used to be right after + * instance->waiting_trans_ack = 1; + * line. This was causing problem, because there may be not yet + * processed parts of messages in totempg buffers. + * So when buffers were switched and recovered messages + * got delivered it was not possible to assemble them. + */ + instance->totemsrp_waiting_trans_ack_cb_fn (1); + + instance->my_aru = aru_save; + + /* + * Deliver regular configuration to application + */ + srp_addr_to_nodeid (instance, new_memb_list_totemip, + instance->my_new_memb_list, instance->my_new_memb_entries); + srp_addr_to_nodeid (instance, joined_list_totemip, joined_list, + joined_list_entries); + instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR, + new_memb_list_totemip, instance->my_new_memb_entries, + 0, 0, + joined_list_totemip, joined_list_entries, &instance->my_ring_id); + + /* + * The recovery sort queue now becomes the regular + * sort queue. It is necessary to copy the state + * into the regular sort queue. + */ + sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue); + instance->my_last_aru = SEQNO_START_MSG; + + /* When making my_proc_list smaller, ensure that the + * now non-used entries are zero-ed out. There are some suspect + * assert's that assume that there is always 2 entries in the list. + * These fail when my_proc_list is reduced to 1 entry (and the + * valid [0] entry is the same as the 'unused' [1] entry). 
+ */ + memset(instance->my_proc_list, 0, + sizeof (struct srp_addr) * instance->my_proc_list_entries); + + instance->my_proc_list_entries = instance->my_new_memb_entries; + memcpy (instance->my_proc_list, instance->my_new_memb_list, + sizeof (struct srp_addr) * instance->my_memb_entries); + + instance->my_failed_list_entries = 0; + /* + * TODO Not exactly to spec + * + * At the entry to this function all messages without a gap are + * deliered. + * + * This code throw away messages from the last gap in the sort queue + * to my_high_seq_received + * + * What should really happen is we should deliver all messages up to + * a gap, then delier the transitional configuration, then deliver + * the messages between the first gap and my_high_seq_received, then + * deliver a regular configuration, then deliver the regular + * configuration + * + * Unfortunately totempg doesn't appear to like this operating mode + * which needs more inspection + */ + i = instance->my_high_seq_received + 1; + do { + void *ptr; + + i -= 1; + res = sq_item_get (&instance->regular_sort_queue, i, &ptr); + if (i == 0) { + break; + } + } while (res); + + instance->my_high_delivered = i; + + for (i = 0; i <= instance->my_high_delivered; i++) { + void *ptr; + + res = sq_item_get (&instance->regular_sort_queue, i, &ptr); + if (res == 0) { + struct sort_queue_item *regular_message; + + regular_message = ptr; + free (regular_message->mcast); + } + } + sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered); + instance->last_released = instance->my_high_delivered; + + if (joined_list_entries) { + int sptr = 0; + sptr += snprintf(joined_node_msg, sizeof(joined_node_msg)-sptr, " joined:"); + for (i=0; i< joined_list_entries; i++) { + sptr += snprintf(joined_node_msg+sptr, sizeof(joined_node_msg)-sptr, " " CS_PRI_NODE_ID, joined_list_totemip[i]); + } + } + else { + joined_node_msg[0] = '\0'; + } + + if (instance->my_left_memb_entries) { + int sptr = 0; + int sptr2 = 0; + sptr += 
snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:"); + for (i=0; i< instance->my_left_memb_entries; i++) { + sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " " CS_PRI_NODE_ID, left_list[i]); + } + for (i=0; i< instance->my_left_memb_entries; i++) { + if (my_leave_memb_match(instance, left_list[i]) == 0) { + if (sptr2 == 0) { + sptr2 += snprintf(failed_node_msg, sizeof(failed_node_msg)-sptr2, " failed:"); + } + sptr2 += snprintf(failed_node_msg+sptr2, sizeof(left_node_msg)-sptr2, " " CS_PRI_NODE_ID, left_list[i]); + } + } + if (sptr2 == 0) { + failed_node_msg[0] = '\0'; + } + } + else { + left_node_msg[0] = '\0'; + failed_node_msg[0] = '\0'; + } + + my_leave_memb_clear(instance); + + log_printf (instance->totemsrp_log_level_debug, + "entering OPERATIONAL state."); + log_printf (instance->totemsrp_log_level_notice, + "A new membership (" CS_PRI_RING_ID ") was formed. Members%s%s", + instance->my_ring_id.rep, + (uint64_t)instance->my_ring_id.seq, + joined_node_msg, + left_node_msg); + + if (strlen(failed_node_msg)) { + log_printf (instance->totemsrp_log_level_notice, + "Failed to receive the leave message.%s", + failed_node_msg); + } + + instance->memb_state = MEMB_STATE_OPERATIONAL; + + instance->stats.operational_entered++; + instance->stats.continuous_gather = 0; + + instance->my_received_flg = 1; + + reset_pause_timeout (instance); + + /* + * Save ring id information from this configuration to determine + * which processors are transitioning from old regular configuration + * in to new regular configuration on the next configuration change + */ + memcpy (&instance->my_old_ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)); + + return; +} + +static void memb_state_gather_enter ( + struct totemsrp_instance *instance, + enum gather_state_from gather_from) +{ + int32_t res; + + instance->orf_token_discard = 1; + + instance->originated_orf_token = 0; + + memb_set_merge ( + &instance->my_id, 1, + instance->my_proc_list, 
&instance->my_proc_list_entries); + + memb_join_message_send (instance); + + /* + * Restart the join timeout + */ + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); + + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + memb_timer_function_state_gather, + &instance->memb_timer_state_gather_join_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(1) : %d", res); + } + + /* + * Restart the consensus timeout + */ + qb_loop_timer_del (instance->totemsrp_poll_handle, + instance->memb_timer_state_gather_consensus_timeout); + + res = qb_loop_timer_add (instance->totemsrp_poll_handle, + QB_LOOP_MED, + instance->totem_config->consensus_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + memb_timer_function_gather_consensus_timeout, + &instance->memb_timer_state_gather_consensus_timeout); + if (res != 0) { + log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(2) : %d", res); + } + + /* + * Cancel the token loss and token retransmission timeouts + */ + cancel_token_retransmit_timeout (instance); // REVIEWED + cancel_token_timeout (instance); // REVIEWED + cancel_merge_detect_timeout (instance); + + memb_consensus_reset (instance); + + memb_consensus_set (instance, &instance->my_id); + + log_printf (instance->totemsrp_log_level_debug, + "entering GATHER state from %d(%s).", + gather_from, gsfrom_to_msg(gather_from)); + + instance->memb_state = MEMB_STATE_GATHER; + instance->stats.gather_entered++; + + if (gather_from == TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED) { + /* + * State 3 means gather, so we are continuously gathering. 
+ */ + instance->stats.continuous_gather++; + } + + return; +} + +static void timer_function_token_retransmit_timeout (void *data); + +static void target_set_completed ( + void *context) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + + memb_state_commit_token_send (instance); + +} + +static void memb_state_commit_enter ( + struct totemsrp_instance *instance) +{ + old_ring_state_save (instance); + + memb_state_commit_token_update (instance); + + memb_state_commit_token_target_set (instance); + + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); + + instance->memb_timer_state_gather_join_timeout = 0; + + qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout); + + instance->memb_timer_state_gather_consensus_timeout = 0; + + memb_ring_id_set (instance, &instance->commit_token->ring_id); + + instance->memb_ring_id_store (&instance->my_ring_id, instance->my_id.nodeid); + + instance->token_ring_id_seq = instance->my_ring_id.seq; + + log_printf (instance->totemsrp_log_level_debug, + "entering COMMIT state."); + + instance->memb_state = MEMB_STATE_COMMIT; + reset_token_retransmit_timeout (instance); // REVIEWED + reset_token_timeout (instance); // REVIEWED + + instance->stats.commit_entered++; + instance->stats.continuous_gather = 0; + + /* + * reset all flow control variables since we are starting a new ring + */ + instance->my_trc = 0; + instance->my_pbl = 0; + instance->my_cbl = 0; + /* + * commit token sent after callback that token target has been set + */ +} + +static void memb_state_recovery_enter ( + struct totemsrp_instance *instance, + struct memb_commit_token *commit_token) +{ + int i; + int local_received_flg = 1; + unsigned int low_ring_aru; + unsigned int range = 0; + unsigned int messages_originated = 0; + const struct srp_addr *addr; + struct memb_commit_token_memb_entry *memb_list; + struct memb_ring_id 
my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX]; + + addr = (const struct srp_addr *)commit_token->end_of_commit_token; + memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries); + + log_printf (instance->totemsrp_log_level_debug, + "entering RECOVERY state."); + + instance->orf_token_discard = 0; + + instance->my_high_ring_delivered = 0; + + sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG); + cs_queue_reinit (&instance->retrans_message_queue); + + low_ring_aru = instance->old_ring_state_high_seq_received; + + memb_state_commit_token_send_recovery (instance, commit_token); + + instance->my_token_seq = SEQNO_START_TOKEN - 1; + + /* + * Build regular configuration + */ + totemnet_processor_count_set ( + instance->totemnet_context, + commit_token->addr_entries); + + /* + * Build transitional configuration + */ + for (i = 0; i < instance->my_new_memb_entries; i++) { + memcpy (&my_new_memb_ring_id_list[i], + &memb_list[i].ring_id, + sizeof (struct memb_ring_id)); + } + memb_set_and_with_ring_id ( + instance->my_new_memb_list, + my_new_memb_ring_id_list, + instance->my_new_memb_entries, + instance->my_memb_list, + instance->my_memb_entries, + &instance->my_old_ring_id, + instance->my_trans_memb_list, + &instance->my_trans_memb_entries); + + for (i = 0; i < instance->my_trans_memb_entries; i++) { + log_printf (instance->totemsrp_log_level_debug, + "TRANS [%d] member " CS_PRI_NODE_ID ":", i, instance->my_trans_memb_list[i].nodeid); + } + for (i = 0; i < instance->my_new_memb_entries; i++) { + log_printf (instance->totemsrp_log_level_debug, + "position [%d] member " CS_PRI_NODE_ID ":", i, addr[i].nodeid); + log_printf (instance->totemsrp_log_level_debug, + "previous ringid (" CS_PRI_RING_ID ")", + memb_list[i].ring_id.rep, (uint64_t)memb_list[i].ring_id.seq); + + log_printf (instance->totemsrp_log_level_debug, + "aru %x high delivered %x received flag %d", + memb_list[i].aru, + memb_list[i].high_delivered, + 
memb_list[i].received_flg); + + // assert (totemip_print (&memb_list[i].ring_id.rep) != 0); + } + /* + * Determine if any received flag is false + */ + for (i = 0; i < commit_token->addr_entries; i++) { + if (memb_set_subset (&instance->my_new_memb_list[i], 1, + instance->my_trans_memb_list, instance->my_trans_memb_entries) && + + memb_list[i].received_flg == 0) { + instance->my_deliver_memb_entries = instance->my_trans_memb_entries; + memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list, + sizeof (struct srp_addr) * instance->my_trans_memb_entries); + local_received_flg = 0; + break; + } + } + if (local_received_flg == 1) { + goto no_originate; + } /* Else originate messages if we should */ + + /* + * Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership + */ + for (i = 0; i < commit_token->addr_entries; i++) { + if (memb_set_subset (&instance->my_new_memb_list[i], 1, + instance->my_deliver_memb_list, + instance->my_deliver_memb_entries) && + + memcmp (&instance->my_old_ring_id, + &memb_list[i].ring_id, + sizeof (struct memb_ring_id)) == 0) { + + if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) { + + low_ring_aru = memb_list[i].aru; + } + if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) { + instance->my_high_ring_delivered = memb_list[i].high_delivered; + } + } + } + + /* + * Copy all old ring messages to instance->retrans_message_queue + */ + range = instance->old_ring_state_high_seq_received - low_ring_aru; + if (range == 0) { + /* + * No messages to copy + */ + goto no_originate; + } + assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); + + log_printf (instance->totemsrp_log_level_debug, + "copying all old ring messages from %x-%x.", + low_ring_aru + 1, instance->old_ring_state_high_seq_received); + + for (i = 1; i <= range; i++) { + struct sort_queue_item *sort_queue_item; + struct message_item message_item; + void *ptr; + int res; + + res = sq_item_get 
(&instance->regular_sort_queue, + low_ring_aru + i, &ptr); + if (res != 0) { + continue; + } + sort_queue_item = ptr; + messages_originated++; + memset (&message_item, 0, sizeof (struct message_item)); + // TODO LEAK + message_item.mcast = totemsrp_buffer_alloc (instance); + assert (message_item.mcast); + memset(message_item.mcast, 0, sizeof (struct mcast)); + message_item.mcast->header.magic = TOTEM_MH_MAGIC; + message_item.mcast->header.version = TOTEM_MH_VERSION; + message_item.mcast->header.type = MESSAGE_TYPE_MCAST; + message_item.mcast->system_from = instance->my_id; + message_item.mcast->header.encapsulated = MESSAGE_ENCAPSULATED; + + message_item.mcast->header.nodeid = instance->my_id.nodeid; + assert (message_item.mcast->header.nodeid); + memcpy (&message_item.mcast->ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)); + message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast); + memcpy (((char *)message_item.mcast) + sizeof (struct mcast), + sort_queue_item->mcast, + sort_queue_item->msg_len); + cs_queue_item_add (&instance->retrans_message_queue, &message_item); + } + log_printf (instance->totemsrp_log_level_debug, + "Originated %d messages in RECOVERY.", messages_originated); + goto originated; + +no_originate: + log_printf (instance->totemsrp_log_level_debug, + "Did not need to originate any messages in recovery."); + +originated: + instance->my_aru = SEQNO_START_MSG; + instance->my_aru_count = 0; + instance->my_seq_unchanged = 0; + instance->my_high_seq_received = SEQNO_START_MSG; + instance->my_install_seq = SEQNO_START_MSG; + instance->last_released = SEQNO_START_MSG; + + reset_token_timeout (instance); // REVIEWED + reset_token_retransmit_timeout (instance); // REVIEWED + + instance->memb_state = MEMB_STATE_RECOVERY; + instance->stats.recovery_entered++; + instance->stats.continuous_gather = 0; + + return; +} + +void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value) +{ + struct 
totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + + token_hold_cancel_send (instance); + + return; +} + +int totemsrp_mcast ( + void *srp_context, + struct iovec *iovec, + unsigned int iov_len, + int guarantee) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + int i; + struct message_item message_item; + char *addr; + unsigned int addr_idx; + struct cs_queue *queue_use; + + if (instance->waiting_trans_ack) { + queue_use = &instance->new_message_queue_trans; + } else { + queue_use = &instance->new_message_queue; + } + + if (cs_queue_is_full (queue_use)) { + log_printf (instance->totemsrp_log_level_debug, "queue full"); + return (-1); + } + + memset (&message_item, 0, sizeof (struct message_item)); + + /* + * Allocate pending item + */ + message_item.mcast = totemsrp_buffer_alloc (instance); + if (message_item.mcast == 0) { + goto error_mcast; + } + + /* + * Set mcast header + */ + memset(message_item.mcast, 0, sizeof (struct mcast)); + message_item.mcast->header.magic = TOTEM_MH_MAGIC; + message_item.mcast->header.version = TOTEM_MH_VERSION; + message_item.mcast->header.type = MESSAGE_TYPE_MCAST; + message_item.mcast->header.encapsulated = MESSAGE_NOT_ENCAPSULATED; + + message_item.mcast->header.nodeid = instance->my_id.nodeid; + assert (message_item.mcast->header.nodeid); + + message_item.mcast->guarantee = guarantee; + message_item.mcast->system_from = instance->my_id; + + addr = (char *)message_item.mcast; + addr_idx = sizeof (struct mcast); + for (i = 0; i < iov_len; i++) { + memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len); + addr_idx += iovec[i].iov_len; + } + + message_item.msg_len = addr_idx; + + log_printf (instance->totemsrp_log_level_trace, "mcasted message added to pending queue"); + instance->stats.mcast_tx++; + cs_queue_item_add (queue_use, &message_item); + + return (0); + +error_mcast: + return (-1); +} + +/* + * Determine if there is room to queue a new message + */ +int 
totemsrp_avail (void *srp_context) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + int avail; + struct cs_queue *queue_use; + + if (instance->waiting_trans_ack) { + queue_use = &instance->new_message_queue_trans; + } else { + queue_use = &instance->new_message_queue; + } + cs_queue_avail (queue_use, &avail); + + return (avail); +} + +/* + * ORF Token Management + */ +/* + * Recast message to mcast group if it is available + */ +static int orf_token_remcast ( + struct totemsrp_instance *instance, + int seq) +{ + struct sort_queue_item *sort_queue_item; + int res; + void *ptr; + + struct sq *sort_queue; + + if (instance->memb_state == MEMB_STATE_RECOVERY) { + sort_queue = &instance->recovery_sort_queue; + } else { + sort_queue = &instance->regular_sort_queue; + } + + res = sq_in_range (sort_queue, seq); + if (res == 0) { + log_printf (instance->totemsrp_log_level_debug, "sq not in range"); + return (-1); + } + + /* + * Get RTR item at seq, if not available, return + */ + res = sq_item_get (sort_queue, seq, &ptr); + if (res != 0) { + return -1; + } + + sort_queue_item = ptr; + + totemnet_mcast_noflush_send ( + instance->totemnet_context, + sort_queue_item->mcast, + sort_queue_item->msg_len); + + return (0); +} + + +/* + * Free all freeable messages from ring + */ +static void messages_free ( + struct totemsrp_instance *instance, + unsigned int token_aru) +{ + struct sort_queue_item *regular_message; + unsigned int i; + int res; + int log_release = 0; + unsigned int release_to; + unsigned int range = 0; + + release_to = token_aru; + if (sq_lt_compare (instance->my_last_aru, release_to)) { + release_to = instance->my_last_aru; + } + if (sq_lt_compare (instance->my_high_delivered, release_to)) { + release_to = instance->my_high_delivered; + } + + /* + * Ensure we dont try release before an already released point + */ + if (sq_lt_compare (release_to, instance->last_released)) { + return; + } + + range = release_to - 
instance->last_released; + assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); + + /* + * Release retransmit list items if group aru indicates they are transmitted + */ + for (i = 1; i <= range; i++) { + void *ptr; + + res = sq_item_get (&instance->regular_sort_queue, + instance->last_released + i, &ptr); + if (res == 0) { + regular_message = ptr; + totemsrp_buffer_release (instance, regular_message->mcast); + } + sq_items_release (&instance->regular_sort_queue, + instance->last_released + i); + + log_release = 1; + } + instance->last_released += range; + + if (log_release) { + log_printf (instance->totemsrp_log_level_trace, + "releasing messages up to and including %x", release_to); + } +} + +static void update_aru ( + struct totemsrp_instance *instance) +{ + unsigned int i; + int res; + struct sq *sort_queue; + unsigned int range; + unsigned int my_aru_saved = 0; + + if (instance->memb_state == MEMB_STATE_RECOVERY) { + sort_queue = &instance->recovery_sort_queue; + } else { + sort_queue = &instance->regular_sort_queue; + } + + range = instance->my_high_seq_received - instance->my_aru; + + my_aru_saved = instance->my_aru; + for (i = 1; i <= range; i++) { + + void *ptr; + + res = sq_item_get (sort_queue, my_aru_saved + i, &ptr); + /* + * If hole, stop updating aru + */ + if (res != 0) { + break; + } + } + instance->my_aru += i - 1; +} + +/* + * Multicasts pending messages onto the ring (requires orf_token possession) + */ +static int orf_token_mcast ( + struct totemsrp_instance *instance, + struct orf_token *token, + int fcc_mcasts_allowed) +{ + struct message_item *message_item = 0; + struct cs_queue *mcast_queue; + struct sq *sort_queue; + struct sort_queue_item sort_queue_item; + struct mcast *mcast; + unsigned int fcc_mcast_current; + + if (instance->memb_state == MEMB_STATE_RECOVERY) { + mcast_queue = &instance->retrans_message_queue; + sort_queue = &instance->recovery_sort_queue; + reset_token_retransmit_timeout (instance); // REVIEWED + } else { + if 
(instance->waiting_trans_ack) { + mcast_queue = &instance->new_message_queue_trans; + } else { + mcast_queue = &instance->new_message_queue; + } + + sort_queue = &instance->regular_sort_queue; + } + + for (fcc_mcast_current = 0; fcc_mcast_current < fcc_mcasts_allowed; fcc_mcast_current++) { + if (cs_queue_is_empty (mcast_queue)) { + break; + } + message_item = (struct message_item *)cs_queue_item_get (mcast_queue); + + message_item->mcast->seq = ++token->seq; + message_item->mcast->this_seqno = instance->global_seqno++; + + /* + * Build IO vector + */ + memset (&sort_queue_item, 0, sizeof (struct sort_queue_item)); + sort_queue_item.mcast = message_item->mcast; + sort_queue_item.msg_len = message_item->msg_len; + + mcast = sort_queue_item.mcast; + + memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); + + /* + * Add message to retransmit queue + */ + sq_item_add (sort_queue, &sort_queue_item, message_item->mcast->seq); + + totemnet_mcast_noflush_send ( + instance->totemnet_context, + message_item->mcast, + message_item->msg_len); + + /* + * Delete item from pending queue + */ + cs_queue_item_remove (mcast_queue); + + /* + * If messages mcasted, deliver any new messages to totempg + */ + instance->my_high_seq_received = token->seq; + } + + update_aru (instance); + + /* + * Return 1 if more messages are available for single node clusters + */ + return (fcc_mcast_current); +} + +/* + * Remulticasts messages in orf_token's retransmit list (requires orf_token) + * Modify's orf_token's rtr to include retransmits required by this process + */ +static int orf_token_rtr ( + struct totemsrp_instance *instance, + struct orf_token *orf_token, + unsigned int *fcc_allowed) +{ + unsigned int res; + unsigned int i, j; + unsigned int found; + struct sq *sort_queue; + struct rtr_item *rtr_list; + unsigned int range = 0; + char retransmit_msg[1024]; + char value[64]; + + if (instance->memb_state == MEMB_STATE_RECOVERY) { + sort_queue = 
&instance->recovery_sort_queue; + } else { + sort_queue = &instance->regular_sort_queue; + } + + rtr_list = &orf_token->rtr_list[0]; + + strcpy (retransmit_msg, "Retransmit List: "); + if (orf_token->rtr_list_entries) { + log_printf (instance->totemsrp_log_level_debug, + "Retransmit List %d", orf_token->rtr_list_entries); + for (i = 0; i < orf_token->rtr_list_entries; i++) { + sprintf (value, "%x ", rtr_list[i].seq); + strcat (retransmit_msg, value); + } + strcat (retransmit_msg, ""); + log_printf (instance->totemsrp_log_level_notice, + "%s", retransmit_msg); + } + + /* + * Retransmit messages on orf_token's RTR list from RTR queue + */ + for (instance->fcc_remcast_current = 0, i = 0; + instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) { + + /* + * If this retransmit request isn't from this configuration, + * try next rtr entry + */ + if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)) != 0) { + + i += 1; + continue; + } + + res = orf_token_remcast (instance, rtr_list[i].seq); + if (res == 0) { + /* + * Multicasted message, so no need to copy to new retransmit list + */ + orf_token->rtr_list_entries -= 1; + assert (orf_token->rtr_list_entries >= 0); + memmove (&rtr_list[i], &rtr_list[i + 1], + sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i)); + + instance->stats.mcast_retx++; + instance->fcc_remcast_current++; + } else { + i += 1; + } + } + *fcc_allowed = *fcc_allowed - instance->fcc_remcast_current; + + /* + * Add messages to retransmit to RTR list + * but only retry if there is room in the retransmit list + */ + + range = orf_token->seq - instance->my_aru; + assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); + + for (i = 1; (orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) && + (i <= range); i++) { + + /* + * Ensure message is within the sort queue range + */ + res = sq_in_range (sort_queue, instance->my_aru + i); + if (res == 0) { + break; + } + + /* + * Find if a message is missing 
from this processor + */ + res = sq_item_inuse (sort_queue, instance->my_aru + i); + if (res == 0) { + /* + * Determine how many times we have missed receiving + * this sequence number. sq_item_miss_count increments + * a counter for the sequence number. The miss count + * will be returned and compared. This allows time for + * delayed multicast messages to be received before + * declaring the message is missing and requesting a + * retransmit. + */ + res = sq_item_miss_count (sort_queue, instance->my_aru + i); + if (res < instance->totem_config->miss_count_const) { + continue; + } + + /* + * Determine if missing message is already in retransmit list + */ + found = 0; + for (j = 0; j < orf_token->rtr_list_entries; j++) { + if (instance->my_aru + i == rtr_list[j].seq) { + found = 1; + } + } + if (found == 0) { + /* + * Missing message not found in current retransmit list so add it + */ + memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id, + &instance->my_ring_id, sizeof (struct memb_ring_id)); + rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i; + orf_token->rtr_list_entries++; + } + } + } + return (instance->fcc_remcast_current); +} + +static void token_retransmit (struct totemsrp_instance *instance) +{ + totemnet_token_send (instance->totemnet_context, + instance->orf_token_retransmit, + instance->orf_token_retransmit_size); +} + +/* + * Retransmit the regular token if no mcast or token has + * been received in retransmit token period retransmit + * the token to the next processor + */ +static void timer_function_token_retransmit_timeout (void *data) +{ + struct totemsrp_instance *instance = data; + + switch (instance->memb_state) { + case MEMB_STATE_GATHER: + break; + case MEMB_STATE_COMMIT: + case MEMB_STATE_OPERATIONAL: + case MEMB_STATE_RECOVERY: + token_retransmit (instance); + reset_token_retransmit_timeout (instance); // REVIEWED + break; + } +} + +static void timer_function_token_hold_retransmit_timeout (void *data) +{ + struct 
totemsrp_instance *instance = data; + + switch (instance->memb_state) { + case MEMB_STATE_GATHER: + break; + case MEMB_STATE_COMMIT: + break; + case MEMB_STATE_OPERATIONAL: + case MEMB_STATE_RECOVERY: + token_retransmit (instance); + break; + } +} + +static void timer_function_merge_detect_timeout(void *data) +{ + struct totemsrp_instance *instance = data; + + instance->my_merge_detect_timeout_outstanding = 0; + + switch (instance->memb_state) { + case MEMB_STATE_OPERATIONAL: + if (instance->my_ring_id.rep == instance->my_id.nodeid) { + memb_merge_detect_transmit (instance); + } + break; + case MEMB_STATE_GATHER: + case MEMB_STATE_COMMIT: + case MEMB_STATE_RECOVERY: + break; + } +} + +/* + * Send orf_token to next member (requires orf_token) + */ +static int token_send ( + struct totemsrp_instance *instance, + struct orf_token *orf_token, + int forward_token) +{ + int res = 0; + unsigned int orf_token_size; + + orf_token_size = sizeof (struct orf_token) + + (orf_token->rtr_list_entries * sizeof (struct rtr_item)); + + orf_token->header.nodeid = instance->my_id.nodeid; + memcpy (instance->orf_token_retransmit, orf_token, orf_token_size); + instance->orf_token_retransmit_size = orf_token_size; + assert (orf_token->header.nodeid); + + if (forward_token == 0) { + return (0); + } + + totemnet_token_send (instance->totemnet_context, + orf_token, + orf_token_size); + + return (res); +} + +static int token_hold_cancel_send (struct totemsrp_instance *instance) +{ + struct token_hold_cancel token_hold_cancel; + + /* + * Only cancel if the token is currently held + */ + if (instance->my_token_held == 0) { + return (0); + } + instance->my_token_held = 0; + + /* + * Build message + */ + token_hold_cancel.header.magic = TOTEM_MH_MAGIC; + token_hold_cancel.header.version = TOTEM_MH_VERSION; + token_hold_cancel.header.type = MESSAGE_TYPE_TOKEN_HOLD_CANCEL; + token_hold_cancel.header.encapsulated = 0; + token_hold_cancel.header.nodeid = instance->my_id.nodeid; + memcpy 
(&token_hold_cancel.ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)); + assert (token_hold_cancel.header.nodeid); + + instance->stats.token_hold_cancel_tx++; + + totemnet_mcast_flush_send (instance->totemnet_context, &token_hold_cancel, + sizeof (struct token_hold_cancel)); + + return (0); +} + +static int orf_token_send_initial (struct totemsrp_instance *instance) +{ + struct orf_token orf_token; + int res; + + orf_token.header.magic = TOTEM_MH_MAGIC; + orf_token.header.version = TOTEM_MH_VERSION; + orf_token.header.type = MESSAGE_TYPE_ORF_TOKEN; + orf_token.header.encapsulated = 0; + orf_token.header.nodeid = instance->my_id.nodeid; + assert (orf_token.header.nodeid); + orf_token.seq = SEQNO_START_MSG; + orf_token.token_seq = SEQNO_START_TOKEN; + orf_token.retrans_flg = 1; + instance->my_set_retrans_flg = 1; + instance->stats.orf_token_tx++; + + if (cs_queue_is_empty (&instance->retrans_message_queue) == 1) { + orf_token.retrans_flg = 0; + instance->my_set_retrans_flg = 0; + } else { + orf_token.retrans_flg = 1; + instance->my_set_retrans_flg = 1; + } + + orf_token.aru = 0; + orf_token.aru = SEQNO_START_MSG - 1; + orf_token.aru_addr = instance->my_id.nodeid; + + memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); + orf_token.fcc = 0; + orf_token.backlog = 0; + + orf_token.rtr_list_entries = 0; + + res = token_send (instance, &orf_token, 1); + + return (res); +} + +static void memb_state_commit_token_update ( + struct totemsrp_instance *instance) +{ + struct srp_addr *addr; + struct memb_commit_token_memb_entry *memb_list; + unsigned int high_aru; + unsigned int i; + + addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; + memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries); + + memcpy (instance->my_new_memb_list, addr, + sizeof (struct srp_addr) * instance->commit_token->addr_entries); + + instance->my_new_memb_entries = 
instance->commit_token->addr_entries; + + memcpy (&memb_list[instance->commit_token->memb_index].ring_id, + &instance->my_old_ring_id, sizeof (struct memb_ring_id)); + + memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru; + /* + * TODO high delivered is really instance->my_aru, but with safe this + * could change? + */ + instance->my_received_flg = + (instance->my_aru == instance->my_high_seq_received); + + memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg; + + memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered; + /* + * find high aru up to current memb_index for all matching ring ids + * if any ring id matching memb_index has aru less then high aru set + * received flag for that entry to false + */ + high_aru = memb_list[instance->commit_token->memb_index].aru; + for (i = 0; i <= instance->commit_token->memb_index; i++) { + if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, + &memb_list[i].ring_id, + sizeof (struct memb_ring_id)) == 0) { + + if (sq_lt_compare (high_aru, memb_list[i].aru)) { + high_aru = memb_list[i].aru; + } + } + } + + for (i = 0; i <= instance->commit_token->memb_index; i++) { + if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, + &memb_list[i].ring_id, + sizeof (struct memb_ring_id)) == 0) { + + if (sq_lt_compare (memb_list[i].aru, high_aru)) { + memb_list[i].received_flg = 0; + if (i == instance->commit_token->memb_index) { + instance->my_received_flg = 0; + } + } + } + } + + instance->commit_token->header.nodeid = instance->my_id.nodeid; + instance->commit_token->memb_index += 1; + assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries); + assert (instance->commit_token->header.nodeid); +} + +static void memb_state_commit_token_target_set ( + struct totemsrp_instance *instance) +{ + struct srp_addr *addr; + + addr = (struct srp_addr 
*)instance->commit_token->end_of_commit_token; + + /* Totemnet just looks at the node id */ + totemnet_token_target_set ( + instance->totemnet_context, + addr[instance->commit_token->memb_index % + instance->commit_token->addr_entries].nodeid); +} + +static int memb_state_commit_token_send_recovery ( + struct totemsrp_instance *instance, + struct memb_commit_token *commit_token) +{ + unsigned int commit_token_size; + + commit_token->token_seq++; + commit_token->header.nodeid = instance->my_id.nodeid; + commit_token_size = sizeof (struct memb_commit_token) + + ((sizeof (struct srp_addr) + + sizeof (struct memb_commit_token_memb_entry)) * commit_token->addr_entries); + /* + * Make a copy for retransmission if necessary + */ + memcpy (instance->orf_token_retransmit, commit_token, commit_token_size); + instance->orf_token_retransmit_size = commit_token_size; + + instance->stats.memb_commit_token_tx++; + + totemnet_token_send (instance->totemnet_context, + commit_token, + commit_token_size); + + /* + * Request retransmission of the commit token in case it is lost + */ + reset_token_retransmit_timeout (instance); + return (0); +} + +static int memb_state_commit_token_send ( + struct totemsrp_instance *instance) +{ + unsigned int commit_token_size; + + instance->commit_token->token_seq++; + instance->commit_token->header.nodeid = instance->my_id.nodeid; + commit_token_size = sizeof (struct memb_commit_token) + + ((sizeof (struct srp_addr) + + sizeof (struct memb_commit_token_memb_entry)) * instance->commit_token->addr_entries); + /* + * Make a copy for retransmission if necessary + */ + memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size); + instance->orf_token_retransmit_size = commit_token_size; + + instance->stats.memb_commit_token_tx++; + + totemnet_token_send (instance->totemnet_context, + instance->commit_token, + commit_token_size); + + /* + * Request retransmission of the commit token in case it is lost + */ + 
reset_token_retransmit_timeout (instance); + return (0); +} + + +static int memb_lowest_in_config (struct totemsrp_instance *instance) +{ + struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; + int token_memb_entries = 0; + int i; + unsigned int lowest_nodeid; + + memb_set_subtract (token_memb, &token_memb_entries, + instance->my_proc_list, instance->my_proc_list_entries, + instance->my_failed_list, instance->my_failed_list_entries); + + /* + * find representative by searching for smallest identifier + */ + assert(token_memb_entries > 0); + + lowest_nodeid = token_memb[0].nodeid; + for (i = 1; i < token_memb_entries; i++) { + if (lowest_nodeid > token_memb[i].nodeid) { + lowest_nodeid = token_memb[i].nodeid; + } + } + return (lowest_nodeid == instance->my_id.nodeid); +} + +static int srp_addr_compare (const void *a, const void *b) +{ + const struct srp_addr *srp_a = (const struct srp_addr *)a; + const struct srp_addr *srp_b = (const struct srp_addr *)b; + + if (srp_a->nodeid < srp_b->nodeid) { + return -1; + } else if (srp_a->nodeid > srp_b->nodeid) { + return 1; + } else { + return 0; + } +} + +static void memb_state_commit_token_create ( + struct totemsrp_instance *instance) +{ + struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; + struct srp_addr *addr; + struct memb_commit_token_memb_entry *memb_list; + int token_memb_entries = 0; + + log_printf (instance->totemsrp_log_level_debug, + "Creating commit token because I am the rep."); + + memb_set_subtract (token_memb, &token_memb_entries, + instance->my_proc_list, instance->my_proc_list_entries, + instance->my_failed_list, instance->my_failed_list_entries); + + memset (instance->commit_token, 0, sizeof (struct memb_commit_token)); + instance->commit_token->header.magic = TOTEM_MH_MAGIC; + instance->commit_token->header.version = TOTEM_MH_VERSION; + instance->commit_token->header.type = MESSAGE_TYPE_MEMB_COMMIT_TOKEN; + instance->commit_token->header.encapsulated = 0; + instance->commit_token->header.nodeid = 
instance->my_id.nodeid; + assert (instance->commit_token->header.nodeid); + + instance->commit_token->ring_id.rep = instance->my_id.nodeid; + instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4; + + /* + * This qsort is necessary to ensure the commit token traverses + * the ring in the proper order + */ + qsort (token_memb, token_memb_entries, sizeof (struct srp_addr), + srp_addr_compare); + + instance->commit_token->memb_index = 0; + instance->commit_token->addr_entries = token_memb_entries; + + addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; + memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries); + + memcpy (addr, token_memb, + token_memb_entries * sizeof (struct srp_addr)); + memset (memb_list, 0, + sizeof (struct memb_commit_token_memb_entry) * token_memb_entries); +} + +static void memb_join_message_send (struct totemsrp_instance *instance) +{ + char memb_join_data[40000]; + struct memb_join *memb_join = (struct memb_join *)memb_join_data; + char *addr; + unsigned int addr_idx; + size_t msg_len; + + memb_join->header.magic = TOTEM_MH_MAGIC; + memb_join->header.version = TOTEM_MH_VERSION; + memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; + memb_join->header.encapsulated = 0; + memb_join->header.nodeid = instance->my_id.nodeid; + assert (memb_join->header.nodeid); + + msg_len = sizeof(struct memb_join) + + ((instance->my_proc_list_entries + instance->my_failed_list_entries) * sizeof(struct srp_addr)); + + if (msg_len > sizeof(memb_join_data)) { + log_printf (instance->totemsrp_log_level_error, + "memb_join_message too long. 
Ignoring message."); + + return ; + } + + memb_join->ring_seq = instance->my_ring_id.seq; + memb_join->proc_list_entries = instance->my_proc_list_entries; + memb_join->failed_list_entries = instance->my_failed_list_entries; + memb_join->system_from = instance->my_id; + + /* + * This mess adds the joined and failed processor lists into the join + * message + */ + addr = (char *)memb_join; + addr_idx = sizeof (struct memb_join); + memcpy (&addr[addr_idx], + instance->my_proc_list, + instance->my_proc_list_entries * + sizeof (struct srp_addr)); + addr_idx += + instance->my_proc_list_entries * + sizeof (struct srp_addr); + memcpy (&addr[addr_idx], + instance->my_failed_list, + instance->my_failed_list_entries * + sizeof (struct srp_addr)); + addr_idx += + instance->my_failed_list_entries * + sizeof (struct srp_addr); + + if (instance->totem_config->send_join_timeout) { + usleep (random() % (instance->totem_config->send_join_timeout * 1000)); + } + + instance->stats.memb_join_tx++; + + totemnet_mcast_flush_send ( + instance->totemnet_context, + memb_join, + addr_idx); +} + +static void memb_leave_message_send (struct totemsrp_instance *instance) +{ + char memb_join_data[40000]; + struct memb_join *memb_join = (struct memb_join *)memb_join_data; + char *addr; + unsigned int addr_idx; + int active_memb_entries; + struct srp_addr active_memb[PROCESSOR_COUNT_MAX]; + size_t msg_len; + + log_printf (instance->totemsrp_log_level_debug, + "sending join/leave message"); + + /* + * add us to the failed list, and remove us from + * the members list + */ + memb_set_merge( + &instance->my_id, 1, + instance->my_failed_list, &instance->my_failed_list_entries); + + memb_set_subtract (active_memb, &active_memb_entries, + instance->my_proc_list, instance->my_proc_list_entries, + &instance->my_id, 1); + + msg_len = sizeof(struct memb_join) + + ((active_memb_entries + instance->my_failed_list_entries) * sizeof(struct srp_addr)); + + if (msg_len > sizeof(memb_join_data)) { + log_printf 
(instance->totemsrp_log_level_error, + "memb_leave message too long. Ignoring message."); + + return ; + } + + memb_join->header.magic = TOTEM_MH_MAGIC; + memb_join->header.version = TOTEM_MH_VERSION; + memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; + memb_join->header.encapsulated = 0; + memb_join->header.nodeid = LEAVE_DUMMY_NODEID; + + memb_join->ring_seq = instance->my_ring_id.seq; + memb_join->proc_list_entries = active_memb_entries; + memb_join->failed_list_entries = instance->my_failed_list_entries; + memb_join->system_from = instance->my_id; + + // TODO: CC Maybe use the actual join send routine. + /* + * This mess adds the joined and failed processor lists into the join + * message + */ + addr = (char *)memb_join; + addr_idx = sizeof (struct memb_join); + memcpy (&addr[addr_idx], + active_memb, + active_memb_entries * + sizeof (struct srp_addr)); + addr_idx += + active_memb_entries * + sizeof (struct srp_addr); + memcpy (&addr[addr_idx], + instance->my_failed_list, + instance->my_failed_list_entries * + sizeof (struct srp_addr)); + addr_idx += + instance->my_failed_list_entries * + sizeof (struct srp_addr); + + + if (instance->totem_config->send_join_timeout) { + usleep (random() % (instance->totem_config->send_join_timeout * 1000)); + } + instance->stats.memb_join_tx++; + + totemnet_mcast_flush_send ( + instance->totemnet_context, + memb_join, + addr_idx); +} + +static void memb_merge_detect_transmit (struct totemsrp_instance *instance) +{ + struct memb_merge_detect memb_merge_detect; + + memb_merge_detect.header.magic = TOTEM_MH_MAGIC; + memb_merge_detect.header.version = TOTEM_MH_VERSION; + memb_merge_detect.header.type = MESSAGE_TYPE_MEMB_MERGE_DETECT; + memb_merge_detect.header.encapsulated = 0; + memb_merge_detect.header.nodeid = instance->my_id.nodeid; + memb_merge_detect.system_from = instance->my_id; + memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)); + assert (memb_merge_detect.header.nodeid); + + 
instance->stats.memb_merge_detect_tx++; + totemnet_mcast_flush_send (instance->totemnet_context, + &memb_merge_detect, + sizeof (struct memb_merge_detect)); +} + +static void memb_ring_id_set ( + struct totemsrp_instance *instance, + const struct memb_ring_id *ring_id) +{ + + memcpy (&instance->my_ring_id, ring_id, sizeof (struct memb_ring_id)); +} + +int totemsrp_callback_token_create ( + void *srp_context, + void **handle_out, + enum totem_callback_token_type type, + int delete, + int (*callback_fn) (enum totem_callback_token_type type, const void *), + const void *data) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; + struct token_callback_instance *callback_handle; + + token_hold_cancel_send (instance); + + callback_handle = malloc (sizeof (struct token_callback_instance)); + if (callback_handle == 0) { + return (-1); + } + *handle_out = (void *)callback_handle; + qb_list_init (&callback_handle->list); + callback_handle->callback_fn = callback_fn; + callback_handle->data = (void *) data; + callback_handle->callback_type = type; + callback_handle->delete = delete; + switch (type) { + case TOTEM_CALLBACK_TOKEN_RECEIVED: + qb_list_add (&callback_handle->list, &instance->token_callback_received_listhead); + break; + case TOTEM_CALLBACK_TOKEN_SENT: + qb_list_add (&callback_handle->list, &instance->token_callback_sent_listhead); + break; + } + + return (0); +} + +void totemsrp_callback_token_destroy (void *srp_context, void **handle_out) +{ + struct token_callback_instance *h; + + if (*handle_out) { + h = (struct token_callback_instance *)*handle_out; + qb_list_del (&h->list); + free (h); + h = NULL; + *handle_out = 0; + } +} + +static void token_callbacks_execute ( + struct totemsrp_instance *instance, + enum totem_callback_token_type type) +{ + struct qb_list_head *list, *tmp_iter; + struct qb_list_head *callback_listhead = 0; + struct token_callback_instance *token_callback_instance; + int res; + int del; + + switch (type) { + 
case TOTEM_CALLBACK_TOKEN_RECEIVED: + callback_listhead = &instance->token_callback_received_listhead; + break; + case TOTEM_CALLBACK_TOKEN_SENT: + callback_listhead = &instance->token_callback_sent_listhead; + break; + default: + assert (0); + } + + qb_list_for_each_safe(list, tmp_iter, callback_listhead) { + token_callback_instance = qb_list_entry (list, struct token_callback_instance, list); + del = token_callback_instance->delete; + if (del == 1) { + qb_list_del (list); + } + + res = token_callback_instance->callback_fn ( + token_callback_instance->callback_type, + token_callback_instance->data); + /* + * This callback failed to execute, try it again on the next token + */ + if (res == -1 && del == 1) { + qb_list_add (list, callback_listhead); + } else if (del) { + free (token_callback_instance); + } + } +} + +/* + * Flow control functions + */ +static unsigned int backlog_get (struct totemsrp_instance *instance) +{ + unsigned int backlog = 0; + struct cs_queue *queue_use = NULL; + + if (instance->memb_state == MEMB_STATE_OPERATIONAL) { + if (instance->waiting_trans_ack) { + queue_use = &instance->new_message_queue_trans; + } else { + queue_use = &instance->new_message_queue; + } + } else + if (instance->memb_state == MEMB_STATE_RECOVERY) { + queue_use = &instance->retrans_message_queue; + } + + if (queue_use != NULL) { + backlog = cs_queue_used (queue_use); + } + + instance->stats.token[instance->stats.latest_token].backlog_calc = backlog; + return (backlog); +} + +static int fcc_calculate ( + struct totemsrp_instance *instance, + struct orf_token *token) +{ + unsigned int transmits_allowed; + unsigned int backlog_calc; + + transmits_allowed = instance->totem_config->max_messages; + + if (transmits_allowed > instance->totem_config->window_size - token->fcc) { + transmits_allowed = instance->totem_config->window_size - token->fcc; + } + + instance->my_cbl = backlog_get (instance); + + /* + * Only do backlog calculation if there is a backlog otherwise + * we 
would result in div by zero + */ + if (token->backlog + instance->my_cbl - instance->my_pbl) { + backlog_calc = (instance->totem_config->window_size * instance->my_pbl) / + (token->backlog + instance->my_cbl - instance->my_pbl); + if (backlog_calc > 0 && transmits_allowed > backlog_calc) { + transmits_allowed = backlog_calc; + } + } + + return (transmits_allowed); +} + +/* + * don't overflow the RTR sort queue + */ +static void fcc_rtr_limit ( + struct totemsrp_instance *instance, + struct orf_token *token, + unsigned int *transmits_allowed) +{ + int check = QUEUE_RTR_ITEMS_SIZE_MAX; + check -= (*transmits_allowed + instance->totem_config->window_size); + assert (check >= 0); + if (sq_lt_compare (instance->last_released + + QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed - + instance->totem_config->window_size, + + token->seq)) { + + *transmits_allowed = 0; + } +} + +static void fcc_token_update ( + struct totemsrp_instance *instance, + struct orf_token *token, + unsigned int msgs_transmitted) +{ + token->fcc += msgs_transmitted - instance->my_trc; + token->backlog += instance->my_cbl - instance->my_pbl; + instance->my_trc = msgs_transmitted; + instance->my_pbl = instance->my_cbl; +} + +/* + * Sanity checkers + */ +static int check_orf_token_sanity( + const struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + int rtr_entries; + const struct orf_token *token = (const struct orf_token *)msg; + size_t required_len; + + if (msg_len < sizeof(struct orf_token)) { + log_printf (instance->totemsrp_log_level_security, + "Received orf_token message is too short... 
ignoring."); + + return (-1); + } + + if (endian_conversion_needed) { + rtr_entries = swab32(token->rtr_list_entries); + } else { + rtr_entries = token->rtr_list_entries; + } + + /* + * rtr_entries comes straight from the wire. A negative count would wrap + * required_len below, and a count above RETRANSMIT_ENTRIES_MAX would later + * size copies of the retransmit list, so reject both here + */ + if (rtr_entries < 0 || rtr_entries > RETRANSMIT_ENTRIES_MAX) { + log_printf (instance->totemsrp_log_level_security, + "Received orf_token message rtr_entries is corrupted... ignoring."); + + return (-1); + } + + required_len = sizeof(struct orf_token) + rtr_entries * sizeof(struct rtr_item); + if (msg_len < required_len) { + log_printf (instance->totemsrp_log_level_security, + "Received orf_token message is too short... ignoring."); + + return (-1); + } + + return (0); +} + +static int check_mcast_sanity( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + + if (msg_len < sizeof(struct mcast)) { + log_printf (instance->totemsrp_log_level_security, + "Received mcast message is too short... ignoring."); + + return (-1); + } + + return (0); +} + +static int check_memb_merge_detect_sanity( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + + if (msg_len < sizeof(struct memb_merge_detect)) { + log_printf (instance->totemsrp_log_level_security, + "Received memb_merge_detect message is too short... ignoring."); + + return (-1); + } + + return (0); +} + +static int check_memb_join_sanity( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + const struct memb_join *mj_msg = (const struct memb_join *)msg; + unsigned int proc_list_entries; + unsigned int failed_list_entries; + size_t required_len; + + if (msg_len < sizeof(struct memb_join)) { + log_printf (instance->totemsrp_log_level_security, + "Received memb_join message is too short...
ignoring."); + + return (-1); + } + + proc_list_entries = mj_msg->proc_list_entries; + failed_list_entries = mj_msg->failed_list_entries; + + if (endian_conversion_needed) { + proc_list_entries = swab32(proc_list_entries); + failed_list_entries = swab32(failed_list_entries); + } + + /* + * Widen before adding: the two counts come from the wire and their + * unsigned int sum could otherwise wrap to a small required_len + */ + required_len = sizeof(struct memb_join) + (((size_t)proc_list_entries + (size_t)failed_list_entries) * sizeof(struct srp_addr)); + if (msg_len < required_len) { + log_printf (instance->totemsrp_log_level_security, + "Received memb_join message is too short... ignoring."); + + return (-1); + } + + return (0); +} + +/* + * Returns 0 when msg is long enough to hold a memb_commit_token header + * followed by addr_entries srp_addr and memb_commit_token_memb_entry + * records, -1 otherwise + */ +static int check_memb_commit_token_sanity( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + const struct memb_commit_token *mct_msg = (const struct memb_commit_token *)msg; + unsigned int addr_entries; + size_t required_len; + + if (msg_len < sizeof(struct memb_commit_token)) { + log_printf (instance->totemsrp_log_level_security, + "Received memb_commit_token message is too short... ignoring."); + + /* + * A truncated header must be reported as a failure, like every + * other sanity checker does + */ + return (-1); + } + + addr_entries= mct_msg->addr_entries; + if (endian_conversion_needed) { + addr_entries = swab32(addr_entries); + } + + required_len = sizeof(struct memb_commit_token) + + (addr_entries * (sizeof(struct srp_addr) + sizeof(struct memb_commit_token_memb_entry))); + if (msg_len < required_len) { + log_printf (instance->totemsrp_log_level_security, + "Received memb_commit_token message is too short... ignoring."); + + return (-1); + } + + return (0); +} + +static int check_token_hold_cancel_sanity( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + + if (msg_len < sizeof(struct token_hold_cancel)) { + log_printf (instance->totemsrp_log_level_security, + "Received token_hold_cancel message is too short...
ignoring."); + + return (-1); + } + + return (0); +} + +/* + * Message Handlers + */ + +unsigned long long int tv_old; +/* + * message handler called when TOKEN message type received + */ +static int message_handler_orf_token ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + char token_storage[1500]; + char token_convert[1500]; + struct orf_token *token = NULL; + int forward_token; + unsigned int transmits_allowed; + unsigned int mcasted_retransmit; + unsigned int mcasted_regular; + unsigned int last_aru; + +#ifdef GIVEINFO + unsigned long long tv_current; + unsigned long long tv_diff; + + tv_current = qb_util_nano_current_get (); + tv_diff = tv_current - tv_old; + tv_old = tv_current; + + log_printf (instance->totemsrp_log_level_debug, + "Time since last token %0.4f ms", ((float)tv_diff) / 1000000.0); +#endif + + if (check_orf_token_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) { + return (0); + } + + if (instance->orf_token_discard) { + return (0); + } +#ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE + if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) { + return (0); + } +#endif + + if (endian_conversion_needed) { + orf_token_endian_convert ((struct orf_token *)msg, + (struct orf_token *)token_convert); + msg = (struct orf_token *)token_convert; + } + + /* + * Make copy of token and retransmit list in case we have + * to flush incoming messages from the kernel queue + */ + token = (struct orf_token *)token_storage; + memcpy (token, msg, sizeof (struct orf_token)); + memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token), + sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX); + + + /* + * Handle merge detection timeout + */ + if (token->seq == instance->my_last_seq) { + start_merge_detect_timeout (instance); + instance->my_seq_unchanged += 1; + } else { + cancel_merge_detect_timeout (instance); + cancel_token_hold_retransmit_timeout (instance); + instance->my_seq_unchanged = 0; + 
} + + instance->my_last_seq = token->seq; + +#ifdef TEST_RECOVERY_MSG_COUNT + if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) { + return (0); + } +#endif + instance->flushing = 1; + totemnet_recv_flush (instance->totemnet_context); + instance->flushing = 0; + + /* + * Determine if we should hold (in reality drop) the token + */ + instance->my_token_held = 0; + if (instance->my_ring_id.rep == instance->my_id.nodeid && + instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) { + instance->my_token_held = 1; + } else { + if (instance->my_ring_id.rep != instance->my_id.nodeid && + instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) { + instance->my_token_held = 1; + } + } + + /* + * Hold onto token when there is no activity on ring and + * this processor is the ring rep + */ + forward_token = 1; + if (instance->my_ring_id.rep == instance->my_id.nodeid) { + if (instance->my_token_held) { + forward_token = 0; + } + } + + switch (instance->memb_state) { + case MEMB_STATE_COMMIT: + /* Discard token */ + break; + + case MEMB_STATE_OPERATIONAL: + messages_free (instance, token->aru); + /* + * Do NOT add break, this case should also execute code in gather case. 
+ */ + + case MEMB_STATE_GATHER: + /* + * DO NOT add break, we use different free mechanism in recovery state + */ + + case MEMB_STATE_RECOVERY: + /* + * Discard tokens from another configuration + */ + if (memcmp (&token->ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)) != 0) { + + if ((forward_token) + && instance->use_heartbeat) { + reset_heartbeat_timeout(instance); + } + else { + cancel_heartbeat_timeout(instance); + } + + return (0); /* discard token */ + } + + /* + * Discard retransmitted tokens + */ + if (sq_lte_compare (token->token_seq, instance->my_token_seq)) { + return (0); /* discard token */ + } + + /* + * Token is valid so trigger callbacks + */ + token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED); + + last_aru = instance->my_last_aru; + instance->my_last_aru = token->aru; + + transmits_allowed = fcc_calculate (instance, token); + mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed); + + if (instance->totem_config->cancel_token_hold_on_retransmit && + instance->my_token_held == 1 && + (token->rtr_list_entries > 0 || mcasted_retransmit > 0)) { + instance->my_token_held = 0; + forward_token = 1; + } + + fcc_rtr_limit (instance, token, &transmits_allowed); + mcasted_regular = orf_token_mcast (instance, token, transmits_allowed); +/* +if (mcasted_regular) { +printf ("mcasted regular %d\n", mcasted_regular); +printf ("token seq %d\n", token->seq); +} +*/ + fcc_token_update (instance, token, mcasted_retransmit + + mcasted_regular); + + if (sq_lt_compare (instance->my_aru, token->aru) || + instance->my_id.nodeid == token->aru_addr || + token->aru_addr == 0) { + + token->aru = instance->my_aru; + if (token->aru == token->seq) { + token->aru_addr = 0; + } else { + token->aru_addr = instance->my_id.nodeid; + } + } + if (token->aru == last_aru && token->aru_addr != 0) { + instance->my_aru_count += 1; + } else { + instance->my_aru_count = 0; + } + + /* + * We really don't follow specification there. 
In specification, OTHER nodes + * detect failure of one node (based on aru_count) and my_id IS NEVER added + * to failed list (so node never mark itself as failed) + */ + if (instance->my_aru_count > instance->totem_config->fail_to_recv_const && + token->aru_addr == instance->my_id.nodeid) { + + log_printf (instance->totemsrp_log_level_error, + "FAILED TO RECEIVE"); + + instance->failed_to_recv = 1; + + memb_set_merge (&instance->my_id, 1, + instance->my_failed_list, + &instance->my_failed_list_entries); + + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FAILED_TO_RECEIVE); + } else { + instance->my_token_seq = token->token_seq; + token->token_seq += 1; + + if (instance->memb_state == MEMB_STATE_RECOVERY) { + /* + * instance->my_aru == instance->my_high_seq_received means this processor + * has recovered all messages it can recover + * (ie: its retrans queue is empty) + */ + if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) { + + if (token->retrans_flg == 0) { + token->retrans_flg = 1; + instance->my_set_retrans_flg = 1; + } + } else + if (token->retrans_flg == 1 && instance->my_set_retrans_flg) { + token->retrans_flg = 0; + instance->my_set_retrans_flg = 0; + } + log_printf (instance->totemsrp_log_level_debug, + "token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x", + token->retrans_flg, instance->my_set_retrans_flg, + cs_queue_is_empty (&instance->retrans_message_queue), + instance->my_retrans_flg_count, token->aru); + if (token->retrans_flg == 0) { + instance->my_retrans_flg_count += 1; + } else { + instance->my_retrans_flg_count = 0; + } + if (instance->my_retrans_flg_count == 2) { + instance->my_install_seq = token->seq; + } + log_printf (instance->totemsrp_log_level_debug, + "install seq %x aru %x high seq received %x", + instance->my_install_seq, instance->my_aru, instance->my_high_seq_received); + if (instance->my_retrans_flg_count >= 2 && + instance->my_received_flg == 0 && + sq_lte_compare 
(instance->my_install_seq, instance->my_aru)) { + instance->my_received_flg = 1; + instance->my_deliver_memb_entries = instance->my_trans_memb_entries; + memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list, + sizeof (struct totem_ip_address) * instance->my_trans_memb_entries); + } + if (instance->my_retrans_flg_count >= 3 && + sq_lte_compare (instance->my_install_seq, token->aru)) { + instance->my_rotation_counter += 1; + } else { + instance->my_rotation_counter = 0; + } + if (instance->my_rotation_counter == 2) { + log_printf (instance->totemsrp_log_level_debug, + "retrans flag count %x token aru %x install seq %x aru %x %x", + instance->my_retrans_flg_count, token->aru, instance->my_install_seq, + instance->my_aru, token->seq); + + memb_state_operational_enter (instance); + instance->my_rotation_counter = 0; + instance->my_retrans_flg_count = 0; + } + } + + totemnet_send_flush (instance->totemnet_context); + token_send (instance, token, forward_token); + +#ifdef GIVEINFO + tv_current = qb_util_nano_current_get (); + tv_diff = tv_current - tv_old; + tv_old = tv_current; + log_printf (instance->totemsrp_log_level_debug, + "I held %0.4f ms", + ((float)tv_diff) / 1000000.0); +#endif + if (instance->memb_state == MEMB_STATE_OPERATIONAL) { + messages_deliver_to_app (instance, 0, + instance->my_high_seq_received); + } + + /* + * Deliver messages after token has been transmitted + * to improve performance + */ + reset_token_timeout (instance); // REVIEWED + reset_token_retransmit_timeout (instance); // REVIEWED + if (instance->my_id.nodeid == instance->my_ring_id.rep && + instance->my_token_held == 1) { + + start_token_hold_retransmit_timeout (instance); + } + + token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_SENT); + } + break; + } + + if ((forward_token) + && instance->use_heartbeat) { + reset_heartbeat_timeout(instance); + } + else { + cancel_heartbeat_timeout(instance); + } + + return (0); +} + +static void messages_deliver_to_app ( + struct 
totemsrp_instance *instance, + int skip, + unsigned int end_point) +{ + struct sort_queue_item *sort_queue_item_p; + unsigned int i; + int res; + struct mcast *mcast_in; + struct mcast mcast_header; + unsigned int range = 0; + int endian_conversion_required; + unsigned int my_high_delivered_stored = 0; + struct srp_addr aligned_system_from; + + range = end_point - instance->my_high_delivered; + + if (range) { + log_printf (instance->totemsrp_log_level_trace, + "Delivering %x to %x", instance->my_high_delivered, + end_point); + } + assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); + my_high_delivered_stored = instance->my_high_delivered; + + /* + * Deliver messages in order from rtr queue to pending delivery queue + */ + for (i = 1; i <= range; i++) { + + void *ptr = 0; + + /* + * If out of range of sort queue, stop assembly + */ + res = sq_in_range (&instance->regular_sort_queue, + my_high_delivered_stored + i); + if (res == 0) { + break; + } + + res = sq_item_get (&instance->regular_sort_queue, + my_high_delivered_stored + i, &ptr); + /* + * If hole, stop assembly + */ + if (res != 0 && skip == 0) { + break; + } + + instance->my_high_delivered = my_high_delivered_stored + i; + + if (res != 0) { + continue; + + } + + sort_queue_item_p = ptr; + + mcast_in = sort_queue_item_p->mcast; + assert (mcast_in != (struct mcast *)0xdeadbeef); + + endian_conversion_required = 0; + if (mcast_in->header.magic != TOTEM_MH_MAGIC) { + endian_conversion_required = 1; + mcast_endian_convert (mcast_in, &mcast_header); + } else { + memcpy (&mcast_header, mcast_in, sizeof (struct mcast)); + } + + aligned_system_from = mcast_header.system_from; + + /* + * Skip messages not originated in instance->my_deliver_memb + */ + if (skip && + memb_set_subset (&aligned_system_from, + 1, + instance->my_deliver_memb_list, + instance->my_deliver_memb_entries) == 0) { + + instance->my_high_delivered = my_high_delivered_stored + i; + + continue; + } + + /* + * Message found + */ + log_printf 
(instance->totemsrp_log_level_trace, + "Delivering MCAST message with seq %x to pending delivery queue", + mcast_header.seq); + + /* + * Message is locally originated multicast + */ + instance->totemsrp_deliver_fn ( + mcast_header.header.nodeid, + ((char *)sort_queue_item_p->mcast) + sizeof (struct mcast), + sort_queue_item_p->msg_len - sizeof (struct mcast), + endian_conversion_required); + } +} + +/* + * recv message handler called when MCAST message type received + */ +static int message_handler_mcast ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + struct sort_queue_item sort_queue_item; + struct sq *sort_queue; + struct mcast mcast_header; + struct srp_addr aligned_system_from; + + if (check_mcast_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) { + return (0); + } + + if (endian_conversion_needed) { + mcast_endian_convert (msg, &mcast_header); + } else { + memcpy (&mcast_header, msg, sizeof (struct mcast)); + } + + if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) { + sort_queue = &instance->recovery_sort_queue; + } else { + sort_queue = &instance->regular_sort_queue; + } + + assert (msg_len <= FRAME_SIZE_MAX); + +#ifdef TEST_DROP_MCAST_PERCENTAGE + if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) { + return (0); + } +#endif + + /* + * If the message is foreign execute the switch below + */ + if (memcmp (&instance->my_ring_id, &mcast_header.ring_id, + sizeof (struct memb_ring_id)) != 0) { + + aligned_system_from = mcast_header.system_from; + + switch (instance->memb_state) { + case MEMB_STATE_OPERATIONAL: + memb_set_merge ( + &aligned_system_from, 1, + instance->my_proc_list, &instance->my_proc_list_entries); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE); + break; + + case MEMB_STATE_GATHER: + if (!memb_set_subset ( + &aligned_system_from, + 1, + instance->my_proc_list, + instance->my_proc_list_entries)) { + + memb_set_merge 
(&aligned_system_from, 1, + instance->my_proc_list, &instance->my_proc_list_entries); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE); + return (0); + } + break; + + case MEMB_STATE_COMMIT: + /* discard message */ + instance->stats.rx_msg_dropped++; + break; + + case MEMB_STATE_RECOVERY: + /* discard message */ + instance->stats.rx_msg_dropped++; + break; + } + return (0); + } + + log_printf (instance->totemsrp_log_level_trace, + "Received ringid (" CS_PRI_RING_ID ") seq %x", + mcast_header.ring_id.rep, + (uint64_t)mcast_header.ring_id.seq, + mcast_header.seq); + + /* + * Add mcast message to rtr queue if not already in rtr queue + * otherwise free io vectors + */ + if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX && + sq_in_range (sort_queue, mcast_header.seq) && + sq_item_inuse (sort_queue, mcast_header.seq) == 0) { + + /* + * Allocate new multicast memory block + */ +// TODO LEAK + sort_queue_item.mcast = totemsrp_buffer_alloc (instance); + if (sort_queue_item.mcast == NULL) { + return (-1); /* error here is corrected by the algorithm */ + } + memcpy (sort_queue_item.mcast, msg, msg_len); + sort_queue_item.msg_len = msg_len; + + if (sq_lt_compare (instance->my_high_seq_received, + mcast_header.seq)) { + instance->my_high_seq_received = mcast_header.seq; + } + + sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq); + } + + update_aru (instance); + if (instance->memb_state == MEMB_STATE_OPERATIONAL) { + messages_deliver_to_app (instance, 0, instance->my_high_seq_received); + } + +/* TODO remove from retrans message queue for old ring in recovery state */ + return (0); +} + +static int message_handler_memb_merge_detect ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + struct memb_merge_detect memb_merge_detect; + struct srp_addr aligned_system_from; + + if (check_memb_merge_detect_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) { + return (0); + 
} + + if (endian_conversion_needed) { + memb_merge_detect_endian_convert (msg, &memb_merge_detect); + } else { + memcpy (&memb_merge_detect, msg, + sizeof (struct memb_merge_detect)); + } + + /* + * do nothing if this is a merge detect from this configuration + */ + if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id, + sizeof (struct memb_ring_id)) == 0) { + + return (0); + } + + aligned_system_from = memb_merge_detect.system_from; + + /* + * Execute merge operation + */ + switch (instance->memb_state) { + case MEMB_STATE_OPERATIONAL: + memb_set_merge (&aligned_system_from, 1, + instance->my_proc_list, &instance->my_proc_list_entries); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE); + break; + + case MEMB_STATE_GATHER: + if (!memb_set_subset ( + &aligned_system_from, + 1, + instance->my_proc_list, + instance->my_proc_list_entries)) { + + memb_set_merge (&aligned_system_from, 1, + instance->my_proc_list, &instance->my_proc_list_entries); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE); + return (0); + } + break; + + case MEMB_STATE_COMMIT: + /* do nothing in commit */ + break; + + case MEMB_STATE_RECOVERY: + /* do nothing in recovery */ + break; + } + return (0); +} + +static void memb_join_process ( + struct totemsrp_instance *instance, + const struct memb_join *memb_join) +{ + struct srp_addr *proc_list; + struct srp_addr *failed_list; + int gather_entered = 0; + int fail_minus_memb_entries = 0; + struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX]; + struct srp_addr aligned_system_from; + + proc_list = (struct srp_addr *)memb_join->end_of_memb_join; + failed_list = proc_list + memb_join->proc_list_entries; + aligned_system_from = memb_join->system_from; + + log_printf(instance->totemsrp_log_level_trace, "memb_join_process"); + memb_set_log(instance, instance->totemsrp_log_level_trace, + "proclist", proc_list, memb_join->proc_list_entries); + memb_set_log(instance, 
instance->totemsrp_log_level_trace, + "faillist", failed_list, memb_join->failed_list_entries); + memb_set_log(instance, instance->totemsrp_log_level_trace, + "my_proclist", instance->my_proc_list, instance->my_proc_list_entries); + memb_set_log(instance, instance->totemsrp_log_level_trace, + "my_faillist", instance->my_failed_list, instance->my_failed_list_entries); + + if (memb_join->header.type == MESSAGE_TYPE_MEMB_JOIN) { + if (instance->flushing) { + if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) { + log_printf (instance->totemsrp_log_level_warning, + "Discarding LEAVE message during flush, nodeid=" CS_PRI_NODE_ID, + memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].nodeid : LEAVE_DUMMY_NODEID); + if (memb_join->failed_list_entries > 0) { + my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].nodeid); + } + } else { + log_printf (instance->totemsrp_log_level_warning, + "Discarding JOIN message during flush, nodeid=" CS_PRI_NODE_ID, memb_join->header.nodeid); + } + return; + } else { + if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) { + log_printf (instance->totemsrp_log_level_debug, + "Received LEAVE message from " CS_PRI_NODE_ID, memb_join->failed_list_entries > 0 ? 
failed_list[memb_join->failed_list_entries - 1 ].nodeid : LEAVE_DUMMY_NODEID); + if (memb_join->failed_list_entries > 0) { + my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].nodeid); + } + } + } + + } + + if (memb_set_equal (proc_list, + memb_join->proc_list_entries, + instance->my_proc_list, + instance->my_proc_list_entries) && + + memb_set_equal (failed_list, + memb_join->failed_list_entries, + instance->my_failed_list, + instance->my_failed_list_entries)) { + + if (memb_join->header.nodeid != LEAVE_DUMMY_NODEID) { + memb_consensus_set (instance, &aligned_system_from); + } + + if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) { + instance->failed_to_recv = 0; + instance->my_proc_list[0] = instance->my_id; + instance->my_proc_list_entries = 1; + instance->my_failed_list_entries = 0; + + memb_state_commit_token_create (instance); + + memb_state_commit_enter (instance); + return; + } + if (memb_consensus_agreed (instance) && + memb_lowest_in_config (instance)) { + + memb_state_commit_token_create (instance); + + memb_state_commit_enter (instance); + } else { + goto out; + } + } else + if (memb_set_subset (proc_list, + memb_join->proc_list_entries, + instance->my_proc_list, + instance->my_proc_list_entries) && + + memb_set_subset (failed_list, + memb_join->failed_list_entries, + instance->my_failed_list, + instance->my_failed_list_entries)) { + + goto out; + } else + if (memb_set_subset (&aligned_system_from, 1, + instance->my_failed_list, instance->my_failed_list_entries)) { + + goto out; + } else { + memb_set_merge (proc_list, + memb_join->proc_list_entries, + instance->my_proc_list, &instance->my_proc_list_entries); + + if (memb_set_subset ( + &instance->my_id, 1, + failed_list, memb_join->failed_list_entries)) { + + memb_set_merge ( + &aligned_system_from, 1, + instance->my_failed_list, &instance->my_failed_list_entries); + } else { + if (memb_set_subset ( + &aligned_system_from, 1, + instance->my_memb_list, + 
instance->my_memb_entries)) { + + if (memb_set_subset ( + &aligned_system_from, 1, + instance->my_failed_list, + instance->my_failed_list_entries) == 0) { + + memb_set_merge (failed_list, + memb_join->failed_list_entries, + instance->my_failed_list, &instance->my_failed_list_entries); + } else { + memb_set_subtract (fail_minus_memb, + &fail_minus_memb_entries, + failed_list, + memb_join->failed_list_entries, + instance->my_memb_list, + instance->my_memb_entries); + + memb_set_merge (fail_minus_memb, + fail_minus_memb_entries, + instance->my_failed_list, + &instance->my_failed_list_entries); + } + } + } + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_JOIN); + gather_entered = 1; + } + +out: + if (gather_entered == 0 && + instance->memb_state == MEMB_STATE_OPERATIONAL) { + + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE); + } +} + +static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out) +{ + int i; + struct srp_addr *in_proc_list; + struct srp_addr *in_failed_list; + struct srp_addr *out_proc_list; + struct srp_addr *out_failed_list; + + out->header.magic = TOTEM_MH_MAGIC; + out->header.version = TOTEM_MH_VERSION; + out->header.type = in->header.type; + out->header.nodeid = swab32 (in->header.nodeid); + out->system_from = srp_addr_endian_convert(in->system_from); + out->proc_list_entries = swab32 (in->proc_list_entries); + out->failed_list_entries = swab32 (in->failed_list_entries); + out->ring_seq = swab64 (in->ring_seq); + + in_proc_list = (struct srp_addr *)in->end_of_memb_join; + in_failed_list = in_proc_list + out->proc_list_entries; + out_proc_list = (struct srp_addr *)out->end_of_memb_join; + out_failed_list = out_proc_list + out->proc_list_entries; + + for (i = 0; i < out->proc_list_entries; i++) { + out_proc_list[i] = srp_addr_endian_convert (in_proc_list[i]); + } + for (i = 0; i < out->failed_list_entries; i++) { + out_failed_list[i] = srp_addr_endian_convert 
(in_failed_list[i]); + } +} + +static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out) +{ + int i; + struct srp_addr *in_addr = (struct srp_addr *)in->end_of_commit_token; + struct srp_addr *out_addr = (struct srp_addr *)out->end_of_commit_token; + struct memb_commit_token_memb_entry *in_memb_list; + struct memb_commit_token_memb_entry *out_memb_list; + + out->header.magic = TOTEM_MH_MAGIC; + out->header.version = TOTEM_MH_VERSION; + out->header.type = in->header.type; + out->header.nodeid = swab32 (in->header.nodeid); + out->token_seq = swab32 (in->token_seq); + out->ring_id.rep = swab32(in->ring_id.rep); + out->ring_id.seq = swab64 (in->ring_id.seq); + out->retrans_flg = swab32 (in->retrans_flg); + out->memb_index = swab32 (in->memb_index); + out->addr_entries = swab32 (in->addr_entries); + + in_memb_list = (struct memb_commit_token_memb_entry *)(in_addr + out->addr_entries); + out_memb_list = (struct memb_commit_token_memb_entry *)(out_addr + out->addr_entries); + for (i = 0; i < out->addr_entries; i++) { + out_addr[i] = srp_addr_endian_convert (in_addr[i]); + + /* + * Only convert the memb entry if it has been set + */ + if (in_memb_list[i].ring_id.rep != 0) { + out_memb_list[i].ring_id.rep = swab32(in_memb_list[i].ring_id.rep); + + out_memb_list[i].ring_id.seq = + swab64 (in_memb_list[i].ring_id.seq); + out_memb_list[i].aru = swab32 (in_memb_list[i].aru); + out_memb_list[i].high_delivered = swab32 (in_memb_list[i].high_delivered); + out_memb_list[i].received_flg = swab32 (in_memb_list[i].received_flg); + } + } +} + +static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out) +{ + int i; + + out->header.magic = TOTEM_MH_MAGIC; + out->header.version = TOTEM_MH_VERSION; + out->header.type = in->header.type; + out->header.nodeid = swab32 (in->header.nodeid); + out->seq = swab32 (in->seq); + out->token_seq = swab32 (in->token_seq); + out->aru = swab32 (in->aru); + out->ring_id.rep 
= swab32(in->ring_id.rep); + out->aru_addr = swab32(in->aru_addr); + out->ring_id.seq = swab64 (in->ring_id.seq); + out->fcc = swab32 (in->fcc); + out->backlog = swab32 (in->backlog); + out->retrans_flg = swab32 (in->retrans_flg); + out->rtr_list_entries = swab32 (in->rtr_list_entries); + for (i = 0; i < out->rtr_list_entries; i++) { + out->rtr_list[i].ring_id.rep = swab32(in->rtr_list[i].ring_id.rep); + out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq); + out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq); + } +} + +static void mcast_endian_convert (const struct mcast *in, struct mcast *out) +{ + out->header.magic = TOTEM_MH_MAGIC; + out->header.version = TOTEM_MH_VERSION; + out->header.type = in->header.type; + out->header.nodeid = swab32 (in->header.nodeid); + out->header.encapsulated = in->header.encapsulated; + + out->seq = swab32 (in->seq); + out->this_seqno = swab32 (in->this_seqno); + out->ring_id.rep = swab32(in->ring_id.rep); + out->ring_id.seq = swab64 (in->ring_id.seq); + out->node_id = swab32 (in->node_id); + out->guarantee = swab32 (in->guarantee); + out->system_from = srp_addr_endian_convert(in->system_from); +} + +static void memb_merge_detect_endian_convert ( + const struct memb_merge_detect *in, + struct memb_merge_detect *out) +{ + out->header.magic = TOTEM_MH_MAGIC; + out->header.version = TOTEM_MH_VERSION; + out->header.type = in->header.type; + out->header.nodeid = swab32 (in->header.nodeid); + out->ring_id.rep = swab32(in->ring_id.rep); + out->ring_id.seq = swab64 (in->ring_id.seq); + out->system_from = srp_addr_endian_convert (in->system_from); +} + +static int ignore_join_under_operational ( + struct totemsrp_instance *instance, + const struct memb_join *memb_join) +{ + struct srp_addr *proc_list; + struct srp_addr *failed_list; + unsigned long long ring_seq; + struct srp_addr aligned_system_from; + + proc_list = (struct srp_addr *)memb_join->end_of_memb_join; + failed_list = proc_list + memb_join->proc_list_entries; + 
ring_seq = memb_join->ring_seq; + aligned_system_from = memb_join->system_from; + + if (memb_set_subset (&instance->my_id, 1, + failed_list, memb_join->failed_list_entries)) { + return (1); + } + + /* + * In operational state, my_proc_list is exactly the same as + * my_memb_list. + */ + if ((memb_set_subset (&aligned_system_from, 1, + instance->my_memb_list, instance->my_memb_entries)) && + (ring_seq < instance->my_ring_id.seq)) { + return (1); + } + + return (0); +} + +static int message_handler_memb_join ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + const struct memb_join *memb_join; + struct memb_join *memb_join_convert = alloca (msg_len); + struct srp_addr aligned_system_from; + + if (check_memb_join_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) { + return (0); + } + + if (endian_conversion_needed) { + memb_join = memb_join_convert; + memb_join_endian_convert (msg, memb_join_convert); + + } else { + memb_join = msg; + } + + aligned_system_from = memb_join->system_from; + + /* + * If the process paused because it wasn't scheduled in a timely + * fashion, flush the join messages because they may be queued + * entries + */ + if (pause_flush (instance)) { + return (0); + } + + if (instance->token_ring_id_seq < memb_join->ring_seq) { + instance->token_ring_id_seq = memb_join->ring_seq; + } + switch (instance->memb_state) { + case MEMB_STATE_OPERATIONAL: + if (!ignore_join_under_operational (instance, memb_join)) { + memb_join_process (instance, memb_join); + } + break; + + case MEMB_STATE_GATHER: + memb_join_process (instance, memb_join); + break; + + case MEMB_STATE_COMMIT: + if (memb_set_subset (&aligned_system_from, + 1, + instance->my_new_memb_list, + instance->my_new_memb_entries) && + + memb_join->ring_seq >= instance->my_ring_id.seq) { + + memb_join_process (instance, memb_join); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE); + } + break; + + 
case MEMB_STATE_RECOVERY: + if (memb_set_subset (&aligned_system_from, + 1, + instance->my_new_memb_list, + instance->my_new_memb_entries) && + + memb_join->ring_seq >= instance->my_ring_id.seq) { + + memb_join_process (instance, memb_join); + memb_recovery_state_token_loss (instance); + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY); + } + break; + } + return (0); +} + +static int message_handler_memb_commit_token ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + struct memb_commit_token *memb_commit_token_convert = alloca (msg_len); + struct memb_commit_token *memb_commit_token; + struct srp_addr sub[PROCESSOR_COUNT_MAX]; + int sub_entries; + + struct srp_addr *addr; + + log_printf (instance->totemsrp_log_level_debug, + "got commit token"); + + if (check_memb_commit_token_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) { + return (0); + } + + if (endian_conversion_needed) { + memb_commit_token_endian_convert (msg, memb_commit_token_convert); + } else { + memcpy (memb_commit_token_convert, msg, msg_len); + } + memb_commit_token = memb_commit_token_convert; + addr = (struct srp_addr *)memb_commit_token->end_of_commit_token; + +#ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE + if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) { + return (0); + } +#endif + switch (instance->memb_state) { + case MEMB_STATE_OPERATIONAL: + /* discard token */ + break; + + case MEMB_STATE_GATHER: + memb_set_subtract (sub, &sub_entries, + instance->my_proc_list, instance->my_proc_list_entries, + instance->my_failed_list, instance->my_failed_list_entries); + + if (memb_set_equal (addr, + memb_commit_token->addr_entries, + sub, + sub_entries) && + + memb_commit_token->ring_id.seq > instance->my_ring_id.seq) { + memcpy (instance->commit_token, memb_commit_token, msg_len); + memb_state_commit_enter (instance); + } + break; + + case MEMB_STATE_COMMIT: + /* + * If retransmitted commit tokens 
are sent on this ring + * filter them out and only enter recovery once the + * commit token has traversed the array. This is + * determined by : + * memb_commit_token->memb_index == memb_commit_token->addr_entries) { + */ + if (memb_commit_token->ring_id.seq == instance->my_ring_id.seq && + memb_commit_token->memb_index == memb_commit_token->addr_entries) { + memb_state_recovery_enter (instance, memb_commit_token); + } + break; + + case MEMB_STATE_RECOVERY: + if (instance->my_id.nodeid == instance->my_ring_id.rep) { + + /* Filter out duplicated tokens */ + if (instance->originated_orf_token) { + break; + } + + instance->originated_orf_token = 1; + + log_printf (instance->totemsrp_log_level_debug, + "Sending initial ORF token"); + + // TODO convert instead of initiate + orf_token_send_initial (instance); + reset_token_timeout (instance); // REVIEWED + reset_token_retransmit_timeout (instance); // REVIEWED + } + break; + } + return (0); +} + +static int message_handler_token_hold_cancel ( + struct totemsrp_instance *instance, + const void *msg, + size_t msg_len, + int endian_conversion_needed) +{ + const struct token_hold_cancel *token_hold_cancel = msg; + + if (check_token_hold_cancel_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) { + return (0); + } + + if (memcmp (&token_hold_cancel->ring_id, &instance->my_ring_id, + sizeof (struct memb_ring_id)) == 0) { + + instance->my_seq_unchanged = 0; + if (instance->my_ring_id.rep == instance->my_id.nodeid) { + timer_function_token_retransmit_timeout (instance); + } + } + return (0); +} + +static int check_message_header_validity( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from) +{ + struct totemsrp_instance *instance = context; + const struct totem_message_header *message_header = msg; + const char *guessed_str; + const char *msg_byte = msg; + + if (msg_len < sizeof (struct totem_message_header)) { + log_printf (instance->totemsrp_log_level_security, 
+ "Message received from %s is too short... Ignoring %u.", + totemip_sa_print((struct sockaddr *)system_from), (unsigned int)msg_len); + return (-1); + } + + if (message_header->magic != TOTEM_MH_MAGIC && + message_header->magic != swab16(TOTEM_MH_MAGIC)) { + /* + * We've received either Knet, old version of Corosync, + * or something else. Do some guessing to display (hopefully) + * helpful message + */ + guessed_str = NULL; + + if (message_header->magic == 0xFFFF) { + /* + * Corosync 2.2 used header with two UINT8_MAX + */ + guessed_str = "Corosync 2.2"; + } else if (message_header->magic == 0xFEFE) { + /* + * Corosync 2.3+ used header with two UINT8_MAX - 1 + */ + guessed_str = "Corosync 2.3+"; + } else if (msg_byte[0] == 0x01) { + /* + * Knet has stable1 with first byte of message == 1 + */ + guessed_str = "unencrypted Kronosnet"; + } else if (msg_byte[0] >= 0 && msg_byte[0] <= 5) { + /* + * Unencrypted Corosync 1.x/OpenAIS has first byte + * 0-5. Collision with Knet (but still worth the try) + */ + guessed_str = "unencrypted Corosync 2.0/2.1/1.x/OpenAIS"; + } else { + /* + * Encrypted Kronosnet packet has a hash at the end of + * the packet and nothing specific at the beginning of the + * packet (just encrypted data). + * Encrypted Corosync 1.x/OpenAIS is quite similar but hash_digest + * is in the beginning of the packet. + * + * So it's not possible to reliably detect either of them. + */ + guessed_str = "encrypted Kronosnet/Corosync 2.0/2.1/1.x/OpenAIS or unknown"; + } + + log_printf(instance->totemsrp_log_level_security, + "Message received from %s has bad magic number (probably sent by %s).. Ignoring", + totemip_sa_print((struct sockaddr *)system_from), + guessed_str); + + return (-1); + } + + if (message_header->version != TOTEM_MH_VERSION) { + log_printf(instance->totemsrp_log_level_security, + "Message received from %s has unsupported version %u... 
Ignoring", + totemip_sa_print((struct sockaddr *)system_from), + message_header->version); + + return (-1); + } + + return (0); +} + + +int main_deliver_fn ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from) +{ + struct totemsrp_instance *instance = context; + const struct totem_message_header *message_header = msg; + + if (check_message_header_validity(context, msg, msg_len, system_from) == -1) { + return -1; + } + + switch (message_header->type) { + case MESSAGE_TYPE_ORF_TOKEN: + instance->stats.orf_token_rx++; + break; + case MESSAGE_TYPE_MCAST: + instance->stats.mcast_rx++; + break; + case MESSAGE_TYPE_MEMB_MERGE_DETECT: + instance->stats.memb_merge_detect_rx++; + break; + case MESSAGE_TYPE_MEMB_JOIN: + instance->stats.memb_join_rx++; + break; + case MESSAGE_TYPE_MEMB_COMMIT_TOKEN: + instance->stats.memb_commit_token_rx++; + break; + case MESSAGE_TYPE_TOKEN_HOLD_CANCEL: + instance->stats.token_hold_cancel_rx++; + break; + default: + log_printf (instance->totemsrp_log_level_security, + "Message received from %s has wrong type... 
ignoring %d.\n", + totemip_sa_print((struct sockaddr *)system_from), + (int)message_header->type); + + instance->stats.rx_msg_dropped++; + return 0; + } + /* + * Handle incoming message + */ + return totemsrp_message_handlers.handler_functions[(int)message_header->type] ( + instance, + msg, + msg_len, + message_header->magic != TOTEM_MH_MAGIC); +} + +int totemsrp_iface_set ( + void *context, + const struct totem_ip_address *interface_addr, + unsigned short ip_port, + unsigned int iface_no) +{ + struct totemsrp_instance *instance = context; + int res; + + totemip_copy(&instance->my_addrs[iface_no], interface_addr); + + res = totemnet_iface_set ( + instance->totemnet_context, + interface_addr, + ip_port, + iface_no); + + return (res); +} + +/* Contrary to its name, this only gets called when the interface is enabled */ +int main_iface_change_fn ( + void *context, + const struct totem_ip_address *iface_addr, + unsigned int iface_no) +{ + struct totemsrp_instance *instance = context; + int num_interfaces; + int i; + int res = 0; + + if (!instance->my_id.nodeid) { + instance->my_id.nodeid = iface_addr->nodeid; + } + totemip_copy (&instance->my_addrs[iface_no], iface_addr); + + if (instance->iface_changes++ == 0) { + instance->memb_ring_id_create_or_load (&instance->my_ring_id, instance->my_id.nodeid); + /* + * Increase the ring_id sequence number. This doesn't follow specification. + * Solves problem with restarted leader node (node with lowest nodeid) before + * rest of the cluster forms new membership and guarantees unique ring_id for + * new singleton configuration. 
+ */ + instance->my_ring_id.seq++; + + instance->token_ring_id_seq = instance->my_ring_id.seq; + log_printf ( + instance->totemsrp_log_level_debug, + "Created or loaded sequence id " CS_PRI_RING_ID " for this ring.", + instance->my_ring_id.rep, + (uint64_t)instance->my_ring_id.seq); + + if (instance->totemsrp_service_ready_fn) { + instance->totemsrp_service_ready_fn (); + } + + } + + num_interfaces = 0; + for (i = 0; i < INTERFACE_MAX; i++) { + if (instance->totem_config->interfaces[i].configured) { + num_interfaces++; + } + } + + if (instance->iface_changes >= num_interfaces) { + /* We need to clear orig_interfaces so that 'commit' diffs against nothing */ + instance->totem_config->orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); + assert(instance->totem_config->orig_interfaces != NULL); + memset(instance->totem_config->orig_interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX); + + res = totemconfig_commit_new_params(instance->totem_config, icmap_get_global_map()); + + memb_state_gather_enter (instance, TOTEMSRP_GSFROM_INTERFACE_CHANGE); + free(instance->totem_config->orig_interfaces); + } + return res; +} + +void totemsrp_net_mtu_adjust (struct totem_config *totem_config) { + totem_config->net_mtu -= 2 * sizeof (struct mcast); +} + +void totemsrp_service_ready_register ( + void *context, + void (*totem_service_ready) (void)) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + + instance->totemsrp_service_ready_fn = totem_service_ready; +} + +int totemsrp_member_add ( + void *context, + const struct totem_ip_address *member, + int iface_no) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + int res; + + res = totemnet_member_add (instance->totemnet_context, &instance->my_addrs[iface_no], member, iface_no); + + return (res); +} + +int totemsrp_member_remove ( + void *context, + const struct totem_ip_address *member, + int iface_no) +{ + struct totemsrp_instance 
*instance = (struct totemsrp_instance *)context; + int res; + + res = totemnet_member_remove (instance->totemnet_context, member, iface_no); + + return (res); +} + +void totemsrp_threaded_mode_enable (void *context) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + + instance->threaded_mode_enabled = 1; +} + +void totemsrp_trans_ack (void *context) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + + instance->waiting_trans_ack = 0; + instance->totemsrp_waiting_trans_ack_cb_fn (0); +} + + +int totemsrp_reconfigure (void *context, struct totem_config *totem_config) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + int res; + + res = totemnet_reconfigure (instance->totemnet_context, totem_config); + return (res); +} + +int totemsrp_crypto_reconfigure_phase (void *context, struct totem_config *totem_config, cfg_message_crypto_reconfig_phase_t phase) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + int res; + + res = totemnet_crypto_reconfigure_phase (instance->totemnet_context, totem_config, phase); + return (res); +} + +void totemsrp_stats_clear (void *context, int flags) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + + memset(&instance->stats, 0, sizeof(totemsrp_stats_t)); + if (flags & TOTEMPG_STATS_CLEAR_TRANSPORT) { + totemnet_stats_clear (instance->totemnet_context); + } +} + +void totemsrp_force_gather (void *context) +{ + timer_function_orf_token_timeout(context); +} diff --git a/exec/totemsrp.h b/exec/totemsrp.h new file mode 100644 index 0000000..49e0095 --- /dev/null +++ b/exec/totemsrp.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2003-2005 MontaVista Software, Inc. + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * @file + * Totem Single Ring Protocol + * + * depends on poll abstraction, POSIX, IPV4 + */ + +#ifndef TOTEMSRP_H_DEFINED +#define TOTEMSRP_H_DEFINED + +#include <corosync/totem/totem.h> +#include <qb/qbloop.h> + +/** + * Create a protocol instance + */ +int totemsrp_initialize ( + qb_loop_t *poll_handle, + void **srp_context, + struct totem_config *totem_config, + totempg_stats_t *stats, + + void (*deliver_fn) ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required), + void (*confchg_fn) ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id), + void (*waiting_trans_ack_cb_fn) ( + int waiting_trans_ack)); + +void totemsrp_finalize (void *srp_context); + +/** + * Multicast a message + */ +int totemsrp_mcast ( + void *srp_context, + struct iovec *iovec, + unsigned int iov_len, + int priority); + +/** + * Return number of available messages that can be queued + */ +int totemsrp_avail (void *srp_context); + +int totemsrp_callback_token_create ( + void *srp_context, + void **handle_out, + enum totem_callback_token_type type, + int delete, + int (*callback_fn) (enum totem_callback_token_type type, const void *), + const void *data); + +void totemsrp_callback_token_destroy ( + void *srp_context, + void **handle_out); + +void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value); + +extern void totemsrp_net_mtu_adjust (struct totem_config *totem_config); + +extern int totemsrp_nodestatus_get (void *srp_context, unsigned int nodeid, + struct totem_node_status *node_status); + +extern int totemsrp_ifaces_get ( + void *srp_context, + unsigned int nodeid, + unsigned int *interface_id, + struct totem_ip_address *interfaces, + unsigned int interfaces_size, + char ***status, + 
unsigned int *iface_count); + +extern unsigned int totemsrp_my_nodeid_get ( + void *srp_context); + +extern int totemsrp_my_family_get ( + void *srp_context); + +extern int totemsrp_crypto_set ( + void *srp_context, + const char *cipher_type, + const char *hash_type); + +void totemsrp_service_ready_register ( + void *srp_context, + void (*totem_service_ready) (void)); + +extern int totemsrp_iface_set ( + void *srp_context, + const struct totem_ip_address *interface_addr, + unsigned short ip_port, + unsigned int iface_no); + +extern int totemsrp_member_add ( + void *srp_context, + const struct totem_ip_address *member, + int ring_no); + +extern int totemsrp_member_remove ( + void *srp_context, + const struct totem_ip_address *member, + int ring_no); + +void totemsrp_threaded_mode_enable ( + void *srp_context); + +void totemsrp_trans_ack ( + void *srp_context); + +int totemsrp_reconfigure ( + void *context, + struct totem_config *totem_config); + +int totemsrp_crypto_reconfigure_phase ( + void *context, + struct totem_config *totem_config, + cfg_message_crypto_reconfig_phase_t phase); + +void totemsrp_stats_clear ( + void *srp_context, int flags); + +void totemsrp_force_gather ( + void *context); + +#endif /* TOTEMSRP_H_DEFINED */ diff --git a/exec/totemudp.c b/exec/totemudp.c new file mode 100644 index 0000000..0ebe127 --- /dev/null +++ b/exec/totemudp.c @@ -0,0 +1,1549 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2018 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <assert.h> +#include <pthread.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <netdb.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <sys/param.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <sched.h> +#include <time.h> +#include <sys/time.h> +#include <sys/poll.h> +#include <sys/uio.h> +#include <limits.h> + +#include <corosync/sq.h> +#include <corosync/swab.h> +#include <qb/qbdefs.h> +#include <qb/qbloop.h> +#define LOGSYS_UTILS_ONLY 1 +#include <corosync/logsys.h> +#include "totemudp.h" + +#include "util.h" + +#ifndef MSG_NOSIGNAL +#define MSG_NOSIGNAL 0 +#endif + +#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) +#define NETIF_STATE_REPORT_UP 1 +#define NETIF_STATE_REPORT_DOWN 2 + +#define BIND_STATE_UNBOUND 0 +#define BIND_STATE_REGULAR 1 +#define BIND_STATE_LOOPBACK 2 + +struct totemudp_member { + struct qb_list_head list; + struct totem_ip_address member; +}; + +struct totemudp_socket { + int mcast_recv; + int mcast_send; + int token; + /* + * Socket used for local multicast delivery. We don't rely on multicast + * loop and rather this UNIX DGRAM socket is used. 
Socket is created by + * socketpair call and they are used in same way as pipe (so [0] is read + * end and [1] is write end) + */ + int local_mcast_loop[2]; +}; + +struct totemudp_instance { + qb_loop_t *totemudp_poll_handle; + + struct totem_interface *totem_interface; + + int netif_state_report; + + int netif_bind_state; + + void *context; + + int (*totemudp_deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from); + + int (*totemudp_iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no); + + void (*totemudp_target_set_completed) (void *context); + + /* + * Function and data used to log messages + */ + int totemudp_log_level_security; + + int totemudp_log_level_error; + + int totemudp_log_level_warning; + + int totemudp_log_level_notice; + + int totemudp_log_level_debug; + + int totemudp_subsys_id; + + void (*totemudp_log_printf) ( + int level, + int subsys, + const char *function, + const char *file, + int line, + const char *format, + ...)__attribute__((format(printf, 6, 7))); + + void *udp_context; + + struct qb_list_head member_list; + + char iov_buffer[UDP_RECEIVE_FRAME_SIZE_MAX]; + + char iov_buffer_flush[UDP_RECEIVE_FRAME_SIZE_MAX]; + + struct iovec totemudp_iov_recv; + + struct iovec totemudp_iov_recv_flush; + + struct totemudp_socket totemudp_sockets; + + struct totem_ip_address mcast_address; + + int stats_sent; + + int stats_recv; + + int stats_delv; + + int stats_remcasts; + + int stats_orf_token; + + struct timeval stats_tv_start; + + struct totem_ip_address my_id; + + int firstrun; + + qb_loop_timer_handle timer_netif_check_timeout; + + unsigned int my_memb_entries; + + int flushing; + + struct totem_config *totem_config; + + totemsrp_stats_t *stats; + + struct totem_ip_address token_target; +}; + +struct work_item { + const void *msg; + unsigned int msg_len; + struct totemudp_instance *instance; +}; + +static int 
totemudp_build_sockets ( + struct totemudp_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *mcastaddress, + struct totemudp_socket *sockets, + struct totem_ip_address *bound_to); + +static struct totem_ip_address localhost; + +static void totemudp_instance_initialize (struct totemudp_instance *instance) +{ + memset (instance, 0, sizeof (struct totemudp_instance)); + + instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; + + instance->totemudp_iov_recv.iov_base = instance->iov_buffer; + + instance->totemudp_iov_recv.iov_len = UDP_RECEIVE_FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); + instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush; + + instance->totemudp_iov_recv_flush.iov_len = UDP_RECEIVE_FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); + + /* + * There is always atleast 1 processor + */ + instance->my_memb_entries = 1; + + qb_list_init (&instance->member_list); +} + +#define log_printf(level, format, args...) \ +do { \ + instance->totemudp_log_printf ( \ + level, instance->totemudp_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ +} while (0); + +#define LOGSYS_PERROR(err_num, level, fmt, args...) 
\ +do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + instance->totemudp_log_printf ( \ + level, instance->totemudp_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ + } while(0) + +int totemudp_crypto_set ( + void *udp_context, + const char *cipher_type, + const char *hash_type) +{ + + return (0); +} + + +static inline void ucast_sendmsg ( + struct totemudp_instance *instance, + struct totem_ip_address *system_to, + const void *msg, + unsigned int msg_len) +{ + struct msghdr msg_ucast; + int res = 0; + struct sockaddr_storage sockaddr; + struct iovec iovec; + int addrlen; + + iovec.iov_base = (void*)msg; + iovec.iov_len = msg_len; + + /* + * Build unicast message + */ + memset(&msg_ucast, 0, sizeof(msg_ucast)); + totemip_totemip_to_sockaddr_convert(system_to, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + msg_ucast.msg_name = &sockaddr; + msg_ucast.msg_namelen = addrlen; + msg_ucast.msg_iov = (void *)&iovec; + msg_ucast.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_ucast.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_ucast.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_ucast.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_ucast.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_ucast.msg_accrightslen = 0; +#endif + + + /* + * Transmit unicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast, + MSG_NOSIGNAL); + if (res < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, + "sendmsg(ucast) failed (non-critical)"); + } +} + +static inline void mcast_sendmsg ( + struct totemudp_instance *instance, + const void *msg, + unsigned int msg_len) +{ + struct msghdr msg_mcast; + int res = 0; + struct iovec iovec; + struct sockaddr_storage sockaddr; + int addrlen; + + 
iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + + /* + * Build multicast message + */ + totemip_totemip_to_sockaddr_convert(&instance->mcast_address, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + memset(&msg_mcast, 0, sizeof(msg_mcast)); + msg_mcast.msg_name = &sockaddr; + msg_mcast.msg_namelen = addrlen; + msg_mcast.msg_iov = (void *)&iovec; + msg_mcast.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_mcast.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_mcast.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_mcast.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_mcast.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_mcast.msg_accrightslen = 0; +#endif + + /* + * Transmit multicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast, + MSG_NOSIGNAL); + if (res < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, + "sendmsg(mcast) failed (non-critical)"); + instance->stats->continuous_sendmsg_failures++; + } else { + instance->stats->continuous_sendmsg_failures = 0; + } + + /* + * Transmit multicast message to local unix mcast loop + * An error here is recovered by totemsrp + */ + msg_mcast.msg_name = NULL; + msg_mcast.msg_namelen = 0; + + res = sendmsg (instance->totemudp_sockets.local_mcast_loop[1], &msg_mcast, + MSG_NOSIGNAL); + if (res < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, + "sendmsg(local mcast loop) failed (non-critical)"); + } +} + + +int totemudp_finalize ( + void *udp_context) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + int res = 0; + + if (instance->totemudp_sockets.mcast_recv > 0) { + qb_loop_poll_del (instance->totemudp_poll_handle, + instance->totemudp_sockets.mcast_recv); + close (instance->totemudp_sockets.mcast_recv); + } + if (instance->totemudp_sockets.mcast_send > 0) { + close 
(instance->totemudp_sockets.mcast_send); + } + if (instance->totemudp_sockets.local_mcast_loop[0] > 0) { + qb_loop_poll_del (instance->totemudp_poll_handle, + instance->totemudp_sockets.local_mcast_loop[0]); + close (instance->totemudp_sockets.local_mcast_loop[0]); + close (instance->totemudp_sockets.local_mcast_loop[1]); + } + if (instance->totemudp_sockets.token > 0) { + qb_loop_poll_del (instance->totemudp_poll_handle, + instance->totemudp_sockets.token); + close (instance->totemudp_sockets.token); + } + + return (res); +} + +/* + * Only designed to work with a message with one iov + */ + +static int net_deliver_fn ( + int fd, + int revents, + void *data) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)data; + struct msghdr msg_recv; + struct iovec *iovec; + struct sockaddr_storage system_from; + int bytes_received; + int truncated_packet; + + if (instance->flushing == 1) { + iovec = &instance->totemudp_iov_recv_flush; + } else { + iovec = &instance->totemudp_iov_recv; + } + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = iovec; + msg_recv.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_recv.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_recv.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_recv.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_recv.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_recv.msg_accrightslen = 0; +#endif + + bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (bytes_received == -1) { + return (0); + } else { + instance->stats_recv += bytes_received; + } + + truncated_packet = 0; + +#ifdef HAVE_MSGHDR_FLAGS + if (msg_recv.msg_flags & MSG_TRUNC) { + truncated_packet = 1; + } +#else + /* + * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that + * if bytes_received == UDP_RECIEVE_FRAME_SIZE_MAX then packet is 
truncated + */ + if (bytes_received == UDP_RECEIVE_FRAME_SIZE_MAX) { + truncated_packet = 1; + } +#endif + + if (truncated_packet) { + log_printf (instance->totemudp_log_level_error, + "Received too big message. This may be because something bad is happening" + "on the network (attack?), or you tried join more nodes than corosync is" + "compiled with (%u) or bug in the code (bad estimation of " + "the UDP_RECEIVE_FRAME_SIZE_MAX). Dropping packet.", PROCESSOR_COUNT_MAX); + return (0); + } + + iovec->iov_len = bytes_received; + + /* + * Handle incoming message + */ + instance->totemudp_deliver_fn ( + instance->context, + iovec->iov_base, + iovec->iov_len, + &system_from); + + iovec->iov_len = UDP_RECEIVE_FRAME_SIZE_MAX; + return (0); +} + +static int netif_determine ( + struct totemudp_instance *instance, + struct totem_ip_address *bindnet, + struct totem_ip_address *bound_to, + int *interface_up, + int *interface_num) +{ + int res; + + res = totemip_iface_check (bindnet, bound_to, + interface_up, interface_num, + instance->totem_config->clear_node_high_bit); + + + return (res); +} + + +/* + * If the interface is up, the sockets for totem are built. If the interface is down + * this function is requeued in the timer list to retry building the sockets later. 
+ */ +static void timer_function_netif_check_timeout ( + void *data) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)data; + int interface_up; + int interface_num; + struct totem_ip_address *bind_address; + + /* + * Build sockets for every interface + */ + netif_determine (instance, + &instance->totem_interface->bindnet, + &instance->totem_interface->boundto, + &interface_up, &interface_num); + /* + * If the network interface isn't back up and we are already + * in loopback mode, add timer to check again and return + */ + if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && + interface_up == 0) || + + (instance->my_memb_entries == 1 && + instance->netif_bind_state == BIND_STATE_REGULAR && + interface_up == 1)) { + + qb_loop_timer_add (instance->totemudp_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + /* + * Add a timer to check for a downed regular interface + */ + return; + } + + if (instance->totemudp_sockets.mcast_recv > 0) { + qb_loop_poll_del (instance->totemudp_poll_handle, + instance->totemudp_sockets.mcast_recv); + close (instance->totemudp_sockets.mcast_recv); + } + if (instance->totemudp_sockets.mcast_send > 0) { + close (instance->totemudp_sockets.mcast_send); + } + if (instance->totemudp_sockets.local_mcast_loop[0] > 0) { + qb_loop_poll_del (instance->totemudp_poll_handle, + instance->totemudp_sockets.local_mcast_loop[0]); + close (instance->totemudp_sockets.local_mcast_loop[0]); + close (instance->totemudp_sockets.local_mcast_loop[1]); + } + if (instance->totemudp_sockets.token > 0) { + qb_loop_poll_del (instance->totemudp_poll_handle, + instance->totemudp_sockets.token); + close (instance->totemudp_sockets.token); + } + + if (interface_up == 0) { + /* + * Interface is not up + */ + instance->netif_bind_state = BIND_STATE_LOOPBACK; + bind_address = &localhost; + + /* + * Add a 
timer to retry building interfaces and request memb_gather_enter + */ + qb_loop_timer_add (instance->totemudp_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } else { + /* + * Interface is up + */ + instance->netif_bind_state = BIND_STATE_REGULAR; + bind_address = &instance->totem_interface->bindnet; + } + /* + * Create and bind the multicast and unicast sockets + */ + (void)totemudp_build_sockets (instance, + &instance->mcast_address, + bind_address, + &instance->totemudp_sockets, + &instance->totem_interface->boundto); + + qb_loop_poll_add ( + instance->totemudp_poll_handle, + QB_LOOP_MED, + instance->totemudp_sockets.mcast_recv, + POLLIN, instance, net_deliver_fn); + + qb_loop_poll_add ( + instance->totemudp_poll_handle, + QB_LOOP_MED, + instance->totemudp_sockets.local_mcast_loop[0], + POLLIN, instance, net_deliver_fn); + + qb_loop_poll_add ( + instance->totemudp_poll_handle, + QB_LOOP_MED, + instance->totemudp_sockets.token, + POLLIN, instance, net_deliver_fn); + + totemip_copy (&instance->my_id, &instance->totem_interface->boundto); + + /* + * This reports changes in the interface to the user and totemsrp + */ + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { + log_printf (instance->totemudp_log_level_notice, + "The network interface [%s] is now up.", + totemip_print (&instance->totem_interface->boundto)); + instance->netif_state_report = NETIF_STATE_REPORT_DOWN; + instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0); + } + /* + * Add a timer to check for interface going down in single membership + */ + if (instance->my_memb_entries == 1) { + qb_loop_timer_add (instance->totemudp_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + 
timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + } else { + if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { + log_printf (instance->totemudp_log_level_notice, + "The network interface is down."); + instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0); + } + instance->netif_state_report = NETIF_STATE_REPORT_UP; + + } +} + +/* Set the socket priority to INTERACTIVE to ensure + that our messages don't get queued behind anything else */ +static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock) +{ +#ifdef SO_PRIORITY + int prio = 6; /* TC_PRIO_INTERACTIVE */ + + if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority"); + } +#endif +} + +static int totemudp_build_sockets_ip ( + struct totemudp_instance *instance, + struct totem_ip_address *mcast_address, + struct totem_ip_address *bindnet_address, + struct totemudp_socket *sockets, + struct totem_ip_address *bound_to, + int interface_num) +{ + struct sockaddr_storage sockaddr; + struct ipv6_mreq mreq6; + struct ip_mreq mreq; + struct sockaddr_storage mcast_ss, boundto_ss; + struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss; + struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss; + struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss; + unsigned int sendbuf_size; + unsigned int recvbuf_size; + unsigned int optlen = sizeof (sendbuf_size); + unsigned int retries; + int addrlen; + int res; + int flag; + uint8_t sflag; + int i; + + /* + * Create multicast recv socket + */ + sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0); + if (sockets->mcast_recv == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "socket() failed"); + return (-1); + } + + totemip_nosigpipe (sockets->mcast_recv); + res = fcntl (sockets->mcast_recv, F_SETFL, 
O_NONBLOCK); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "Could not set non-blocking operation on multicast socket"); + return (-1); + } + + /* + * Force reuse + */ + flag = 1; + if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "setsockopt(SO_REUSEADDR) failed"); + return (-1); + } + + /* + * Create local multicast loop socket + */ + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets->local_mcast_loop) == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "socket() failed"); + return (-1); + } + + for (i = 0; i < 2; i++) { + totemip_nosigpipe (sockets->local_mcast_loop[i]); + res = fcntl (sockets->local_mcast_loop[i], F_SETFL, O_NONBLOCK); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "Could not set non-blocking operation on multicast socket"); + return (-1); + } + } + + + + /* + * Setup mcast send socket + */ + sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0); + if (sockets->mcast_send == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "socket() failed"); + return (-1); + } + + totemip_nosigpipe (sockets->mcast_send); + res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "Could not set non-blocking operation on multicast socket"); + return (-1); + } + + /* + * Force reuse + */ + flag = 1; + if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "setsockopt(SO_REUSEADDR) failed"); + return (-1); + } + + totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1, + &sockaddr, &addrlen); + + retries = 0; + while (1) { + res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen); + if (res == 
0) { + break; + } + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "Unable to bind the socket to send multicast packets"); + if (++retries > BIND_MAX_RETRIES) { + break; + } + + /* + * Wait for a while + */ + (void)poll(NULL, 0, BIND_RETRIES_INTERVAL * retries); + } + if (res == -1) { + return (-1); + } + + /* + * Setup unicast socket + */ + sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0); + if (sockets->token == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "socket() failed"); + return (-1); + } + + totemip_nosigpipe (sockets->token); + res = fcntl (sockets->token, F_SETFL, O_NONBLOCK); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "Could not set non-blocking operation on token socket"); + return (-1); + } + + /* + * Force reuse + */ + flag = 1; + if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "setsockopt(SO_REUSEADDR) failed"); + return (-1); + } + + /* + * Bind to unicast socket used for token send/receives + * This has the side effect of binding to the correct interface + */ + totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); + + retries = 0; + while (1) { + res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen); + if (res == 0) { + break; + } + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "Unable to bind UDP unicast socket"); + if (++retries > BIND_MAX_RETRIES) { + break; + } + + /* + * Wait for a while + */ + (void)poll(NULL, 0, BIND_RETRIES_INTERVAL * retries); + } + if (res == -1) { + return (-1); + } + + recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; + sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; + /* + * Set buffer sizes to avoid overruns + */ + res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, 
instance->totemudp_log_level_debug, + "Unable to set SO_RCVBUF size on UDP mcast socket"); + return (-1); + } + res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, + "Unable to set SO_SNDBUF size on UDP mcast socket"); + return (-1); + } + res = setsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, + "Unable to set SO_RCVBUF size on UDP local mcast loop socket"); + return (-1); + } + res = setsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, + "Unable to set SO_SNDBUF size on UDP local mcast loop socket"); + return (-1); + } + + res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); + if (res == 0) { + log_printf (instance->totemudp_log_level_debug, + "Receive multicast socket recv buffer size (%d bytes).", recvbuf_size); + } + + res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); + if (res == 0) { + log_printf (instance->totemudp_log_level_debug, + "Transmit multicast socket send buffer size (%d bytes).", sendbuf_size); + } + + res = getsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); + if (res == 0) { + log_printf (instance->totemudp_log_level_debug, + "Local receive multicast loop socket recv buffer size (%d bytes).", recvbuf_size); + } + + res = getsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); + if (res == 0) { + log_printf (instance->totemudp_log_level_debug, + "Local transmit multicast loop socket send buffer size (%d bytes).", sendbuf_size); + } + + + /* + * Join group membership on socket + */ + totemip_totemip_to_sockaddr_convert(mcast_address, 
instance->totem_interface->ip_port, &mcast_ss, &addrlen); + totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen); + + if (instance->totem_config->broadcast_use == 1) { + unsigned int broadcast = 1; + + if ((setsockopt(sockets->mcast_recv, SOL_SOCKET, + SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "setting broadcast option failed"); + return (-1); + } + if ((setsockopt(sockets->mcast_send, SOL_SOCKET, + SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "setting broadcast option failed"); + return (-1); + } + } else { + switch (bindnet_address->family) { + case AF_INET: + memset(&mreq, 0, sizeof(mreq)); + mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr; + mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr; + res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &mreq, sizeof (mreq)); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "join ipv4 multicast group failed"); + return (-1); + } + break; + case AF_INET6: + memset(&mreq6, 0, sizeof(mreq6)); + memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr)); + mreq6.ipv6mr_interface = interface_num; + + res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP, + &mreq6, sizeof (mreq6)); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "join ipv6 multicast group failed"); + return (-1); + } + break; + } + } + + /* + * Turn off multicast loopback + */ + + flag = 0; + switch ( bindnet_address->family ) { + case AF_INET: + sflag = 0; + res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP, + &sflag, sizeof (sflag)); + break; + case AF_INET6: + res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, + &flag, sizeof (flag)); + } + if (res == -1) { + LOGSYS_PERROR 
(errno, instance->totemudp_log_level_warning, + "Unable to turn off multicast loopback"); + return (-1); + } + + /* + * Set multicast packets TTL + */ + flag = instance->totem_interface->ttl; + if (bindnet_address->family == AF_INET6) { + res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, + &flag, sizeof (flag)); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "set mcast v6 TTL failed"); + return (-1); + } + } else { + sflag = flag; + res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL, + &sflag, sizeof(sflag)); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "set mcast v4 TTL failed"); + return (-1); + } + } + + /* + * Bind to a specific interface for multicast send and receive + */ + switch ( bindnet_address->family ) { + case AF_INET: + if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF, + &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "cannot select interface for multicast packets (send)"); + return (-1); + } + if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF, + &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "cannot select interface for multicast packets (recv)"); + return (-1); + } + break; + case AF_INET6: + if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF, + &interface_num, sizeof (interface_num)) < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "cannot select interface for multicast packets (send v6)"); + return (-1); + } + if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF, + &interface_num, sizeof (interface_num)) < 0) { + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "cannot select interface for multicast packets (recv v6)"); + return (-1); + } + break; + } + + /* + * Bind to multicast 
socket used for multicast receives + * This needs to happen after all of the multicast setsockopt() calls + * as the kernel seems to only put them into effect (for IPV6) when bind() + * is called. + */ + totemip_totemip_to_sockaddr_convert(mcast_address, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + + retries = 0; + while (1) { + res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen); + if (res == 0) { + break; + } + LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, + "Unable to bind the socket to receive multicast packets"); + if (++retries > BIND_MAX_RETRIES) { + break; + } + + /* + * Wait for a while + */ + (void)poll(NULL, 0, BIND_RETRIES_INTERVAL * retries); + } + + if (res == -1) { + return (-1); + } + return 0; +} + +static int totemudp_build_sockets ( + struct totemudp_instance *instance, + struct totem_ip_address *mcast_address, + struct totem_ip_address *bindnet_address, + struct totemudp_socket *sockets, + struct totem_ip_address *bound_to) +{ + int interface_num; + int interface_up; + int res; + + /* + * Determine the ip address bound to and the interface name + */ + res = netif_determine (instance, + bindnet_address, + bound_to, + &interface_up, + &interface_num); + + if (res == -1) { + return (-1); + } + + totemip_copy(&instance->my_id, bound_to); + + res = totemudp_build_sockets_ip (instance, mcast_address, + bindnet_address, sockets, bound_to, interface_num); + + if (res == -1) { + /* if we get here, corosync won't work anyway, so better leaving than faking to work */ + LOGSYS_PERROR (errno, instance->totemudp_log_level_error, + "Unable to create sockets, exiting"); + exit(EXIT_FAILURE); + } + + /* We only send out of the token socket */ + totemudp_traffic_control_set(instance, sockets->token); + return res; +} + +/* + * Totem Network interface + * depends on poll abstraction, POSIX, IPV4 + */ + +/* + * Create an instance + */ +int totemudp_initialize ( + qb_loop_t *poll_handle, + void **udp_context, + 
struct totem_config *totem_config, + totemsrp_stats_t *stats, + + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)) +{ + struct totemudp_instance *instance; + + instance = malloc (sizeof (struct totemudp_instance)); + if (instance == NULL) { + return (-1); + } + + totemudp_instance_initialize (instance); + + instance->totem_config = totem_config; + instance->stats = stats; + + /* + * Configure logging + */ + instance->totemudp_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; + instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error; + instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; + instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; + instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; + instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; + instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf; + + /* + * Initialize local variables for totemudp + */ + instance->totem_interface = &totem_config->interfaces[0]; + totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr); + memset (instance->iov_buffer, 0, UDP_RECEIVE_FRAME_SIZE_MAX); + + instance->totemudp_poll_handle = poll_handle; + + instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; + + instance->context = context; + instance->totemudp_deliver_fn = deliver_fn; + + instance->totemudp_iface_change_fn = iface_change_fn; + + instance->totemudp_target_set_completed = 
target_set_completed; + + totemip_localhost (instance->mcast_address.family, &localhost); + localhost.nodeid = instance->totem_config->node_id; + + /* + * RRP layer isn't ready to receive message because it hasn't + * initialized yet. Add short timer to check the interfaces. + */ + qb_loop_timer_add (instance->totemudp_poll_handle, + QB_LOOP_MED, + 100*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + *udp_context = instance; + return (0); +} + +void *totemudp_buffer_alloc (void) +{ + return malloc (FRAME_SIZE_MAX); +} + +void totemudp_buffer_release (void *ptr) +{ + return free (ptr); +} + +int totemudp_processor_count_set ( + void *udp_context, + int processor_count) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + int res = 0; + + instance->my_memb_entries = processor_count; + qb_loop_timer_del (instance->totemudp_poll_handle, + instance->timer_netif_check_timeout); + if (processor_count == 1) { + qb_loop_timer_add (instance->totemudp_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + return (res); +} + +int totemudp_recv_flush (void *udp_context) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + struct pollfd ufd; + int nfds; + int res = 0; + int i; + int sock; + + instance->flushing = 1; + + for (i = 0; i < 2; i++) { + sock = -1; + if (i == 0) { + sock = instance->totemudp_sockets.mcast_recv; + } + if (i == 1) { + sock = instance->totemudp_sockets.local_mcast_loop[0]; + } + assert(sock != -1); + + do { + ufd.fd = sock; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + net_deliver_fn (sock, ufd.revents, instance); + } + } while (nfds == 1); + } + + instance->flushing = 0; + + return (res); +} + +int totemudp_send_flush (void 
*udp_context) +{ + return 0; +} + +int totemudp_token_send ( + void *udp_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + int res = 0; + + ucast_sendmsg (instance, &instance->token_target, msg, msg_len); + + return (res); +} +int totemudp_mcast_flush_send ( + void *udp_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len); + + return (res); +} + +int totemudp_mcast_noflush_send ( + void *udp_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len); + + return (res); +} + +extern int totemudp_iface_check (void *udp_context) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + int res = 0; + + timer_function_netif_check_timeout (instance); + + return (res); +} + +int totemudp_nodestatus_get (void *udp_context, unsigned int nodeid, + struct totem_node_status *node_status) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + struct qb_list_head *list; + struct totemudp_member *member; + + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudp_member, + list); + + if (member->member.nodeid == nodeid) { + node_status->nodeid = nodeid; + /* reachable is filled in by totemsrp */ + node_status->link_status[0].enabled = 1; + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + node_status->link_status[0].enabled = 1; + } else { + node_status->link_status[0].enabled = 0; + } + node_status->link_status[0].connected = node_status->reachable; + node_status->link_status[0].mtu = instance->totem_config->net_mtu; + strncpy(node_status->link_status[0].src_ipaddr, totemip_print(&member->member), 
KNET_MAX_HOST_LEN-1); + } + } + return (0); +} + +int totemudp_ifaces_get ( + void *net_context, + char ***status, + unsigned int *iface_count) +{ + static char *statuses[INTERFACE_MAX] = {(char*)"OK"}; + + if (status) { + *status = statuses; + } + *iface_count = 1; + + return (0); +} + +extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config) +{ + totem_config->net_mtu -= totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family); +} + +int totemudp_token_target_set ( + void *udp_context, + unsigned int nodeid) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + struct qb_list_head *list; + struct totemudp_member *member; + int res = 0; + + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudp_member, + list); + + if (member->member.nodeid == nodeid) { + memcpy (&instance->token_target, &member->member, + sizeof (struct totem_ip_address)); + + instance->totemudp_target_set_completed (instance->context); + break; + } + } + return (res); +} + +extern int totemudp_recv_mcast_empty ( + void *udp_context) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + unsigned int res; + struct sockaddr_storage system_from; + struct msghdr msg_recv; + struct pollfd ufd; + int nfds; + int msg_processed = 0; + int i; + int sock; + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = &instance->totemudp_iov_recv_flush; + msg_recv.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_recv.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_recv.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_recv.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_recv.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_recv.msg_accrightslen = 0; +#endif + + for (i = 0; i < 2; i++) { + sock = -1; + if (i == 0) { 
+ sock = instance->totemudp_sockets.mcast_recv; + } + if (i == 1) { + sock = instance->totemudp_sockets.local_mcast_loop[0]; + } + assert(sock != -1); + + do { + ufd.fd = sock; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (res != -1) { + msg_processed = 1; + } else { + msg_processed = -1; + } + } + } while (nfds == 1); + } + + return (msg_processed); +} + + +int totemudp_member_add ( + void *udp_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + + struct totemudp_member *new_member; + + new_member = malloc (sizeof (struct totemudp_member)); + if (new_member == NULL) { + return (-1); + } + + memset(new_member, 0, sizeof(*new_member)); + + qb_list_init (&new_member->list); + qb_list_add_tail (&new_member->list, &instance->member_list); + memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); + + return (0); +} + +int totemudp_member_remove ( + void *udp_context, + const struct totem_ip_address *token_target, + int ring_no) +{ + int found = 0; + struct qb_list_head *list; + struct totemudp_member *member; + struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; + + /* + * Find the member to remove and close its socket + */ + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudp_member, + list); + + if (totemip_compare (token_target, &member->member)==0) { + found = 1; + break; + } + } + + /* + * Delete the member from the list + */ + if (found) { + qb_list_del (list); + } + + return (0); +} + +int totemudp_iface_set (void *net_context, + const struct totem_ip_address *local_addr, + unsigned short ip_port, + unsigned int iface_no) +{ + /* Not supported */ + return (-1); +} + +int totemudp_reconfigure ( + void *udp_context, + struct 
totem_config *totem_config) +{ + /* Not supported */ + return (-1); +} diff --git a/exec/totemudp.h b/exec/totemudp.h new file mode 100644 index 0000000..6642472 --- /dev/null +++ b/exec/totemudp.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef TOTEMUDP_H_DEFINED +#define TOTEMUDP_H_DEFINED + +#include <sys/types.h> +#include <sys/socket.h> +#include <qb/qbloop.h> + +#include <corosync/totem/totem.h> + +/** + * Create an instance + */ +extern int totemudp_initialize ( + qb_loop_t* poll_handle, + void **udp_context, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)); + +extern void *totemudp_buffer_alloc (void); + +extern void totemudp_buffer_release (void *ptr); + +extern int totemudp_processor_count_set ( + void *udp_context, + int processor_count); + +extern int totemudp_token_send ( + void *udp_context, + const void *msg, + unsigned int msg_len); + +extern int totemudp_mcast_flush_send ( + void *udp_context, + const void *msg, + unsigned int msg_len); + +extern int totemudp_mcast_noflush_send ( + void *udp_context, + const void *msg, + unsigned int msg_len); + +extern int totemudp_nodestatus_get (void *net_context, unsigned int nodeid, + struct totem_node_status *node_status); + +extern int totemudp_ifaces_get (void *net_context, + char ***status, + unsigned int *iface_count); + +extern int totemudp_recv_flush (void *udp_context); + +extern int totemudp_send_flush (void *udp_context); + +extern int totemudp_iface_set (void *net_context, + const struct totem_ip_address *local_addr, + unsigned short ip_port, + unsigned int iface_no); + +extern int totemudp_iface_check (void *udp_context); + +extern int totemudp_finalize (void *udp_context); + +extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config); + +extern int totemudp_token_target_set ( + void *udp_context, + unsigned 
int nodeid); + +extern int totemudp_crypto_set ( + void *udp_context, + const char *cipher_type, + const char *hash_type); + +extern int totemudp_recv_mcast_empty ( + void *udp_context); + +extern int totemudp_member_add ( + void *udpu_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no); + +extern int totemudp_member_remove ( + void *udpu_context, + const struct totem_ip_address *member, + int ring_no); + +extern int totemudp_reconfigure ( + void *udp_context, + struct totem_config *totem_config); + +#endif /* TOTEMUDP_H_DEFINED */ diff --git a/exec/totemudpu.c b/exec/totemudpu.c new file mode 100644 index 0000000..399b47b --- /dev/null +++ b/exec/totemudpu.c @@ -0,0 +1,1453 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2018 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <assert.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <netdb.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <sys/param.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <sched.h> +#include <time.h> +#include <sys/time.h> +#include <sys/poll.h> +#include <sys/uio.h> +#include <limits.h> + +#include <qb/qblist.h> +#include <qb/qbdefs.h> +#include <qb/qbloop.h> + +#include <corosync/sq.h> +#include <corosync/swab.h> +#define LOGSYS_UTILS_ONLY 1 +#include <corosync/logsys.h> +#include "totemudpu.h" + +#include "util.h" + +#ifndef MSG_NOSIGNAL +#define MSG_NOSIGNAL 0 +#endif + +#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * UDP_RECEIVE_FRAME_SIZE_MAX) +#define NETIF_STATE_REPORT_UP 1 +#define NETIF_STATE_REPORT_DOWN 2 + +#define BIND_STATE_UNBOUND 0 +#define BIND_STATE_REGULAR 1 +#define BIND_STATE_LOOPBACK 2 + +struct totemudpu_member { + struct qb_list_head list; + struct totem_ip_address member; + int fd; + int active; +}; + +struct totemudpu_instance { + qb_loop_t *totemudpu_poll_handle; + + struct totem_interface *totem_interface; + + int netif_state_report; + + int netif_bind_state; + + void *context; + + int (*totemudpu_deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const 
struct sockaddr_storage *system_from); + + int (*totemudpu_iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no); + + void (*totemudpu_target_set_completed) (void *context); + + /* + * Function and data used to log messages + */ + int totemudpu_log_level_security; + + int totemudpu_log_level_error; + + int totemudpu_log_level_warning; + + int totemudpu_log_level_notice; + + int totemudpu_log_level_debug; + + int totemudpu_subsys_id; + + void (*totemudpu_log_printf) ( + int level, + int subsys, + const char *function, + const char *file, + int line, + const char *format, + ...)__attribute__((format(printf, 6, 7))); + + void *udpu_context; + + char iov_buffer[UDP_RECEIVE_FRAME_SIZE_MAX]; + + struct iovec totemudpu_iov_recv; + + struct qb_list_head member_list; + + int stats_sent; + + int stats_recv; + + int stats_delv; + + int stats_remcasts; + + int stats_orf_token; + + struct timeval stats_tv_start; + + struct totem_ip_address my_id; + + int firstrun; + + qb_loop_timer_handle timer_netif_check_timeout; + + unsigned int my_memb_entries; + + struct totem_config *totem_config; + + totemsrp_stats_t *stats; + + struct totem_ip_address token_target; + + int token_socket; + + int local_loop_sock[2]; + + qb_loop_timer_handle timer_merge_detect_timeout; + + int send_merge_detect_message; + + unsigned int merge_detect_messages_sent_before_timeout; +}; + +struct work_item { + const void *msg; + unsigned int msg_len; + struct totemudpu_instance *instance; +}; + +static int totemudpu_build_sockets ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to); + +static int totemudpu_create_sending_socket( + void *udpu_context, + const struct totem_ip_address *member); + +int totemudpu_member_list_rebind_ip ( + void *udpu_context); + +static void totemudpu_start_merge_detect_timeout( + void *udpu_context); + +static void totemudpu_stop_merge_detect_timeout( + void 
*udpu_context); + +static void totemudpu_instance_initialize (struct totemudpu_instance *instance) +{ + memset (instance, 0, sizeof (struct totemudpu_instance)); + + instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; + + instance->totemudpu_iov_recv.iov_base = instance->iov_buffer; + + instance->totemudpu_iov_recv.iov_len = UDP_RECEIVE_FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); + + /* + * There is always atleast 1 processor + */ + instance->my_memb_entries = 1; + + qb_list_init (&instance->member_list); +} + +#define log_printf(level, format, args...) \ +do { \ + instance->totemudpu_log_printf ( \ + level, instance->totemudpu_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ +} while (0); +#define LOGSYS_PERROR(err_num, level, fmt, args...) \ +do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + instance->totemudpu_log_printf ( \ + level, instance->totemudpu_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + fmt ": %s (%d)", ##args, _error_ptr, err_num); \ + } while(0) + +int totemudpu_crypto_set ( + void *udpu_context, + const char *cipher_type, + const char *hash_type) +{ + + return (0); +} + + +static inline void ucast_sendmsg ( + struct totemudpu_instance *instance, + struct totem_ip_address *system_to, + const void *msg, + unsigned int msg_len) +{ + struct msghdr msg_ucast; + int res = 0; + struct sockaddr_storage sockaddr; + struct iovec iovec; + int addrlen; + int send_sock; + + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + + /* + * Build unicast message + */ + totemip_totemip_to_sockaddr_convert(system_to, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + memset(&msg_ucast, 0, sizeof(msg_ucast)); + msg_ucast.msg_name = &sockaddr; + msg_ucast.msg_namelen = addrlen; + msg_ucast.msg_iov = (void *)&iovec; + msg_ucast.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + 
msg_ucast.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_ucast.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_ucast.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_ucast.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_ucast.msg_accrightslen = 0; +#endif + + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + send_sock = instance->token_socket; + } else { + send_sock = instance->local_loop_sock[1]; + msg_ucast.msg_name = NULL; + msg_ucast.msg_namelen = 0; + } + + + /* + * Transmit unicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (send_sock, &msg_ucast, MSG_NOSIGNAL); + if (res < 0) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, + "sendmsg(ucast) failed (non-critical)"); + } +} + +static inline void mcast_sendmsg ( + struct totemudpu_instance *instance, + const void *msg, + unsigned int msg_len, + int only_active) +{ + struct msghdr msg_mcast; + int res = 0; + struct iovec iovec; + struct sockaddr_storage sockaddr; + int addrlen; + struct qb_list_head *list; + struct totemudpu_member *member; + + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + + memset(&msg_mcast, 0, sizeof(msg_mcast)); + /* + * Build multicast message + */ + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudpu_member, + list); + /* + * Do not send multicast message if message is not "flush", member + * is inactive and timeout for sending merge message didn't expired. 
+ */ + if (only_active && !member->active && !instance->send_merge_detect_message) + continue ; + + totemip_totemip_to_sockaddr_convert(&member->member, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + msg_mcast.msg_name = &sockaddr; + msg_mcast.msg_namelen = addrlen; + msg_mcast.msg_iov = (void *)&iovec; + msg_mcast.msg_iovlen = 1; + #ifdef HAVE_MSGHDR_CONTROL + msg_mcast.msg_control = 0; + #endif + #ifdef HAVE_MSGHDR_CONTROLLEN + msg_mcast.msg_controllen = 0; + #endif + #ifdef HAVE_MSGHDR_FLAGS + msg_mcast.msg_flags = 0; + #endif + #ifdef HAVE_MSGHDR_ACCRIGHTS + msg_mcast.msg_accrights = NULL; + #endif + #ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_mcast.msg_accrightslen = 0; + #endif + + /* + * Transmit multicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL); + if (res < 0) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, + "sendmsg(mcast) failed (non-critical)"); + } + } + + if (!only_active || instance->send_merge_detect_message) { + /* + * Current message was sent to all nodes + */ + instance->merge_detect_messages_sent_before_timeout++; + instance->send_merge_detect_message = 0; + } + } else { + /* + * Transmit multicast message to local unix mcast loop + * An error here is recovered by totemsrp + */ + msg_mcast.msg_name = NULL; + msg_mcast.msg_namelen = 0; + msg_mcast.msg_iov = (void *)&iovec; + msg_mcast.msg_iovlen = 1; + #ifdef HAVE_MSGHDR_CONTROL + msg_mcast.msg_control = 0; + #endif + #ifdef HAVE_MSGHDR_CONTROLLEN + msg_mcast.msg_controllen = 0; + #endif + #ifdef HAVE_MSGHDR_FLAGS + msg_mcast.msg_flags = 0; + #endif + #ifdef HAVE_MSGHDR_ACCRIGHTS + msg_mcast.msg_accrights = NULL; + #endif + #ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_mcast.msg_accrightslen = 0; + #endif + + res = sendmsg (instance->local_loop_sock[1], &msg_mcast, + MSG_NOSIGNAL); + if (res < 0) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, + "sendmsg(local mcast loop) failed (non-critical)"); 
+ } + } +} + +int totemudpu_finalize ( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + if (instance->token_socket > 0) { + qb_loop_poll_del (instance->totemudpu_poll_handle, + instance->token_socket); + close (instance->token_socket); + } + + if (instance->local_loop_sock[0] > 0) { + qb_loop_poll_del (instance->totemudpu_poll_handle, + instance->local_loop_sock[0]); + close (instance->local_loop_sock[0]); + close (instance->local_loop_sock[1]); + } + + totemudpu_stop_merge_detect_timeout(instance); + + return (res); +} + +static struct totemudpu_member *find_member_by_sockaddr( + const void *udpu_context, + const struct sockaddr *sa) +{ + struct qb_list_head *list; + struct totemudpu_member *member; + struct totemudpu_member *res_member; + const struct totemudpu_instance *instance = (const struct totemudpu_instance *)udpu_context; + + res_member = NULL; + + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudpu_member, + list); + + if (totemip_sa_equal(&member->member, sa)) { + res_member = member; + break ; + } + } + + return (res_member); +} + + +static int net_deliver_fn ( + int fd, + int revents, + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)data; + struct msghdr msg_recv; + struct iovec *iovec; + struct sockaddr_storage system_from; + int bytes_received; + int truncated_packet; + + iovec = &instance->totemudpu_iov_recv; + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = iovec; + msg_recv.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_recv.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_recv.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_recv.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_recv.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + 
msg_recv.msg_accrightslen = 0; +#endif + + bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (bytes_received == -1) { + return (0); + } else { + instance->stats_recv += bytes_received; + } + + truncated_packet = 0; + +#ifdef HAVE_MSGHDR_FLAGS + if (msg_recv.msg_flags & MSG_TRUNC) { + truncated_packet = 1; + } +#else + /* + * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that + * if bytes_received == UDP_RECEIVE_FRAME_SIZE_MAX then packet is truncated + */ + if (bytes_received == UDP_RECEIVE_FRAME_SIZE_MAX) { + truncated_packet = 1; + } +#endif + + if (truncated_packet) { + log_printf (instance->totemudpu_log_level_error, + "Received too big message. This may be because something bad is happening" + "on the network (attack?), or you tried join more nodes than corosync is" + "compiled with (%u) or bug in the code (bad estimation of " + "the UDP_RECEIVE_FRAME_SIZE_MAX). Dropping packet.", PROCESSOR_COUNT_MAX); + return (0); + } + + if (instance->totem_config->block_unlisted_ips && + instance->netif_bind_state == BIND_STATE_REGULAR && + find_member_by_sockaddr(instance, (const struct sockaddr *)&system_from) == NULL) { + log_printf(instance->totemudpu_log_level_debug, "Packet rejected from %s", + totemip_sa_print((const struct sockaddr *)&system_from)); + + return (0); + } + + iovec->iov_len = bytes_received; + + /* + * Handle incoming message + */ + instance->totemudpu_deliver_fn ( + instance->context, + iovec->iov_base, + iovec->iov_len, + &system_from); + + iovec->iov_len = UDP_RECEIVE_FRAME_SIZE_MAX; + return (0); +} + +static int netif_determine ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet, + struct totem_ip_address *bound_to, + int *interface_up, + int *interface_num) +{ + int res; + + res = totemip_iface_check (bindnet, bound_to, + interface_up, interface_num, + instance->totem_config->clear_node_high_bit); + + + return (res); +} + + +/* + * If the interface is up, the sockets 
for totem are built. If the interface is down + * this function is requeued in the timer list to retry building the sockets later. + */ +static void timer_function_netif_check_timeout ( + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)data; + int interface_up; + int interface_num; + + /* + * Build sockets for every interface + */ + netif_determine (instance, + &instance->totem_interface->bindnet, + &instance->totem_interface->boundto, + &interface_up, &interface_num); + /* + * If the network interface isn't back up and we are already + * in loopback mode, add timer to check again and return + */ + if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && + interface_up == 0) || + + (instance->my_memb_entries == 1 && + instance->netif_bind_state == BIND_STATE_REGULAR && + interface_up == 1)) { + + qb_loop_timer_add (instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + /* + * Add a timer to check for a downed regular interface + */ + return; + } + + if (instance->token_socket > 0) { + qb_loop_poll_del (instance->totemudpu_poll_handle, + instance->token_socket); + close (instance->token_socket); + instance->token_socket = -1; + } + + if (interface_up == 0) { + if (instance->netif_bind_state == BIND_STATE_UNBOUND) { + log_printf (instance->totemudpu_log_level_error, + "One of your ip addresses are now bound to localhost. 
" + "Corosync would not work correctly."); + exit(COROSYNC_DONE_FATAL_ERR); + } + + /* + * Interface is not up + */ + instance->netif_bind_state = BIND_STATE_LOOPBACK; + + /* + * Add a timer to retry building interfaces and request memb_gather_enter + */ + qb_loop_timer_add (instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } else { + /* + * Interface is up + */ + instance->netif_bind_state = BIND_STATE_REGULAR; + } + /* + * Create and bind the multicast and unicast sockets + */ + totemudpu_build_sockets (instance, + &instance->totem_interface->bindnet, + &instance->totem_interface->boundto); + + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + qb_loop_poll_add (instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->token_socket, + POLLIN, instance, net_deliver_fn); + } + + totemip_copy (&instance->my_id, &instance->totem_interface->boundto); + + /* + * This reports changes in the interface to the user and totemsrp + */ + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { + log_printf (instance->totemudpu_log_level_notice, + "The network interface [%s] is now up.", + totemip_print (&instance->totem_interface->boundto)); + instance->netif_state_report = NETIF_STATE_REPORT_DOWN; + instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0); + } + /* + * Add a timer to check for interface going down in single membership + */ + if (instance->my_memb_entries == 1) { + qb_loop_timer_add (instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + } else { + if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { + log_printf 
(instance->totemudpu_log_level_notice, + "The network interface is down."); + instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0); + } + instance->netif_state_report = NETIF_STATE_REPORT_UP; + + } +} + +/* Set the socket priority to INTERACTIVE to ensure + that our messages don't get queued behind anything else */ +static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock) +{ +#ifdef SO_PRIORITY + int prio = 6; /* TC_PRIO_INTERACTIVE */ + + if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "Could not set traffic priority"); + } +#endif +} + +static int totemudpu_build_sockets_ip ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to, + int interface_num) +{ + struct sockaddr_storage sockaddr; + int addrlen; + int res; + unsigned int recvbuf_size; + unsigned int optlen = sizeof (recvbuf_size); + unsigned int retries = 0; + + /* + * Setup unicast socket + */ + instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0); + if (instance->token_socket == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "socket() failed"); + return (-1); + } + + totemip_nosigpipe (instance->token_socket); + res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "Could not set non-blocking operation on token socket"); + return (-1); + } + + /* + * Bind to unicast socket used for token send/receives + * This has the side effect of binding to the correct interface + */ + totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); + while (1) { + res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen); + if (res == 0) { + break; + } + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "bind token 
socket failed"); + if (++retries > BIND_MAX_RETRIES) { + break; + } + + /* + * Wait for a while + */ + (void)poll(NULL, 0, BIND_RETRIES_INTERVAL * retries); + } + + if (res == -1) { + return (-1); + } + + /* + * the token_socket can receive many messages. Allow a large number + * of receive messages on this socket + */ + recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; + res = setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF, + &recvbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, + "Could not set recvbuf size"); + } + + return 0; +} + +int totemudpu_nodestatus_get (void *udpu_context, unsigned int nodeid, + struct totem_node_status *node_status) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + struct qb_list_head *list; + struct totemudpu_member *member; + + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudpu_member, + list); + + if (member->member.nodeid == nodeid) { + node_status->nodeid = nodeid; + /* reachable is filled in by totemsrp */ + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + node_status->link_status[0].enabled = 1; + } else { + node_status->link_status[0].enabled = 0; + } + node_status->link_status[0].connected = node_status->reachable; + node_status->link_status[0].mtu = instance->totem_config->net_mtu; + strncpy(node_status->link_status[0].src_ipaddr, totemip_print(&member->member), KNET_MAX_HOST_LEN-1); + } + } + return (0); +} + +int totemudpu_ifaces_get ( + void *net_context, + char ***status, + unsigned int *iface_count) +{ + static char *statuses[INTERFACE_MAX] = {(char*)"OK"}; + + if (status) { + *status = statuses; + } + *iface_count = 1; + + return (0); +} + + +static int totemudpu_build_local_sockets( + struct totemudpu_instance *instance) +{ + int i; + unsigned int sendbuf_size; + unsigned int recvbuf_size; + unsigned int optlen = sizeof (sendbuf_size); + int res; + + /* + * Create local 
multicast loop socket + */ + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, instance->local_loop_sock) == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "socket() failed"); + return (-1); + } + + for (i = 0; i < 2; i++) { + totemip_nosigpipe (instance->local_loop_sock[i]); + res = fcntl (instance->local_loop_sock[i], F_SETFL, O_NONBLOCK); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "Could not set non-blocking operation on multicast socket"); + return (-1); + } + } + + recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; + sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; + + res = setsockopt (instance->local_loop_sock[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, + "Unable to set SO_RCVBUF size on UDP local mcast loop socket"); + return (-1); + } + res = setsockopt (instance->local_loop_sock[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, + "Unable to set SO_SNDBUF size on UDP local mcast loop socket"); + return (-1); + } + + res = getsockopt (instance->local_loop_sock[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); + if (res == 0) { + log_printf (instance->totemudpu_log_level_debug, + "Local receive multicast loop socket recv buffer size (%d bytes).", recvbuf_size); + } + + res = getsockopt (instance->local_loop_sock[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); + if (res == 0) { + log_printf (instance->totemudpu_log_level_debug, + "Local transmit multicast loop socket send buffer size (%d bytes).", sendbuf_size); + } + + return (0); +} + +static int totemudpu_build_sockets ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to) +{ + int interface_num; + int interface_up; + int res; + + /* + * Determine the ip address bound to and the interface name + */ + res = netif_determine (instance, + 
bindnet_address, + bound_to, + &interface_up, + &interface_num); + + if (res == -1) { + return (-1); + } + + totemip_copy(&instance->my_id, bound_to); + + res = totemudpu_build_sockets_ip (instance, + bindnet_address, bound_to, interface_num); + + if (res == -1) { + /* if we get here, corosync won't work anyway, so better leaving than faking to work */ + LOGSYS_PERROR (errno, instance->totemudpu_log_level_error, + "Unable to create sockets, exiting"); + exit(EXIT_FAILURE); + } + + /* We only send out of the token socket */ + totemudpu_traffic_control_set(instance, instance->token_socket); + + /* + * Rebind all members to new ips + */ + totemudpu_member_list_rebind_ip(instance); + + return res; +} + +/* + * Totem Network interface + * depends on poll abstraction, POSIX, IPV4 + */ + +/* + * Create an instance + */ +int totemudpu_initialize ( + qb_loop_t *poll_handle, + void **udpu_context, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)) +{ + struct totemudpu_instance *instance; + + instance = malloc (sizeof (struct totemudpu_instance)); + if (instance == NULL) { + return (-1); + } + + totemudpu_instance_initialize (instance); + + instance->totem_config = totem_config; + instance->stats = stats; + + /* + * Configure logging + */ + instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; + instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error; + instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; + instance->totemudpu_log_level_notice = 
totem_config->totem_logging_configuration.log_level_notice; + instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; + instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; + instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf; + + /* + * Initialize local variables for totemudpu + */ + instance->totem_interface = &totem_config->interfaces[0]; + memset (instance->iov_buffer, 0, UDP_RECEIVE_FRAME_SIZE_MAX); + + instance->totemudpu_poll_handle = poll_handle; + + instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; + + instance->context = context; + instance->totemudpu_deliver_fn = deliver_fn; + + instance->totemudpu_iface_change_fn = iface_change_fn; + + instance->totemudpu_target_set_completed = target_set_completed; + + /* + * Create static local mcast sockets + */ + if (totemudpu_build_local_sockets(instance) == -1) { + free(instance); + return (-1); + } + + qb_loop_poll_add ( + instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->local_loop_sock[0], + POLLIN, instance, net_deliver_fn); + + /* + * RRP layer isn't ready to receive message because it hasn't + * initialized yet. Add short timer to check the interfaces. 
+ */ + qb_loop_timer_add (instance->totemudpu_poll_handle, + QB_LOOP_MED, + 100*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + totemudpu_start_merge_detect_timeout((void*)instance); + + *udpu_context = instance; + return (0); +} + +void *totemudpu_buffer_alloc (void) +{ + return malloc (FRAME_SIZE_MAX); +} + +void totemudpu_buffer_release (void *ptr) +{ + return free (ptr); +} + +int totemudpu_processor_count_set ( + void *udpu_context, + int processor_count) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + instance->my_memb_entries = processor_count; + qb_loop_timer_del (instance->totemudpu_poll_handle, + instance->timer_netif_check_timeout); + if (processor_count == 1) { + qb_loop_timer_add (instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + return (res); +} + +int totemudpu_recv_flush (void *udpu_context) +{ + int res = 0; + + return (res); +} + +int totemudpu_send_flush (void *udpu_context) +{ + int res = 0; + + return (res); +} + +int totemudpu_token_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + ucast_sendmsg (instance, &instance->token_target, msg, msg_len); + + return (res); +} +int totemudpu_mcast_flush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len, 0); + + return (res); +} + +int totemudpu_mcast_noflush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res 
= 0; + + mcast_sendmsg (instance, msg, msg_len, 1); + + return (res); +} + +extern int totemudpu_iface_check (void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + timer_function_netif_check_timeout (instance); + + return (res); +} + +extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config) +{ + totem_config->net_mtu -= totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family); +} + + +int totemudpu_token_target_set ( + void *udpu_context, + unsigned int nodeid) +{ + + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + struct qb_list_head *list; + struct totemudpu_member *member; + int res = 0; + + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudpu_member, + list); + + if (member->member.nodeid == nodeid) { + memcpy (&instance->token_target, &member->member, + sizeof (struct totem_ip_address)); + + instance->totemudpu_target_set_completed (instance->context); + break; + } + } + return (res); +} + +extern int totemudpu_recv_mcast_empty ( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + unsigned int res; + struct sockaddr_storage system_from; + struct msghdr msg_recv; + struct pollfd ufd; + int nfds, i; + int msg_processed = 0; + int sock; + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = &instance->totemudpu_iov_recv; + msg_recv.msg_iovlen = 1; +#ifdef HAVE_MSGHDR_CONTROL + msg_recv.msg_control = 0; +#endif +#ifdef HAVE_MSGHDR_CONTROLLEN + msg_recv.msg_controllen = 0; +#endif +#ifdef HAVE_MSGHDR_FLAGS + msg_recv.msg_flags = 0; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTS + msg_recv.msg_accrights = NULL; +#endif +#ifdef HAVE_MSGHDR_ACCRIGHTSLEN + msg_recv.msg_accrightslen = 0; +#endif + + for (i = 0; i < 2; i++) { + 
sock = -1; + if (i == 0) { + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + sock = instance->token_socket; + } else { + continue; + } + } + if (i == 1) { + sock = instance->local_loop_sock[0]; + } + assert(sock != -1); + + do { + ufd.fd = sock; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (res != -1) { + msg_processed = 1; + } else { + msg_processed = -1; + } + } + } while (nfds == 1); + } + + return (msg_processed); +} + +static int totemudpu_create_sending_socket( + void *udpu_context, + const struct totem_ip_address *member) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int fd; + int res; + unsigned int sendbuf_size; + unsigned int optlen = sizeof (sendbuf_size); + struct sockaddr_storage sockaddr; + int addrlen; + + fd = socket (member->family, SOCK_DGRAM, 0); + if (fd == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "Could not create socket for new member"); + return (-1); + } + totemip_nosigpipe (fd); + res = fcntl (fd, F_SETFL, O_NONBLOCK); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + "Could not set non-blocking operation on token socket"); + goto error_close_fd; + } + + /* + * These sockets are used to send multicast messages, so their buffers + * should be large + */ + sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; + res = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, + &sendbuf_size, optlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, + "Could not set sendbuf size"); + /* + * Fail in setting sendbuf size is not fatal -> don't exit + */ + } + + /* + * Bind to sending interface + */ + totemip_totemip_to_sockaddr_convert(&instance->my_id, 0, &sockaddr, &addrlen); + res = bind (fd, (struct sockaddr *)&sockaddr, addrlen); + if (res == -1) { + LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, + 
"bind token socket failed"); + goto error_close_fd; + } + + return (fd); + +error_close_fd: + close(fd); + return (-1); +} + +int totemudpu_iface_set (void *net_context, + const struct totem_ip_address *local_addr, + unsigned short ip_port, + unsigned int iface_no) +{ + /* Not supported */ + return (-1); +} + +int totemudpu_member_add ( + void *udpu_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + struct totemudpu_member *new_member; + + new_member = malloc (sizeof (struct totemudpu_member)); + if (new_member == NULL) { + return (-1); + } + + memset(new_member, 0, sizeof(*new_member)); + + log_printf (LOGSYS_LEVEL_NOTICE, "adding new UDPU member {%s}", + totemip_print(member)); + qb_list_init (&new_member->list); + qb_list_add_tail (&new_member->list, &instance->member_list); + memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); + new_member->fd = totemudpu_create_sending_socket(udpu_context, member); + new_member->active = 1; + + return (0); +} + +int totemudpu_member_remove ( + void *udpu_context, + const struct totem_ip_address *token_target, + int ring_no) +{ + int found = 0; + struct qb_list_head *list; + struct totemudpu_member *member; + + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + /* + * Find the member to remove and close its socket + */ + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudpu_member, + list); + + if (totemip_compare (token_target, &member->member)==0) { + log_printf(LOGSYS_LEVEL_NOTICE, + "removing UDPU member {%s}", + totemip_print(&member->member)); + + if (member->fd > 0) { + log_printf(LOGSYS_LEVEL_DEBUG, + "Closing socket to: {%s}", + totemip_print(&member->member)); + qb_loop_poll_del (instance->totemudpu_poll_handle, + member->fd); + close (member->fd); + } + found = 1; + break; + 
} + } + + /* + * Delete the member from the list + */ + if (found) { + qb_list_del (list); + } + + instance = NULL; + return (0); +} + +int totemudpu_member_list_rebind_ip ( + void *udpu_context) +{ + struct qb_list_head *list; + struct totemudpu_member *member; + + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + qb_list_for_each(list, &(instance->member_list)) { + member = qb_list_entry (list, + struct totemudpu_member, + list); + + if (member->fd > 0) { + close (member->fd); + } + + member->fd = totemudpu_create_sending_socket(udpu_context, &member->member); + } + + return (0); +} + + +static void timer_function_merge_detect_timeout ( + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)data; + + if (instance->merge_detect_messages_sent_before_timeout == 0) { + instance->send_merge_detect_message = 1; + } + + instance->merge_detect_messages_sent_before_timeout = 0; + + totemudpu_start_merge_detect_timeout(instance); +} + +static void totemudpu_start_merge_detect_timeout( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + qb_loop_timer_add(instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_merge_detect_timeout, + &instance->timer_merge_detect_timeout); + +} + +static void totemudpu_stop_merge_detect_timeout( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + qb_loop_timer_del(instance->totemudpu_poll_handle, + instance->timer_merge_detect_timeout); +} + +int totemudpu_reconfigure ( + void *udpu_context, + struct totem_config *totem_config) +{ + /* Not supported */ + return (-1); +} diff --git a/exec/totemudpu.h b/exec/totemudpu.h new file mode 100644 index 0000000..fe530ca --- /dev/null +++ b/exec/totemudpu.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2005 MontaVista Software, 
Inc. + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef TOTEMUDPU_H_DEFINED +#define TOTEMUDPU_H_DEFINED + +#include <sys/types.h> +#include <sys/socket.h> +#include <qb/qbloop.h> + +#include <corosync/totem/totem.h> + +/** + * Create an instance + */ +extern int totemudpu_initialize ( + qb_loop_t *poll_handle, + void **udpu_context, + struct totem_config *totem_config, + totemsrp_stats_t *stats, + void *context, + + int (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len, + const struct sockaddr_storage *system_from), + + int (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address, + unsigned int ring_no), + + void (*mtu_changed) ( + void *context, + int net_mtu), + + void (*target_set_completed) ( + void *context)); + +extern void *totemudpu_buffer_alloc (void); + +extern void totemudpu_buffer_release (void *ptr); + +extern int totemudpu_processor_count_set ( + void *udpu_context, + int processor_count); + +extern int totemudpu_token_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_mcast_flush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_mcast_noflush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_nodestatus_get (void *net_context, unsigned int nodeid, + struct totem_node_status *node_status); + +extern int totemudpu_ifaces_get (void *net_context, + char ***status, + unsigned int *iface_count); + +extern int totemudpu_recv_flush (void *udpu_context); + +extern int totemudpu_send_flush (void *udpu_context); + +extern int totemudpu_iface_set (void *net_context, + const struct totem_ip_address *local_addr, + unsigned short ip_port, + unsigned int iface_no); + +extern int totemudpu_iface_check (void *udpu_context); + +extern int totemudpu_finalize (void *udpu_context); + +extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config); + +extern int totemudpu_token_target_set ( + 
void *udpu_context, + unsigned int nodeid); + +extern int totemudpu_crypto_set ( + void *udpu_context, + const char *cipher_type, + const char *hash_type); + +extern int totemudpu_recv_mcast_empty ( + void *udpu_context); + +extern int totemudpu_member_add ( + void *udpu_context, + const struct totem_ip_address *local, + const struct totem_ip_address *member, + int ring_no); + +extern int totemudpu_member_remove ( + void *udpu_context, + const struct totem_ip_address *member, + int ring_no); + +extern int totemudpu_reconfigure ( + void *udpu_context, + struct totem_config *totem_config); + +#endif /* TOTEMUDPU_H_DEFINED */ diff --git a/exec/util.c b/exec/util.c new file mode 100644 index 0000000..8988ab2 --- /dev/null +++ b/exec/util.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2002-2004 MontaVista Software, Inc. + * Copyright (c) 2004 Open Source Development Lab + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com), Mark Haverkamp (markh@osdl.org) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/time.h> +#include <assert.h> + +#include <libknet.h> + +#include <corosync/corotypes.h> +#include <corosync/corodefs.h> +#include <corosync/icmap.h> +#include <corosync/logsys.h> +#include "util.h" + +LOGSYS_DECLARE_SUBSYS ("MAIN"); + +struct service_names { + const char *c_name; + int32_t c_val; +}; + +static struct service_names servicenames[] = +{ + { "CFG", CFG_SERVICE }, + { "CPG", CPG_SERVICE }, + { "QUORUM", QUORUM_SERVICE }, + { "PLOAD", PLOAD_SERVICE }, + { "VOTEQUORUM", VOTEQUORUM_SERVICE }, + { "MON", MON_SERVICE }, + { "WD", WD_SERVICE }, + { "CMAP", CMAP_SERVICE }, + { NULL, -1 } +}; + +const char * short_service_name_get(uint32_t service_id, + char *buf, size_t buf_size) +{ + uint32_t i; + + for (i = 0; servicenames[i].c_name != NULL; i++) { + if (service_id == servicenames[i].c_val) { + return (servicenames[i].c_name); + } + } + snprintf(buf, buf_size, "%d", service_id); + return buf; +} + +/* + * Compare two names. returns non-zero on match. 
+ */ +int name_match(cs_name_t *name1, cs_name_t *name2) +{ + if (name1->length == name2->length) { + return ((strncmp ((char *)name1->value, (char *)name2->value, + name1->length)) == 0); + } + return 0; +} + +/* + * Get the time of day and convert to nanoseconds + */ +cs_time_t clust_time_now(void) +{ + struct timeval tv; + cs_time_t time_now; + + if (gettimeofday(&tv, 0)) { + return 0ULL; + } + + time_now = (cs_time_t)(tv.tv_sec) * 1000000000ULL; + time_now += (cs_time_t)(tv.tv_usec) * 1000ULL; + + return time_now; +} + +void _corosync_out_of_memory_error (void) __attribute__((noreturn)); +void _corosync_out_of_memory_error (void) +{ + assert (0==1); + exit (EXIT_FAILURE); +} + +void _corosync_exit_error ( + enum e_corosync_done err, const char *file, unsigned int line) __attribute__((noreturn)); + +void _corosync_exit_error ( + enum e_corosync_done err, const char *file, unsigned int line) +{ + if (err == COROSYNC_DONE_EXIT) { + log_printf (LOGSYS_LEVEL_NOTICE, + "Corosync Cluster Engine exiting normally"); + } else { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Cluster Engine exiting " + "with status %d at %s:%u.", err, file, line); + } + logsys_system_fini (); + exit (err); +} + +char *getcs_name_t (cs_name_t *name) +{ + static char ret_name[CS_MAX_NAME_LENGTH]; + + /* if string is corrupt (non-terminated), ensure it's displayed safely */ + if (name->length >= CS_MAX_NAME_LENGTH || name->value[name->length] != '\0') { + memset (ret_name, 0, sizeof (ret_name)); + memcpy (ret_name, name->value, min(name->length, CS_MAX_NAME_LENGTH -1)); + return (ret_name); + } + return ((char *)name->value); +} + +void setcs_name_t (cs_name_t *name, char *str) { + strncpy ((char *)name->value, str, sizeof (name->value) - 1); + ((char *)name->value)[sizeof (name->value) - 1] = '\0'; + if (strlen ((char *)name->value) > CS_MAX_NAME_LENGTH) { + name->length = CS_MAX_NAME_LENGTH; + } else { + name->length = strlen (str); + } +} + +int cs_name_tisEqual (cs_name_t *str1, char *str2) 
{ + if (str1->length == strlen (str2)) { + return ((strncmp ((char *)str1->value, (char *)str2, + str1->length)) == 0); + } else { + return 0; + } +} + +const char *get_state_dir(void) +{ + static char path[PATH_MAX] = {'\0'}; + char *cmap_state_dir; + int res; + + if (path[0] == '\0') { + if (icmap_get_string("system.state_dir", &cmap_state_dir) == CS_OK) { + res = snprintf(path, PATH_MAX, "%s", cmap_state_dir); + free(cmap_state_dir); + } else { + res = snprintf(path, PATH_MAX, "%s/%s", LOCALSTATEDIR, "lib/corosync"); + } + + assert(res < PATH_MAX); + } + + return (path); +} + +static int safe_strcat(char *dst, size_t dst_len, const char *src) +{ + + if (strlen(dst) + strlen(src) >= dst_len - 1) { + return (-1); + } + + strcat(dst, src); + + return (0); +} + +/* + * val - knet crypto model to find + * crypto_list_str - string with concatenated list of available crypto models - can be NULL + * machine_parseable_str - 0 - split strings by space, 1 - use human form (split by "," and last item with "or") + * error_string_prefix - Prefix to add into error string + * error_string - Complete error string + */ +int util_is_valid_knet_crypto_model(const char *val, + const char **list_str, int machine_parseable_str, + const char *error_string_prefix, const char **error_string) +{ + size_t entries; + struct knet_crypto_info crypto_list[16]; + size_t zi; + static char local_error_str[512]; + static char local_list_str[256]; + int model_found = 0; + + if (list_str != NULL) { + *list_str = local_list_str; + } + + memset(local_error_str, 0, sizeof(local_error_str)); + memset(local_list_str, 0, sizeof(local_list_str)); + + safe_strcat(local_error_str, sizeof(local_error_str), error_string_prefix); + + if (knet_get_crypto_list(NULL, &entries) != 0) { + *error_string = "internal error - cannot get knet crypto list"; + return (-1); + } + + if (entries > sizeof(crypto_list) / sizeof(crypto_list[0])) { + *error_string = "internal error - too many knet crypto list entries"; + return 
(-1); + } + + if (knet_get_crypto_list(crypto_list, &entries) != 0) { + *error_string = "internal error - cannot get knet crypto list"; + return (-1); + } + + for (zi = 0; zi < entries; zi++) { + if (zi == 0) { + } else if (zi == entries - 1) { + if (machine_parseable_str) { + (void)safe_strcat(local_list_str, sizeof(local_list_str), " "); + } else { + (void)safe_strcat(local_list_str, sizeof(local_list_str), " or "); + } + } else { + if (machine_parseable_str) { + (void)safe_strcat(local_list_str, sizeof(local_list_str), " "); + } else { + (void)safe_strcat(local_list_str, sizeof(local_list_str), ", "); + } + } + + (void)safe_strcat(local_list_str, sizeof(local_list_str), crypto_list[zi].name); + + if (val != NULL && strcmp(val, crypto_list[zi].name) == 0) { + model_found = 1; + } + } + + if (!model_found) { + (void)safe_strcat(local_error_str, sizeof(local_error_str), local_list_str); + *error_string = local_error_str; + } + + return (model_found); +} + +int util_is_valid_knet_compress_model(const char *val, + const char **list_str, int machine_parseable_str, + const char *error_string_prefix, const char **error_string) +{ + size_t entries; + struct knet_compress_info compress_list[16]; + size_t zi; + static char local_error_str[512]; + static char local_list_str[256]; + int model_found = 0; + + if (list_str != NULL) { + *list_str = local_list_str; + } + + memset(local_error_str, 0, sizeof(local_error_str)); + memset(local_list_str, 0, sizeof(local_list_str)); + + safe_strcat(local_error_str, sizeof(local_error_str), error_string_prefix); + + if (knet_get_compress_list(NULL, &entries) != 0) { + *error_string = "internal error - cannot get knet compress list"; + return (-1); + } + + if (entries > sizeof(compress_list) / sizeof(compress_list[0])) { + *error_string = "internal error - too many knet compress list entries"; + return (-1); + } + + if (knet_get_compress_list(compress_list, &entries) != 0) { + *error_string = "internal error - cannot get knet compress 
list"; + return (-1); + } + + for (zi = 0; zi < entries; zi++) { + if (zi == 0) { + } else if (zi == entries - 1) { + if (machine_parseable_str) { + (void)safe_strcat(local_list_str, sizeof(local_list_str), " "); + } else { + (void)safe_strcat(local_list_str, sizeof(local_list_str), " or "); + } + } else { + if (machine_parseable_str) { + (void)safe_strcat(local_list_str, sizeof(local_list_str), " "); + } else { + (void)safe_strcat(local_list_str, sizeof(local_list_str), ", "); + } + } + + (void)safe_strcat(local_list_str, sizeof(local_list_str), compress_list[zi].name); + + if (val != NULL && strcmp(val, compress_list[zi].name) == 0) { + model_found = 1; + } + } + + if (!model_found) { + (void)safe_strcat(local_error_str, sizeof(local_error_str), local_list_str); + *error_string = local_error_str; + } + + return (model_found); +} diff --git a/exec/util.h b/exec/util.h new file mode 100644 index 0000000..e6e12af --- /dev/null +++ b/exec/util.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2002-2004 MontaVista Software, Inc. + * Copyright (c) 2004 Open Source Development Lab + * Copyright (c) 2006-2017 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com), Mark Haverkamp (markh@osdl.org) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef UTIL_H_DEFINED +#define UTIL_H_DEFINED + +#include <sys/time.h> +#include <corosync/corotypes.h> + +/** + * Get the time of day and convert to nanoseconds + */ +extern cs_time_t clust_time_now(void); + +enum e_corosync_done { + COROSYNC_DONE_EXIT = 0, + COROSYNC_DONE_FORK = 4, + COROSYNC_DONE_LOGCONFIGREAD = 7, + COROSYNC_DONE_MAINCONFIGREAD = 8, + COROSYNC_DONE_LOGSETUP = 9, + COROSYNC_DONE_ICMAP = 12, + COROSYNC_DONE_INIT_SERVICES = 13, + COROSYNC_DONE_FATAL_ERR = 15, + COROSYNC_DONE_DIR_NOT_PRESENT = 16, + COROSYNC_DONE_ACQUIRE_LOCK = 17, + COROSYNC_DONE_ALREADY_RUNNING = 18, + COROSYNC_DONE_STD_TO_NULL_REDIR = 19, + COROSYNC_DONE_SERVICE_ENGINE_INIT = 20, + COROSYNC_DONE_STORE_RINGID = 21, + COROSYNC_DONE_STATS = 22, + COROSYNC_DONE_PLOAD = 99 +}; + +#define min(a,b) ((a) < (b) ? (a) : (b)) + +/** + * Compare two names. returns non-zero on match. 
+ */ +extern int name_match(cs_name_t *name1, cs_name_t *name2); +#define corosync_exit_error(err) _corosync_exit_error ((err), __FILE__, __LINE__) +extern void _corosync_exit_error (enum e_corosync_done err, const char *file, + unsigned int line) __attribute__((noreturn)); +void _corosync_out_of_memory_error (void) __attribute__((noreturn)); +extern char *getcs_name_t (cs_name_t *name); +extern void setcs_name_t (cs_name_t *name, char *str); +extern int cs_name_tisEqual (cs_name_t *str1, char *str2); +/** + * Get the short name of a service from the service_id. + */ +const char * short_service_name_get(uint32_t service_id, + char *buf, size_t buf_size); + +/* + * Return state directory (either icmap system.state_dir or LOCALSTATEDIR/lib/corosync) + */ +const char *get_state_dir(void); + +extern int util_is_valid_knet_crypto_model(const char *val, + const char **list_str, int machine_parseable_str, + const char *error_string_prefix, const char **error_string); + +extern int util_is_valid_knet_compress_model(const char *val, + const char **list_str, int machine_parseable_str, + const char *error_string_prefix, const char **error_string); + +#endif /* UTIL_H_DEFINED */ diff --git a/exec/votequorum.c b/exec/votequorum.c new file mode 100644 index 0000000..7c6ed3b --- /dev/null +++ b/exec/votequorum.c @@ -0,0 +1,3082 @@ +/* + * Copyright (c) 2009-2020 Red Hat, Inc. + * + * All rights reserved. + * + * Authors: Christine Caulfield (ccaulfie@redhat.com) + * Fabio M. Di Nitto (fdinitto@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> + +#include <qb/qblist.h> +#include <qb/qbipc_common.h> + +#include "quorum.h" +#include <corosync/corodefs.h> +#include <corosync/logsys.h> +#include <corosync/coroapi.h> +#include <corosync/icmap.h> +#include <corosync/votequorum.h> +#include <corosync/ipc_votequorum.h> + +#include "service.h" +#include "util.h" + +LOGSYS_DECLARE_SUBSYS ("VOTEQ"); + +/* + * interface with corosync + */ + +static struct corosync_api_v1 *corosync_api; + +/* + * votequorum global config vars + */ + + +static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; +static struct cluster_node *qdevice = NULL; +static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT; +static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT; +static uint8_t qdevice_can_operate = 1; +static void *qdevice_reg_conn = NULL; +static uint8_t qdevice_master_wins = 0; + +static uint8_t two_node = 0; + +static uint8_t wait_for_all = 0; +static uint8_t wait_for_all_status = 0; +static uint8_t wait_for_all_autoset = 0; /* Wait for all is not set explicitly and follows two_node */ + +static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE, initial_auto_tie_breaker = ATB_NONE; +static int lowest_node_id = -1; +static int highest_node_id = -1; + +#define DEFAULT_LMS_WIN 10000 +static uint8_t last_man_standing = 0; +static uint32_t last_man_standing_window = DEFAULT_LMS_WIN; + +static uint8_t allow_downscale = 0; +static uint32_t ev_barrier = 0; + +static uint8_t ev_tracking = 0; +static uint32_t ev_tracking_barrier = 0; +static int ev_tracking_fd = -1; + +/* + * votequorum_exec defines/structs/forward definitions + */ + +struct req_exec_quorum_nodeinfo { + struct qb_ipc_request_header header __attribute__((aligned(8))); + uint32_t nodeid; + uint32_t votes; + uint32_t expected_votes; + uint32_t flags; +} 
__attribute__((packed)); + +struct req_exec_quorum_reconfigure { + struct qb_ipc_request_header header __attribute__((aligned(8))); + uint32_t nodeid; + uint32_t value; + uint8_t param; + uint8_t _pad0; + uint8_t _pad1; + uint8_t _pad2; +} __attribute__((packed)); + +struct req_exec_quorum_qdevice_reg { + struct qb_ipc_request_header header __attribute__((aligned(8))); + uint32_t operation; + char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; +} __attribute__((packed)); + +struct req_exec_quorum_qdevice_reconfigure { + struct qb_ipc_request_header header __attribute__((aligned(8))); + char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; + char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]; +} __attribute__((packed)); + +/* + * votequorum_exec onwire version (via totem) + */ + +#include "votequorum.h" + +/* + * votequorum_exec onwire messages (via totem) + */ + +#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0 +#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1 +#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2 +#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3 + +static void votequorum_exec_send_expectedvotes_notification(void); +static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context); +static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context); + +#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1 +#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2 +#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3 + +static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value); + +/* + * used by req_exec_quorum_qdevice_reg + */ +#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0 +#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1 + +/* + * votequorum internal node status/view + */ + +#define NODE_FLAGS_QUORATE 1 +#define NODE_FLAGS_LEAVING 2 +#define NODE_FLAGS_WFASTATUS 4 +#define NODE_FLAGS_FIRST 8 +#define NODE_FLAGS_QDEVICE_REGISTERED 16 +#define NODE_FLAGS_QDEVICE_ALIVE 32 +#define 
NODE_FLAGS_QDEVICE_CAST_VOTE 64 +#define NODE_FLAGS_QDEVICE_MASTER_WINS 128 + +typedef enum { + NODESTATE_MEMBER=1, + NODESTATE_DEAD, + NODESTATE_LEAVING +} nodestate_t; + +struct cluster_node { + int node_id; + nodestate_t state; + uint32_t votes; + uint32_t expected_votes; + uint32_t flags; + struct qb_list_head list; +}; + +/* + * votequorum internal quorum status + */ + +static uint8_t quorum; +static uint8_t cluster_is_quorate; + +/* + * votequorum membership data + */ + +static struct cluster_node *us; +static struct qb_list_head cluster_members_list; +static unsigned int quorum_members[PROCESSOR_COUNT_MAX]; +static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX]; +static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX]; +static int quorum_members_entries = 0; +static int previous_quorum_members_entries = 0; +static int atb_nodelist_entries = 0; +static struct memb_ring_id quorum_ringid; + +/* + * pre allocate all cluster_nodes + one for qdevice + */ +static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2]; +static int cluster_nodes_entries = 0; + +/* + * votequorum tracking + */ +struct quorum_pd { + unsigned char track_flags; + int tracking_enabled; + uint64_t tracking_context; + struct qb_list_head list; + void *conn; +}; + +static struct qb_list_head trackers_list; + +/* + * votequorum timers + */ + +static corosync_timer_handle_t qdevice_timer; +static int qdevice_timer_set = 0; +static corosync_timer_handle_t last_man_standing_timer; +static int last_man_standing_timer_set = 0; +static int sync_nodeinfo_sent = 0; +static int sync_wait_for_poll_or_timeout = 0; + +/* + * Service Interfaces required by service_message_handler struct + */ + +static int sync_in_progress = 0; + +static void votequorum_sync_init ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + +static int votequorum_sync_process (void); +static void 
votequorum_sync_activate (void); +static void votequorum_sync_abort (void); + +static quorum_set_quorate_fn_t quorum_callback; + +/* + * votequorum_exec handler and definitions + */ + +static char *votequorum_exec_init_fn (struct corosync_api_v1 *api); +static int votequorum_exec_exit_fn (void); +static int votequorum_exec_send_nodeinfo(uint32_t nodeid); + +static void message_handler_req_exec_votequorum_nodeinfo ( + const void *message, + unsigned int nodeid); +static void exec_votequorum_nodeinfo_endian_convert (void *message); + +static void message_handler_req_exec_votequorum_reconfigure ( + const void *message, + unsigned int nodeid); +static void exec_votequorum_reconfigure_endian_convert (void *message); + +static void message_handler_req_exec_votequorum_qdevice_reg ( + const void *message, + unsigned int nodeid); +static void exec_votequorum_qdevice_reg_endian_convert (void *message); + +static void message_handler_req_exec_votequorum_qdevice_reconfigure ( + const void *message, + unsigned int nodeid); +static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message); + +static struct corosync_exec_handler votequorum_exec_engine[] = +{ + { /* 0 */ + .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo, + .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert + }, + { /* 1 */ + .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure, + .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert + }, + { /* 2 */ + .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg, + .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert + }, + { /* 3 */ + .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure, + .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert + }, +}; + +/* + * Library Handler and Functions Definitions + */ + +static int quorum_lib_init_fn (void *conn); + +static int quorum_lib_exit_fn (void *conn); + +static void 
qdevice_timer_fn(void *arg); + +static void message_handler_req_lib_votequorum_getinfo (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_setexpected (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_setvotes (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_trackstart (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_trackstop (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_qdevice_register (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_qdevice_update (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_qdevice_poll (void *conn, + const void *message); + +static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn, + const void *message); + +static struct corosync_lib_handler quorum_lib_service[] = +{ + { /* 0 */ + .lib_handler_fn = message_handler_req_lib_votequorum_getinfo, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 1 */ + .lib_handler_fn = message_handler_req_lib_votequorum_setexpected, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 2 */ + .lib_handler_fn = message_handler_req_lib_votequorum_setvotes, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 3 */ + .lib_handler_fn = message_handler_req_lib_votequorum_trackstart, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 4 */ + .lib_handler_fn = message_handler_req_lib_votequorum_trackstop, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 5 */ + .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 6 */ + .lib_handler_fn = 
message_handler_req_lib_votequorum_qdevice_unregister, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 7 */ + .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 8 */ + .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 9 */ + .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED + } +}; + +static struct corosync_service_engine votequorum_service_engine = { + .name = "corosync vote quorum service v1.0", + .id = VOTEQUORUM_SERVICE, + .priority = 2, + .private_data_size = sizeof (struct quorum_pd), + .allow_inquorate = CS_LIB_ALLOW_INQUORATE, + .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED, + .lib_init_fn = quorum_lib_init_fn, + .lib_exit_fn = quorum_lib_exit_fn, + .lib_engine = quorum_lib_service, + .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler), + .exec_init_fn = votequorum_exec_init_fn, + .exec_exit_fn = votequorum_exec_exit_fn, + .exec_engine = votequorum_exec_engine, + .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler), + .sync_init = votequorum_sync_init, + .sync_process = votequorum_sync_process, + .sync_activate = votequorum_sync_activate, + .sync_abort = votequorum_sync_abort +}; + +struct corosync_service_engine *votequorum_get_service_engine_ver0 (void) +{ + return (&votequorum_service_engine); +} + +static struct default_service votequorum_service[] = { + { + .name = "corosync_votequorum", + .ver = 0, + .loader = votequorum_get_service_engine_ver0 + }, +}; + +/* + * common/utility macros/functions + */ + +#define max(a,b) (((a) > (b)) ? 
(a) : (b)) + +static void node_add_ordered(struct cluster_node *newnode) +{ + struct cluster_node *node = NULL; + struct qb_list_head *tmp; + + ENTER(); + + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if (newnode->node_id < node->node_id) { + break; + } + } + + if (!node) { + qb_list_add(&newnode->list, &cluster_members_list); + } else { + qb_list_add_tail(&newnode->list, &node->list); + } + + LEAVE(); +} + +static struct cluster_node *allocate_node(unsigned int nodeid) +{ + struct cluster_node *cl = NULL; + struct qb_list_head *tmp; + + ENTER(); + + if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) { + cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries]; + cluster_nodes_entries++; + } else { + qb_list_for_each(tmp, &cluster_members_list) { + cl = qb_list_entry(tmp, struct cluster_node, list); + if (cl->state == NODESTATE_DEAD) { + break; + } + } + /* + * this should never happen + */ + if (!cl) { + log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node " CS_PRI_NODE_ID " data!!", nodeid); + goto out; + } + qb_list_del(tmp); + } + + memset(cl, 0, sizeof(struct cluster_node)); + cl->node_id = nodeid; + if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { + node_add_ordered(cl); + } + +out: + LEAVE(); + + return cl; +} + +static struct cluster_node *find_node_by_nodeid(unsigned int nodeid) +{ + struct cluster_node *node; + struct qb_list_head *tmp; + + ENTER(); + + if (nodeid == us->node_id) { + LEAVE(); + return us; + } + + if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { + LEAVE(); + return qdevice; + } + + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if (node->node_id == nodeid) { + LEAVE(); + return node; + } + } + + LEAVE(); + return NULL; +} + +static void get_lowest_node_id(void) +{ + struct cluster_node *node = NULL; + struct qb_list_head *tmp; + + ENTER(); + + lowest_node_id = us->node_id; + + qb_list_for_each(tmp, 
&cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if ((node->state == NODESTATE_MEMBER) && + (node->node_id < lowest_node_id)) { + lowest_node_id = node->node_id; + } + } + log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, lowest_node_id, us->node_id); + icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id); + + LEAVE(); +} + +static void get_highest_node_id(void) +{ + struct cluster_node *node = NULL; + struct qb_list_head *tmp; + + ENTER(); + + highest_node_id = us->node_id; + + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if ((node->state == NODESTATE_MEMBER) && + (node->node_id > highest_node_id)) { + highest_node_id = node->node_id; + } + } + log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, highest_node_id, us->node_id); + icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id); + + LEAVE(); +} + +static int check_low_node_id_partition(void) +{ + struct cluster_node *node = NULL; + struct qb_list_head *tmp; + int found = 0; + + ENTER(); + + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if ((node->state == NODESTATE_MEMBER) && + (node->node_id == lowest_node_id)) { + found = 1; + } + } + + LEAVE(); + return found; +} + +static int check_high_node_id_partition(void) +{ + struct cluster_node *node = NULL; + struct qb_list_head *tmp; + int found = 0; + + ENTER(); + + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if ((node->state == NODESTATE_MEMBER) && + (node->node_id == highest_node_id)) { + found = 1; + } + } + + LEAVE(); + return found; +} + +static int is_in_nodelist(int nodeid, unsigned int *members, int entries) +{ + int i; + ENTER(); + + for (i=0; i<entries; i++) { + if (nodeid == members[i]) { + LEAVE(); + return 1; + } + } + LEAVE(); + 
return 0; +} + +/* + * The algorithm for a list of tie-breaker nodes is: + * travel the list of nodes in the auto_tie_breaker list, + * if the node IS in our current partition, check if the + * nodes earlier in the atb list are in the 'previous' partition; + * If none are found then we are safe to be quorate, if any are + * then we cannot be as we don't know if that node is up or down. + * If we don't have a node in the current list we are NOT quorate. + * Obviously if we find the first node in the atb list in our + * partition then we are quorate. + * + * Special cases lowest nodeid, and highest nodeid are handled separately. + */ +static int check_auto_tie_breaker(void) +{ + int i, j; + int res; + ENTER(); + + if (auto_tie_breaker == ATB_LOWEST) { + res = check_low_node_id_partition(); + log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LOWEST decision: %d", res); + LEAVE(); + return res; + } + if (auto_tie_breaker == ATB_HIGHEST) { + res = check_high_node_id_partition(); + log_printf(LOGSYS_LEVEL_DEBUG, "ATB_HIGHEST decision: %d", res); + LEAVE(); + return res; + } + + /* Assume ATB_LIST, we should never be called for ATB_NONE */ + for (i=0; i < atb_nodelist_entries; i++) { + if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) { + /* + * Node is in our partition, if any of its predecessors are + * in the previous quorum partition then it might be in the + * 'other half' (as we've got this far without seeing it here) + * and so we can't be quorate. 
+ */ + for (j=0; j<i; j++) { + if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) { + log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in previous partition but not here, quorum denied", atb_nodelist[j]); + LEAVE(); + return 0; + } + } + + /* + * None of the other list nodes were in the previous partition, if there + * are enough votes, we can be quorate + */ + log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in current partition, we can be quorate", atb_nodelist[i]); + LEAVE(); + return 1; + } + } + log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found no list nodes in current partition, we cannot be quorate"); + LEAVE(); + return 0; +} + +/* + * atb_string can be either: + * 'lowest' + * 'highest' + * a list of nodeids + */ +static void parse_atb_string(char *atb_string) +{ + char *ptr; + long num; + + ENTER(); + auto_tie_breaker = ATB_NONE; + + if (!strcmp(atb_string, "lowest")) + auto_tie_breaker = ATB_LOWEST; + + if (!strcmp(atb_string, "highest")) + auto_tie_breaker = ATB_HIGHEST; + + if (atoi(atb_string)) { + + atb_nodelist_entries = 0; + ptr = atb_string; + do { + num = strtol(ptr, &ptr, 10); + if (num) { + log_printf(LOGSYS_LEVEL_DEBUG, "ATB nodelist[%d] = %d", atb_nodelist_entries, num); + atb_nodelist[atb_nodelist_entries++] = num; + } + } while (num); + + if (atb_nodelist_entries) { + auto_tie_breaker = ATB_LIST; + } + } + icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); + log_printf(LOGSYS_LEVEL_DEBUG, "ATB type = %d", auto_tie_breaker); + + /* Make sure we got something */ + if (auto_tie_breaker == ATB_NONE) { + log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. 
auto_tie_breaker is disabled"); + auto_tie_breaker = ATB_NONE; + } + LEAVE(); +} + +static int check_qdevice_master(void) +{ + struct cluster_node *node = NULL; + struct qb_list_head *tmp; + int found = 0; + + ENTER(); + + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if ((node->state == NODESTATE_MEMBER) && + (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) && + (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE)) { + found = 1; + } + } + + LEAVE(); + return found; +} + +static void decode_flags(uint32_t flags) +{ + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, + "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s", + (flags & NODE_FLAGS_QUORATE)?"Yes":"No", + (flags & NODE_FLAGS_LEAVING)?"Yes":"No", + (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No", + (flags & NODE_FLAGS_FIRST)?"Yes":"No", + (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No", + (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No", + (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No", + (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No"); + + LEAVE(); +} + +/* + * load/save are copied almost pristine from totemsrp,c + */ +static int load_ev_tracking_barrier(void) +{ + int res = 0; + char filename[PATH_MAX]; + + ENTER(); + + snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_state_dir()); + + ev_tracking_fd = open(filename, O_RDWR, 0700); + if (ev_tracking_fd != -1) { + res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t)); + close(ev_tracking_fd); + if (res == sizeof (uint32_t)) { + LEAVE(); + return 0; + } + } + + ev_tracking_barrier = 0; + umask(0); + ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700); + if (ev_tracking_fd != -1) { + res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t)); + if ((res == -1) || (res != sizeof (uint32_t))) { + log_printf(LOGSYS_LEVEL_WARNING, + "Unable to write to %s", filename); + } + close(ev_tracking_fd); + 
LEAVE(); + return 0; + } + log_printf(LOGSYS_LEVEL_WARNING, + "Unable to create %s file", filename); + + LEAVE(); + + return -1; +} + +static void update_wait_for_all_status(uint8_t wfa_status) +{ + ENTER(); + + wait_for_all_status = wfa_status; + if (wait_for_all_status) { + us->flags |= NODE_FLAGS_WFASTATUS; + } else { + us->flags &= ~NODE_FLAGS_WFASTATUS; + } + icmap_set_uint8("runtime.votequorum.wait_for_all_status", + wait_for_all_status); + + LEAVE(); +} + +static void update_two_node(void) +{ + ENTER(); + + icmap_set_uint8("runtime.votequorum.two_node", two_node); + + LEAVE(); +} + +static void update_ev_barrier(uint32_t expected_votes) +{ + ENTER(); + + ev_barrier = expected_votes; + icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier); + + LEAVE(); +} + +static void update_qdevice_can_operate(uint8_t status) +{ + ENTER(); + + qdevice_can_operate = status; + icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate); + + LEAVE(); +} + +static void update_qdevice_master_wins(uint8_t allow) +{ + ENTER(); + + qdevice_master_wins = allow; + icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins); + + LEAVE(); +} + +static void update_ev_tracking_barrier(uint32_t ev_t_barrier) +{ + int res; + + ENTER(); + + ev_tracking_barrier = ev_t_barrier; + icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier); + + if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) { + log_printf(LOGSYS_LEVEL_WARNING, + "Unable to update ev_tracking_barrier on disk data!!!"); + LEAVE(); + return; + } + + res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t)); + if (res != sizeof (uint32_t)) { + log_printf(LOGSYS_LEVEL_WARNING, + "Unable to update ev_tracking_barrier on disk data!!!"); + } +#ifdef HAVE_FDATASYNC + fdatasync(ev_tracking_fd); +#else + fsync(ev_tracking_fd); +#endif + + LEAVE(); +} + +/* + * quorum calculation core bits + */ + +static int calculate_quorum(int allow_decrease, unsigned int 
max_expected, unsigned int *ret_total_votes) +{ + struct qb_list_head *nodelist; + struct cluster_node *node; + unsigned int total_votes = 0; + unsigned int highest_expected = 0; + unsigned int newquorum, q1, q2; + unsigned int total_nodes = 0; + + ENTER(); + + if ((allow_downscale) && (allow_decrease) && (max_expected)) { + max_expected = max(ev_barrier, max_expected); + } + + qb_list_for_each(nodelist, &cluster_members_list) { + node = qb_list_entry(nodelist, struct cluster_node, list); + + log_printf(LOGSYS_LEVEL_DEBUG, "node " CS_PRI_NODE_ID " state=%d, votes=%u, expected=%u", + node->node_id, node->state, node->votes, node->expected_votes); + + if (node->state == NODESTATE_MEMBER) { + highest_expected = max(highest_expected, node->expected_votes); + total_votes += node->votes; + total_nodes++; + } + } + + if (us->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { + log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes); + total_votes += qdevice->votes; + total_nodes++; + } + + if (max_expected > 0) { + highest_expected = max_expected; + } + + /* + * This quorum calculation is taken from the OpenVMS Cluster Systems + * manual, but, then, you guessed that didn't you + */ + q1 = (highest_expected + 2) / 2; + q2 = (total_votes + 2) / 2; + newquorum = max(q1, q2); + + /* + * Normally quorum never decreases but the system administrator can + * force it down by setting expected votes to a maximum value + */ + if (!allow_decrease) { + newquorum = max(quorum, newquorum); + } + + /* + * The special two_node mode allows each of the two nodes to retain + * quorum if the other fails. Only one of the two should live past + * fencing (as both nodes try to fence each other in split-brain.) + * Also: if there are more than two nodes, force us inquorate to avoid + * any damage or confusion. 
+ */ + if (two_node && total_nodes <= 2) { + newquorum = 1; + } + + if (ret_total_votes) { + *ret_total_votes = total_votes; + } + + LEAVE(); + return newquorum; +} + +static void update_node_expected_votes(int new_expected_votes) +{ + struct qb_list_head *nodelist; + struct cluster_node *node; + + if (new_expected_votes) { + qb_list_for_each(nodelist, &cluster_members_list) { + node = qb_list_entry(nodelist, struct cluster_node, list); + + if (node->state == NODESTATE_MEMBER) { + node->expected_votes = new_expected_votes; + } + } + } +} + +static void are_we_quorate(unsigned int total_votes) +{ + int quorate; + int quorum_change = 0; + + ENTER(); + + /* + * wait for all nodes to show up before granting quorum + */ + + if ((wait_for_all) && (wait_for_all_status)) { + if (total_votes != us->expected_votes) { + log_printf(LOGSYS_LEVEL_NOTICE, + "Waiting for all cluster members. " + "Current votes: %d expected_votes: %d", + total_votes, us->expected_votes); + assert(!cluster_is_quorate); + return; + } + update_wait_for_all_status(0); + } + + if (quorum > total_votes) { + quorate = 0; + } else { + quorate = 1; + get_lowest_node_id(); + get_highest_node_id(); + } + + if ((auto_tie_breaker != ATB_NONE) && + /* Must be a half (or half-1) split */ + (total_votes == (us->expected_votes / 2)) && + /* If the 'other' partition in a split might have quorum then we can't run ATB */ + (previous_quorum_members_entries - quorum_members_entries < quorum) && + (check_auto_tie_breaker() == 1)) { + quorate = 1; + } + + if ((qdevice_master_wins) && + (!quorate) && + (check_qdevice_master() == 1)) { + log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition"); + quorate = 1; + } + + if (cluster_is_quorate && !quorate) { + quorum_change = 1; + log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity"); + } + if (!cluster_is_quorate && quorate) { + quorum_change = 1; + log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity"); + } + + 
cluster_is_quorate = quorate; + if (cluster_is_quorate) { + us->flags |= NODE_FLAGS_QUORATE; + } else { + us->flags &= ~NODE_FLAGS_QUORATE; + } + + if (wait_for_all) { + if (quorate) { + update_wait_for_all_status(0); + } else { + update_wait_for_all_status(1); + } + } + + if ((quorum_change) && + (sync_in_progress == 0)) { + quorum_callback(quorum_members, quorum_members_entries, + cluster_is_quorate, &quorum_ringid); + votequorum_exec_send_quorum_notification(NULL, 0L); + } + + LEAVE(); +} + +static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members) +{ + unsigned int total_votes = 0; + unsigned int cluster_members = 0; + struct qb_list_head *nodelist; + struct cluster_node *node; + + ENTER(); + + qb_list_for_each(nodelist, &cluster_members_list) { + node = qb_list_entry(nodelist, struct cluster_node, list); + if (node->state == NODESTATE_MEMBER) { + cluster_members++; + total_votes += node->votes; + } + } + + if (qdevice->votes) { + total_votes += qdevice->votes; + cluster_members++; + } + + *totalvotes = total_votes; + *current_members = cluster_members; + + LEAVE(); +} + +/* + * Recalculate cluster quorum, set quorate and notify changes + */ +static void recalculate_quorum(int allow_decrease, int by_current_nodes) +{ + unsigned int total_votes = 0; + unsigned int cluster_members = 0; + + ENTER(); + + get_total_votes(&total_votes, &cluster_members); + + if (!by_current_nodes) { + cluster_members = 0; + } + + /* + * Keep expected_votes at the highest number of votes in the cluster + */ + log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes); + if (total_votes > us->expected_votes) { + us->expected_votes = total_votes; + votequorum_exec_send_expectedvotes_notification(); + } + + if ((ev_tracking) && + (us->expected_votes > ev_tracking_barrier)) { + update_ev_tracking_barrier(us->expected_votes); + } + + quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes); + 
update_node_expected_votes(cluster_members); + + are_we_quorate(total_votes); + + LEAVE(); +} + +/* + * configuration bits and pieces + */ + +static int votequorum_read_nodelist_configuration(uint32_t *votes, + uint32_t *nodes, + uint32_t *expected_votes) +{ + icmap_iter_t iter; + const char *iter_key; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + uint32_t our_pos, node_pos, last_node_pos=-1; + uint32_t nodecount = 0; + uint32_t nodelist_expected_votes = 0; + uint32_t node_votes = 0; + int res = 0; + + ENTER(); + + if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) { + log_printf(LOGSYS_LEVEL_DEBUG, + "No nodelist defined or our node is not in the nodelist"); + return 0; + } + + iter = icmap_iter_init("nodelist.node."); + + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + + res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); + if (res != 2) { + continue; + } + + /* + * If current node_pos is the same as the last_node_pos then skip it + * so we only do the code below once per node. 
+ * (icmap keys are always in order) + */ + if (last_node_pos == node_pos) { + continue; + } + last_node_pos = node_pos; + + nodecount++; + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos); + if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) { + node_votes = 1; + } + + nodelist_expected_votes = nodelist_expected_votes + node_votes; + + if (node_pos == our_pos) { + *votes = node_votes; + } + } + + *expected_votes = nodelist_expected_votes; + *nodes = nodecount; + + icmap_iter_finalize(iter); + + LEAVE(); + + return 1; +} + +static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes) +{ + char *qdevice_model = NULL; + int ret = 0; + + ENTER(); + + if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) { + if (strlen(qdevice_model)) { + if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) { + *qdevice_votes = -1; + } + if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) { + qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT; + } + if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) { + qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT; + } + update_qdevice_can_operate(1); + ret = 1; + } + + free(qdevice_model); + } + + LEAVE(); + + return ret; +} + +#define VOTEQUORUM_READCONFIG_STARTUP 0 +#define VOTEQUORUM_READCONFIG_RUNTIME 1 + +static char *votequorum_readconfig(int runtime) +{ + uint32_t node_votes = 0, qdevice_votes = 0; + uint32_t node_expected_votes = 0, expected_votes = 0; + uint32_t node_count = 0; + uint8_t atb = 0; + int have_nodelist, have_qdevice; + char *atb_string = NULL; + char *error = NULL; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime); + + /* + * Set the few things we re-read at runtime back to their defaults + */ + if (runtime) { + two_node = 0; + expected_votes = 0; + /* auto_tie_breaker cannot be changed by config reload, but + * we automatically 
disable it on odd-sized clusters without + * wait_for_all. + * We may need to re-enable it when membership changes to ensure + * that auto_tie_breaker is consistent across all nodes */ + auto_tie_breaker = initial_auto_tie_breaker; + icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); + } + + /* + * gather basic data here + */ + (void)icmap_get_uint32("quorum.expected_votes", &expected_votes); + have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes); + have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes); + (void)icmap_get_uint8("quorum.two_node", &two_node); + + /* + * do config verification and enablement + */ + + if ((!have_nodelist) && (!expected_votes)) { + if (!runtime) { + error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!"; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!"); + log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data"); + } + goto out; + } + + /* + * two_node and qdevice are not compatible in the same config. 
+ * try to make an educated guess of what to do + */ + + if ((two_node) && (have_qdevice)) { + if (!runtime) { + error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!"; + goto out; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!"); + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node"); + two_node = 0; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node"); + update_qdevice_can_operate(0); + } + } + } + + /* + * Enable special features + */ + if (!runtime) { + (void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale); + if (icmap_get_uint8("quorum.wait_for_all", &wait_for_all) != CS_OK) { + wait_for_all_autoset = 1; + } + (void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing); + (void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window); + (void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking); + (void)icmap_get_uint8("quorum.auto_tie_breaker", &atb); + (void)icmap_get_string("quorum.auto_tie_breaker_node", &atb_string); + + /* auto_tie_breaker defaults to LOWEST */ + if (atb) { + auto_tie_breaker = ATB_LOWEST; + icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); + } + else { + auto_tie_breaker = ATB_NONE; + if (atb_string) { + log_printf(LOGSYS_LEVEL_WARNING, + "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0"); + } + } + + if (atb && atb_string) { + parse_atb_string(atb_string); + } + free(atb_string); + initial_auto_tie_breaker = auto_tie_breaker; + + /* allow_downscale requires ev_tracking */ + if (allow_downscale) { + ev_tracking = 1; + } + + if (ev_tracking) { + if (load_ev_tracking_barrier() < 0) { + LEAVE(); + return ((char *)"Unable to load ev_tracking file!"); + } + 
update_ev_tracking_barrier(ev_tracking_barrier); + } + + } + + /* + * Changing of wait_for_all during runtime is not supported, but changing of two_node is + * and two_node may set wfa if not configured explicitly. It is safe to unset it + * (or set it back) when two_node changes. + */ + if (wait_for_all_autoset) { + wait_for_all = two_node; + } + + /* two_node and auto_tie_breaker are not compatible as two_node uses + * a fence race to decide quorum whereas ATB decides based on node id + */ + if (two_node && auto_tie_breaker != ATB_NONE) { + log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible."); + log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf"); + two_node = 0; + } + + /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs + * to be set so that an isolated half+1 without the tie breaker node + * does not have quorum on reboot. + */ + if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) && + (!wait_for_all)) { + if (last_man_standing) { + /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what + * they might want so we'll just quit. + */ + log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n"); + log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n"); + log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n"); + log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n"); + log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. 
Please fix your corosync.conf\n"); + error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster"; + goto out; + } + else { + log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n"); + log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n"); + log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n"); + auto_tie_breaker = ATB_NONE; + icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); + } + } + + /* + * quorum device is not compatible with last_man_standing and auto_tie_breaker + * neither lms or atb can be set at runtime, so there is no need to check for + * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have + * been enabled at startup. + */ + + if ((have_qdevice) && (last_man_standing)) { + if (!runtime) { + error = (char *)"configuration error: quorum.device is not compatible with last_man_standing"; + goto out; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing"); + log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); + update_qdevice_can_operate(0); + } + } + + if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) { + if (!runtime) { + error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker"; + goto out; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker"); + log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); + update_qdevice_can_operate(0); + } + } + + if ((have_qdevice) && (allow_downscale)) { + if (!runtime) { + error = (char *)"configuration error: quorum.device is not compatible with allow_downscale"; + goto out; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible 
with allow_downscale"); + log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); + update_qdevice_can_operate(0); + } + } + + /* + * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes + * we don't know what the quorum device should vote. + */ + + if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) { + if (!runtime) { + error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set"; + goto out; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set"); + log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); + update_qdevice_can_operate(0); + } + } + + /* + * if user specifies a node list with uneven votes and no device.votes + * we cannot autocalculate the votes + */ + + if ((have_qdevice) && + (qdevice_votes == -1) && + (have_nodelist) && + (node_count != node_expected_votes)) { + if (!runtime) { + error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1"; + goto out; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1"); + log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); + update_qdevice_can_operate(0); + } + } + + /* + * validate quorum device votes vs expected_votes + */ + + if ((qdevice_votes > 0) && (expected_votes)) { + int delta = expected_votes - qdevice_votes; + if (delta < 2) { + if (!runtime) { + error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low"; + goto out; + } else { + log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low"); + log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations"); + update_qdevice_can_operate(0); + } + } + } + + /* + * automatically calculate device votes and adjust 
expected_votes from nodelist + */ + + if ((have_qdevice) && + (qdevice_votes == -1) && + (!expected_votes) && + (have_nodelist) && + (node_count == node_expected_votes)) { + qdevice_votes = node_expected_votes - 1; + node_expected_votes = node_expected_votes + qdevice_votes; + } + + /* + * set this node votes and expected_votes + */ + log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes); + + if (ev_tracking) { + expected_votes = ev_tracking_barrier; + } + + if (have_nodelist) { + us->votes = node_votes; + us->expected_votes = node_expected_votes; + } else { + us->votes = 1; + (void)icmap_get_uint32("quorum.votes", &us->votes); + } + + if (expected_votes) { + us->expected_votes = expected_votes; + } + + /* + * set qdevice votes + */ + + if (!have_qdevice) { + qdevice->votes = 0; + } + + if (qdevice_votes != -1) { + qdevice->votes = qdevice_votes; + } + + update_ev_barrier(us->expected_votes); + update_two_node(); + if (wait_for_all) { + if (!runtime) { + update_wait_for_all_status(1); + } + } else if (wait_for_all_autoset && wait_for_all_status) { + /* + * Reset wait for all status for consistency when wfa is auto-unset by 2node. + * wait_for_all_status would be ignored by are_we_quorate anyway. 
+ */ + update_wait_for_all_status(0); + } + +out: + LEAVE(); + return error; +} + +static void votequorum_refresh_config( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + int old_votes, old_expected_votes; + uint8_t reloading; + uint8_t cancel_wfa; + int32_t reload_status; + + ENTER(); + + /* + * If a full reload is in progress then don't do anything until it's done and + * can reconfigure it all atomically + */ + if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) { + return; + } + + /* If a full reload failed, then don't reconfigure */ + if ( (strcmp(key_name, "config.totemconfig_reload_in_progress") == 0) && + (icmap_get_int32("config.reload_status", &reload_status) == CS_OK) && + (reload_status != CS_OK) ) { + return; + } + + (void)icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa); + if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 && + cancel_wfa >= 1) { + icmap_set_uint8("quorum.cancel_wait_for_all", 0); + if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA, + us->node_id, 0)) { + log_printf(LOGSYS_LEVEL_ERROR, "Failed to send Cancel WFA message to other nodes"); + } + return; + } + + old_votes = us->votes; + old_expected_votes = us->expected_votes; + + /* + * Reload the configuration + */ + votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME); + + /* + * activate new config + */ + votequorum_exec_send_nodeinfo(us->node_id); + votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); + if (us->votes != old_votes) { + if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, + us->node_id, us->votes)) { + log_printf(LOGSYS_LEVEL_ERROR, "Failed to send new votes message to other nodes"); + } + } + if (us->expected_votes != old_expected_votes) { + if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, + us->node_id, us->expected_votes)) { + 
log_printf(LOGSYS_LEVEL_ERROR, "Failed to send expected votes message to other nodes"); + } + } + + LEAVE(); +} + +static void votequorum_exec_add_config_notification(void) +{ + icmap_track_t icmap_track_nodelist = NULL; + icmap_track_t icmap_track_quorum = NULL; + icmap_track_t icmap_track_reload = NULL; + + ENTER(); + + icmap_track_add("nodelist.", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, + votequorum_refresh_config, + NULL, + &icmap_track_nodelist); + + icmap_track_add("quorum.", + ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, + votequorum_refresh_config, + NULL, + &icmap_track_quorum); + + icmap_track_add("config.totemconfig_reload_in_progress", + ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY, + votequorum_refresh_config, + NULL, + &icmap_track_reload); + + LEAVE(); +} + +/* + * votequorum_exec core + */ + +static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value) +{ + struct req_exec_quorum_reconfigure req_exec_quorum_reconfigure; + struct iovec iov[1]; + int ret; + + ENTER(); + + req_exec_quorum_reconfigure.nodeid = nodeid; + req_exec_quorum_reconfigure.value = value; + req_exec_quorum_reconfigure.param = param; + req_exec_quorum_reconfigure._pad0 = 0; + req_exec_quorum_reconfigure._pad1 = 0; + req_exec_quorum_reconfigure._pad2 = 0; + + req_exec_quorum_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE); + req_exec_quorum_reconfigure.header.size = sizeof(req_exec_quorum_reconfigure); + + iov[0].iov_base = (void *)&req_exec_quorum_reconfigure; + iov[0].iov_len = sizeof(req_exec_quorum_reconfigure); + + ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); + + LEAVE(); + return ret; +} + +static int votequorum_exec_send_nodeinfo(uint32_t nodeid) +{ + struct req_exec_quorum_nodeinfo req_exec_quorum_nodeinfo; + struct iovec iov[1]; + struct cluster_node *node; + int ret; + + ENTER(); + + node = 
find_node_by_nodeid(nodeid); + if (!node) { + return -1; + } + + memset(&req_exec_quorum_nodeinfo, 0, sizeof(req_exec_quorum_nodeinfo)); + req_exec_quorum_nodeinfo.nodeid = nodeid; + req_exec_quorum_nodeinfo.votes = node->votes; + req_exec_quorum_nodeinfo.expected_votes = node->expected_votes; + req_exec_quorum_nodeinfo.flags = node->flags; + if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { + decode_flags(node->flags); + } + + req_exec_quorum_nodeinfo.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO); + req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo); + + iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo; + iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo); + + ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); + + LEAVE(); + return ret; +} + +static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname) +{ + struct req_exec_quorum_qdevice_reconfigure req_exec_quorum_qdevice_reconfigure; + struct iovec iov[1]; + int ret; + + ENTER(); + + req_exec_quorum_qdevice_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE); + req_exec_quorum_qdevice_reconfigure.header.size = sizeof(req_exec_quorum_qdevice_reconfigure); + + assert(strlen(oldname) < sizeof(req_exec_quorum_qdevice_reconfigure.oldname)); + strcpy(req_exec_quorum_qdevice_reconfigure.oldname, oldname); + + assert(strlen(newname) < sizeof(req_exec_quorum_qdevice_reconfigure.newname)); + strcpy(req_exec_quorum_qdevice_reconfigure.newname, newname); + + iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure; + iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure); + + ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); + + LEAVE(); + return ret; +} + +static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req) +{ + struct req_exec_quorum_qdevice_reg req_exec_quorum_qdevice_reg; + struct iovec iov[1]; + int ret; + + 
ENTER(); + + req_exec_quorum_qdevice_reg.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG); + req_exec_quorum_qdevice_reg.header.size = sizeof(req_exec_quorum_qdevice_reg); + req_exec_quorum_qdevice_reg.operation = operation; + + assert(strlen(qdevice_name_req) < sizeof(req_exec_quorum_qdevice_reg.qdevice_name)); + strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req); + + iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg; + iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg); + + ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED); + + LEAVE(); + return ret; +} + +static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context) +{ + struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification; + struct qb_list_head *tmp; + struct cluster_node *node; + int i = 0; + int cluster_members = 0; + int size; + char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)]; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate); + + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + cluster_members++; + } + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + cluster_members++; + } + + size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members; + + res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf; + res_lib_votequorum_notification->quorate = cluster_is_quorate; + res_lib_votequorum_notification->context = context; + res_lib_votequorum_notification->node_list_entries = cluster_members; + res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION; + res_lib_votequorum_notification->header.size = size; + res_lib_votequorum_notification->header.error = CS_OK; + + /* Send all known nodes and their 
states */ + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + res_lib_votequorum_notification->node_list[i].nodeid = node->node_id; + res_lib_votequorum_notification->node_list[i++].state = node->state; + } + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID; + res_lib_votequorum_notification->node_list[i++].state = qdevice->state; + } + + /* Send it to all interested parties */ + if (conn) { + int ret = corosync_api->ipc_dispatch_send(conn, &buf, size); + LEAVE(); + return ret; + } else { + struct quorum_pd *qpd; + + qb_list_for_each(tmp, &trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); + res_lib_votequorum_notification->context = qpd->tracking_context; + corosync_api->ipc_dispatch_send(qpd->conn, &buf, size); + } + } + + LEAVE(); + + return 0; +} + +static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context) +{ + struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification; + int i = 0; + int size; + struct qb_list_head *tmp; + char buf[sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries]; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. 
ring_id = " CS_PRI_RING_ID, quorum_ringid.nodeid, quorum_ringid.seq); + + size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries; + + res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf; + res_lib_votequorum_notification->node_list_entries = quorum_members_entries; + res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.nodeid; + res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq; + res_lib_votequorum_notification->context = context; + + for (i=0; i<quorum_members_entries; i++) { + res_lib_votequorum_notification->node_list[i] = quorum_members[i]; + } + + res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION; + res_lib_votequorum_notification->header.size = size; + res_lib_votequorum_notification->header.error = CS_OK; + + /* Send it to all interested parties */ + if (conn) { + int ret = corosync_api->ipc_dispatch_send(conn, &buf, size); + LEAVE(); + return ret; + } else { + struct quorum_pd *qpd; + + qb_list_for_each(tmp, &trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); + res_lib_votequorum_notification->context = qpd->tracking_context; + corosync_api->ipc_dispatch_send(qpd->conn, &buf, size); + } + } + + LEAVE(); + + return 0; +} + +static void votequorum_exec_send_expectedvotes_notification(void) +{ + struct res_lib_votequorum_expectedvotes_notification res_lib_votequorum_expectedvotes_notification; + struct quorum_pd *qpd; + struct qb_list_head *tmp; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback"); + + res_lib_votequorum_expectedvotes_notification.header.id = MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION; + res_lib_votequorum_expectedvotes_notification.header.size = sizeof(res_lib_votequorum_expectedvotes_notification); + res_lib_votequorum_expectedvotes_notification.header.error = CS_OK; + res_lib_votequorum_expectedvotes_notification.expected_votes = 
us->expected_votes; + + qb_list_for_each(tmp, &trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); + res_lib_votequorum_expectedvotes_notification.context = qpd->tracking_context; + corosync_api->ipc_dispatch_send(qpd->conn, &res_lib_votequorum_expectedvotes_notification, + sizeof(struct res_lib_votequorum_expectedvotes_notification)); + } + + LEAVE(); +} + +static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message) +{ + ENTER(); + + LEAVE(); +} + +static void message_handler_req_exec_votequorum_qdevice_reconfigure ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_quorum_qdevice_reconfigure *req_exec_quorum_qdevice_reconfigure = message; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node " CS_PRI_NODE_ID " [from: %s to: %s]", + nodeid, + req_exec_quorum_qdevice_reconfigure->oldname, + req_exec_quorum_qdevice_reconfigure->newname); + + if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) { + log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename"); + memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); + strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname); + /* + * TODO: notify qdevices about name change? 
+ * this is not relevant for now and can wait later on since + * qdevices are local only and libvotequorum is not final + */ + } + + LEAVE(); +} + +static void exec_votequorum_qdevice_reg_endian_convert (void *message) +{ + struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message; + + ENTER(); + + req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation); + + LEAVE(); +} + +static void message_handler_req_exec_votequorum_qdevice_reg ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + int wipe_qdevice_name = 1; + struct cluster_node *node = NULL; + struct qb_list_head *tmp; + cs_error_t error = CS_OK; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node " CS_PRI_NODE_ID " [%s]", + req_exec_quorum_qdevice_reg->operation, + nodeid, req_exec_quorum_qdevice_reg->qdevice_name); + + switch(req_exec_quorum_qdevice_reg->operation) + { + case VOTEQUORUM_QDEVICE_OPERATION_REGISTER: + if (nodeid != us->node_id) { + if (!strlen(qdevice_name)) { + log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded"); + strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name); + } + LEAVE(); + return; + } + + /* + * protect against the case where we broadcast qdevice registration + * to new memebers, we receive the message back, but there is no registration + * connection in progress + */ + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + LEAVE(); + return; + } + + /* + * this should NEVER happen + */ + if (!qdevice_reg_conn) { + log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!"); + LEAVE(); + return; + } + + /* + * registering our own device in this case + */ + if (!strlen(qdevice_name)) { + strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name); + } + + /* + * check if it is our device or something else 
+ */ + if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name, + qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) { + us->flags |= NODE_FLAGS_QDEVICE_REGISTERED; + votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); + votequorum_exec_send_nodeinfo(us->node_id); + } else { + log_printf(LOGSYS_LEVEL_WARNING, + "A new qdevice with different name (new: %s old: %s) is trying to register!", + req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name); + error = CS_ERR_EXIST; + } + + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + qdevice_reg_conn = NULL; + break; + case VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER: + qb_list_for_each(tmp, &cluster_members_list) { + node = qb_list_entry(tmp, struct cluster_node, list); + if ((node->state == NODESTATE_MEMBER) && + (node->flags & NODE_FLAGS_QDEVICE_REGISTERED)) { + wipe_qdevice_name = 0; + } + } + + if (wipe_qdevice_name) { + memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); + } + + break; + } + LEAVE(); +} + +static void exec_votequorum_nodeinfo_endian_convert (void *message) +{ + struct req_exec_quorum_nodeinfo *nodeinfo = message; + + ENTER(); + + nodeinfo->nodeid = swab32(nodeinfo->nodeid); + nodeinfo->votes = swab32(nodeinfo->votes); + nodeinfo->expected_votes = swab32(nodeinfo->expected_votes); + nodeinfo->flags = swab32(nodeinfo->flags); + + LEAVE(); +} + +static void message_handler_req_exec_votequorum_nodeinfo ( + const void *message, + unsigned int sender_nodeid) +{ + const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message; + struct cluster_node *node = NULL; + int old_votes; + int old_expected; + uint32_t old_flags; + nodestate_t old_state; + int new_node = 0; + int allow_downgrade = 0; + int by_node = 0; + unsigned int nodeid 
= req_exec_quorum_nodeinfo->nodeid; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node " CS_PRI_NODE_ID, sender_nodeid); + log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[" CS_PRI_NODE_ID "]: votes: %d, expected: %d flags: %d", + nodeid, + req_exec_quorum_nodeinfo->votes, + req_exec_quorum_nodeinfo->expected_votes, + req_exec_quorum_nodeinfo->flags); + + if (nodeid != VOTEQUORUM_QDEVICE_NODEID) { + decode_flags(req_exec_quorum_nodeinfo->flags); + } + + node = find_node_by_nodeid(nodeid); + if (!node) { + node = allocate_node(nodeid); + new_node = 1; + } + if (!node) { + corosync_api->error_memory_failure(); + LEAVE(); + return; + } + + if (new_node) { + old_votes = 0; + old_expected = 0; + old_state = NODESTATE_DEAD; + old_flags = 0; + } else { + old_votes = node->votes; + old_expected = node->expected_votes; + old_state = node->state; + old_flags = node->flags; + } + + if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { + struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid); + + assert(sender_node != NULL); + + if ((!cluster_is_quorate) && + (sender_node->flags & NODE_FLAGS_QUORATE)) { + node->votes = req_exec_quorum_nodeinfo->votes; + } else { + node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes); + } + goto recalculate; + } + + /* Update node state */ + node->flags = req_exec_quorum_nodeinfo->flags; + node->votes = req_exec_quorum_nodeinfo->votes; + node->state = NODESTATE_MEMBER; + + if (node->flags & NODE_FLAGS_LEAVING) { + node->state = NODESTATE_LEAVING; + allow_downgrade = 1; + by_node = 1; + } + + if ((!cluster_is_quorate) && + (node->flags & NODE_FLAGS_QUORATE)) { + allow_downgrade = 1; + us->expected_votes = req_exec_quorum_nodeinfo->expected_votes; + } + + if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) { + node->expected_votes = req_exec_quorum_nodeinfo->expected_votes; + } else { + node->expected_votes = us->expected_votes; + } + + if ((last_man_standing) && (node->votes > 1)) { + 
log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all" + "cluster nodes votes are set to 1. Disabling LMS."); + last_man_standing = 0; + if (last_man_standing_timer_set) { + corosync_api->timer_delete(last_man_standing_timer); + last_man_standing_timer_set = 0; + } + } + +recalculate: + if ((new_node) || + (nodeid == us->node_id) || + (node->flags & NODE_FLAGS_FIRST) || + (old_votes != node->votes) || + (old_expected != node->expected_votes) || + (old_flags != node->flags) || + (old_state != node->state)) { + recalculate_quorum(allow_downgrade, by_node); + } + + if ((wait_for_all) && + (!(node->flags & NODE_FLAGS_WFASTATUS)) && + (node->flags & NODE_FLAGS_QUORATE)) { + update_wait_for_all_status(0); + } + + LEAVE(); +} + +static void exec_votequorum_reconfigure_endian_convert (void *message) +{ + struct req_exec_quorum_reconfigure *reconfigure = message; + + ENTER(); + + reconfigure->nodeid = swab32(reconfigure->nodeid); + reconfigure->value = swab32(reconfigure->value); + + LEAVE(); +} + +static void message_handler_req_exec_votequorum_reconfigure ( + const void *message, + unsigned int nodeid) +{ + const struct req_exec_quorum_reconfigure *req_exec_quorum_reconfigure = message; + struct cluster_node *node; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node " CS_PRI_NODE_ID " for " CS_PRI_NODE_ID, + nodeid, req_exec_quorum_reconfigure->nodeid); + + switch(req_exec_quorum_reconfigure->param) + { + case VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES: + update_node_expected_votes(req_exec_quorum_reconfigure->value); + votequorum_exec_send_expectedvotes_notification(); + update_ev_barrier(req_exec_quorum_reconfigure->value); + if (ev_tracking) { + us->expected_votes = max(us->expected_votes, ev_tracking_barrier); + } + recalculate_quorum(1, 0); /* Allow decrease */ + break; + + case VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES: + node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid); + if (!node) { + 
LEAVE(); + return; + } + node->votes = req_exec_quorum_reconfigure->value; + recalculate_quorum(1, 0); /* Allow decrease */ + break; + + case VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA: + update_wait_for_all_status(0); + log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node " CS_PRI_NODE_ID ".", + req_exec_quorum_reconfigure->nodeid); + recalculate_quorum(0, 0); + + break; + + } + + LEAVE(); +} + +static int votequorum_exec_exit_fn (void) +{ + int ret = 0; + + ENTER(); + + /* + * tell the other nodes we are leaving + */ + + if (allow_downscale) { + us->flags |= NODE_FLAGS_LEAVING; + ret = votequorum_exec_send_nodeinfo(us->node_id); + } + + if ((ev_tracking) && (ev_tracking_fd != -1)) { + close(ev_tracking_fd); + } + + + LEAVE(); + return ret; +} + +static void votequorum_set_icmap_ro_keys(void) +{ + icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE); + icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE); + icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE); + icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE); + icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE); + icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE); + icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE); +} + +static char *votequorum_exec_init_fn (struct corosync_api_v1 *api) +{ + char *error = NULL; + + ENTER(); + + /* + * make sure we start clean + */ + qb_list_init(&cluster_members_list); + qb_list_init(&trackers_list); + qdevice = NULL; + us = NULL; + memset(cluster_nodes, 0, sizeof(cluster_nodes)); + + /* + * Allocate a cluster_node for qdevice + */ + qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID); + if (!qdevice) { + LEAVE(); + return ((char *)"Could not allocate node."); + } + qdevice->votes = 0; + memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); + + /* + * Allocate a cluster_node for us + */ + us = allocate_node(corosync_api->totem_nodeid_get()); + 
if (!us) { + LEAVE(); + return ((char *)"Could not allocate node."); + } + + icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id); + + us->state = NODESTATE_MEMBER; + us->votes = 1; + us->flags |= NODE_FLAGS_FIRST; + + error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP); + if (error) { + return error; + } + recalculate_quorum(0, 0); + + /* + * Set RO keys in icmap + */ + votequorum_set_icmap_ro_keys(); + + /* + * Listen for changes + */ + votequorum_exec_add_config_notification(); + + /* + * Start us off with one node + */ + votequorum_exec_send_nodeinfo(us->node_id); + + LEAVE(); + + return (NULL); +} + +/* + * votequorum service core + */ + +static void votequorum_last_man_standing_timer_fn(void *arg) +{ + ENTER(); + + last_man_standing_timer_set = 0; + if (cluster_is_quorate) { + recalculate_quorum(1,1); + } + + LEAVE(); +} + +static void votequorum_sync_init ( + const unsigned int *trans_list, size_t trans_list_entries, + const unsigned int *member_list, size_t member_list_entries, + const struct memb_ring_id *ring_id) +{ + int i, j; + int found; + int left_nodes; + struct cluster_node *node; + + ENTER(); + + sync_in_progress = 1; + sync_nodeinfo_sent = 0; + sync_wait_for_poll_or_timeout = 0; + + if (member_list_entries > 1) { + us->flags &= ~NODE_FLAGS_FIRST; + } + + /* + * we don't need to track which nodes have left directly, + * since that info is in the node db, but we need to know + * if somebody has left for last_man_standing + */ + left_nodes = 0; + for (i = 0; i < quorum_members_entries; i++) { + found = 0; + for (j = 0; j < member_list_entries; j++) { + if (quorum_members[i] == member_list[j]) { + found = 1; + break; + } + } + if (found == 0) { + left_nodes = 1; + node = find_node_by_nodeid(quorum_members[i]); + if (node) { + node->state = NODESTATE_DEAD; + } + } + } + + if (last_man_standing) { + if (((member_list_entries >= quorum) && (left_nodes)) || + ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && 
(check_low_node_id_partition() == 1))) { + if (last_man_standing_timer_set) { + corosync_api->timer_delete(last_man_standing_timer); + last_man_standing_timer_set = 0; + } + corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000, + NULL, votequorum_last_man_standing_timer_fn, + &last_man_standing_timer); + last_man_standing_timer_set = 1; + } + } + + memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries); + previous_quorum_members_entries = quorum_members_entries; + + memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries); + quorum_members_entries = member_list_entries; + memcpy(&quorum_ringid, ring_id, sizeof(*ring_id)); + + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && us->flags & NODE_FLAGS_QDEVICE_ALIVE) { + /* + * Reset poll timer. Sync waiting is interrupted on valid qdevice poll or after timeout + */ + if (qdevice_timer_set) { + corosync_api->timer_delete(qdevice_timer); + } + corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice, + qdevice_timer_fn, &qdevice_timer); + qdevice_timer_set = 1; + sync_wait_for_poll_or_timeout = 1; + + log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)", + qdevice_name, qdevice_sync_timeout); + } + + LEAVE(); +} + +static int votequorum_sync_process (void) +{ + if (!sync_nodeinfo_sent) { + votequorum_exec_send_nodeinfo(us->node_id); + votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID); + if (strlen(qdevice_name)) { + votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER, + qdevice_name); + } + votequorum_exec_send_nodelist_notification(NULL, 0LL); + sync_nodeinfo_sent = 1; + } + + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) { + /* + * Waiting for qdevice to poll with new ringid or timeout + */ + + return (-1); + } + + return 0; +} + +static void votequorum_sync_activate (void) +{ + 
recalculate_quorum(0, 0); + quorum_callback(quorum_members, quorum_members_entries, + cluster_is_quorate, &quorum_ringid); + votequorum_exec_send_quorum_notification(NULL, 0L); + + sync_in_progress = 0; +} + +static void votequorum_sync_abort (void) +{ + +} + +char *votequorum_init(struct corosync_api_v1 *api, + quorum_set_quorate_fn_t q_set_quorate_fn) +{ + char *error; + + ENTER(); + + if (q_set_quorate_fn == NULL) { + return ((char *)"Quorate function not set"); + } + + corosync_api = api; + quorum_callback = q_set_quorate_fn; + + error = corosync_service_link_and_init(corosync_api, + &votequorum_service[0]); + if (error) { + return (error); + } + + LEAVE(); + + return (NULL); +} + +/* + * Library Handler init/fini + */ + +static int quorum_lib_init_fn (void *conn) +{ + struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + + ENTER(); + + qb_list_init (&pd->list); + pd->conn = conn; + + LEAVE(); + return (0); +} + +static int quorum_lib_exit_fn (void *conn) +{ + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + + ENTER(); + + if (quorum_pd->tracking_enabled) { + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); + } + + LEAVE(); + + return (0); +} + +/* + * library internal functions + */ + +static void qdevice_timer_fn(void *arg) +{ + ENTER(); + + if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) || + (!qdevice_timer_set)) { + LEAVE(); + return; + } + + us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE; + us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; + log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name); + votequorum_exec_send_nodeinfo(us->node_id); + + qdevice_timer_set = 0; + sync_wait_for_poll_or_timeout = 0; + + LEAVE(); +} + +/* + * Library Handler Functions + */ + +static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message) +{ + const struct req_lib_votequorum_getinfo *req_lib_votequorum_getinfo = message; + struct 
res_lib_votequorum_getinfo res_lib_votequorum_getinfo; + struct cluster_node *node; + unsigned int highest_expected = 0; + unsigned int total_votes = 0; + cs_error_t error = CS_OK; + uint32_t nodeid = req_lib_votequorum_getinfo->nodeid; + + ENTER(); + + log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node " CS_PRI_NODE_ID, conn, req_lib_votequorum_getinfo->nodeid); + + if (nodeid == VOTEQUORUM_QDEVICE_NODEID) { + nodeid = us->node_id; + } + + node = find_node_by_nodeid(nodeid); + if (node) { + struct cluster_node *iternode; + struct qb_list_head *nodelist; + + qb_list_for_each(nodelist, &cluster_members_list) { + iternode = qb_list_entry(nodelist, struct cluster_node, list); + + if (iternode->state == NODESTATE_MEMBER) { + highest_expected = + max(highest_expected, iternode->expected_votes); + total_votes += iternode->votes; + } + } + + if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { + total_votes += qdevice->votes; + } + + switch(node->state) { + case NODESTATE_MEMBER: + res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_MEMBER; + break; + case NODESTATE_DEAD: + res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_DEAD; + break; + case NODESTATE_LEAVING: + res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_LEAVING; + break; + default: + res_lib_votequorum_getinfo.state = node->state; + break; + } + res_lib_votequorum_getinfo.state = node->state; + res_lib_votequorum_getinfo.votes = node->votes; + res_lib_votequorum_getinfo.expected_votes = node->expected_votes; + res_lib_votequorum_getinfo.highest_expected = highest_expected; + + res_lib_votequorum_getinfo.quorum = quorum; + res_lib_votequorum_getinfo.total_votes = total_votes; + res_lib_votequorum_getinfo.flags = 0; + res_lib_votequorum_getinfo.nodeid = node->node_id; + + if (two_node) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_TWONODE; + } + if (cluster_is_quorate) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QUORATE; + } + if (wait_for_all) { + 
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_WAIT_FOR_ALL; + } + if (last_man_standing) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LAST_MAN_STANDING; + } + if (auto_tie_breaker != ATB_NONE) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER; + } + if (allow_downscale) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_ALLOW_DOWNSCALE; + } + + memset(res_lib_votequorum_getinfo.qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN); + strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name); + res_lib_votequorum_getinfo.qdevice_votes = qdevice->votes; + + if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_REGISTERED; + } + if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_ALIVE; + } + if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_CAST_VOTE; + } + if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_MASTER_WINS; + } + } else { + error = CS_ERR_NOT_EXIST; + } + + res_lib_votequorum_getinfo.header.size = sizeof(res_lib_votequorum_getinfo); + res_lib_votequorum_getinfo.header.id = MESSAGE_RES_VOTEQUORUM_GETINFO; + res_lib_votequorum_getinfo.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_getinfo, sizeof(res_lib_votequorum_getinfo)); + log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message) +{ + const struct req_lib_votequorum_setexpected *req_lib_votequorum_setexpected = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + cs_error_t error = CS_OK; + unsigned int newquorum; + unsigned int total_votes; + uint8_t allow_downscale_status = 0; + + ENTER(); + + allow_downscale_status = allow_downscale; + 
allow_downscale = 0; + + /* + * Validate new expected votes + */ + newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes); + allow_downscale = allow_downscale_status; + /* + * Setting expected_votes < total_votes doesn't make sense. + * For quorate cluster prevent cluster to become unquorate. + */ + if (req_lib_votequorum_setexpected->expected_votes < total_votes || + (cluster_is_quorate && (newquorum > total_votes))) { + error = CS_ERR_INVALID_PARAM; + goto error_exit; + } + update_node_expected_votes(req_lib_votequorum_setexpected->expected_votes); + + if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id, + req_lib_votequorum_setexpected->expected_votes)) { + error = CS_ERR_NO_RESOURCES; + } + +error_exit: + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message) +{ + const struct req_lib_votequorum_setvotes *req_lib_votequorum_setvotes = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + struct cluster_node *node; + unsigned int newquorum; + unsigned int total_votes; + unsigned int saved_votes; + cs_error_t error = CS_OK; + unsigned int nodeid; + + ENTER(); + + nodeid = req_lib_votequorum_setvotes->nodeid; + node = find_node_by_nodeid(nodeid); + if (!node) { + error = CS_ERR_NAME_NOT_FOUND; + goto error_exit; + } + + /* + * Check votes is valid + */ + saved_votes = node->votes; + node->votes = req_lib_votequorum_setvotes->votes; + + newquorum = calculate_quorum(1, 0, &total_votes); + + if (newquorum < total_votes / 2 || + newquorum > total_votes) { + node->votes = saved_votes; + error = CS_ERR_INVALID_PARAM; + goto 
error_exit; + } + + if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid, + req_lib_votequorum_setvotes->votes)) { + error = CS_ERR_NO_RESOURCES; + } + +error_exit: + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_trackstart (void *conn, + const void *message) +{ + const struct req_lib_votequorum_trackstart *req_lib_votequorum_trackstart = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + cs_error_t error = CS_OK; + + ENTER(); + + /* + * If an immediate listing of the current cluster membership + * is requested, generate membership list + */ + if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT || + req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) { + log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn); + votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context); + votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context); + } + + if (quorum_pd->tracking_enabled) { + error = CS_ERR_EXIST; + goto response_send; + } + + /* + * Record requests for tracking + */ + if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES || + req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) { + + quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags; + quorum_pd->tracking_enabled = 1; + quorum_pd->tracking_context = req_lib_votequorum_trackstart->context; + + qb_list_add (&quorum_pd->list, &trackers_list); + } + +response_send: + res_lib_votequorum_status.header.size = 
sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_trackstop (void *conn, + const void *message) +{ + struct res_lib_votequorum_status res_lib_votequorum_status; + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + int error = CS_OK; + + ENTER(); + + if (quorum_pd->tracking_enabled) { + error = CS_OK; + quorum_pd->tracking_enabled = 0; + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); + } else { + error = CS_ERR_NOT_EXIST; + } + + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_qdevice_register (void *conn, + const void *message) +{ + const struct req_lib_votequorum_qdevice_register *req_lib_votequorum_qdevice_register = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + cs_error_t error = CS_OK; + + ENTER(); + + if (!qdevice_can_operate) { + log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. 
See logs for more information"); + error = CS_ERR_ACCESS; + goto out; + } + + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + if ((!strncmp(req_lib_votequorum_qdevice_register->name, + qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) { + goto out; + } else { + log_printf(LOGSYS_LEVEL_WARNING, + "A new qdevice with different name (new: %s old: %s) is trying to re-register!", + req_lib_votequorum_qdevice_register->name, qdevice_name); + error = CS_ERR_EXIST; + goto out; + } + } else { + if (qdevice_reg_conn != NULL) { + log_printf(LOGSYS_LEVEL_WARNING, + "Registration request already in progress"); + error = CS_ERR_TRY_AGAIN; + goto out; + } + qdevice_reg_conn = conn; + if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER, + req_lib_votequorum_qdevice_register->name) != 0) { + log_printf(LOGSYS_LEVEL_WARNING, + "Unable to send qdevice registration request to cluster"); + error = CS_ERR_TRY_AGAIN; + qdevice_reg_conn = NULL; + } else { + LEAVE(); + return; + } + } + +out: + + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn, + const void *message) +{ + const struct req_lib_votequorum_qdevice_unregister *req_lib_votequorum_qdevice_unregister = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + cs_error_t error = CS_OK; + + ENTER(); + + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { + error = CS_ERR_INVALID_PARAM; + goto out; + } + if (qdevice_timer_set) { + corosync_api->timer_delete(qdevice_timer); + qdevice_timer_set = 0; + sync_wait_for_poll_or_timeout = 0; + } + 
us->flags &= ~NODE_FLAGS_QDEVICE_REGISTERED; + us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE; + us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; + us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS; + votequorum_exec_send_nodeinfo(us->node_id); + votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER, + req_lib_votequorum_qdevice_unregister->name); + } else { + error = CS_ERR_NOT_EXIST; + } + +out: + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_qdevice_update (void *conn, + const void *message) +{ + const struct req_lib_votequorum_qdevice_update *req_lib_votequorum_qdevice_update = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + cs_error_t error = CS_OK; + + ENTER(); + + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { + error = CS_ERR_INVALID_PARAM; + goto out; + } + votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname, + req_lib_votequorum_qdevice_update->newname); + } else { + error = CS_ERR_NOT_EXIST; + } + +out: + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_qdevice_poll (void *conn, + const void *message) +{ + const struct req_lib_votequorum_qdevice_poll *req_lib_votequorum_qdevice_poll = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + 
cs_error_t error = CS_OK; + uint32_t oldflags; + + ENTER(); + + if (!qdevice_can_operate) { + error = CS_ERR_ACCESS; + goto out; + } + + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.nodeid && + req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) { + log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (" CS_PRI_RING_ID ") != last sync " + "ring id (" CS_PRI_RING_ID "). Ignoring poll call.", + req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq, + quorum_ringid.nodeid, quorum_ringid.seq); + error = CS_ERR_MESSAGE_ERROR; + goto out; + } + if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { + error = CS_ERR_INVALID_PARAM; + goto out; + } + + if (qdevice_timer_set) { + corosync_api->timer_delete(qdevice_timer); + qdevice_timer_set = 0; + } + + oldflags = us->flags; + + us->flags |= NODE_FLAGS_QDEVICE_ALIVE; + + if (req_lib_votequorum_qdevice_poll->cast_vote) { + us->flags |= NODE_FLAGS_QDEVICE_CAST_VOTE; + } else { + us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE; + } + + if (us->flags != oldflags) { + votequorum_exec_send_nodeinfo(us->node_id); + } + + corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice, + qdevice_timer_fn, &qdevice_timer); + qdevice_timer_set = 1; + sync_wait_for_poll_or_timeout = 0; + } else { + error = CS_ERR_NOT_EXIST; + } + +out: + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} + +static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn, + const void *message) +{ + const struct req_lib_votequorum_qdevice_master_wins 
*req_lib_votequorum_qdevice_master_wins = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + cs_error_t error = CS_OK; + uint32_t oldflags = us->flags; + + ENTER(); + + if (!qdevice_can_operate) { + error = CS_ERR_ACCESS; + goto out; + } + + if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) { + if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) { + error = CS_ERR_INVALID_PARAM; + goto out; + } + + if (req_lib_votequorum_qdevice_master_wins->allow) { + us->flags |= NODE_FLAGS_QDEVICE_MASTER_WINS; + } else { + us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS; + } + + if (us->flags != oldflags) { + votequorum_exec_send_nodeinfo(us->node_id); + } + + update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow); + } else { + error = CS_ERR_NOT_EXIST; + } + +out: + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; + res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); +} diff --git a/exec/votequorum.h b/exec/votequorum.h new file mode 100644 index 0000000..697b694 --- /dev/null +++ b/exec/votequorum.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Fabio M. Di Nitto (fdinitto@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef VOTEQUORUM_H_DEFINED +#define VOTEQUORUM_H_DEFINED + +#include "quorum.h" +#include <corosync/logsys.h> +#include <corosync/coroapi.h> + +char *votequorum_init(struct corosync_api_v1 *api, + quorum_set_quorate_fn_t q_set_quorate_fn); + +#endif /* VOTEQUORUM_H_DEFINED */ diff --git a/exec/vsf.h b/exec/vsf.h new file mode 100644 index 0000000..6163410 --- /dev/null +++ b/exec/vsf.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2006-2011 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef VSF_H_DEFINED +#define VSF_H_DEFINED + +struct corosync_api_v1; +struct corosync_vsf_iface_ver0 { + + /** + * Executes a callback whenever component changes + */ + int (*init) ( + struct corosync_api_v1 *api, + void (*primary_callback_fn) ( + unsigned int *view_list, + int view_list_entries, + int primary_designated, + struct memb_ring_id *ring_id)); + + /** + * @retval 1 if we are primary component + * @retval 0 if not primary component + */ + int (*primary) (void); +}; + +#endif /* VSF_H_DEFINED */ diff --git a/exec/vsf_quorum.c b/exec/vsf_quorum.c new file mode 100644 index 0000000..e07134b --- /dev/null +++ b/exec/vsf_quorum.c @@ -0,0 +1,801 @@ +/* + * Copyright (c) 2008-2020 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Christine Caulfield (ccaulfie@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of Red Hat Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <pwd.h> +#include <grp.h> +#include <sys/types.h> +#include <sys/poll.h> +#include <sys/uio.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <sched.h> +#include <time.h> + +#include "quorum.h" +#include <corosync/corotypes.h> +#include <qb/qbipc_common.h> +#include <corosync/corodefs.h> +#include <corosync/swab.h> +#include <qb/qblist.h> +#include <corosync/mar_gen.h> +#include <corosync/ipc_quorum.h> +#include <corosync/coroapi.h> +#include <corosync/logsys.h> +#include <corosync/icmap.h> + +#include "service.h" +#include "votequorum.h" +#include "vsf_ykd.h" + +LOGSYS_DECLARE_SUBSYS ("QUORUM"); + +struct quorum_pd { + unsigned char track_flags; + int tracking_enabled; + struct qb_list_head list; + void *conn; + enum lib_quorum_model model; +}; + +struct internal_callback_pd { + struct qb_list_head list; + quorum_callback_fn_t callback; + void *context; +}; + +static void quorum_sync_init ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id); + +static int quorum_sync_process (void); + +static void quorum_sync_activate (void); + +static void 
quorum_sync_abort (void); + +static void message_handler_req_lib_quorum_getquorate (void *conn, + const void *msg); +static void message_handler_req_lib_quorum_trackstart (void *conn, + const void *msg); +static void message_handler_req_lib_quorum_trackstop (void *conn, + const void *msg); +static void message_handler_req_lib_quorum_gettype (void *conn, + const void *msg); +static void message_handler_req_lib_quorum_model_gettype (void *conn, + const void *msg); +static void send_library_notification(void *conn); +static void send_internal_notification(void); +static void send_nodelist_library_notification(void *conn, int send_joined_left_list); +static char *quorum_exec_init_fn (struct corosync_api_v1 *api); +static int quorum_lib_init_fn (void *conn); +static int quorum_lib_exit_fn (void *conn); + +static int primary_designated = 0; +static int quorum_type = 0; +static struct corosync_api_v1 *corosync_api; +static struct qb_list_head lib_trackers_list; +static struct qb_list_head internal_trackers_list; +static struct memb_ring_id quorum_ring_id; +static struct memb_ring_id last_sync_ring_id; +static size_t quorum_view_list_entries = 0; +static int quorum_view_list[PROCESSOR_COUNT_MAX]; +struct quorum_services_api_ver1 *quorum_iface = NULL; + +static char view_buf[64]; + +static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; +static size_t my_member_list_entries; +static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX]; +static size_t my_old_member_list_entries = 0; +static unsigned int my_left_list[PROCESSOR_COUNT_MAX]; +static size_t my_left_list_entries; +static unsigned int my_joined_list[PROCESSOR_COUNT_MAX]; +static size_t my_joined_list_entries; + +static void log_view_list(const unsigned int *view_list, size_t view_list_entries, + const char *view_list_type_str) +{ + int total = (int)view_list_entries; + int len, pos, ret; + int i = 0; + + while (1) { + len = sizeof(view_buf); + pos = 0; + memset(view_buf, 0, len); + + for (; i < total; i++) { + 
ret = snprintf(view_buf + pos, len - pos, " " CS_PRI_NODE_ID, view_list[i]); + if (ret >= len - pos) + break; + pos += ret; + } + log_printf (LOGSYS_LEVEL_NOTICE, "%s[%d]:%s%s", + view_list_type_str, total, view_buf, i < total ? "\\" : ""); + + if (i == total) + break; + } +} + +/* Internal quorum API function */ +static void quorum_api_set_quorum(const unsigned int *view_list, + size_t view_list_entries, + int quorum, struct memb_ring_id *ring_id) +{ + int old_quorum = primary_designated; + primary_designated = quorum; + + if (primary_designated && !old_quorum) { + log_printf (LOGSYS_LEVEL_NOTICE, "This node is within the primary component and will provide service."); + } else if (!primary_designated && old_quorum) { + log_printf (LOGSYS_LEVEL_NOTICE, "This node is within the non-primary component and will NOT provide any services."); + } + + quorum_view_list_entries = view_list_entries; + memcpy(&quorum_ring_id, ring_id, sizeof (quorum_ring_id)); + memcpy(quorum_view_list, view_list, sizeof(unsigned int)*view_list_entries); + + log_view_list(view_list, view_list_entries, "Members"); + + /* Tell internal listeners */ + send_internal_notification(); + + /* Tell IPC listeners */ + send_library_notification(NULL); +} + +static struct corosync_lib_handler quorum_lib_service[] = +{ + { /* 0 */ + .lib_handler_fn = message_handler_req_lib_quorum_getquorate, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 1 */ + .lib_handler_fn = message_handler_req_lib_quorum_trackstart, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 2 */ + .lib_handler_fn = message_handler_req_lib_quorum_trackstop, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 3 */ + .lib_handler_fn = message_handler_req_lib_quorum_gettype, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + }, + { /* 4 */ + .lib_handler_fn = message_handler_req_lib_quorum_model_gettype, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED + } +}; + +static struct corosync_service_engine 
quorum_service_handler = { + .name = "corosync cluster quorum service v0.1", + .id = QUORUM_SERVICE, + .priority = 1, + .private_data_size = sizeof (struct quorum_pd), + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, + .allow_inquorate = CS_LIB_ALLOW_INQUORATE, + .lib_init_fn = quorum_lib_init_fn, + .lib_exit_fn = quorum_lib_exit_fn, + .lib_engine = quorum_lib_service, + .exec_init_fn = quorum_exec_init_fn, + .sync_init = quorum_sync_init, + .sync_process = quorum_sync_process, + .sync_activate = quorum_sync_activate, + .sync_abort = quorum_sync_abort, + .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler) +}; + +struct corosync_service_engine *vsf_quorum_get_service_engine_ver0 (void) +{ + return (&quorum_service_handler); +} + +/* -------------------------------------------------- */ + + +/* + * Internal API functions for corosync + */ + +static int quorum_quorate(void) +{ + return primary_designated; +} + + +static int quorum_register_callback(quorum_callback_fn_t function, void *context) +{ + struct internal_callback_pd *pd = malloc(sizeof(struct internal_callback_pd)); + if (!pd) + return -1; + + pd->context = context; + pd->callback = function; + qb_list_add (&pd->list, &internal_trackers_list); + + return 0; +} + +static int quorum_unregister_callback(quorum_callback_fn_t function, void *context) +{ + struct internal_callback_pd *pd; + struct qb_list_head *tmp, *tmp_iter; + + qb_list_for_each_safe(tmp, tmp_iter, &internal_trackers_list) { + pd = qb_list_entry(tmp, struct internal_callback_pd, list); + if (pd->callback == function && pd->context == context) { + qb_list_del(&pd->list); + free(pd); + return 0; + } + } + return -1; +} + +static struct quorum_callin_functions callins = { + .quorate = quorum_quorate, + .register_callback = quorum_register_callback, + .unregister_callback = quorum_unregister_callback +}; + +/* --------------------------------------------------------------------- */ + +static void 
quorum_sync_init ( + const unsigned int *trans_list, + size_t trans_list_entries, + const unsigned int *member_list, + size_t member_list_entries, + const struct memb_ring_id *ring_id) +{ + int found; + int i, j; + int entries; + int node_joined; + + memcpy (my_member_list, member_list, member_list_entries * + sizeof (unsigned int)); + my_member_list_entries = member_list_entries; + + last_sync_ring_id = *ring_id; + + /* + * Determine left list of nodeids + */ + entries = 0; + for (i = 0; i < my_old_member_list_entries; i++) { + found = 0; + for (j = 0; j < trans_list_entries; j++) { + if (my_old_member_list[i] == trans_list[j]) { + found = 1; + break; + } + } + + if (found == 0) { + my_left_list[entries++] = my_old_member_list[i]; + } else { + /* + * Check it is really in new membership + */ + found = 0; + + for (j = 0; j < my_member_list_entries; j++) { + if (my_old_member_list[i] == my_member_list[j]) { + found = 1; + break; + } + } + + /* + * Node is in both old_member_list and trans list but not in my_member_list. + * (This shouldn't really happen). + */ + if (!found) { + my_left_list[entries++] = my_old_member_list[i]; + } + } + } + my_left_list_entries = entries; + + /* + * Determine joined list of nodeids + */ + entries = 0; + for (i = 0; i < my_member_list_entries; i++) { + node_joined = 1; + for (j = 0; j < my_old_member_list_entries; j++) { + if (my_member_list[i] == my_old_member_list[j]) { + /* + * Node is in member list and also in my_old_member list -> check + * if it is in left_list. + */ + node_joined = 0; + break; + } + } + + if (!node_joined) { + /* + * Check if node is in left list. 
+ */ + for (j = 0; j < my_left_list_entries; j++) { + if (my_member_list[i] == my_left_list[j]) { + /* + * Node is both in left and also in member list -> joined + */ + node_joined = 1; + break; + } + } + } + + if (node_joined) { + my_joined_list[entries++] = my_member_list[i]; + } + } + my_joined_list_entries = entries; + + log_view_list(my_member_list, my_member_list_entries, "Sync members"); + + if (my_joined_list_entries > 0) { + log_view_list(my_joined_list, my_joined_list_entries, "Sync joined"); + } + + if (my_left_list_entries > 0) { + log_view_list(my_left_list, my_left_list_entries, "Sync left"); + } +} + +static int quorum_sync_process (void) +{ + + return (0); +} + +static void quorum_sync_activate (void) +{ + + memcpy (my_old_member_list, my_member_list, + my_member_list_entries * sizeof (unsigned int)); + my_old_member_list_entries = my_member_list_entries; + + /* Tell IPC listeners */ + send_nodelist_library_notification(NULL, 1); +} + +static void quorum_sync_abort (void) +{ + +} + +static char *quorum_exec_init_fn (struct corosync_api_v1 *api) +{ + char *quorum_module = NULL; + char *error; + + corosync_api = api; + qb_list_init (&lib_trackers_list); + qb_list_init (&internal_trackers_list); + + /* + * Tell corosync we have a quorum engine. 
+ */ + api->quorum_initialize(&callins); + + /* + * Look for a quorum provider + */ + if (icmap_get_string("quorum.provider", &quorum_module) == CS_OK) { + log_printf (LOGSYS_LEVEL_NOTICE, + "Using quorum provider %s", quorum_module); + + error = (char *)"Invalid quorum provider"; + + if (strcmp (quorum_module, "corosync_votequorum") == 0) { + error = votequorum_init (api, quorum_api_set_quorum); + quorum_type = 1; + } + if (strcmp (quorum_module, "corosync_ykd") == 0) { + error = ykd_init (api, quorum_api_set_quorum); + quorum_type = 1; + } + if (error) { + log_printf (LOGSYS_LEVEL_CRIT, + "Quorum provider: %s failed to initialize.", + quorum_module); + free(quorum_module); + return (error); + } + } + + if (quorum_module) { + free(quorum_module); + quorum_module = NULL; + } + + /* + * setting quorum_type and primary_designated in the right order is important + * always try to lookup/init a quorum module, then revert back to be quorate + */ + + if (quorum_type == 0) { + primary_designated = 1; + } + + return (NULL); +} + +static int quorum_lib_init_fn (void *conn) +{ + struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + + log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p", conn); + + qb_list_init (&pd->list); + pd->conn = conn; + pd->model = LIB_QUORUM_MODEL_V0; + + return (0); +} + +static int quorum_lib_exit_fn (void *conn) +{ + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + + log_printf(LOGSYS_LEVEL_DEBUG, "lib_exit_fn: conn=%p", conn); + + if (quorum_pd->tracking_enabled) { + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); + } + return (0); +} + + +static void send_internal_notification(void) +{ + struct qb_list_head *tmp; + struct internal_callback_pd *pd; + + qb_list_for_each(tmp, &internal_trackers_list) { + pd = qb_list_entry(tmp, struct internal_callback_pd, list); + + pd->callback(primary_designated, pd->context); + } +} + +static void 
prepare_library_notification_v0(char *buf, size_t size) +{ + struct res_lib_quorum_notification *res_lib_quorum_notification = (struct res_lib_quorum_notification *)buf; + int i; + + res_lib_quorum_notification->quorate = primary_designated; + res_lib_quorum_notification->ring_seq = quorum_ring_id.seq; + res_lib_quorum_notification->view_list_entries = quorum_view_list_entries; + for (i=0; i<quorum_view_list_entries; i++) { + res_lib_quorum_notification->view_list[i] = quorum_view_list[i]; + } + + res_lib_quorum_notification->header.id = MESSAGE_RES_QUORUM_NOTIFICATION; + res_lib_quorum_notification->header.size = size; + res_lib_quorum_notification->header.error = CS_OK; +} + +static void prepare_library_notification_v1(char *buf, size_t size) +{ + struct res_lib_quorum_v1_quorum_notification *res_lib_quorum_v1_quorum_notification = + (struct res_lib_quorum_v1_quorum_notification *)buf; + int i; + + res_lib_quorum_v1_quorum_notification->quorate = primary_designated; + res_lib_quorum_v1_quorum_notification->ring_id.nodeid = quorum_ring_id.nodeid; + res_lib_quorum_v1_quorum_notification->ring_id.seq = quorum_ring_id.seq; + res_lib_quorum_v1_quorum_notification->view_list_entries = quorum_view_list_entries; + for (i=0; i<quorum_view_list_entries; i++) { + res_lib_quorum_v1_quorum_notification->view_list[i] = quorum_view_list[i]; + } + + res_lib_quorum_v1_quorum_notification->header.id = MESSAGE_RES_QUORUM_V1_QUORUM_NOTIFICATION; + res_lib_quorum_v1_quorum_notification->header.size = size; + res_lib_quorum_v1_quorum_notification->header.error = CS_OK; +} + +static void send_library_notification(void *conn) +{ + int size_v0 = sizeof(struct res_lib_quorum_notification) + + sizeof(mar_uint32_t) * quorum_view_list_entries; + int size_v1 = sizeof(struct res_lib_quorum_v1_quorum_notification) + + sizeof(mar_uint32_t)*quorum_view_list_entries; + + char buf_v0[size_v0]; + char buf_v1[size_v1]; + + struct res_lib_quorum_notification *res_lib_quorum_notification = + (struct 
res_lib_quorum_notification *)buf_v0; + struct res_lib_quorum_v1_quorum_notification *res_lib_quorum_v1_quorum_notification = + (struct res_lib_quorum_v1_quorum_notification *)buf_v1; + + struct quorum_pd *qpd; + struct qb_list_head *tmp; + + log_printf(LOGSYS_LEVEL_DEBUG, "sending quorum notification to %p, length = %u/%u", conn, size_v0, size_v1); + + prepare_library_notification_v0(buf_v0, size_v0); + prepare_library_notification_v1(buf_v1, size_v1); + + /* Send it to all interested parties */ + if (conn) { + qpd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + + if (qpd->model == LIB_QUORUM_MODEL_V0) { + corosync_api->ipc_dispatch_send(conn, res_lib_quorum_notification, size_v0); + } else if (qpd->model == LIB_QUORUM_MODEL_V1) { + corosync_api->ipc_dispatch_send(conn, res_lib_quorum_v1_quorum_notification, size_v1); + } + } + else { + qb_list_for_each(tmp, &lib_trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); + + if (qpd->model == LIB_QUORUM_MODEL_V0) { + corosync_api->ipc_dispatch_send(qpd->conn, + res_lib_quorum_notification, size_v0); + } else if (qpd->model == LIB_QUORUM_MODEL_V1) { + corosync_api->ipc_dispatch_send(qpd->conn, + res_lib_quorum_v1_quorum_notification, size_v1); + } + } + } + return; +} + +static void send_nodelist_library_notification(void *conn, int send_joined_left_list) +{ + int size = sizeof(struct res_lib_quorum_v1_nodelist_notification) + + sizeof(mar_uint32_t) * my_member_list_entries; + char *buf; + struct res_lib_quorum_v1_nodelist_notification *res_lib_quorum_v1_nodelist_notification; + struct quorum_pd *qpd; + struct qb_list_head *tmp; + mar_uint32_t *ptr; + int i; + + if (send_joined_left_list) { + size += sizeof(mar_uint32_t) * my_joined_list_entries; + size += sizeof(mar_uint32_t) * my_left_list_entries; + } + + buf = alloca(size); + memset(buf, 0, size); + + res_lib_quorum_v1_nodelist_notification = (struct res_lib_quorum_v1_nodelist_notification *)buf; + + 
res_lib_quorum_v1_nodelist_notification->ring_id.nodeid = last_sync_ring_id.nodeid; + res_lib_quorum_v1_nodelist_notification->ring_id.seq = last_sync_ring_id.seq; + res_lib_quorum_v1_nodelist_notification->member_list_entries = my_member_list_entries; + + if (send_joined_left_list) { + res_lib_quorum_v1_nodelist_notification->joined_list_entries = my_joined_list_entries; + res_lib_quorum_v1_nodelist_notification->left_list_entries = my_left_list_entries; + } + + ptr = res_lib_quorum_v1_nodelist_notification->member_list; + + for (i=0; i<my_member_list_entries; i++, ptr++) { + *ptr = my_member_list[i]; + } + + if (send_joined_left_list) { + for (i=0; i<my_joined_list_entries; i++, ptr++) { + *ptr = my_joined_list[i]; + } + + for (i=0; i<my_left_list_entries; i++, ptr++) { + *ptr = my_left_list[i]; + } + } + + res_lib_quorum_v1_nodelist_notification->header.id = MESSAGE_RES_QUORUM_V1_NODELIST_NOTIFICATION; + res_lib_quorum_v1_nodelist_notification->header.size = size; + res_lib_quorum_v1_nodelist_notification->header.error = CS_OK; + + log_printf(LOGSYS_LEVEL_DEBUG, "sending nodelist notification to %p, length = %u", conn, size); + + /* Send it to all interested parties */ + if (conn) { + qpd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + + if (qpd->model == LIB_QUORUM_MODEL_V1) { + corosync_api->ipc_dispatch_send(conn, res_lib_quorum_v1_nodelist_notification, size); + } + } + else { + qb_list_for_each(tmp, &lib_trackers_list) { + qpd = qb_list_entry(tmp, struct quorum_pd, list); + + if (qpd->model == LIB_QUORUM_MODEL_V1) { + corosync_api->ipc_dispatch_send(qpd->conn, + res_lib_quorum_v1_nodelist_notification, size); + } + } + } + + return; +} + +static void message_handler_req_lib_quorum_getquorate (void *conn, + const void *msg) +{ + struct res_lib_quorum_getquorate res_lib_quorum_getquorate; + + log_printf(LOGSYS_LEVEL_DEBUG, "got quorate request on %p", conn); + + /* send status */ + res_lib_quorum_getquorate.quorate = primary_designated; + 
res_lib_quorum_getquorate.header.size = sizeof(res_lib_quorum_getquorate); + res_lib_quorum_getquorate.header.id = MESSAGE_RES_QUORUM_GETQUORATE; + res_lib_quorum_getquorate.header.error = CS_OK; + corosync_api->ipc_response_send(conn, &res_lib_quorum_getquorate, sizeof(res_lib_quorum_getquorate)); +} + +static void message_handler_req_lib_quorum_trackstart (void *conn, + const void *msg) +{ + const struct req_lib_quorum_trackstart *req_lib_quorum_trackstart = msg; + struct qb_ipc_response_header res; + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + cs_error_t error = CS_OK; + + log_printf(LOGSYS_LEVEL_DEBUG, "got trackstart request on %p", conn); + + /* + * If an immediate listing of the current cluster membership + * is requested, generate membership list + */ + if (req_lib_quorum_trackstart->track_flags & CS_TRACK_CURRENT || + req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES) { + log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn); + send_nodelist_library_notification(conn, 0); + send_library_notification(conn); + } + + if (quorum_pd->tracking_enabled) { + error = CS_ERR_EXIST; + goto response_send; + } + + /* + * Record requests for tracking + */ + if (req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES || + req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) { + + quorum_pd->track_flags = req_lib_quorum_trackstart->track_flags; + quorum_pd->tracking_enabled = 1; + + qb_list_add (&quorum_pd->list, &lib_trackers_list); + } + +response_send: + /* send status */ + res.size = sizeof(res); + res.id = MESSAGE_RES_QUORUM_TRACKSTART; + res.error = error; + corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header)); +} + +static void message_handler_req_lib_quorum_trackstop (void *conn, const void *msg) +{ + struct qb_ipc_response_header res; + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + + 
log_printf(LOGSYS_LEVEL_DEBUG, "got trackstop request on %p", conn); + + if (quorum_pd->tracking_enabled) { + res.error = CS_OK; + quorum_pd->tracking_enabled = 0; + qb_list_del (&quorum_pd->list); + qb_list_init (&quorum_pd->list); + } else { + res.error = CS_ERR_NOT_EXIST; + } + + /* send status */ + res.size = sizeof(res); + res.id = MESSAGE_RES_QUORUM_TRACKSTOP; + res.error = CS_OK; + corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header)); +} + +static void message_handler_req_lib_quorum_gettype (void *conn, + const void *msg) +{ + struct res_lib_quorum_gettype res_lib_quorum_gettype; + + log_printf(LOGSYS_LEVEL_DEBUG, "got quorum_type request on %p", conn); + + /* send status */ + res_lib_quorum_gettype.quorum_type = quorum_type; + res_lib_quorum_gettype.header.size = sizeof(res_lib_quorum_gettype); + res_lib_quorum_gettype.header.id = MESSAGE_RES_QUORUM_GETTYPE; + res_lib_quorum_gettype.header.error = CS_OK; + corosync_api->ipc_response_send(conn, &res_lib_quorum_gettype, sizeof(res_lib_quorum_gettype)); +} + +static void message_handler_req_lib_quorum_model_gettype (void *conn, + const void *msg) +{ + const struct req_lib_quorum_model_gettype *req_lib_quorum_model_gettype = msg; + struct res_lib_quorum_model_gettype res_lib_quorum_model_gettype; + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); + cs_error_t ret_err; + + log_printf(LOGSYS_LEVEL_DEBUG, "got quorum_model_type request on %p", conn); + + ret_err = CS_OK; + + if (req_lib_quorum_model_gettype->model != LIB_QUORUM_MODEL_V0 && + req_lib_quorum_model_gettype->model != LIB_QUORUM_MODEL_V1) { + log_printf(LOGSYS_LEVEL_ERROR, "quorum_model_type request for unsupported model %u", + req_lib_quorum_model_gettype->model); + + ret_err = CS_ERR_INVALID_PARAM; + } else { + quorum_pd->model = req_lib_quorum_model_gettype->model; + } + + /* send status */ + res_lib_quorum_model_gettype.quorum_type = quorum_type; + 
res_lib_quorum_model_gettype.header.size = sizeof(res_lib_quorum_model_gettype); + res_lib_quorum_model_gettype.header.id = MESSAGE_RES_QUORUM_MODEL_GETTYPE; + res_lib_quorum_model_gettype.header.error = ret_err; + corosync_api->ipc_response_send(conn, &res_lib_quorum_model_gettype, sizeof(res_lib_quorum_model_gettype)); +} diff --git a/exec/vsf_ykd.c b/exec/vsf_ykd.c new file mode 100644 index 0000000..8724168 --- /dev/null +++ b/exec/vsf_ykd.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/*
 * Per-processor YKD state.
 *
 * The whole structure is multicast as the payload of a
 * YKD_HEADER_SENDSTATE message and a copy is kept for every sender in
 * state_received_process[].
 */
struct ykd_state {
	/*
	 * View and session id recorded the last time this processor
	 * designated itself primary.
	 */
	struct ykd_session last_primary;

	/*
	 * NOTE(review): apart from being zeroed at init and endian
	 * converted on receipt, nothing visible in this file fills or
	 * reads this list - confirm whether it is still needed.
	 */
	struct ykd_session last_formed[YKD_PROCESSOR_COUNT_MAX];

	/* number of valid entries in last_formed[] */
	int last_formed_entries;

	/*
	 * Views for which a primary attempt was started; one entry is
	 * appended each time decide() succeeds and the list is emptied
	 * when a primary is formed.
	 */
	struct ykd_session ambiguous_sessions[YKD_PROCESSOR_COUNT_MAX];

	/* number of valid entries in ambiguous_sessions[] */
	int ambiguous_sessions_entries;

	/* set to (largest session id seen in the view) + 1 on each attempt */
	int session_id;
};
+}; + +struct ykd_state ykd_state; + +static void *ykd_group_handle; + +static struct state_received state_received_confchg[YKD_PROCESSOR_COUNT_MAX]; + +static int state_received_confchg_entries; + +static struct state_received state_received_process[YKD_PROCESSOR_COUNT_MAX]; + +static int state_received_process_entries; + +static enum ykd_mode ykd_mode; + +static unsigned int ykd_view_list[YKD_PROCESSOR_COUNT_MAX]; + +static int ykd_view_list_entries; + +static int session_id_max; + +static struct ykd_session *last_primary_max; + +static struct ykd_session ambiguous_sessions_max[YKD_PROCESSOR_COUNT_MAX]; + +static int ambiguous_sessions_max_entries; + +static int ykd_primary_designated = 0; + +static struct memb_ring_id ykd_ring_id; + +hdb_handle_t schedwrk_attempt_send_callback_handle; + +hdb_handle_t schedwrk_state_send_callback_handle; + +static struct corosync_api_v1 *api; + +static void (*ykd_primary_callback_fn) ( + const unsigned int *view_list, + size_t view_list_entries, + int primary_designated, + struct memb_ring_id *ring_id) = NULL; + +static void ykd_state_init (void) +{ + ykd_state.session_id = 0; + ykd_state.last_formed_entries = 0; + ykd_state.ambiguous_sessions_entries = 0; + ykd_state.last_primary.session_id = 0; + ykd_state.last_primary.member_list_entries = 0; +} + +static int ykd_state_send_msg (const void *context) +{ + struct iovec iovec[2]; + struct ykd_header header; + int res; + + header.id = YKD_HEADER_SENDSTATE; + + iovec[0].iov_base = (char *)&header; + iovec[0].iov_len = sizeof (struct ykd_header); + iovec[1].iov_base = (char *)&ykd_state; + iovec[1].iov_len = sizeof (struct ykd_state); + + res = api->tpg_joined_mcast (ykd_group_handle, iovec, 2, + TOTEM_AGREED); + + return (res); +} + +static void ykd_state_send (void) +{ + api->schedwrk_create ( + &schedwrk_state_send_callback_handle, + ykd_state_send_msg, + NULL); +} + +static int ykd_attempt_send_msg (const void *context) +{ + struct iovec iovec; + struct ykd_header header; + int 
res; + + header.id = YKD_HEADER_ATTEMPT; + + iovec.iov_base = (char *)&header; + iovec.iov_len = sizeof (struct ykd_header); + + res = api->tpg_joined_mcast (ykd_group_handle, &iovec, 1, + TOTEM_AGREED); + + return (res); +} + +static void ykd_attempt_send (void) +{ + api->schedwrk_create ( + &schedwrk_attempt_send_callback_handle, + ykd_attempt_send_msg, + NULL); +} + +static void compute (void) +{ + int i; + int j; + + session_id_max = 0; + last_primary_max = &state_received_process[0].ykd_state.last_primary; + ambiguous_sessions_max_entries = 0; + + for (i = 0; i < state_received_process_entries; i++) { + /* + * Calculate maximum session id + */ + if (state_received_process[i].ykd_state.session_id > session_id_max) { + session_id_max = state_received_process[i].ykd_state.session_id; + } + + /* + * Calculate maximum primary id + */ + if (state_received_process[i].ykd_state.last_primary.session_id > last_primary_max->session_id) { + last_primary_max = &state_received_process[i].ykd_state.last_primary; + } + + /* + * generate the maximum ambiguous sessions list + */ + for (j = 0; j < state_received_process[i].ykd_state.ambiguous_sessions_entries; j++) { + if (state_received_process[i].ykd_state.ambiguous_sessions[j].session_id > last_primary_max->session_id) { + memcpy (&ambiguous_sessions_max[ambiguous_sessions_max_entries], + &state_received_process[i].ykd_state.ambiguous_sessions[j], + sizeof (struct ykd_session)); + ambiguous_sessions_max_entries += 1; + } + } + } +} + +static int subquorum ( + unsigned int *member_list, + int member_list_entries, + struct ykd_session *session) +{ + int intersections = 0; + int i; + int j; + + for (i = 0; i < member_list_entries; i++) { + for (j = 0; j < session->member_list_entries; j++) { + if (member_list[i] == session->member_list[j]) { + intersections += 1; + } + } + } + + /* + * even split + */ + if (intersections == (session->member_list_entries - intersections)) { + return (1); + } else + + /* + * majority split + */ + 
if (intersections > (session->member_list_entries - intersections)) { + return (1); + } + return (0); +} + +static int decide (void) +{ + int i; + + /* + * Determine if there is a subquorum + */ + if (subquorum (ykd_view_list, ykd_view_list_entries, last_primary_max) == 0) { + return (0); + } + + for (i = 0; i < ambiguous_sessions_max_entries; i++) { + if (subquorum (ykd_view_list, ykd_view_list_entries, &ambiguous_sessions_max[i]) == 0) { + return (0); + } + + } + return (1); +} + +static void ykd_session_endian_convert (struct ykd_session *ykd_session) +{ + int i; + + ykd_session->member_list_entries = + swab32 (ykd_session->member_list_entries); + ykd_session->session_id = swab32 (ykd_session->session_id); + for (i = 0; i < ykd_session->member_list_entries; i++) { + ykd_session->member_list[i] = + swab32 (ykd_session->member_list[i]); + } +} + +static void ykd_state_endian_convert (struct ykd_state *state) +{ + int i; + + ykd_session_endian_convert (&state->last_primary); + state->last_formed_entries = swab32 (state->last_formed_entries); + state->ambiguous_sessions_entries = swab32 (state->ambiguous_sessions_entries); + state->session_id = swab32 (state->session_id); + + for (i = 0; i < state->last_formed_entries; i++) { + ykd_session_endian_convert (&state->last_formed[i]); + } + + for (i = 0; i < state->ambiguous_sessions_entries; i++) { + ykd_session_endian_convert (&state->ambiguous_sessions[i]); + } +} + +static void ykd_deliver_fn ( + unsigned int nodeid, + const void *msg, + unsigned int msg_len, + int endian_conversion_required) +{ + int all_received = 1; + int state_position = 0; + int i; + struct ykd_header *header = (struct ykd_header *)msg; + char *msg_state = (char *)msg + sizeof (struct ykd_header); + + /* + * If this is a localhost address, this node is always primary + */ +#ifdef TODO + if (totemip_localhost_check (source_addr)) { + log_printf (LOGSYS_LEVEL_NOTICE, + "This processor is within the primary component."); + primary_designated = 1; + 
+ ykd_primary_callback_fn ( + ykd_view_list, + ykd_view_list_entries, + primary_designated, + &ykd_ring_id); + return; + } +#endif + if (endian_conversion_required && + (msg_len > sizeof (struct ykd_header))) { + ykd_state_endian_convert ((struct ykd_state *)msg_state); + } + + /* + * Set completion for source_addr's address + */ + for (state_position = 0; state_position < state_received_confchg_entries; state_position++) { + if (nodeid == state_received_process[state_position].nodeid) { + /* + * State position contains the address of the state to modify + * This may be used later by the other algorithms + */ + state_received_process[state_position].received = 1; + break; + } + } + + /* + * Test if all nodes have submitted their state data + */ + for (i = 0; i < state_received_confchg_entries; i++) { + if (state_received_process[i].received == 0) { + all_received = 0; + } + } + + /* + * Ignore messages from a different state + */ + if ((ykd_mode == YKD_MODE_SENDSTATE && header->id == YKD_HEADER_ATTEMPT) || + (ykd_mode == YKD_MODE_ATTEMPT && header->id == YKD_HEADER_SENDSTATE)) + return; + + switch (ykd_mode) { + case YKD_MODE_SENDSTATE: + assert (msg_len > sizeof (struct ykd_header)); + /* + * Copy state information for the sending processor + */ + memcpy (&state_received_process[state_position].ykd_state, + msg_state, sizeof (struct ykd_state)); + + /* + * Try to form a component + */ + if (all_received) { + for (i = 0; i < state_received_confchg_entries; i++) { + state_received_process[i].received = 0; + } + ykd_mode = YKD_MODE_ATTEMPT; + +// TODO resolve optimizes for failure conditions during ykd calculation +// resolve(); + compute(); + + if (decide ()) { + ykd_state.session_id = session_id_max + 1; + memcpy (ykd_state.ambiguous_sessions[ykd_state.ambiguous_sessions_entries].member_list, + ykd_view_list, sizeof (unsigned int) * ykd_view_list_entries); + ykd_state.ambiguous_sessions[ykd_state.ambiguous_sessions_entries].member_list_entries = 
ykd_view_list_entries; + ykd_state.ambiguous_sessions_entries += 1; + ykd_attempt_send(); + } + } + break; + + case YKD_MODE_ATTEMPT: + if (all_received) { + log_printf (LOGSYS_LEVEL_NOTICE, + "This processor is within the primary component."); + ykd_primary_designated = 1; + + ykd_primary_callback_fn ( + ykd_view_list, + ykd_view_list_entries, + ykd_primary_designated, + &ykd_ring_id); + + memcpy (ykd_state.last_primary.member_list, ykd_view_list, sizeof (ykd_view_list)); + ykd_state.last_primary.member_list_entries = ykd_view_list_entries; + ykd_state.last_primary.session_id = ykd_state.session_id; + ykd_state.ambiguous_sessions_entries = 0; + } + break; + } +} + +int first_run = 1; +static void ykd_confchg_fn ( + enum totem_configuration_type configuration_type, + const unsigned int *member_list, size_t member_list_entries, + const unsigned int *left_list, size_t left_list_entries, + const unsigned int *joined_list, size_t joined_list_entries, + const struct memb_ring_id *ring_id) +{ + int i; + + if (configuration_type != TOTEM_CONFIGURATION_REGULAR) { + return; + } + + memcpy (&ykd_ring_id, ring_id, sizeof (struct memb_ring_id)); + + if (first_run) { + ykd_state.last_primary.member_list[0] = api->totem_nodeid_get(); + ykd_state.last_primary.member_list_entries = 1; + ykd_state.last_primary.session_id = 0; + first_run = 0; + } + memcpy (ykd_view_list, member_list, + member_list_entries * sizeof (unsigned int)); + ykd_view_list_entries = member_list_entries; + + ykd_mode = YKD_MODE_SENDSTATE; + + ykd_primary_designated = 0; + + ykd_primary_callback_fn ( + ykd_view_list, + ykd_view_list_entries, + ykd_primary_designated, + &ykd_ring_id); + + memset (&state_received_confchg, 0, sizeof (state_received_confchg)); + for (i = 0; i < member_list_entries; i++) { + state_received_confchg[i].nodeid = member_list[i]; + state_received_confchg[i].received = 0; + } + memcpy (state_received_process, state_received_confchg, + sizeof (state_received_confchg)); + + 
state_received_confchg_entries = member_list_entries; + state_received_process_entries = member_list_entries; + + ykd_state_send (); +} + +struct corosync_tpg_group ykd_group = { + .group = "ykd", + .group_len = 3 +}; + +char *ykd_init ( + struct corosync_api_v1 *corosync_api, + quorum_set_quorate_fn_t set_primary) +{ + const char *error = NULL; + + ykd_primary_callback_fn = set_primary; + api = corosync_api; + + if (set_primary == 0) { + error = (char *)"set primary not set"; + } + + api->tpg_init ( + &ykd_group_handle, + ykd_deliver_fn, + ykd_confchg_fn); + + api->tpg_join ( + ykd_group_handle, + &ykd_group, + 1); + + ykd_state_init (); + + return ((char *)error); +} diff --git a/exec/vsf_ykd.h b/exec/vsf_ykd.h new file mode 100644 index 0000000..d009e1c --- /dev/null +++ b/exec/vsf_ykd.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Fabio M. Di Nitto (fdinitto@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef VFS_YKD_H_DEFINED +#define VFS_YKD_H_DEFINED + +#include "quorum.h" +#include <corosync/logsys.h> +#include <corosync/coroapi.h> + +char *ykd_init(struct corosync_api_v1 *api, + quorum_set_quorate_fn_t set_primary); + +#endif /* VFS_YKD_H_DEFINED */ diff --git a/exec/wd.c b/exec/wd.c new file mode 100644 index 0000000..4ca5673 --- /dev/null +++ b/exec/wd.c @@ -0,0 +1,767 @@ +/* + * Copyright (c) 2010-2012 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Angus Salkeld <asalkeld@redhat.com> + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. 
/*
 * Bookkeeping for one watched resource.
 */
struct resource {
	/* icmap key prefix of the resource; "state", "recovery", ... are appended to it */
	char res_path[ICMAP_KEYNAME_MAXLEN];
	/* value of the "recovery" key as returned by icmap_get_string() */
	char *recovery;
	/* resource name, also used as the name of the state machine */
	char name[CS_MAX_NAME_LENGTH];
	/* value of the "last_updated" key read when the resource was created */
	time_t last_updated;
	/* state machine of this resource (driven by wd_fsm_table) */
	struct cs_fsm fsm;

	/* handle of the pending check timer, 0 when no timer is armed */
	corosync_timer_handle_t check_timer;
	/* check period in milliseconds */
	uint64_t check_timeout;
	/* handle returned by icmap_track_add() for res_path */
	icmap_track_t icmap_track;
};
wd_resource_check_fn (void* resource_ref); + +static struct corosync_api_v1 *api; +#define WD_DEFAULT_TIMEOUT_SEC 6 +#define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC) +#define WD_MIN_TIMEOUT_MS 500 +#define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC) +static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC; +static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2); +static int dog = -1; +static corosync_timer_handle_t wd_timer; +static int watchdog_ok = 1; +static char *watchdog_device = NULL; + +struct corosync_service_engine wd_service_engine = { + .name = "corosync watchdog service", + .id = WD_SERVICE, + .priority = 1, + .private_data_size = 0, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, + .lib_init_fn = NULL, + .lib_exit_fn = NULL, + .lib_engine = NULL, + .lib_engine_count = 0, + .exec_engine = NULL, + .exec_engine_count = 0, + .confchg_fn = NULL, + .exec_init_fn = wd_exec_init_fn, + .exec_exit_fn = wd_exec_exit_fn, + .exec_dump_fn = NULL +}; + +static QB_LIST_DECLARE (confchg_notify); + +/* + * F S M + */ +static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data); +static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data); + +enum wd_resource_state { + WD_S_RUNNING, + WD_S_FAILED, + WD_S_STOPPED +}; + +enum wd_resource_event { + WD_E_FAILURE, + WD_E_CONFIG_CHANGED +}; + +const char * wd_running_str = "running"; +const char * wd_failed_str = "failed"; +const char * wd_failure_str = "failure"; +const char * wd_stopped_str = "stopped"; +const char * wd_config_changed_str = "config_changed"; + +struct cs_fsm_entry wd_fsm_table[] = { + { WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} }, + { WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} }, + { WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} }, + { WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} }, + { WD_S_FAILED, WD_E_CONFIG_CHANGED, 
wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} }, + { WD_S_FAILED, WD_E_FAILURE, NULL, {-1} }, +}; + +struct corosync_service_engine *wd_get_service_engine_ver0 (void) +{ + return (&wd_service_engine); +} + +static const char * wd_res_state_to_str(struct cs_fsm* fsm, + int32_t state) +{ + switch (state) { + case WD_S_STOPPED: + return wd_stopped_str; + break; + case WD_S_RUNNING: + return wd_running_str; + break; + case WD_S_FAILED: + return wd_failed_str; + break; + } + return NULL; +} + +static const char * wd_res_event_to_str(struct cs_fsm* fsm, + int32_t event) +{ + switch (event) { + case WD_E_CONFIG_CHANGED: + return wd_config_changed_str; + break; + case WD_E_FAILURE: + return wd_failure_str; + break; + } + return NULL; +} + +static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state, + int32_t next_state, int32_t fsm_event, void *data) +{ + switch (cb_event) { + case CS_FSM_CB_EVENT_PROCESS_NF: + log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"", + fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state)); + corosync_exit_error(COROSYNC_DONE_FATAL_ERR); + break; + case CS_FSM_CB_EVENT_STATE_SET: + log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"", + fsm->name, + fsm->event_to_str(fsm, fsm_event), + fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), + fsm->state_to_str(fsm, next_state)); + break; + case CS_FSM_CB_EVENT_STATE_SET_NF: + log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")", + fsm->name, + fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state), + fsm->state_to_str(fsm, next_state), + fsm->event_to_str(fsm, fsm_event)); + corosync_exit_error(COROSYNC_DONE_FATAL_ERR); + break; + default: + log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!"); + corosync_exit_error(COROSYNC_DONE_FATAL_ERR); + break; + } +} + +/* + * returns (CS_TRUE == OK, CS_FALSE == failed) + */ 
+static int32_t wd_resource_state_is_ok (struct resource *ref) +{ + char* state = NULL; + uint64_t last_updated; + uint64_t my_time; + uint64_t allowed_period; + char key_name[ICMAP_KEYNAME_MAXLEN]; + + if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated") >= ICMAP_KEYNAME_MAXLEN) || + (icmap_get_uint64(key_name, &last_updated) != CS_OK)) { + /* key does not exist. + */ + return CS_FALSE; + } + + if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state") >= ICMAP_KEYNAME_MAXLEN) || + (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0)) { + /* key does not exist. + */ + if (state != NULL) + free(state); + + return CS_FALSE; + } + + if (last_updated == 0) { + /* initial value */ + free(state); + return CS_TRUE; + } + + my_time = cs_timestamp_get(); + + /* + * Here we check that the monitor has written a timestamp within the poll_period + * plus a grace factor of (0.5 * poll_period). + */ + allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2; + if ((last_updated + allowed_period) < my_time) { + log_printf (LOGSYS_LEVEL_ERROR, + "last_updated %"PRIu64" ms too late, period:%"PRIu64".", + (uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)), + ref->check_timeout); + free(state); + return CS_FALSE; + } + + if (strcmp (state, wd_failed_str) == 0) { + free(state); + return CS_FALSE; + } + + free(state); + return CS_TRUE; +} + +static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data) +{ + char *state; + uint64_t tmp_value; + uint64_t next_timeout; + struct resource *ref = (struct resource*)data; + char key_name[ICMAP_KEYNAME_MAXLEN]; + + next_timeout = ref->check_timeout; + + if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period") >= ICMAP_KEYNAME_MAXLEN) || + (icmap_get_uint64(ref->res_path, &tmp_value) == CS_OK)) { + if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= 
WD_MAX_TIMEOUT_MS) { + log_printf (LOGSYS_LEVEL_DEBUG, + "poll_period changing from:%"PRIu64" to %"PRIu64".", + ref->check_timeout, tmp_value); + /* + * To easy in the transition between poll_period's we are going + * to make the first timeout the bigger of the new and old value. + * This is to give the monitoring system time to adjust. + */ + next_timeout = CS_MAX(tmp_value, ref->check_timeout); + ref->check_timeout = tmp_value; + } else { + log_printf (LOGSYS_LEVEL_WARNING, + "Could NOT use poll_period:%"PRIu64" ms for resource %s", + tmp_value, ref->name); + } + } + + if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery") >= ICMAP_KEYNAME_MAXLEN) || + (icmap_get_string(key_name, &ref->recovery) != CS_OK)) { + /* key does not exist. + */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a recovery key.", ref->name); + cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb); + return; + } + if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state") >= ICMAP_KEYNAME_MAXLEN) || + (icmap_get_string(key_name, &state) != CS_OK)) { + /* key does not exist. 
+ */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a state key.", ref->name); + cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb); + return; + } + if (ref->check_timer) { + api->timer_delete(ref->check_timer); + ref->check_timer = 0; + } + + if (strcmp(wd_stopped_str, state) == 0) { + cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb); + } else { + api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS, + ref, wd_resource_check_fn, &ref->check_timer); + cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb); + } + free(state); +} + +static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data) +{ + struct resource* ref = (struct resource*)data; + + if (ref->check_timer) { + api->timer_delete(ref->check_timer); + ref->check_timer = 0; + } + + log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!", + ref->recovery, (char*)ref->name); + if (strcmp (ref->recovery, "watchdog") == 0 || + strcmp (ref->recovery, "quit") == 0) { + watchdog_ok = 0; + } + else if (strcmp (ref->recovery, "reboot") == 0) { + reboot(RB_AUTOBOOT); + } + else if (strcmp (ref->recovery, "shutdown") == 0) { + reboot(RB_POWER_OFF); + } + cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb); +} + +static void wd_key_changed( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + struct resource* ref = (struct resource*)user_data; + char *last_key_part; + + if (ref == NULL) { + return ; + } + + last_key_part = strrchr(key_name, '.'); + if (last_key_part == NULL) { + return ; + } + last_key_part++; + + if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) { + if (strcmp(last_key_part, "last_updated") == 0 || + strcmp(last_key_part, "current") == 0) { + return; + } + + cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb); + } + + if (event == ICMAP_TRACK_DELETE && ref != NULL) { + if (strcmp(last_key_part, "state") != 0) { + return ; + } + + 
log_printf (LOGSYS_LEVEL_WARNING, + "resource \"%s\" deleted from cmap!", + ref->name); + + api->timer_delete(ref->check_timer); + ref->check_timer = 0; + icmap_track_delete(ref->icmap_track); + + free(ref); + } +} + +static void wd_resource_check_fn (void* resource_ref) +{ + struct resource* ref = (struct resource*)resource_ref; + + if (wd_resource_state_is_ok (ref) == CS_FALSE) { + cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb); + return; + } + api->timer_add_duration(ref->check_timeout*MILLI_2_NANO_SECONDS, + ref, wd_resource_check_fn, &ref->check_timer); +} + +/* + * return 0 - fully configured + * return -1 - partially configured + */ +static int32_t wd_resource_create (char *res_path, char *res_name) +{ + char *state; + uint64_t tmp_value; + struct resource *ref = calloc (1, sizeof (struct resource)); + char key_name[ICMAP_KEYNAME_MAXLEN]; + + strcpy(ref->res_path, res_path); + ref->check_timeout = WD_DEFAULT_TIMEOUT_MS; + ref->check_timer = 0; + + strcpy(ref->name, res_name); + ref->fsm.name = ref->name; + ref->fsm.table = wd_fsm_table; + ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry); + ref->fsm.curr_entry = 0; + ref->fsm.curr_state = WD_S_STOPPED; + ref->fsm.state_to_str = wd_res_state_to_str; + ref->fsm.event_to_str = wd_res_event_to_str; + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period"); + if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) { + icmap_set_uint64(key_name, ref->check_timeout); + } else { + if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) { + ref->check_timeout = tmp_value; + } else { + log_printf (LOGSYS_LEVEL_WARNING, + "Could NOT use poll_period:%"PRIu64" ms for resource %s", + tmp_value, ref->name); + } + } + + icmap_track_add(res_path, + ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX, + wd_key_changed, + ref, &ref->icmap_track); + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery"); + if 
(icmap_get_string(key_name, &ref->recovery) != CS_OK) { + /* key does not exist. + */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a recovery key.", ref->name); + return -1; + } + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state"); + if (icmap_get_string(key_name, &state) != CS_OK) { + /* key does not exist. + */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a state key.", ref->name); + return -1; + } + + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated"); + if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) { + /* key does not exist. + */ + ref->last_updated = 0; + } else { + ref->last_updated = tmp_value; + } + + /* + * delay the first check to give the monitor time to start working. + */ + tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS); + api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS, + ref, + wd_resource_check_fn, &ref->check_timer); + + cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb); + return 0; +} + + +static void wd_tickle_fn (void* arg) +{ + ENTER(); + + if (watchdog_ok) { + if (dog > 0) { + ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok); + } + api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL, + wd_tickle_fn, &wd_timer); + } + else { + log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!"); + } + +} + +static void wd_resource_created_cb( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + char res_name[ICMAP_KEYNAME_MAXLEN]; + char res_type[ICMAP_KEYNAME_MAXLEN]; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + int res; + + if (event != ICMAP_TRACK_ADD) { + return ; + } + + res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key); + if (res != 3) { + return ; + } + + if (strcmp(tmp_key, "state") != 0) { + return ; + } + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name); + wd_resource_create 
(tmp_key, res_name); +} + +static void wd_scan_resources (void) +{ + int res_count = 0; + icmap_track_t icmap_track = NULL; + icmap_iter_t iter; + const char *key_name; + int res; + char res_name[ICMAP_KEYNAME_MAXLEN]; + char res_type[ICMAP_KEYNAME_MAXLEN]; + char tmp_key[ICMAP_KEYNAME_MAXLEN]; + + ENTER(); + + iter = icmap_iter_init("resources."); + while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key); + if (res != 3) { + continue ; + } + + if (strcmp(tmp_key, "state") != 0) { + continue ; + } + + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name); + if (wd_resource_create (tmp_key, res_name) == 0) { + res_count++; + } + } + icmap_iter_finalize(iter); + + icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX, + wd_resource_created_cb, NULL, &icmap_track); + icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX, + wd_resource_created_cb, NULL, &icmap_track); + + if (res_count == 0) { + log_printf (LOGSYS_LEVEL_INFO, "no resources configured."); + } +} + + +static void watchdog_timeout_apply (uint32_t new) +{ + struct watchdog_info ident; + uint32_t original_timeout = 0; + + if (dog > 0) { + ioctl(dog, WDIOC_GETTIMEOUT, &original_timeout); + } + + if (new == original_timeout) { + return; + } + + watchdog_timeout = new; + + if (dog > 0) { + ioctl(dog, WDIOC_GETSUPPORT, &ident); + if (ident.options & WDIOF_SETTIMEOUT) { + /* yay! the dog is trained. + */ + ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout); + } + ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout); + } + + if (watchdog_timeout == new) { + tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2; + + /* reset the tickle timer in case it was reduced. 
+ */ + api->timer_delete (wd_timer); + api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL, + wd_tickle_fn, &wd_timer); + + log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout); + log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout); + } else { + log_printf (LOGSYS_LEVEL_WARNING, + "Could not change the Watchdog timeout from %d to %d seconds", + original_timeout, new); + } + +} + +static int setup_watchdog(void) +{ + struct watchdog_info ident; + char *str; + + ENTER(); + + if (icmap_get_string("resources.watchdog_device", &str) == CS_OK) { + if (str[0] == 0 || strcmp (str, "off") == 0) { + log_printf (LOGSYS_LEVEL_WARNING, "Watchdog disabled by configuration"); + free(str); + dog = -1; + return -1; + } else { + watchdog_device = str; + } + } else { + log_printf (LOGSYS_LEVEL_WARNING, "Watchdog not enabled by configuration"); + dog = -1; + return -1; + } + + if (access (watchdog_device, W_OK) != 0) { + log_printf (LOGSYS_LEVEL_WARNING, "No watchdog %s, try modprobe <a watchdog>", watchdog_device); + dog = -1; + return -1; + } + + /* here goes, lets hope they have "Magic Close" + */ + dog = open(watchdog_device, O_WRONLY); + + if (dog == -1) { + log_printf (LOGSYS_LEVEL_WARNING, "Watchdog %s exists but couldn't be opened.", watchdog_device); + dog = -1; + return -1; + } + + /* Right we have the dog. + * Lets see what breed it is. 
+ */ + + ioctl(dog, WDIOC_GETSUPPORT, &ident); + log_printf (LOGSYS_LEVEL_INFO, "Watchdog %s is now being tickled by corosync.", watchdog_device); + log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity); + + watchdog_timeout_apply (watchdog_timeout); + + ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD); + + return 0; +} + +static void wd_top_level_key_changed( + int32_t event, + const char *key_name, + struct icmap_notify_value new_val, + struct icmap_notify_value old_val, + void *user_data) +{ + uint32_t tmp_value_32; + + ENTER(); + + if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) == CS_OK) { + if (tmp_value_32 >= 2 && tmp_value_32 <= 120) { + watchdog_timeout_apply (tmp_value_32); + return; + } + } + + log_printf (LOGSYS_LEVEL_WARNING, + "Set watchdog_timeout is out of range (2..120)."); + icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout); +} + +static void watchdog_timeout_get_initial (void) +{ + uint32_t tmp_value_32; + icmap_track_t icmap_track = NULL; + + ENTER(); + + if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) { + watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC); + + icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout); + } + else { + if (tmp_value_32 >= 2 && tmp_value_32 <= 120) { + watchdog_timeout_apply (tmp_value_32); + } + else { + log_printf (LOGSYS_LEVEL_WARNING, + "Set watchdog_timeout is out of range (2..120)."); + log_printf (LOGSYS_LEVEL_INFO, + "use default value %d seconds.", WD_DEFAULT_TIMEOUT_SEC); + watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC); + icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout); + } + } + + icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY, + wd_top_level_key_changed, NULL, &icmap_track); + +} + +static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api) +{ + + ENTER(); + + api = corosync_api; + + watchdog_timeout_get_initial(); + + setup_watchdog(); + + wd_scan_resources(); + + return NULL; +} + +static int 
wd_exec_exit_fn (void) +{ + char magic = 'V'; + ENTER(); + + if (dog > 0) { + log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog."); + if (write (dog, &magic, 1) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "failed to write %c to dog(%d).", magic, dog); + } + } + return 0; +} + + |