summaryrefslogtreecommitdiffstats
path: root/ctdb
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 17:20:00 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 17:20:00 +0000
commit8daa83a594a2e98f39d764422bfbdbc62c9efd44 (patch)
tree4099e8021376c7d8c05bdf8503093d80e9c7bad0 /ctdb
parentInitial commit. (diff)
downloadsamba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.tar.xz
samba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.zip
Adding upstream version 2:4.20.0+dfsg.upstream/2%4.20.0+dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ctdb')
-rw-r--r--ctdb/.bzrignore24
-rw-r--r--ctdb/.gitignore32
-rw-r--r--ctdb/COPYING674
-rw-r--r--ctdb/Makefile80
-rw-r--r--ctdb/README8
-rw-r--r--ctdb/client/client.h1416
-rw-r--r--ctdb/client/client_call.c184
-rw-r--r--ctdb/client/client_connect.c532
-rw-r--r--ctdb/client/client_control.c439
-rw-r--r--ctdb/client/client_control_sync.c2676
-rw-r--r--ctdb/client/client_db.c2791
-rw-r--r--ctdb/client/client_event.c444
-rw-r--r--ctdb/client/client_message.c607
-rw-r--r--ctdb/client/client_message_sync.c176
-rw-r--r--ctdb/client/client_private.h99
-rw-r--r--ctdb/client/client_sync.h521
-rw-r--r--ctdb/client/client_tunnel.c693
-rw-r--r--ctdb/client/client_util.c137
-rw-r--r--ctdb/cluster/cluster_conf.c182
-rw-r--r--ctdb/cluster/cluster_conf.h36
-rw-r--r--ctdb/common/cmdline.c598
-rw-r--r--ctdb/common/cmdline.h163
-rw-r--r--ctdb/common/comm.c427
-rw-r--r--ctdb/common/comm.h101
-rw-r--r--ctdb/common/common.h160
-rw-r--r--ctdb/common/conf.c1391
-rw-r--r--ctdb/common/conf.h473
-rw-r--r--ctdb/common/conf_tool.c321
-rw-r--r--ctdb/common/conf_tool.h39
-rw-r--r--ctdb/common/ctdb_io.c498
-rw-r--r--ctdb/common/ctdb_ltdb.c430
-rw-r--r--ctdb/common/ctdb_util.c681
-rw-r--r--ctdb/common/db_hash.c295
-rw-r--r--ctdb/common/db_hash.h174
-rw-r--r--ctdb/common/event_script.c247
-rw-r--r--ctdb/common/event_script.h72
-rw-r--r--ctdb/common/hash_count.c219
-rw-r--r--ctdb/common/hash_count.h94
-rw-r--r--ctdb/common/line.c145
-rw-r--r--ctdb/common/line.h62
-rw-r--r--ctdb/common/logging.c745
-rw-r--r--ctdb/common/logging.h51
-rw-r--r--ctdb/common/logging_conf.c127
-rw-r--r--ctdb/common/logging_conf.h36
-rw-r--r--ctdb/common/path.c211
-rw-r--r--ctdb/common/path.h39
-rw-r--r--ctdb/common/path_tool.c384
-rw-r--r--ctdb/common/path_tool.h38
-rw-r--r--ctdb/common/pidfile.c85
-rw-r--r--ctdb/common/pidfile.h51
-rw-r--r--ctdb/common/pkt_read.c190
-rw-r--r--ctdb/common/pkt_read.h98
-rw-r--r--ctdb/common/pkt_write.c101
-rw-r--r--ctdb/common/pkt_write.h79
-rw-r--r--ctdb/common/rb_tree.c1101
-rw-r--r--ctdb/common/rb_tree.h90
-rw-r--r--ctdb/common/reqid.c89
-rw-r--r--ctdb/common/reqid.h89
-rw-r--r--ctdb/common/run_event.c829
-rw-r--r--ctdb/common/run_event.h150
-rw-r--r--ctdb/common/run_proc.c503
-rw-r--r--ctdb/common/run_proc.h100
-rw-r--r--ctdb/common/sock_client.c334
-rw-r--r--ctdb/common/sock_client.h129
-rw-r--r--ctdb/common/sock_daemon.c1100
-rw-r--r--ctdb/common/sock_daemon.h283
-rw-r--r--ctdb/common/sock_io.c328
-rw-r--r--ctdb/common/sock_io.h39
-rw-r--r--ctdb/common/srvid.c280
-rw-r--r--ctdb/common/srvid.h121
-rw-r--r--ctdb/common/system.c237
-rw-r--r--ctdb/common/system.h37
-rw-r--r--ctdb/common/system_socket.c1168
-rw-r--r--ctdb/common/system_socket.h46
-rw-r--r--ctdb/common/tmon.c602
-rw-r--r--ctdb/common/tmon.h218
-rw-r--r--ctdb/common/tunable.c401
-rw-r--r--ctdb/common/tunable.h35
-rw-r--r--ctdb/config/README31
-rwxr-xr-xctdb/config/ctdb-crash-cleanup.sh27
-rw-r--r--ctdb/config/ctdb.conf22
-rwxr-xr-xctdb/config/ctdb.init161
-rw-r--r--ctdb/config/ctdb.sudoers3
-rw-r--r--ctdb/config/ctdb.sysconfig11
-rw-r--r--ctdb/config/ctdb.tunables2
-rwxr-xr-xctdb/config/debug-hung-script.sh61
-rwxr-xr-xctdb/config/debug_locks.sh218
-rw-r--r--ctdb/config/events/README193
-rwxr-xr-xctdb/config/events/legacy/00.ctdb.script130
-rwxr-xr-xctdb/config/events/legacy/01.reclock.script34
-rwxr-xr-xctdb/config/events/legacy/05.system.script198
-rwxr-xr-xctdb/config/events/legacy/06.nfs.script39
-rwxr-xr-xctdb/config/events/legacy/10.interface.script262
-rwxr-xr-xctdb/config/events/legacy/11.natgw.script242
-rwxr-xr-xctdb/config/events/legacy/11.routing.script49
-rwxr-xr-xctdb/config/events/legacy/13.per_ip_routing.script438
-rwxr-xr-xctdb/config/events/legacy/20.multipathd.script83
-rwxr-xr-xctdb/config/events/legacy/31.clamd.script37
-rwxr-xr-xctdb/config/events/legacy/40.vsftpd.script57
-rwxr-xr-xctdb/config/events/legacy/41.httpd.script78
-rwxr-xr-xctdb/config/events/legacy/47.samba-dcerpcd.script66
-rwxr-xr-xctdb/config/events/legacy/48.netbios.script75
-rwxr-xr-xctdb/config/events/legacy/49.winbind.script55
-rwxr-xr-xctdb/config/events/legacy/50.samba.script166
-rwxr-xr-xctdb/config/events/legacy/60.nfs.script301
-rwxr-xr-xctdb/config/events/legacy/70.iscsi.script87
-rwxr-xr-xctdb/config/events/legacy/91.lvs.script124
-rwxr-xr-xctdb/config/functions1172
-rw-r--r--ctdb/config/nfs-checks.d/00.portmapper.check2
-rw-r--r--ctdb/config/nfs-checks.d/10.status.check7
-rw-r--r--ctdb/config/nfs-checks.d/20.nfs.check7
-rw-r--r--ctdb/config/nfs-checks.d/30.nlockmgr.check6
-rw-r--r--ctdb/config/nfs-checks.d/40.mountd.check7
-rw-r--r--ctdb/config/nfs-checks.d/50.rquotad.check7
-rw-r--r--ctdb/config/nfs-checks.d/README31
-rwxr-xr-xctdb/config/nfs-linux-kernel-callout441
-rwxr-xr-xctdb/config/notification.README36
-rwxr-xr-xctdb/config/notify.sh19
-rw-r--r--ctdb/config/script.options16
-rwxr-xr-xctdb/config/statd-callout254
-rwxr-xr-xctdb/configure22
-rwxr-xr-xctdb/configure.rpm20
-rw-r--r--ctdb/database/database_conf.c165
-rw-r--r--ctdb/database/database_conf.h35
-rw-r--r--ctdb/doc/cluster_mutex_helper.txt80
-rw-r--r--ctdb/doc/ctdb-etcd.7.xml119
-rw-r--r--ctdb/doc/ctdb-script.options.5.xml1137
-rw-r--r--ctdb/doc/ctdb-statistics.7.xml689
-rw-r--r--ctdb/doc/ctdb-tunables.7.xml783
-rw-r--r--ctdb/doc/ctdb.1.xml1863
-rw-r--r--ctdb/doc/ctdb.7.xml1182
-rw-r--r--ctdb/doc/ctdb.conf.5.xml652
-rw-r--r--ctdb/doc/ctdb.sysconfig.5.xml240
-rw-r--r--ctdb/doc/ctdb_diagnostics.1.xml128
-rw-r--r--ctdb/doc/ctdb_mutex_ceph_rados_helper.7.xml96
-rw-r--r--ctdb/doc/ctdbd.1.xml129
-rw-r--r--ctdb/doc/examples/11.natgw.options25
-rw-r--r--ctdb/doc/examples/20.nfs_ganesha.check8
-rw-r--r--ctdb/doc/examples/91.lvs.options12
-rw-r--r--ctdb/doc/examples/README15
-rwxr-xr-xctdb/doc/examples/config_migrate.sh741
-rw-r--r--ctdb/doc/examples/config_migrate.test_input50
-rw-r--r--ctdb/doc/examples/ctdb.conf73
-rw-r--r--ctdb/doc/examples/ctdb.spec.in291
-rwxr-xr-xctdb/doc/examples/nfs-ganesha-callout352
-rw-r--r--ctdb/doc/ltdbtool.1.xml300
-rw-r--r--ctdb/doc/onnode.1.xml315
-rw-r--r--ctdb/doc/ping_pong.1.xml164
-rw-r--r--ctdb/doc/readonlyrecords.txt343
-rw-r--r--ctdb/event/event.h55
-rw-r--r--ctdb/event/event_client.c351
-rw-r--r--ctdb/event/event_cmd.c358
-rw-r--r--ctdb/event/event_conf.c85
-rw-r--r--ctdb/event/event_conf.h31
-rw-r--r--ctdb/event/event_config.c122
-rw-r--r--ctdb/event/event_context.c472
-rw-r--r--ctdb/event/event_daemon.c382
-rw-r--r--ctdb/event/event_private.h103
-rw-r--r--ctdb/event/event_protocol.c1123
-rw-r--r--ctdb/event/event_protocol.h100
-rw-r--r--ctdb/event/event_protocol_api.h61
-rw-r--r--ctdb/event/event_protocol_test.c412
-rw-r--r--ctdb/event/event_protocol_util.c46
-rw-r--r--ctdb/event/event_request.c217
-rw-r--r--ctdb/event/event_tool.c846
-rw-r--r--ctdb/event/event_tool.h38
-rw-r--r--ctdb/failover/failover_conf.c53
-rw-r--r--ctdb/failover/failover_conf.h31
-rw-r--r--ctdb/ib/README.txt10
-rw-r--r--ctdb/ib/ibw_ctdb.c185
-rw-r--r--ctdb/ib/ibw_ctdb.h51
-rw-r--r--ctdb/ib/ibw_ctdb_init.c255
-rw-r--r--ctdb/ib/ibwrapper.c1361
-rw-r--r--ctdb/ib/ibwrapper.h218
-rw-r--r--ctdb/ib/ibwrapper_internal.h126
-rw-r--r--ctdb/ib/ibwrapper_test.c659
l---------ctdb/include/common/srvid.h1
-rw-r--r--ctdb/include/ctdb_client.h289
-rw-r--r--ctdb/include/ctdb_private.h1040
-rw-r--r--ctdb/include/ctdb_protocol.h301
-rw-r--r--ctdb/include/public/README.txt6
-rw-r--r--ctdb/include/public/util/README.txt6
-rw-r--r--ctdb/protocol/protocol.h1079
-rw-r--r--ctdb/protocol/protocol_api.h682
-rw-r--r--ctdb/protocol/protocol_basic.c400
-rw-r--r--ctdb/protocol/protocol_basic.h86
-rw-r--r--ctdb/protocol/protocol_call.c581
-rw-r--r--ctdb/protocol/protocol_client.c2352
-rw-r--r--ctdb/protocol/protocol_control.c2036
-rw-r--r--ctdb/protocol/protocol_debug.c746
-rw-r--r--ctdb/protocol/protocol_header.c169
-rw-r--r--ctdb/protocol/protocol_keepalive.c95
-rw-r--r--ctdb/protocol/protocol_message.c485
-rw-r--r--ctdb/protocol/protocol_packet.c48
-rw-r--r--ctdb/protocol/protocol_private.h300
-rw-r--r--ctdb/protocol/protocol_sock.c81
-rw-r--r--ctdb/protocol/protocol_tunnel.c114
-rw-r--r--ctdb/protocol/protocol_types.c5348
-rw-r--r--ctdb/protocol/protocol_util.c773
-rw-r--r--ctdb/protocol/protocol_util.h83
-rw-r--r--ctdb/server/ctdb_banning.c146
-rw-r--r--ctdb/server/ctdb_call.c2086
-rw-r--r--ctdb/server/ctdb_client.c1709
-rw-r--r--ctdb/server/ctdb_cluster_mutex.c382
-rw-r--r--ctdb/server/ctdb_cluster_mutex.h51
-rw-r--r--ctdb/server/ctdb_config.c183
-rw-r--r--ctdb/server/ctdb_config.h59
-rw-r--r--ctdb/server/ctdb_control.c1097
-rw-r--r--ctdb/server/ctdb_daemon.c2248
-rw-r--r--ctdb/server/ctdb_fork.c216
-rw-r--r--ctdb/server/ctdb_freeze.c923
-rw-r--r--ctdb/server/ctdb_keepalive.c234
-rw-r--r--ctdb/server/ctdb_lock.c996
-rw-r--r--ctdb/server/ctdb_lock_helper.c350
-rw-r--r--ctdb/server/ctdb_logging.c174
-rw-r--r--ctdb/server/ctdb_ltdb_server.c1663
-rw-r--r--ctdb/server/ctdb_monitor.c509
-rw-r--r--ctdb/server/ctdb_mutex_fcntl_helper.c795
-rw-r--r--ctdb/server/ctdb_persistent.c397
-rw-r--r--ctdb/server/ctdb_recover.c1243
-rw-r--r--ctdb/server/ctdb_recoverd.c3286
-rw-r--r--ctdb/server/ctdb_recovery_helper.c3200
-rw-r--r--ctdb/server/ctdb_server.c608
-rw-r--r--ctdb/server/ctdb_statistics.c93
-rw-r--r--ctdb/server/ctdb_takeover.c2751
-rw-r--r--ctdb/server/ctdb_takeover_helper.c1276
-rw-r--r--ctdb/server/ctdb_traverse.c781
-rw-r--r--ctdb/server/ctdb_tunables.c170
-rw-r--r--ctdb/server/ctdb_tunnel.c141
-rw-r--r--ctdb/server/ctdb_update_record.c372
-rw-r--r--ctdb/server/ctdb_uptime.c55
-rw-r--r--ctdb/server/ctdb_vacuum.c1990
-rw-r--r--ctdb/server/ctdbd.c407
-rw-r--r--ctdb/server/eventscript.c845
-rw-r--r--ctdb/server/ipalloc.c284
-rw-r--r--ctdb/server/ipalloc.h67
-rw-r--r--ctdb/server/ipalloc_common.c192
-rw-r--r--ctdb/server/ipalloc_deterministic.c191
-rw-r--r--ctdb/server/ipalloc_lcp2.c525
-rw-r--r--ctdb/server/ipalloc_nondeterministic.c150
-rw-r--r--ctdb/server/ipalloc_private.h57
-rw-r--r--ctdb/server/legacy_conf.c80
-rw-r--r--ctdb/server/legacy_conf.h35
-rw-r--r--ctdb/tcp/ctdb_tcp.h56
-rw-r--r--ctdb/tcp/tcp_connect.c599
-rw-r--r--ctdb/tcp/tcp_init.c215
-rw-r--r--ctdb/tcp/tcp_io.c96
-rwxr-xr-xctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh36
-rwxr-xr-xctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh257
-rwxr-xr-xctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh57
-rwxr-xr-xctdb/tests/CLUSTER/complex/31_nfs_tickle.sh77
-rwxr-xr-xctdb/tests/CLUSTER/complex/32_cifs_tickle.sh69
-rwxr-xr-xctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh74
-rwxr-xr-xctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh81
-rwxr-xr-xctdb/tests/CLUSTER/complex/36_smb_reset_server.sh78
-rwxr-xr-xctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh78
-rwxr-xr-xctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh56
-rwxr-xr-xctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh70
-rwxr-xr-xctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh62
-rwxr-xr-xctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh82
-rwxr-xr-xctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh69
-rwxr-xr-xctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh56
-rwxr-xr-xctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh46
-rw-r--r--ctdb/tests/CLUSTER/complex/README2
-rw-r--r--ctdb/tests/CLUSTER/complex/scripts/local.bash289
-rwxr-xr-xctdb/tests/INTEGRATION/database/basics.001.attach.sh48
-rwxr-xr-xctdb/tests/INTEGRATION/database/basics.002.attach.sh116
-rwxr-xr-xctdb/tests/INTEGRATION/database/basics.003.detach.sh166
-rwxr-xr-xctdb/tests/INTEGRATION/database/basics.004.wipe.sh56
-rwxr-xr-xctdb/tests/INTEGRATION/database/basics.010.backup_restore.sh97
-rwxr-xr-xctdb/tests/INTEGRATION/database/fetch.001.ring.sh34
-rwxr-xr-xctdb/tests/INTEGRATION/database/fetch.002.ring-hotkeys.sh161
-rwxr-xr-xctdb/tests/INTEGRATION/database/readonly.001.basic.sh178
-rwxr-xr-xctdb/tests/INTEGRATION/database/recovery.001.volatile.sh118
-rwxr-xr-xctdb/tests/INTEGRATION/database/recovery.002.large.sh106
-rwxr-xr-xctdb/tests/INTEGRATION/database/recovery.003.no_resurrect.sh63
-rwxr-xr-xctdb/tests/INTEGRATION/database/recovery.010.persistent.sh103
-rwxr-xr-xctdb/tests/INTEGRATION/database/recovery.011.continue.sh73
-rw-r--r--ctdb/tests/INTEGRATION/database/scripts/local.bash116
-rwxr-xr-xctdb/tests/INTEGRATION/database/transaction.001.ptrans.sh110
-rwxr-xr-xctdb/tests/INTEGRATION/database/transaction.002.loop.sh28
-rwxr-xr-xctdb/tests/INTEGRATION/database/transaction.003.loop_recovery.sh50
-rwxr-xr-xctdb/tests/INTEGRATION/database/transaction.004.update_record.sh80
-rwxr-xr-xctdb/tests/INTEGRATION/database/transaction.010.loop_recovery.sh51
-rwxr-xr-xctdb/tests/INTEGRATION/database/traverse.001.one.sh116
-rwxr-xr-xctdb/tests/INTEGRATION/database/traverse.002.many.sh52
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.001.fast.sh159
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.002.full.sh96
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.003.recreate.sh139
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.030.locked.sh102
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.031.locked.sh114
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.032.locked.sh102
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.033.locked.sh117
-rwxr-xr-xctdb/tests/INTEGRATION/database/vacuum.034.locked.sh129
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.001.list.sh48
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.010.addip.sh25
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.011.delip.sh16
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.012.reloadips.sh117
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.013.failover_noop.sh44
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.014.iface_gc.sh51
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.020.moveip.sh76
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.030.disable_enable.sh23
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.032.stop_continue.sh21
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.040.NoIPTakeover.sh71
-rwxr-xr-xctdb/tests/INTEGRATION/failover/pubips.050.missing_ip.sh71
-rw-r--r--ctdb/tests/INTEGRATION/simple/README2
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.000.onnode.sh12
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh38
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.002.tunables.sh67
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.003.ping.sh34
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.004.getpid.sh55
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh66
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.010.statistics.sh17
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh62
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh26
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh26
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh24
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh30
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh30
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh29
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh24
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh20
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh80
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh92
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh53
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh34
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh48
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh39
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh55
-rwxr-xr-xctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh42
-rwxr-xr-xctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh74
-rwxr-xr-xctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh18
-rwxr-xr-xctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh16
-rwxr-xr-xctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh76
-rw-r--r--ctdb/tests/README145
-rw-r--r--ctdb/tests/TODO4
-rwxr-xr-xctdb/tests/UNIT/cunit/cluster_mutex_001.sh66
-rwxr-xr-xctdb/tests/UNIT/cunit/cluster_mutex_002.sh132
-rwxr-xr-xctdb/tests/UNIT/cunit/cluster_mutex_003.sh75
-rwxr-xr-xctdb/tests/UNIT/cunit/cmdline_test_001.sh98
-rwxr-xr-xctdb/tests/UNIT/cunit/comm_test_001.sh13
-rwxr-xr-xctdb/tests/UNIT/cunit/comm_test_002.sh24
-rwxr-xr-xctdb/tests/UNIT/cunit/conf_test_001.sh196
-rwxr-xr-xctdb/tests/UNIT/cunit/config_test_001.sh115
-rwxr-xr-xctdb/tests/UNIT/cunit/config_test_002.sh65
-rwxr-xr-xctdb/tests/UNIT/cunit/config_test_003.sh52
-rwxr-xr-xctdb/tests/UNIT/cunit/config_test_004.sh144
-rwxr-xr-xctdb/tests/UNIT/cunit/config_test_005.sh97
-rwxr-xr-xctdb/tests/UNIT/cunit/config_test_006.sh56
-rwxr-xr-xctdb/tests/UNIT/cunit/config_test_007.sh24
-rwxr-xr-xctdb/tests/UNIT/cunit/ctdb_io_test_001.sh10
-rwxr-xr-xctdb/tests/UNIT/cunit/db_hash_test_001.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/event_protocol_test_001.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/event_script_test_001.sh127
-rwxr-xr-xctdb/tests/UNIT/cunit/hash_count_test_001.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/line_test_001.sh90
-rwxr-xr-xctdb/tests/UNIT/cunit/path_tests_001.sh62
-rwxr-xr-xctdb/tests/UNIT/cunit/pidfile_test_001.sh8
-rwxr-xr-xctdb/tests/UNIT/cunit/pkt_read_001.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/pkt_write_001.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/porting_tests_001.sh15
-rwxr-xr-xctdb/tests/UNIT/cunit/protocol_test_001.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/protocol_test_002.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/protocol_test_012.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/protocol_test_101.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/protocol_test_111.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/protocol_test_201.sh6
-rwxr-xr-xctdb/tests/UNIT/cunit/rb_test_001.sh31
-rwxr-xr-xctdb/tests/UNIT/cunit/reqid_test_001.sh13
-rwxr-xr-xctdb/tests/UNIT/cunit/run_event_001.sh137
-rwxr-xr-xctdb/tests/UNIT/cunit/run_proc_001.sh159
-rwxr-xr-xctdb/tests/UNIT/cunit/sock_daemon_test_001.sh135
-rwxr-xr-xctdb/tests/UNIT/cunit/sock_io_test_001.sh9
-rwxr-xr-xctdb/tests/UNIT/cunit/srvid_test_001.sh7
-rwxr-xr-xctdb/tests/UNIT/cunit/system_socket_test_001.sh6
-rwxr-xr-xctdb/tests/UNIT/cunit/system_socket_test_002.sh68
-rwxr-xr-xctdb/tests/UNIT/cunit/system_socket_test_003.sh42
-rwxr-xr-xctdb/tests/UNIT/cunit/tmon_test_001.sh195
-rwxr-xr-xctdb/tests/UNIT/cunit/tmon_test_002.sh142
-rwxr-xr-xctdb/tests/UNIT/cunit/tunable_test_001.sh312
-rw-r--r--ctdb/tests/UNIT/eventd/README1
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/ctdb.conf6
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/debug-script.sh22
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/events/data/03.notalink.script2
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/events/data/README1
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/events/empty/README1
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/events/multi/01.test.script11
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/events/multi/02.test.script9
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/events/multi/03.test.script9
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/events/random/01.disabled.script3
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/events/random/02.enabled.script49
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/events/random/README.script1
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/events/random/a.script3
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/01.dummy.script6
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/02.disabled.script6
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/share/events/empty/README1
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/01.disabled.script3
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/02.enabled.script12
-rw-r--r--ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/README.script1
-rwxr-xr-xctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/a.script3
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_001.sh27
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_002.sh21
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_003.sh45
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_004.sh33
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_005.sh34
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_006.sh19
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_007.sh19
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_008.sh83
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_009.sh155
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_011.sh40
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_012.sh27
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_013.sh27
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_014.sh27
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_021.sh26
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_022.sh22
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_023.sh22
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_024.sh31
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_031.sh17
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_032.sh43
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_033.sh43
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_041.sh26
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_042.sh29
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_043.sh29
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_044.sh37
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_051.sh15
-rwxr-xr-xctdb/tests/UNIT/eventd/eventd_052.sh35
-rw-r--r--ctdb/tests/UNIT/eventd/scripts/local.sh122
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.002.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.003.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.004.sh18
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.005.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.006.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.007.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.008.sh19
-rwxr-xr-xctdb/tests/UNIT/eventscripts/00.ctdb.init.009.sh51
-rwxr-xr-xctdb/tests/UNIT/eventscripts/01.reclock.init.001.sh10
-rwxr-xr-xctdb/tests/UNIT/eventscripts/01.reclock.init.002.sh10
-rwxr-xr-xctdb/tests/UNIT/eventscripts/01.reclock.init.003.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.002.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.003.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.004.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.005.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.006.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.007.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.011.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.012.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.014.sh18
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.015.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.017.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/05.system.monitor.018.sh82
-rwxr-xr-xctdb/tests/UNIT/eventscripts/06.nfs.releaseip.001.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/06.nfs.releaseip.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/06.nfs.takeip.001.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/06.nfs.takeip.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.010.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.011.sh28
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.012.sh31
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.013.sh36
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.init.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.init.002.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.init.021.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.init.022.sh18
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.init.023.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.002.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.003.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.004.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.005.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.006.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.009.sh19
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.010.sh25
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.011.sh21
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.012.sh29
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.013.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.014.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.015.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.016.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.017.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.monitor.018.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.multi.001.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.releaseip.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.releaseip.002.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.startup.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.startup.002.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.takeip.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.takeip.002.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/10.interface.takeip.003.sh22
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.001.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.002.sh24
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.003.sh24
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.004.sh24
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.011.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.012.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.013.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.014.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.015.sh61
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.021.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.022.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.023.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.024.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.025.sh65
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.031.sh62
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.041.sh24
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.042.sh25
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.051.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.052.sh21
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.053.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/11.natgw.054.sh21
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.001.sh19
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.002.sh18
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.003.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.004.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.005.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.006.sh24
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.007.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.008.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.009.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.010.sh19
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.011.sh21
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.012.sh29
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.013.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.014.sh29
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.015.sh29
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.016.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.017.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.018.sh21
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.019.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.021.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.022.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.023.sh25
-rwxr-xr-xctdb/tests/UNIT/eventscripts/13.per_ip_routing.024.sh30
-rwxr-xr-xctdb/tests/UNIT/eventscripts/20.multipathd.monitor.001.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/20.multipathd.monitor.002.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/20.multipathd.monitor.003.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/20.multipathd.monitor.004.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/31.clamd.monitor.002.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/31.clamd.monitor.003.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/40.vsftpd.monitor.002.sh52
-rwxr-xr-xctdb/tests/UNIT/eventscripts/40.vsftpd.shutdown.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/40.vsftpd.startup.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/41.httpd.monitor.002.sh30
-rwxr-xr-xctdb/tests/UNIT/eventscripts/41.httpd.shutdown.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/41.httpd.startup.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/48.netbios.shutdown.011.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/48.netbios.startup.011.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/49.winbind.monitor.101.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/49.winbind.monitor.102.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/49.winbind.shutdown.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/49.winbind.startup.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.101.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.103.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.104.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.105.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.106.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.110.sh20
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.111.sh23
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.112.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.monitor.113.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.shutdown.001.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.shutdown.002.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.shutdown.011.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/50.samba.startup.011.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.101.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.102.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.103.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.104.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.105.sh10
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.106.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.107.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.108.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.109.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.111.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.112.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.113.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.114.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.121.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.122.sh18
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.131.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.132.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.141.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.142.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.143.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.144.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.151.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.152.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.153.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.161.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.monitor.162.sh15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.multi.001.sh19
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.multi.002.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.releaseip.001.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.releaseip.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.shutdown.001.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.shutdown.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.startup.001.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.startup.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.takeip.001.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/60.nfs.takeip.002.sh12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.001.sh54
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.011.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.012.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.013.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.014.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.monitor.001.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.monitor.002.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.monitor.003.sh19
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.shutdown.001.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.shutdown.002.sh18
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.startup.001.sh11
-rwxr-xr-xctdb/tests/UNIT/eventscripts/91.lvs.startup.002.sh14
-rw-r--r--ctdb/tests/UNIT/eventscripts/README46
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.001.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.002.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.003.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.004.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.005.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.006.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.007.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.008.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.021.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.022.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.023.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.024.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.025.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.026.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.027.sh9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/debug_locks.sh.028.sh9
-rw-r--r--ctdb/tests/UNIT/eventscripts/etc-ctdb/public_addresses9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/etc-ctdb/rc.local56
-rwxr-xr-xctdb/tests/UNIT/eventscripts/etc/init.d/nfs7
-rwxr-xr-xctdb/tests/UNIT/eventscripts/etc/init.d/nfslock7
-rw-r--r--ctdb/tests/UNIT/eventscripts/etc/os-release2
-rw-r--r--ctdb/tests/UNIT/eventscripts/etc/samba/smb.conf43
-rw-r--r--ctdb/tests/UNIT/eventscripts/etc/sysconfig/nfs2
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/00.ctdb.sh24
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/01.reclock.sh16
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/05.system.sh48
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/06.nfs.sh4
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/10.interface.sh72
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/11.natgw.sh120
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/13.per_ip_routing.sh47
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/20.multipathd.sh25
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/31.clamd.sh8
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/40.vsftpd.sh14
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/41.httpd.sh14
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/48.netbios.sh23
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/49.winbind.sh28
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/50.samba.sh58
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/60.nfs.sh435
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/91.lvs.sh76
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/debug_locks.sh272
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/local.sh568
-rw-r--r--ctdb/tests/UNIT/eventscripts/scripts/statd-callout.sh65
-rwxr-xr-xctdb/tests/UNIT/eventscripts/statd-callout.001.sh13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/statd-callout.002.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/statd-callout.003.sh16
-rwxr-xr-xctdb/tests/UNIT/eventscripts/statd-callout.004.sh17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/statd-callout.005.sh25
-rwxr-xr-xctdb/tests/UNIT/eventscripts/statd-callout.006.sh27
-rwxr-xr-xctdb/tests/UNIT/eventscripts/statd-callout.007.sh14
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ctdb481
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ctdb-config2
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ctdb_killtcp10
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ctdb_lvs53
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ctdb_natgw34
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/date7
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/df38
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ethtool12
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/exportfs13
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/gstack19
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/id3
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ip833
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ip6tables5
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/iptables5
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ipvsadm154
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/kill7
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/killall7
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/multipath36
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/net5
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/nfs-fake-callout15
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/nfsconf5
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/pidof17
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/pkill7
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ps48
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/rm6
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/rpc.lockd6
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/rpc.mountd6
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/rpc.rquotad6
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/rpc.statd6
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/rpcinfo78
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/service65
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/sleep9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/smnotify65
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/ss206
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/stat71
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/tdb_mutex_check10
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/tdbdump9
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/tdbtool36
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/testparm84
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/timeout8
-rwxr-xr-xctdb/tests/UNIT/eventscripts/stubs/wbinfo7
-rwxr-xr-xctdb/tests/UNIT/onnode/0001.sh24
-rwxr-xr-xctdb/tests/UNIT/onnode/0002.sh16
-rwxr-xr-xctdb/tests/UNIT/onnode/0003.sh16
-rwxr-xr-xctdb/tests/UNIT/onnode/0004.sh16
-rwxr-xr-xctdb/tests/UNIT/onnode/0005.sh13
-rwxr-xr-xctdb/tests/UNIT/onnode/0006.sh15
-rwxr-xr-xctdb/tests/UNIT/onnode/0010.sh13
-rwxr-xr-xctdb/tests/UNIT/onnode/0011.sh13
-rwxr-xr-xctdb/tests/UNIT/onnode/0070.sh32
-rwxr-xr-xctdb/tests/UNIT/onnode/0071.sh29
-rwxr-xr-xctdb/tests/UNIT/onnode/0072.sh29
-rwxr-xr-xctdb/tests/UNIT/onnode/0075.sh29
-rw-r--r--ctdb/tests/UNIT/onnode/etc-ctdb/nodes4
-rw-r--r--ctdb/tests/UNIT/onnode/scripts/local.sh64
-rwxr-xr-xctdb/tests/UNIT/onnode/stubs/ctdb19
-rwxr-xr-xctdb/tests/UNIT/onnode/stubs/ssh2
-rwxr-xr-xctdb/tests/UNIT/shellcheck/base_scripts.sh12
-rwxr-xr-xctdb/tests/UNIT/shellcheck/ctdb_helpers.sh9
-rwxr-xr-xctdb/tests/UNIT/shellcheck/event_scripts.sh7
-rwxr-xr-xctdb/tests/UNIT/shellcheck/functions.sh7
-rwxr-xr-xctdb/tests/UNIT/shellcheck/init_script.sh19
-rw-r--r--ctdb/tests/UNIT/shellcheck/scripts/local.sh33
-rwxr-xr-xctdb/tests/UNIT/shellcheck/tests.sh36
-rwxr-xr-xctdb/tests/UNIT/shellcheck/tools.sh9
-rw-r--r--ctdb/tests/UNIT/takeover/README5
-rwxr-xr-xctdb/tests/UNIT/takeover/det.001.sh38
-rwxr-xr-xctdb/tests/UNIT/takeover/det.002.sh35
-rwxr-xr-xctdb/tests/UNIT/takeover/det.003.sh32
-rwxr-xr-xctdb/tests/UNIT/takeover/det.004.sh41
-rwxr-xr-xctdb/tests/UNIT/takeover/det.005.sh45
-rwxr-xr-xctdb/tests/UNIT/takeover/det.006.sh46
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.001.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.002.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.003.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.004.sh37
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.005.sh198
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.006.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.007.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.008.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.009.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.010.sh32
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.011.sh45
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.012.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.013.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.014.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.015.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.016.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.024.sh42
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.025.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.027.sh45
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.028.sh45
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.029.sh111
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.030.sh1813
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.031.sh143
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.032.sh450
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.033.sh74
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.034.sh21
-rwxr-xr-xctdb/tests/UNIT/takeover/lcp2.035.sh1813
-rwxr-xr-xctdb/tests/UNIT/takeover/nondet.001.sh35
-rwxr-xr-xctdb/tests/UNIT/takeover/nondet.002.sh32
-rwxr-xr-xctdb/tests/UNIT/takeover/nondet.003.sh29
-rw-r--r--ctdb/tests/UNIT/takeover/scripts/local.sh30
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/000.sh22
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/010.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/011.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/012.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/013.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/014.sh37
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/016.sh36
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/017.sh36
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/018.sh34
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/019.sh37
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/021.sh39
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/022.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/023.sh41
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/024.sh43
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/025.sh37
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/026.sh41
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/027.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/028.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/030.sh35
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/031.sh55
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/110.sh29
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/111.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/120.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/121.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/122.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/130.sh41
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/131.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/132.sh42
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/140.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/150.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/160.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/210.sh29
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/211.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/220.sh40
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/230.sh41
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/240.sh33
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/250.sh31
-rwxr-xr-xctdb/tests/UNIT/takeover_helper/260.sh31
-rw-r--r--ctdb/tests/UNIT/takeover_helper/scripts/local.sh108
-rw-r--r--ctdb/tests/UNIT/tool/README17
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.attach.001.sh35
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.attach.002.sh35
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.attach.003.sh35
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ban.001.sh35
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ban.002.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ban.003.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.catdb.001.sh80
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.catdb.002.sh86
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.cattdb.001.sh80
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.cattdb.002.sh86
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.continue.001.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.continue.002.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.continue.003.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.deletekey.001.sh34
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.disable.001.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.disable.002.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.disable.003.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.disable.004.sh15
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.enable.001.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.enable.002.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.enable.003.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getcapabilities.001.sh19
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getcapabilities.002.sh19
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getcapabilities.003.sh28
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getcapabilities.004.sh39
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getdbmap.001.sh34
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getdbseqnum.001.sh41
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getdbseqnum.002.sh36
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getdbstatus.001.sh108
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getdbstatus.002.sh108
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getpid.001.sh17
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getpid.010.sh25
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getreclock.001.sh16
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getreclock.002.sh21
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getvar.001.sh35
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.getvar.002.sh17
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ifaces.001.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ip.001.sh17
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ip.002.sh17
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ip.003.sh30
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ip.004.sh29
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ip.005.sh30
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ip.006.sh30
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ip.007.sh36
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ipinfo.001.sh18
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ipinfo.002.sh32
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ipinfo.003.sh35
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.leader.001.sh16
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.leader.002.sh16
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.listnodes.001.sh20
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.listnodes.002.sh19
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.listvars.001.sh66
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.001.sh36
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.002.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.003.sh43
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.004.sh45
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.005.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.006.sh44
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.007.sh42
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.008.sh66
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.lvs.010.sh25
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.001.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.002.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.003.sh43
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.004.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.005.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.006.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.007.sh45
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.008.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.natgw.010.sh25
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh33
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.nodestatus.002.sh33
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.nodestatus.003.sh33
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.nodestatus.004.sh28
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.nodestatus.005.sh28
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.nodestatus.006.sh40
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.nodestatus.007.sh36
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.pdelete.001.sh27
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ping.001.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.pnn.001.sh15
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.process-exists.001.sh28
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.process-exists.002.sh30
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.process-exists.003.sh30
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.pstore.001.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.ptrans.001.sh49
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.readkey.001.sh20
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.recover.001.sh22
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.001.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.002.sh30
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.003.sh29
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.011.sh25
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.012.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.013.sh26
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.014.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.015.sh26
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.016.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.017.sh26
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.018.sh29
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.019.sh28
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.020.sh28
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.021.sh26
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.023.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.reloadnodes.024.sh24
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.runstate.001.sh15
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.runstate.002.sh15
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.runstate.003.sh17
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.runstate.004.sh15
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.runstate.005.sh15
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbreadonly.001.sh53
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbreadonly.002.sh37
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbreadonly.003.sh39
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbreadonly.004.sh37
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbreadonly.005.sh39
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbsticky.001.sh53
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbsticky.002.sh37
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbsticky.003.sh39
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbsticky.004.sh37
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdbsticky.005.sh39
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdebug.001.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdebug.002.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setdebug.003.sh32
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setifacelink.001.sh76
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setifacelink.002.sh22
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setvar.001.sh49
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.setvar.002.sh17
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.status.001.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.status.002.sh46
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.status.003.sh49
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.stop.001.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.stop.002.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.stop.003.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.unban.001.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.unban.002.sh34
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.unban.003.sh23
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.uptime.001.sh36
-rwxr-xr-xctdb/tests/UNIT/tool/ctdb.writekey.001.sh31
-rw-r--r--ctdb/tests/UNIT/tool/scripts/local.sh112
-rwxr-xr-xctdb/tests/etc-ctdb/events/legacy/00.test.script30
-rwxr-xr-xctdb/tests/local_daemons.sh506
l---------ctdb/tests/run_cluster_tests.sh1
-rwxr-xr-xctdb/tests/run_tests.sh399
-rwxr-xr-xctdb/tests/scripts/cluster.bash18
-rw-r--r--ctdb/tests/scripts/common.sh146
-rw-r--r--ctdb/tests/scripts/integration.bash864
-rw-r--r--ctdb/tests/scripts/integration_local_daemons.bash95
-rw-r--r--ctdb/tests/scripts/integration_real_cluster.bash53
-rw-r--r--ctdb/tests/scripts/script_install_paths.sh67
-rwxr-xr-xctdb/tests/scripts/test_wrap11
-rw-r--r--ctdb/tests/scripts/unit.sh267
-rw-r--r--ctdb/tests/src/cluster_mutex_test.c844
-rw-r--r--ctdb/tests/src/cluster_wait.c346
-rw-r--r--ctdb/tests/src/cluster_wait.h30
-rw-r--r--ctdb/tests/src/cmdline_test.c480
-rw-r--r--ctdb/tests/src/comm_client_test.c217
-rw-r--r--ctdb/tests/src/comm_server_test.c292
-rw-r--r--ctdb/tests/src/comm_test.c501
-rw-r--r--ctdb/tests/src/conf_test.c513
-rw-r--r--ctdb/tests/src/ctdb_io_test.c356
-rw-r--r--ctdb/tests/src/ctdb_packet_parse.c136
-rw-r--r--ctdb/tests/src/ctdb_takeover_tests.c281
-rw-r--r--ctdb/tests/src/db_hash_test.c138
-rw-r--r--ctdb/tests/src/db_test_tool.c792
-rw-r--r--ctdb/tests/src/dummy_client.c163
-rw-r--r--ctdb/tests/src/errcode.c189
-rw-r--r--ctdb/tests/src/event_script_test.c120
-rw-r--r--ctdb/tests/src/fake_ctdbd.c4781
-rw-r--r--ctdb/tests/src/fetch_loop.c288
-rw-r--r--ctdb/tests/src/fetch_loop_key.c217
-rw-r--r--ctdb/tests/src/fetch_readonly.c166
-rw-r--r--ctdb/tests/src/fetch_readonly_loop.c272
-rw-r--r--ctdb/tests/src/fetch_ring.c398
-rw-r--r--ctdb/tests/src/g_lock_loop.c270
-rw-r--r--ctdb/tests/src/hash_count_test.c132
-rw-r--r--ctdb/tests/src/ipalloc_read_known_ips.c179
-rw-r--r--ctdb/tests/src/ipalloc_read_known_ips.h32
-rw-r--r--ctdb/tests/src/line_test.c102
-rw-r--r--ctdb/tests/src/lock_tdb.c60
-rw-r--r--ctdb/tests/src/message_ring.c369
-rw-r--r--ctdb/tests/src/pidfile_test.c242
-rw-r--r--ctdb/tests/src/pkt_read_test.c249
-rw-r--r--ctdb/tests/src/pkt_write_test.c359
-rw-r--r--ctdb/tests/src/porting_tests.c262
-rw-r--r--ctdb/tests/src/protocol_basic_test.c106
-rw-r--r--ctdb/tests/src/protocol_common.c1260
-rw-r--r--ctdb/tests/src/protocol_common.h238
-rw-r--r--ctdb/tests/src/protocol_common_basic.c305
-rw-r--r--ctdb/tests/src/protocol_common_basic.h175
-rw-r--r--ctdb/tests/src/protocol_common_ctdb.c1967
-rw-r--r--ctdb/tests/src/protocol_common_ctdb.h101
-rw-r--r--ctdb/tests/src/protocol_ctdb_compat_test.c1270
-rw-r--r--ctdb/tests/src/protocol_ctdb_test.c365
-rw-r--r--ctdb/tests/src/protocol_types_compat_test.c2371
-rw-r--r--ctdb/tests/src/protocol_types_test.c194
-rw-r--r--ctdb/tests/src/protocol_util_test.c417
-rw-r--r--ctdb/tests/src/rb_test.c336
-rw-r--r--ctdb/tests/src/reqid_test.c89
-rw-r--r--ctdb/tests/src/run_event_test.c251
-rw-r--r--ctdb/tests/src/run_proc_test.c111
-rw-r--r--ctdb/tests/src/sigcode.c120
-rw-r--r--ctdb/tests/src/sock_daemon_test.c1980
-rw-r--r--ctdb/tests/src/sock_io_test.c283
-rw-r--r--ctdb/tests/src/srvid_test.c105
-rw-r--r--ctdb/tests/src/system_socket_test.c266
-rw-r--r--ctdb/tests/src/test_backtrace.c37
-rw-r--r--ctdb/tests/src/test_backtrace.h25
-rw-r--r--ctdb/tests/src/test_mutex_raw.c434
-rw-r--r--ctdb/tests/src/test_options.c245
-rw-r--r--ctdb/tests/src/test_options.h44
-rw-r--r--ctdb/tests/src/tmon_ping_test.c381
-rw-r--r--ctdb/tests/src/tmon_test.c406
-rw-r--r--ctdb/tests/src/transaction_loop.c419
-rw-r--r--ctdb/tests/src/tunable_test.c71
-rw-r--r--ctdb/tests/src/tunnel_cmd.c199
-rw-r--r--ctdb/tests/src/tunnel_test.c480
-rw-r--r--ctdb/tests/src/update_record.c236
-rw-r--r--ctdb/tests/src/update_record_persistent.c218
-rwxr-xr-xctdb/tests/test_check_tcp_ports.sh18
-rw-r--r--ctdb/tools/ctdb.c6600
-rwxr-xr-xctdb/tools/ctdb_diagnostics346
-rw-r--r--ctdb/tools/ctdb_killtcp.c418
-rwxr-xr-xctdb/tools/ctdb_lvs204
-rwxr-xr-xctdb/tools/ctdb_natgw194
-rw-r--r--ctdb/tools/ltdbtool.c395
-rwxr-xr-xctdb/tools/onnode344
-rw-r--r--ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c457
-rwxr-xr-xctdb/utils/ceph/test_ceph_rados_reclock.sh212
-rwxr-xr-xctdb/utils/etcd/ctdb_etcd_lock213
-rw-r--r--ctdb/utils/nagios/README56
-rwxr-xr-xctdb/utils/nagios/check_ctdb279
-rw-r--r--ctdb/utils/ping_pong/ping_pong.c303
-rw-r--r--ctdb/utils/pmda/Install36
-rw-r--r--ctdb/utils/pmda/README84
-rw-r--r--ctdb/utils/pmda/Remove29
-rw-r--r--ctdb/utils/pmda/domain.h19
-rw-r--r--ctdb/utils/pmda/help106
-rw-r--r--ctdb/utils/pmda/pmda_ctdb.c559
-rw-r--r--ctdb/utils/pmda/pmns73
-rw-r--r--ctdb/utils/pmda/root10
-rw-r--r--ctdb/utils/smnotify/smnotify.c151
-rw-r--r--ctdb/utils/smnotify/smnotify.x21
-rw-r--r--ctdb/utils/tdb/tdb_mutex_check.c160
-rw-r--r--ctdb/wscript1329
1047 files changed, 178258 insertions, 0 deletions
diff --git a/ctdb/.bzrignore b/ctdb/.bzrignore
new file mode 100644
index 0000000..6560aa4
--- /dev/null
+++ b/ctdb/.bzrignore
@@ -0,0 +1,24 @@
+config.status
+Makefile
+bin
+config.log
+push.sh
+ctdb_test
+config.cache
+configure
+config.h
+config.h.in
+nodes-ssh.txt
+TAGS
+ctdb-0
+ctdb-1
+ctdb-2
+ctdb-3
+nodes.txt
+TAGS
+web/packages
+rec.lock
+test.db
+sock.1
+sock.3
+sock.4
diff --git a/ctdb/.gitignore b/ctdb/.gitignore
new file mode 100644
index 0000000..f0534b3
--- /dev/null
+++ b/ctdb/.gitignore
@@ -0,0 +1,32 @@
+*.[oa]
+*~
+*.swp
+config.status
+configure
+ctdb.pc
+publish*.sh
+push*.sh
+web/packages
+TAGS
+tags
+bin
+Makefile
+config.h
+config.h.in
+config.log
+utils/smnotify/gen_smnotify.c
+utils/smnotify/gen_xdr.c
+utils/smnotify/smnotify.h
+nodes.txt
+public_addresses.txt
+rec.lock
+test.db
+tests/bin
+tests/var
+tests/takeover/ctdb_takeover.pyc
+tests/eventscripts/var
+tests/eventscripts/etc/iproute2
+tests/eventscripts/etc-ctdb/policy_routing
+packaging/RPM/ctdb.spec
+doc/*.[1-7]
+doc/*.[1-7].html
diff --git a/ctdb/COPYING b/ctdb/COPYING
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/ctdb/COPYING
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/ctdb/Makefile b/ctdb/Makefile
new file mode 100644
index 0000000..94113fe
--- /dev/null
+++ b/ctdb/Makefile
@@ -0,0 +1,80 @@
+# simple makefile wrapper to run waf
+
+WAF_BINARY=$(PYTHON) ../buildtools/bin/waf
+WAF=PYTHONHASHSEED=1 WAF_MAKE=1 $(WAF_BINARY)
+
+all:
+ $(WAF) build
+
+install:
+ $(WAF) install
+
+uninstall:
+ $(WAF) uninstall
+
+test: FORCE
+ $(WAF) test $(TEST_OPTIONS)
+
+testenv:
+ $(WAF) test --testenv $(TEST_OPTIONS)
+
+autotest:
+ $(WAF) autotest $(TEST_OPTIONS)
+
+quicktest:
+ $(WAF) test --quick $(TEST_OPTIONS)
+
+show_version:
+ @touch .tmplock
+ @WAFLOCK=.tmplock $(WAF) show_version
+
+manpages:
+ touch .tmplock
+ WAFLOCK=.tmplock $(WAF) manpages
+
+dist:
+ touch .tmplock
+ WAFLOCK=.tmplock $(WAF) dist
+
+distcheck:
+ touch .tmplock
+ WAFLOCK=.tmplock $(WAF) distcheck
+
+rpm:
+ touch .tmplock
+ WAFLOCK=.tmplock $(WAF) rpm
+
+clean:
+ $(WAF) clean
+
+distclean:
+ $(WAF) distclean
+
+reconfigure: configure
+ $(WAF) reconfigure
+
+show_waf_options:
+ $(WAF) --help
+
+# some compatibility make targets
+everything: all
+
+testsuite: all
+
+check: test
+
+torture: all
+
+# this should do an install as well, once install is finished
+installcheck: test
+
+etags:
+ $(WAF) etags
+
+ctags:
+ touch .tmplock
+ WAFLOCK=.tmplock $(WAF) ctags
+
+bin/%:: FORCE
+ $(WAF) --targets=`basename $@`
+FORCE:
diff --git a/ctdb/README b/ctdb/README
new file mode 100644
index 0000000..3099a6d
--- /dev/null
+++ b/ctdb/README
@@ -0,0 +1,8 @@
+This is the release version of CTDB, a clustered implementation of the TDB
+database used by Samba and other projects to store temporary data.
+
+This software is freely distributable under the GNU General Public License,
+a copy of which you should have received with this software (in a file
+called COPYING).
+
+For documentation on CTDB, please visit the CTDB website at http://ctdb.samba.org.
diff --git a/ctdb/client/client.h b/ctdb/client/client.h
new file mode 100644
index 0000000..9d1fd48
--- /dev/null
+++ b/ctdb/client/client.h
@@ -0,0 +1,1416 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CLIENT_H__
+#define __CTDB_CLIENT_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "protocol/protocol.h"
+#include "common/srvid.h"
+
+/**
+ * @file client.h
+ *
+ * @brief Client api to talk to ctdb daemon
+ *
+ * This API allows one to connect to ctdb daemon, perform various database
+ * operations, send controls to ctdb daemon and send messages to other ctdb
+ * clients.
+ */
+
+/**
+ * @brief The abstract context that holds client connection to ctdb daemon
+ */
+struct ctdb_client_context;
+
+/**
+ * @brief The abstract context that holds a tunnel endpoint
+ */
+struct ctdb_tunnel_context;
+
+/**
+ * @brief The abstract context that represents a clustered database
+ */
+struct ctdb_db_context;
+
+/**
+ * @brief The abstract context that represents a record from a distributed
+ * database
+ */
+struct ctdb_record_handle;
+
+/**
+ * @brief The abstract context that represents a transaction on a replicated
+ * database
+ */
+struct ctdb_transaction_handle;
+
+/**
+ * @brief Client callback function
+ *
+ * This function can be registered to be invoked in case of ctdb daemon going
+ * away.
+ */
+typedef void (*ctdb_client_callback_func_t)(void *private_data);
+
+/**
+ * @brief Tunnel callback function
+ *
+ * This function is registered when a tunnel endpoint is set up. When the
+ * tunnel endpoint receives a message, this function is invoked.
+ */
+typedef void (*ctdb_tunnel_callback_func_t)(struct ctdb_tunnel_context *tctx,
+ uint32_t srcnode, uint32_t reqid,
+ uint8_t *buf, size_t buflen,
+ void *private_data);
+
+/**
+ * @brief Async computation start to initialize a connection to ctdb daemon
+ *
+ * This returns a ctdb client context. Freeing this context will free the
+ * connection to ctdb daemon and any memory associated with it.
+ *
+ * If the connection to ctdb daemon is lost, the client will terminate
+ * automatically as the library will call exit(). If the client code
+ * wants to perform cleanup or wants to re-establish a new connection,
+ * the client should register a disconnect callback function.
+ *
+ * @see ctdb_client_set_disconnect_callback
+ *
+ * When a disconnect callback function is registered, client library will
+ * not call exit(). It is the responsibility of the client code to take
+ * appropriate action.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockpath Path to ctdb daemon unix domain socket
+ * @return new tevent request, NULL on failure
+ */
+struct tevent_req *ctdb_client_init_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ const char *sockpath);
+
+/**
+ * @brief Async computation end to initialize a connection to ctdb daemon
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] result The new ctdb client context
+ * @return true on success, false on failure
+ */
+bool ctdb_client_init_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_client_context **result);
+
+/**
+ * @brief Sync wrapper to initialize ctdb connection
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockpath Path to ctdb daemon unix domain socket
+ * @param[out] result The new ctdb client context
+ * @return 0 on success, errno on failure
+ */
+int ctdb_client_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ const char *sockpath,
+ struct ctdb_client_context **result);
+
+/**
+ * @brief Register a callback in case of client disconnection
+ *
+ * This allows client code to know if the connection to ctdb daemon is lost.
+ * This is useful if the client wants to re-establish a new connection to ctdb
+ * daemon.
+ *
+ * @param[in] client Client connection context
+ * @param[in] func Callback function
+ * @param[in] private_data private data for callback function
+ */
+void ctdb_client_set_disconnect_callback(struct ctdb_client_context *client,
+ ctdb_client_callback_func_t func,
+ void *private_data);
+
+/**
+ * @brief Get the node number of the current node
+ *
+ * @param[in] client Client connection context
+ * @return node number on success, CTDB_UNKNOWN_PNN on error
+ */
+uint32_t ctdb_client_pnn(struct ctdb_client_context *client);
+
+/**
+ * @brief Client event loop waiting for a flag
+ *
+ * This can be used to wait for asynchronous computations to complete.
+ * When this function is called, it will run tevent event loop and wait
+ * till the done flag is set to true. This function will block and will
+ * not return as long as the done flag is false.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] done Boolean flag to indicate when to stop waiting
+ */
+void ctdb_client_wait(struct tevent_context *ev, bool *done);
+
+/**
+ * @brief Client event loop waiting for function to return true with timeout
+ *
+ * This can be used to wait for asynchronous computations to complete.
+ * When this function is called, it will run tevent event loop and wait
+ * till the done function returns true or if the timeout occurs.
+ *
+ * This function will return when either
+ * - done function returns true, or
+ * - timeout has occurred.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] done_func Function that returns true when waiting should stop
+ * @param[in] private_data Passed to done function
+ * @param[in] timeout How long to wait
+ * @return 0 on success, ETIMEDOUT on timeout, and errno on failure
+ */
+int ctdb_client_wait_func_timeout(struct tevent_context *ev,
+ bool (*done_func)(void *private_data),
+ void *private_data,
+ struct timeval timeout);
+
+/**
+ * @brief Client event loop waiting for a flag with timeout
+ *
+ * This can be used to wait for asynchronous computations to complete.
+ * When this function is called, it will run tevent event loop and wait
+ * till the done flag is set to true or if the timeout occurs.
+ *
+ * This function will return when either
+ * - done flag is set to true, or
+ * - timeout has occurred.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] done Boolean flag to indicate when to stop waiting
+ * @param[in] timeout How long to wait
+ * @return 0 on success, ETIMEDOUT on timeout, and errno on failure
+ */
+int ctdb_client_wait_timeout(struct tevent_context *ev, bool *done,
+ struct timeval timeout);
+
+/**
+ * @brief Async computation start to wait till recovery is completed
+ *
+ * CTDB daemon does not perform many operations while in recovery (especially
+ * database operations). This computation allows one to wait till ctdb daemon has
+ * finished recovery.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @return new tevent request, or NULL on failure
+ */
+struct tevent_req *ctdb_recovery_wait_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client);
+
+/**
+ * @brief Async computation end to wait till recovery is completed
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_recovery_wait_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper for ctdb_recovery_wait computation
+ *
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @return true on success, false on failure
+ */
+bool ctdb_recovery_wait(struct tevent_context *ev,
+ struct ctdb_client_context *client);
+
+/**
+ * @brief Async computation start to migrate a database record
+ *
+ * This sends a request to ctdb daemon to migrate a database record to
+ * the local node. CTDB daemon will locate the data master for the record
+ * and will migrate record (and the data master) to the current node.
+ *
+ * @see ctdb_fetch_lock_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] request CTDB request data
+ * @return a new tevent req, or NULL on failure
+ */
+struct tevent_req *ctdb_client_call_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_req_call *request);
+
+/**
+ * @brief Async computation end to migrate a database record
+ *
+ * @param[in] req Tevent request
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] reply CTDB reply data
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_client_call_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_call **reply, int *perr);
+
+
+/**
+ * @brief Async computation start to send a message to remote client(s)
+ *
+ * This sends a message to ctdb clients on a remote node. All the
+ * messages are associated with a specific SRVID. All the clients on the
+ * remote node listening to that SRVID, will get the message.
+ *
+ * Clients can register and deregister for messages for a SRVID using
+ * ctdb_client_set_message_handler() and ctdb_client_remove_message_handler().
+ *
+ * @see ctdb_client_set_message_handler_send,
+ * ctdb_client_remove_message_handler_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] destnode Remote node id
+ * @param[in] message Message to send
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_client_message_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t destnode,
+ struct ctdb_req_message *message);
+
+/**
+ * @brief Async computation end to send a message to remote client(s)
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_client_message_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper to send a message to client(s) on remote node
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] destnode Node id
+ * @param[in] message Message to send
+ */
+int ctdb_client_message(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t destnode, struct ctdb_req_message *message);
+
+/**
+ * @brief Async computation start to send a message to multiple nodes
+ *
+ * This sends a message to ctdb clients on multiple remote nodes. All the
+ * messages are associated with a specific SRVID. All the clients on remote
+ * nodes listening to that SRVID, will get the message.
+ *
+ * Clients can register and deregister for messages for a SRVID using
+ * ctdb_client_set_message_handler() and ctdb_client_remove_message_handler().
+ *
+ * @see ctdb_client_set_message_handler_send,
+ * ctdb_client_remove_message_handler_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] pnn_list List of node ids
+ * @param[in] count Number of node ids
+ * @param[in] message Message to send
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_client_message_multi_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t *pnn_list, int count,
+ struct ctdb_req_message *message);
+
+/**
+ * @brief Async computation end to send a message to multiple nodes
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] perr_list The status from each node id
+ * @return true on success, false on failure
+ *
+ * If perr_list is not NULL, then it receives the status (0 on success, errno
+ * on failure) of sending the message to each of the nodes in the specified
+ * node list. The perr_list is an array of the same size as pnn_list.
+ */
+bool ctdb_client_message_multi_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx, int **perr_list);
+
+/**
+ * @brief Sync wrapper to send a message to multiple nodes
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] pnn_list List of node ids
+ * @param[in] count Number of node ids
+ * @param[in] message Message to send
+ * @param[out] perr_list The status from each node id
+ * @return 0 on success, errno on failure
+ */
+int ctdb_client_message_multi(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t *pnn_list, int count,
+ struct ctdb_req_message *message,
+ int **perr_list);
+
+/**
+ * @brief Async computation start to receive messages for a SRVID
+ *
+ * This computation informs ctdb that the client is interested in all messages
+ * for a specific SRVID.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] srvid SRVID
+ * @param[in] handler Callback function to call when a message is received
+ * @param[in] private_data Private data for callback
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_client_set_message_handler_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint64_t srvid,
+ srvid_handler_fn handler,
+ void *private_data);
+
+/**
+ * @brief Async computation end to receive messages for a SRVID
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_client_set_message_handler_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper to receive messages for a SRVID
+ *
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] srvid SRVID
+ * @param[in] handler Callback function to call when a message is received
+ * @param[in] private_data Private data for callback
+ * @return 0 on success, errno on failure
+ */
+int ctdb_client_set_message_handler(struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint64_t srvid, srvid_handler_fn handler,
+ void *private_data);
+
+/**
+ * @brief Async computation start to stop receiving messages for a SRVID
+ *
+ * This computation informs ctdb that the client is no longer interested in
+ * messages for a specific SRVID.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] srvid SRVID
+ * @param[in] private_data Private data used to register callback
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_client_remove_message_handler_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint64_t srvid,
+ void *private_data);
+
+/**
+ * @brief Async computation end to stop receiving messages for a SRVID
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_client_remove_message_handler_recv(struct tevent_req *req,
+ int *perr);
+
+/**
+ * @brief Sync wrapper to stop receiving messages for a SRVID
+ *
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] srvid SRVID
+ * @param[in] private_data Private data used to register callback
+ * @return 0 on success, errno on failure
+ */
+int ctdb_client_remove_message_handler(struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint64_t srvid, void *private_data);
+
+/**
+ * @brief Async computation start to send a control to ctdb daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] destnode Node id
+ * @param[in] timeout How long to wait
+ * @param[in] request Control request
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_client_control_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t destnode,
+ struct timeval timeout,
+ struct ctdb_req_control *request);
+
+/**
+ * @brief Async computation end to send a control to ctdb daemon
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] preply Control reply
+ * @return true on success, false on failure
+ */
+bool ctdb_client_control_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_control **preply);
+
+/**
+ * @brief Sync wrapper to send a control to ctdb daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] destnode Node id
+ * @param[in] timeout How long to wait
+ * @param[in] request Control request
+ * @param[out] preply Control reply
+ * @return 0 on success, errno on failure
+ */
+int ctdb_client_control(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t destnode,
+ struct timeval timeout,
+ struct ctdb_req_control *request,
+ struct ctdb_reply_control **preply);
+
+/**
+ * @brief Async computation start to send a control to multiple nodes
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] pnn_list List of node ids
+ * @param[in] count Number of node ids
+ * @param[in] timeout How long to wait
+ * @param[in] request Control request
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_client_control_multi_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t *pnn_list, int count,
+ struct timeval timeout,
+ struct ctdb_req_control *request);
+
+/**
+ * @brief Async computation end to send a control to multiple nodes
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] perr_list Status from each node
+ * @param[out] preply Control reply from each node
+ * @return true on success, false on failure
+ */
+bool ctdb_client_control_multi_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx, int **perr_list,
+ struct ctdb_reply_control ***preply);
+
+/**
+ * @brief Sync wrapper to send a control to multiple nodes
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] pnn_list List of node ids
+ * @param[in] count Number of node ids
+ * @param[in] timeout How long to wait
+ * @param[in] request Control request
+ * @param[out] perr_list Status from each node
+ * @param[out] preply Control reply from each node
+ * @return 0 on success, errno on failure
+ */
+int ctdb_client_control_multi(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t *pnn_list, int count,
+ struct timeval timeout,
+ struct ctdb_req_control *request,
+ int **perr_list,
+ struct ctdb_reply_control ***preply);
+
+/**
+ * @brief Check err_list for errors
+ *
+ * This is a convenience function to parse the err_list returned from
+ * functions that send requests to multiple nodes.
+ *
+ * If status from any of the nodes is non-zero, then return the first non-zero
+ * status.
+ *
+ * If status from all the nodes is 0, then return 0.
+ *
+ * @param[in] pnn_list List of node ids
+ * @param[in] count Number of node ids
+ * @param[in] err_list Status from each node
+ * @param[out] pnn Node id in case of failure
+ * @return 0 if no failures, status from first failure
+ */
+int ctdb_client_control_multi_error(uint32_t *pnn_list, int count,
+ int *err_list, uint32_t *pnn);
+
+/**
+ * @brief Async computation start to setup a tunnel endpoint
+ *
+ * This computation sets up a tunnel endpoint corresponding to a tunnel_id.
+ * A tunnel is a ctdb transport to deliver new protocol between endpoints.
+ *
+ * For two endpoints to communicate using new protocol,
+ * 1. Set up tunnel endpoints
+ * 2. Send requests
+ * 3. Send replies
+ * 4. Destroy tunnel endpoints
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] tunnel_id Unique tunnel id
+ * @param[in] callback Callback function to call when a message is received
+ * @param[in] private_data Private data for callback
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_tunnel_setup_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint64_t tunnel_id,
+ ctdb_tunnel_callback_func_t callback,
+ void *private_data);
+
+/**
+ * @brief Async computation end to setup a tunnel
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[out] result A new tunnel context
+ * @return true on success, false on failure
+ *
+ * Tunnel context should never be freed by user.
+ */
+bool ctdb_tunnel_setup_recv(struct tevent_req *req, int *perr,
+ struct ctdb_tunnel_context **result);
+
+/**
+ * @brief Sync wrapper for ctdb_tunnel_setup computation
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] tunnel_id Unique tunnel id
+ * @param[in] callback Callback function to call when a message is received
+ * @param[in] private_data Private data for callback
+ * @param[out] result A new tunnel context
+ * @return 0 on success, errno on failure
+ */
+int ctdb_tunnel_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client, uint64_t tunnel_id,
+ ctdb_tunnel_callback_func_t callback, void *private_data,
+ struct ctdb_tunnel_context **result);
+
+/**
+ * @brief Async computation start to destroy a tunnel endpoint
+ *
+ * This computation destroys the tunnel endpoint.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] tctx Tunnel context
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_tunnel_destroy_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_tunnel_context *tctx);
+
+/**
+ * @brief Async computation end to destroy a tunnel endpoint
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_tunnel_destroy_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper for ctdb_tunnel_destroy computation
+ *
+ * @param[in] ev Tevent context
+ * @param[in] tctx Tunnel context
+ * @return 0 on success, errno on failure
+ */
+int ctdb_tunnel_destroy(struct tevent_context *ev,
+ struct ctdb_tunnel_context *tctx);
+
+/**
+ * @brief Async computation start to send a request via a tunnel
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] tctx Tunnel context
+ * @param[in] destnode PNN of destination
+ * @param[in] timeout How long to wait
+ * @param[in] buf Message to send
+ * @param[in] buflen Size of the message to send
+ * @param[in] wait_for_reply Whether to wait for reply
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_tunnel_request_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_tunnel_context *tctx,
+ uint32_t destnode,
+ struct timeval timeout,
+ uint8_t *buf, size_t buflen,
+ bool wait_for_reply);
+
+/**
+ * @brief Async computation end to send a request via a tunnel
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc context
+ * @param[out] buf Reply data if expected
+ * @param[out] buflen Size of reply data if expected
+ * @return true on success, false on failure
+ */
+bool ctdb_tunnel_request_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx, uint8_t **buf,
+ size_t *buflen);
+
+/**
+ * @brief Sync wrapper for ctdb_tunnel_request computation
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] tctx Tunnel context
+ * @param[in] destnode PNN of destination
+ * @param[in] timeout How long to wait
+ * @param[in] buf Message to send
+ * @param[in] buflen Size of the message to send
+ * @param[in] wait_for_reply Whether to wait for reply
+ * @return 0 on success, errno on failure
+ */
+int ctdb_tunnel_request(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_tunnel_context *tctx, uint32_t destnode,
+ struct timeval timeout, uint8_t *buf, size_t buflen,
+ bool wait_for_reply);
+
+/**
+ * @brief Async computation start to send a reply via a tunnel
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] tctx Tunnel context
+ * @param[in] destnode PNN of destination
+ * @param[in] reqid Request id
+ * @param[in] timeout How long to wait
+ * @param[in] buf Reply data
+ * @param[in] buflen Size of reply data
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_tunnel_reply_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_tunnel_context *tctx,
+ uint32_t destnode, uint32_t reqid,
+ struct timeval timeout,
+ uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Async computation end to send a reply via a tunnel
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_tunnel_reply_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper for ctdb_tunnel_reply computation
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] tctx Tunnel context
+ * @param[in] destnode PNN of destination
+ * @param[in] reqid Request id
+ * @param[in] timeout How long to wait
+ * @param[in] buf Reply data
+ * @param[in] buflen Size of reply data
+ * @return 0 on success, errno on failure
+ */
+int ctdb_tunnel_reply(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_tunnel_context *tctx, uint32_t destnode,
+ uint32_t reqid, struct timeval timeout,
+ uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Async computation start to attach a database
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] timeout How long to wait
+ * @param[in] db_name Name of the database
+ * @param[in] db_flags Database flags
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_attach_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct timeval timeout,
+ const char *db_name, uint8_t db_flags);
+
+/**
+ * @brief Async computation end to attach a database
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[out] result New database context
+ * @return true on success, false on failure
+ */
+bool ctdb_attach_recv(struct tevent_req *req, int *perr,
+ struct ctdb_db_context **result);
+
+/**
+ * @brief Sync wrapper to attach a database
+ *
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] timeout How long to wait
+ * @param[in] db_name Name of the database
+ * @param[in] db_flags Database flags
+ * @param[out] result New database context
+ * @return 0 on success, errno on failure
+ */
+int ctdb_attach(struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct timeval timeout,
+ const char *db_name, uint8_t db_flags,
+ struct ctdb_db_context **result);
+
+/**
+ * @brief Async computation start to detach a database
+ *
+ * Only volatile databases can be detached at runtime.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] timeout How long to wait
+ * @param[in] db_id Database id
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_detach_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct timeval timeout, uint32_t db_id);
+
+/**
+ * @brief Async computation end to detach a database
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_detach_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper to detach a database
+ *
+ * Only volatile databases can be detached at runtime.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] timeout How long to wait
+ * @param[in] db_id Database id
+ * @return 0 on success, errno on failure
+ */
+int ctdb_detach(struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct timeval timeout, uint32_t db_id);
+
+
+/**
+ * @brief Get database id from database context
+ *
+ * @param[in] db Database context
+ * @return database id
+ */
+uint32_t ctdb_db_id(struct ctdb_db_context *db);
+
+/**
+ * @brief Traverse a database locally on the node
+ *
+ * This function traverses a database locally on the node and for each record
+ * calls the parser function. If the parser function returns 1, the traverse
+ * will terminate. If parser function returns 0, the traverse will continue
+ * till all records in database are parsed.
+ *
+ * This is useful for replicated databases, since each node has exactly the
+ * same records.
+ *
+ * @param[in] db Database context
+ * @param[in] readonly Is the traversal for reading or updating
+ * @param[in] extract_header Whether to extract ltdb header from record data
+ * @param[in] parser Record parsing function
+ * @param[in] private_data Private data for parser function
+ * @return 0 on success, non-zero return value from parser function
+ */
+int ctdb_db_traverse_local(struct ctdb_db_context *db, bool readonly,
+ bool extract_header,
+ ctdb_rec_parser_func_t parser, void *private_data);
+
+/**
+ * @brief Async computation start to a cluster-wide database traverse
+ *
+ * This function traverses a database on all the nodes and for each record
+ * calls the parser function. If the parser function returns 1, the traverse
+ * will terminate. If parser function returns 0, the traverse will continue
+ * till all records on all nodes are parsed.
+ *
+ * This is useful for distributed databases as the records are distributed
+ * among the cluster nodes.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] db Database context
+ * @param[in] destnode Node id
+ * @param[in] timeout How long to wait
+ * @param[in] parser Record parser function
+ * @param[in] private_data Private data for parser
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_db_traverse_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *db,
+ uint32_t destnode,
+ struct timeval timeout,
+ ctdb_rec_parser_func_t parser,
+ void *private_data);
+
+/**
+ * @brief Async computation end to a cluster-wide database traverse
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_db_traverse_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper for a cluster-wide database traverse
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] db Database context
+ * @param[in] destnode Node id
+ * @param[in] timeout How long to wait
+ * @param[in] parser Record parser function
+ * @param[in] private_data Private data for parser
+ * @return 0 on success, errno on failure or non-zero status from parser
+ */
+int ctdb_db_traverse(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *db,
+ uint32_t destnode, struct timeval timeout,
+ ctdb_rec_parser_func_t parser, void *private_data);
+
+/**
+ * @brief Fetch a record from a local database
+ *
+ * This function is primarily for internal use.
+ * Clients should use ctdb_fetch_lock() instead.
+ *
+ * @param[in] db Database context
+ * @param[in] key Record key
+ * @param[out] header Record header
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] data Record data
+ */
+int ctdb_ltdb_fetch(struct ctdb_db_context *db, TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TALLOC_CTX *mem_ctx, TDB_DATA *data);
+
+/**
+ * @brief Async computation start to fetch a locked record
+ *
+ * This function is used to fetch a record from a distributed database.
+ *
+ * If the record is already available on the local node, then lock the
+ * record and return the record handle.
+ *
+ * If the record is not available on the local node, send a CTDB request to
+ * migrate the record. Once the record is migrated to the local node, lock
+ * the record and return the record handle.
+ *
+ * At the end of the computation, a record handle is returned which holds
+ * the record lock. When the record handle is freed, the record is unlocked.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client context
+ * @param[in] db Database context
+ * @param[in] key Record key
+ * @param[in] readonly Whether to request readonly copy of the record
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_fetch_lock_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *db,
+ TDB_DATA key, bool readonly);
+
+/**
+ * @brief Async computation end to fetch a locked record
+ *
+ * @param[in] req Tevent request
+ * @param[out] header Record header
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] data Record data
+ * @param[out] perr errno in case of failure
+ * @return a new record handle, NULL on failure
+ */
+struct ctdb_record_handle *ctdb_fetch_lock_recv(struct tevent_req *req,
+ struct ctdb_ltdb_header *header,
+ TALLOC_CTX *mem_ctx,
+ TDB_DATA *data, int *perr);
+
+/**
+ * @brief Sync wrapper to fetch a locked record
+ *
+ * @see ctdb_fetch_lock_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client context
+ * @param[in] db Database context
+ * @param[in] key Record key
+ * @param[in] readonly Whether to request readonly copy of the record
+ * @param[out] out Record handle
+ * @param[out] header Record header
+ * @param[out] data Record data
+ * @return 0 on success, errno on failure
+ */
+int ctdb_fetch_lock(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *db, TDB_DATA key, bool readonly,
+ struct ctdb_record_handle **out,
+ struct ctdb_ltdb_header *header, TDB_DATA *data);
+
+/**
+ * @brief Update a locked record
+ *
+ * This function is used to update a record in a distributed database.
+ *
+ * This function should NOT be used to store null data, instead use
+ * ctdb_delete_record().
+ *
+ * @param[in] h Record handle
+ * @param[in] data New record data
+ * @return 0 on success, errno on failure
+ */
+int ctdb_store_record(struct ctdb_record_handle *h, TDB_DATA data);
+
+/**
+ * @brief Async computation start to delete a locked record
+ *
+ * This function is used to delete a record in a distributed database
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] h Record handle
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_delete_record_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_record_handle *h);
+
+/**
+ * @brief Async computation end to delete a locked record
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_delete_record_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper to delete a locked record
+ *
+ * @see ctdb_delete_record_send
+ *
+ * @param[in] h Record handle
+ * @return 0 on success, errno on failure
+ */
+int ctdb_delete_record(struct ctdb_record_handle *h);
+
+/**
+ * @brief Async computation start to get a global database lock
+ *
+ * Functions related to global locks are primarily used internally for
+ * implementing transaction api.
+ *
+ * Clients should use transaction api directly.
+ * @see ctdb_transaction_start_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client context
+ * @param[in] db Database context for g_lock.tdb
+ * @param[in] keyname Record key
+ * @param[in] sid Server id
+ * @param[in] readonly Lock type
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_g_lock_lock_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *db,
+ const char *keyname,
+ struct ctdb_server_id *sid,
+ bool readonly);
+
+/**
+ * @brief Async computation end to get a global database lock
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_g_lock_lock_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Async computation start to release a global database lock
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] db Database context
+ * @param[in] keyname Record key
+ * @param[in] sid Server id
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_g_lock_unlock_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *db,
+ const char *keyname,
+ struct ctdb_server_id sid);
+
+/**
+ * @brief Async computation end to release a global database lock
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_g_lock_unlock_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Async computation start to start a transaction
+ *
+ * This function is used to start a transaction on a replicated database.
+ *
+ * To perform any updates on a replicated database
+ * - start transaction
+ * - fetch record (ctdb_transaction_fetch_record)
+ * - store record (ctdb_transaction_store_record)
+ * - delete record (ctdb_transaction_delete_record)
+ * - commit transaction (ctdb_transaction_commit_send), or
+ * - cancel transaction (ctdb_transaction_cancel_send)
+ *
+ * Starting a transaction will return a transaction handle. This is used
+ * for updating records under a transaction. This handle is automatically
+ * freed once the transaction is committed or cancelled.
+ *
+ * Clients should NOT free the transaction handle.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] timeout How long to wait
+ * @param[in] db Database context
+ * @param[in] readonly Is transaction readonly
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_transaction_start_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct timeval timeout,
+ struct ctdb_db_context *db,
+ bool readonly);
+
+/**
+ * @brief Async computation end to start a transaction
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return a new transaction handle on success, NULL on failure
+ */
+struct ctdb_transaction_handle *ctdb_transaction_start_recv(
+ struct tevent_req *req,
+ int *perr);
+
+/**
+ * @brief Sync wrapper to start a transaction
+ *
+ * @see ctdb_transaction_start_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] timeout How long to wait
+ * @param[in] db Database context
+ * @param[in] readonly Is transaction readonly
+ * @param[out] result a new transaction handle
+ * @return 0 on success, errno on failure
+ */
+int ctdb_transaction_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct timeval timeout,
+ struct ctdb_db_context *db, bool readonly,
+ struct ctdb_transaction_handle **result);
+
+/**
+ * @brief Fetch a record under a transaction
+ *
+ * @see ctdb_transaction_start_send
+ *
+ * @param[in] h Transaction handle
+ * @param[in] key Record key
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] data Record data
+ * @return 0 on success, errno on failure
+ */
+int ctdb_transaction_fetch_record(struct ctdb_transaction_handle *h,
+ TDB_DATA key,
+ TALLOC_CTX *mem_ctx, TDB_DATA *data);
+
+/**
+ * @brief Store a record under a transaction
+ *
+ * @see ctdb_transaction_start_send
+ *
+ * @param[in] h Transaction handle
+ * @param[in] key Record key
+ * @param[in] data New record data
+ * @return 0 on success, errno on failure
+ */
+int ctdb_transaction_store_record(struct ctdb_transaction_handle *h,
+ TDB_DATA key, TDB_DATA data);
+
+/**
+ * @brief Delete a record under a transaction
+ *
+ * @see ctdb_transaction_start_send
+ *
+ * @param[in] h Transaction handle
+ * @param[in] key Record key
+ * @return 0 on success, errno on failure
+ */
+int ctdb_transaction_delete_record(struct ctdb_transaction_handle *h,
+ TDB_DATA key);
+
+/**
+ * @brief Async computation start to commit a transaction
+ *
+ * @see ctdb_transaction_start_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] timeout How long to wait
+ * @param[in] h Transaction handle
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_transaction_commit_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct timeval timeout,
+ struct ctdb_transaction_handle *h);
+
+/**
+ * @brief Async computation end to commit a transaction
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_transaction_commit_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper to commit a transaction
+ *
+ * @see ctdb_transaction_commit_send
+ *
+ * @param[in] h Transaction handle
+ * @return 0 on success, errno on failure
+ */
+int ctdb_transaction_commit(struct ctdb_transaction_handle *h);
+
+/**
+ * @brief Async computation start to cancel a transaction
+ *
+ * @see ctdb_transaction_start_send
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] timeout How long to wait
+ * @param[in] h Transaction handle
+ * @return a new tevent req on success, NULL on failure
+ */
+struct tevent_req *ctdb_transaction_cancel_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct timeval timeout,
+ struct ctdb_transaction_handle *h);
+
+/**
+ * @brief Async computation end to cancel a transaction
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool ctdb_transaction_cancel_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync wrapper to cancel a transaction
+ *
+ * @see ctdb_transaction_cancel_send
+ *
+ * @param[in] h Transaction handle
+ * @return 0 on success, errno on failure
+ */
+int ctdb_transaction_cancel(struct ctdb_transaction_handle *h);
+
+/**
+ * @brief Utility function to extract a list of node ids from nodemap
+ *
+ * @param[in] nodemap Node map
+ * @param[in] flags_mask Flags to match on
+ * @param[in] exclude_pnn Node id to exclude from the list
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] pnn_list List of node ids
+ * @return number of node ids on success, -1 on failure
+ */
+int list_of_nodes(struct ctdb_node_map *nodemap,
+ uint32_t flags_mask, uint32_t exclude_pnn,
+ TALLOC_CTX *mem_ctx, uint32_t **pnn_list);
+
+/**
+ * @brief Utility function to extract a list of node ids for active nodes
+ *
+ * @param[in] nodemap Node map
+ * @param[in] exclude_pnn Node id to exclude from the list
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] pnn_list List of node ids
+ * @return number of node ids on success, -1 on failure
+ */
+int list_of_active_nodes(struct ctdb_node_map *nodemap, uint32_t exclude_pnn,
+ TALLOC_CTX *mem_ctx, uint32_t **pnn_list);
+
+/**
+ * @brief Utility function to extract a list of node ids for connected nodes
+ *
+ * @param[in] nodemap Node map
+ * @param[in] exclude_pnn Node id to exclude from the list
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] pnn_list List of node ids
+ * @return number of node ids on success, -1 on failure
+ */
+int list_of_connected_nodes(struct ctdb_node_map *nodemap,
+ uint32_t exclude_pnn,
+ TALLOC_CTX *mem_ctx, uint32_t **pnn_list);
+
+/**
+ * @brief Construct a new server id
+ *
+ * @param[in] client Client connection context
+ * @param[in] task_id Task id
+ * @return a new server id
+ */
+struct ctdb_server_id ctdb_client_get_server_id(
+ struct ctdb_client_context *client,
+ uint32_t task_id);
+
+/**
+ * @brief Check if two server ids are the same
+ *
+ * @param[in] sid1 Server id 1
+ * @param[in] sid2 Server id 2
+ * @return true if the server ids are same, false otherwise
+ */
+bool ctdb_server_id_equal(struct ctdb_server_id *sid1,
+ struct ctdb_server_id *sid2);
+
+/**
+ * @brief Check if the process with server id exists
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client Client connection context
+ * @param[in] sid Server id
+ * @param[out] exists Boolean flag to indicate if the process exists
+ * @return 0 on success, errno on failure
+ */
+int ctdb_server_id_exists(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_server_id *sid, bool *exists);
+
+#endif /* __CTDB_CLIENT_H__ */
diff --git a/ctdb/client/client_call.c b/ctdb/client/client_call.c
new file mode 100644
index 0000000..088ba67
--- /dev/null
+++ b/ctdb/client/client_call.c
@@ -0,0 +1,184 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "common/reqid.h"
+#include "common/srvid.h"
+#include "common/comm.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+
+#include "client/client_private.h"
+#include "client/client.h"
+
+
+/*
+ * Handle REQ_CALL and REPLY_CALL
+ */
+
+struct ctdb_client_call_state {
+ struct ctdb_client_context *client; /* owner of the reqid registry (client->idr) */
+ uint32_t reqid; /* id registered for this call; removed by the destructor */
+ struct ctdb_reply_call *reply; /* filled in by ctdb_client_reply_call() */
+ struct tevent_req *req; /* request completed when the reply arrives */
+};
+
+static int ctdb_client_call_state_destructor(
+ struct ctdb_client_call_state *state);
+static void ctdb_client_call_done(struct tevent_req *subreq);
+
+/*
+ * Start an async REQ_CALL.
+ *
+ * Registers a request id for the call, marshals the request into a packet
+ * and hands it to comm_write_send().  The request is completed by
+ * ctdb_client_reply_call() when a reply carrying the same request id is
+ * received, or fails earlier if building or writing the packet fails.
+ *
+ * Returns a new tevent request, or NULL if the request itself or its
+ * request id could not be allocated.
+ */
+struct tevent_req *ctdb_client_call_send(TALLOC_CTX *mem_ctx,
+        struct tevent_context *ev,
+        struct ctdb_client_context *client,
+        struct ctdb_req_call *request)
+{
+    struct ctdb_req_header h;
+    struct tevent_req *req, *subreq;
+    struct ctdb_client_call_state *state;
+    uint32_t reqid;
+    uint8_t *buf;
+    size_t datalen, buflen;
+    int ret;
+
+    req = tevent_req_create(mem_ctx, &state,
+                            struct ctdb_client_call_state);
+    if (req == NULL) {
+        return NULL;
+    }
+
+    reqid = reqid_new(client->idr, state);
+    if (reqid == REQID_INVALID) {
+        talloc_free(req);
+        return NULL;
+    }
+
+    state->client = client;
+    state->reqid = reqid;
+    state->req = req;
+
+    /*
+     * Install the destructor as soon as the request id is recorded, so
+     * that every failure path below releases the id instead of leaving
+     * it in client->idr pointing at freed state.
+     */
+    talloc_set_destructor(state, ctdb_client_call_state_destructor);
+
+    state->reply = talloc_zero(state, struct ctdb_reply_call);
+    if (tevent_req_nomem(state->reply, req)) {
+        return tevent_req_post(req, ev);
+    }
+
+    ctdb_req_header_fill(&h, 0, CTDB_REQ_CALL, CTDB_CURRENT_NODE,
+                         client->pnn, reqid);
+
+    datalen = ctdb_req_call_len(&h, request);
+    ret = ctdb_allocate_pkt(state, datalen, &buf, &buflen);
+    if (ret != 0) {
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    ret = ctdb_req_call_push(&h, request, buf, &buflen);
+    if (ret != 0) {
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    subreq = comm_write_send(state, ev, client->comm, buf, buflen);
+    if (tevent_req_nomem(subreq, req)) {
+        return tevent_req_post(req, ev);
+    }
+    tevent_req_set_callback(subreq, ctdb_client_call_done, req);
+
+    return req;
+}
+
+/*
+ * Talloc destructor for call state: drop the call's request id from the
+ * client's id registry.  Always returns 0.
+ */
+static int ctdb_client_call_state_destructor(
+        struct ctdb_client_call_state *state)
+{
+    struct ctdb_client_context *owner = state->client;
+
+    reqid_remove(owner->idr, state->reqid);
+
+    return 0;
+}
+
+/*
+ * Completion callback for the packet write started in
+ * ctdb_client_call_send().  A failed write fails the request; a
+ * successful write leaves the request pending until
+ * ctdb_client_reply_call() delivers the reply.
+ */
+static void ctdb_client_call_done(struct tevent_req *subreq)
+{
+    struct tevent_req *req;
+    bool written;
+    int err;
+
+    req = tevent_req_callback_data(subreq, struct tevent_req);
+
+    written = comm_write_recv(subreq, &err);
+    TALLOC_FREE(subreq);
+    if (written) {
+        /* wait for the reply */
+        return;
+    }
+
+    tevent_req_error(req, err);
+}
+
+/*
+ * Deliver a REPLY_CALL packet to the call waiting on reqid.
+ *
+ * Packets whose request id is not registered, or does not match the id
+ * stored in the state found, are silently ignored.  Otherwise the packet
+ * is unmarshalled into the state's reply and the request is completed
+ * (or failed with the unmarshalling error).
+ */
+void ctdb_client_reply_call(struct ctdb_client_context *client,
+        uint8_t *buf, size_t buflen, uint32_t reqid)
+{
+    struct ctdb_client_call_state *state;
+    struct ctdb_req_header hdr;
+    int ret;
+
+    state = reqid_find(client->idr, reqid, struct ctdb_client_call_state);
+    if (state == NULL || state->reqid != reqid) {
+        return;
+    }
+
+    ret = ctdb_reply_call_pull(buf, buflen, &hdr, state, state->reply);
+    if (ret == 0) {
+        tevent_req_done(state->req);
+    } else {
+        tevent_req_error(state->req, ret);
+    }
+}
+
+/*
+ * Collect the result of an async REQ_CALL.
+ *
+ * On failure returns false and, if perr is not NULL, stores the error
+ * there.  On success returns true and, if reply is not NULL, moves the
+ * reply onto mem_ctx and hands it to the caller.
+ */
+bool ctdb_client_call_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
+        struct ctdb_reply_call **reply, int *perr)
+{
+    struct ctdb_client_call_state *state;
+    int err;
+
+    state = tevent_req_data(req, struct ctdb_client_call_state);
+
+    if (! tevent_req_is_unix_error(req, &err)) {
+        if (reply != NULL) {
+            *reply = talloc_steal(mem_ctx, state->reply);
+        }
+        return true;
+    }
+
+    if (perr != NULL) {
+        *perr = err;
+    }
+    return false;
+}
diff --git a/ctdb/client/client_connect.c b/ctdb/client/client_connect.c
new file mode 100644
index 0000000..a942871
--- /dev/null
+++ b/ctdb/client/client_connect.c
@@ -0,0 +1,532 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "common/reqid.h"
+#include "common/srvid.h"
+#include "common/comm.h"
+#include "common/logging.h"
+
+#include "lib/util/tevent_unix.h"
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+
+#include "client/client_private.h"
+#include "client/client.h"
+#include "client/client_sync.h"
+
+static void client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data);
+static void client_dead_handler(void *private_data);
+
+struct ctdb_client_init_state {
+ struct ctdb_client_context *client; /* context being set up; handed over by ctdb_client_init_recv() */
+};
+
+static int ctdb_client_context_destructor(struct ctdb_client_context *client);
+static void ctdb_client_init_done(struct tevent_req *subreq);
+
+/*
+ * Start an async connection to the CTDB daemon.
+ *
+ * Creates a client context, connects to the unix domain socket at
+ * sockpath, sets up the comm layer on the connected socket and sends a
+ * get-pnn control to the current node.  ctdb_client_init_done() stores
+ * the returned PNN in the client context.
+ *
+ * Returns a new tevent request, or NULL if the request could not be
+ * created.  All other failures (NULL or over-long socket path, socket
+ * errors, out of memory) are reported through the request.
+ */
+struct tevent_req *ctdb_client_init_send(TALLOC_CTX *mem_ctx,
+        struct tevent_context *ev,
+        const char *sockpath)
+{
+    struct tevent_req *req, *subreq;
+    struct ctdb_client_init_state *state;
+    struct ctdb_client_context *client;
+    struct ctdb_req_control request;
+    struct sockaddr_un addr;
+    size_t len;
+    int ret;
+
+    req = tevent_req_create(mem_ctx, &state,
+                            struct ctdb_client_init_state);
+    if (req == NULL) {
+        return NULL;
+    }
+
+    if (sockpath == NULL) {
+        D_ERR("socket path cannot be NULL\n");
+        tevent_req_error(req, EINVAL);
+        return tevent_req_post(req, ev);
+    }
+
+    client = talloc_zero(state, struct ctdb_client_context);
+    if (tevent_req_nomem(client, req)) {
+        return tevent_req_post(req, ev);
+    }
+
+    ret = reqid_init(client, INT_MAX-200, &client->idr);
+    if (ret != 0) {
+        D_ERR("reqid_init() failed, ret=%d\n", ret);
+        talloc_free(client);
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    ret = srvid_init(client, &client->srv);
+    if (ret != 0) {
+        DEBUG(DEBUG_ERR, ("srvid_init() failed, ret=%d\n", ret));
+        talloc_free(client);
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    ret = srvid_init(client, &client->tunnels);
+    if (ret != 0) {
+        DEBUG(DEBUG_ERR, ("srvid_init() failed, ret=%d\n", ret));
+        talloc_free(client);
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    memset(&addr, 0, sizeof(addr));
+    addr.sun_family = AF_UNIX;
+    len = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+    /*
+     * strlcpy() returns strlen(sockpath) whether or not the copy was
+     * truncated, so truncation must be detected by comparing the result
+     * against the size of the destination buffer.
+     */
+    if (len >= sizeof(addr.sun_path)) {
+        D_ERR("socket path too long, len=%zu\n", len);
+        talloc_free(client);
+        tevent_req_error(req, ENAMETOOLONG);
+        return tevent_req_post(req, ev);
+    }
+
+    client->fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (client->fd == -1) {
+        ret = errno;
+        D_ERR("socket() failed, errno=%d\n", ret);
+        talloc_free(client);
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    ret = connect(client->fd, (struct sockaddr *)&addr, sizeof(addr));
+    if (ret == -1) {
+        ret = errno;
+        DEBUG(DEBUG_ERR, ("connect() failed, errno=%d\n", ret));
+        /* destructor is not installed yet, so close the socket here */
+        close(client->fd);
+        talloc_free(client);
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    ret = comm_setup(client, ev, client->fd, client_read_handler, client,
+                     client_dead_handler, client, &client->comm);
+    if (ret != 0) {
+        DEBUG(DEBUG_ERR, ("comm_setup() failed, ret=%d\n", ret));
+        close(client->fd);
+        talloc_free(client);
+        tevent_req_error(req, ret);
+        return tevent_req_post(req, ev);
+    }
+
+    client->pnn = CTDB_UNKNOWN_PNN;
+
+    /* from here on freeing the client closes the socket */
+    talloc_set_destructor(client, ctdb_client_context_destructor);
+
+    state->client = client;
+
+    ctdb_req_control_get_pnn(&request);
+    subreq = ctdb_client_control_send(state, ev, client,
+                                      CTDB_CURRENT_NODE,
+                                      tevent_timeval_zero(),
+                                      &request);
+    if (tevent_req_nomem(subreq, req)) {
+        TALLOC_FREE(state->client);
+        return tevent_req_post(req, ev);
+    }
+    tevent_req_set_callback(subreq, ctdb_client_init_done, req);
+
+    return req;
+}
+
+/*
+ * Talloc destructor for the client context: close the daemon socket if
+ * it is still open and mark it closed.  Always returns 0.
+ */
+static int ctdb_client_context_destructor(struct ctdb_client_context *client)
+{
+    if (client->fd == -1) {
+        return 0;
+    }
+
+    close(client->fd);
+    client->fd = -1;
+
+    return 0;
+}
+
+/*
+ * Completion callback for the get-pnn control sent by
+ * ctdb_client_init_send(): store the PNN from the reply in the client
+ * context and finish the request, or fail it with the error reported.
+ */
+static void ctdb_client_init_done(struct tevent_req *subreq)
+{
+    struct tevent_req *req;
+    struct ctdb_client_init_state *state;
+    struct ctdb_reply_control *reply;
+    bool ok;
+    int ret;
+
+    req = tevent_req_callback_data(subreq, struct tevent_req);
+    state = tevent_req_data(req, struct ctdb_client_init_state);
+
+    ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+    TALLOC_FREE(subreq);
+    if (! ok) {
+        tevent_req_error(req, ret);
+        return;
+    }
+
+    ret = ctdb_reply_control_get_pnn(reply, &state->client->pnn);
+    if (ret == 0) {
+        tevent_req_done(req);
+    } else {
+        tevent_req_error(req, ret);
+    }
+}
+
+/*
+ * Collect the result of an async client initialisation.
+ *
+ * On failure returns false and, if perr is not NULL, stores the error
+ * there.  On success returns true and moves the client context onto
+ * mem_ctx, returning it through result.
+ */
+bool ctdb_client_init_recv(struct tevent_req *req, int *perr,
+        TALLOC_CTX *mem_ctx,
+        struct ctdb_client_context **result)
+{
+    struct ctdb_client_init_state *state;
+    int err;
+
+    state = tevent_req_data(req, struct ctdb_client_init_state);
+
+    if (! tevent_req_is_unix_error(req, &err)) {
+        *result = talloc_steal(mem_ctx, state->client);
+        return true;
+    }
+
+    if (perr != NULL) {
+        *perr = err;
+    }
+    return false;
+}
+
+
+/*
+ * Sync wrapper around ctdb_client_init_send()/ctdb_client_init_recv().
+ *
+ * Returns 0 and sets *out on success; returns ENOMEM if the request
+ * could not be started, otherwise the error reported by the request.
+ */
+int ctdb_client_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+        const char *sockpath, struct ctdb_client_context **out)
+{
+    struct tevent_req *req;
+    bool ok;
+    int err;
+
+    req = ctdb_client_init_send(mem_ctx, ev, sockpath);
+    if (req == NULL) {
+        return ENOMEM;
+    }
+
+    tevent_req_poll(req, ev);
+
+    ok = ctdb_client_init_recv(req, &err, mem_ctx, out);
+    TALLOC_FREE(req);
+
+    if (ok) {
+        return 0;
+    }
+    return err;
+}
+
+/*
+ * Comm read callback: validate the header of an incoming packet and
+ * dispatch the packet by operation to the matching client handler.
+ *
+ * Packets with an unparsable header, a length that differs from the
+ * buffer length, or a header that fails verification are logged and
+ * dropped.  Operations without a handler here are ignored.
+ */
+static void client_read_handler(uint8_t *buf, size_t buflen,
+        void *private_data)
+{
+    struct ctdb_client_context *client;
+    struct ctdb_req_header hdr;
+    size_t np;
+    int ret;
+
+    client = talloc_get_type_abort(private_data,
+                                   struct ctdb_client_context);
+
+    ret = ctdb_req_header_pull(buf, buflen, &hdr, &np);
+    if (ret != 0) {
+        DEBUG(DEBUG_WARNING, ("invalid header, ret=%d\n", ret));
+        return;
+    }
+
+    if (hdr.length != buflen) {
+        DEBUG(DEBUG_WARNING, ("packet size mismatch %zu != %d\n",
+                              buflen, hdr.length));
+        return;
+    }
+
+    ret = ctdb_req_header_verify(&hdr, 0);
+    if (ret != 0) {
+        DEBUG(DEBUG_WARNING, ("invalid header, ret=%d\n", ret));
+        return;
+    }
+
+    if (hdr.operation == CTDB_REPLY_CALL) {
+        ctdb_client_reply_call(client, buf, buflen, hdr.reqid);
+    } else if (hdr.operation == CTDB_REQ_MESSAGE) {
+        ctdb_client_req_message(client, buf, buflen, hdr.reqid);
+    } else if (hdr.operation == CTDB_REPLY_CONTROL) {
+        ctdb_client_reply_control(client, buf, buflen, hdr.reqid);
+    } else if (hdr.operation == CTDB_REQ_TUNNEL) {
+        ctdb_client_req_tunnel(client, buf, buflen, hdr.reqid);
+    }
+}
+
+/*
+ * Comm dead callback, invoked when the connection to the daemon is gone.
+ *
+ * Calls the disconnect callback if one was registered with
+ * ctdb_client_set_disconnect_callback(); otherwise logs a notice and
+ * exits the process with status 1.
+ */
+static void client_dead_handler(void *private_data)
+{
+    struct ctdb_client_context *client;
+    ctdb_client_callback_func_t cb;
+    void *cb_data;
+
+    client = talloc_get_type_abort(private_data,
+                                   struct ctdb_client_context);
+
+    /* read both before calling out */
+    cb = client->callback;
+    cb_data = client->private_data;
+
+    if (cb == NULL) {
+        DEBUG(DEBUG_NOTICE, ("connection to daemon closed, exiting\n"));
+        exit(1);
+    }
+
+    cb(cb_data);
+}
+
+/*
+ * Register the function (and its argument) that client_dead_handler()
+ * calls when the daemon connection is lost.  Passing a NULL callback
+ * restores the default of exiting the process.
+ */
+void ctdb_client_set_disconnect_callback(struct ctdb_client_context *client,
+        ctdb_client_callback_func_t callback,
+        void *private_data)
+{
+    client->private_data = private_data;
+    client->callback = callback;
+}
+
+/*
+ * Return the PNN stored in the client context.  It is CTDB_UNKNOWN_PNN
+ * until the get-pnn reply has been processed during initialisation.
+ */
+uint32_t ctdb_client_pnn(struct ctdb_client_context *client)
+{
+    uint32_t pnn = client->pnn;
+
+    return pnn;
+}
+
+/*
+ * Run the event loop one iteration at a time until *done becomes true.
+ * Returns immediately if *done is already true.
+ */
+void ctdb_client_wait(struct tevent_context *ev, bool *done)
+{
+    for (;;) {
+        if (*done) {
+            break;
+        }
+        tevent_loop_once(ev);
+    }
+}
+
+/*
+ * Timer callback for ctdb_client_wait_func_timeout(): set the bool that
+ * private_data points at to record that the timeout fired.
+ */
+static void ctdb_client_wait_timeout_handler(struct tevent_context *ev,
+        struct tevent_timer *te,
+        struct timeval t,
+        void *private_data)
+{
+    *(bool *)private_data = true;
+}
+
+/*
+ * Run the event loop until done_func(private_data) returns true or the
+ * timer scheduled for timeout fires.
+ *
+ * Returns ENOMEM if the temporary context or the timer cannot be
+ * allocated, ETIMEDOUT if the timer fired, and 0 otherwise.
+ */
+int ctdb_client_wait_func_timeout(struct tevent_context *ev,
+        bool (*done_func)(void *private_data),
+        void *private_data,
+        struct timeval timeout)
+{
+    bool timed_out = false;
+    struct tevent_timer *timer;
+    TALLOC_CTX *tmp_ctx;
+
+    tmp_ctx = talloc_new(ev);
+    if (tmp_ctx == NULL) {
+        return ENOMEM;
+    }
+
+    /* the timer lives on tmp_ctx, so freeing tmp_ctx cancels it */
+    timer = tevent_add_timer(ev, tmp_ctx, timeout,
+                             ctdb_client_wait_timeout_handler,
+                             &timed_out);
+    if (timer == NULL) {
+        talloc_free(tmp_ctx);
+        return ENOMEM;
+    }
+
+    for (;;) {
+        if (done_func(private_data) || timed_out) {
+            break;
+        }
+        tevent_loop_once(ev);
+    }
+
+    talloc_free(tmp_ctx);
+
+    return timed_out ? ETIMEDOUT : 0;
+}
+
+/*
+ * done_func adapter for ctdb_client_wait_timeout(): private_data points
+ * at a bool, whose current value is returned.
+ */
+static bool client_wait_done(void *private_data)
+{
+    return *(bool *)private_data;
+}
+
+/*
+ * Run the event loop until *done becomes true or the timeout fires.
+ * Thin wrapper around ctdb_client_wait_func_timeout(); returns its result.
+ */
+int ctdb_client_wait_timeout(struct tevent_context *ev,
+        bool *done,
+        struct timeval timeout)
+{
+    return ctdb_client_wait_func_timeout(ev, client_wait_done, done,
+                                         timeout);
+}
+
+struct ctdb_recovery_wait_state {
+ struct tevent_context *ev; /* event context used to schedule retries */
+ struct ctdb_client_context *client; /* connection whose node is polled for recovery mode */
+};
+
+static void ctdb_recovery_wait_recmode(struct tevent_req *subreq);
+static void ctdb_recovery_wait_retry(struct tevent_req *subreq);
+
+/*
+ * Start an async wait for recovery to finish.
+ *
+ * Sends a get-recmode control to the client's own node; the reply is
+ * handled by ctdb_recovery_wait_recmode(), which either completes the
+ * request or schedules another poll.
+ *
+ * Returns a new tevent request, or NULL if it could not be created.
+ */
+struct tevent_req *ctdb_recovery_wait_send(TALLOC_CTX *mem_ctx,
+        struct tevent_context *ev,
+        struct ctdb_client_context *client)
+{
+    struct ctdb_recovery_wait_state *state;
+    struct ctdb_req_control request;
+    struct tevent_req *req;
+    struct tevent_req *subreq;
+
+    req = tevent_req_create(mem_ctx, &state,
+                            struct ctdb_recovery_wait_state);
+    if (req == NULL) {
+        return NULL;
+    }
+
+    state->client = client;
+    state->ev = ev;
+
+    ctdb_req_control_get_recmode(&request);
+    subreq = ctdb_client_control_send(state, ev, client, client->pnn,
+                                      tevent_timeval_zero(), &request);
+    if (tevent_req_nomem(subreq, req)) {
+        return tevent_req_post(req, ev);
+    }
+
+    tevent_req_set_callback(subreq, ctdb_recovery_wait_recmode, req);
+    return req;
+}
+
+/*
+ * Handle the reply to a get-recmode control.
+ *
+ * Completes the request when the node reports CTDB_RECOVERY_NORMAL.
+ * Otherwise schedules a wakeup one second later, after which
+ * ctdb_recovery_wait_retry() polls again.  Any error receiving or
+ * decoding the reply fails the request.
+ */
+static void ctdb_recovery_wait_recmode(struct tevent_req *subreq)
+{
+    struct tevent_req *req = tevent_req_callback_data(
+        subreq, struct tevent_req);
+    struct ctdb_recovery_wait_state *state = tevent_req_data(
+        req, struct ctdb_recovery_wait_state);
+    struct ctdb_reply_control *reply = NULL;
+    int recmode;
+    int ret;
+    bool status;
+
+    status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+    TALLOC_FREE(subreq);
+    if (! status) {
+        tevent_req_error(req, ret);
+        return;
+    }
+
+    ret = ctdb_reply_control_get_recmode(reply, &recmode);
+    /*
+     * The reply was allocated on state, which lives as long as the
+     * request.  Free it now so that replies do not pile up, one per
+     * poll, while recovery is in progress.
+     */
+    TALLOC_FREE(reply);
+    if (ret != 0) {
+        tevent_req_error(req, ret);
+        return;
+    }
+
+    if (recmode == CTDB_RECOVERY_NORMAL) {
+        tevent_req_done(req);
+        return;
+    }
+
+    /* still in recovery: poll again in one second */
+    subreq = tevent_wakeup_send(state, state->ev,
+                                tevent_timeval_current_ofs(1, 0));
+    if (tevent_req_nomem(subreq, req)) {
+        return;
+    }
+    tevent_req_set_callback(subreq, ctdb_recovery_wait_retry, req);
+}
+
+/*
+ * Wakeup callback between recovery-mode polls: send the next
+ * get-recmode control to the client's own node.  A failed wakeup fails
+ * the request with ENOMEM.
+ */
+static void ctdb_recovery_wait_retry(struct tevent_req *subreq)
+{
+    struct ctdb_recovery_wait_state *state;
+    struct ctdb_req_control request;
+    struct tevent_req *req;
+    bool woke;
+
+    req = tevent_req_callback_data(subreq, struct tevent_req);
+    state = tevent_req_data(req, struct ctdb_recovery_wait_state);
+
+    woke = tevent_wakeup_recv(subreq);
+    TALLOC_FREE(subreq);
+    if (! woke) {
+        tevent_req_error(req, ENOMEM);
+        return;
+    }
+
+    ctdb_req_control_get_recmode(&request);
+    subreq = ctdb_client_control_send(state, state->ev, state->client,
+                                      state->client->pnn,
+                                      tevent_timeval_zero(), &request);
+    if (tevent_req_nomem(subreq, req)) {
+        return;
+    }
+
+    tevent_req_set_callback(subreq, ctdb_recovery_wait_recmode, req);
+}
+
+/*
+ * Collect the result of an async recovery wait.  Returns true on
+ * success; on failure returns false and, if perr is not NULL, stores the
+ * error there.
+ */
+bool ctdb_recovery_wait_recv(struct tevent_req *req, int *perr)
+{
+    int err;
+
+    if (! tevent_req_is_unix_error(req, &err)) {
+        return true;
+    }
+
+    if (perr != NULL) {
+        *perr = err;
+    }
+    return false;
+}
+
+/*
+ * Sync wrapper: block in the event loop until recovery has finished.
+ *
+ * Returns true once the node reports normal recovery mode, false if the
+ * wait could not be started or failed.
+ */
+bool ctdb_recovery_wait(struct tevent_context *ev,
+        struct ctdb_client_context *client)
+{
+    TALLOC_CTX *mem_ctx;
+    struct tevent_req *req;
+    bool status;
+
+    mem_ctx = talloc_new(client);
+    if (mem_ctx == NULL) {
+        return false;
+    }
+
+    req = ctdb_recovery_wait_send(mem_ctx, ev, client);
+    if (req == NULL) {
+        /*
+         * mem_ctx is a child of client; release it here instead of
+         * leaving it allocated until the client is freed.
+         */
+        talloc_free(mem_ctx);
+        return false;
+    }
+
+    tevent_req_poll(req, ev);
+
+    status = ctdb_recovery_wait_recv(req, NULL);
+
+    talloc_free(mem_ctx);
+    return status;
+}
diff --git a/ctdb/client/client_control.c b/ctdb/client/client_control.c
new file mode 100644
index 0000000..ab0aac8
--- /dev/null
+++ b/ctdb/client/client_control.c
@@ -0,0 +1,439 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "common/reqid.h"
+#include "common/srvid.h"
+#include "common/comm.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+
+#include "client/client_private.h"
+#include "client/client.h"
+
+
+/*
+ * Handle REQ_CONTROL and REPLY_CONTROL
+ */
+
+/*
+ * Per-request state of a single control sent with
+ * ctdb_client_control_send().
+ */
+struct ctdb_client_control_state {
+ /* client whose idr holds the reqid and whose comm carries the packet */
+ struct ctdb_client_context *client;
+ /* copied from request->opcode, passed to ctdb_reply_control_pull() */
+ uint32_t opcode;
+ /* copied from request->flags, checked for CTDB_CTRL_FLAG_NOREPLY */
+ uint32_t flags;
+ /* id obtained from reqid_new(), removed again by the destructor */
+ uint32_t reqid;
+ /* allocated at send time, filled by the reply handler, handed out by recv */
+ struct ctdb_reply_control *reply;
+ /* back pointer so the reply handler can complete the request */
+ struct tevent_req *req;
+};
+
+static int ctdb_client_control_state_destructor(
+ struct ctdb_client_control_state *state);
+static void ctdb_client_control_done(struct tevent_req *subreq);
+
+/*
+ * Start sending a control to destnode.
+ *
+ * A request id is allocated from client->idr, the control is marshalled
+ * into a CTDB_REQ_CONTROL packet and the packet is queued on client->comm.
+ * If timeout is non-zero it is installed as the end time of the request.
+ *
+ * Returns NULL if the request or the request id cannot be allocated.
+ * Any later failure is reported through the returned request, which is
+ * then already finished and posted to ev.
+ */
+struct tevent_req *ctdb_client_control_send(TALLOC_CTX *mem_ctx,
+					    struct tevent_context *ev,
+					    struct ctdb_client_context *client,
+					    uint32_t destnode,
+					    struct timeval timeout,
+					    struct ctdb_req_control *request)
+{
+	struct ctdb_req_header h;
+	struct tevent_req *req, *subreq;
+	struct ctdb_client_control_state *state;
+	uint32_t reqid;
+	uint8_t *buf;
+	size_t datalen, buflen;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_client_control_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	reqid = reqid_new(client->idr, state);
+	if (reqid == REQID_INVALID) {
+		talloc_free(req);
+		return NULL;
+	}
+
+	state->client = client;
+	state->flags = request->flags;
+	state->opcode = request->opcode;
+	state->reqid = reqid;
+	state->req = req;
+
+	/*
+	 * Install the destructor as soon as the reqid is registered and
+	 * the fields it reads are set.  Every error path below hands the
+	 * request back to the caller, and freeing it must always remove
+	 * the reqid so client->idr never refers to freed state.
+	 */
+	talloc_set_destructor(state, ctdb_client_control_state_destructor);
+
+	state->reply = talloc_zero(state, struct ctdb_reply_control);
+	if (tevent_req_nomem(state->reply, req)) {
+		return tevent_req_post(req, ev);
+	}
+	state->reply->rdata.opcode = request->rdata.opcode;
+
+	ctdb_req_header_fill(&h, 0, CTDB_REQ_CONTROL, destnode,
+			     client->pnn, reqid);
+
+	datalen = ctdb_req_control_len(&h, request);
+	ret = ctdb_allocate_pkt(state, datalen, &buf, &buflen);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	ret = ctdb_req_control_push(&h, request, buf, &buflen);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	/* A zero timeout means the request never times out on its own */
+	if (!tevent_timeval_is_zero(&timeout)) {
+		if (!tevent_req_set_endtime(req, ev, timeout)) {
+			return tevent_req_post(req, ev);
+		}
+	}
+
+	subreq = comm_write_send(state, ev, client->comm, buf, buflen);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_client_control_done, req);
+
+	return req;
+}
+
+/*
+ * talloc destructor for the control state: releases the request id that
+ * was registered for this state in the client's idr.  Always returns 0.
+ */
+static int ctdb_client_control_state_destructor(
+			struct ctdb_client_control_state *state)
+{
+	struct ctdb_client_context *client = state->client;
+
+	reqid_remove(client->idr, state->reqid);
+
+	return 0;
+}
+
+/*
+ * Completion callback for the packet write started in
+ * ctdb_client_control_send().
+ *
+ * A failed write fails the request.  After a successful write the request
+ * is finished right away only for CTDB_CTRL_FLAG_NOREPLY controls;
+ * otherwise it stays pending until a reply arrives or it times out.
+ */
+static void ctdb_client_control_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_client_control_state *state = tevent_req_data(
+		req, struct ctdb_client_control_state);
+	int err;
+	bool written;
+
+	written = comm_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! written) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	if ((state->flags & CTDB_CTRL_FLAG_NOREPLY) == 0) {
+		/* wait for the reply or timeout */
+		return;
+	}
+
+	/* Daemon will not reply, so we set status to 0 */
+	state->reply->status = 0;
+	tevent_req_done(req);
+}
+
+/*
+ * Handle a control reply packet received for request id reqid.
+ *
+ * The pending control state is looked up in client->idr; packets that do
+ * not match a pending control are ignored.  The packet is unmarshalled
+ * into the reply of the pending request, which is then completed, or
+ * failed with the unmarshalling error.
+ */
+void ctdb_client_reply_control(struct ctdb_client_context *client,
+			       uint8_t *buf, size_t buflen, uint32_t reqid)
+{
+	struct ctdb_client_control_state *state;
+	struct ctdb_req_header hdr;
+	int rc;
+
+	state = reqid_find(client->idr, reqid,
+			   struct ctdb_client_control_state);
+	if (state == NULL || state->reqid != reqid) {
+		return;
+	}
+
+	rc = ctdb_reply_control_pull(buf, buflen, state->opcode, &hdr,
+				     state->reply, state->reply);
+	if (rc == 0) {
+		tevent_req_done(state->req);
+	} else {
+		tevent_req_error(state->req, rc);
+	}
+}
+
+/*
+ * Collect the result of a control request.
+ *
+ * On error, returns false and stores the unix error in *perr when perr
+ * is not NULL.  On success, returns true and, when reply is not NULL,
+ * moves the reply to mem_ctx and returns it in *reply.
+ */
+bool ctdb_client_control_recv(struct tevent_req *req, int *perr,
+			      TALLOC_CTX *mem_ctx,
+			      struct ctdb_reply_control **reply)
+{
+	struct ctdb_client_control_state *state = tevent_req_data(
+		req, struct ctdb_client_control_state);
+	int unix_err;
+
+	if (tevent_req_is_unix_error(req, &unix_err)) {
+		if (perr != NULL) {
+			*perr = unix_err;
+		}
+		return false;
+	}
+
+	if (reply == NULL) {
+		return true;
+	}
+
+	*reply = talloc_steal(mem_ctx, state->reply);
+	return true;
+}
+
+/*
+ * Handle multiple nodes - errors and replies are collected per node
+ */
+
+/*
+ * State of a control that is sent to several nodes at once.
+ * err_list and reply are arrays of count entries, indexed like pnn_list.
+ */
+struct ctdb_client_control_multi_state {
+ /* caller supplied node list; the pointer is stored, not copied */
+ uint32_t *pnn_list;
+ /* number of entries in pnn_list */
+ int count;
+ /* number of per-node requests that have completed so far */
+ int done;
+ /* first non-zero error or reply status seen, 0 if none */
+ int err;
+ /* per-node error values */
+ int *err_list;
+ /* per-node replies */
+ struct ctdb_reply_control **reply;
+};
+
+/*
+ * Callback data attached to each per-node request of a multi-node control.
+ */
+struct control_index_state {
+ /* the multi-node request this per-node request belongs to */
+ struct tevent_req *req;
+ /* position of the node in pnn_list, used to index err_list and reply */
+ int index;
+};
+
+static void ctdb_client_control_multi_done(struct tevent_req *subreq);
+
+/*
+ * Start sending the same control to every node listed in pnn_list.
+ *
+ * One per-node request is created for each of the count entries; each
+ * carries its index so that the completion callback can store the result
+ * in the matching slot of the error and reply arrays.
+ *
+ * Returns NULL if pnn_list is NULL, count is 0 or the request cannot be
+ * created.  Later allocation failures are reported through the returned
+ * request, which is then already finished and posted to ev.
+ */
+struct tevent_req *ctdb_client_control_multi_send(
+				TALLOC_CTX *mem_ctx,
+				struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				uint32_t *pnn_list, int count,
+				struct timeval timeout,
+				struct ctdb_req_control *request)
+{
+	struct ctdb_client_control_multi_state *state;
+	struct tevent_req *req;
+	int idx;
+
+	if (count == 0 || pnn_list == NULL) {
+		return NULL;
+	}
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_client_control_multi_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->pnn_list = pnn_list;
+	state->count = count;
+	state->done = 0;
+	state->err = 0;
+
+	state->err_list = talloc_zero_array(state, int, count);
+	if (tevent_req_nomem(state->err_list, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	state->reply = talloc_zero_array(state, struct ctdb_reply_control *,
+					 count);
+	if (tevent_req_nomem(state->reply, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	for (idx = 0; idx < count; idx++) {
+		struct tevent_req *node_req;
+		struct control_index_state *slot;
+
+		node_req = ctdb_client_control_send(state, ev, client,
+						    pnn_list[idx], timeout,
+						    request);
+		if (tevent_req_nomem(node_req, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		/* The slot is owned by the per-node request */
+		slot = talloc(node_req, struct control_index_state);
+		if (tevent_req_nomem(slot, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		slot->req = req;
+		slot->index = idx;
+
+		tevent_req_set_callback(node_req,
+					ctdb_client_control_multi_done,
+					slot);
+	}
+
+	return req;
+}
+
+/*
+ * Completion callback for one per-node request of a multi-node control.
+ *
+ * The reply is stored in the slot of the node.  A failed request or a
+ * non-zero reply status is recorded in err_list for that node, so that
+ * every failing node is visible to the caller and not only the first
+ * one.  state->err keeps the first error seen.  The multi-node request
+ * is completed once all per-node requests have finished.
+ */
+static void ctdb_client_control_multi_done(struct tevent_req *subreq)
+{
+	struct control_index_state *substate = tevent_req_callback_data(
+		subreq, struct control_index_state);
+	/* substate is a child of subreq, copy what is needed before freeing */
+	struct tevent_req *req = substate->req;
+	int idx = substate->index;
+	struct ctdb_client_control_multi_state *state = tevent_req_data(
+		req, struct ctdb_client_control_multi_state);
+	bool status;
+	int ret;
+	int node_err;
+
+	status = ctdb_client_control_recv(subreq, &ret, state->reply,
+					  &state->reply[idx]);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		node_err = ret;
+	} else {
+		node_err = state->reply[idx]->status;
+	}
+
+	if (node_err != 0) {
+		state->err_list[idx] = node_err;
+		if (state->err == 0) {
+			state->err = node_err;
+		}
+	}
+
+	state->done += 1;
+
+	if (state->done == state->count) {
+		tevent_req_done(req);
+	}
+}
+
+/*
+ * Collect the result of a multi-node control.
+ *
+ * *perr receives the unix error of the request itself or, if the request
+ * completed, the error recorded from the nodes.  *perr_list receives the
+ * per-node error array in both cases; *preply receives the per-node
+ * replies only if the request completed.  Both arrays are moved to
+ * mem_ctx.  Any of the output pointers may be NULL.
+ *
+ * Returns true only if the request completed and no error was recorded.
+ */
+bool ctdb_client_control_multi_recv(struct tevent_req *req, int *perr,
+				    TALLOC_CTX *mem_ctx, int **perr_list,
+				    struct ctdb_reply_control ***preply)
+{
+	struct ctdb_client_control_multi_state *state = tevent_req_data(
+		req, struct ctdb_client_control_multi_state);
+	int unix_err;
+	bool failed;
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+
+	if (perr != NULL) {
+		*perr = failed ? unix_err : state->err;
+	}
+
+	if (perr_list != NULL) {
+		*perr_list = talloc_steal(mem_ctx, state->err_list);
+	}
+
+	if (failed) {
+		return false;
+	}
+
+	if (preply != NULL) {
+		*preply = talloc_steal(mem_ctx, state->reply);
+	}
+
+	return (state->err == 0);
+}
+
+/*
+ * Find an error in the per-node error list of a multi-node control.
+ *
+ * Scans the count entries of err_list.  Returns the last non-zero entry
+ * and, when pnn is not NULL, stores the matching entry of pnn_list in
+ * *pnn.  Returns 0 and leaves *pnn untouched if no entry is non-zero or
+ * if either list is NULL.
+ */
+int ctdb_client_control_multi_error(uint32_t *pnn_list, int count,
+				    int *err_list, uint32_t *pnn)
+{
+	int ret = 0, i;
+
+	/* Nothing to report without both lists */
+	if (pnn_list == NULL || err_list == NULL) {
+		return 0;
+	}
+
+	for (i=0; i<count; i++) {
+		if (err_list[i] != 0) {
+			ret = err_list[i];
+			if (pnn != NULL) {
+				*pnn = pnn_list[i];
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Sync version of control send/recv
+ */
+
+/*
+ * Send a control to destnode and wait for its completion.
+ *
+ * Runs the event loop ev until the request has finished.  When reply is
+ * not NULL, the reply is returned in *reply as a child of mem_ctx.
+ *
+ * Returns 0 on success, ENOMEM if the request cannot be created, or the
+ * unix error the request finished with.
+ */
+int ctdb_client_control(TALLOC_CTX *mem_ctx,
+			struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			uint32_t destnode,
+			struct timeval timeout,
+			struct ctdb_req_control *request,
+			struct ctdb_reply_control **reply)
+{
+	struct tevent_req *req;
+	int ret;
+	bool status;
+
+	req = ctdb_client_control_send(mem_ctx, ev, client, destnode, timeout,
+				       request);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	/*
+	 * The result of polling is not checked here: a request that did
+	 * not finish is reported as an error by the recv function below.
+	 */
+	tevent_req_poll(req, ev);
+
+	status = ctdb_client_control_recv(req, &ret, mem_ctx, reply);
+
+	/*
+	 * A requested reply has been moved to mem_ctx by now.  Free the
+	 * request so that its packet buffer and its registered request id
+	 * do not stay around for the lifetime of mem_ctx.
+	 */
+	TALLOC_FREE(req);
+
+	if (! status) {
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Send a control to all nodes in pnn_list and wait for completion.
+ *
+ * Runs the event loop ev until the multi-node request has finished.  The
+ * per-node error list and the per-node replies are returned through
+ * perr_list and preply as done by ctdb_client_control_multi_recv(); both
+ * may be NULL.
+ *
+ * Returns 0 on success, ENOMEM if the request cannot be created, or the
+ * error reported by ctdb_client_control_multi_recv().
+ */
+int ctdb_client_control_multi(TALLOC_CTX *mem_ctx,
+			      struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      uint32_t *pnn_list, int count,
+			      struct timeval timeout,
+			      struct ctdb_req_control *request,
+			      int **perr_list,
+			      struct ctdb_reply_control ***preply)
+{
+	struct tevent_req *req;
+	bool status;
+	int ret;
+
+	req = ctdb_client_control_multi_send(mem_ctx, ev, client,
+					     pnn_list, count,
+					     timeout, request);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	/*
+	 * The result of polling is not checked here: a request that did
+	 * not finish is reported as an error by the recv function below.
+	 */
+	tevent_req_poll(req, ev);
+
+	status = ctdb_client_control_multi_recv(req, &ret, mem_ctx, perr_list,
+						preply);
+
+	/*
+	 * Requested results have been moved to mem_ctx by now.  Free the
+	 * request, and with it all per-node requests, instead of keeping
+	 * them around for the lifetime of mem_ctx.
+	 */
+	TALLOC_FREE(req);
+
+	if (! status) {
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/ctdb/client/client_control_sync.c b/ctdb/client/client_control_sync.c
new file mode 100644
index 0000000..c786fc7
--- /dev/null
+++ b/ctdb/client/client_control_sync.c
@@ -0,0 +1,2676 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "common/logging.h"
+
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "client/client_private.h"
+#include "client/client.h"
+#include "client/client_sync.h"
+
+/*
+ * Synchronous PROCESS_EXISTS control for pid.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with status, to ctdb_reply_control_process_exists().
+ * Returns 0 on success; a non-zero result from either step is logged and
+ * returned.
+ */
+int ctdb_ctrl_process_exists(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			     struct ctdb_client_context *client,
+			     int destnode, struct timeval timeout,
+			     pid_t pid, int *status)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_process_exists(&ctl_req, pid);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control PROCESS_EXISTS failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_process_exists(ctl_reply, status);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control PROCESS_EXISTS failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous STATISTICS control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with mem_ctx and stats, to
+ * ctdb_reply_control_statistics().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_statistics(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			 struct ctdb_client_context *client,
+			 int destnode, struct timeval timeout,
+			 struct ctdb_statistics **stats)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_statistics(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control STATISTICS failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_statistics(ctl_reply, mem_ctx, stats);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control STATISTICS failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous PING control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with num_clients, to ctdb_reply_control_ping().
+ * Returns 0 on success; a non-zero result from either step is logged and
+ * returned.
+ */
+int ctdb_ctrl_ping(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		   struct ctdb_client_context *client,
+		   int destnode, struct timeval timeout,
+		   int *num_clients)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_ping(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control PING failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_ping(ctl_reply, num_clients);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control PING failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GETDBPATH control for database db_id.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with mem_ctx and db_path, to
+ * ctdb_reply_control_getdbpath().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_getdbpath(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			int destnode, struct timeval timeout,
+			uint32_t db_id,
+			const char **db_path)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_getdbpath(&ctl_req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GETDBPATH failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_getdbpath(ctl_reply, mem_ctx, db_path);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GETDBPATH failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GETVNNMAP control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with mem_ctx and vnnmap, to
+ * ctdb_reply_control_getvnnmap().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_getvnnmap(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			int destnode, struct timeval timeout,
+			struct ctdb_vnn_map **vnnmap)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_getvnnmap(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GETVNNMAP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_getvnnmap(ctl_reply, mem_ctx, vnnmap);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GETVNNMAP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GET_DEBUG control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with loglevel, to ctdb_reply_control_get_debug().
+ * Returns 0 on success; a non-zero result from either step is logged and
+ * returned.
+ */
+int ctdb_ctrl_getdebug(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		       struct ctdb_client_context *client,
+		       int destnode, struct timeval timeout,
+		       int *loglevel)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_get_debug(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DEBUG failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_debug(ctl_reply, loglevel);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DEBUG failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous SET_DEBUG control carrying loglevel.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_set_debug().  Returns 0 on success; a
+ * non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_setdebug(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		       struct ctdb_client_context *client,
+		       int destnode, struct timeval timeout,
+		       int loglevel)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_set_debug(&ctl_req, loglevel);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_DEBUG failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_debug(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_DEBUG failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GET_DBMAP control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with mem_ctx and dbmap, to
+ * ctdb_reply_control_get_dbmap().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_get_dbmap(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			int destnode, struct timeval timeout,
+			struct ctdb_dbid_map **dbmap)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_get_dbmap(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DBMAP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_dbmap(ctl_reply, mem_ctx, dbmap);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DBMAP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GET_RECMODE control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with recmode, to ctdb_reply_control_get_recmode().
+ * Returns 0 on success; a non-zero result from either step is logged and
+ * returned.
+ */
+int ctdb_ctrl_get_recmode(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct timeval timeout,
+			  int *recmode)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_get_recmode(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_RECMODE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_recmode(ctl_reply, recmode);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_RECMODE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous SET_RECMODE control carrying recmode.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_set_recmode().  Returns 0 on success; a
+ * non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_set_recmode(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct timeval timeout,
+			  int recmode)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_set_recmode(&ctl_req, recmode);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_RECMODE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_recmode(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_RECMODE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous STATISTICS_RESET control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_statistics_reset().  Returns 0 on
+ * success; a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_statistics_reset(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			       struct ctdb_client_context *client,
+			       int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_statistics_reset(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control STATISTICS_RESET failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_statistics_reset(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control STATISTICS_RESET failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous DB_ATTACH control for database db_name.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with db_id, to ctdb_reply_control_db_attach().
+ * Returns 0 on success; a non-zero result from either step is logged and
+ * returned.
+ */
+int ctdb_ctrl_db_attach(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			int destnode, struct timeval timeout,
+			const char *db_name, uint32_t *db_id)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_db_attach(&ctl_req, db_name);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_ATTACH failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_db_attach(ctl_reply, db_id);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_ATTACH failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous TRAVERSE_START control carrying traverse.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_traverse_start().  Returns 0 on success;
+ * a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_traverse_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			     struct ctdb_client_context *client,
+			     int destnode, struct timeval timeout,
+			     struct ctdb_traverse_start *traverse)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_traverse_start(&ctl_req, traverse);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRAVERSE_START failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_traverse_start(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRAVERSE_START failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous REGISTER_SRVID control for srvid.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_register_srvid().  Returns 0 on success;
+ * a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_register_srvid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			     struct ctdb_client_context *client,
+			     int destnode, struct timeval timeout,
+			     uint64_t srvid)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_register_srvid(&ctl_req, srvid);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control REGISTER_SRVID failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_register_srvid(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control REGISTER_SRVID failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous DEREGISTER_SRVID control for srvid.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_deregister_srvid().  Returns 0 on
+ * success; a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_deregister_srvid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			       struct ctdb_client_context *client,
+			       int destnode, struct timeval timeout,
+			       uint64_t srvid)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_deregister_srvid(&ctl_req, srvid);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DEREGISTER_SRVID failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_deregister_srvid(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DEREGISTER_SRVID failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GET_DBNAME control for database db_id.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with mem_ctx and db_name, to
+ * ctdb_reply_control_get_dbname().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_get_dbname(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			 struct ctdb_client_context *client,
+			 int destnode, struct timeval timeout,
+			 uint32_t db_id, const char **db_name)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_get_dbname(&ctl_req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DBNAME failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_dbname(ctl_reply, mem_ctx, db_name);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DBNAME failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous ENABLE_SEQNUM control for database db_id.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_enable_seqnum().  Returns 0 on success;
+ * a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_enable_seqnum(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    uint32_t db_id)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_enable_seqnum(&ctl_req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control ENABLE_SEQNUM failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_enable_seqnum(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control ENABLE_SEQNUM failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous UPDATE_SEQNUM control for database db_id.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_update_seqnum().  Returns 0 on success;
+ * a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_update_seqnum(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    uint32_t db_id)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_update_seqnum(&ctl_req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control UPDATE_SEQNUM failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_update_seqnum(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control UPDATE_SEQNUM failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous DUMP_MEMORY control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with mem_ctx and mem_str, to
+ * ctdb_reply_control_dump_memory().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_dump_memory(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct timeval timeout,
+			  const char **mem_str)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_dump_memory(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DUMP_MEMORY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_dump_memory(ctl_reply, mem_ctx, mem_str);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DUMP_MEMORY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GET_PID control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with pid, to ctdb_reply_control_get_pid().  Returns 0
+ * on success; a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_get_pid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct ctdb_client_context *client,
+		      int destnode, struct timeval timeout,
+		      pid_t *pid)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_get_pid(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PID failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_pid(ctl_reply, pid);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PID failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous FREEZE control carrying priority.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_freeze().  Returns 0 on success; a
+ * non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_freeze(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		     struct ctdb_client_context *client,
+		     int destnode, struct timeval timeout,
+		     int priority)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_freeze(&ctl_req, priority);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control FREEZE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_freeze(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control FREEZE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GET_PNN control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with pnn, to ctdb_reply_control_get_pnn().  Returns 0
+ * on success; a non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_get_pnn(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct ctdb_client_context *client,
+		      int destnode, struct timeval timeout,
+		      uint32_t *pnn)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_get_pnn(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PNN failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_pnn(ctl_reply, pnn);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PNN failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous SHUTDOWN control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_shutdown().  Returns 0 on success; a
+ * non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_shutdown(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		       struct ctdb_client_context *client,
+		       int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_shutdown(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SHUTDOWN failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_shutdown(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SHUTDOWN failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous TCP_ADD control carrying conn.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_tcp_add().  Returns 0 on success; a
+ * non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_tcp_add(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct ctdb_client_context *client,
+		      int destnode, struct timeval timeout,
+		      struct ctdb_connection *conn)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_tcp_add(&ctl_req, conn);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TCP_ADD failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_tcp_add(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TCP_ADD failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous TCP_REMOVE control carrying conn.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_tcp_remove().  Returns 0 on success; a
+ * non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_tcp_remove(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			 struct ctdb_client_context *client,
+			 int destnode, struct timeval timeout,
+			 struct ctdb_connection *conn)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_tcp_remove(&ctl_req, conn);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TCP_REMOVE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_tcp_remove(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TCP_REMOVE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous SET_TUNABLE control carrying tunable.
+ *
+ * Sends the control to destnode with ctdb_client_control() and checks the
+ * reply with ctdb_reply_control_set_tunable().  Returns 0 on success; a
+ * non-zero result from either step is logged and returned.
+ */
+int ctdb_ctrl_set_tunable(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct timeval timeout,
+			  struct ctdb_tunable *tunable)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_set_tunable(&ctl_req, tunable);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_TUNABLE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_tunable(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_TUNABLE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous GET_TUNABLE control for the tunable named var.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with value, to ctdb_reply_control_get_tunable().
+ * Returns 0 on success; a non-zero result from either step is logged and
+ * returned.
+ */
+int ctdb_ctrl_get_tunable(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct timeval timeout,
+			  const char *var, uint32_t *value)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_get_tunable(&ctl_req, var);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_TUNABLE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_tunable(ctl_reply, value);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_TUNABLE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous LIST_TUNABLES control.
+ *
+ * Sends the control to destnode with ctdb_client_control() and passes the
+ * reply, together with mem_ctx and var_list, to
+ * ctdb_reply_control_list_tunables().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_list_tunables(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_var_list **var_list)
+{
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	ctdb_req_control_list_tunables(&ctl_req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control LIST_TUNABLES failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_list_tunables(ctl_reply, mem_ctx, var_list);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control LIST_TUNABLES failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Synchronous MODIFY_FLAGS control.
+ *
+ * pnn, old_flags and new_flags are packed into a ctdb_node_flag_change
+ * that is carried by the control.  The control is sent to destnode with
+ * ctdb_client_control() and the reply is checked with
+ * ctdb_reply_control_modify_flags().  Returns 0 on success; a non-zero
+ * result from either step is logged and returned.
+ */
+int ctdb_ctrl_modify_flags(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			   struct ctdb_client_context *client,
+			   int destnode, struct timeval timeout,
+			   uint32_t pnn, uint32_t old_flags,
+			   uint32_t new_flags)
+{
+	struct ctdb_node_flag_change change;
+	struct ctdb_reply_control *ctl_reply;
+	struct ctdb_req_control ctl_req;
+	int rc;
+
+	change.new_flags = new_flags;
+	change.old_flags = old_flags;
+	change.pnn = pnn;
+
+	ctdb_req_control_modify_flags(&ctl_req, &change);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &ctl_req, &ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control MODIFY_FLAGS failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_modify_flags(ctl_reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control MODIFY_FLAGS failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_ALL_TUNABLES request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with mem_ctx and tun_list, to
+ * ctdb_reply_control_get_all_tunables().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_all_tunables(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			       struct ctdb_client_context *client,
+			       int destnode, struct timeval timeout,
+			       struct ctdb_tunable_list **tun_list)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_all_tunables(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_ALL_TUNABLES failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_all_tunables(rep, mem_ctx, tun_list);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_ALL_TUNABLES failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_TCP_TICKLE_LIST request from addr, run it through
+ * ctdb_client_control() against destnode and pass the reply, with
+ * mem_ctx and tickles, to ctdb_reply_control_get_tcp_tickle_list().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_tcp_tickle_list(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  struct ctdb_client_context *client,
+				  int destnode, struct timeval timeout,
+				  ctdb_sock_addr *addr,
+				  struct ctdb_tickle_list **tickles)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_tcp_tickle_list(&req, addr);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_TCP_TICKLE_LIST failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_tcp_tickle_list(rep, mem_ctx, tickles);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_TCP_TICKLE_LIST failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SET_TCP_TICKLE_LIST request from tickles, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_set_tcp_tickle_list().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_set_tcp_tickle_list(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  struct ctdb_client_context *client,
+				  int destnode, struct timeval timeout,
+				  struct ctdb_tickle_list *tickles)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_set_tcp_tickle_list(&req, tickles);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_TCP_TICKLE_LIST failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_tcp_tickle_list(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_TCP_TICKLE_LIST failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a DB_ATTACH_PERSISTENT request from db_name, run it through
+ * ctdb_client_control() against destnode and pass the reply, with
+ * db_id, to ctdb_reply_control_db_attach_persistent().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_db_attach_persistent(TALLOC_CTX *mem_ctx,
+				   struct tevent_context *ev,
+				   struct ctdb_client_context *client,
+				   int destnode, struct timeval timeout,
+				   const char *db_name, uint32_t *db_id)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_db_attach_persistent(&req, db_name);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_ATTACH_PERSISTENT failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_db_attach_persistent(rep, db_id);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_ATTACH_PERSISTENT failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SEND_GRATUITOUS_ARP request from addr_info, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_send_gratuitous_arp().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_send_gratuitous_arp(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  struct ctdb_client_context *client,
+				  int destnode, struct timeval timeout,
+				  struct ctdb_addr_info *addr_info)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_send_gratuitous_arp(&req, addr_info);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SEND_GRATUITOUS_ARP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_send_gratuitous_arp(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SEND_GRATUITOUS_ARP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Pack db_id and tid into a struct ctdb_transdb, build a WIPE_DATABASE
+ * request from it, run it through ctdb_client_control() against
+ * destnode and pass the reply to ctdb_reply_control_wipe_database().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_wipe_database(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    uint32_t db_id, uint32_t tid)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	struct ctdb_transdb wipe;
+	int rc;
+
+	wipe.db_id = db_id;
+	wipe.tid = tid;
+	ctdb_req_control_wipe_database(&req, &wipe);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control WIPE_DATABASE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_wipe_database(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control WIPE_DATABASE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build an UPTIME request, run it through ctdb_client_control() against
+ * destnode and pass the reply, with mem_ctx and uptime, to
+ * ctdb_reply_control_uptime().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_uptime(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		     struct ctdb_client_context *client,
+		     int destnode, struct timeval timeout,
+		     struct ctdb_uptime **uptime)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_uptime(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control UPTIME failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_uptime(rep, mem_ctx, uptime);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control UPTIME failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a START_RECOVERY request, run it through ctdb_client_control()
+ * against destnode and pass the reply to
+ * ctdb_reply_control_start_recovery().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_start_recovery(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			     struct ctdb_client_context *client,
+			     int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_start_recovery(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control START_RECOVERY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_start_recovery(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control START_RECOVERY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build an END_RECOVERY request, run it through ctdb_client_control()
+ * against destnode and pass the reply to
+ * ctdb_reply_control_end_recovery().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_end_recovery(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			   struct ctdb_client_context *client,
+			   int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_end_recovery(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control END_RECOVERY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_end_recovery(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control END_RECOVERY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a RELOAD_NODES_FILE request, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_reload_nodes_file().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_reload_nodes_file(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_reload_nodes_file(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control RELOAD_NODES_FILE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_reload_nodes_file(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control RELOAD_NODES_FILE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build an ADD_PUBLIC_IP request from addr_info, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_add_public_ip().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_add_public_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_addr_info *addr_info)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_add_public_ip(&req, addr_info);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control ADD_PUBLIC_IP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_add_public_ip(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control ADD_PUBLIC_IP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a DEL_PUBLIC_IP request from addr_info, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_del_public_ip().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_del_public_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_addr_info *addr_info)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_del_public_ip(&req, addr_info);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DEL_PUBLIC_IP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_del_public_ip(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DEL_PUBLIC_IP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_CAPABILITIES request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with caps, to
+ * ctdb_reply_control_get_capabilities().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_capabilities(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			       struct ctdb_client_context *client,
+			       int destnode, struct timeval timeout,
+			       uint32_t *caps)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_capabilities(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_CAPABILITIES failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_capabilities(rep, caps);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_CAPABILITIES failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a RELEASE_IP request from pubip, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_release_ip().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_release_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			 struct ctdb_client_context *client,
+			 int destnode, struct timeval timeout,
+			 struct ctdb_public_ip *pubip)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_release_ip(&req, pubip);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control RELEASE_IP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_release_ip(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control RELEASE_IP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a TAKEOVER_IP request from pubip, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_takeover_ip().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_takeover_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct timeval timeout,
+			  struct ctdb_public_ip *pubip)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_takeover_ip(&req, pubip);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TAKEOVER_IP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_takeover_ip(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TAKEOVER_IP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_PUBLIC_IPS request from available_only, run it through
+ * ctdb_client_control() against destnode and pass the reply, with
+ * mem_ctx and pubip_list, to ctdb_reply_control_get_public_ips().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_public_ips(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			     struct ctdb_client_context *client,
+			     int destnode, struct timeval timeout,
+			     bool available_only,
+			     struct ctdb_public_ip_list **pubip_list)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_public_ips(&req, available_only);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PUBLIC_IPS failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_public_ips(rep, mem_ctx, pubip_list);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PUBLIC_IPS failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_NODEMAP request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with mem_ctx and nodemap, to
+ * ctdb_reply_control_get_nodemap().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_nodemap(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct timeval timeout,
+			  struct ctdb_node_map **nodemap)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_nodemap(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_NODEMAP failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_nodemap(rep, mem_ctx, nodemap);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_NODEMAP failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a TRAVERSE_KILL request from traverse, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_traverse_kill().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_traverse_kill(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_traverse_start *traverse)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_traverse_kill(&req, traverse);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRAVERSE_KILL failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_traverse_kill(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRAVERSE_KILL failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_RECLOCK_FILE request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with mem_ctx and reclock_file,
+ * to ctdb_reply_control_get_reclock_file().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_reclock_file(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			       struct ctdb_client_context *client,
+			       int destnode, struct timeval timeout,
+			       const char **reclock_file)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_reclock_file(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_RECLOCK_FILE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_reclock_file(rep, mem_ctx, reclock_file);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_RECLOCK_FILE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a STOP_NODE request, run it through ctdb_client_control()
+ * against destnode and pass the reply to ctdb_reply_control_stop_node().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_stop_node(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_stop_node(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control STOP_NODE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_stop_node(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control STOP_NODE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a CONTINUE_NODE request, run it through ctdb_client_control()
+ * against destnode and pass the reply to
+ * ctdb_reply_control_continue_node().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_continue_node(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_continue_node(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control CONTINUE_NODE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_continue_node(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control CONTINUE_NODE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SET_LMASTERROLE request from lmaster_role, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_set_lmasterrole().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_set_lmasterrole(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode, struct timeval timeout,
+			      uint32_t lmaster_role)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_set_lmasterrole(&req, lmaster_role);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_LMASTERROLE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_lmasterrole(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_LMASTERROLE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SET_RECMASTERROLE request from recmaster_role, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_set_recmasterrole().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_set_recmasterrole(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, struct timeval timeout,
+				uint32_t recmaster_role)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_set_recmasterrole(&req, recmaster_role);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_RECMASTERROLE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_recmasterrole(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_RECMASTERROLE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SET_BAN_STATE request from ban_state, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_set_ban_state().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_set_ban_state(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_ban_state *ban_state)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_set_ban_state(&req, ban_state);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_BAN_STATE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_ban_state(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_BAN_STATE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_BAN_STATE request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with mem_ctx and ban_state, to
+ * ctdb_reply_control_get_ban_state().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_ban_state(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_ban_state **ban_state)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_ban_state(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_BAN_STATE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_ban_state(rep, mem_ctx, ban_state);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_BAN_STATE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a REGISTER_NOTIFY request from notify, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_register_notify().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_register_notify(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode, struct timeval timeout,
+			      struct ctdb_notify_data *notify)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_register_notify(&req, notify);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control REGISTER_NOTIFY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_register_notify(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control REGISTER_NOTIFY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a DEREGISTER_NOTIFY request from srvid, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_deregister_notify().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_deregister_notify(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, struct timeval timeout,
+				uint64_t srvid)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_deregister_notify(&req, srvid);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DEREGISTER_NOTIFY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_deregister_notify(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DEREGISTER_NOTIFY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a TRANS3_COMMIT request from recbuf, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_trans3_commit().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_trans3_commit(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_rec_buffer *recbuf)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_trans3_commit(&req, recbuf);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRANS3_COMMIT failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_trans3_commit(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRANS3_COMMIT failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_DB_SEQNUM request from db_id, run it through
+ * ctdb_client_control() against destnode and pass the reply, with
+ * seqnum, to ctdb_reply_control_get_db_seqnum().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_db_seqnum(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    uint32_t db_id, uint64_t *seqnum)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_db_seqnum(&req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DB_SEQNUM failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_db_seqnum(rep, seqnum);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DB_SEQNUM failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a DB_SET_HEALTHY request from db_id, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_db_set_healthy().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_db_set_healthy(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			     struct ctdb_client_context *client,
+			     int destnode, struct timeval timeout,
+			     uint32_t db_id)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_db_set_healthy(&req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_SET_HEALTHY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_db_set_healthy(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_SET_HEALTHY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a DB_GET_HEALTH request from db_id, run it through
+ * ctdb_client_control() against destnode and pass the reply, with
+ * mem_ctx and reason, to ctdb_reply_control_db_get_health().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_db_get_health(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    uint32_t db_id, const char **reason)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_db_get_health(&req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_GET_HEALTH failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_db_get_health(rep, mem_ctx, reason);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_GET_HEALTH failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_PUBLIC_IP_INFO request from addr, run it through
+ * ctdb_client_control() against destnode and pass the reply, with
+ * mem_ctx and ipinfo, to ctdb_reply_control_get_public_ip_info().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_public_ip_info(TALLOC_CTX *mem_ctx,
+				 struct tevent_context *ev,
+				 struct ctdb_client_context *client,
+				 int destnode, struct timeval timeout,
+				 ctdb_sock_addr *addr,
+				 struct ctdb_public_ip_info **ipinfo)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_public_ip_info(&req, addr);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PUBLIC_IP_INFO failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_public_ip_info(rep, mem_ctx, ipinfo);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_PUBLIC_IP_INFO failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_IFACES request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with mem_ctx and iface_list, to
+ * ctdb_reply_control_get_ifaces().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_ifaces(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			 struct ctdb_client_context *client,
+			 int destnode, struct timeval timeout,
+			 struct ctdb_iface_list **iface_list)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_ifaces(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_IFACES failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_ifaces(rep, mem_ctx, iface_list);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_IFACES failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SET_IFACE_LINK_STATE request from iface, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_set_iface_link_state().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_set_iface_link_state(TALLOC_CTX *mem_ctx,
+				   struct tevent_context *ev,
+				   struct ctdb_client_context *client,
+				   int destnode, struct timeval timeout,
+				   struct ctdb_iface *iface)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_set_iface_link_state(&req, iface);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_IFACE_LINK_STATE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_iface_link_state(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_IFACE_LINK_STATE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a TCP_ADD_DELAYED_UPDATE request from conn, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_tcp_add_delayed_update().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_tcp_add_delayed_update(TALLOC_CTX *mem_ctx,
+				     struct tevent_context *ev,
+				     struct ctdb_client_context *client,
+				     int destnode, struct timeval timeout,
+				     struct ctdb_connection *conn)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_tcp_add_delayed_update(&req, conn);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TCP_ADD_DELAYED_UPDATE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_tcp_add_delayed_update(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TCP_ADD_DELAYED_UPDATE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_STAT_HISTORY request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with mem_ctx and stats_list, to
+ * ctdb_reply_control_get_stat_history().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_stat_history(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			       struct ctdb_client_context *client,
+			       int destnode, struct timeval timeout,
+			       struct ctdb_statistics_list **stats_list)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_stat_history(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_STAT_HISTORY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_stat_history(rep, mem_ctx, stats_list);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_STAT_HISTORY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SCHEDULE_FOR_DELETION request from key, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_schedule_for_deletion().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_schedule_for_deletion(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct ctdb_client_context *client,
+				    int destnode, struct timeval timeout,
+				    struct ctdb_key_data *key)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_schedule_for_deletion(&req, key);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SCHEDULE_FOR_DELETION failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_schedule_for_deletion(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SCHEDULE_FOR_DELETION failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SET_DB_READONLY request from db_id, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_set_db_readonly().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_set_db_readonly(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode, struct timeval timeout,
+			      uint32_t db_id)
+{
+	struct ctdb_req_control request;
+	struct ctdb_reply_control *reply;
+	int ret;
+
+	ctdb_req_control_set_db_readonly(&request, db_id);
+	ret = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				  &request, &reply);
+	if (ret != 0) {
+		/* Log text spells the control name in full: SET_DB_READONLY. */
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_DB_READONLY failed to node %u, ret=%d\n",
+		       destnode, ret));
+		return ret;
+	}
+
+	ret = ctdb_reply_control_set_db_readonly(reply);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_DB_READONLY failed, ret=%d\n", ret));
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Build a TRAVERSE_START_EXT request from traverse, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_traverse_start_ext().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_traverse_start_ext(TALLOC_CTX *mem_ctx,
+				 struct tevent_context *ev,
+				 struct ctdb_client_context *client,
+				 int destnode, struct timeval timeout,
+				 struct ctdb_traverse_start_ext *traverse)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_traverse_start_ext(&req, traverse);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRAVERSE_START_EXT failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_traverse_start_ext(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TRAVERSE_START_EXT failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_DB_STATISTICS request from db_id, run it through
+ * ctdb_client_control() against destnode and pass the reply, with
+ * mem_ctx and dbstats, to ctdb_reply_control_get_db_statistics().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_db_statistics(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, struct timeval timeout,
+				uint32_t db_id,
+				struct ctdb_db_statistics **dbstats)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_db_statistics(&req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DB_STATISTICS failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_db_statistics(rep, mem_ctx, dbstats);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_DB_STATISTICS failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a SET_DB_STICKY request from db_id, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_set_db_sticky().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_set_db_sticky(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    uint32_t db_id)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_set_db_sticky(&req, db_id);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_DB_STICKY failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_set_db_sticky(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control SET_DB_STICKY failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a RELOAD_PUBLIC_IPS request, run it through
+ * ctdb_client_control() against destnode and pass the reply to
+ * ctdb_reply_control_reload_public_ips().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_reload_public_ips(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_reload_public_ips(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control RELOAD_PUBLIC_IPS failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_reload_public_ips(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control RELOAD_PUBLIC_IPS failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build an IPREALLOCATED request, run it through ctdb_client_control()
+ * against destnode and pass the reply to
+ * ctdb_reply_control_ipreallocated().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_ipreallocated(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_ipreallocated(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control IPREALLOCATED failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_ipreallocated(rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control IPREALLOCATED failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Build a GET_RUNSTATE request, run it through ctdb_client_control()
+ * against destnode and pass the reply, with runstate, to
+ * ctdb_reply_control_get_runstate().
+ *
+ * Returns 0 when both calls return 0; otherwise logs at DEBUG_ERR and
+ * returns the first non-zero result.
+ */
+int ctdb_ctrl_get_runstate(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			   struct ctdb_client_context *client,
+			   int destnode, struct timeval timeout,
+			   enum ctdb_runstate *runstate)
+{
+	struct ctdb_reply_control *rep;
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_get_runstate(&req);
+
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &req, &rep);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_RUNSTATE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	rc = ctdb_reply_control_get_runstate(rep, runstate);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_RUNSTATE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_DETACH control for database db_id against destnode and
+ * evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_detach(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			int destnode, struct timeval timeout,
+			uint32_t db_id)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_detach(&request, db_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_DETACH failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_detach(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_DETACH failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the GET_NODES_FILE control against destnode and evaluate the reply.
+ *
+ * mem_ctx and nodemap are handed to ctdb_reply_control_get_nodes_file()
+ * to receive the result.  Returns 0 on success, otherwise the non-zero
+ * value produced by ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_get_nodes_file(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			     struct ctdb_client_context *client,
+			     int destnode, struct timeval timeout,
+			     struct ctdb_node_map **nodemap)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_get_nodes_file(&request);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_NODES_FILE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_get_nodes_file(reply, mem_ctx, nodemap);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control GET_NODES_FILE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_FREEZE control for database db_id against destnode and
+ * evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_freeze(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			int destnode, struct timeval timeout, uint32_t db_id)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_freeze(&request, db_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_FREEZE failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_freeze(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_FREEZE failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_THAW control for database db_id against destnode and
+ * evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_thaw(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct ctdb_client_context *client,
+		      int destnode, struct timeval timeout, uint32_t db_id)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_thaw(&request, db_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_THAW failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_thaw(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_THAW failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_TRANSACTION_START control, built from transdb, against
+ * destnode and evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_transaction_start(TALLOC_CTX *mem_ctx,
+				   struct tevent_context *ev,
+				   struct ctdb_client_context *client,
+				   int destnode, struct timeval timeout,
+				   struct ctdb_transdb *transdb)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_transaction_start(&request, transdb);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_TRANSACTION_START failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_transaction_start(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_TRANSACTION_START failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_TRANSACTION_COMMIT control, built from transdb, against
+ * destnode and evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_transaction_commit(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct ctdb_client_context *client,
+				    int destnode, struct timeval timeout,
+				    struct ctdb_transdb *transdb)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_transaction_commit(&request, transdb);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_TRANSACTION_COMMIT failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_transaction_commit(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_TRANSACTION_COMMIT failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_TRANSACTION_CANCEL control for database db_id against
+ * destnode and evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_transaction_cancel(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct ctdb_client_context *client,
+				    int destnode, struct timeval timeout,
+				    uint32_t db_id)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_transaction_cancel(&request, db_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_TRANSACTION_CANCEL failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_transaction_cancel(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_TRANSACTION_CANCEL failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_PULL control, built from pulldb, against destnode and
+ * evaluate the reply.
+ *
+ * num_records is handed to ctdb_reply_control_db_pull() to receive the
+ * result.  Returns 0 on success, otherwise the non-zero value produced
+ * by ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_pull(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct ctdb_client_context *client,
+		      int destnode, struct timeval timeout,
+		      struct ctdb_pulldb_ext *pulldb, uint32_t *num_records)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_pull(&request, pulldb);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_PULL failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_pull(reply, num_records);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR, ("Control DB_PULL failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_PUSH_START control, built from pulldb, against destnode and
+ * evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_push_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    struct ctdb_pulldb_ext *pulldb)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_push_start(&request, pulldb);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_PUSH_START failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_push_start(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_PUSH_START failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_PUSH_CONFIRM control for database db_id against destnode
+ * and evaluate the reply.
+ *
+ * num_records is handed to ctdb_reply_control_db_push_confirm() to
+ * receive the result.  Returns 0 on success, otherwise the non-zero value
+ * produced by ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_push_confirm(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode, struct timeval timeout,
+			      uint32_t db_id, uint32_t *num_records)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_push_confirm(&request, db_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_PUSH_CONFIRM failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_push_confirm(reply, num_records);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_PUSH_CONFIRM failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_OPEN_FLAGS control for database db_id against destnode and
+ * evaluate the reply.
+ *
+ * tdb_flags is handed to ctdb_reply_control_db_open_flags() to receive
+ * the result.  Returns 0 on success, otherwise the non-zero value produced
+ * by ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_open_flags(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			    struct ctdb_client_context *client,
+			    int destnode, struct timeval timeout,
+			    uint32_t db_id, int *tdb_flags)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_open_flags(&request, db_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_OPEN_FLAGS failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_open_flags(reply, tdb_flags);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_OPEN_FLAGS failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DB_ATTACH_REPLICATED control for the database named db_name
+ * against destnode and evaluate the reply.
+ *
+ * db_id is handed to ctdb_reply_control_db_attach_replicated() to receive
+ * the result.  Returns 0 on success, otherwise the non-zero value produced
+ * by ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_db_attach_replicated(TALLOC_CTX *mem_ctx,
+				   struct tevent_context *ev,
+				   struct ctdb_client_context *client,
+				   int destnode, struct timeval timeout,
+				   const char *db_name, uint32_t *db_id)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_db_attach_replicated(&request, db_name);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_ATTACH_REPLICATED failed to node %u,"
+		       " ret=%d\n", destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_db_attach_replicated(reply, db_id);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control DB_ATTACH_REPLICATED failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the CHECK_PID_SRVID control, built from pid_srvid, against destnode
+ * and evaluate the reply.
+ *
+ * status is handed to ctdb_reply_control_check_pid_srvid() to receive the
+ * result.  Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_check_pid_srvid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode, struct timeval timeout,
+			      struct ctdb_pid_srvid *pid_srvid, int *status)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_check_pid_srvid(&request, pid_srvid);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control CHECK_PID_SRVID failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_check_pid_srvid(reply, status);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control CHECK_PID_SRVID failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the TUNNEL_REGISTER control for tunnel_id against destnode and
+ * evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_tunnel_register(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode, struct timeval timeout,
+			      uint64_t tunnel_id)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_tunnel_register(&request, tunnel_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TUNNEL_REGISTER failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_tunnel_register(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TUNNEL_REGISTER failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the TUNNEL_DEREGISTER control for tunnel_id against destnode and
+ * evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_tunnel_deregister(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, struct timeval timeout,
+				uint64_t tunnel_id)
+{
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control request;
+	int rc;
+
+	ctdb_req_control_tunnel_deregister(&request, tunnel_id);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TUNNEL_DEREGISTER failed to node %u, ret=%d\n",
+		       destnode, rc));
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_tunnel_deregister(reply);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Control TUNNEL_DEREGISTER failed, ret=%d\n", rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Run the DISABLE_NODE control against destnode and evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_disable_node(TALLOC_CTX *mem_ctx,
+			   struct tevent_context *ev,
+			   struct ctdb_client_context *client,
+			   int destnode,
+			   struct timeval timeout)
+{
+	struct ctdb_reply_control *reply = NULL;
+	struct ctdb_req_control request = {
+		.opcode = 0,
+	};
+	int rc;
+
+	ctdb_req_control_disable_node(&request);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		D_ERR("Control DISABLE_NODE failed to node %u, ret=%d\n",
+		      destnode, rc);
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_disable_node(reply);
+	if (rc != 0) {
+		D_ERR("Control DISABLE_NODE failed, ret=%d\n", rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Run the ENABLE_NODE control against destnode and evaluate the reply.
+ *
+ * Returns 0 on success, otherwise the non-zero value produced by
+ * ctdb_client_control() or by the reply parser.
+ */
+int ctdb_ctrl_enable_node(TALLOC_CTX *mem_ctx,
+			  struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode,
+			  struct timeval timeout)
+{
+	struct ctdb_reply_control *reply = NULL;
+	struct ctdb_req_control request = {
+		.opcode = 0,
+	};
+	int rc;
+
+	ctdb_req_control_enable_node(&request);
+	rc = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
+				 &request, &reply);
+	if (rc != 0) {
+		D_ERR("Control ENABLE_NODE failed to node %u, ret=%d\n",
+		      destnode, rc);
+		return rc;
+	}
+
+	/* ctdb_client_control() succeeded; now evaluate the reply itself */
+	rc = ctdb_reply_control_enable_node(reply);
+	if (rc != 0) {
+		D_ERR("Control ENABLE_NODE failed, ret=%d\n", rc);
+	}
+
+	return rc;
+}
diff --git a/ctdb/client/client_db.c b/ctdb/client/client_db.c
new file mode 100644
index 0000000..0b06d6e
--- /dev/null
+++ b/ctdb/client/client_db.c
@@ -0,0 +1,2791 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "common/logging.h"
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "client/client_private.h"
+#include "client/client.h"
+
+/* Return the tdb context reached through the database handle's ltdb member */
+struct tdb_context *client_db_tdb(struct ctdb_db_context *db)
+{
+	return (db->ltdb)->tdb;
+}
+
+/*
+ * Search the client's list of database handles for one whose name equals
+ * db_name.  Returns the matching handle, or NULL if there is none.
+ */
+static struct ctdb_db_context *client_db_handle(
+					struct ctdb_client_context *client,
+					const char *db_name)
+{
+	struct ctdb_db_context *cur = client->db;
+
+	while (cur != NULL) {
+		if (strcmp(db_name, cur->db_name) == 0) {
+			break;
+		}
+		cur = cur->next;
+	}
+
+	/* Either the match, or NULL once the end of the list was reached */
+	return cur;
+}
+
+/* True if the handle's flags contain CTDB_DB_FLAGS_PERSISTENT */
+static bool ctdb_db_persistent(struct ctdb_db_context *db)
+{
+	return (db->db_flags & CTDB_DB_FLAGS_PERSISTENT) != 0;
+}
+
+/* True if the handle's flags contain CTDB_DB_FLAGS_REPLICATED */
+static bool ctdb_db_replicated(struct ctdb_db_context *db)
+{
+	return (db->db_flags & CTDB_DB_FLAGS_REPLICATED) != 0;
+}
+
+/*
+ * True if the handle's flags contain neither CTDB_DB_FLAGS_PERSISTENT nor
+ * CTDB_DB_FLAGS_REPLICATED.
+ */
+static bool ctdb_db_volatile(struct ctdb_db_context *db)
+{
+	bool persistent = (db->db_flags & CTDB_DB_FLAGS_PERSISTENT) != 0;
+	bool replicated = (db->db_flags & CTDB_DB_FLAGS_REPLICATED) != 0;
+
+	return !persistent && !replicated;
+}
+
+struct ctdb_set_db_flags_state { /* per-request state of the ctdb_set_db_flags_send()/recv() pair */
+ struct tevent_context *ev; /* event context reused for the follow-up controls */
+ struct ctdb_client_context *client; /* client the controls are sent through */
+ struct timeval timeout; /* timeout passed on to each control */
+ uint32_t db_id; /* database whose flags are being set */
+ uint8_t db_flags; /* requested flags; only READONLY and STICKY are examined */
+ bool readonly_done, sticky_done; /* set once the respective control finished or was not requested */
+ uint32_t *pnn_list; /* filled in by list_of_connected_nodes() */
+ int count; /* return value of list_of_connected_nodes(), used as length of pnn_list */
+};
+
+static void ctdb_set_db_flags_nodemap_done(struct tevent_req *subreq);
+static void ctdb_set_db_flags_readonly_done(struct tevent_req *subreq);
+static void ctdb_set_db_flags_sticky_done(struct tevent_req *subreq);
+
+/*
+ * Start an asynchronous request that applies the READONLY and/or STICKY
+ * bits of db_flags to database db_id.
+ *
+ * When neither bit is set the request completes immediately.  Otherwise a
+ * GET_NODEMAP control is sent to destnode and processing continues in
+ * ctdb_set_db_flags_nodemap_done().
+ */
+static struct tevent_req *ctdb_set_db_flags_send(
+				TALLOC_CTX *mem_ctx,
+				struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				uint32_t destnode, struct timeval timeout,
+				uint32_t db_id, uint8_t db_flags)
+{
+	struct ctdb_set_db_flags_state *state;
+	struct ctdb_req_control nodemap_ctl;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_set_db_flags_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Nothing to set: finish straight away */
+	if ((db_flags & (CTDB_DB_FLAGS_READONLY | CTDB_DB_FLAGS_STICKY)) == 0) {
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	state->db_flags = db_flags;
+	state->db_id = db_id;
+	state->timeout = timeout;
+	state->client = client;
+	state->ev = ev;
+
+	ctdb_req_control_get_nodemap(&nodemap_ctl);
+	subreq = ctdb_client_control_send(state, ev, client, destnode, timeout,
+					  &nodemap_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_set_db_flags_nodemap_done, req);
+
+	return req;
+}
+
+/*
+ * Completion handler for the GET_NODEMAP control of a set-db-flags request.
+ *
+ * Builds the list of connected nodes from the node map and sends
+ * SET_DB_READONLY and/or SET_DB_STICKY to all of them, depending on the
+ * requested flags.  A flag that was not requested is marked as done here.
+ */
+static void ctdb_set_db_flags_nodemap_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_set_db_flags_state *state = tevent_req_data(
+		req, struct ctdb_set_db_flags_state);
+	struct ctdb_reply_control *reply;
+	struct ctdb_node_map *nodemap;
+	struct ctdb_req_control ctl;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR,
+		      ("set_db_flags: 0x%08x GET_NODEMAP failed, ret=%d\n",
+		       state->db_id, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
+	talloc_free(reply);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("set_db_flags: 0x%08x GET_NODEMAP parse failed, ret=%d\n",
+		       state->db_id, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* Only the PNN list is kept; the node map itself is released */
+	state->count = list_of_connected_nodes(nodemap, CTDB_UNKNOWN_PNN,
+					       state, &state->pnn_list);
+	talloc_free(nodemap);
+	if (state->count <= 0) {
+		DEBUG(DEBUG_ERR,
+		      ("set_db_flags: 0x%08x no connected nodes, count=%d\n",
+		       state->db_id, state->count));
+		tevent_req_error(req, ENOMEM);
+		return;
+	}
+
+	if ((state->db_flags & CTDB_DB_FLAGS_READONLY) == 0) {
+		state->readonly_done = true;
+	} else {
+		ctdb_req_control_set_db_readonly(&ctl, state->db_id);
+		subreq = ctdb_client_control_multi_send(
+				state, state->ev, state->client,
+				state->pnn_list, state->count,
+				state->timeout, &ctl);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(subreq,
+					ctdb_set_db_flags_readonly_done, req);
+	}
+
+	if ((state->db_flags & CTDB_DB_FLAGS_STICKY) == 0) {
+		state->sticky_done = true;
+	} else {
+		ctdb_req_control_set_db_sticky(&ctl, state->db_id);
+		subreq = ctdb_client_control_multi_send(
+				state, state->ev, state->client,
+				state->pnn_list, state->count,
+				state->timeout, &ctl);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(subreq, ctdb_set_db_flags_sticky_done,
+					req);
+	}
+}
+
+/*
+ * Completion handler for the SET_DB_READONLY multi-node control.
+ *
+ * Marks the readonly step as done and finishes the request if the sticky
+ * step has finished as well.
+ */
+static void ctdb_set_db_flags_readonly_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_set_db_flags_state *state = tevent_req_data(
+		req, struct ctdb_set_db_flags_state);
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_multi_recv(subreq, &ret, NULL, NULL, NULL);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR,
+		      ("set_db_flags: 0x%08x SET_DB_READONLY failed, ret=%d\n",
+		       state->db_id, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	state->readonly_done = true;
+
+	/* Whichever of the two handlers runs last completes the request */
+	if (state->sticky_done) {
+		tevent_req_done(req);
+	}
+}
+
+/*
+ * Completion handler for the SET_DB_STICKY multi-node control.
+ *
+ * Marks the sticky step as done and finishes the request if the readonly
+ * step has finished as well.
+ */
+static void ctdb_set_db_flags_sticky_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_set_db_flags_state *state = tevent_req_data(
+		req, struct ctdb_set_db_flags_state);
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_multi_recv(subreq, &ret, NULL, NULL, NULL);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR,
+		      ("set_db_flags: 0x%08x SET_DB_STICKY failed, ret=%d\n",
+		       state->db_id, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	state->sticky_done = true;
+
+	/* Whichever of the two handlers runs last completes the request */
+	if (state->readonly_done) {
+		tevent_req_done(req);
+	}
+}
+
+/*
+ * Collect the result of a set-db-flags request.
+ *
+ * Returns true on success.  On failure returns false and, if perr is not
+ * NULL, stores the error there.
+ */
+static bool ctdb_set_db_flags_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+struct ctdb_attach_state { /* per-request state of the ctdb_attach_send()/recv() pair */
+ struct tevent_context *ev; /* event context reused for every follow-up control */
+ struct ctdb_client_context *client; /* client the controls are sent through */
+ struct timeval timeout; /* timeout passed on to each control */
+ uint32_t destnode; /* set from ctdb_client_pnn(client); target of all controls */
+ uint8_t db_flags; /* flags requested by the caller */
+ struct ctdb_db_context *db; /* handle being built (or the existing one); returned by ctdb_attach_recv() */
+};
+
+static void ctdb_attach_dbid_done(struct tevent_req *subreq);
+static void ctdb_attach_dbpath_done(struct tevent_req *subreq);
+static void ctdb_attach_health_done(struct tevent_req *subreq);
+static void ctdb_attach_flags_done(struct tevent_req *subreq);
+static void ctdb_attach_open_flags_done(struct tevent_req *subreq);
+
+/*
+ * Start an asynchronous attach to the database named db_name.
+ *
+ * If the client already holds a handle for db_name the request completes
+ * immediately with that handle.  Otherwise a new handle is allocated on
+ * the client and DB_ATTACH, DB_ATTACH_PERSISTENT or DB_ATTACH_REPLICATED
+ * (chosen from db_flags) is sent to the client's own node; processing
+ * continues in ctdb_attach_dbid_done().
+ *
+ * Returns NULL only if the request itself could not be allocated.  The
+ * result is collected with ctdb_attach_recv().
+ */
+struct tevent_req *ctdb_attach_send(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct ctdb_client_context *client,
+				    struct timeval timeout,
+				    const char *db_name, uint8_t db_flags)
+{
+	struct tevent_req *req, *subreq;
+	struct ctdb_attach_state *state;
+	struct ctdb_req_control request;
+
+	req = tevent_req_create(mem_ctx, &state, struct ctdb_attach_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Already attached through this client: reuse the existing handle */
+	state->db = client_db_handle(client, db_name);
+	if (state->db != NULL) {
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->timeout = timeout;
+	state->destnode = ctdb_client_pnn(client);
+	state->db_flags = db_flags;
+
+	/*
+	 * The handle is parented to the client, not to the request, so
+	 * that it outlives the request.
+	 */
+	state->db = talloc_zero(client, struct ctdb_db_context);
+	if (tevent_req_nomem(state->db, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/*
+	 * Check the string that was just duplicated, not state->db, which
+	 * is already known to be non-NULL at this point.
+	 */
+	state->db->db_name = talloc_strdup(state->db, db_name);
+	if (tevent_req_nomem(state->db->db_name, req)) {
+		/* Do not leave the half-built handle hanging off the client */
+		TALLOC_FREE(state->db);
+		return tevent_req_post(req, ev);
+	}
+
+	state->db->db_flags = db_flags;
+
+	if (ctdb_db_persistent(state->db)) {
+		ctdb_req_control_db_attach_persistent(&request,
+						      state->db->db_name);
+	} else if (ctdb_db_replicated(state->db)) {
+		ctdb_req_control_db_attach_replicated(&request,
+						      state->db->db_name);
+	} else {
+		ctdb_req_control_db_attach(&request, state->db->db_name);
+	}
+
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->destnode, state->timeout,
+					  &request);
+	if (tevent_req_nomem(subreq, req)) {
+		/* Do not leave the unused handle hanging off the client */
+		TALLOC_FREE(state->db);
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_attach_dbid_done, req);
+
+	return req;
+}
+
+/*
+ * Completion handler for the DB_ATTACH* control of an attach request.
+ *
+ * Stores the database id from the reply in the handle and then sends
+ * GETDBPATH for that id; processing continues in ctdb_attach_dbpath_done().
+ */
+static void ctdb_attach_dbid_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_attach_state *state = tevent_req_data(
+		req, struct ctdb_attach_state);
+	struct ctdb_reply_control *reply;
+	struct ctdb_req_control dbpath_ctl;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		/* Name the control that was actually sent for this db type */
+		const char *ctl_name = "DB_ATTACH";
+
+		if (ctdb_db_persistent(state->db)) {
+			ctl_name = "DB_ATTACH_PERSISTENT";
+		} else if (ctdb_db_replicated(state->db)) {
+			ctl_name = "DB_ATTACH_REPLICATED";
+		}
+
+		DEBUG(DEBUG_ERR, ("attach: %s %s failed, ret=%d\n",
+				  state->db->db_name, ctl_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* The reply parser has to match the control that was sent */
+	if (ctdb_db_persistent(state->db)) {
+		ret = ctdb_reply_control_db_attach_persistent(
+				reply, &state->db->db_id);
+	} else if (ctdb_db_replicated(state->db)) {
+		ret = ctdb_reply_control_db_attach_replicated(
+				reply, &state->db->db_id);
+	} else {
+		ret = ctdb_reply_control_db_attach(reply, &state->db->db_id);
+	}
+	talloc_free(reply);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("attach: %s failed to get db_id, ret=%d\n",
+				  state->db->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ctdb_req_control_getdbpath(&dbpath_ctl, state->db->db_id);
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->destnode, state->timeout,
+					  &dbpath_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, ctdb_attach_dbpath_done, req);
+}
+
+/*
+ * Completion handler for the GETDBPATH control of an attach request.
+ *
+ * Stores the database path from the reply in the handle and then sends
+ * DB_GET_HEALTH; processing continues in ctdb_attach_health_done().
+ */
+static void ctdb_attach_dbpath_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_attach_state *state = tevent_req_data(
+		req, struct ctdb_attach_state);
+	struct ctdb_req_control health_ctl;
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR, ("attach: %s GETDBPATH failed, ret=%d\n",
+				  state->db->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* The handle is passed as the talloc context for the path */
+	ret = ctdb_reply_control_getdbpath(reply, state->db,
+					   &state->db->db_path);
+	talloc_free(reply);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("attach: %s GETDBPATH parse failed, ret=%d\n",
+				  state->db->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ctdb_req_control_db_get_health(&health_ctl, state->db->db_id);
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->destnode, state->timeout,
+					  &health_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, ctdb_attach_health_done, req);
+}
+
+/*
+ * Completion handler for the DB_GET_HEALTH control of an attach request.
+ *
+ * A non-NULL reason from the reply means the database is unhealthy and
+ * the attach fails with EIO.  Otherwise the requested flags are applied
+ * via ctdb_set_db_flags_send(); processing continues in
+ * ctdb_attach_flags_done().
+ */
+static void ctdb_attach_health_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_attach_state *state = tevent_req_data(
+		req, struct ctdb_attach_state);
+	struct ctdb_reply_control *reply;
+	const char *unhealthy_reason;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR, ("attach: %s DB_GET_HEALTH failed, ret=%d\n",
+				  state->db->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ret = ctdb_reply_control_db_get_health(reply, state,
+					       &unhealthy_reason);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("attach: %s DB_GET_HEALTH parse failed, ret=%d\n",
+		       state->db->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	if (unhealthy_reason != NULL) {
+		/* Database unhealthy, avoid attach */
+		DEBUG(DEBUG_ERR, ("attach: %s database unhealthy (%s)\n",
+				  state->db->db_name, unhealthy_reason));
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	subreq = ctdb_set_db_flags_send(state, state->ev, state->client,
+					state->destnode, state->timeout,
+					state->db->db_id, state->db_flags);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, ctdb_attach_flags_done, req);
+}
+
+/*
+ * Completion handler for the set-db-flags step of an attach request.
+ *
+ * On success sends DB_OPEN_FLAGS for the database; processing continues
+ * in ctdb_attach_open_flags_done().
+ */
+static void ctdb_attach_flags_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_attach_state *state = tevent_req_data(
+		req, struct ctdb_attach_state);
+	struct ctdb_req_control open_flags_ctl;
+	bool ok;
+	int ret;
+
+	ok = ctdb_set_db_flags_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR, ("attach: %s set db flags 0x%08x failed\n",
+				  state->db->db_name, state->db_flags));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ctdb_req_control_db_open_flags(&open_flags_ctl, state->db->db_id);
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->destnode, state->timeout,
+					  &open_flags_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, ctdb_attach_open_flags_done, req);
+}
+
+/*
+ * Completion handler for the DB_OPEN_FLAGS control, the last step of an
+ * attach request.
+ *
+ * Opens the local tdb at the handle's db_path with the flags from the
+ * reply, adds the handle to the client's database list and completes the
+ * request.
+ */
+static void ctdb_attach_open_flags_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_attach_state *state = tevent_req_data(
+		req, struct ctdb_attach_state);
+	struct ctdb_reply_control *reply;
+	int tdb_flags;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR, ("attach: %s DB_OPEN_FLAGS failed, ret=%d\n",
+				  state->db->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ret = ctdb_reply_control_db_open_flags(reply, &tdb_flags);
+	talloc_free(reply);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("attach: %s DB_OPEN_FLAGS parse failed,"
+				  " ret=%d\n", state->db->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	state->db->ltdb = tdb_wrap_open(state->db, state->db->db_path, 0,
+					tdb_flags, O_RDWR, 0);
+	if (tevent_req_nomem(state->db->ltdb, req)) {
+		/* A failed open is reported through the no-memory path */
+		DEBUG(DEBUG_ERR, ("attach: %s tdb_wrap_open failed\n",
+				  state->db->db_name));
+		return;
+	}
+
+	/* From here on client_db_handle() can find this database */
+	DLIST_ADD(state->client->db, state->db);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of an attach request.
+ *
+ * Returns true on success and, if out is not NULL, stores the database
+ * handle there.  On failure returns false and, if perr is not NULL, stores
+ * the error there.
+ */
+bool ctdb_attach_recv(struct tevent_req *req, int *perr,
+		      struct ctdb_db_context **out)
+{
+	struct ctdb_attach_state *state = tevent_req_data(
+		req, struct ctdb_attach_state);
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		if (out != NULL) {
+			*out = state->db;
+		}
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Synchronous wrapper around ctdb_attach_send()/ctdb_attach_recv().
+ *
+ * Runs the attach request on ev until it finishes.  Returns 0 on success
+ * (the handle is stored in *out if out is not NULL), ENOMEM if the
+ * temporary context or the request cannot be allocated, otherwise the
+ * error reported by ctdb_attach_recv().
+ */
+int ctdb_attach(struct tevent_context *ev,
+		struct ctdb_client_context *client,
+		struct timeval timeout,
+		const char *db_name, uint8_t db_flags,
+		struct ctdb_db_context **out)
+{
+	struct tevent_req *req;
+	TALLOC_CTX *tmp_ctx;
+	bool ok;
+	int ret;
+
+	tmp_ctx = talloc_new(client);
+	if (tmp_ctx == NULL) {
+		return ENOMEM;
+	}
+
+	req = ctdb_attach_send(tmp_ctx, ev, client, timeout,
+			       db_name, db_flags);
+	if (req == NULL) {
+		talloc_free(tmp_ctx);
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	ok = ctdb_attach_recv(req, &ret, out);
+
+	/*
+	 * NOTE: registering call functions on the new handle is currently
+	 * disabled:
+	 *
+	 * ctdb_set_call(db, CTDB_NULL_FUNC, ctdb_null_func);
+	 * ctdb_set_call(db, CTDB_FETCH_FUNC, ctdb_fetch_func);
+	 * ctdb_set_call(db, CTDB_FETCH_WITH_HEADER_FUNC, ctdb_fetch_with_header_func);
+	 */
+
+	/* The request lives on tmp_ctx and goes away with it */
+	talloc_free(tmp_ctx);
+
+	return ok ? 0 : ret;
+}
+
+struct ctdb_detach_state { /* per-request state of the ctdb_detach_send()/recv() pair */
+ struct ctdb_client_context *client; /* client the controls are sent through */
+ struct tevent_context *ev; /* event context reused for the DB_DETACH control */
+ struct timeval timeout; /* timeout passed on to each control */
+ uint32_t db_id; /* database being detached */
+ const char *db_name; /* name obtained from the GET_DBNAME reply */
+};
+
+static void ctdb_detach_dbname_done(struct tevent_req *subreq);
+static void ctdb_detach_done(struct tevent_req *subreq);
+
+/*
+ * Start an asynchronous detach of database db_id.
+ *
+ * Sends GET_DBNAME to the client's own node first; processing continues
+ * in ctdb_detach_dbname_done().  Returns NULL only if the request itself
+ * could not be allocated.
+ */
+struct tevent_req *ctdb_detach_send(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct ctdb_client_context *client,
+				    struct timeval timeout, uint32_t db_id)
+{
+	struct ctdb_detach_state *state;
+	struct ctdb_req_control dbname_ctl;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+
+	req = tevent_req_create(mem_ctx, &state, struct ctdb_detach_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->db_id = db_id;
+	state->timeout = timeout;
+	state->ev = ev;
+	state->client = client;
+
+	ctdb_req_control_get_dbname(&dbname_ctl, db_id);
+	subreq = ctdb_client_control_send(state, ev, client,
+					  ctdb_client_pnn(client), timeout,
+					  &dbname_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_detach_dbname_done, req);
+
+	return req;
+}
+
+/*
+ * Completion handler for the GET_DBNAME control of a detach request.
+ *
+ * Remembers the database name from the reply and then sends DB_DETACH to
+ * the client's own node; processing continues in ctdb_detach_done().
+ */
+static void ctdb_detach_dbname_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_detach_state *state = tevent_req_data(
+		req, struct ctdb_detach_state);
+	struct ctdb_req_control detach_ctl;
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR, ("detach: 0x%x GET_DBNAME failed, ret=%d\n",
+				  state->db_id, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("detach: 0x%x GET_DBNAME failed, ret=%d\n",
+				  state->db_id, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ctdb_req_control_db_detach(&detach_ctl, state->db_id);
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  ctdb_client_pnn(state->client),
+					  state->timeout, &detach_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, ctdb_detach_done, req);
+}
+
+/*
+ * Completion handler for the DB_DETACH control, the last step of a detach
+ * request.
+ *
+ * On success, a handle the client holds for the detached database (looked
+ * up by name) is removed from the client's list and freed, then the
+ * request completes.
+ */
+static void ctdb_detach_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_detach_state *state = tevent_req_data(
+		req, struct ctdb_detach_state);
+	struct ctdb_reply_control *reply;
+	struct ctdb_db_context *handle;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_ERR, ("detach: %s DB_DETACH failed, ret=%d\n",
+				  state->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ret = ctdb_reply_control_db_detach(reply);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("detach: %s DB_DETACH failed, ret=%d\n",
+				  state->db_name, ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* Drop the local handle, if this client had attached the database */
+	handle = client_db_handle(state->client, state->db_name);
+	if (handle != NULL) {
+		DLIST_REMOVE(state->client->db, handle);
+		TALLOC_FREE(handle);
+	}
+
+	tevent_req_done(req);
+}
+
+ /*
+  * Collect the result of a detach request.
+  *
+  * Returns true on success.  On failure returns false and, when perr is
+  * not NULL, stores the unix error code in *perr.
+  */
+ bool ctdb_detach_recv(struct tevent_req *req, int *perr)
+ {
+ 	int err;
+
+ 	if (! tevent_req_is_unix_error(req, &err)) {
+ 		return true;
+ 	}
+
+ 	if (perr != NULL) {
+ 		*perr = err;
+ 	}
+ 	return false;
+ }
+
+ /*
+  * Synchronous wrapper around ctdb_detach_send()/ctdb_detach_recv().
+  *
+  * Runs the event loop until the detach request has finished.  Returns 0
+  * on success, ENOMEM if the request could not be created, otherwise the
+  * error reported by ctdb_detach_recv().
+  */
+ int ctdb_detach(struct tevent_context *ev,
+ 		struct ctdb_client_context *client,
+ 		struct timeval timeout, uint32_t db_id)
+ {
+ 	TALLOC_CTX *tmp_ctx;
+ 	struct tevent_req *req;
+ 	int err;
+ 	int result;
+
+ 	/* Temporary context owning the request for the duration of the call */
+ 	tmp_ctx = talloc_new(client);
+ 	if (tmp_ctx == NULL) {
+ 		return ENOMEM;
+ 	}
+
+ 	req = ctdb_detach_send(tmp_ctx, ev, client, timeout, db_id);
+ 	if (req == NULL) {
+ 		result = ENOMEM;
+ 		goto done;
+ 	}
+
+ 	tevent_req_poll(req, ev);
+
+ 	if (ctdb_detach_recv(req, &err)) {
+ 		result = 0;
+ 	} else {
+ 		result = err;
+ 	}
+
+ done:
+ 	talloc_free(tmp_ctx);
+ 	return result;
+ }
+
+ /* Return the database id stored in the database context. */
+ uint32_t ctdb_db_id(struct ctdb_db_context *db)
+ {
+ 	uint32_t id = db->db_id;
+
+ 	return id;
+ }
+
+ /* Per-call state shared between ctdb_db_traverse_local() and its handler. */
+ struct ctdb_db_traverse_local_state {
+ /* Callback invoked by the handler for every record */
+ ctdb_rec_parser_func_t parser;
+ /* Opaque pointer passed through to parser */
+ void *private_data;
+ /* If true, the ltdb header is extracted and handed to parser */
+ bool extract_header;
+ /* Non-zero error from header extraction or parser; stops the traverse */
+ int error;
+ };
+
+ /*
+  * tdb traverse callback used by ctdb_db_traverse_local().
+  *
+  * Optionally extracts the ltdb header from the record and then calls the
+  * user supplied parser.  Returns 0 to continue the traverse, or 1 to
+  * stop it after recording the error in the traverse state.
+  */
+ static int ctdb_db_traverse_local_handler(struct tdb_context *tdb,
+ 					  TDB_DATA key, TDB_DATA data,
+ 					  void *private_data)
+ {
+ 	struct ctdb_db_traverse_local_state *st = private_data;
+ 	struct ctdb_ltdb_header header;
+ 	struct ctdb_ltdb_header *hdr = NULL;
+ 	int rc;
+
+ 	if (st->extract_header) {
+ 		rc = ctdb_ltdb_header_extract(&data, &header);
+ 		if (rc != 0) {
+ 			st->error = rc;
+ 			return 1;
+ 		}
+ 		hdr = &header;
+ 	}
+
+ 	/* hdr stays NULL when the caller did not ask for header extraction */
+ 	rc = st->parser(0, hdr, key, data, st->private_data);
+ 	if (rc != 0) {
+ 		st->error = rc;
+ 		return 1;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Traverse the local tdb of a database, calling parser for every record.
+  *
+  * readonly selects tdb_traverse_read() instead of tdb_traverse().
+  * extract_header asks for the ltdb header to be extracted from each
+  * record and passed to parser.
+  *
+  * Returns EIO if the tdb traverse itself fails, otherwise the error
+  * recorded by the handler (0 when every record was processed).
+  */
+ int ctdb_db_traverse_local(struct ctdb_db_context *db, bool readonly,
+ 			   bool extract_header,
+ 			   ctdb_rec_parser_func_t parser, void *private_data)
+ {
+ 	struct ctdb_db_traverse_local_state state = {
+ 		.parser = parser,
+ 		.private_data = private_data,
+ 		.extract_header = extract_header,
+ 		.error = 0,
+ 	};
+ 	int count;
+
+ 	count = readonly ?
+ 		tdb_traverse_read(client_db_tdb(db),
+ 				  ctdb_db_traverse_local_handler, &state) :
+ 		tdb_traverse(client_db_tdb(db),
+ 			     ctdb_db_traverse_local_handler, &state);
+
+ 	return (count == -1) ? EIO : state.error;
+ }
+
+ /* State of an asynchronous traverse started by ctdb_db_traverse_send(). */
+ struct ctdb_db_traverse_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Database being traversed */
+ struct ctdb_db_context *db;
+ /* Node the TRAVERSE_START_EXT control is sent to */
+ uint32_t destnode;
+ /* SRVID of the message handler receiving the traversed records */
+ uint64_t srvid;
+ /* Timeout used for the traverse start control */
+ struct timeval timeout;
+ /* Callback invoked for every received record */
+ ctdb_rec_parser_func_t parser;
+ /* Opaque pointer passed through to parser */
+ void *private_data;
+ /* Error to report once the message handler has been removed */
+ int result;
+ };
+
+static void ctdb_db_traverse_handler_set(struct tevent_req *subreq);
+static void ctdb_db_traverse_started(struct tevent_req *subreq);
+static void ctdb_db_traverse_handler(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+static void ctdb_db_traverse_remove_handler(struct tevent_req *req);
+static void ctdb_db_traverse_handler_removed(struct tevent_req *subreq);
+
+ /*
+  * Start an asynchronous traverse of a database.
+  *
+  * The first step registers a message handler on a SRVID derived from
+  * the process id; the traverse control is sent to destnode once that
+  * handler is in place.  parser is called with private_data for every
+  * record delivered to the handler.
+  *
+  * Returns NULL if the request cannot be allocated.
+  */
+ struct tevent_req *ctdb_db_traverse_send(TALLOC_CTX *mem_ctx,
+ 					 struct tevent_context *ev,
+ 					 struct ctdb_client_context *client,
+ 					 struct ctdb_db_context *db,
+ 					 uint32_t destnode,
+ 					 struct timeval timeout,
+ 					 ctdb_rec_parser_func_t parser,
+ 					 void *private_data)
+ {
+ 	struct ctdb_db_traverse_state *state;
+ 	struct tevent_req *req;
+ 	struct tevent_req *handler_req;
+
+ 	req = tevent_req_create(mem_ctx, &state,
+ 				struct ctdb_db_traverse_state);
+ 	if (req == NULL) {
+ 		return NULL;
+ 	}
+
+ 	/* Who we talk to */
+ 	state->ev = ev;
+ 	state->client = client;
+ 	state->destnode = destnode;
+ 	state->timeout = timeout;
+
+ 	/* What we traverse and where the records are delivered */
+ 	state->db = db;
+ 	state->srvid = CTDB_SRVID_CLIENT_RANGE | getpid();
+ 	state->parser = parser;
+ 	state->private_data = private_data;
+
+ 	handler_req = ctdb_client_set_message_handler_send(
+ 					state, ev, client, state->srvid,
+ 					ctdb_db_traverse_handler, req);
+ 	if (tevent_req_nomem(handler_req, req)) {
+ 		return tevent_req_post(req, ev);
+ 	}
+ 	tevent_req_set_callback(handler_req, ctdb_db_traverse_handler_set,
+ 				req);
+
+ 	return req;
+ }
+
+ /*
+  * Called once the traverse message handler has been registered.
+  *
+  * Sends the TRAVERSE_START_EXT control to the destination node.  If the
+  * control cannot be allocated, ENOMEM is recorded and the message
+  * handler is removed again before the request finishes.
+  */
+ static void ctdb_db_traverse_handler_set(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_db_traverse_state *state = tevent_req_data(
+ 		req, struct ctdb_db_traverse_state);
+ 	struct ctdb_traverse_start_ext start;
+ 	struct ctdb_req_control request;
+ 	bool ok;
+ 	int err = 0;
+
+ 	ok = ctdb_client_set_message_handler_recv(subreq, &err);
+ 	TALLOC_FREE(subreq);
+ 	if (! ok) {
+ 		tevent_req_error(req, err);
+ 		return;
+ 	}
+
+ 	ZERO_STRUCT(start);
+ 	start.db_id = ctdb_db_id(state->db);
+ 	start.reqid = 0;
+ 	start.srvid = state->srvid;
+ 	start.withemptyrecords = false;
+
+ 	ctdb_req_control_traverse_start_ext(&request, &start);
+ 	subreq = ctdb_client_control_send(state, state->ev, state->client,
+ 					  state->destnode, state->timeout,
+ 					  &request);
+ 	if (subreq != NULL) {
+ 		tevent_req_set_callback(subreq, ctdb_db_traverse_started,
+ 					req);
+ 		return;
+ 	}
+
+ 	/* The handler is already registered, so unwind before failing */
+ 	state->result = ENOMEM;
+ 	ctdb_db_traverse_remove_handler(req);
+ }
+
+ /*
+  * Completion callback for the TRAVERSE_START_EXT control.
+  *
+  * On success nothing more is done here; the request is driven further
+  * by ctdb_db_traverse_handler().  On failure the error is recorded and
+  * the message handler is removed.
+  */
+ static void ctdb_db_traverse_started(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_db_traverse_state *state = tevent_req_data(
+ 		req, struct ctdb_db_traverse_state);
+ 	struct ctdb_reply_control *reply;
+ 	bool ok;
+ 	int err = 0;
+
+ 	ok = ctdb_client_control_recv(subreq, &err, state, &reply);
+ 	TALLOC_FREE(subreq);
+ 	if (! ok) {
+ 		DEBUG(DEBUG_ERR, ("traverse: control failed, ret=%d\n", err));
+ 		state->result = err;
+ 		ctdb_db_traverse_remove_handler(req);
+ 		return;
+ 	}
+
+ 	err = ctdb_reply_control_traverse_start_ext(reply);
+ 	talloc_free(reply);
+ 	if (err == 0) {
+ 		/* Traverse is running, records arrive via the handler */
+ 		return;
+ 	}
+
+ 	DEBUG(DEBUG_ERR, ("traverse: control reply failed, ret=%d\n",
+ 			  err));
+ 	state->result = err;
+ 	ctdb_db_traverse_remove_handler(req);
+ }
+
+ /*
+  * Message handler receiving the records of a running traverse.
+  *
+  * private_data is the traverse tevent request.  Each message is decoded
+  * as a ctdb_rec_data; a record with empty key and empty data ends the
+  * traverse, any other record is passed to the user supplied parser.
+  */
+ static void ctdb_db_traverse_handler(uint64_t srvid, TDB_DATA data,
+ void *private_data)
+ {
+ struct tevent_req *req = talloc_get_type_abort(
+ private_data, struct tevent_req);
+ struct ctdb_db_traverse_state *state = tevent_req_data(
+ req, struct ctdb_db_traverse_state);
+ struct ctdb_rec_data *rec;
+ struct ctdb_ltdb_header header;
+ size_t np;
+ int ret;
+
+ ret = ctdb_rec_data_pull(data.dptr, data.dsize, state, &rec, &np);
+ if (ret != 0) {
+ /* Undecodable message: ignored, the traverse keeps running */
+ return;
+ }
+
+ /* Empty key and empty data mark the end of the traverse */
+ if (rec->key.dsize == 0 && rec->data.dsize == 0) {
+ talloc_free(rec);
+ ctdb_db_traverse_remove_handler(req);
+ return;
+ }
+
+ ret = ctdb_ltdb_header_extract(&rec->data, &header);
+ if (ret != 0) {
+ /* Record without a valid header is skipped */
+ talloc_free(rec);
+ return;
+ }
+
+ /* Checked after header extraction: skip records with no data left */
+ if (rec->data.dsize == 0) {
+ talloc_free(rec);
+ return;
+ }
+
+ ret = state->parser(rec->reqid, &header, rec->key, rec->data,
+ state->private_data);
+ talloc_free(rec);
+ if (ret != 0) {
+ /* Parser asked to stop: remember its error and unregister */
+ state->result = ret;
+ ctdb_db_traverse_remove_handler(req);
+ }
+ }
+
+ /*
+  * Begin removing the traverse message handler.
+  *
+  * The traverse request finishes in ctdb_db_traverse_handler_removed(),
+  * or right here with an out-of-memory error if the removal request
+  * cannot be created.
+  */
+ static void ctdb_db_traverse_remove_handler(struct tevent_req *req)
+ {
+ 	struct ctdb_db_traverse_state *state;
+ 	struct tevent_req *remove_req;
+
+ 	state = tevent_req_data(req, struct ctdb_db_traverse_state);
+
+ 	remove_req = ctdb_client_remove_message_handler_send(
+ 				state, state->ev, state->client,
+ 				state->srvid, req);
+ 	if (tevent_req_nomem(remove_req, req)) {
+ 		return;
+ 	}
+ 	tevent_req_set_callback(remove_req,
+ 				ctdb_db_traverse_handler_removed, req);
+ }
+
+ /*
+  * Final step of a traverse: the message handler has been removed.
+  *
+  * A failure to remove the handler takes precedence; otherwise the
+  * request finishes with the error recorded in state->result, or
+  * successfully if none was recorded.
+  */
+ static void ctdb_db_traverse_handler_removed(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_db_traverse_state *state = tevent_req_data(
+ 		req, struct ctdb_db_traverse_state);
+ 	bool ok;
+ 	int err;
+
+ 	ok = ctdb_client_remove_message_handler_recv(subreq, &err);
+ 	TALLOC_FREE(subreq);
+ 	if (! ok) {
+ 		tevent_req_error(req, err);
+ 		return;
+ 	}
+
+ 	if (state->result == 0) {
+ 		tevent_req_done(req);
+ 		return;
+ 	}
+
+ 	tevent_req_error(req, state->result);
+ }
+
+ /*
+  * Collect the result of a traverse request.
+  *
+  * Returns true on success.  On failure returns false and, when perr is
+  * not NULL, stores the unix error code in *perr.
+  */
+ bool ctdb_db_traverse_recv(struct tevent_req *req, int *perr)
+ {
+ 	int err;
+
+ 	if (! tevent_req_is_unix_error(req, &err)) {
+ 		return true;
+ 	}
+
+ 	if (perr != NULL) {
+ 		*perr = err;
+ 	}
+ 	return false;
+ }
+
+ /*
+  * Synchronous wrapper around ctdb_db_traverse_send()/_recv().
+  *
+  * Runs the event loop until the traverse has finished.  Returns 0 on
+  * success, ENOMEM if the request could not be created, otherwise the
+  * error reported by ctdb_db_traverse_recv().
+  */
+ int ctdb_db_traverse(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ 		     struct ctdb_client_context *client,
+ 		     struct ctdb_db_context *db,
+ 		     uint32_t destnode, struct timeval timeout,
+ 		     ctdb_rec_parser_func_t parser, void *private_data)
+ {
+ 	struct tevent_req *req;
+ 	int err = 0;
+
+ 	req = ctdb_db_traverse_send(mem_ctx, ev, client, db, destnode,
+ 				    timeout, parser, private_data);
+ 	if (req == NULL) {
+ 		return ENOMEM;
+ 	}
+
+ 	tevent_req_poll(req, ev);
+
+ 	return ctdb_db_traverse_recv(req, &err) ? 0 : err;
+ }
+
+ /*
+  * Fetch a record from the local tdb and split it into header and data.
+  *
+  * If the record does not exist, *header is set to an empty header with
+  * dmaster CTDB_UNKNOWN_PNN, *data (if requested) to tdb_null, and 0 is
+  * returned.  Otherwise the ltdb header is parsed into *header and, if
+  * data is not NULL, the remaining bytes are copied onto mem_ctx.
+  *
+  * Returns 0 on success, EIO on a tdb error (including a record shorter
+  * than the ltdb header), ENOMEM if the data copy fails, or the error
+  * from ctdb_ltdb_header_pull().
+  */
+ int ctdb_ltdb_fetch(struct ctdb_db_context *db, TDB_DATA key,
+ 		    struct ctdb_ltdb_header *header,
+ 		    TALLOC_CTX *mem_ctx, TDB_DATA *data)
+ {
+ 	TDB_DATA rec;
+ 	size_t np;
+ 	int ret;
+
+ 	rec = tdb_fetch(client_db_tdb(db), key);
+ 	if (rec.dsize < sizeof(struct ctdb_ltdb_header)) {
+ 		/* No record present */
+ 		if (rec.dptr != NULL) {
+ 			free(rec.dptr);
+ 		}
+
+ 		if (tdb_error(client_db_tdb(db)) != TDB_ERR_NOEXIST) {
+ 			return EIO;
+ 		}
+
+ 		*header = (struct ctdb_ltdb_header) {
+ 			.dmaster = CTDB_UNKNOWN_PNN,
+ 		};
+
+ 		if (data != NULL) {
+ 			*data = tdb_null;
+ 		}
+ 		return 0;
+ 	}
+
+ 	ret = ctdb_ltdb_header_pull(rec.dptr, rec.dsize, header, &np);
+ 	if (ret != 0) {
+ 		/* tdb_fetch() hands over a malloc'ed buffer, release it */
+ 		free(rec.dptr);
+ 		return ret;
+ 	}
+
+ 	ret = 0;
+ 	if (data != NULL) {
+ 		/* np is the number of bytes consumed by the header */
+ 		data->dsize = rec.dsize - np;
+ 		data->dptr = talloc_memdup(mem_ctx, rec.dptr + np,
+ 					   data->dsize);
+ 		if (data->dptr == NULL) {
+ 			ret = ENOMEM;
+ 		}
+ 	}
+
+ 	free(rec.dptr);
+ 	return ret;
+ }
+
+/*
+ * Fetch a record from volatile database
+ *
+ * Steps:
+ * 1. Get a lock on the hash chain
+ * 2. If the record does not exist, migrate the record
+ * 3. If readonly=true and delegations do not exist, migrate the record.
+ * 4. If readonly=false and delegations exist, migrate the record.
+ * 5. If the local node is not dmaster, migrate the record.
+ * 6. Return record
+ */
+
+ /* State of an asynchronous fetch-lock request. */
+ struct ctdb_fetch_lock_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Record handle being built; returned by ctdb_fetch_lock_recv() */
+ struct ctdb_record_handle *h;
+ /* True if the caller asked for readonly access */
+ bool readonly;
+ /* PNN from ctdb_client_pnn(), compared against the record's dmaster */
+ uint32_t pnn;
+ };
+
+static int ctdb_fetch_lock_check(struct tevent_req *req);
+static void ctdb_fetch_lock_migrate(struct tevent_req *req);
+static void ctdb_fetch_lock_migrate_done(struct tevent_req *subreq);
+
+ /*
+  * Start fetching and locking a record of a volatile database.
+  *
+  * A record handle is allocated on db and the key is copied into it.
+  * The request fails with EINVAL if db is not volatile.  If the record
+  * is already usable locally the request completes immediately;
+  * otherwise ctdb_fetch_lock_check() starts a migration and the request
+  * completes from ctdb_fetch_lock_migrate_done().
+  *
+  * Returns NULL if the request cannot be allocated.
+  */
+ struct tevent_req *ctdb_fetch_lock_send(TALLOC_CTX *mem_ctx,
+ 					struct tevent_context *ev,
+ 					struct ctdb_client_context *client,
+ 					struct ctdb_db_context *db,
+ 					TDB_DATA key, bool readonly)
+ {
+ 	struct ctdb_fetch_lock_state *state;
+ 	struct tevent_req *req;
+ 	int ret;
+
+ 	req = tevent_req_create(mem_ctx, &state, struct ctdb_fetch_lock_state);
+ 	if (req == NULL) {
+ 		return NULL;
+ 	}
+
+ 	state->ev = ev;
+ 	state->client = client;
+
+ 	/* The handle outlives the request, so it hangs off db, not state */
+ 	state->h = talloc_zero(db, struct ctdb_record_handle);
+ 	if (tevent_req_nomem(state->h, req)) {
+ 		return tevent_req_post(req, ev);
+ 	}
+ 	state->h->ev = ev;
+ 	state->h->client = client;
+ 	state->h->db = db;
+ 	state->h->key.dptr = talloc_memdup(state->h, key.dptr, key.dsize);
+ 	if (tevent_req_nomem(state->h->key.dptr, req)) {
+ 		return tevent_req_post(req, ev);
+ 	}
+ 	state->h->key.dsize = key.dsize;
+ 	state->h->readonly = false;
+
+ 	state->readonly = readonly;
+ 	state->pnn = ctdb_client_pnn(client);
+
+ 	/* Check that database is not persistent */
+ 	if (! ctdb_db_volatile(db)) {
+ 		DEBUG(DEBUG_ERR, ("fetch_lock: %s database not volatile\n",
+ 				  db->db_name));
+ 		tevent_req_error(req, EINVAL);
+ 		return tevent_req_post(req, ev);
+ 	}
+
+ 	ret = ctdb_fetch_lock_check(req);
+ 	if (ret == 0) {
+ 		tevent_req_done(req);
+ 		return tevent_req_post(req, ev);
+ 	}
+ 	if (ret != EAGAIN) {
+ 		tevent_req_error(req, ret);
+ 		return tevent_req_post(req, ev);
+ 	}
+
+ 	/*
+ 	 * EAGAIN means a migration was requested.  If that request could
+ 	 * not be created, req has already been finished with an error and
+ 	 * must be posted, otherwise the caller's callback would never run.
+ 	 */
+ 	if (! tevent_req_is_in_progress(req)) {
+ 		return tevent_req_post(req, ev);
+ 	}
+
+ 	return req;
+ }
+
+ /*
+  * Try to satisfy a fetch-lock request from the local tdb.
+  *
+  * Takes the chain lock for the key and inspects the local record.
+  *
+  * Returns 0 if the record can be used locally; the chain lock is then
+  * left held and the raw record (header included) is stored in the
+  * handle.  Returns EAGAIN after releasing the lock and starting a
+  * migration when the record is missing or cannot be used as is.  Any
+  * other value is an error; the chain lock is not held in that case.
+  */
+ static int ctdb_fetch_lock_check(struct tevent_req *req)
+ {
+ 	struct ctdb_fetch_lock_state *state = tevent_req_data(
+ 		req, struct ctdb_fetch_lock_state);
+ 	struct ctdb_record_handle *h = state->h;
+ 	struct ctdb_ltdb_header header;
+ 	TDB_DATA data = tdb_null;
+ 	size_t np;
+ 	int ret, err = 0;
+ 	bool do_migrate = false;
+
+ 	ret = tdb_chainlock(client_db_tdb(h->db), h->key);
+ 	if (ret != 0) {
+ 		DEBUG(DEBUG_ERR,
+ 		      ("fetch_lock: %s tdb_chainlock failed, %s\n",
+ 		       h->db->db_name, tdb_errorstr(client_db_tdb(h->db))));
+ 		/*
+ 		 * The lock was not taken, so do not go through the common
+ 		 * exit path which would try to unlock the chain.
+ 		 */
+ 		return EIO;
+ 	}
+
+ 	data = tdb_fetch(client_db_tdb(h->db), h->key);
+ 	if (data.dptr == NULL) {
+ 		if (tdb_error(client_db_tdb(h->db)) == TDB_ERR_NOEXIST) {
+ 			goto migrate;
+ 		} else {
+ 			err = EIO;
+ 			goto failed;
+ 		}
+ 	}
+
+ 	/* Got the record */
+ 	ret = ctdb_ltdb_header_pull(data.dptr, data.dsize, &header, &np);
+ 	if (ret != 0) {
+ 		err = ret;
+ 		goto failed;
+ 	}
+
+ 	if (! state->readonly) {
+ 		/* Read/write access */
+ 		if (header.dmaster == state->pnn &&
+ 		    header.flags & CTDB_REC_RO_HAVE_DELEGATIONS) {
+ 			goto migrate;
+ 		}
+
+ 		if (header.dmaster != state->pnn) {
+ 			goto migrate;
+ 		}
+ 	} else {
+ 		/* Readonly access */
+ 		if (header.dmaster != state->pnn &&
+ 		    ! (header.flags & (CTDB_REC_RO_HAVE_READONLY |
+ 				       CTDB_REC_RO_HAVE_DELEGATIONS))) {
+ 			goto migrate;
+ 		}
+ 	}
+
+ 	/* We are the dmaster or readonly delegation */
+ 	h->header = header;
+ 	h->data = data;
+ 	if (header.flags & (CTDB_REC_RO_HAVE_READONLY |
+ 			    CTDB_REC_RO_HAVE_DELEGATIONS)) {
+ 		h->readonly = true;
+ 	}
+ 	/* Success: the chain lock stays held until the handle is freed */
+ 	return 0;
+
+ migrate:
+ 	do_migrate = true;
+ 	err = EAGAIN;
+
+ failed:
+ 	if (data.dptr != NULL) {
+ 		free(data.dptr);
+ 	}
+ 	ret = tdb_chainunlock(client_db_tdb(h->db), h->key);
+ 	if (ret != 0) {
+ 		DEBUG(DEBUG_ERR,
+ 		      ("fetch_lock: %s tdb_chainunlock failed, %s\n",
+ 		       h->db->db_name, tdb_errorstr(client_db_tdb(h->db))));
+ 		return EIO;
+ 	}
+
+ 	/* Only start the migration once the chain lock has been released */
+ 	if (do_migrate) {
+ 		ctdb_fetch_lock_migrate(req);
+ 	}
+ 	return err;
+ }
+
+ /*
+  * Ask for the record to be migrated by sending a CTDB_NULL_FUNC call
+  * with CTDB_IMMEDIATE_MIGRATION set (plus CTDB_WANT_READONLY for
+  * readonly requests).  Continues in ctdb_fetch_lock_migrate_done().
+  */
+ static void ctdb_fetch_lock_migrate(struct tevent_req *req)
+ {
+ 	struct ctdb_fetch_lock_state *state;
+ 	struct ctdb_req_call call;
+ 	struct tevent_req *call_req;
+
+ 	state = tevent_req_data(req, struct ctdb_fetch_lock_state);
+
+ 	ZERO_STRUCT(call);
+ 	call.db_id = state->h->db->db_id;
+ 	call.callid = CTDB_NULL_FUNC;
+ 	call.key = state->h->key;
+ 	call.calldata = tdb_null;
+ 	call.flags = state->readonly ?
+ 		(CTDB_IMMEDIATE_MIGRATION | CTDB_WANT_READONLY) :
+ 		CTDB_IMMEDIATE_MIGRATION;
+
+ 	call_req = ctdb_client_call_send(state, state->ev, state->client,
+ 					 &call);
+ 	if (tevent_req_nomem(call_req, req)) {
+ 		return;
+ 	}
+
+ 	tevent_req_set_callback(call_req, ctdb_fetch_lock_migrate_done, req);
+ }
+
+ /*
+  * Completion callback for the migration call.
+  *
+  * After a successful migration the local record is checked again.  The
+  * request is done if it is now usable, stays pending if the check
+  * started yet another migration (EAGAIN), and fails otherwise.
+  */
+ static void ctdb_fetch_lock_migrate_done(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_fetch_lock_state *state = tevent_req_data(
+ 		req, struct ctdb_fetch_lock_state);
+ 	struct ctdb_reply_call *reply;
+ 	bool ok;
+ 	int err;
+
+ 	ok = ctdb_client_call_recv(subreq, state, &reply, &err);
+ 	TALLOC_FREE(subreq);
+ 	if (! ok) {
+ 		DEBUG(DEBUG_ERR, ("fetch_lock: %s CALL failed, ret=%d\n",
+ 				  state->h->db->db_name, err));
+ 		tevent_req_error(req, err);
+ 		return;
+ 	}
+
+ 	if (reply->status != 0) {
+ 		tevent_req_error(req, EIO);
+ 		return;
+ 	}
+ 	talloc_free(reply);
+
+ 	err = ctdb_fetch_lock_check(req);
+ 	if (err == 0) {
+ 		tevent_req_done(req);
+ 		return;
+ 	}
+ 	if (err == EAGAIN) {
+ 		/* A new migration is in flight, wait for its callback */
+ 		return;
+ 	}
+
+ 	tevent_req_error(req, err);
+ }
+
+ /*
+  * talloc destructor for a record handle returned by
+  * ctdb_fetch_lock_recv(): releases the chain lock for the key and frees
+  * the malloc'ed record buffer.  An unlock failure is only logged.
+  */
+ static int ctdb_record_handle_destructor(struct ctdb_record_handle *h)
+ {
+ 	int rc = tdb_chainunlock(client_db_tdb(h->db), h->key);
+
+ 	if (rc != 0) {
+ 		DEBUG(DEBUG_ERR,
+ 		      ("fetch_lock: %s tdb_chainunlock failed, %s\n",
+ 		       h->db->db_name, tdb_errorstr(client_db_tdb(h->db))));
+ 	}
+
+ 	free(h->data.dptr);
+
+ 	/* Always allow the handle to be freed */
+ 	return 0;
+ }
+
+ /*
+  * Collect the result of a fetch-lock request.
+  *
+  * On success returns the record handle; freeing it releases the chain
+  * lock.  If header is not NULL it receives the ltdb header.  If data is
+  * not NULL it receives a copy of the record data (without the header)
+  * allocated on mem_ctx, or a NULL pointer for empty data.
+  *
+  * On failure returns NULL, frees the handle and, when perr is not NULL,
+  * stores the error code in *perr.
+  */
+ struct ctdb_record_handle *ctdb_fetch_lock_recv(struct tevent_req *req,
+ 						struct ctdb_ltdb_header *header,
+ 						TALLOC_CTX *mem_ctx,
+ 						TDB_DATA *data, int *perr)
+ {
+ 	struct ctdb_fetch_lock_state *state = tevent_req_data(
+ 		req, struct ctdb_fetch_lock_state);
+ 	struct ctdb_record_handle *h = state->h;
+ 	int err;
+
+ 	if (tevent_req_is_unix_error(req, &err)) {
+ 		/*
+ 		 * The handle is allocated on the database context, so it
+ 		 * must be released here whether or not the caller wants the
+ 		 * error code.  No chain lock is held on this path.
+ 		 */
+ 		TALLOC_FREE(state->h);
+ 		if (perr != NULL) {
+ 			*perr = err;
+ 		}
+ 		return NULL;
+ 	}
+
+ 	/*
+ 	 * The request succeeded, so the chain lock is held and h->data is
+ 	 * malloc'ed.  Install the destructor before anything can fail so
+ 	 * that freeing the handle always releases both.
+ 	 */
+ 	talloc_set_destructor(h, ctdb_record_handle_destructor);
+
+ 	if (header != NULL) {
+ 		*header = h->header;
+ 	}
+ 	if (data != NULL) {
+ 		size_t offset;
+
+ 		/* h->data still starts with the ltdb header, skip it */
+ 		offset = ctdb_ltdb_header_len(&h->header);
+
+ 		data->dsize = h->data.dsize - offset;
+ 		if (data->dsize == 0) {
+ 			data->dptr = NULL;
+ 		} else {
+ 			data->dptr = talloc_memdup(mem_ctx,
+ 						   h->data.dptr + offset,
+ 						   data->dsize);
+ 			if (data->dptr == NULL) {
+ 				TALLOC_FREE(state->h);
+ 				if (perr != NULL) {
+ 					*perr = ENOMEM;
+ 				}
+ 				return NULL;
+ 			}
+ 		}
+ 	}
+
+ 	return h;
+ }
+
+ /*
+  * Synchronous wrapper around ctdb_fetch_lock_send()/_recv().
+  *
+  * On success stores the record handle in *out and returns 0.  Returns
+  * ENOMEM if the request could not be created, otherwise the error
+  * reported by ctdb_fetch_lock_recv().
+  */
+ int ctdb_fetch_lock(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ 		    struct ctdb_client_context *client,
+ 		    struct ctdb_db_context *db, TDB_DATA key, bool readonly,
+ 		    struct ctdb_record_handle **out,
+ 		    struct ctdb_ltdb_header *header, TDB_DATA *data)
+ {
+ 	struct ctdb_record_handle *handle;
+ 	struct tevent_req *req;
+ 	int err = 0;
+
+ 	req = ctdb_fetch_lock_send(mem_ctx, ev, client, db, key, readonly);
+ 	if (req == NULL) {
+ 		return ENOMEM;
+ 	}
+
+ 	tevent_req_poll(req, ev);
+
+ 	handle = ctdb_fetch_lock_recv(req, header, mem_ctx, data, &err);
+ 	if (handle != NULL) {
+ 		*out = handle;
+ 		return 0;
+ 	}
+
+ 	return err;
+ }
+
+ /*
+  * Store new data for a record held via a record handle.
+  *
+  * data is the record payload without the ltdb header; the header kept
+  * in the handle is written in front of it.  Nothing is written if the
+  * payload equals the copy cached in the handle.
+  *
+  * Returns 0 on success, EINVAL if the record was obtained as a
+  * readonly copy, or EIO if the tdb store fails.
+  */
+ int ctdb_store_record(struct ctdb_record_handle *h, TDB_DATA data)
+ {
+ 	uint8_t header[sizeof(struct ctdb_ltdb_header)];
+ 	TDB_DATA rec[2];
+ 	uint8_t *cache;
+ 	size_t offset, np;
+ 	int ret;
+
+ 	/* Cannot modify the record if it was obtained as a readonly copy */
+ 	if (h->readonly) {
+ 		return EINVAL;
+ 	}
+
+ 	/*
+ 	 * Check if the new data is same.  The cached record in h->data
+ 	 * still starts with the ltdb header, so the payload has to be
+ 	 * compared past that header.
+ 	 */
+ 	offset = ctdb_ltdb_header_len(&h->header);
+ 	if (h->data.dptr != NULL &&
+ 	    h->data.dsize >= offset &&
+ 	    h->data.dsize - offset == data.dsize &&
+ 	    (data.dsize == 0 ||
+ 	     memcmp(h->data.dptr + offset, data.dptr, data.dsize) == 0)) {
+ 		/* No need to do anything */
+ 		return 0;
+ 	}
+
+ 	ctdb_ltdb_header_push(&h->header, header, &np);
+
+ 	rec[0].dsize = np;
+ 	rec[0].dptr = header;
+
+ 	rec[1].dsize = data.dsize;
+ 	rec[1].dptr = data.dptr;
+
+ 	ret = tdb_storev(client_db_tdb(h->db), h->key, rec, 2, TDB_REPLACE);
+ 	if (ret != 0) {
+ 		DEBUG(DEBUG_ERR,
+ 		      ("store_record: %s tdb_storev failed, %s\n",
+ 		       h->db->db_name, tdb_errorstr(client_db_tdb(h->db))));
+ 		return EIO;
+ 	}
+
+ 	/*
+ 	 * Keep the cached copy in step with what is now stored, so that a
+ 	 * later store is compared against the current record rather than
+ 	 * the one originally fetched.  If the copy cannot be allocated the
+ 	 * cache is dropped, which simply disables the shortcut above.
+ 	 */
+ 	cache = malloc(np + data.dsize);
+ 	if (cache != NULL) {
+ 		memcpy(cache, header, np);
+ 		if (data.dsize > 0) {
+ 			memcpy(cache + np, data.dptr, data.dsize);
+ 		}
+ 	}
+ 	free(h->data.dptr);
+ 	if (cache == NULL) {
+ 		h->data = tdb_null;
+ 	} else {
+ 		h->data.dptr = cache;
+ 		h->data.dsize = np + data.dsize;
+ 	}
+
+ 	return 0;
+ }
+
+ /* State of an asynchronous record delete request. */
+ struct ctdb_delete_record_state {
+ /* Handle of the record being deleted; owned by the caller */
+ struct ctdb_record_handle *h;
+ };
+
+static void ctdb_delete_record_done(struct tevent_req *subreq);
+
+ /*
+  * Start deleting the record behind a record handle.
+  *
+  * The record is first replaced in the local tdb by one that contains
+  * only the ltdb header, then a SCHEDULE_FOR_DELETION control is sent
+  * to the node returned by ctdb_client_pnn().  Fails with EINVAL for a
+  * readonly handle and with EIO if the local store fails.
+  *
+  * Returns NULL if the request cannot be allocated.
+  */
+ struct tevent_req *ctdb_delete_record_send(TALLOC_CTX *mem_ctx,
+ 					   struct tevent_context *ev,
+ 					   struct ctdb_record_handle *h)
+ {
+ 	struct ctdb_delete_record_state *state;
+ 	struct tevent_req *req, *control_req;
+ 	uint8_t hdrbuf[sizeof(struct ctdb_ltdb_header)];
+ 	struct ctdb_req_control request;
+ 	struct ctdb_key_data keydata;
+ 	TDB_DATA empty_rec;
+ 	size_t hdrlen;
+ 	int rc;
+
+ 	req = tevent_req_create(mem_ctx, &state,
+ 				struct ctdb_delete_record_state);
+ 	if (req == NULL) {
+ 		return NULL;
+ 	}
+
+ 	state->h = h;
+
+ 	/* Cannot delete the record if it was obtained as a readonly copy */
+ 	if (h->readonly) {
+ 		DEBUG(DEBUG_ERR, ("fetch_lock delete: %s readonly record\n",
+ 				  h->db->db_name));
+ 		tevent_req_error(req, EINVAL);
+ 		return tevent_req_post(req, ev);
+ 	}
+
+ 	/* Header-only record: the payload is dropped locally right away */
+ 	ctdb_ltdb_header_push(&h->header, hdrbuf, &hdrlen);
+ 	empty_rec.dptr = hdrbuf;
+ 	empty_rec.dsize = hdrlen;
+
+ 	rc = tdb_store(client_db_tdb(h->db), h->key, empty_rec, TDB_REPLACE);
+ 	if (rc != 0) {
+ 		D_ERR("fetch_lock delete: %s tdb_store failed, %s\n",
+ 		      h->db->db_name,
+ 		      tdb_errorstr(client_db_tdb(h->db)));
+ 		tevent_req_error(req, EIO);
+ 		return tevent_req_post(req, ev);
+ 	}
+
+ 	keydata.key = h->key;
+ 	keydata.header = h->header;
+ 	keydata.db_id = h->db->db_id;
+
+ 	ctdb_req_control_schedule_for_deletion(&request, &keydata);
+ 	control_req = ctdb_client_control_send(state, ev, h->client,
+ 					       ctdb_client_pnn(h->client),
+ 					       tevent_timeval_zero(),
+ 					       &request);
+ 	if (tevent_req_nomem(control_req, req)) {
+ 		return tevent_req_post(req, ev);
+ 	}
+ 	tevent_req_set_callback(control_req, ctdb_delete_record_done, req);
+
+ 	return req;
+ }
+
+ /*
+  * Completion callback for the SCHEDULE_FOR_DELETION control; finishes
+  * the delete request with the outcome of the control.
+  */
+ static void ctdb_delete_record_done(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req;
+ 	struct ctdb_delete_record_state *state;
+ 	bool ok;
+ 	int err;
+
+ 	req = tevent_req_callback_data(subreq, struct tevent_req);
+ 	state = tevent_req_data(req, struct ctdb_delete_record_state);
+
+ 	/* The reply payload is not needed, only success or failure */
+ 	ok = ctdb_client_control_recv(subreq, &err, NULL, NULL);
+ 	TALLOC_FREE(subreq);
+ 	if (ok) {
+ 		tevent_req_done(req);
+ 		return;
+ 	}
+
+ 	D_ERR("delete_record: %s SCHEDULE_FOR_DELETION failed, ret=%d\n",
+ 	      state->h->db->db_name,
+ 	      err);
+ 	tevent_req_error(req, err);
+ }
+
+ /*
+  * Collect the result of a record delete request.
+  *
+  * Returns true on success.  On failure returns false and, when perr is
+  * not NULL, stores the unix error code in *perr.
+  */
+ bool ctdb_delete_record_recv(struct tevent_req *req, int *perr)
+ {
+ 	int code;
+
+ 	if (! tevent_req_is_unix_error(req, &code)) {
+ 		return true;
+ 	}
+
+ 	if (perr != NULL) {
+ 		*perr = code;
+ 	}
+ 	return false;
+ }
+
+
+ /*
+  * Synchronous wrapper around ctdb_delete_record_send()/_recv(), using
+  * the event context stored in the record handle.
+  *
+  * Returns 0 on success, ENOMEM if the request could not be created,
+  * otherwise the error reported by ctdb_delete_record_recv().
+  */
+ int ctdb_delete_record(struct ctdb_record_handle *h)
+ {
+ 	struct tevent_context *ev = h->ev;
+ 	struct tevent_req *req;
+ 	TALLOC_CTX *tmp_ctx;
+ 	bool ok;
+ 	int err;
+
+ 	tmp_ctx = talloc_new(NULL);
+ 	if (tmp_ctx == NULL) {
+ 		return ENOMEM;
+ 	}
+
+ 	req = ctdb_delete_record_send(tmp_ctx, ev, h);
+ 	if (req == NULL) {
+ 		talloc_free(tmp_ctx);
+ 		return ENOMEM;
+ 	}
+
+ 	tevent_req_poll(req, ev);
+
+ 	ok = ctdb_delete_record_recv(req, &err);
+ 	talloc_free(tmp_ctx);
+
+ 	return ok ? 0 : err;
+ }
+
+/*
+ * Global lock functions
+ */
+
+ /* State of an asynchronous global lock acquisition. */
+ struct ctdb_g_lock_lock_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Database holding the lock records */
+ struct ctdb_db_context *db;
+ /* Lock name including the terminating NUL; points at the caller's string */
+ TDB_DATA key;
+ /* Server id requesting the lock */
+ struct ctdb_server_id my_sid;
+ /* CTDB_G_LOCK_READ or CTDB_G_LOCK_WRITE */
+ enum ctdb_g_lock_type lock_type;
+ /* Handle of the lock record while it is fetch-locked */
+ struct ctdb_record_handle *h;
+ /* state for verification of active locks */
+ struct ctdb_g_lock_list *lock_list;
+ /* Index of the lock_list entry currently being examined */
+ unsigned int current;
+ };
+
+static void ctdb_g_lock_lock_fetched(struct tevent_req *subreq);
+static void ctdb_g_lock_lock_process_locks(struct tevent_req *req);
+static void ctdb_g_lock_lock_checked(struct tevent_req *subreq);
+static int ctdb_g_lock_lock_update(struct tevent_req *req);
+static void ctdb_g_lock_lock_retry(struct tevent_req *subreq);
+
+ /*
+  * Return true if two lock types conflict.  Only two read locks are
+  * compatible with each other.
+  */
+ static bool ctdb_g_lock_conflicts(enum ctdb_g_lock_type l1,
+ 				  enum ctdb_g_lock_type l2)
+ {
+ 	bool both_readers;
+
+ 	both_readers = (l1 == CTDB_G_LOCK_READ) && (l2 == CTDB_G_LOCK_READ);
+
+ 	return ! both_readers;
+ }
+
+ /*
+  * Start acquiring a global lock named keyname on behalf of *sid.
+  *
+  * readonly selects a read lock, otherwise a write lock is requested.
+  * The lock record is fetch-locked first; processing continues in
+  * ctdb_g_lock_lock_fetched().  keyname is referenced, not copied.
+  *
+  * Returns NULL if the request cannot be allocated.
+  */
+ struct tevent_req *ctdb_g_lock_lock_send(TALLOC_CTX *mem_ctx,
+ 					 struct tevent_context *ev,
+ 					 struct ctdb_client_context *client,
+ 					 struct ctdb_db_context *db,
+ 					 const char *keyname,
+ 					 struct ctdb_server_id *sid,
+ 					 bool readonly)
+ {
+ 	struct ctdb_g_lock_lock_state *state;
+ 	struct tevent_req *req;
+ 	struct tevent_req *fetch_req;
+
+ 	req = tevent_req_create(mem_ctx, &state,
+ 				struct ctdb_g_lock_lock_state);
+ 	if (req == NULL) {
+ 		return NULL;
+ 	}
+
+ 	state->ev = ev;
+ 	state->client = client;
+ 	state->db = db;
+ 	state->my_sid = *sid;
+
+ 	/* The key is the lock name including its terminating NUL */
+ 	state->key.dptr = discard_const(keyname);
+ 	state->key.dsize = strlen(keyname) + 1;
+
+ 	if (readonly) {
+ 		state->lock_type = CTDB_G_LOCK_READ;
+ 	} else {
+ 		state->lock_type = CTDB_G_LOCK_WRITE;
+ 	}
+
+ 	/* The lock record itself is always fetched for read/write access */
+ 	fetch_req = ctdb_fetch_lock_send(state, ev, client, db, state->key,
+ 					 false);
+ 	if (tevent_req_nomem(fetch_req, req)) {
+ 		return tevent_req_post(req, ev);
+ 	}
+ 	tevent_req_set_callback(fetch_req, ctdb_g_lock_lock_fetched, req);
+
+ 	return req;
+ }
+
+ /*
+  * Called when the lock record has been fetch-locked.
+  *
+  * Parses the record into a fresh lock list (discarding one left over
+  * from an earlier attempt) and starts checking it for conflicts.
+  */
+ static void ctdb_g_lock_lock_fetched(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_g_lock_lock_state *state = tevent_req_data(
+ 		req, struct ctdb_g_lock_lock_state);
+ 	TDB_DATA payload;
+ 	size_t consumed;
+ 	int err = 0;
+
+ 	state->h = ctdb_fetch_lock_recv(subreq, NULL, state, &payload, &err);
+ 	TALLOC_FREE(subreq);
+ 	if (state->h == NULL) {
+ 		DEBUG(DEBUG_ERR, ("g_lock_lock: %s fetch lock failed\n",
+ 				  (char *)state->key.dptr));
+ 		tevent_req_error(req, err);
+ 		return;
+ 	}
+
+ 	/* On a retry, start over with a clean list and position */
+ 	if (state->lock_list != NULL) {
+ 		TALLOC_FREE(state->lock_list);
+ 		state->current = 0;
+ 	}
+
+ 	err = ctdb_g_lock_list_pull(payload.dptr, payload.dsize, state,
+ 				    &state->lock_list, &consumed);
+ 	talloc_free(payload.dptr);
+ 	if (err == 0) {
+ 		ctdb_g_lock_lock_process_locks(req);
+ 		return;
+ 	}
+
+ 	DEBUG(DEBUG_ERR, ("g_lock_lock: %s invalid lock data\n",
+ 			  (char *)state->key.dptr));
+ 	tevent_req_error(req, err);
+ }
+
+ /*
+  * Walk the lock list starting at state->current looking for a lock that
+  * conflicts with the requested one.
+  *
+  * If a conflicting entry is found, a PROCESS_EXISTS control is sent for
+  * its holder and processing continues in ctdb_g_lock_lock_checked().
+  * If the list already contains our own server id the request fails with
+  * EDEADLK.  If no conflict is found, our lock is appended to the list,
+  * the record is stored, the record handle is released and the request
+  * is done.
+  */
+ static void ctdb_g_lock_lock_process_locks(struct tevent_req *req)
+ {
+ struct ctdb_g_lock_lock_state *state = tevent_req_data(
+ req, struct ctdb_g_lock_lock_state);
+ struct tevent_req *subreq;
+ struct ctdb_g_lock *lock;
+ bool check_server = false;
+ int ret;
+
+ while (state->current < state->lock_list->num) {
+ lock = &state->lock_list->lock[state->current];
+
+ /* We should not ask for the same lock more than once */
+ if (ctdb_server_id_equal(&lock->sid, &state->my_sid)) {
+ DEBUG(DEBUG_ERR, ("g_lock_lock: %s deadlock\n",
+ (char *)state->key.dptr));
+ tevent_req_error(req, EDEADLK);
+ return;
+ }
+
+ if (ctdb_g_lock_conflicts(lock->type, state->lock_type)) {
+ /* Leave state->current on the conflicting entry */
+ check_server = true;
+ break;
+ }
+
+ state->current += 1;
+ }
+
+ if (check_server) {
+ struct ctdb_req_control request;
+
+ /* lock still points at the conflicting entry found above */
+ ctdb_req_control_process_exists(&request, lock->sid.pid);
+ subreq = ctdb_client_control_send(state, state->ev,
+ state->client,
+ lock->sid.vnn,
+ tevent_timeval_zero(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, ctdb_g_lock_lock_checked, req);
+ return;
+ }
+
+ /* There is no conflict, add ourself to the lock_list */
+ state->lock_list->lock = talloc_realloc(state->lock_list,
+ state->lock_list->lock,
+ struct ctdb_g_lock,
+ state->lock_list->num + 1);
+ if (state->lock_list->lock == NULL) {
+ tevent_req_error(req, ENOMEM);
+ return;
+ }
+
+ /* Fill in the newly added last slot before bumping the count */
+ lock = &state->lock_list->lock[state->lock_list->num];
+ lock->type = state->lock_type;
+ lock->sid = state->my_sid;
+ state->lock_list->num += 1;
+
+ ret = ctdb_g_lock_lock_update(req);
+ if (ret != 0) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* Freeing the handle releases the record */
+ TALLOC_FREE(state->h);
+ tevent_req_done(req);
+ }
+
+ /*
+  * Completion callback for the PROCESS_EXISTS control sent for the
+  * holder of a conflicting lock.
+  *
+  * A reply value of 0 is treated as "process exists": the record handle
+  * is released and the whole acquisition is retried after a short
+  * wakeup timer.  Otherwise the conflicting entry is stale; it is
+  * removed, the record is stored and the scan of the list continues.
+  */
+ static void ctdb_g_lock_lock_checked(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_g_lock_lock_state *state = tevent_req_data(
+ 		req, struct ctdb_g_lock_lock_state);
+ 	struct ctdb_reply_control *reply;
+ 	struct ctdb_g_lock_list *list;
+ 	bool ok;
+ 	int err, exists_status;
+
+ 	ok = ctdb_client_control_recv(subreq, &err, state, &reply);
+ 	TALLOC_FREE(subreq);
+ 	if (! ok) {
+ 		DEBUG(DEBUG_ERR,
+ 		      ("g_lock_lock: %s PROCESS_EXISTS failed, ret=%d\n",
+ 		       (char *)state->key.dptr, err));
+ 		tevent_req_error(req, err);
+ 		return;
+ 	}
+
+ 	err = ctdb_reply_control_process_exists(reply, &exists_status);
+ 	if (err != 0) {
+ 		tevent_req_error(req, err);
+ 		return;
+ 	}
+ 	talloc_free(reply);
+
+ 	if (exists_status != 0) {
+ 		/* server process does not exist, remove conflicting entry */
+ 		list = state->lock_list;
+ 		list->lock[state->current] = list->lock[list->num-1];
+ 		list->num -= 1;
+
+ 		err = ctdb_g_lock_lock_update(req);
+ 		if (err != 0) {
+ 			tevent_req_error(req, err);
+ 			return;
+ 		}
+
+ 		ctdb_g_lock_lock_process_locks(req);
+ 		return;
+ 	}
+
+ 	/* server process exists, need to retry */
+ 	TALLOC_FREE(state->h);
+ 	subreq = tevent_wakeup_send(state, state->ev,
+ 				    tevent_timeval_current_ofs(0,1000));
+ 	if (tevent_req_nomem(subreq, req)) {
+ 		return;
+ 	}
+ 	tevent_req_set_callback(subreq, ctdb_g_lock_lock_retry, req);
+ }
+
+ /*
+  * Serialise the current lock list and store it in the lock record.
+  *
+  * Returns 0 on success, ENOMEM if the buffer cannot be allocated, or
+  * the error from ctdb_store_record().
+  */
+ static int ctdb_g_lock_lock_update(struct tevent_req *req)
+ {
+ 	struct ctdb_g_lock_lock_state *state;
+ 	TDB_DATA buf;
+ 	size_t written;
+ 	int rc;
+
+ 	state = tevent_req_data(req, struct ctdb_g_lock_lock_state);
+
+ 	buf.dsize = ctdb_g_lock_list_len(state->lock_list);
+ 	buf.dptr = talloc_size(state, buf.dsize);
+ 	if (buf.dptr == NULL) {
+ 		return ENOMEM;
+ 	}
+
+ 	ctdb_g_lock_list_push(state->lock_list, buf.dptr, &written);
+
+ 	rc = ctdb_store_record(state->h, buf);
+
+ 	/* The buffer is only needed for the duration of the store */
+ 	talloc_free(buf.dptr);
+ 	return rc;
+ }
+
+ /*
+  * Wakeup timer callback: fetch-lock the lock record again and restart
+  * the acquisition in ctdb_g_lock_lock_fetched().
+  */
+ static void ctdb_g_lock_lock_retry(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req;
+ 	struct ctdb_g_lock_lock_state *state;
+ 	bool woke;
+
+ 	req = tevent_req_callback_data(subreq, struct tevent_req);
+ 	state = tevent_req_data(req, struct ctdb_g_lock_lock_state);
+
+ 	woke = tevent_wakeup_recv(subreq);
+ 	TALLOC_FREE(subreq);
+ 	if (! woke) {
+ 		tevent_req_error(req, ENOMEM);
+ 		return;
+ 	}
+
+ 	subreq = ctdb_fetch_lock_send(state, state->ev, state->client,
+ 				      state->db, state->key, false);
+ 	if (tevent_req_nomem(subreq, req)) {
+ 		return;
+ 	}
+ 	tevent_req_set_callback(subreq, ctdb_g_lock_lock_fetched, req);
+ }
+
+ /*
+  * Collect the result of a global lock acquisition.
+  *
+  * Any record handle still held by the request is released first.
+  * Returns true on success.  On failure returns false and, when perr is
+  * not NULL, stores the unix error code in *perr.
+  */
+ bool ctdb_g_lock_lock_recv(struct tevent_req *req, int *perr)
+ {
+ 	struct ctdb_g_lock_lock_state *state;
+ 	int code;
+
+ 	state = tevent_req_data(req, struct ctdb_g_lock_lock_state);
+
+ 	/* Error paths may have left the record fetch-locked */
+ 	TALLOC_FREE(state->h);
+
+ 	if (! tevent_req_is_unix_error(req, &code)) {
+ 		return true;
+ 	}
+
+ 	if (perr != NULL) {
+ 		*perr = code;
+ 	}
+ 	return false;
+ }
+
+ /* State of an asynchronous global lock release. */
+ struct ctdb_g_lock_unlock_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Database holding the lock records */
+ struct ctdb_db_context *db;
+ /* Lock name including the terminating NUL; points at the caller's string */
+ TDB_DATA key;
+ /* Server id whose lock entry is to be removed */
+ struct ctdb_server_id my_sid;
+ /* Handle of the lock record while it is fetch-locked */
+ struct ctdb_record_handle *h;
+ /* Lock list parsed from the lock record */
+ struct ctdb_g_lock_list *lock_list;
+ };
+
+static void ctdb_g_lock_unlock_fetched(struct tevent_req *subreq);
+static int ctdb_g_lock_unlock_update(struct tevent_req *req);
+static void ctdb_g_lock_unlock_deleted(struct tevent_req *subreq);
+
+ /*
+  * Start releasing the global lock named keyname held by sid.
+  *
+  * The lock record is fetch-locked first; processing continues in
+  * ctdb_g_lock_unlock_fetched().  keyname is referenced, not copied.
+  *
+  * Returns NULL if the request cannot be allocated.
+  */
+ struct tevent_req *ctdb_g_lock_unlock_send(TALLOC_CTX *mem_ctx,
+ 					   struct tevent_context *ev,
+ 					   struct ctdb_client_context *client,
+ 					   struct ctdb_db_context *db,
+ 					   const char *keyname,
+ 					   struct ctdb_server_id sid)
+ {
+ 	struct ctdb_g_lock_unlock_state *state;
+ 	struct tevent_req *req;
+ 	struct tevent_req *fetch_req;
+
+ 	req = tevent_req_create(mem_ctx, &state,
+ 				struct ctdb_g_lock_unlock_state);
+ 	if (req == NULL) {
+ 		return NULL;
+ 	}
+
+ 	state->ev = ev;
+ 	state->client = client;
+ 	state->db = db;
+ 	state->my_sid = sid;
+
+ 	/* The key is the lock name including its terminating NUL */
+ 	state->key.dptr = discard_const(keyname);
+ 	state->key.dsize = strlen(keyname) + 1;
+
+ 	fetch_req = ctdb_fetch_lock_send(state, ev, client, db, state->key,
+ 					 false);
+ 	if (tevent_req_nomem(fetch_req, req)) {
+ 		return tevent_req_post(req, ev);
+ 	}
+ 	tevent_req_set_callback(fetch_req, ctdb_g_lock_unlock_fetched, req);
+
+ 	return req;
+ }
+
+ /*
+  * Called when the lock record has been fetch-locked for an unlock.
+  *
+  * Parses the lock list, removes our entry and stores the result.  If
+  * the list becomes empty the record is deleted instead and the request
+  * finishes in ctdb_g_lock_unlock_deleted(); otherwise the record handle
+  * is released and the request is done.
+  */
+ static void ctdb_g_lock_unlock_fetched(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_g_lock_unlock_state *state = tevent_req_data(
+ 		req, struct ctdb_g_lock_unlock_state);
+ 	TDB_DATA data;
+ 	size_t np;
+ 	int ret = 0;
+
+ 	state->h = ctdb_fetch_lock_recv(subreq, NULL, state, &data, &ret);
+ 	TALLOC_FREE(subreq);
+ 	if (state->h == NULL) {
+ 		DEBUG(DEBUG_ERR, ("g_lock_unlock: %s fetch lock failed\n",
+ 				  (char *)state->key.dptr));
+ 		tevent_req_error(req, ret);
+ 		return;
+ 	}
+
+ 	ret = ctdb_g_lock_list_pull(data.dptr, data.dsize, state,
+ 				    &state->lock_list, &np);
+ 	/*
+ 	 * The raw record copy is not needed once it has been parsed;
+ 	 * release it here as ctdb_g_lock_lock_fetched() does.
+ 	 */
+ 	talloc_free(data.dptr);
+ 	if (ret != 0) {
+ 		DEBUG(DEBUG_ERR, ("g_lock_unlock: %s invalid lock data\n",
+ 				  (char *)state->key.dptr));
+ 		tevent_req_error(req, ret);
+ 		return;
+ 	}
+
+ 	ret = ctdb_g_lock_unlock_update(req);
+ 	if (ret != 0) {
+ 		tevent_req_error(req, ret);
+ 		return;
+ 	}
+
+ 	if (state->lock_list->num == 0) {
+ 		/* Nobody holds the lock any more, delete the record */
+ 		subreq = ctdb_delete_record_send(state, state->ev, state->h);
+ 		if (tevent_req_nomem(subreq, req)) {
+ 			return;
+ 		}
+ 		tevent_req_set_callback(subreq, ctdb_g_lock_unlock_deleted,
+ 					req);
+ 		return;
+ 	}
+
+ 	TALLOC_FREE(state->h);
+ 	tevent_req_done(req);
+ }
+
+ /*
+  * Remove our server id from the lock list and, if other locks remain,
+  * store the updated list in the lock record.
+  *
+  * Nothing is stored when the list ends up empty; the caller deletes the
+  * record in that case.  Returns 0 on success, ENOMEM if the buffer
+  * cannot be allocated, or the error from ctdb_store_record().
+  */
+ static int ctdb_g_lock_unlock_update(struct tevent_req *req)
+ {
+ 	struct ctdb_g_lock_unlock_state *state = tevent_req_data(
+ 		req, struct ctdb_g_lock_unlock_state);
+ 	struct ctdb_g_lock_list *list = state->lock_list;
+ 	unsigned int idx = 0;
+ 	TDB_DATA buf;
+ 	size_t written;
+ 	int rc;
+
+ 	/* Find the first entry belonging to us, if any */
+ 	while (idx < list->num &&
+ 	       ! ctdb_server_id_equal(&list->lock[idx].sid, &state->my_sid)) {
+ 		idx++;
+ 	}
+
+ 	if (idx < list->num) {
+ 		/* Overwrite it with the last entry and shrink the list */
+ 		list->lock[idx] = list->lock[list->num-1];
+ 		list->num -= 1;
+ 	}
+
+ 	if (list->num == 0) {
+ 		return 0;
+ 	}
+
+ 	buf.dsize = ctdb_g_lock_list_len(list);
+ 	buf.dptr = talloc_size(state, buf.dsize);
+ 	if (buf.dptr == NULL) {
+ 		return ENOMEM;
+ 	}
+
+ 	ctdb_g_lock_list_push(list, buf.dptr, &written);
+ 	rc = ctdb_store_record(state->h, buf);
+ 	talloc_free(buf.dptr);
+
+ 	return rc;
+ }
+
+ /*
+  * Completion callback for the deletion of an empty lock record.
+  *
+  * On success the record handle is released and the unlock request is
+  * done; on failure the request finishes with the reported error.
+  */
+ static void ctdb_g_lock_unlock_deleted(struct tevent_req *subreq)
+ {
+ 	struct tevent_req *req = tevent_req_callback_data(
+ 		subreq, struct tevent_req);
+ 	struct ctdb_g_lock_unlock_state *state = tevent_req_data(
+ 		req, struct ctdb_g_lock_unlock_state);
+ 	int ret;
+ 	bool status;
+
+ 	status = ctdb_delete_record_recv(subreq, &ret);
+ 	/* Release the sub-request, as the other callbacks in this file do */
+ 	TALLOC_FREE(subreq);
+ 	if (! status) {
+ 		DEBUG(DEBUG_ERR,
+ 		      ("g_lock_unlock %s delete record failed, ret=%d\n",
+ 		       (char *)state->key.dptr, ret));
+ 		tevent_req_error(req, ret);
+ 		return;
+ 	}
+
+ 	TALLOC_FREE(state->h);
+ 	tevent_req_done(req);
+ }
+
+/*
+ * Collect the result of a g_lock unlock request.
+ *
+ * The record handle held in the request state is always released here.
+ * Returns true on success; on failure returns false and, when perr is
+ * not NULL, stores the unix error code in *perr.
+ */
+bool ctdb_g_lock_unlock_recv(struct tevent_req *req, int *perr)
+{
+	struct ctdb_g_lock_unlock_state *state = tevent_req_data(
+		req, struct ctdb_g_lock_unlock_state);
+	int unix_err;
+	bool failed;
+
+	TALLOC_FREE(state->h);
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (failed && perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return ! failed;
+}
+
+/*
+ * Persistent database functions
+ */
+/*
+ * Per-request state for ctdb_transaction_start_send().
+ *
+ * ev, client - event context and client connection used for subrequests
+ * timeout    - NOTE(review): not assigned by the start code visible in
+ *              this file section - confirm whether it is still needed
+ * h          - transaction handle being built; returned by
+ *              ctdb_transaction_start_recv() on success
+ * destnode   - set to the PNN of the local client
+ */
+struct ctdb_transaction_start_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct timeval timeout;
+ struct ctdb_transaction_handle *h;
+ uint32_t destnode;
+};
+
+/* Callbacks for the attach and g_lock subrequests of transaction start */
+static void ctdb_transaction_g_lock_attached(struct tevent_req *subreq);
+static void ctdb_transaction_g_lock_done(struct tevent_req *subreq);
+
+/*
+ * Start a transaction on a non-volatile database.
+ *
+ * Builds a transaction handle (allocated on db), attaches to g_lock.tdb
+ * and then takes the lock named "transaction_db_0x<db_id>" for this
+ * client.  Volatile databases are rejected with EINVAL.
+ *
+ * mem_ctx  - talloc parent of the returned request
+ * ev       - event context driving the request
+ * client   - ctdb client connection
+ * timeout  - timeout passed to the attach of g_lock.tdb
+ * db       - database to run the transaction on
+ * readonly - true for a read-only transaction
+ *
+ * Returns the request, or NULL if the request itself cannot be allocated.
+ * Failures after that are reported through the request.  The handle is
+ * freed again if any later setup step in this function fails, so it does
+ * not linger on db.
+ */
+struct tevent_req *ctdb_transaction_start_send(TALLOC_CTX *mem_ctx,
+					       struct tevent_context *ev,
+					       struct ctdb_client_context *client,
+					       struct timeval timeout,
+					       struct ctdb_db_context *db,
+					       bool readonly)
+{
+	struct ctdb_transaction_start_state *state;
+	struct tevent_req *req, *subreq;
+	struct ctdb_transaction_handle *h;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_transaction_start_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	if (ctdb_db_volatile(db)) {
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->timeout = timeout;
+	state->destnode = ctdb_client_pnn(client);
+
+	h = talloc_zero(db, struct ctdb_transaction_handle);
+	if (tevent_req_nomem(h, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	h->ev = ev;
+	h->client = client;
+	h->db = db;
+	h->readonly = readonly;
+	h->updated = false;
+
+	/* SRVID is unique for databases, so client can have transactions
+	 * active for multiple databases */
+	h->sid = ctdb_client_get_server_id(client, db->db_id);
+
+	h->recbuf = ctdb_rec_buffer_init(h, db->db_id);
+	if (tevent_req_nomem(h->recbuf, req)) {
+		/* h hangs off db, not req: free it or it leaks */
+		talloc_free(h);
+		return tevent_req_post(req, ev);
+	}
+
+	h->lock_name = talloc_asprintf(h, "transaction_db_0x%08x", db->db_id);
+	if (tevent_req_nomem(h->lock_name, req)) {
+		talloc_free(h);
+		return tevent_req_post(req, ev);
+	}
+
+	state->h = h;
+
+	subreq = ctdb_attach_send(state, ev, client, timeout, "g_lock.tdb", 0);
+	if (tevent_req_nomem(subreq, req)) {
+		TALLOC_FREE(state->h);
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_transaction_g_lock_attached, req);
+
+	return req;
+}
+
+/*
+ * Completion of the g_lock.tdb attach: on success go on to take the
+ * transaction lock, otherwise fail the request with the attach error.
+ */
+static void ctdb_transaction_g_lock_attached(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_transaction_start_state *state = tevent_req_data(
+		req, struct ctdb_transaction_start_state);
+	struct ctdb_transaction_handle *h = state->h;
+	int err;
+	bool attached;
+
+	attached = ctdb_attach_recv(subreq, &err, &h->db_g_lock);
+	TALLOC_FREE(subreq);
+	if (! attached) {
+		DEBUG(DEBUG_ERR,
+		      ("transaction_start: %s attach g_lock.tdb failed\n",
+		       h->db->db_name));
+		tevent_req_error(req, err);
+		return;
+	}
+
+	/* Lock is keyed by the per-database lock name and our server id */
+	subreq = ctdb_g_lock_lock_send(state, state->ev, state->client,
+				       h->db_g_lock, h->lock_name,
+				       &h->sid, h->readonly);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, ctdb_transaction_g_lock_done, req);
+}
+
+/*
+ * Completion of the g_lock lock request: the transaction start request
+ * finishes with the outcome of the lock attempt.
+ */
+static void ctdb_transaction_g_lock_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_transaction_start_state *state = tevent_req_data(
+		req, struct ctdb_transaction_start_state);
+	bool locked;
+	int err;
+
+	locked = ctdb_g_lock_lock_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (locked) {
+		tevent_req_done(req);
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("transaction_start: %s g_lock lock failed, ret=%d\n",
+	       state->h->db->db_name, err));
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of ctdb_transaction_start_send().
+ *
+ * Returns the transaction handle on success.  On failure returns NULL
+ * and, when perr is not NULL, stores the unix error code in *perr.
+ *
+ * A handle built before the failure is released here: it is never handed
+ * to the caller in that case, so nothing else could free it.
+ */
+struct ctdb_transaction_handle *ctdb_transaction_start_recv(
+					struct tevent_req *req,
+					int *perr)
+{
+	struct ctdb_transaction_start_state *state = tevent_req_data(
+		req, struct ctdb_transaction_start_state);
+	int err;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		/* state->h may still be NULL; TALLOC_FREE copes with that */
+		TALLOC_FREE(state->h);
+		return NULL;
+	}
+
+	return state->h;
+}
+
+/*
+ * Synchronous wrapper around ctdb_transaction_start_send()/_recv().
+ *
+ * Returns 0 and stores the new handle in *out on success.  Returns
+ * ENOMEM if the request cannot be created, otherwise the error reported
+ * by the request; *out is left untouched on failure.
+ */
+int ctdb_transaction_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			   struct ctdb_client_context *client,
+			   struct timeval timeout,
+			   struct ctdb_db_context *db, bool readonly,
+			   struct ctdb_transaction_handle **out)
+{
+	struct ctdb_transaction_handle *handle;
+	struct tevent_req *req;
+	int err = 0;
+
+	req = ctdb_transaction_start_send(mem_ctx, ev, client, timeout, db,
+					  readonly);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	handle = ctdb_transaction_start_recv(req, &err);
+	if (handle != NULL) {
+		*out = handle;
+		return 0;
+	}
+
+	return err;
+}
+
+/*
+ * Search state handed to ctdb_transaction_record_fetch_traverse().
+ *
+ * key    - key being looked for
+ * data   - record data of the most recent match (header stripped)
+ * header - ltdb header extracted from the most recent match
+ * found  - true once at least one matching record was seen
+ */
+struct ctdb_transaction_record_fetch_state {
+ TDB_DATA key, data;
+ struct ctdb_ltdb_header header;
+ bool found;
+};
+
+/*
+ * Record-buffer traverse callback: remember header and data of every
+ * record whose key equals the key in the search state.  The traverse is
+ * not stopped on a match, so a later match overwrites an earlier one.
+ *
+ * Returns 0 to continue, 1 if the ltdb header cannot be extracted.
+ */
+static int ctdb_transaction_record_fetch_traverse(
+				uint32_t reqid,
+				struct ctdb_ltdb_header *nullheader,
+				TDB_DATA key, TDB_DATA data,
+				void *private_data)
+{
+	struct ctdb_transaction_record_fetch_state *search =
+		(struct ctdb_transaction_record_fetch_state *)private_data;
+	int rc;
+
+	/* Not the record we are after */
+	if (search->key.dsize != key.dsize) {
+		return 0;
+	}
+	if (memcmp(search->key.dptr, key.dptr, key.dsize) != 0) {
+		return 0;
+	}
+
+	rc = ctdb_ltdb_header_extract(&data, &search->header);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("record_fetch: Failed to extract header, "
+		       "ret=%d\n", rc));
+		return 1;
+	}
+
+	search->data = data;
+	search->found = true;
+
+	return 0;
+}
+
+/*
+ * Look up key in the transaction's record buffer.
+ *
+ * On a hit returns 0 and fills *header and *data (each only when the
+ * pointer is not NULL); data points at the buffered copy and is not
+ * duplicated.  Returns ENOENT when the key is not buffered, or the
+ * traverse error otherwise.
+ */
+static int ctdb_transaction_record_fetch(struct ctdb_transaction_handle *h,
+					 TDB_DATA key,
+					 struct ctdb_ltdb_header *header,
+					 TDB_DATA *data)
+{
+	struct ctdb_transaction_record_fetch_state search;
+	int rc;
+
+	search.found = false;
+	search.key = key;
+
+	rc = ctdb_rec_buffer_traverse(h->recbuf,
+				      ctdb_transaction_record_fetch_traverse,
+				      &search);
+	if (rc != 0) {
+		return rc;
+	}
+
+	if (! search.found) {
+		return ENOENT;
+	}
+
+	if (header != NULL) {
+		*header = search.header;
+	}
+	if (data != NULL) {
+		*data = search.data;
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch a record inside a transaction.
+ *
+ * A record already present in the transaction's record buffer is
+ * returned as a copy allocated on mem_ctx.  Otherwise the record is read
+ * from the local database and added to the record buffer.
+ *
+ * Returns 0 on success, ENOMEM if the copy cannot be allocated, or the
+ * error from the database fetch or the buffer add.
+ */
+int ctdb_transaction_fetch_record(struct ctdb_transaction_handle *h,
+				  TDB_DATA key,
+				  TALLOC_CTX *mem_ctx, TDB_DATA *data)
+{
+	struct ctdb_ltdb_header header;
+	TDB_DATA buffered;
+	int ret;
+
+	ret = ctdb_transaction_record_fetch(h, key, NULL, &buffered);
+	if (ret != 0) {
+		/* Not buffered yet: read it and remember what was read */
+		ret = ctdb_ltdb_fetch(h->db, key, &header, mem_ctx, data);
+		if (ret != 0) {
+			return ret;
+		}
+
+		return ctdb_rec_buffer_add(h, h->recbuf, 0, &header, key,
+					   *data);
+	}
+
+	data->dptr = talloc_memdup(mem_ctx, buffered.dptr, buffered.dsize);
+	if (data->dptr == NULL) {
+		return ENOMEM;
+	}
+	data->dsize = buffered.dsize;
+
+	return 0;
+}
+
+/*
+ * Queue a record write in a transaction.
+ *
+ * The current value is taken from the transaction's record buffer or,
+ * failing that, from the local database.  If the new data is identical
+ * nothing is queued.  Otherwise the record is added to the buffer with
+ * dmaster set to the local node and rsn incremented, and the transaction
+ * is marked as updated.
+ *
+ * Returns 0 on success, EINVAL for a read-only transaction, ENOMEM on
+ * allocation failure, or the error from the database fetch or the
+ * buffer add.
+ */
+int ctdb_transaction_store_record(struct ctdb_transaction_handle *h,
+				  TDB_DATA key, TDB_DATA data)
+{
+	TALLOC_CTX *tmp_ctx;
+	struct ctdb_ltdb_header header;
+	TDB_DATA old_data;
+	int ret;
+
+	if (h->readonly) {
+		return EINVAL;
+	}
+
+	tmp_ctx = talloc_new(h);
+	if (tmp_ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_transaction_record_fetch(h, key, &header, &old_data);
+	if (ret != 0) {
+		ret = ctdb_ltdb_fetch(h->db, key, &header, tmp_ctx, &old_data);
+		if (ret != 0) {
+			/* Do not leave the scratch context on the handle */
+			talloc_free(tmp_ctx);
+			return ret;
+		}
+	}
+
+	/* Unchanged data: nothing to commit for this key */
+	if (old_data.dsize == data.dsize &&
+	    memcmp(old_data.dptr, data.dptr, data.dsize) == 0) {
+		talloc_free(tmp_ctx);
+		return 0;
+	}
+
+	header.dmaster = ctdb_client_pnn(h->client);
+	header.rsn += 1;
+
+	ret = ctdb_rec_buffer_add(h, h->recbuf, 0, &header, key, data);
+	talloc_free(tmp_ctx);
+	if (ret != 0) {
+		return ret;
+	}
+	h->updated = true;
+
+	return 0;
+}
+
+/*
+ * Queue deletion of a record in a transaction.
+ *
+ * Implemented as a store of tdb_null for the key, so the return values
+ * are those of ctdb_transaction_store_record().
+ */
+int ctdb_transaction_delete_record(struct ctdb_transaction_handle *h,
+ TDB_DATA key)
+{
+ return ctdb_transaction_store_record(h, key, tdb_null);
+}
+
+/*
+ * Read the database sequence number stored under CTDB_DB_SEQNUM_KEY
+ * directly from the local database.
+ *
+ * An empty record yields sequence number 0.  Returns 0 on success,
+ * EINVAL if the stored value is not exactly a uint64_t, or the fetch
+ * error.
+ */
+static int ctdb_transaction_fetch_db_seqnum(struct ctdb_transaction_handle *h,
+					    uint64_t *seqnum)
+{
+	const char *keyname = CTDB_DB_SEQNUM_KEY;
+	struct ctdb_ltdb_header header;
+	TDB_DATA key, value;
+	int ret;
+
+	key.dsize = strlen(keyname) + 1;
+	key.dptr = discard_const(keyname);
+
+	ret = ctdb_ltdb_fetch(h->db, key, &header, h, &value);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("transaction_commit: %s seqnum fetch failed, ret=%d\n",
+		       h->db->db_name, ret));
+		return ret;
+	}
+
+	if (value.dsize == 0) {
+		/* initial data */
+		*seqnum = 0;
+		return 0;
+	}
+
+	ret = EINVAL;
+	if (value.dsize == sizeof(uint64_t)) {
+		memcpy(seqnum, value.dptr, sizeof(uint64_t));
+		ret = 0;
+	}
+
+	talloc_free(value.dptr);
+	return ret;
+}
+
+/*
+ * Queue a write of seqnum under CTDB_DB_SEQNUM_KEY in the transaction.
+ * Return values are those of ctdb_transaction_store_record().
+ */
+static int ctdb_transaction_store_db_seqnum(struct ctdb_transaction_handle *h,
+					    uint64_t seqnum)
+{
+	const char *keyname = CTDB_DB_SEQNUM_KEY;
+	TDB_DATA seq_key, seq_val;
+
+	/* Value is the raw in-memory representation of the counter */
+	seq_val.dsize = sizeof(seqnum);
+	seq_val.dptr = (uint8_t *)&seqnum;
+
+	/* Key includes the terminating NUL */
+	seq_key.dsize = strlen(keyname) + 1;
+	seq_key.dptr = discard_const(keyname);
+
+	return ctdb_transaction_store_record(h, seq_key, seq_val);
+}
+
+/*
+ * Per-request state for ctdb_transaction_commit_send().
+ *
+ * ev, timeout - reused when the TRANS3_COMMIT control has to be resent
+ * h           - transaction handle being committed
+ * seqnum      - database sequence number read before the commit; the
+ *               commit stores seqnum + 1
+ */
+struct ctdb_transaction_commit_state {
+ struct tevent_context *ev;
+ struct timeval timeout;
+ struct ctdb_transaction_handle *h;
+ uint64_t seqnum;
+};
+
+/* Callbacks for the TRANS3_COMMIT control and the final g_lock unlock */
+static void ctdb_transaction_commit_done(struct tevent_req *subreq);
+static void ctdb_transaction_commit_g_lock_done(struct tevent_req *subreq);
+
+/*
+ * Start committing a transaction.
+ *
+ * Reads the current database sequence number, queues a write of that
+ * number plus one, and sends the buffered records to the local node in
+ * a TRANS3_COMMIT control.
+ *
+ * Returns the request, or NULL if it cannot be allocated; later
+ * failures are reported through the request.
+ */
+struct tevent_req *ctdb_transaction_commit_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct timeval timeout,
+					struct ctdb_transaction_handle *h)
+{
+	struct ctdb_transaction_commit_state *state;
+	struct ctdb_req_control commit_ctl;
+	struct tevent_req *req, *subreq;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_transaction_commit_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->h = h;
+	state->ev = ev;
+	state->timeout = timeout;
+
+	/* Bump the sequence number as part of this transaction */
+	ret = ctdb_transaction_fetch_db_seqnum(h, &state->seqnum);
+	if (ret == 0) {
+		ret = ctdb_transaction_store_db_seqnum(h, state->seqnum+1);
+	}
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	ctdb_req_control_trans3_commit(&commit_ctl, h->recbuf);
+	subreq = ctdb_client_control_send(state, ev, h->client,
+					  ctdb_client_pnn(h->client),
+					  timeout, &commit_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_transaction_commit_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the TRANS3_COMMIT control.
+ *
+ * If the control itself could not be received the request fails.  If the
+ * control reply reports failure, the database sequence number is re-read
+ * to decide what happened:
+ *   - unchanged: the commit did not take effect, so the control is sent
+ *     again with this function as callback;
+ *   - exactly one higher: the commit is treated as successful;
+ *   - anything else: the request fails with EIO.
+ * After a successful commit the transaction's g_lock is released.
+ */
+static void ctdb_transaction_commit_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct ctdb_transaction_commit_state *state = tevent_req_data(
+ req, struct ctdb_transaction_commit_state);
+ struct ctdb_transaction_handle *h = state->h;
+ struct ctdb_reply_control *reply;
+ uint64_t seqnum;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ DEBUG(DEBUG_ERR,
+ ("transaction_commit: %s TRANS3_COMMIT failed, ret=%d\n",
+ h->db->db_name, ret));
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ret = ctdb_reply_control_trans3_commit(reply);
+ talloc_free(reply);
+
+ if (ret != 0) {
+ /* Control failed due to recovery */
+
+ /* Re-read the stored sequence number to see if we got through */
+ ret = ctdb_transaction_fetch_db_seqnum(h, &seqnum);
+ if (ret != 0) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ if (seqnum == state->seqnum) {
+ struct ctdb_req_control request;
+
+ /* try again */
+ ctdb_req_control_trans3_commit(&request,
+ state->h->recbuf);
+ subreq = ctdb_client_control_send(
+ state, state->ev, state->h->client,
+ ctdb_client_pnn(state->h->client),
+ state->timeout, &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq,
+ ctdb_transaction_commit_done,
+ req);
+ return;
+ }
+
+ /* Neither the old value nor old value + 1: give up */
+ if (seqnum != state->seqnum + 1) {
+ DEBUG(DEBUG_ERR,
+ ("transaction_commit: %s seqnum mismatch "
+ "0x%"PRIx64" != 0x%"PRIx64" + 1\n",
+ state->h->db->db_name, seqnum, state->seqnum));
+ tevent_req_error(req, EIO);
+ return;
+ }
+ }
+
+ /* trans3_commit successful */
+ subreq = ctdb_g_lock_unlock_send(state, state->ev, h->client,
+ h->db_g_lock, h->lock_name, h->sid);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, ctdb_transaction_commit_g_lock_done,
+ req);
+}
+
+/*
+ * Completion of the g_lock unlock that follows a successful commit.
+ *
+ * On success the transaction handle is freed and the commit request
+ * completes; on failure the handle is kept and the request fails with
+ * the unlock error.
+ */
+static void ctdb_transaction_commit_g_lock_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_transaction_commit_state *state = tevent_req_data(
+		req, struct ctdb_transaction_commit_state);
+	bool unlocked;
+	int err;
+
+	unlocked = ctdb_g_lock_unlock_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (unlocked) {
+		talloc_free(state->h);
+		tevent_req_done(req);
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("transaction_commit: %s g_lock unlock failed, ret=%d\n",
+	       state->h->db->db_name, err));
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of a transaction commit request.
+ *
+ * Returns true on success; on failure returns false and, when perr is
+ * not NULL, stores the unix error code in *perr.
+ */
+bool ctdb_transaction_commit_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+	bool failed;
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (failed && perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return ! failed;
+}
+
+/*
+ * Synchronous transaction commit.
+ *
+ * A read-only transaction, or one without any queued update, is simply
+ * cancelled.  Otherwise the commit request is run to completion on the
+ * handle's event context.
+ *
+ * Returns 0 on success, ENOMEM on allocation failure, or the error
+ * reported by the commit (or cancel) request.
+ */
+int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
+{
+	struct tevent_context *ev = h->ev;
+	struct tevent_req *req;
+	TALLOC_CTX *scratch;
+	bool committed;
+	int err;
+
+	/* Nothing to write: releasing the lock is all that is needed */
+	if (h->readonly || ! h->updated) {
+		return ctdb_transaction_cancel(h);
+	}
+
+	scratch = talloc_new(NULL);
+	if (scratch == NULL) {
+		return ENOMEM;
+	}
+
+	req = ctdb_transaction_commit_send(scratch, ev,
+					   tevent_timeval_zero(), h);
+	if (req == NULL) {
+		talloc_free(scratch);
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	committed = ctdb_transaction_commit_recv(req, &err);
+	talloc_free(scratch);
+
+	return committed ? 0 : err;
+}
+
+/*
+ * Per-request state for ctdb_transaction_cancel_send().
+ *
+ * ev      - event context driving the unlock subrequest
+ * h       - transaction handle being cancelled
+ * timeout - stored from the caller; not read by the cancel code visible
+ *           in this file section
+ */
+struct ctdb_transaction_cancel_state {
+ struct tevent_context *ev;
+ struct ctdb_transaction_handle *h;
+ struct timeval timeout;
+};
+
+/* Callback for the g_lock unlock issued by transaction cancel */
+static void ctdb_transaction_cancel_done(struct tevent_req *subreq);
+
+/*
+ * Start cancelling a transaction by releasing its g_lock.
+ *
+ * Returns the request, or NULL if it cannot be allocated.  Once a
+ * request has been returned the transaction handle is consumed: it is
+ * freed when the unlock completes (successfully or not) and also when
+ * the unlock subrequest cannot be allocated, so the caller must not use
+ * h afterwards.
+ */
+struct tevent_req *ctdb_transaction_cancel_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct timeval timeout,
+					struct ctdb_transaction_handle *h)
+{
+	struct tevent_req *req, *subreq;
+	struct ctdb_transaction_cancel_state *state;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_transaction_cancel_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->h = h;
+	state->timeout = timeout;
+
+	subreq = ctdb_g_lock_unlock_send(state, state->ev, state->h->client,
+					 state->h->db_g_lock,
+					 state->h->lock_name, state->h->sid);
+	if (tevent_req_nomem(subreq, req)) {
+		/* Every other failure path of cancel frees the handle */
+		TALLOC_FREE(state->h);
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_transaction_cancel_done,
+				req);
+
+	return req;
+}
+
+/*
+ * Completion of the g_lock unlock issued by transaction cancel.
+ *
+ * The transaction handle is freed whether or not the unlock succeeded;
+ * the request then completes with the unlock result.
+ */
+static void ctdb_transaction_cancel_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_transaction_cancel_state *state = tevent_req_data(
+		req, struct ctdb_transaction_cancel_state);
+	bool unlocked;
+	int err;
+
+	unlocked = ctdb_g_lock_unlock_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (! unlocked) {
+		/* Log before the handle (and its db name) goes away */
+		DEBUG(DEBUG_ERR,
+		      ("transaction_cancel: %s g_lock unlock failed, ret=%d\n",
+		       state->h->db->db_name, err));
+	}
+
+	talloc_free(state->h);
+
+	if (! unlocked) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a transaction cancel request.
+ *
+ * Returns true on success; on failure returns false and, when perr is
+ * not NULL, stores the unix error code in *perr.
+ */
+bool ctdb_transaction_cancel_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+	bool failed;
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (failed && perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return ! failed;
+}
+
+/*
+ * Synchronous transaction cancel.
+ *
+ * Runs the cancel request to completion on the handle's event context.
+ * If the request cannot even be set up, the handle is freed here.
+ *
+ * Returns 0 on success, ENOMEM on allocation failure, or the error
+ * reported by the cancel request.
+ */
+int ctdb_transaction_cancel(struct ctdb_transaction_handle *h)
+{
+	struct tevent_context *ev = h->ev;
+	TALLOC_CTX *scratch;
+	struct tevent_req *req;
+	bool cancelled;
+	int err;
+
+	scratch = talloc_new(NULL);
+	if (scratch == NULL) {
+		talloc_free(h);
+		return ENOMEM;
+	}
+
+	req = ctdb_transaction_cancel_send(scratch, ev,
+					   tevent_timeval_zero(), h);
+	if (req == NULL) {
+		talloc_free(scratch);
+		talloc_free(h);
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	cancelled = ctdb_transaction_cancel_recv(req, &err);
+	talloc_free(scratch);
+
+	return cancelled ? 0 : err;
+}
+
+/*
+ * TODO:
+ *
+ * In the future, Samba should register SERVER_ID.
+ * Make that structure the same as struct srvid {}.
+ */
diff --git a/ctdb/client/client_event.c b/ctdb/client/client_event.c
new file mode 100644
index 0000000..7111fe7
--- /dev/null
+++ b/ctdb/client/client_event.c
@@ -0,0 +1,444 @@
+/*
+ Eventd client api
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "common/logging.h"
+#include "common/sock_client.h"
+
+#include "protocol/protocol_api.h"
+
+#include "client/client_event.h"
+
+/*
+ * Event daemon client context.
+ *
+ * sockc - socket client set up by ctdb_event_init() and used for all
+ *         request/reply exchanges
+ */
+struct ctdb_event_context {
+ struct sock_client_context *sockc;
+};
+
+/*
+ * sock_client request_push hook: stamp the request with reqid and
+ * marshall it into a buffer allocated on mem_ctx.
+ *
+ * Returns 0 on success with *buf / *buflen describing the packet,
+ * ENOMEM if the buffer cannot be allocated, or the marshalling error.
+ */
+static int ctdb_event_msg_request_push(void *request_data, uint32_t reqid,
+				       TALLOC_CTX *mem_ctx,
+				       uint8_t **buf, size_t *buflen,
+				       void *private_data)
+{
+	struct ctdb_event_request *event_req =
+		(struct ctdb_event_request *)request_data;
+
+	sock_packet_header_set_reqid(&event_req->header, reqid);
+
+	*buflen = ctdb_event_request_len(event_req);
+	*buf = talloc_size(mem_ctx, *buflen);
+	if (*buf == NULL) {
+		return ENOMEM;
+	}
+
+	return ctdb_event_request_push(event_req, *buf, buflen);
+}
+
+/*
+ * sock_client reply_pull hook: unmarshall a reply packet into a newly
+ * allocated struct ctdb_event_reply on mem_ctx.
+ *
+ * Returns 0 and stores the reply in *reply_data on success, ENOMEM if
+ * the reply cannot be allocated, or the unmarshalling error (in which
+ * case the partially built reply is freed).
+ */
+static int ctdb_event_msg_reply_pull(uint8_t *buf, size_t buflen,
+				     TALLOC_CTX *mem_ctx, void **reply_data,
+				     void *private_data)
+{
+	struct ctdb_event_reply *parsed;
+	int rc;
+
+	parsed = talloc_zero(mem_ctx, struct ctdb_event_reply);
+	if (parsed == NULL) {
+		return ENOMEM;
+	}
+
+	/* Nested reply data is allocated on the reply itself */
+	rc = ctdb_event_reply_pull(buf, buflen, parsed, parsed);
+	if (rc == 0) {
+		*reply_data = parsed;
+		return 0;
+	}
+
+	talloc_free(parsed);
+	return rc;
+}
+
+/*
+ * sock_client reply_reqid hook: extract the request id from the packet
+ * header of a reply.
+ *
+ * Returns 0 and stores the id in *reqid, or the header parsing error.
+ */
+static int ctdb_event_msg_reply_reqid(uint8_t *buf, size_t buflen,
+				      uint32_t *reqid, void *private_data)
+{
+	struct sock_packet_header pkt_header;
+	size_t consumed;
+	int rc;
+
+	rc = sock_packet_header_pull(buf, buflen, &pkt_header, &consumed);
+	if (rc == 0) {
+		*reqid = pkt_header.reqid;
+	}
+
+	return rc;
+}
+
+/*
+ * Protocol hooks handed to sock_client_setup() by ctdb_event_init():
+ * how to marshall a request, unmarshall a reply, and read the request
+ * id out of a reply packet.
+ */
+struct sock_client_proto_funcs event_proto_funcs = {
+ .request_push = ctdb_event_msg_request_push,
+ .reply_pull = ctdb_event_msg_reply_pull,
+ .reply_reqid = ctdb_event_msg_reply_reqid,
+};
+
+
+/*
+ * Create an event daemon client context connected via sockpath.
+ *
+ * Returns 0 and stores the context (allocated on mem_ctx) in *out on
+ * success, ENOMEM if the context cannot be allocated, or the error from
+ * sock_client_setup().
+ */
+int ctdb_event_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		    const char *sockpath, struct ctdb_event_context **out)
+{
+	struct ctdb_event_context *ctx;
+	int rc;
+
+	ctx = talloc_zero(mem_ctx, struct ctdb_event_context);
+	if (ctx == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " memory allocation error\n"));
+		return ENOMEM;
+	}
+
+	rc = sock_client_setup(ctx, ev, sockpath,
+			       &event_proto_funcs, ctx,
+			       &ctx->sockc);
+	if (rc == 0) {
+		*out = ctx;
+		return 0;
+	}
+
+	talloc_free(ctx);
+	return rc;
+}
+
+/*
+ * Register callback/private_data as the disconnect callback of the
+ * underlying socket client.
+ */
+void ctdb_event_set_disconnect_callback(struct ctdb_event_context *eclient,
+ ctdb_client_callback_func_t callback,
+ void *private_data)
+{
+ sock_client_set_disconnect_callback(eclient->sockc,
+ callback, private_data);
+}
+
+/*
+ * Handle eventd_request and eventd_reply
+ */
+
+/*
+ * Send an event daemon request over the client's socket.  A zero
+ * timeval is passed as the timeout.
+ *
+ * Returns whatever sock_client_msg_send() returns.
+ */
+struct tevent_req *ctdb_event_msg_send(TALLOC_CTX *mem_ctx,
+				       struct tevent_context *ev,
+				       struct ctdb_event_context *eclient,
+				       struct ctdb_event_request *request)
+{
+	return sock_client_msg_send(mem_ctx, ev, eclient->sockc,
+				    tevent_timeval_zero(), request);
+}
+
+/*
+ * Collect the reply to an event daemon request.
+ *
+ * Returns the status of sock_client_msg_recv().  On success, and when
+ * reply is not NULL, *reply is set to the type-checked reply.
+ */
+bool ctdb_event_msg_recv(struct tevent_req *req, int *perr,
+			 TALLOC_CTX *mem_ctx,
+			 struct ctdb_event_reply **reply)
+{
+	void *raw_reply;
+
+	if (! sock_client_msg_recv(req, perr, mem_ctx, &raw_reply)) {
+		return false;
+	}
+
+	if (reply != NULL) {
+		*reply = talloc_get_type_abort(
+				raw_reply, struct ctdb_event_reply);
+	}
+
+	return true;
+}
+
+/*
+ * Run an event
+ */
+
+/*
+ * Send a RUN command asking the event daemon to run an event with the
+ * given timeout and argument string.
+ */
+struct tevent_req *ctdb_event_run_send(TALLOC_CTX *mem_ctx,
+				       struct tevent_context *ev,
+				       struct ctdb_event_context *eclient,
+				       enum ctdb_event event,
+				       uint32_t timeout, const char *arg_str)
+{
+	struct ctdb_event_request_run run_args;
+	struct ctdb_event_request request;
+
+	/* Command payload */
+	run_args.arg_str = arg_str;
+	run_args.timeout = timeout;
+	run_args.event = event;
+
+	/* Request envelope */
+	request.rdata.data.run = &run_args;
+	request.rdata.command = CTDB_EVENT_COMMAND_RUN;
+
+	return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the reply to a RUN command.
+ *
+ * Returns true and stores the daemon's result in *result (if not NULL).
+ * Returns false with *perr (if not NULL) set to the receive error, or to
+ * EPROTO when the reply is for a different command.
+ */
+bool ctdb_event_run_recv(struct tevent_req *req, int *perr, int *result)
+{
+	struct ctdb_event_reply *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_event_msg_recv(req, &err, req, &reply);
+	if (ok && reply->rdata.command != CTDB_EVENT_COMMAND_RUN) {
+		/* Reply does not belong to the command we sent */
+		talloc_free(reply);
+		err = EPROTO;
+		ok = false;
+	}
+
+	if (! ok) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (result != NULL) {
+		*result = reply->rdata.result;
+	}
+
+	talloc_free(reply);
+	return true;
+}
+
+/*
+ * Get event status
+ */
+
+/*
+ * Send a STATUS command asking the event daemon for the status of an
+ * event in the given state.
+ */
+struct tevent_req *ctdb_event_status_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct ctdb_event_context *eclient,
+					  enum ctdb_event event,
+					  enum ctdb_event_status_state state)
+{
+	struct ctdb_event_request_status status_args;
+	struct ctdb_event_request request;
+
+	/* Command payload */
+	status_args.state = state;
+	status_args.event = event;
+
+	/* Request envelope */
+	request.rdata.data.status = &status_args;
+	request.rdata.command = CTDB_EVENT_COMMAND_STATUS;
+
+	return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the reply to a STATUS command.
+ *
+ * On success returns true and fills each non-NULL output: *result with
+ * the daemon's result, *event_status with the event status, and
+ * *script_list with the script list, which is moved onto mem_ctx.
+ * Returns false with *perr (if not NULL) set to the receive error, or to
+ * EPROTO when the reply is for a different command.
+ */
+bool ctdb_event_status_recv(struct tevent_req *req, int *perr,
+			    int32_t *result, int *event_status,
+			    TALLOC_CTX *mem_ctx,
+			    struct ctdb_script_list **script_list)
+{
+	struct ctdb_event_reply *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_event_msg_recv(req, &err, req, &reply);
+	if (ok && reply->rdata.command != CTDB_EVENT_COMMAND_STATUS) {
+		/* Reply does not belong to the command we sent */
+		talloc_free(reply);
+		err = EPROTO;
+		ok = false;
+	}
+
+	if (! ok) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (result != NULL) {
+		*result = reply->rdata.result;
+	}
+	if (event_status != NULL) {
+		*event_status = reply->rdata.data.status->status;
+	}
+	if (script_list != NULL) {
+		/* Detach the list so it survives freeing the reply */
+		*script_list = talloc_steal(mem_ctx,
+					reply->rdata.data.status->script_list);
+	}
+
+	talloc_free(reply);
+	return true;
+}
+
+/*
+ * Get script list
+ */
+
+/*
+ * Send a SCRIPT_LIST command to the event daemon.  Only the command
+ * field of the request is filled in here.
+ */
+struct tevent_req *ctdb_event_script_list_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_event_context *eclient)
+{
+ struct ctdb_event_request request;
+
+ request.rdata.command = CTDB_EVENT_COMMAND_SCRIPT_LIST;
+
+ return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the reply to a SCRIPT_LIST command.
+ *
+ * On success returns true and fills each non-NULL output: *result with
+ * the daemon's result and *script_list with the script list, which is
+ * moved onto mem_ctx.  Returns false with *perr (if not NULL) set to the
+ * receive error, or to EPROTO when the reply is for a different command.
+ */
+bool ctdb_event_script_list_recv(struct tevent_req *req, int *perr,
+				 int32_t *result, TALLOC_CTX *mem_ctx,
+				 struct ctdb_script_list **script_list)
+{
+	struct ctdb_event_reply *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_event_msg_recv(req, &err, req, &reply);
+	if (ok && reply->rdata.command != CTDB_EVENT_COMMAND_SCRIPT_LIST) {
+		/* Reply does not belong to the command we sent */
+		talloc_free(reply);
+		err = EPROTO;
+		ok = false;
+	}
+
+	if (! ok) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (result != NULL) {
+		*result = reply->rdata.result;
+	}
+	if (script_list != NULL) {
+		/* Detach the list so it survives freeing the reply */
+		*script_list = talloc_steal(mem_ctx,
+				reply->rdata.data.script_list->script_list);
+	}
+
+	talloc_free(reply);
+	return true;
+}
+
+/*
+ * Enable a script
+ */
+
+/*
+ * Send a SCRIPT_ENABLE command for the named script to the event daemon.
+ */
+struct tevent_req *ctdb_event_script_enable_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_event_context *eclient,
+					const char *script_name)
+{
+	struct ctdb_event_request_script_enable enable_args;
+	struct ctdb_event_request request;
+
+	/* Command payload */
+	enable_args.script_name = script_name;
+
+	/* Request envelope */
+	request.rdata.data.script_enable = &enable_args;
+	request.rdata.command = CTDB_EVENT_COMMAND_SCRIPT_ENABLE;
+
+	return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the reply to a SCRIPT_ENABLE command.
+ *
+ * Returns true and stores the daemon's result in *result (if not NULL).
+ * Returns false with *perr (if not NULL) set to the receive error, or to
+ * EPROTO when the reply is for a different command.
+ */
+bool ctdb_event_script_enable_recv(struct tevent_req *req, int *perr,
+				   int *result)
+{
+	struct ctdb_event_reply *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_event_msg_recv(req, &err, req, &reply);
+	if (ok && reply->rdata.command != CTDB_EVENT_COMMAND_SCRIPT_ENABLE) {
+		/* Reply does not belong to the command we sent */
+		talloc_free(reply);
+		err = EPROTO;
+		ok = false;
+	}
+
+	if (! ok) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (result != NULL) {
+		*result = reply->rdata.result;
+	}
+
+	talloc_free(reply);
+	return true;
+}
+
+/*
+ * Disable a script
+ */
+
+/*
+ * Send a SCRIPT_DISABLE command for the named script to the event daemon.
+ */
+struct tevent_req *ctdb_event_script_disable_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_event_context *eclient,
+					const char *script_name)
+{
+	struct ctdb_event_request_script_disable disable_args;
+	struct ctdb_event_request request;
+
+	/* Command payload */
+	disable_args.script_name = script_name;
+
+	/* Request envelope */
+	request.rdata.data.script_disable = &disable_args;
+	request.rdata.command = CTDB_EVENT_COMMAND_SCRIPT_DISABLE;
+
+	return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the reply to a SCRIPT_DISABLE command.
+ *
+ * Returns true and stores the daemon's result in *result (if not NULL).
+ * Returns false with *perr (if not NULL) set to the receive error, or to
+ * EPROTO when the reply is for a different command.
+ */
+bool ctdb_event_script_disable_recv(struct tevent_req *req, int *perr,
+				    int *result)
+{
+	struct ctdb_event_reply *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_event_msg_recv(req, &err, req, &reply);
+	if (ok && reply->rdata.command != CTDB_EVENT_COMMAND_SCRIPT_DISABLE) {
+		/* Reply does not belong to the command we sent */
+		talloc_free(reply);
+		err = EPROTO;
+		ok = false;
+	}
+
+	if (! ok) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (result != NULL) {
+		*result = reply->rdata.result;
+	}
+
+	talloc_free(reply);
+	return true;
+}
diff --git a/ctdb/client/client_message.c b/ctdb/client/client_message.c
new file mode 100644
index 0000000..c2e975a
--- /dev/null
+++ b/ctdb/client/client_message.c
@@ -0,0 +1,607 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "common/reqid.h"
+#include "common/srvid.h"
+#include "common/comm.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+
+#include "client/client_private.h"
+#include "client/client.h"
+
+
+/*
+ * Handle REQ_MESSAGE
+ */
+
+/*
+ * Per-request state of ctdb_client_message_send().
+ */
+struct ctdb_client_message_state {
+ /* Client the message goes through; the destructor needs its idr */
+ struct ctdb_client_context *client;
+ /* Request id obtained from reqid_new(), removed by the destructor */
+ uint32_t reqid;
+};
+
+static int ctdb_client_message_state_destructor(
+ struct ctdb_client_message_state *state);
+static void ctdb_client_message_done(struct tevent_req *subreq);
+
+/*
+ * Start sending a CTDB_REQ_MESSAGE packet carrying message to destnode.
+ *
+ * Returns NULL if the request or a request id cannot be allocated.
+ * Packet allocation/marshalling failures and an out-of-memory condition
+ * when queueing the write are reported through the returned (already
+ * posted) request.  Otherwise the request completes from
+ * ctdb_client_message_done() once the write has finished.
+ */
+struct tevent_req *ctdb_client_message_send(TALLOC_CTX *mem_ctx,
+					    struct tevent_context *ev,
+					    struct ctdb_client_context *client,
+					    uint32_t destnode,
+					    struct ctdb_req_message *message)
+{
+	struct ctdb_client_message_state *state;
+	struct tevent_req *req, *write_req;
+	struct ctdb_req_header hdr;
+	uint8_t *pkt;
+	size_t pkt_len;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_client_message_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* The destructor is not installed yet, so a plain free is enough */
+	state->reqid = reqid_new(client->idr, state);
+	if (state->reqid == REQID_INVALID) {
+		talloc_free(req);
+		return NULL;
+	}
+	state->client = client;
+
+	/* From here on the request id is released when state is freed */
+	talloc_set_destructor(state, ctdb_client_message_state_destructor);
+
+	ctdb_req_header_fill(&hdr, 0, CTDB_REQ_MESSAGE, destnode,
+			     client->pnn, state->reqid);
+
+	/* Allocate the packet on state, then marshall header + message */
+	ret = ctdb_allocate_pkt(state, ctdb_req_message_len(&hdr, message),
+				&pkt, &pkt_len);
+	if (ret == 0) {
+		ret = ctdb_req_message_push(&hdr, message, pkt, &pkt_len);
+	}
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	write_req = comm_write_send(state, ev, client->comm, pkt, pkt_len);
+	if (tevent_req_nomem(write_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(write_req, ctdb_client_message_done, req);
+
+	return req;
+}
+
+/*
+ * talloc destructor installed on the send state by
+ * ctdb_client_message_send(): removes the request id from the client's
+ * id registry.  Always returns 0.
+ */
+static int ctdb_client_message_state_destructor(
+ struct ctdb_client_message_state *state)
+{
+ reqid_remove(state->client->idr, state->reqid);
+ return 0;
+}
+
+/*
+ * Completion callback of the packet write started by
+ * ctdb_client_message_send(): finishes the parent request with the
+ * outcome of the write.
+ */
+static void ctdb_client_message_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool ok;
+
+	ok = comm_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (ok) {
+		tevent_req_done(req);
+	} else {
+		tevent_req_error(req, err);
+	}
+}
+
+/*
+ * Fetch the outcome of ctdb_client_message_send().
+ *
+ * Returns true if the request finished without error.  Otherwise
+ * returns false and stores the error code in *perr when perr is not
+ * NULL.
+ */
+bool ctdb_client_message_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Handle an incoming REQ_MESSAGE packet.
+ *
+ * The packet in buf/buflen is unmarshalled into a temporary talloc
+ * context and its payload is dispatched to the handlers registered for
+ * the message's srvid on client->srv.  Packets that cannot be parsed are
+ * dropped silently.  reqid is not used here.
+ *
+ * The temporary context is released on every path; previously it was
+ * leaked on the client context whenever unmarshalling failed.
+ */
+void ctdb_client_req_message(struct ctdb_client_context *client,
+			     uint8_t *buf, size_t buflen, uint32_t reqid)
+{
+	struct ctdb_req_header h;
+	struct ctdb_req_message_data message;
+	TALLOC_CTX *tmp_ctx;
+	int ret;
+
+	tmp_ctx = talloc_new(client);
+	if (tmp_ctx == NULL) {
+		/* Nothing to unmarshall into; drop the packet */
+		return;
+	}
+
+	ret = ctdb_req_message_data_pull(buf, buflen, &h, tmp_ctx, &message);
+	if (ret == 0) {
+		srvid_dispatch(client->srv, message.srvid, CTDB_SRVID_ALL,
+			       message.data);
+	}
+
+	talloc_free(tmp_ctx);
+}
+
+/*
+ * Handle multiple nodes
+ */
+
+/*
+ * State of a multi-node send started by
+ * ctdb_client_message_multi_send().
+ */
+struct ctdb_client_message_multi_state {
+ /* Caller's list of destination nodes (pointer stored, not copied) */
+ uint32_t *pnn_list;
+ /* Number of entries in pnn_list and in err_list */
+ int count;
+ /* Number of per-node sends that have completed so far */
+ int done;
+ /* First error seen from any per-node send, 0 if none */
+ int err;
+ /* Per-node error slots, zero-initialised, indexed like pnn_list */
+ int *err_list;
+};
+
+/*
+ * Callback data attached to each per-node subrequest of a multi-node
+ * send.
+ */
+struct message_index_state {
+ /* The parent multi-node request */
+ struct tevent_req *req;
+ /* Position of this subrequest's node in pnn_list / err_list */
+ int index;
+};
+
+static void ctdb_client_message_multi_done(struct tevent_req *subreq);
+
+/*
+ * Start sending message to every node listed in pnn_list.
+ *
+ * One ctdb_client_message_send() subrequest is created per node; the
+ * request completes once all of them have reported back.
+ *
+ * Returns NULL if pnn_list is NULL, count is 0 or the request cannot be
+ * allocated.  Out-of-memory conditions after that are reported through
+ * the returned (already posted) request.
+ */
+struct tevent_req *ctdb_client_message_multi_send(
+				TALLOC_CTX *mem_ctx,
+				struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				uint32_t *pnn_list, int count,
+				struct ctdb_req_message *message)
+{
+	struct ctdb_client_message_multi_state *state;
+	struct tevent_req *req;
+	int idx;
+
+	if (count == 0 || pnn_list == NULL) {
+		return NULL;
+	}
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_client_message_multi_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->pnn_list = pnn_list;
+	state->count = count;
+	state->done = 0;
+	state->err = 0;
+	state->err_list = talloc_zero_array(state, int, count);
+	if (tevent_req_nomem(state->err_list, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	for (idx = 0; idx < count; idx++) {
+		struct message_index_state *slot;
+		struct tevent_req *node_req;
+
+		node_req = ctdb_client_message_send(state, ev, client,
+						    pnn_list[idx], message);
+		if (tevent_req_nomem(node_req, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		/* Tells the callback which node this subrequest is for */
+		slot = talloc(node_req, struct message_index_state);
+		if (tevent_req_nomem(slot, req)) {
+			return tevent_req_post(req, ev);
+		}
+		slot->index = idx;
+		slot->req = req;
+
+		tevent_req_set_callback(node_req,
+					ctdb_client_message_multi_done,
+					slot);
+	}
+
+	return req;
+}
+
+/*
+ * Completion callback for one per-node send of a multi-node request.
+ *
+ * Every failure is recorded in the node's err_list slot, so the list
+ * handed out by ctdb_client_message_multi_recv() identifies all failing
+ * nodes (previously only the first failure was stored and later ones
+ * were left at 0).  state->err keeps the first error seen.  The parent
+ * request is completed once all subrequests have reported back.
+ */
+static void ctdb_client_message_multi_done(struct tevent_req *subreq)
+{
+	struct message_index_state *substate = tevent_req_callback_data(
+		subreq, struct message_index_state);
+	struct tevent_req *req = substate->req;
+	int idx = substate->index;
+	struct ctdb_client_message_multi_state *state = tevent_req_data(
+		req, struct ctdb_client_message_multi_state);
+	bool status;
+	int ret;
+
+	/* req and idx were copied above: substate is freed with subreq */
+	status = ctdb_client_message_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		state->err_list[idx] = ret;
+		if (state->err == 0) {
+			state->err = ret;
+		}
+	}
+
+	state->done += 1;
+
+	if (state->done == state->count) {
+		tevent_req_done(req);
+	}
+}
+
+/*
+ * Fetch the outcome of ctdb_client_message_multi_send().
+ *
+ * Returns true only if the request itself finished without error and no
+ * per-node send failed.  When perr is not NULL it receives the request
+ * error or, if the request itself succeeded, the first per-node error
+ * (0 if none).  When perr_list is not NULL the per-node error array is
+ * moved onto mem_ctx and returned in *perr_list, on success and on
+ * failure alike.
+ */
+bool ctdb_client_message_multi_recv(struct tevent_req *req, int *perr,
+				    TALLOC_CTX *mem_ctx, int **perr_list)
+{
+	struct ctdb_client_message_multi_state *state = tevent_req_data(
+		req, struct ctdb_client_message_multi_state);
+	bool req_failed;
+	int err;
+
+	req_failed = tevent_req_is_unix_error(req, &err);
+	if (! req_failed) {
+		/* Request completed; report the first per-node error */
+		err = state->err;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	if (perr_list != NULL) {
+		*perr_list = talloc_steal(mem_ctx, state->err_list);
+	}
+
+	return (! req_failed) && (err == 0);
+}
+
+/*
+ * sync version of message send
+ */
+
+/*
+ * Synchronous wrapper around ctdb_client_message_send()/_recv().
+ *
+ * The request lives on a temporary context below client which is freed
+ * before returning; mem_ctx is not used.  Returns 0 on success, ENOMEM
+ * if the temporary context or the request cannot be allocated, or the
+ * error reported by ctdb_client_message_recv().
+ */
+int ctdb_client_message(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			uint32_t destnode, struct ctdb_req_message *message)
+{
+	TALLOC_CTX *frame;
+	struct tevent_req *req;
+	int ret;
+
+	frame = talloc_new(client);
+	if (frame == NULL) {
+		return ENOMEM;
+	}
+
+	req = ctdb_client_message_send(frame, ev, client, destnode, message);
+	if (req == NULL) {
+		talloc_free(frame);
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	/* ret is only written by _recv() on failure */
+	if (ctdb_client_message_recv(req, &ret)) {
+		ret = 0;
+	}
+
+	talloc_free(frame);
+	return ret;
+}
+
+/*
+ * Synchronous wrapper around ctdb_client_message_multi_send()/_recv().
+ *
+ * Returns 0 if the message was sent to every node in pnn_list.
+ * Returns ENOMEM if the request could not be created (this includes
+ * ctdb_client_message_multi_send() returning NULL for an empty list),
+ * otherwise the error reported by ctdb_client_message_multi_recv().
+ * When perr_list is not NULL, the per-node error array is returned in
+ * *perr_list, allocated on mem_ctx.
+ *
+ * The request is freed before returning; previously it was left
+ * allocated on mem_ctx after every call.
+ */
+int ctdb_client_message_multi(TALLOC_CTX *mem_ctx,
+			      struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      uint32_t *pnn_list, int count,
+			      struct ctdb_req_message *message,
+			      int **perr_list)
+{
+	struct tevent_req *req;
+	bool status;
+	int ret;
+
+	req = ctdb_client_message_multi_send(mem_ctx, ev, client,
+					     pnn_list, count,
+					     message);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	status = ctdb_client_message_multi_recv(req, &ret, mem_ctx, perr_list);
+
+	/*
+	 * _recv() has already moved the error list onto mem_ctx (when it
+	 * was asked for), so the request can be released safely.
+	 */
+	talloc_free(req);
+
+	if (! status) {
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * State of ctdb_client_set_message_handler_send(): the arguments are
+ * kept until the REGISTER_SRVID control has been answered, then passed
+ * to srvid_register().
+ */
+struct ctdb_client_set_message_handler_state {
+ /* Client whose srv registry receives the handler */
+ struct ctdb_client_context *client;
+ /* Service id the handler is registered for */
+ uint64_t srvid;
+ /* Handler function to register */
+ srvid_handler_fn handler;
+ /* Opaque pointer registered together with the handler */
+ void *private_data;
+};
+
+static void ctdb_client_set_message_handler_done(struct tevent_req *subreq);
+
+/*
+ * Start registering handler for messages addressed to srvid.
+ *
+ * A REGISTER_SRVID control is sent to node client->pnn with a zero
+ * timeval as timeout; the local registration happens in
+ * ctdb_client_set_message_handler_done() once the control has been
+ * answered.
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+struct tevent_req *ctdb_client_set_message_handler_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_client_context *client,
+					uint64_t srvid,
+					srvid_handler_fn handler,
+					void *private_data)
+{
+	struct ctdb_client_set_message_handler_state *state;
+	struct tevent_req *req, *ctl_req;
+	struct ctdb_req_control ctl;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_client_set_message_handler_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Remember what to register once the control has succeeded */
+	state->private_data = private_data;
+	state->handler = handler;
+	state->srvid = srvid;
+	state->client = client;
+
+	ctdb_req_control_register_srvid(&ctl, srvid);
+	ctl_req = ctdb_client_control_send(state, ev, client, client->pnn,
+					   tevent_timeval_zero(), &ctl);
+	if (tevent_req_nomem(ctl_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(ctl_req,
+				ctdb_client_set_message_handler_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback of the REGISTER_SRVID control.
+ *
+ * If the control was answered successfully, the handler is added to the
+ * client's srvid registry.  The first failing step completes the request
+ * with its error code.
+ */
+static void ctdb_client_set_message_handler_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_client_set_message_handler_state *state = tevent_req_data(
+		req, struct ctdb_client_set_message_handler_state);
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* Check the control's own result before registering locally */
+	ret = ctdb_reply_control_register_srvid(reply);
+	talloc_free(reply);
+	if (ret == 0) {
+		ret = srvid_register(state->client->srv, state->client,
+				     state->srvid, state->handler,
+				     state->private_data);
+	}
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Fetch the outcome of ctdb_client_set_message_handler_send().
+ *
+ * Returns true if the request finished without error.  Otherwise
+ * returns false and stores the error code in *perr when perr is not
+ * NULL.
+ */
+bool ctdb_client_set_message_handler_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * State of ctdb_client_remove_message_handler_send(): the arguments are
+ * kept until the DEREGISTER_SRVID control has been answered, then passed
+ * to srvid_deregister().
+ */
+struct ctdb_client_remove_message_handler_state {
+ /* Client whose srv registry the handler is removed from */
+ struct ctdb_client_context *client;
+ /* Service id being deregistered */
+ uint64_t srvid;
+ /* Opaque pointer passed on to srvid_deregister() */
+ void *private_data;
+};
+
+static void ctdb_client_remove_message_handler_done(struct tevent_req *subreq);
+
+/*
+ * Start removing the message handler registered for srvid.
+ *
+ * A DEREGISTER_SRVID control is sent to node client->pnn with a zero
+ * timeval as timeout; the local deregistration happens in
+ * ctdb_client_remove_message_handler_done() once the control has been
+ * answered.
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+struct tevent_req *ctdb_client_remove_message_handler_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_client_context *client,
+					uint64_t srvid,
+					void *private_data)
+{
+	struct ctdb_client_remove_message_handler_state *state;
+	struct tevent_req *req, *ctl_req;
+	struct ctdb_req_control ctl;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_client_remove_message_handler_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Remember what to deregister once the control has succeeded */
+	state->private_data = private_data;
+	state->srvid = srvid;
+	state->client = client;
+
+	ctdb_req_control_deregister_srvid(&ctl, srvid);
+	ctl_req = ctdb_client_control_send(state, ev, client, client->pnn,
+					   tevent_timeval_zero(), &ctl);
+	if (tevent_req_nomem(ctl_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(ctl_req,
+				ctdb_client_remove_message_handler_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback of the DEREGISTER_SRVID control.
+ *
+ * If the control was answered successfully, the handler is removed from
+ * the client's srvid registry.  The first failing step completes the
+ * request with its error code.
+ */
+static void ctdb_client_remove_message_handler_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_client_remove_message_handler_state *state = tevent_req_data(
+		req, struct ctdb_client_remove_message_handler_state);
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int ret;
+
+	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* Check the control's own result before deregistering locally */
+	ret = ctdb_reply_control_deregister_srvid(reply);
+	talloc_free(reply);
+	if (ret == 0) {
+		ret = srvid_deregister(state->client->srv, state->srvid,
+				       state->private_data);
+	}
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Fetch the outcome of ctdb_client_remove_message_handler_send().
+ *
+ * Returns true if the request finished without error.  Otherwise
+ * returns false and stores the error code in *perr when perr is not
+ * NULL.
+ */
+bool ctdb_client_remove_message_handler_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Synchronous wrapper around
+ * ctdb_client_set_message_handler_send()/_recv().
+ *
+ * The request lives on a temporary context below client which is freed
+ * before returning.  Returns 0 on success, ENOMEM if the temporary
+ * context or the request cannot be allocated, or the error reported by
+ * ctdb_client_set_message_handler_recv().
+ */
+int ctdb_client_set_message_handler(struct tevent_context *ev,
+				    struct ctdb_client_context *client,
+				    uint64_t srvid, srvid_handler_fn handler,
+				    void *private_data)
+{
+	TALLOC_CTX *frame;
+	struct tevent_req *req;
+	int ret;
+
+	frame = talloc_new(client);
+	if (frame == NULL) {
+		return ENOMEM;
+	}
+
+	req = ctdb_client_set_message_handler_send(frame, ev, client,
+						   srvid, handler,
+						   private_data);
+	if (req == NULL) {
+		talloc_free(frame);
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	/* ret is only written by _recv() on failure */
+	if (ctdb_client_set_message_handler_recv(req, &ret)) {
+		ret = 0;
+	}
+
+	talloc_free(frame);
+	return ret;
+}
+
+/*
+ * Synchronous wrapper around
+ * ctdb_client_remove_message_handler_send()/_recv().
+ *
+ * The request lives on a temporary context below client which is freed
+ * before returning.  Returns 0 on success, ENOMEM if the temporary
+ * context or the request cannot be allocated, or the error reported by
+ * ctdb_client_remove_message_handler_recv().
+ */
+int ctdb_client_remove_message_handler(struct tevent_context *ev,
+				       struct ctdb_client_context *client,
+				       uint64_t srvid, void *private_data)
+{
+	TALLOC_CTX *frame;
+	struct tevent_req *req;
+	int ret;
+
+	frame = talloc_new(client);
+	if (frame == NULL) {
+		return ENOMEM;
+	}
+
+	req = ctdb_client_remove_message_handler_send(frame, ev, client,
+						      srvid, private_data);
+	if (req == NULL) {
+		talloc_free(frame);
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	/* ret is only written by _recv() on failure */
+	if (ctdb_client_remove_message_handler_recv(req, &ret)) {
+		ret = 0;
+	}
+
+	talloc_free(frame);
+	return ret;
+}
diff --git a/ctdb/client/client_message_sync.c b/ctdb/client/client_message_sync.c
new file mode 100644
index 0000000..a23cd6c
--- /dev/null
+++ b/ctdb/client/client_message_sync.c
@@ -0,0 +1,176 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "common/logging.h"
+
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "client/client_private.h"
+#include "client/client.h"
+#include "client/client_sync.h"
+
+/*
+ * Send a CTDB_SRVID_RECD_UPDATE_IP message carrying pubip to destnode
+ * using the synchronous ctdb_client_message().
+ *
+ * Returns 0 on success, otherwise the error from ctdb_client_message(),
+ * which is also logged at DEBUG_ERR.
+ */
+int ctdb_message_recd_update_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, struct ctdb_public_ip *pubip)
+{
+	struct ctdb_req_message message;
+	int ret;
+
+	message.srvid = CTDB_SRVID_RECD_UPDATE_IP;
+	message.data.pubip = pubip;
+
+	ret = ctdb_client_message(mem_ctx, ev, client, destnode, &message);
+	if (ret != 0) {
+		/* destnode is an int: cast so the argument matches %u */
+		DEBUG(DEBUG_ERR,
+		      ("Message RECD_UPDATE_IP failed to node %u\n",
+		       (unsigned int)destnode));
+	}
+
+	return ret;
+}
+
+/*
+ * Send a CTDB_SRVID_MEM_DUMP message carrying msg to destnode using the
+ * synchronous ctdb_client_message().
+ *
+ * Returns 0 on success, otherwise the error from ctdb_client_message(),
+ * which is also logged at DEBUG_ERR.
+ */
+int ctdb_message_mem_dump(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  int destnode, struct ctdb_srvid_message *msg)
+{
+	struct ctdb_req_message message;
+	int ret;
+
+	message.srvid = CTDB_SRVID_MEM_DUMP;
+	message.data.msg = msg;
+
+	ret = ctdb_client_message(mem_ctx, ev, client, destnode, &message);
+	if (ret != 0) {
+		/* destnode is an int: cast so the argument matches %u */
+		DEBUG(DEBUG_ERR,
+		      ("Message MEM_DUMP failed to node %u\n",
+		       (unsigned int)destnode));
+	}
+
+	return ret;
+}
+
+/*
+ * Send a CTDB_SRVID_RELOAD_NODES message to destnode using the
+ * synchronous ctdb_client_message().  Only the srvid is filled in; no
+ * payload is set for this message.
+ *
+ * Returns 0 on success, otherwise the error from ctdb_client_message(),
+ * which is also logged at DEBUG_ERR.
+ */
+int ctdb_message_reload_nodes(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode)
+{
+	struct ctdb_req_message message;
+	int ret;
+
+	message.srvid = CTDB_SRVID_RELOAD_NODES;
+
+	ret = ctdb_client_message(mem_ctx, ev, client, destnode, &message);
+	if (ret != 0) {
+		/* destnode is an int: cast so the argument matches %u */
+		DEBUG(DEBUG_ERR,
+		      ("Message RELOAD_NODES failed to node %u\n",
+		       (unsigned int)destnode));
+	}
+
+	return ret;
+}
+
+/*
+ * Send a CTDB_SRVID_TAKEOVER_RUN message carrying msg to destnode using
+ * the synchronous ctdb_client_message().
+ *
+ * Returns 0 on success, otherwise the error from ctdb_client_message(),
+ * which is also logged at DEBUG_ERR.
+ */
+int ctdb_message_takeover_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			      struct ctdb_client_context *client,
+			      int destnode, struct ctdb_srvid_message *msg)
+{
+	struct ctdb_req_message message;
+	int ret;
+
+	message.srvid = CTDB_SRVID_TAKEOVER_RUN;
+	message.data.msg = msg;
+
+	ret = ctdb_client_message(mem_ctx, ev, client, destnode, &message);
+	if (ret != 0) {
+		/* destnode is an int: cast so the argument matches %u */
+		DEBUG(DEBUG_ERR,
+		      ("Message TAKEOVER_RUN failed to node %u\n",
+		       (unsigned int)destnode));
+	}
+
+	return ret;
+}
+
+/*
+ * Send a CTDB_SRVID_REBALANCE_NODE message carrying pnn to destnode
+ * using the synchronous ctdb_client_message().
+ *
+ * Returns 0 on success, otherwise the error from ctdb_client_message(),
+ * which is also logged at DEBUG_ERR.
+ */
+int ctdb_message_rebalance_node(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				int destnode, uint32_t pnn)
+{
+	struct ctdb_req_message message;
+	int ret;
+
+	message.srvid = CTDB_SRVID_REBALANCE_NODE;
+	message.data.pnn = pnn;
+
+	ret = ctdb_client_message(mem_ctx, ev, client, destnode, &message);
+	if (ret != 0) {
+		/* destnode is an int: cast so the argument matches %u */
+		DEBUG(DEBUG_ERR,
+		      ("Message REBALANCE_NODE failed to node %u\n",
+		       (unsigned int)destnode));
+	}
+
+	return ret;
+}
+
+/*
+ * Send a CTDB_SRVID_DISABLE_TAKEOVER_RUNS message carrying disable to
+ * destnode using the synchronous ctdb_client_message().
+ *
+ * Returns 0 on success, otherwise the error from ctdb_client_message(),
+ * which is also logged at DEBUG_ERR.
+ */
+int ctdb_message_disable_takeover_runs(TALLOC_CTX *mem_ctx,
+				       struct tevent_context *ev,
+				       struct ctdb_client_context *client,
+				       int destnode,
+				       struct ctdb_disable_message *disable)
+{
+	struct ctdb_req_message message;
+	int ret;
+
+	message.srvid = CTDB_SRVID_DISABLE_TAKEOVER_RUNS;
+	message.data.disable = disable;
+
+	ret = ctdb_client_message(mem_ctx, ev, client, destnode, &message);
+	if (ret != 0) {
+		/* destnode is an int: cast so the argument matches %u */
+		DEBUG(DEBUG_ERR,
+		      ("Message DISABLE_TAKEOVER_RUNS failed to node %u\n",
+		       (unsigned int)destnode));
+	}
+
+	return ret;
+}
+
+/*
+ * Send a CTDB_SRVID_DISABLE_RECOVERIES message carrying disable to
+ * destnode using the synchronous ctdb_client_message().
+ *
+ * Returns 0 on success, otherwise the error from ctdb_client_message(),
+ * which is also logged at DEBUG_ERR.
+ */
+int ctdb_message_disable_recoveries(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct ctdb_client_context *client,
+				    int destnode,
+				    struct ctdb_disable_message *disable)
+{
+	struct ctdb_req_message message;
+	int ret;
+
+	message.srvid = CTDB_SRVID_DISABLE_RECOVERIES;
+	message.data.disable = disable;
+
+	ret = ctdb_client_message(mem_ctx, ev, client, destnode, &message);
+	if (ret != 0) {
+		/* destnode is an int: cast so the argument matches %u */
+		DEBUG(DEBUG_ERR,
+		      ("Message DISABLE_RECOVERIES failed to node %u\n",
+		       (unsigned int)destnode));
+	}
+
+	return ret;
+}
diff --git a/ctdb/client/client_private.h b/ctdb/client/client_private.h
new file mode 100644
index 0000000..0bb2ad5
--- /dev/null
+++ b/ctdb/client/client_private.h
@@ -0,0 +1,99 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CLIENT_PRIVATE_H__
+#define __CTDB_CLIENT_PRIVATE_H__
+
+#include "protocol/protocol.h"
+#include "client/client.h"
+
+/*
+ * Client-side description of a database.
+ * NOTE(review): this header does not show how the fields are used; the
+ * notes below are read off the field names only -- confirm against
+ * client_db.c.
+ */
+struct ctdb_db_context {
+ /* presumably links for a doubly linked list of databases -- TODO confirm */
+ struct ctdb_db_context *prev, *next;
+ uint32_t db_id;
+ uint8_t db_flags;
+ const char *db_name;
+ const char *db_path;
+ /* presumably the wrapped local tdb handle -- TODO confirm */
+ struct tdb_wrap *ltdb;
+};
+
+/*
+ * State of one client connection.
+ * Field notes marked "presumably" are inferred from names only and need
+ * confirming against the connection setup code.
+ */
+struct ctdb_client_context {
+ /* Request id registry used with reqid_new()/reqid_remove() */
+ struct reqid_context *idr;
+ /* Message handler registry used with srvid_register()/srvid_dispatch() */
+ struct srvid_context *srv;
+ /* presumably the handler registry for tunnels -- TODO confirm */
+ struct srvid_context *tunnels;
+ /* Packet transport handed to comm_write_send() */
+ struct comm_context *comm;
+ /* NOTE(review): invocation of callback/private_data is not visible here */
+ ctdb_client_callback_func_t callback;
+ void *private_data;
+ /* presumably the socket to the daemon -- TODO confirm */
+ int fd;
+ /* Node number used as source of requests and as target of local controls */
+ uint32_t pnn;
+ /* presumably the head of the list of attached databases -- TODO confirm */
+ struct ctdb_db_context *db;
+};
+
+/*
+ * Handle for a single database record.
+ * NOTE(review): how the handle is created and released is not visible
+ * in this header -- see client_db.c.
+ */
+struct ctdb_record_handle {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Database the record belongs to */
+ struct ctdb_db_context *db;
+ /* ctdb ltdb header stored with the handle */
+ struct ctdb_ltdb_header header;
+ TDB_DATA key;
+ TDB_DATA data; /* This is returned from tdb_fetch() */
+ /* NOTE(review): presumably set for read-only record access -- confirm */
+ bool readonly;
+};
+
+/*
+ * Handle for a database transaction.
+ * NOTE(review): field semantics are not visible in this header; the
+ * notes below are read off the names only -- confirm against
+ * client_db.c.
+ */
+struct ctdb_transaction_handle {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* db: database operated on; db_g_lock: presumably the lock database */
+ struct ctdb_db_context *db, *db_g_lock;
+ /* presumably the buffer collecting the transaction's records */
+ struct ctdb_rec_buffer *recbuf;
+ struct ctdb_server_id sid;
+ const char *lock_name;
+ bool readonly;
+ /* presumably set once the transaction has modified something */
+ bool updated;
+};
+
+/*
+ * State of one tunnel endpoint.
+ * NOTE(review): usage is not visible in this header -- see
+ * client_tunnel.c.
+ */
+struct ctdb_tunnel_context {
+ /* Client connection the tunnel belongs to */
+ struct ctdb_client_context *client;
+ uint64_t tunnel_id;
+ /* presumably invoked, with private_data, for tunnel traffic -- confirm */
+ ctdb_tunnel_callback_func_t callback;
+ void *private_data;
+};
+
+/* From client_call.c */
+
+void ctdb_client_reply_call(struct ctdb_client_context *client,
+ uint8_t *buf, size_t buflen, uint32_t reqid);
+
+/* From client_db.c */
+
+struct tdb_context *client_db_tdb(struct ctdb_db_context *db);
+
+/* From client_message.c */
+
+void ctdb_client_req_message(struct ctdb_client_context *client,
+ uint8_t *buf, size_t buflen, uint32_t reqid);
+
+/* From client_control.c */
+
+void ctdb_client_reply_control(struct ctdb_client_context *client,
+ uint8_t *buf, size_t buflen, uint32_t reqid);
+
+/* From client_tunnel.c */
+
+void ctdb_client_req_tunnel(struct ctdb_client_context *client,
+ uint8_t *buf, size_t buflen, uint32_t reqid);
+
+#endif /* __CTDB_CLIENT_PRIVATE_H__ */
diff --git a/ctdb/client/client_sync.h b/ctdb/client/client_sync.h
new file mode 100644
index 0000000..618879e
--- /dev/null
+++ b/ctdb/client/client_sync.h
@@ -0,0 +1,521 @@
+/*
+ CTDB client code - sync api
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CLIENT_SYNC_H__
+#define __CTDB_CLIENT_SYNC_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/* from client/client_control_sync.c */
+
+int ctdb_ctrl_process_exists(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ pid_t pid, int *status);
+
+int ctdb_ctrl_statistics(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_statistics **stats);
+
+int ctdb_ctrl_ping(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ int *num_clients);
+
+int ctdb_ctrl_getdbpath(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id, const char **db_path);
+
+int ctdb_ctrl_getvnnmap(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_vnn_map **vnnmap);
+
+int ctdb_ctrl_getdebug(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ int *loglevel);
+
+int ctdb_ctrl_setdebug(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ int loglevel);
+
+int ctdb_ctrl_get_dbmap(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_dbid_map **dbmap);
+
+int ctdb_ctrl_get_recmode(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ int *recmode);
+
+int ctdb_ctrl_set_recmode(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ int recmode);
+
+int ctdb_ctrl_statistics_reset(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_db_attach(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ const char *db_name, uint32_t *db_id);
+
+int ctdb_ctrl_traverse_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_traverse_start *traverse);
+
+int ctdb_ctrl_register_srvid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint64_t srvid);
+
+int ctdb_ctrl_deregister_srvid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint64_t srvid);
+
+int ctdb_ctrl_get_dbname(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id, const char **db_name);
+
+int ctdb_ctrl_enable_seqnum(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id);
+
+int ctdb_ctrl_update_seqnum(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id);
+
+int ctdb_ctrl_dump_memory(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ const char **mem_str);
+
+int ctdb_ctrl_get_pid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ pid_t *pid);
+
+int ctdb_ctrl_freeze(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ int priority);
+
+int ctdb_ctrl_get_pnn(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t *pnn);
+
+int ctdb_ctrl_shutdown(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_tcp_add(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_connection *conn);
+
+int ctdb_ctrl_tcp_remove(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_connection *conn);
+
+int ctdb_ctrl_set_tunable(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_tunable *tunable);
+
+int ctdb_ctrl_get_tunable(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ const char *var, uint32_t *value);
+
+int ctdb_ctrl_list_tunables(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_var_list **var_list);
+
+int ctdb_ctrl_modify_flags(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t pnn, uint32_t old_flags,
+ uint32_t new_flags);
+
+int ctdb_ctrl_get_all_tunables(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_tunable_list **tun_list);
+
+int ctdb_ctrl_get_tcp_tickle_list(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ ctdb_sock_addr *addr,
+ struct ctdb_tickle_list **tickles);
+
+int ctdb_ctrl_set_tcp_tickle_list(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_tickle_list *tickles);
+
+int ctdb_ctrl_db_attach_persistent(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ const char *db_name, uint32_t *db_id);
+
+int ctdb_ctrl_send_gratuitous_arp(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_addr_info *addr_info);
+
+int ctdb_ctrl_wipe_database(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id, uint32_t tid);
+
+int ctdb_ctrl_uptime(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_uptime **uptime);
+
+int ctdb_ctrl_start_recovery(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_end_recovery(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_reload_nodes_file(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_add_public_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_addr_info *addr_info);
+
+int ctdb_ctrl_del_public_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_addr_info *addr_info);
+
+int ctdb_ctrl_get_capabilities(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t *caps);
+
+int ctdb_ctrl_release_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_public_ip *pubip);
+
+int ctdb_ctrl_takeover_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_public_ip *pubip);
+
+int ctdb_ctrl_get_public_ips(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ bool available_only,
+ struct ctdb_public_ip_list **pubip_list);
+
+int ctdb_ctrl_get_nodemap(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_node_map **nodemap);
+
+int ctdb_ctrl_traverse_kill(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_traverse_start *traverse);
+
+int ctdb_ctrl_get_reclock_file(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ const char **reclock_file);
+
+int ctdb_ctrl_stop_node(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_continue_node(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_set_lmasterrole(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t lmaster_role);
+
+int ctdb_ctrl_set_recmasterrole(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t recmaster_role);
+
+int ctdb_ctrl_set_ban_state(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_ban_state *ban_state);
+
+int ctdb_ctrl_get_ban_state(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_ban_state **ban_state);
+
+int ctdb_ctrl_register_notify(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_notify_data *notify);
+
+int ctdb_ctrl_deregister_notify(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint64_t srvid);
+
+int ctdb_ctrl_trans3_commit(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_rec_buffer *recbuf);
+
+int ctdb_ctrl_get_db_seqnum(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id, uint64_t *seqnum);
+
+int ctdb_ctrl_db_set_healthy(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id);
+
+int ctdb_ctrl_db_get_health(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id, const char **reason);
+
+int ctdb_ctrl_get_public_ip_info(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ ctdb_sock_addr *addr,
+ struct ctdb_public_ip_info **ipinfo);
+
+int ctdb_ctrl_get_ifaces(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_iface_list **iface_list);
+
+int ctdb_ctrl_set_iface_link_state(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_iface *iface);
+
+int ctdb_ctrl_tcp_add_delayed_update(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_connection *conn);
+
+int ctdb_ctrl_get_stat_history(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_statistics_list **stats_list);
+
+int ctdb_ctrl_schedule_for_deletion(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_key_data *key);
+
+int ctdb_ctrl_set_db_readonly(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id);
+
+int ctdb_ctrl_traverse_start_ext(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_traverse_start_ext *traverse);
+
+int ctdb_ctrl_get_db_statistics(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id,
+ struct ctdb_db_statistics **dbstats);
+
+int ctdb_ctrl_set_db_sticky(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id);
+
+int ctdb_ctrl_reload_public_ips(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_ipreallocated(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout);
+
+int ctdb_ctrl_get_runstate(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ enum ctdb_runstate *runstate);
+
+int ctdb_ctrl_db_detach(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id);
+
+int ctdb_ctrl_get_nodes_file(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_node_map **nodemap);
+
+int ctdb_ctrl_db_freeze(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout, uint32_t db_id);
+
+int ctdb_ctrl_db_thaw(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout, uint32_t db_id);
+
+int ctdb_ctrl_db_transaction_start(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_transdb *transdb);
+
+int ctdb_ctrl_db_transaction_commit(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_transdb *transdb);
+
+int ctdb_ctrl_db_transaction_cancel(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id);
+
+int ctdb_ctrl_db_pull(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_pulldb_ext *pulldb, uint32_t *num_records);
+
+int ctdb_ctrl_db_push_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_pulldb_ext *pulldb);
+
+int ctdb_ctrl_db_push_confirm(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id, uint32_t *num_records);
+
+int ctdb_ctrl_db_open_flags(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint32_t db_id, int *tdb_flags);
+
+int ctdb_ctrl_db_attach_replicated(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ const char *db_name, uint32_t *db_id);
+
+int ctdb_ctrl_check_pid_srvid(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ struct ctdb_pid_srvid *pid_srvid, int *status);
+
+int ctdb_ctrl_tunnel_register(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint64_t tunnel_id);
+
+int ctdb_ctrl_tunnel_deregister(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct timeval timeout,
+ uint64_t tunnel_id);
+
+int ctdb_ctrl_disable_node(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode,
+ struct timeval timeout);
+
+int ctdb_ctrl_enable_node(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode,
+ struct timeval timeout);
+
+/* from client/client_message_sync.c
+ *
+ * Every prototype in this group takes a talloc context, a tevent
+ * context, the client context and a destination node, optionally
+ * followed by one message-specific argument, and returns an int.
+ * NOTE(review): presumably 0 on success and an errno-style value on
+ * failure -- confirm against client_message_sync.c.
+ */
+
+int ctdb_message_recd_update_ip(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct ctdb_public_ip *pubip);
+
+int ctdb_message_mem_dump(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct ctdb_srvid_message *msg);
+
+int ctdb_message_reload_nodes(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode);
+
+int ctdb_message_takeover_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, struct ctdb_srvid_message *msg);
+
+int ctdb_message_rebalance_node(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode, uint32_t pnn);
+
+int ctdb_message_disable_takeover_runs(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode,
+ struct ctdb_disable_message *disable);
+
+int ctdb_message_disable_recoveries(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int destnode,
+ struct ctdb_disable_message *disable);
+
+#endif /* __CTDB_CLIENT_SYNC_H__ */
diff --git a/ctdb/client/client_tunnel.c b/ctdb/client/client_tunnel.c
new file mode 100644
index 0000000..13c35fb
--- /dev/null
+++ b/ctdb/client/client_tunnel.c
@@ -0,0 +1,693 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "common/reqid.h"
+#include "common/srvid.h"
+#include "common/comm.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+
+#include "client/client_private.h"
+#include "client/client.h"
+
+
+struct ctdb_tunnel_data { /* bundle built by ctdb_client_req_tunnel() for the tunnel handlers */
+ struct ctdb_req_header hdr; /* header pulled from the received packet */
+ struct ctdb_req_tunnel *tunnel; /* decoded tunnel message; payload is in ->data */
+ uint32_t reqid; /* request id passed to ctdb_client_req_tunnel() */
+};
+
+/*
+ * Tunnel setup and destroy
+ *
+ * ctdb_tunnel_setup_state is the per-request state of
+ * ctdb_tunnel_setup_send(): the client the tunnel is set up on, the
+ * tunnel context that ctdb_tunnel_setup_recv() hands back to the
+ * caller, and the id of the tunnel being registered.
+ *
+ * ctdb_tunnel_setup_register_done() is the completion callback of the
+ * register control; ctdb_tunnel_handler() is the handler registered in
+ * client->tunnels for the tunnel id.
+ */
+
+struct ctdb_tunnel_setup_state {
+ struct ctdb_client_context *client;
+ struct ctdb_tunnel_context *tctx;
+ uint64_t tunnel_id;
+};
+
+static void ctdb_tunnel_setup_register_done(struct tevent_req *subreq);
+static void ctdb_tunnel_handler(uint64_t tunnel_id, TDB_DATA data,
+ void *private_data);
+
+/*
+ * Start setting up a tunnel endpoint for tunnel_id.
+ *
+ * A ctdb_tunnel_context is allocated on client (so that it outlives
+ * the request) and a tunnel-register control is sent to the node
+ * returned by ctdb_client_pnn().  ctdb_tunnel_setup_register_done()
+ * completes the request when the control reply arrives.
+ *
+ * The request fails with EEXIST if a handler for tunnel_id already
+ * exists in client->tunnels.  NULL is returned only when the request
+ * itself cannot be allocated.
+ */
+struct tevent_req *ctdb_tunnel_setup_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct ctdb_client_context *client,
+					  uint64_t tunnel_id,
+					  ctdb_tunnel_callback_func_t callback,
+					  void *private_data)
+{
+	struct tevent_req *req, *subreq;
+	struct ctdb_tunnel_setup_state *state;
+	struct ctdb_tunnel_context *tctx;
+	struct ctdb_req_control request;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_tunnel_setup_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->client = client;
+	state->tunnel_id = tunnel_id;
+	state->tctx = NULL;
+
+	/*
+	 * Check for a duplicate before allocating the tunnel context:
+	 * the context is parented to the long-lived client, so anything
+	 * allocated before an early failure would linger until the
+	 * client itself is freed.
+	 */
+	ret = srvid_exists(client->tunnels, tunnel_id, NULL);
+	if (ret == 0) {
+		tevent_req_error(req, EEXIST);
+		return tevent_req_post(req, ev);
+	}
+
+	tctx = talloc_zero(client, struct ctdb_tunnel_context);
+	if (tevent_req_nomem(tctx, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tctx->client = client;
+	tctx->tunnel_id = tunnel_id;
+	tctx->callback = callback;
+	tctx->private_data = private_data;
+
+	state->tctx = tctx;
+
+	ctdb_req_control_tunnel_register(&request, tunnel_id);
+	subreq = ctdb_client_control_send(state, ev, client,
+					  ctdb_client_pnn(client),
+					  tevent_timeval_zero(),
+					  &request);
+	if (tevent_req_nomem(subreq, req)) {
+		/* Not owned by req, so it has to be released by hand */
+		TALLOC_FREE(state->tctx);
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, ctdb_tunnel_setup_register_done, req);
+
+	return req;
+}
+
+/*
+ * Completion of the tunnel-register control sent by
+ * ctdb_tunnel_setup_send().
+ *
+ * On success the tunnel handler is registered in client->tunnels with
+ * the tunnel context as its private data and the request is marked
+ * done.  On any failure the tunnel context is released (it is
+ * parented to the client, not to the request, and is never handed to
+ * the caller on error) and the request fails with the error code.
+ */
+static void ctdb_tunnel_setup_register_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_tunnel_setup_state *state = tevent_req_data(
+		req, struct ctdb_tunnel_setup_state);
+	struct ctdb_reply_control *reply;
+	bool status;
+	int ret;
+
+	status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		goto fail;
+	}
+
+	ret = ctdb_reply_control_tunnel_register(reply);
+	talloc_free(reply);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	ret = srvid_register(state->client->tunnels, state->client,
+			     state->tunnel_id,
+			     ctdb_tunnel_handler, state->tctx);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	tevent_req_done(req);
+	return;
+
+fail:
+	/*
+	 * Free before signalling the error: the caller's callback may
+	 * free req (and with it state) from inside tevent_req_error().
+	 */
+	TALLOC_FREE(state->tctx);
+	tevent_req_error(req, ret);
+}
+
+/*
+ * Handler registered in client->tunnels for a tunnel id.
+ *
+ * data is expected to wrap a struct ctdb_tunnel_data (as built by
+ * ctdb_client_req_tunnel()); anything of a different size, or for a
+ * different tunnel id, is silently ignored.  Otherwise the user
+ * callback is invoked with the source node, request id and payload.
+ */
+static void ctdb_tunnel_handler(uint64_t tunnel_id, TDB_DATA data,
+				void *private_data)
+{
+	struct ctdb_tunnel_context *tctx;
+	struct ctdb_tunnel_data *td;
+
+	tctx = talloc_get_type_abort(private_data,
+				     struct ctdb_tunnel_context);
+
+	if (tctx->tunnel_id != tunnel_id ||
+	    data.dsize != sizeof(struct ctdb_tunnel_data)) {
+		return;
+	}
+
+	td = (struct ctdb_tunnel_data *)data.dptr;
+
+	tctx->callback(tctx,
+		       td->hdr.srcnode,
+		       td->reqid,
+		       td->tunnel->data.dptr,
+		       td->tunnel->data.dsize,
+		       tctx->private_data);
+}
+
+/*
+ * Collect the result of ctdb_tunnel_setup_send().
+ *
+ * Returns false and stores the error in *perr (when perr is not NULL)
+ * if the request failed; otherwise stores the tunnel context in
+ * *result and returns true.
+ */
+bool ctdb_tunnel_setup_recv(struct tevent_req *req, int *perr,
+			    struct ctdb_tunnel_context **result)
+{
+	struct ctdb_tunnel_setup_state *state = tevent_req_data(
+		req, struct ctdb_tunnel_setup_state);
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		*result = state->tctx;
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Blocking variant of ctdb_tunnel_setup_send()/_recv(): polls ev until
+ * the request completes.  Returns 0 on success, ENOMEM if the request
+ * could not be created, or the error reported by the request.
+ */
+int ctdb_tunnel_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct ctdb_client_context *client, uint64_t tunnel_id,
+		      ctdb_tunnel_callback_func_t callback, void *private_data,
+		      struct ctdb_tunnel_context **result)
+{
+	struct tevent_req *req;
+	bool ok;
+	int err;
+
+	req = ctdb_tunnel_setup_send(mem_ctx, ev, client, tunnel_id,
+				     callback, private_data);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	ok = ctdb_tunnel_setup_recv(req, &err, result);
+	talloc_free(req);
+
+	return ok ? 0 : err;
+}
+
+struct ctdb_tunnel_destroy_state { /* per-request state of ctdb_tunnel_destroy_send() */
+ struct ctdb_tunnel_context *tctx; /* tunnel being deregistered */
+};
+
+static void ctdb_tunnel_destroy_deregister_done(struct tevent_req *subreq); /* completion of the deregister control */
+
+/*
+ * Start tearing down a tunnel: sends a tunnel-deregister control for
+ * tctx->tunnel_id to the node returned by ctdb_client_pnn().  The
+ * handler is removed in ctdb_tunnel_destroy_deregister_done().
+ *
+ * Returns NULL only when the request cannot be allocated.
+ */
+struct tevent_req *ctdb_tunnel_destroy_send(TALLOC_CTX *mem_ctx,
+					    struct tevent_context *ev,
+					    struct ctdb_tunnel_context *tctx)
+{
+	struct ctdb_tunnel_destroy_state *state;
+	struct ctdb_req_control ctl;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_tunnel_destroy_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->tctx = tctx;
+
+	ctdb_req_control_tunnel_deregister(&ctl, tctx->tunnel_id);
+
+	subreq = ctdb_client_control_send(state,
+					  ev,
+					  tctx->client,
+					  ctdb_client_pnn(tctx->client),
+					  tevent_timeval_zero(),
+					  &ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_set_callback(subreq,
+				ctdb_tunnel_destroy_deregister_done,
+				req);
+	return req;
+}
+
+/*
+ * Completion of the tunnel-deregister control: on success the handler
+ * for the tunnel id is removed from client->tunnels and the request is
+ * marked done; any failure along the way fails the request with the
+ * corresponding error code.
+ */
+static void ctdb_tunnel_destroy_deregister_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req;
+	struct ctdb_tunnel_destroy_state *state;
+	struct ctdb_tunnel_context *tctx;
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int err;
+
+	req = tevent_req_callback_data(subreq, struct tevent_req);
+	state = tevent_req_data(req, struct ctdb_tunnel_destroy_state);
+	tctx = state->tctx;
+
+	ok = ctdb_client_control_recv(subreq, &err, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = ctdb_reply_control_tunnel_deregister(reply);
+	talloc_free(reply);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = srvid_deregister(tctx->client->tunnels, tctx->tunnel_id, tctx);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of ctdb_tunnel_destroy_send().  Returns true on
+ * success; on failure returns false and, if perr is not NULL, stores
+ * the error code in *perr.
+ */
+bool ctdb_tunnel_destroy_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+
+/*
+ * Blocking variant of ctdb_tunnel_destroy_send()/_recv().  The request
+ * is allocated on ev itself and freed before returning.  Returns 0 on
+ * success, ENOMEM if the request could not be created, or the error
+ * reported by the request.
+ */
+int ctdb_tunnel_destroy(struct tevent_context *ev,
+			struct ctdb_tunnel_context *tctx)
+{
+	struct tevent_req *req;
+	bool ok;
+	int err;
+
+	req = ctdb_tunnel_destroy_send(ev, ev, tctx);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	ok = ctdb_tunnel_destroy_recv(req, &err);
+	talloc_free(req);
+
+	return ok ? 0 : err;
+}
+
+/*
+ * Callback when REQ_TUNNEL packet is received
+ */
+
+static void ctdb_tunnel_request_reply(struct tevent_req *req,
+				      struct ctdb_tunnel_data *tunnel_data);
+
+/*
+ * Decode a REQ_TUNNEL packet and route it.
+ *
+ * Packets flagged CTDB_TUNNEL_FLAG_REPLY are matched by reqid against
+ * the pending requests in client->idr and handed to
+ * ctdb_tunnel_request_reply(); packets flagged
+ * CTDB_TUNNEL_FLAG_REQUEST are dispatched to the handler registered
+ * for the tunnel id in client->tunnels.  Packets that cannot be
+ * decoded, or replies without a matching request, are dropped.
+ */
+void ctdb_client_req_tunnel(struct ctdb_client_context *client,
+			    uint8_t *buf, size_t buflen, uint32_t reqid)
+{
+	TALLOC_CTX *tmp_ctx;
+	struct ctdb_req_header h;
+	struct ctdb_req_tunnel *tunnel;
+	struct tevent_req *req;
+	struct ctdb_tunnel_data tunnel_data;
+	int ret;
+
+	tmp_ctx = talloc_new(client);
+	if (tmp_ctx == NULL) {
+		/*
+		 * Without a temporary context the allocations below
+		 * would have no parent and nothing would free them.
+		 */
+		return;
+	}
+
+	tunnel = talloc_zero(tmp_ctx, struct ctdb_req_tunnel);
+	if (tunnel == NULL) {
+		goto fail;
+	}
+
+	/*
+	 * Use tunnel itself as the allocation context for the pull.
+	 * The reply path moves tunnel onto the pending request with
+	 * talloc_steal(); anything hanging off tmp_ctx instead would
+	 * be freed at the end of this function while the request
+	 * still points at it.
+	 * NOTE(review): assumes ctdb_req_tunnel_pull() allocates the
+	 * payload on the context it is given -- confirm against the
+	 * protocol code.
+	 */
+	ret = ctdb_req_tunnel_pull(buf, buflen, &h, tunnel, tunnel);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	tunnel_data = (struct ctdb_tunnel_data) {
+		.hdr = h,
+		.tunnel = tunnel,
+		.reqid = reqid,
+	};
+
+	if (tunnel->flags & CTDB_TUNNEL_FLAG_REPLY) {
+		req = reqid_find(client->idr, reqid, struct tevent_req);
+		if (req == NULL) {
+			goto fail;
+		}
+
+		ctdb_tunnel_request_reply(req, &tunnel_data);
+
+	} else if (tunnel->flags & CTDB_TUNNEL_FLAG_REQUEST) {
+
+		/* tunnel_data lives on the stack for the duration of the dispatch only */
+		TDB_DATA data = {
+			.dsize = sizeof(struct ctdb_tunnel_data),
+			.dptr = (uint8_t *)&tunnel_data,
+		};
+
+		srvid_dispatch(client->tunnels, tunnel->tunnel_id, 0, data);
+	}
+
+fail:
+	TALLOC_FREE(tmp_ctx);
+}
+
+
+/*
+ * Send messages using tunnel
+ *
+ * ctdb_tunnel_request_state is the per-request state of
+ * ctdb_tunnel_request_send(): the tunnel used, whether the request
+ * stays pending until a reply arrives, the request id allocated from
+ * client->idr (released again by the state destructor) and, once
+ * ctdb_tunnel_request_reply() has run, the decoded reply.
+ */
+
+struct ctdb_tunnel_request_state {
+ struct ctdb_tunnel_context *tctx;
+ bool wait_for_reply;
+ uint32_t reqid;
+ struct ctdb_req_tunnel *tunnel;
+};
+
+static int ctdb_tunnel_request_state_destructor(
+ struct ctdb_tunnel_request_state *state);
+static void ctdb_tunnel_request_done(struct tevent_req *subreq);
+
+/*
+ * Send buf/buflen as a tunnel request to destnode.
+ *
+ * A request id is allocated from client->idr so that a reply can be
+ * matched later; it is released by the state destructor.  Requests to
+ * one of the broadcast destinations never wait for a reply, whatever
+ * wait_for_reply says, and such requests are flagged
+ * CTDB_TUNNEL_FLAG_NOREPLY.  A non-zero timeout is installed as the
+ * end time of the request.
+ *
+ * Returns NULL if the request or its request id cannot be allocated.
+ */
+struct tevent_req *ctdb_tunnel_request_send(TALLOC_CTX *mem_ctx,
+					    struct tevent_context *ev,
+					    struct ctdb_tunnel_context *tctx,
+					    uint32_t destnode,
+					    struct timeval timeout,
+					    uint8_t *buf, size_t buflen,
+					    bool wait_for_reply)
+{
+	struct ctdb_tunnel_request_state *state;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+	struct ctdb_req_tunnel msg;
+	struct ctdb_req_header hdr;
+	uint8_t *pkt;
+	size_t needed;
+	size_t pkt_len;
+	bool is_broadcast;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_tunnel_request_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->tctx = tctx;
+	state->wait_for_reply = wait_for_reply;
+
+	state->reqid = reqid_new(tctx->client->idr, req);
+	if (state->reqid == REQID_INVALID) {
+		talloc_free(req);
+		return NULL;
+	}
+
+	/* From here on the request id is released together with state */
+	talloc_set_destructor(state, ctdb_tunnel_request_state_destructor);
+
+	msg = (struct ctdb_req_tunnel) {
+		.tunnel_id = state->tctx->tunnel_id,
+		.flags = CTDB_TUNNEL_FLAG_REQUEST,
+		.data = (TDB_DATA) {
+			.dptr = buf,
+			.dsize = buflen,
+		},
+	};
+
+	is_broadcast = (destnode == CTDB_BROADCAST_ALL ||
+			destnode == CTDB_BROADCAST_ACTIVE ||
+			destnode == CTDB_BROADCAST_CONNECTED);
+	if (is_broadcast) {
+		state->wait_for_reply = false;
+	}
+
+	if (! state->wait_for_reply) {
+		msg.flags |= CTDB_TUNNEL_FLAG_NOREPLY;
+	}
+
+	ctdb_req_header_fill(&hdr,
+			     0,
+			     CTDB_REQ_TUNNEL,
+			     destnode,
+			     ctdb_client_pnn(state->tctx->client),
+			     state->reqid);
+
+	needed = ctdb_req_tunnel_len(&hdr, &msg);
+
+	ret = ctdb_allocate_pkt(state, needed, &pkt, &pkt_len);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	ret = ctdb_req_tunnel_push(&hdr, &msg, pkt, &pkt_len);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	if (!tevent_timeval_is_zero(&timeout) &&
+	    !tevent_req_set_endtime(req, ev, timeout)) {
+		return tevent_req_post(req, ev);
+	}
+
+	subreq = comm_write_send(state, ev, tctx->client->comm, pkt, pkt_len);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_set_callback(subreq, ctdb_tunnel_request_done, req);
+	return req;
+}
+
+/*
+ * talloc destructor for the request state: gives the request id back
+ * to client->idr.  Always returns 0 so that the free goes ahead.
+ */
+static int ctdb_tunnel_request_state_destructor(
+	struct ctdb_tunnel_request_state *state)
+{
+	struct ctdb_client_context *client = state->tctx->client;
+
+	reqid_remove(client->idr, state->reqid);
+
+	return 0;
+}
+
+/*
+ * Completion of the packet write started by ctdb_tunnel_request_send().
+ * A failed write fails the request.  After a successful write the
+ * request is finished straight away unless a reply is awaited, in
+ * which case it stays pending until the reply or the timeout.
+ */
+static void ctdb_tunnel_request_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req;
+	struct ctdb_tunnel_request_state *state;
+	bool ok;
+	int err;
+
+	req = tevent_req_callback_data(subreq, struct tevent_req);
+	state = tevent_req_data(req, struct ctdb_tunnel_request_state);
+
+	ok = comm_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	if (state->wait_for_reply) {
+		/* Wait for the reply or timeout */
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Deliver a received reply to the pending request req.  Replies whose
+ * request id does not match are ignored; otherwise the decoded tunnel
+ * message is re-parented onto the request state and the request is
+ * marked done.
+ */
+static void ctdb_tunnel_request_reply(struct tevent_req *req,
+				      struct ctdb_tunnel_data *tunnel_data)
+{
+	struct ctdb_tunnel_request_state *state;
+
+	state = tevent_req_data(req, struct ctdb_tunnel_request_state);
+
+	if (tunnel_data->reqid == state->reqid) {
+		state->tunnel = talloc_steal(state, tunnel_data->tunnel);
+		tevent_req_done(req);
+	}
+}
+
+/*
+ * Collect the result of ctdb_tunnel_request_send().
+ *
+ * On failure returns false and, if perr is not NULL, stores the error
+ * code in *perr; buf and buflen are left untouched.
+ *
+ * On success returns true.  If a reply was awaited and received, its
+ * payload is moved onto mem_ctx and returned through buf/buflen (each
+ * may be NULL if not wanted).  If no reply was awaited -- either
+ * because the caller did not ask for one or because the destination
+ * was a broadcast address, for which ctdb_tunnel_request_send()
+ * clears wait_for_reply -- *buf is set to NULL and *buflen to 0
+ * rather than being left uninitialised.
+ */
+bool ctdb_tunnel_request_recv(struct tevent_req *req, int *perr,
+			      TALLOC_CTX *mem_ctx, uint8_t **buf,
+			      size_t *buflen)
+{
+	struct ctdb_tunnel_request_state *state = tevent_req_data(
+		req, struct ctdb_tunnel_request_state);
+	int ret;
+
+	if (tevent_req_is_unix_error(req, &ret)) {
+		if (perr != NULL) {
+			*perr = ret;
+		}
+		return false;
+	}
+
+	/* Well-defined outputs even when there is no reply to return */
+	if (buf != NULL) {
+		*buf = NULL;
+	}
+	if (buflen != NULL) {
+		*buflen = 0;
+	}
+
+	if (state->wait_for_reply && state->tunnel != NULL) {
+		if (buf != NULL) {
+			*buf = talloc_steal(mem_ctx, state->tunnel->data.dptr);
+		}
+		if (buflen != NULL) {
+			*buflen = state->tunnel->data.dsize;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Blocking variant of ctdb_tunnel_request_send()/_recv().  Any reply
+ * payload is discarded (NULL is passed for the output arguments of the
+ * receive function).  Returns 0 on success, ENOMEM if the request
+ * could not be created, or the error reported by the request.
+ */
+int ctdb_tunnel_request(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			struct ctdb_tunnel_context *tctx, uint32_t destnode,
+			struct timeval timeout, uint8_t *buf, size_t buflen,
+			bool wait_for_reply)
+{
+	struct tevent_req *req;
+	bool ok;
+	int err;
+
+	req = ctdb_tunnel_request_send(mem_ctx, ev, tctx, destnode,
+				       timeout, buf, buflen, wait_for_reply);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	ok = ctdb_tunnel_request_recv(req, &err, NULL, NULL, NULL);
+	talloc_free(req);
+
+	return ok ? 0 : err;
+}
+
+struct ctdb_tunnel_reply_state { /* ctdb_tunnel_reply_send() keeps no per-request fields */
+};
+
+static void ctdb_tunnel_reply_done(struct tevent_req *subreq); /* completion of the reply packet write */
+
+/*
+ * Send buf/buflen to destnode as the reply to the tunnel request
+ * identified by reqid.  The packet is flagged CTDB_TUNNEL_FLAG_REPLY
+ * and carries reqid in its header.  A non-zero timeout is installed
+ * as the end time of the request.
+ *
+ * Returns NULL only when the request cannot be allocated.
+ */
+struct tevent_req *ctdb_tunnel_reply_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct ctdb_tunnel_context *tctx,
+					  uint32_t destnode, uint32_t reqid,
+					  struct timeval timeout,
+					  uint8_t *buf, size_t buflen)
+{
+	struct ctdb_tunnel_reply_state *state;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+	struct ctdb_req_tunnel msg;
+	struct ctdb_req_header hdr;
+	uint8_t *pkt;
+	size_t needed;
+	size_t pkt_len;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct ctdb_tunnel_reply_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	msg = (struct ctdb_req_tunnel) {
+		.tunnel_id = tctx->tunnel_id,
+		.flags = CTDB_TUNNEL_FLAG_REPLY,
+		.data = (TDB_DATA) {
+			.dptr = buf,
+			.dsize = buflen,
+		},
+	};
+
+	ctdb_req_header_fill(&hdr,
+			     0,
+			     CTDB_REQ_TUNNEL,
+			     destnode,
+			     ctdb_client_pnn(tctx->client),
+			     reqid);
+
+	needed = ctdb_req_tunnel_len(&hdr, &msg);
+
+	ret = ctdb_allocate_pkt(state, needed, &pkt, &pkt_len);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	ret = ctdb_req_tunnel_push(&hdr, &msg, pkt, &pkt_len);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	if (!tevent_timeval_is_zero(&timeout) &&
+	    !tevent_req_set_endtime(req, ev, timeout)) {
+		return tevent_req_post(req, ev);
+	}
+
+	subreq = comm_write_send(state, ev, tctx->client->comm, pkt, pkt_len);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_set_callback(subreq, ctdb_tunnel_reply_done, req);
+	return req;
+}
+
+/*
+ * Completion of the packet write started by ctdb_tunnel_reply_send():
+ * the request finishes with the write's error, or successfully.
+ */
+static void ctdb_tunnel_reply_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req;
+	bool ok;
+	int err;
+
+	req = tevent_req_callback_data(subreq, struct tevent_req);
+
+	ok = comm_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (ok) {
+		tevent_req_done(req);
+	} else {
+		tevent_req_error(req, err);
+	}
+}
+
+/*
+ * Collect the result of ctdb_tunnel_reply_send().  Returns true on
+ * success; on failure returns false and, if perr is not NULL, stores
+ * the error code in *perr.
+ */
+bool ctdb_tunnel_reply_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Blocking variant of ctdb_tunnel_reply_send()/_recv().  Returns 0 on
+ * success, ENOMEM if the request could not be created, or the error
+ * reported by the request.
+ */
+int ctdb_tunnel_reply(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct ctdb_tunnel_context *tctx, uint32_t destnode,
+		      uint32_t reqid, struct timeval timeout,
+		      uint8_t *buf, size_t buflen)
+{
+	struct tevent_req *req;
+	bool ok;
+	int err;
+
+	req = ctdb_tunnel_reply_send(mem_ctx, ev, tctx, destnode, reqid,
+				     timeout, buf, buflen);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	ok = ctdb_tunnel_reply_recv(req, &err);
+	talloc_free(req);
+
+	return ok ? 0 : err;
+}
diff --git a/ctdb/client/client_util.c b/ctdb/client/client_util.c
new file mode 100644
index 0000000..35323ff
--- /dev/null
+++ b/ctdb/client/client_util.c
@@ -0,0 +1,137 @@
+/*
+ CTDB client code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "common/logging.h"
+
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "client/client_private.h"
+#include "client/client.h"
+#include "client/client_sync.h"
+
+/*
+ * Build an array of the PNNs in nodemap, leaving out every node that
+ * has any of the flags in flags_mask set and the node whose PNN is
+ * exclude_pnn.
+ *
+ * The array is allocated on mem_ctx with room for nodemap->num entries
+ * and returned through *pnn_list.  Returns the number of entries
+ * filled in, or -1 if the allocation fails.
+ */
+int list_of_nodes(struct ctdb_node_map *nodemap,
+		  uint32_t flags_mask, uint32_t exclude_pnn,
+		  TALLOC_CTX *mem_ctx, uint32_t **pnn_list)
+{
+	uint32_t *pnns;
+	unsigned int idx;
+	int count = 0;
+
+	pnns = talloc_array(mem_ctx, uint32_t, nodemap->num);
+	if (pnns == NULL) {
+		return -1;
+	}
+
+	for (idx = 0; idx < nodemap->num; idx++) {
+		bool skip = (nodemap->node[idx].flags & flags_mask) != 0 ||
+			    nodemap->node[idx].pnn == exclude_pnn;
+
+		if (! skip) {
+			pnns[count] = nodemap->node[idx].pnn;
+			count++;
+		}
+	}
+
+	*pnn_list = pnns;
+	return count;
+}
+
+/*
+ * list_of_nodes() with NODE_FLAGS_INACTIVE as the exclusion mask.
+ */
+int list_of_active_nodes(struct ctdb_node_map *nodemap, uint32_t exclude_pnn,
+			 TALLOC_CTX *mem_ctx, uint32_t **pnn_list)
+{
+	int count;
+
+	count = list_of_nodes(nodemap, NODE_FLAGS_INACTIVE, exclude_pnn,
+			      mem_ctx, pnn_list);
+	return count;
+}
+
+/*
+ * list_of_nodes() with NODE_FLAGS_DISCONNECTED as the exclusion mask.
+ */
+int list_of_connected_nodes(struct ctdb_node_map *nodemap,
+			    uint32_t exclude_pnn,
+			    TALLOC_CTX *mem_ctx, uint32_t **pnn_list)
+{
+	int count;
+
+	count = list_of_nodes(nodemap, NODE_FLAGS_DISCONNECTED, exclude_pnn,
+			      mem_ctx, pnn_list);
+	return count;
+}
+
+/*
+ * Build a server id for the calling process: pid from getpid(), vnn
+ * from ctdb_client_pnn(), the given task_id, and a unique_id made of
+ * task_id shifted left by 32 bits OR-ed with the pid.
+ */
+struct ctdb_server_id ctdb_client_get_server_id(
+				struct ctdb_client_context *client,
+				uint32_t task_id)
+{
+	struct ctdb_server_id id;
+
+	id.vnn = ctdb_client_pnn(client);
+	id.task_id = task_id;
+	id.pid = getpid();
+
+	/* Widen via the field itself so the shift uses its type */
+	id.unique_id = task_id;
+	id.unique_id <<= 32;
+	id.unique_id |= id.pid;
+
+	return id;
+}
+
+/*
+ * Two server ids are equal when pid, task_id, vnn and unique_id all
+ * match.
+ */
+bool ctdb_server_id_equal(struct ctdb_server_id *sid1,
+			  struct ctdb_server_id *sid2)
+{
+	return sid1->pid == sid2->pid &&
+	       sid1->task_id == sid2->task_id &&
+	       sid1->vnn == sid2->vnn &&
+	       sid1->unique_id == sid2->unique_id;
+}
+
+/*
+ * Ask node sid->vnn, via ctdb_ctrl_process_exists(), about process
+ * sid->pid.  *exists is set to true only when the control reports a
+ * result of exactly 1.  Returns 0 on success or the error from the
+ * control, in which case *exists is not written.
+ */
+int ctdb_server_id_exists(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+			  struct ctdb_client_context *client,
+			  struct ctdb_server_id *sid, bool *exists)
+{
+	int answer;
+	int err;
+
+	err = ctdb_ctrl_process_exists(mem_ctx, ev, client, sid->vnn,
+				       tevent_timeval_zero(),
+				       sid->pid, &answer);
+	if (err != 0) {
+		return err;
+	}
+
+	*exists = (answer == 1);
+
+	return 0;
+}
diff --git a/ctdb/cluster/cluster_conf.c b/ctdb/cluster/cluster_conf.c
new file mode 100644
index 0000000..bdd64ba
--- /dev/null
+++ b/ctdb/cluster/cluster_conf.c
@@ -0,0 +1,182 @@
+/*
+ CTDB cluster config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+
+#include "common/conf.h"
+
+#include "cluster_conf.h"
+
+#define CLUSTER_TRANSPORT_DEFAULT "tcp" /* default for the transport option and the only value validate_transport() accepts */
+
+/*
+ * Warn when a reload tries to change a string option that cannot be
+ * changed at run time.
+ *
+ * Ideally this would be a void function, but it is also used directly
+ * as a validation function, so it always returns true.
+ */
+static bool check_static_string_change(const char *key,
+				       const char *old_value,
+				       const char *new_value,
+				       enum conf_update_mode mode)
+{
+	bool changed;
+
+	if (mode != CONF_MODE_RELOAD) {
+		return true;
+	}
+
+	/* Same pointer, which includes both being NULL: nothing changed */
+	if (old_value == new_value) {
+		return true;
+	}
+
+	/*
+	 * At this point old_value and new_value cannot both be NULL,
+	 * so if either one is NULL then they are different
+	 */
+	changed = (old_value == NULL ||
+		   new_value == NULL ||
+		   strcmp(old_value, new_value) != 0);
+	if (changed) {
+		D_WARNING("Ignoring update of [%s] -> %s\n",
+			  CLUSTER_CONF_SECTION,
+			  key);
+	}
+
+	return true;
+}
+
+/*
+ * Validation function for the transport option.
+ *
+ * Only CLUSTER_TRANSPORT_DEFAULT is accepted; any other value,
+ * including a NULL one, is rejected with an error message.  For an
+ * accepted value the result of check_static_string_change() is
+ * returned, which warns if a reload tries to change the option.
+ */
+static bool validate_transport(const char *key,
+			       const char *old_transport,
+			       const char *new_transport,
+			       enum conf_update_mode mode)
+{
+	/*
+	 * Guard against NULL before strcmp(), as the other string
+	 * validators in this file do for their new value.
+	 */
+	if (new_transport == NULL) {
+		D_ERR("Invalid value for [cluster] -> transport = %s\n",
+		      "(null)");
+		return false;
+	}
+
+	/* Don't allow "ib" for now.  It is broken! */
+	if (strcmp(new_transport, CLUSTER_TRANSPORT_DEFAULT) != 0) {
+		D_ERR("Invalid value for [cluster] -> transport = %s\n",
+		      new_transport);
+		return false;
+	}
+
+	/* This sometimes warns but always returns true */
+	return check_static_string_change(key,
+					  old_transport,
+					  new_transport,
+					  mode);
+}
+
+/*
+ * Validation function for the node address option.
+ *
+ * NULL is accepted; otherwise the value has to parse with inet_pton()
+ * as either an IPv4 or an IPv6 address, or it is rejected with an
+ * error message.  Accepted values are passed on to
+ * check_static_string_change().
+ */
+static bool validate_node_address(const char *key,
+				  const char *old_node_address,
+				  const char *new_node_address,
+				  enum conf_update_mode mode)
+{
+	struct in_addr addr4;
+	struct in6_addr addr6;
+
+	if (new_node_address != NULL &&
+	    inet_pton(AF_INET, new_node_address, &addr4) != 1 &&
+	    inet_pton(AF_INET6, new_node_address, &addr6) != 1) {
+		D_ERR("Invalid value for [cluster] -> node address = %s\n",
+		      new_node_address);
+		return false;
+	}
+
+	/* This sometimes warns but always returns true */
+	return check_static_string_change(key,
+					  old_node_address,
+					  new_node_address,
+					  mode);
+}
+
+/*
+ * Validation function for the recovery lock option: warns that the
+ * option is deprecated whenever a value is set, then defers to
+ * check_static_string_change().
+ */
+static bool validate_recovery_lock(const char *key,
+				   const char *old_reclock,
+				   const char *new_reclock,
+				   enum conf_update_mode mode)
+{
+	if (new_reclock != NULL) {
+		D_WARNING("Configuration option [%s] -> %s is deprecated\n",
+			  CLUSTER_CONF_SECTION,
+			  key);
+	}
+
+	return check_static_string_change(key,
+					  old_reclock,
+					  new_reclock,
+					  mode);
+}
+
+/*
+ * Validation function for the leader timeout option: the new value
+ * has to be strictly positive, otherwise an error is logged and the
+ * value is rejected.
+ */
+static bool validate_leader_timeout(const char *key,
+				    int old_timeout,
+				    int new_timeout,
+				    enum conf_update_mode mode)
+{
+	if (new_timeout > 0) {
+		return true;
+	}
+
+	D_ERR("Invalid value for [cluster] -> leader timeout = %d\n",
+	      new_timeout);
+	return false;
+}
+
+/*
+ * Define the cluster section and its options on conf: four string
+ * options (transport, node address, cluster lock, recovery lock), the
+ * integer leader timeout (default 5) and the boolean leader capability
+ * (default true, no validation function).
+ */
+void cluster_conf_init(struct conf_context *conf)
+{
+	conf_define_section(conf, CLUSTER_CONF_SECTION, NULL);
+
+	/* String options */
+	conf_define_string(conf, CLUSTER_CONF_SECTION,
+			   CLUSTER_CONF_TRANSPORT,
+			   CLUSTER_TRANSPORT_DEFAULT,
+			   validate_transport);
+	conf_define_string(conf, CLUSTER_CONF_SECTION,
+			   CLUSTER_CONF_NODE_ADDRESS,
+			   NULL,
+			   validate_node_address);
+	conf_define_string(conf, CLUSTER_CONF_SECTION,
+			   CLUSTER_CONF_CLUSTER_LOCK,
+			   NULL,
+			   check_static_string_change);
+	conf_define_string(conf, CLUSTER_CONF_SECTION,
+			   CLUSTER_CONF_RECOVERY_LOCK,
+			   NULL,
+			   validate_recovery_lock);
+
+	/* Leader election options */
+	conf_define_integer(conf, CLUSTER_CONF_SECTION,
+			    CLUSTER_CONF_LEADER_TIMEOUT,
+			    5,
+			    validate_leader_timeout);
+	conf_define_boolean(conf, CLUSTER_CONF_SECTION,
+			    CLUSTER_CONF_LEADER_CAPABILITY,
+			    true,
+			    NULL);
+}
diff --git a/ctdb/cluster/cluster_conf.h b/ctdb/cluster/cluster_conf.h
new file mode 100644
index 0000000..38c378f
--- /dev/null
+++ b/ctdb/cluster/cluster_conf.h
@@ -0,0 +1,36 @@
+/*
+ CTDB cluster config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CLUSTER_CONF_H__
+#define __CTDB_CLUSTER_CONF_H__
+
+#include "common/conf.h"
+
+#define CLUSTER_CONF_SECTION "cluster"
+
+#define CLUSTER_CONF_TRANSPORT "transport"
+#define CLUSTER_CONF_NODE_ADDRESS "node address"
+#define CLUSTER_CONF_CLUSTER_LOCK "cluster lock"
+#define CLUSTER_CONF_RECOVERY_LOCK "recovery lock"
+#define CLUSTER_CONF_LEADER_TIMEOUT "leader timeout"
+#define CLUSTER_CONF_LEADER_CAPABILITY "leader capability"
+
+void cluster_conf_init(struct conf_context *conf);
+
+#endif /* __CTDB_CLUSTER_CONF_H__ */
diff --git a/ctdb/common/cmdline.c b/ctdb/common/cmdline.c
new file mode 100644
index 0000000..ce368a9
--- /dev/null
+++ b/ctdb/common/cmdline.c
@@ -0,0 +1,598 @@
+/*
+ Command line processing
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+
+#include "common/cmdline.h"
+
+#define CMDLINE_MAX_LEN 80
+/* A named group of commands, as passed to cmdline_init()/cmdline_add() */
+struct cmdline_section {
+ const char *name; /* heading printed in usage output; may be NULL */
+ struct cmdline_command *commands; /* ends at the entry whose name is NULL */
+};
+
+struct cmdline_context {
+ const char *prog; /* talloc copy of the program name */
+ struct poptOption *options; /* table built by cmdline_options_define() */
+ struct cmdline_section *section; /* array of num_sections entries */
+ int num_sections;
+ size_t max_len; /* longest name + msg_args length; pads usage output */
+ poptContext pc; /* released by cmdline_context_destructor() */
+ int argc, arg0; /* arg0: index in argv just past the matched command words */
+ const char **argv; /* arguments matched against command names */
+ struct cmdline_command *match_cmd; /* result of the last match; NULL if none */
+};
+
+/* Set by cmdline_popt_help(); cleared at the start of cmdline_parse() */
+static bool cmdline_show_help = false;
+
+/*
+ * popt callback for the help option table: remember that the option
+ * with short name 'h' was seen.  All other arguments are unused.
+ */
+static void cmdline_popt_help(poptContext pc,
+			      enum poptCallbackReason reason,
+			      struct poptOption *key,
+			      const char *arg,
+			      void *data)
+{
+	if (key->shortName != 'h') {
+		return;
+	}
+
+	cmdline_show_help = true;
+}
+/* Help option table; cmdline_popt_help() is registered as its callback */
+struct poptOption cmdline_help_options[] = {
+ { NULL, '\0', POPT_ARG_CALLBACK, cmdline_popt_help, 0, NULL, NULL },
+ { "help", 'h', 0, NULL, 'h', "Show this help message", NULL },
+ POPT_TABLEEND
+};
+/* Initializer that includes the help table in another popt table */
+#define CMDLINE_HELP_OPTIONS \
+ { NULL, '\0', POPT_ARG_INCLUDE_TABLE, cmdline_help_options, \
+ 0, "Help Options:", NULL }
+
+/*
+ * Check a single user-supplied popt option.
+ *
+ * An option must have a long name, one of the supported argument
+ * types, a non-NULL arg pointer and a help description.  The first
+ * failed check is logged and false is returned.
+ */
+static bool cmdline_option_check(struct poptOption *option)
+{
+	if (option->longName == NULL) {
+		D_ERR("Option has no long name\n");
+		return false;
+	}
+
+	switch (option->argInfo) {
+	case POPT_ARG_STRING:
+	case POPT_ARG_INT:
+	case POPT_ARG_LONG:
+	case POPT_ARG_VAL:
+	case POPT_ARG_FLOAT:
+	case POPT_ARG_DOUBLE:
+		break;
+	default:
+		D_ERR("Option '%s' has unsupported type\n", option->longName);
+		return false;
+	}
+
+	if (option->arg == NULL) {
+		D_ERR("Option '%s' has invalid arg\n", option->longName);
+		return false;
+	}
+
+	if (option->descrip == NULL) {
+		D_ERR("Option '%s' has no help msg\n", option->longName);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Check every entry of a popt option table.
+ *
+ * A NULL table is valid (no user options).  The table ends at the
+ * first entry with neither a long nor a short name.
+ */
+static bool cmdline_options_check(struct poptOption *options)
+{
+	struct poptOption *opt;
+
+	if (options == NULL) {
+		return true;
+	}
+
+	for (opt = options;
+	     opt->longName != NULL || opt->shortName != '\0';
+	     opt++) {
+		if (!cmdline_option_check(opt)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Build the popt table handed to poptGetContext(): the help table,
+ * then the user table (if any), then the terminator.
+ *
+ * Returns 0 and stores the talloc'd table in *result, or ENOMEM.
+ */
+static int cmdline_options_define(TALLOC_CTX *mem_ctx,
+				  struct poptOption *user_options,
+				  struct poptOption **result)
+{
+	const bool have_user = (user_options != NULL);
+	struct poptOption *table;
+	int n = 0;
+
+	table = talloc_array(mem_ctx, struct poptOption, have_user ? 3 : 2);
+	if (table == NULL) {
+		return ENOMEM;
+	}
+
+	table[n] = (struct poptOption) CMDLINE_HELP_OPTIONS;
+	n++;
+
+	if (have_user) {
+		table[n] = (struct poptOption) {
+			.argInfo = POPT_ARG_INCLUDE_TABLE,
+			.arg = user_options,
+			.descrip = "Options:",
+		};
+		n++;
+	}
+
+	table[n] = (struct poptOption) POPT_TABLEEND;
+
+	*result = table;
+	return 0;
+}
+
+/*
+ * Check a single command definition.
+ *
+ * The command needs a name, an implementation function and a help
+ * message.  The combined length of name and msg_args, and the length
+ * of msg_help, must each fit in CMDLINE_MAX_LEN.  *max_len is raised
+ * to the combined name/args length if that is larger.
+ */
+static bool cmdline_command_check(struct cmdline_command *cmd, size_t *max_len)
+{
+	size_t usage_len, help_len;
+
+	if (cmd->name == NULL) {
+		return false;
+	}
+
+	if (cmd->fn == NULL) {
+		D_ERR("Command '%s' has no implementation function\n",
+		      cmd->name);
+		return false;
+	}
+
+	if (cmd->msg_help == NULL) {
+		D_ERR("Command '%s' has no help msg\n", cmd->name);
+		return false;
+	}
+
+	usage_len = strlen(cmd->name) +
+		    (cmd->msg_args != NULL ? strlen(cmd->msg_args) : 0);
+	if (usage_len > CMDLINE_MAX_LEN) {
+		D_ERR("Command '%s' is too long (%zu)\n", cmd->name, usage_len);
+		return false;
+	}
+
+	/* Recorded before the help text is checked, as callers expect */
+	if (*max_len < usage_len) {
+		*max_len = usage_len;
+	}
+
+	help_len = strlen(cmd->msg_help);
+	if (help_len > CMDLINE_MAX_LEN) {
+		D_ERR("Command '%s' help too long (%zu)\n", cmd->name, help_len);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Check every command in a NULL-name terminated array, accumulating
+ * the widest name/args length in *max_len.  A NULL array is invalid.
+ */
+static bool cmdline_commands_check(struct cmdline_command *commands,
+				   size_t *max_len)
+{
+	struct cmdline_command *cmd;
+
+	if (commands == NULL) {
+		return false;
+	}
+
+	for (cmd = commands; cmd->name != NULL; cmd++) {
+		if (!cmdline_command_check(cmd, max_len)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int cmdline_context_destructor(struct cmdline_context *cmdline);
+
+/*
+ * Validate a command array and append it to the context as a new
+ * section.
+ *
+ * Returns EINVAL if the commands fail validation, ENOMEM if the
+ * section array cannot grow, 0 otherwise.
+ */
+static int cmdline_section_add(struct cmdline_context *cmdline,
+			       const char *name,
+			       struct cmdline_command *commands)
+{
+	struct cmdline_section *grown;
+	size_t widest = 0;
+
+	if (!cmdline_commands_check(commands, &widest)) {
+		return EINVAL;
+	}
+
+	grown = talloc_realloc(cmdline,
+			       cmdline->section,
+			       struct cmdline_section,
+			       cmdline->num_sections + 1);
+	if (grown == NULL) {
+		return ENOMEM;
+	}
+	cmdline->section = grown;
+
+	grown[cmdline->num_sections].name = name;
+	grown[cmdline->num_sections].commands = commands;
+	cmdline->num_sections += 1;
+
+	if (cmdline->max_len < widest) {
+		cmdline->max_len = widest;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a cmdline context: validate the user options, copy the
+ * program name, build the popt table, register the first section and
+ * create a dummy popt context that is only used for generating help.
+ *
+ * On failure nothing is stored in *result and any partially built
+ * context is freed.
+ */
+int cmdline_init(TALLOC_CTX *mem_ctx,
+		 const char *prog,
+		 struct poptOption *options,
+		 const char *name,
+		 struct cmdline_command *commands,
+		 struct cmdline_context **result)
+{
+	struct cmdline_context *ctx;
+	int ret;
+
+	if (prog == NULL) {
+		return EINVAL;
+	}
+
+	if (!cmdline_options_check(options)) {
+		return EINVAL;
+	}
+
+	ctx = talloc_zero(mem_ctx, struct cmdline_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->prog = talloc_strdup(ctx, prog);
+	if (ctx->prog == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	ret = cmdline_options_define(ctx, options, &ctx->options);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	ret = cmdline_section_add(ctx, name, commands);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	/* Minimal argv holding just the program name */
+	ctx->argc = 1;
+	ctx->argv = talloc_array(ctx, const char *, 2);
+	if (ctx->argv == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+	ctx->argv[0] = ctx->prog;
+	ctx->argv[1] = NULL;
+
+	/* Dummy popt context for generating help */
+	ctx->pc = poptGetContext(ctx->prog,
+				 ctx->argc,
+				 ctx->argv,
+				 ctx->options,
+				 0);
+	if (ctx->pc == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	talloc_set_destructor(ctx, cmdline_context_destructor);
+
+	*result = ctx;
+	return 0;
+
+fail:
+	talloc_free(ctx);
+	return ret;
+}
+
+/* talloc destructor: release the popt context, if there is one */
+static int cmdline_context_destructor(struct cmdline_context *cmdline)
+{
+	if (cmdline->pc == NULL) {
+		return 0;
+	}
+
+	poptFreeContext(cmdline->pc);
+	return 0;
+}
+
+/* Public entry point for adding another section of commands */
+int cmdline_add(struct cmdline_context *cmdline,
+		const char *name,
+		struct cmdline_command *commands)
+{
+	int ret;
+
+	ret = cmdline_section_add(cmdline, name, commands);
+
+	return ret;
+}
+
+/*
+ * Parse options with popt and leave the remaining (non-option)
+ * arguments in cmdline->argc/argv for command matching.
+ *
+ * Any previous popt context is replaced.  The argument array returned
+ * by poptGetArgs() belongs to the popt context, so argc/argv are
+ * cleared before the old context is dropped; on failure they stay
+ * empty rather than pointing at freed memory.
+ *
+ * Returns 0 on success, ENOMEM if a popt context cannot be created,
+ * EINVAL if popt reports anything other than end of options.
+ */
+static int cmdline_parse_options(struct cmdline_context *cmdline,
+				 int argc,
+				 const char **argv)
+{
+	int opt;
+
+	/* These may reference memory owned by the context freed below */
+	cmdline->argc = 0;
+	cmdline->argv = NULL;
+
+	if (cmdline->pc != NULL) {
+		poptFreeContext(cmdline->pc);
+		cmdline->pc = NULL;
+	}
+
+	cmdline->pc = poptGetContext(cmdline->prog,
+				     argc,
+				     argv,
+				     cmdline->options,
+				     0);
+	if (cmdline->pc == NULL) {
+		return ENOMEM;
+	}
+
+	/* Anything but -1 (end of options) is treated as an error */
+	opt = poptGetNextOpt(cmdline->pc);
+	if (opt != -1) {
+		D_ERR("Invalid option %s: %s\n",
+		      poptBadOption(cmdline->pc, 0),
+		      poptStrerror(opt));
+		return EINVAL;
+	}
+
+	/* Set up remaining arguments for commands */
+	cmdline->argv = poptGetArgs(cmdline->pc);
+	if (cmdline->argv != NULL) {
+		while (cmdline->argv[cmdline->argc] != NULL) {
+			cmdline->argc++;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Try to match the leading words of cmdline->argv against each
+ * command in a section.
+ *
+ * A command name is split on spaces and every word must equal the
+ * argument at the same position.  On a match, match_cmd is set and
+ * arg0 is the index of the first argument after the command words.
+ * Returns 0 on match; otherwise clears match_cmd and returns ENOENT.
+ */
+static int cmdline_match_section(struct cmdline_context *cmdline,
+				 struct cmdline_section *section)
+{
+	struct cmdline_command *cmd;
+
+	for (cmd = section->commands; cmd->name != NULL; cmd++) {
+		char words[CMDLINE_MAX_LEN+1];
+		char *word;
+		bool matched = false;
+		int n = 0;
+
+		/* strtok() modifies its input, so work on a copy */
+		if (strlcpy(words, cmd->name, sizeof(words)) >= sizeof(words)) {
+			D_ERR("Skipping long command '%s'\n", cmd->name);
+			continue;
+		}
+
+		for (word = strtok(words, " ");
+		     word != NULL;
+		     word = strtok(NULL, " ")) {
+			if (n >= cmdline->argc ||
+			    cmdline->argv[n] == NULL ||
+			    strcmp(cmdline->argv[n], word) != 0) {
+				matched = false;
+				break;
+			}
+
+			n += 1;
+			cmdline->arg0 = n;
+			matched = true;
+		}
+
+		if (matched) {
+			cmdline->match_cmd = cmd;
+			return 0;
+		}
+	}
+
+	cmdline->match_cmd = NULL;
+	return ENOENT;
+}
+
+/*
+ * Match the current arguments against all sections, in the order
+ * they were added.
+ *
+ * Returns 0 on the first match, EINVAL if there are no arguments to
+ * match, ENOENT if nothing matches.
+ */
+static int cmdline_match(struct cmdline_context *cmdline)
+{
+	int i;
+
+	if (cmdline->argc == 0 || cmdline->argv == NULL) {
+		cmdline->match_cmd = NULL;
+		return EINVAL;
+	}
+
+	for (i = 0; i < cmdline->num_sections; i++) {
+		if (cmdline_match_section(cmdline, &cmdline->section[i]) == 0) {
+			return 0;
+		}
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Parse options (optionally) and match a command.
+ *
+ * Usage is printed whenever parsing or matching fails, or when help
+ * was requested.  Returns 0 on success, EAGAIN if help was requested,
+ * otherwise the error from option parsing or command matching.
+ */
+int cmdline_parse(struct cmdline_context *cmdline,
+		  int argc,
+		  const char **argv,
+		  bool parse_options)
+{
+	const char *matched_name;
+	int ret;
+
+	if (argc < 2) {
+		cmdline_usage(cmdline, NULL);
+		return EINVAL;
+	}
+
+	cmdline_show_help = false;
+
+	if (!parse_options) {
+		cmdline->argc = argc;
+		cmdline->argv = argv;
+	} else {
+		ret = cmdline_parse_options(cmdline, argc, argv);
+		if (ret != 0) {
+			cmdline_usage(cmdline, NULL);
+			return ret;
+		}
+	}
+
+	ret = cmdline_match(cmdline);
+	if (ret == 0 && !cmdline_show_help) {
+		return 0;
+	}
+
+	/* Command-specific usage when a command matched, full usage if not */
+	matched_name = NULL;
+	if (cmdline->match_cmd != NULL) {
+		matched_name = cmdline->match_cmd->name;
+	}
+	cmdline_usage(cmdline, matched_name);
+
+	return cmdline_show_help ? EAGAIN : ret;
+}
+
+/*
+ * Print one usage line: name, argument description, help text.
+ *
+ * With print_all the argument description is padded so that the help
+ * text of all commands lines up, based on cmdline->max_len.
+ */
+static void cmdline_usage_command(struct cmdline_context *cmdline,
+				  struct cmdline_command *cmd,
+				  bool print_all)
+{
+	const char *args = (cmd->msg_args != NULL) ? cmd->msg_args : "";
+
+	printf(" %s ", cmd->name);
+	if (print_all) {
+		int pad = (int)(cmdline->max_len - strlen(cmd->name));
+
+		printf("%-*s", pad, args);
+	} else {
+		printf("%s", args);
+	}
+	printf(" %s\n", cmd->msg_help);
+}
+
+/* Print the heading of a section followed by all of its commands */
+static void cmdline_usage_section(struct cmdline_context *cmdline,
+				  struct cmdline_section *section)
+{
+	struct cmdline_command *cmd;
+
+	printf("\n");
+
+	if (section->name != NULL) {
+		printf("%s ", section->name);
+	}
+	printf("Commands:\n");
+
+	for (cmd = section->commands; cmd->name != NULL; cmd++) {
+		cmdline_usage_command(cmdline, cmd, true);
+	}
+}
+
+/* Print popt's option help followed by every section of commands */
+static void cmdline_usage_full(struct cmdline_context *cmdline)
+{
+	int i = 0;
+
+	poptSetOtherOptionHelp(cmdline->pc, "[<options>] <command> [<args>]");
+	poptPrintHelp(cmdline->pc, stdout, 0);
+
+	while (i < cmdline->num_sections) {
+		cmdline_usage_section(cmdline, &cmdline->section[i]);
+		i++;
+	}
+}
+
+/*
+ * Find the first command called cmd_name, searching the sections in
+ * the order they were added.  Returns NULL if there is none.
+ */
+static struct cmdline_command *cmdline_usage_find(
+	struct cmdline_context *cmdline,
+	const char *cmd_name)
+{
+	int i, j;
+
+	for (j=0; j<cmdline->num_sections; j++) {
+		struct cmdline_section *section = &cmdline->section[j];
+
+		for (i=0; section->commands[i].name != NULL; i++) {
+			if (strcmp(section->commands[i].name, cmd_name) == 0) {
+				return &section->commands[i];
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Print usage to stdout.
+ *
+ * With a NULL or unknown cmd_name the full help (options and all
+ * sections) is printed.  Otherwise a compact usage line for that
+ * command is printed.  If several sections define the same name, the
+ * first one is used, matching the order in which commands are matched
+ * during parsing.
+ */
+void cmdline_usage(struct cmdline_context *cmdline, const char *cmd_name)
+{
+	struct cmdline_command *cmd = NULL;
+
+	if (cmd_name != NULL) {
+		cmd = cmdline_usage_find(cmdline, cmd_name);
+	}
+
+	if (cmd == NULL) {
+		cmdline_usage_full(cmdline);
+		return;
+	}
+
+	poptSetOtherOptionHelp(cmdline->pc, "<command> [<args>]");
+	poptPrintUsage(cmdline->pc, stdout, 0);
+
+	printf("\n");
+	cmdline_usage_command(cmdline, cmd, false);
+}
+
+/*
+ * Run the command matched by the last parse.
+ *
+ * The command gets a temporary talloc context and the arguments that
+ * follow the command words.  Its return value is stored in *result
+ * (if non-NULL); this function itself returns 0 once the command has
+ * run, ENOENT if no command is matched, ENOMEM on allocation failure.
+ */
+int cmdline_run(struct cmdline_context *cmdline,
+		void *private_data,
+		int *result)
+{
+	TALLOC_CTX *frame;
+	int cmd_ret;
+
+	if (cmdline->match_cmd == NULL) {
+		return ENOENT;
+	}
+
+	frame = talloc_new(cmdline);
+	if (frame == NULL) {
+		return ENOMEM;
+	}
+
+	cmd_ret = cmdline->match_cmd->fn(frame,
+					 cmdline->argc - cmdline->arg0,
+					 &cmdline->argv[cmdline->arg0],
+					 private_data);
+
+	talloc_free(frame);
+
+	if (result != NULL) {
+		*result = cmd_ret;
+	}
+	return 0;
+}
diff --git a/ctdb/common/cmdline.h b/ctdb/common/cmdline.h
new file mode 100644
index 0000000..51519ca
--- /dev/null
+++ b/ctdb/common/cmdline.h
@@ -0,0 +1,163 @@
+/*
+ Command line processing
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CMDLINE_H__
+#define __CTDB_CMDLINE_H__
+
+#include <popt.h>
+#include <talloc.h>
+
+/**
+ * @file cmdline.h
+ *
+ * @brief Command-line handling with options and commands
+ *
+ * This abstraction encapsulates the boiler-plate for parsing options,
+ * commands and arguments on command-line.
+ *
+ * Options handling is done using popt.
+ */
+
+/**
+ * @brief Abstract data structure holding command-line configuration
+ */
+struct cmdline_context;
+
+/**
+ * @brief A command definition structure
+ *
+ * @name is the name of the command
+ * @fn is the implementation of the command
+ * @msg_help is the help message describing command
+ * @msg_args is the help message describing arguments
+ *
+ * A command name can be a single word or multiple words separated with spaces.
+ *
+ * An implementation function should return 0 on success and non-zero value
+ * on failure. This value is returned as result in @cmdline_run.
+ */
+struct cmdline_command {
+ const char *name;
+ int (*fn)(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data);
+ const char *msg_help;
+ const char *msg_args;
+};
+
+/**
+ * @brief convenience macro to define the end of commands list
+ *
+ * Here is an example of defining commands list.
+ *
+ * struct cmdline_command commands[] = {
+ * { "command1", command1_func, "Run command1", NULL },
+ * { "command2", command2_func, "Run command2", "<filename>" },
+ * CMDLINE_TABLEEND
+ * };
+ */
+#define CMDLINE_TABLEEND { NULL, NULL, NULL, NULL }
+
+/**
+ * @brief Initialize cmdline abstraction
+ *
+ * If there are no options, options can be NULL.
+ *
+ * Help options (--help, -h) are automatically added to the options.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] prog Program name
+ * @param[in] options Command-line options
+ * @param[in] section Name of section grouping specified commands
+ * @param[in] commands Commands array
+ * @param[out] result New cmdline context
+ * @return 0 on success, errno on failure
+ *
+ * Freeing cmdline context will free up all the resources.
+ */
+int cmdline_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ const char *section,
+ struct cmdline_command *commands,
+ struct cmdline_context **result);
+
+
+/**
+ * @brief Add command line section/commands
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] section Name of section grouping specified commands
+ * @param[in] commands Commands array
+ * @return 0 on success, errno on failure
+ */
+int cmdline_add(struct cmdline_context *cmdline,
+ const char *section,
+ struct cmdline_command *commands);
+
+/**
+ * @brief Parse command line options and commands/arguments
+ *
+ * This function parses the arguments to process options and commands.
+ *
+ * This function should be passed the arguments to main() and parse_options
+ * should be set to true. If cmdline is used for handling second-level
+ * commands, then parse_options should be set to false.
+ *
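+ * If argv does not match any command, then usage is printed and ENOENT is
+ * returned. If help options (--help, -h) are specified, then help is printed
+ * and EAGAIN is returned.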
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments array
+ * @param[in] parse_options Whether to parse for options
+ * @return 0 on success, errno on failure
+ */
+int cmdline_parse(struct cmdline_context *cmdline,
+ int argc,
+ const char **argv,
+ bool parse_options);
+
+/**
+ * @brief Execute the function for the command matched by @cmdline_parse
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] private_data Private data for implementation function
+ * @param[out] result Return value from the implementation function
+ * @return 0 on success, errno on failure
+ *
+ * If no command has been matched, ENOENT is returned. Help options are
+ * handled by @cmdline_parse, which prints the help and returns EAGAIN.
+ */
+int cmdline_run(struct cmdline_context *cmdline,
+ void *private_data,
+ int *result);
+
+/**
+ * @brief Print usage help message to stdout
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] command Command string
+ *
+ * If command is NULL, then full help is printed.
+ * If command is specified, then compact help is printed.
+ */
+void cmdline_usage(struct cmdline_context *cmdline, const char *command);
+
+#endif /* __CTDB_CMDLINE_H__ */
diff --git a/ctdb/common/comm.c b/ctdb/common/comm.c
new file mode 100644
index 0000000..12f4970
--- /dev/null
+++ b/ctdb/common/comm.c
@@ -0,0 +1,427 @@
+/*
+ Communication endpoint implementation
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "lib/util/blocking.h"
+#include "lib/util/tevent_unix.h"
+
+#include "pkt_read.h"
+#include "pkt_write.h"
+#include "comm.h"
+
+/*
+ * Communication endpoint around a socket
+ */
+
+#define SMALL_PKT_SIZE 1024
+/* State of one endpoint: the fd, the user callbacks and the read/write requests */
+struct comm_context {
+ int fd;
+ comm_read_handler_fn read_handler; /* may be NULL: no reading is set up */
+ void *read_private_data;
+ comm_dead_handler_fn dead_handler; /* comm_setup() rejects NULL */
+ void *dead_private_data;
+ uint8_t small_pkt[SMALL_PKT_SIZE]; /* buffer handed to the packet reader */
+ struct tevent_req *read_req, *write_req; /* read request; write currently in progress */
+ struct tevent_fd *fde; /* one fd event used for both directions */
+ struct tevent_queue *queue; /* queue of pending writes */
+};
+
+static void comm_fd_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data);
+static struct tevent_req *comm_read_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct comm_context *comm,
+ uint8_t *buf, size_t buflen);
+static void comm_read_failed(struct tevent_req *req);
+
+/*
+ * Create a communication endpoint around fd.
+ *
+ * fd is switched to non-blocking mode and a single tevent fd event is
+ * registered for it.  If read_handler is non-NULL a read request is
+ * started as well; its callback, comm_read_failed(), invokes
+ * dead_handler.
+ *
+ * Returns 0 and stores the new context in *result on success.  Returns
+ * EINVAL for a negative fd or a NULL dead_handler, EIO if the fd cannot
+ * be made non-blocking, and ENOMEM for any later setup failure.
+ */
+int comm_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev, int fd,
+ comm_read_handler_fn read_handler, void *read_private_data,
+ comm_dead_handler_fn dead_handler, void *dead_private_data,
+ struct comm_context **result)
+{
+ struct comm_context *comm;
+ int ret;
+
+ if (fd < 0) {
+ return EINVAL;
+ }
+
+ if (dead_handler == NULL) {
+ return EINVAL;
+ }
+
+ /* Socket queue relies on non-blocking sockets. */
+ ret = set_blocking(fd, false);
+ if (ret == -1) {
+ return EIO;
+ }
+
+ comm = talloc_zero(mem_ctx, struct comm_context);
+ if (comm == NULL) {
+ return ENOMEM;
+ }
+
+ comm->fd = fd;
+ comm->read_handler = read_handler;
+ comm->read_private_data = read_private_data;
+ comm->dead_handler = dead_handler;
+ comm->dead_private_data = dead_private_data;
+
+ comm->queue = tevent_queue_create(comm, "comm write queue");
+ if (comm->queue == NULL) {
+ goto fail;
+ }
+
+ /* One fd event serves both directions: it starts with READ only and
+ * comm_write_trigger() enables WRITE while a write is in progress */
+ comm->fde = tevent_add_fd(ev, comm, fd, TEVENT_FD_READ,
+ comm_fd_handler, comm);
+ if (comm->fde == NULL) {
+ goto fail;
+ }
+
+ /* Set up to read packets */
+ if (read_handler != NULL) {
+ struct tevent_req *req;
+
+ req = comm_read_send(comm, ev, comm, comm->small_pkt,
+ SMALL_PKT_SIZE);
+ if (req == NULL) {
+ goto fail;
+ }
+
+ tevent_req_set_callback(req, comm_read_failed, comm);
+ comm->read_req = req;
+ }
+
+ *result = comm;
+ return 0;
+
+fail:
+ talloc_free(comm);
+ return ENOMEM; /* every failure reaching this label is reported as ENOMEM */
+}
+
+
+/*
+ * Read packets
+ */
+/* State of the read request created by comm_read_send() */
+struct comm_read_state {
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ uint8_t *buf; /* caller-supplied buffer passed to each pkt_read_send() */
+ size_t buflen;
+ struct tevent_req *subreq; /* pkt_read request in flight; used by comm_fd_handler() */
+};
+
+static ssize_t comm_read_more(uint8_t *buf, size_t buflen, void *private_data);
+static void comm_read_done(struct tevent_req *subreq);
+/*
+ * Start the read request for an endpoint.
+ *
+ * Creates the request, records its parameters and starts the first
+ * pkt_read sub-request on comm->fd with comm_read_more() as the
+ * callback that tells the reader how much more data is needed.
+ * comm_read_done() runs when a sub-request finishes.
+ *
+ * Returns NULL if the request itself cannot be allocated.
+ */
+static struct tevent_req *comm_read_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct comm_context *comm,
+ uint8_t *buf, size_t buflen)
+{
+ struct tevent_req *req, *subreq;
+ struct comm_read_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct comm_read_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->comm = comm;
+ state->buf = buf;
+ state->buflen = buflen;
+
+ subreq = pkt_read_send(state, state->ev, comm->fd, sizeof(uint32_t),
+ state->buf, state->buflen,
+ comm_read_more, NULL);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ state->subreq = subreq;
+
+ tevent_req_set_callback(subreq, comm_read_done, req);
+ return req;
+}
+
+/*
+ * Tell the packet reader how many more bytes are needed.
+ *
+ * Until a full uint32_t has been read, the remainder of that header is
+ * requested.  After that the first uint32_t in buf is taken as the
+ * packet length and the difference to buflen is returned.
+ */
+static ssize_t comm_read_more(uint8_t *buf, size_t buflen, void *private_data)
+{
+	uint32_t packet_len;
+
+	if (buflen >= sizeof(uint32_t)) {
+		packet_len = *(uint32_t *)buf;
+		return packet_len - buflen;
+	}
+
+	return sizeof(uint32_t) - buflen;
+}
+/*
+ * A pkt_read sub-request has finished.
+ *
+ * On failure the outer read request is failed with the reported error.
+ * Otherwise the packet is passed to the read handler, the buffer is
+ * freed if the reader says so, and the next pkt_read is started.  The
+ * outer request is therefore never completed successfully from here.
+ */
+static void comm_read_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct comm_read_state *state = tevent_req_data(
+ req, struct comm_read_state);
+ struct comm_context *comm = state->comm;
+ ssize_t nread;
+ uint8_t *buf;
+ bool free_buf;
+ int err = 0;
+
+ nread = pkt_read_recv(subreq, state, &buf, &free_buf, &err);
+ TALLOC_FREE(subreq);
+ state->subreq = NULL;
+ if (nread == -1) {
+ tevent_req_error(req, err);
+ return;
+ }
+
+ /* NOTE(review): req, state and comm are used after this call, so the
+ * handler presumably must not free the comm context - confirm */
+ comm->read_handler(buf, nread, comm->read_private_data);
+
+ if (free_buf) {
+ talloc_free(buf);
+ }
+
+ subreq = pkt_read_send(state, state->ev, comm->fd, sizeof(uint32_t),
+ state->buf, state->buflen,
+ comm_read_more, NULL);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ state->subreq = subreq;
+
+ tevent_req_set_callback(subreq, comm_read_done, req);
+}
+
+static void comm_read_recv(struct tevent_req *req, int *perr)
+{
+ int err;
+
+ if (tevent_req_is_unix_error(req, &err)) {
+ if (perr != NULL) {
+ *perr = err;
+ }
+ }
+}
+/*
+ * Callback of the read request set up by comm_setup().
+ *
+ * The error value is discarded, the request is freed and forgotten,
+ * and the dead handler is called.
+ */
+static void comm_read_failed(struct tevent_req *req)
+{
+ struct comm_context *comm = tevent_req_callback_data(
+ req, struct comm_context);
+
+ comm_read_recv(req, NULL);
+ TALLOC_FREE(req);
+ comm->read_req = NULL; /* cleared before the handler runs */
+ if (comm->dead_handler != NULL) {
+ comm->dead_handler(comm->dead_private_data);
+ }
+}
+
+
+/*
+ * Write packets
+ */
+/* Ties a write request to its entry in the endpoint's write queue */
+struct comm_write_entry {
+ struct comm_context *comm;
+ struct tevent_queue_entry *qentry; /* freed by comm_write_entry_destructor() */
+ struct tevent_req *req; /* the write request owning this entry */
+};
+
+struct comm_write_state {
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ struct comm_write_entry *entry;
+ struct tevent_req *subreq; /* pkt_write request in flight; used by comm_fd_handler() */
+ uint8_t *buf; /* caller's buffer; the pointer is stored, not a copy */
+ size_t buflen, nwritten; /* nwritten is compared with buflen in comm_write_recv() */
+};
+
+static int comm_write_entry_destructor(struct comm_write_entry *entry);
+static void comm_write_trigger(struct tevent_req *req, void *private_data);
+static void comm_write_done(struct tevent_req *subreq);
+/*
+ * Queue a packet for writing.
+ *
+ * The request is added to the endpoint's write queue and the actual
+ * write is started by comm_write_trigger().  A comm_write_entry,
+ * allocated as a child of the request state, has a destructor that
+ * cleans up the endpoint's write bookkeeping.
+ *
+ * Returns NULL only if the request itself cannot be allocated; later
+ * allocation failures are reported through the request.
+ */
+struct tevent_req *comm_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct comm_context *comm,
+ uint8_t *buf, size_t buflen)
+{
+ struct tevent_req *req;
+ struct comm_write_state *state;
+ struct comm_write_entry *entry;
+
+ req = tevent_req_create(mem_ctx, &state, struct comm_write_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->comm = comm;
+ state->buf = buf;
+ state->buflen = buflen;
+
+ entry = talloc_zero(state, struct comm_write_entry);
+ if (tevent_req_nomem(entry, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ entry->comm = comm;
+ entry->req = req;
+ entry->qentry = tevent_queue_add_entry(comm->queue, ev, req,
+ comm_write_trigger, NULL);
+ if (tevent_req_nomem(entry->qentry, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ state->entry = entry;
+ talloc_set_destructor(entry, comm_write_entry_destructor);
+
+ return req;
+}
+/*
+ * Destructor of a write entry.
+ *
+ * If the entry's request is the write currently in progress, the
+ * endpoint forgets it and stops watching for writability.  The queue
+ * entry is freed in either case.
+ */
+static int comm_write_entry_destructor(struct comm_write_entry *entry)
+{
+ struct comm_context *comm = entry->comm;
+
+ if (comm->write_req == entry->req) {
+ comm->write_req = NULL;
+ TEVENT_FD_NOT_WRITEABLE(comm->fde);
+ }
+
+ TALLOC_FREE(entry->qentry);
+ return 0;
+}
+/*
+ * Queue trigger registered by comm_write_send().
+ *
+ * Marks the request as the write in progress, starts the pkt_write
+ * sub-request and enables writability events on the endpoint's fd.
+ */
+static void comm_write_trigger(struct tevent_req *req, void *private_data)
+{
+ struct comm_write_state *state = tevent_req_data(
+ req, struct comm_write_state);
+ struct comm_context *comm = state->comm;
+ struct tevent_req *subreq;
+
+ comm->write_req = req;
+
+ subreq = pkt_write_send(state, state->ev, comm->fd,
+ state->buf, state->buflen);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+
+ state->subreq = subreq;
+ tevent_req_set_callback(subreq, comm_write_done, req);
+ TEVENT_FD_WRITEABLE(comm->fde);
+}
+/*
+ * The pkt_write sub-request has finished.
+ *
+ * Writability events are switched off and the endpoint's current
+ * write is cleared.  On failure the request is failed with the
+ * reported error; for EPIPE the dead handler is called first.  On
+ * success the byte count is recorded for comm_write_recv().
+ */
+static void comm_write_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct comm_write_state *state = tevent_req_data(
+ req, struct comm_write_state);
+ struct comm_context *comm = state->comm;
+ ssize_t nwritten;
+ int err = 0;
+
+ TEVENT_FD_NOT_WRITEABLE(comm->fde);
+ nwritten = pkt_write_recv(subreq, &err);
+ TALLOC_FREE(subreq);
+ state->subreq = NULL;
+ comm->write_req = NULL;
+ if (nwritten == -1) {
+ if (err == EPIPE) {
+ /* NOTE(review): req is used right after this call, so the
+ * handler presumably must not free it - confirm */
+ comm->dead_handler(comm->dead_private_data);
+ }
+ tevent_req_error(req, err);
+ return;
+ }
+
+ state->nwritten = nwritten;
+ state->entry->qentry = NULL; /* so the entry destructor does not free it */
+ TALLOC_FREE(state->entry);
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a write request.
+ *
+ * Returns true if the whole buffer was written.  Returns false if the
+ * request failed, or if fewer bytes than requested were written (this
+ * is reported as EIO).
+ *
+ * perr is optional: when non-NULL it receives the error code, or 0 on
+ * success.
+ */
+bool comm_write_recv(struct tevent_req *req, int *perr)
+{
+	struct comm_write_state *state = tevent_req_data(
+		req, struct comm_write_state);
+	bool ok = true;
+	int err;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		ok = false;
+	} else if (state->nwritten != state->buflen) {
+		/* Short write */
+		err = EIO;
+		ok = false;
+	} else {
+		err = 0;
+	}
+
+	/* perr is treated as optional on every path, not just on error */
+	if (perr != NULL) {
+		*perr = err;
+	}
+
+	return ok;
+}
+/*
+ * fd event handler for the endpoint.
+ *
+ * Readable events are forwarded to the packet reader's handler and
+ * writable events to the packet writer's handler, each with the
+ * sub-request currently in flight.
+ */
+static void comm_fd_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ struct comm_context *comm = talloc_get_type_abort(
+ private_data, struct comm_context);
+
+ if (flags & TEVENT_FD_READ) {
+ struct comm_read_state *read_state;
+
+ /* NOTE(review): comm_setup() always registers TEVENT_FD_READ but
+ * only creates read_req when a read handler was supplied - confirm
+ * that a readable fd without a read handler cannot get here */
+ if (comm->read_req == NULL) {
+ /* This should never happen */
+ abort();
+ }
+
+ read_state = tevent_req_data(comm->read_req,
+ struct comm_read_state);
+ pkt_read_handler(ev, fde, flags, read_state->subreq);
+ }
+
+ if (flags & TEVENT_FD_WRITE) {
+ struct comm_write_state *write_state;
+
+ if (comm->write_req == NULL) {
+ /* No write in progress: stop watching for writability */
+ TEVENT_FD_NOT_WRITEABLE(comm->fde);
+ return;
+ }
+
+ write_state = tevent_req_data(comm->write_req,
+ struct comm_write_state);
+ pkt_write_handler(ev, fde, flags, write_state->subreq);
+ }
+}
diff --git a/ctdb/common/comm.h b/ctdb/common/comm.h
new file mode 100644
index 0000000..e11d38e
--- /dev/null
+++ b/ctdb/common/comm.h
@@ -0,0 +1,101 @@
+/*
+ Communication endpoint API
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_COMM_H__
+#define __CTDB_COMM_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file comm.h
+ *
+ * @brief Communication over a socket or file descriptor
+ *
+ * This abstraction is a wrapper around a socket or file descriptor to
+ * send/receive complete packets.
+ */
+
+/**
+ * @brief Packet handler function
+ *
+ * This function is registered while setting up communication endpoint. Any
+ * time packets are read, this function is called.
+ */
+typedef void (*comm_read_handler_fn)(uint8_t *buf, size_t buflen,
+ void *private_data);
+
+/**
+ * @brief Communication endpoint dead handler function
+ *
+ * This function is called when the communication endpoint is closed.
+ */
+typedef void (*comm_dead_handler_fn)(void *private_data);
+
+/**
+ * @brief Abstract struct to store communication endpoint details
+ */
+struct comm_context;
+
+/**
+ * @brief Initialize the communication endpoint
+ *
+ * This returns a new communication context. Freeing this context will free all
+ * memory associated with it.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd The socket or file descriptor
+ * @param[in] read_handler The packet handler function
+ * @param[in] read_private_data Private data for read handler function
+ * @param[in] dead_handler The communication dead handler function
+ * @param[in] dead_private_data Private data for dead handler function
+ * @param[out] result The new comm_context structure
+ * @return 0 on success, errno on failure
+ */
+int comm_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev, int fd,
+ comm_read_handler_fn read_handler, void *read_private_data,
+ comm_dead_handler_fn dead_handler, void *dead_private_data,
+ struct comm_context **result);
+
+/**
+ * @brief Async computation start to send a packet
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] comm Communication context
+ * @param[in] buf The packet data
+ * @param[in] buflen The size of the packet
+ * @return new tevent request, or NULL on failure
+ */
+struct tevent_req *comm_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct comm_context *comm,
+ uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Async computation end to send a packet
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool comm_write_recv(struct tevent_req *req, int *perr);
+
+#endif /* __CTDB_COMM_H__ */
diff --git a/ctdb/common/common.h b/ctdb/common/common.h
new file mode 100644
index 0000000..9a73bec
--- /dev/null
+++ b/ctdb/common/common.h
@@ -0,0 +1,160 @@
+/*
+ ctdb database library
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_COMMON_H__
+#define __CTDB_COMMON_H__
+
+#include "lib/util/attr.h"
+
+/* From common/ctdb_io.c */
+
+typedef void (*ctdb_queue_cb_fn_t)(uint8_t *data, size_t length,
+ void *private_data);
+
+uint32_t ctdb_queue_length(struct ctdb_queue *queue);
+
+int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length);
+
+int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd);
+
+struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx, int fd, int alignment,
+ ctdb_queue_cb_fn_t callback,
+ void *private_data, const char *fmt, ...)
+ PRINTF_ATTRIBUTE(7,8);
+
+/* From common/ctdb_ltdb.c */
+
+int ctdb_db_tdb_flags(uint8_t db_flags, bool with_valgrind, bool with_mutex);
+
+struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb,
+ const char *name);
+
+bool ctdb_db_persistent(struct ctdb_db_context *ctdb_db);
+bool ctdb_db_replicated(struct ctdb_db_context *ctdb_db);
+bool ctdb_db_volatile(struct ctdb_db_context *ctdb_db);
+
+bool ctdb_db_readonly(struct ctdb_db_context *ctdb_db);
+void ctdb_db_set_readonly(struct ctdb_db_context *ctdb_db);
+void ctdb_db_reset_readonly(struct ctdb_db_context *ctdb_db);
+
+bool ctdb_db_sticky(struct ctdb_db_context *ctdb_db);
+void ctdb_db_set_sticky(struct ctdb_db_context *ctdb_db);
+
+uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key);
+
+int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key, struct ctdb_ltdb_header *header,
+ TALLOC_CTX *mem_ctx, TDB_DATA *data);
+
+int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
+ struct ctdb_ltdb_header *header, TDB_DATA data);
+
+int ctdb_ltdb_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key);
+
+int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key);
+
+int ctdb_ltdb_delete(struct ctdb_db_context *ctdb_db, TDB_DATA key);
+
+int ctdb_trackingdb_add_pnn(struct ctdb_context *ctdb, TDB_DATA *data, uint32_t pnn);
+
+typedef void (*ctdb_trackingdb_cb)(struct ctdb_context *ctdb, uint32_t pnn,
+ void *private_data);
+
+void ctdb_trackingdb_traverse(struct ctdb_context *ctdb, TDB_DATA data,
+ ctdb_trackingdb_cb cb, void *private_data);
+
+int ctdb_null_func(struct ctdb_call_info *call);
+
+int ctdb_fetch_func(struct ctdb_call_info *call);
+
+int ctdb_fetch_with_header_func(struct ctdb_call_info *call);
+
+/* From common/ctdb_util.c */
+
+const char *ctdb_errstr(struct ctdb_context *ctdb);
+
+void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
+ PRINTF_ATTRIBUTE(2,3);
+
+void ctdb_fatal(struct ctdb_context *ctdb, const char *msg) _NORETURN_;
+
+void ctdb_die(struct ctdb_context *ctdb, const char *msg) _NORETURN_;
+
+bool ctdb_set_helper(const char *type, char *helper, size_t size,
+ const char *envvar,
+ const char *dir, const char *file);
+
+int ctdb_parse_address(TALLOC_CTX *mem_ctx, const char *str,
+ ctdb_sock_addr *address);
+
+bool ctdb_same_address(ctdb_sock_addr *a1, ctdb_sock_addr *a2);
+
+uint32_t ctdb_hash(const TDB_DATA *key);
+
+struct ctdb_rec_data_old *ctdb_marshall_record(TALLOC_CTX *mem_ctx,
+ uint32_t reqid,
+ TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA data);
+
+struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
+ struct ctdb_marshall_buffer *m,
+ uint32_t db_id,
+ uint32_t reqid,
+ TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA data);
+
+TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m);
+
+struct ctdb_rec_data_old *ctdb_marshall_loop_next(
+ struct ctdb_marshall_buffer *m,
+ struct ctdb_rec_data_old *r,
+ uint32_t *reqid,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA *key, TDB_DATA *data);
+
+void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip);
+void ctdb_canonicalize_ip_inplace(ctdb_sock_addr *ip);
+
+bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2);
+
+bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2);
+
+char *ctdb_addr_to_str(ctdb_sock_addr *addr);
+
+unsigned ctdb_addr_to_port(ctdb_sock_addr *addr);
+
+struct ctdb_node_map_old *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx,
+ const char *nlist);
+
+struct ctdb_node_map_old *ctdb_node_list_to_map(struct ctdb_node **nodes,
+ uint32_t num_nodes,
+ TALLOC_CTX *mem_ctx);
+
+const char *runstate_to_string(enum ctdb_runstate runstate);
+
+enum ctdb_runstate runstate_from_string(const char *label);
+
+void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate);
+
+uint32_t *ctdb_key_to_idkey(TALLOC_CTX *mem_ctx, TDB_DATA key);
+
+#endif /* __CTDB_COMMON_H__ */
diff --git a/ctdb/common/conf.c b/ctdb/common/conf.c
new file mode 100644
index 0000000..a8ff724
--- /dev/null
+++ b/ctdb/common/conf.c
@@ -0,0 +1,1391 @@
+/*
+ Configuration file handling on top of tini
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/locale.h"
+
+#include <talloc.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/tini.h"
+#include "lib/util/debug.h"
+
+#include "common/conf.h"
+
+/*
+ * A single typed configuration value.
+ *
+ * "type" selects which member of "data" is meaningful.
+ */
+struct conf_value {
+ enum conf_type type;
+ union {
+ const char *string; /* CONF_STRING; may be NULL */
+ int integer; /* CONF_INTEGER */
+ bool boolean; /* CONF_BOOLEAN */
+ } data;
+};
+
+/*
+ * Location of a caller-owned variable that mirrors an option's value.
+ *
+ * The member matching the option's type is written by
+ * conf_option_set_ptr_value() when it is not NULL.
+ */
+union conf_pointer {
+ const char **string;
+ int *integer;
+ bool *boolean;
+};
+
+/*
+ * One configuration option (key) inside a section.
+ */
+struct conf_option {
+ /* Linkage in the owning section's option list */
+ struct conf_option *prev, *next;
+
+ const char *name;
+ enum conf_type type;
+ /*
+ * Type-erased validation callback; cast back to the
+ * conf_validate_<type>_option_fn matching "type" before it is called.
+ * NULL means the option is not validated.
+ */
+ void *validate;
+
+ struct conf_value default_value;
+ bool default_set;
+
+ /*
+ * "value" is the value currently in effect, "new_value" is a value
+ * staged by conf_option_new_value() until conf_option_update() commits
+ * it or conf_option_reset() drops it. Either may point at
+ * default_value instead of a separately allocated conf_value.
+ */
+ struct conf_value *value, *new_value;
+ /* Optional caller variable kept in sync with "value" */
+ union conf_pointer ptr;
+ /* Set when the value was last changed with CONF_MODE_API */
+ bool temporary_modified;
+};
+
+/*
+ * A named section holding a list of options.
+ */
+struct conf_section {
+ /* Linkage in the context's section list */
+ struct conf_section *prev, *next;
+
+ const char *name;
+ /* Optional section-wide validation callback; NULL means none */
+ conf_validate_section_fn validate;
+ /* Head of this section's option list */
+ struct conf_option *option;
+};
+
+/*
+ * Top-level configuration state: the defined sections plus load settings.
+ */
+struct conf_context {
+ /* Path given to conf_load(); NULL until a file has been loaded */
+ const char *filename;
+ /* Head of the section list */
+ struct conf_section *section;
+ /* Set once any definition step fails; reported by conf_valid() */
+ bool define_failed;
+ /* When true, unknown sections/options in the file are not errors */
+ bool ignore_unknown;
+ /* True only while conf_reload() is running the loader */
+ bool reload;
+ /* NOTE(review): not referenced in this file's visible code - confirm use */
+ bool validation_active;
+};
+
+/*
+ * Functions related to conf_value
+ */
+
+/*
+ * Duplicate str onto mem_ctx and return the copy through str_val.
+ *
+ * Returns 0 on success, EINVAL if str is NULL and ENOMEM if the
+ * duplication fails. str_val is only written on success.
+ */
+static int string_to_string(TALLOC_CTX *mem_ctx,
+			    const char *str,
+			    const char **str_val)
+{
+	char *copy;
+
+	if (str != NULL) {
+		copy = talloc_strdup(mem_ctx, str);
+		if (copy == NULL) {
+			return ENOMEM;
+		}
+
+		*str_val = copy;
+		return 0;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Parse str as a non-negative integer (decimal, octal or hex, as accepted
+ * by strtol() with base 0) and store it in int_val.
+ *
+ * The whole string must be consumed by the conversion: empty strings,
+ * strings without digits and strings with trailing garbage are rejected.
+ * Negative values and values that do not fit in an int are rejected too.
+ *
+ * Returns 0 on success, EINVAL otherwise. int_val is only written on
+ * success.
+ */
+static int string_to_integer(const char *str, int *int_val)
+{
+	long t;
+	char *endptr = NULL;
+	int saved_errno;
+	int conv_errno;
+
+	if (str == NULL) {
+		return EINVAL;
+	}
+
+	/* strtol() reports overflow only via errno, so start from a clean slate */
+	saved_errno = errno;
+	errno = 0;
+	t = strtol(str, &endptr, 0);
+	conv_errno = errno;
+	errno = saved_errno;
+
+	if (conv_errno != 0) {
+		return EINVAL;
+	}
+
+	/* No digits consumed, or something left over after the number */
+	if (endptr == str || *endptr != '\0') {
+		return EINVAL;
+	}
+
+	if (t < 0 || t > INT_MAX) {
+		return EINVAL;
+	}
+
+	*int_val = (int)t;
+	return 0;
+}
+
+/*
+ * Parse str as a boolean and store it in bool_val.
+ *
+ * "true"/"yes" map to true and "false"/"no" map to false, compared
+ * case-insensitively.
+ *
+ * Returns 0 on success, EINVAL if str is NULL or not one of the accepted
+ * words. bool_val is only written on success.
+ */
+static int string_to_boolean(const char *str, bool *bool_val)
+{
+	/* Same NULL contract as string_to_string()/string_to_integer() */
+	if (str == NULL) {
+		return EINVAL;
+	}
+
+	if (strcasecmp(str, "true") == 0 || strcasecmp(str, "yes") == 0) {
+		*bool_val = true;
+		return 0;
+	}
+
+	if (strcasecmp(str, "false") == 0 || strcasecmp(str, "no") == 0) {
+		*bool_val = false;
+		return 0;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Convert str into value->data according to value->type.
+ *
+ * value->type must be set by the caller. String values are duplicated
+ * onto mem_ctx. Returns 0 on success, EINVAL for an unknown type, or
+ * the error from the type-specific parser.
+ */
+static int conf_value_from_string(TALLOC_CTX *mem_ctx,
+				  const char *str,
+				  struct conf_value *value)
+{
+	switch (value->type) {
+	case CONF_STRING:
+		return string_to_string(mem_ctx, str, &value->data.string);
+
+	case CONF_INTEGER:
+		return string_to_integer(str, &value->data.integer);
+
+	case CONF_BOOLEAN:
+		return string_to_boolean(str, &value->data.boolean);
+
+	default:
+		break;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Report whether two values are equal.
+ *
+ * Values are equal only if both are non-NULL, have the same type and
+ * the same payload. Two NULL strings compare equal; a NULL string never
+ * equals a non-NULL one.
+ */
+static bool conf_value_compare(struct conf_value *old, struct conf_value *new)
+{
+	const char *lhs, *rhs;
+
+	if (old == NULL || new == NULL || old->type != new->type) {
+		return false;
+	}
+
+	switch (old->type) {
+	case CONF_STRING:
+		lhs = old->data.string;
+		rhs = new->data.string;
+		if (lhs == NULL || rhs == NULL) {
+			/* Equal only when both are NULL */
+			return (lhs == rhs);
+		}
+		return (strcmp(lhs, rhs) == 0);
+
+	case CONF_INTEGER:
+		return (old->data.integer == new->data.integer);
+
+	case CONF_BOOLEAN:
+		return (old->data.boolean == new->data.boolean);
+	}
+
+	return false;
+}
+
+/*
+ * Copy the payload of src into dst; both must already have the same type.
+ *
+ * For strings, any string currently held by dst is freed first and the
+ * new one is duplicated onto mem_ctx (a NULL source string is copied as
+ * NULL). Returns 0 on success, EINVAL on type mismatch or unknown type,
+ * ENOMEM if the string duplication fails.
+ */
+static int conf_value_copy(TALLOC_CTX *mem_ctx,
+			   struct conf_value *src,
+			   struct conf_value *dst)
+{
+	if (src->type != dst->type) {
+		return EINVAL;
+	}
+
+	switch (src->type) {
+	case CONF_INTEGER:
+		dst->data.integer = src->data.integer;
+		return 0;
+
+	case CONF_BOOLEAN:
+		dst->data.boolean = src->data.boolean;
+		return 0;
+
+	case CONF_STRING:
+		break;
+
+	default:
+		return EINVAL;
+	}
+
+	/* Only CONF_STRING reaches this point */
+	if (dst->data.string != NULL) {
+		talloc_free(discard_const(dst->data.string));
+	}
+
+	if (src->data.string == NULL) {
+		dst->data.string = NULL;
+		return 0;
+	}
+
+	dst->data.string = talloc_strdup(mem_ctx, src->data.string);
+	if (dst->data.string == NULL) {
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Write one "key = value" line to fp.
+ *
+ * The line is written as a comment ("# key = ...") when the value is the
+ * default or is a NULL string. " # temporary" is appended when
+ * is_temporary is set.
+ */
+static void conf_value_dump(const char *key,
+			    struct conf_value *value,
+			    bool is_default,
+			    bool is_temporary,
+			    FILE *fp)
+{
+	bool unset_string;
+
+	unset_string = (value->type == CONF_STRING &&
+			value->data.string == NULL);
+
+	if (is_default || unset_string) {
+		fprintf(fp, "\t# %s = ", key);
+	} else {
+		fprintf(fp, "\t%s = ", key);
+	}
+
+	if (value->type == CONF_STRING) {
+		if (value->data.string != NULL) {
+			fprintf(fp, "%s", value->data.string);
+		}
+	} else if (value->type == CONF_INTEGER) {
+		fprintf(fp, "%d", value->data.integer);
+	} else if (value->type == CONF_BOOLEAN) {
+		fprintf(fp, "%s", (value->data.boolean ? "true" : "false"));
+	}
+
+	if (is_temporary) {
+		fprintf(fp, " # temporary");
+	}
+
+	fprintf(fp, "\n");
+}
+
+/*
+ * Functions related to conf_option
+ */
+
+/*
+ * Look up an option by exact (case-sensitive) name in section s.
+ *
+ * Returns the option, or NULL if the section has no such option.
+ */
+static struct conf_option *conf_option_find(struct conf_section *s,
+					    const char *key)
+{
+	struct conf_option *cur = s->option;
+
+	while (cur != NULL && strcmp(cur->name, key) != 0) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/*
+ * Push the option's current value into the caller variable registered
+ * for it, if one was registered.
+ */
+static void conf_option_set_ptr_value(struct conf_option *opt)
+{
+	switch (opt->type) {
+	case CONF_STRING:
+		if (opt->ptr.string == NULL) {
+			break;
+		}
+		*(opt->ptr.string) = opt->value->data.string;
+		break;
+
+	case CONF_INTEGER:
+		if (opt->ptr.integer == NULL) {
+			break;
+		}
+		*(opt->ptr.integer) = opt->value->data.integer;
+		break;
+
+	case CONF_BOOLEAN:
+		if (opt->ptr.boolean == NULL) {
+			break;
+		}
+		*(opt->ptr.boolean) = opt->value->data.boolean;
+		break;
+	}
+}
+
+static void conf_option_default(struct conf_option *opt);
+
+/*
+ * Create a new option named key in section s and append it to the
+ * section's option list.
+ *
+ * The option is allocated as a talloc child of s. If popt is not NULL
+ * the new option is returned through it. Returns 0 on success, EEXIST
+ * if the section already has an option with that name, ENOMEM on
+ * allocation failure.
+ */
+static int conf_option_add(struct conf_section *s,
+			   const char *key,
+			   enum conf_type type,
+			   void *validate,
+			   struct conf_option **popt)
+{
+	struct conf_option *fresh;
+
+	if (conf_option_find(s, key) != NULL) {
+		D_ERR("conf: option \"%s\" already exists\n", key);
+		return EEXIST;
+	}
+
+	fresh = talloc_zero(s, struct conf_option);
+	if (fresh == NULL) {
+		return ENOMEM;
+	}
+
+	fresh->name = talloc_strdup(fresh, key);
+	if (fresh->name == NULL) {
+		talloc_free(fresh);
+		return ENOMEM;
+	}
+
+	fresh->validate = validate;
+	fresh->type = type;
+
+	DLIST_ADD_END(s->option, fresh);
+
+	if (popt != NULL) {
+		*popt = fresh;
+	}
+
+	return 0;
+}
+
+/*
+ * Record default_value as the option's default.
+ *
+ * The value is copied (strings onto the option's talloc context). On
+ * success the option is marked as having a default and as not
+ * temporarily modified. Returns 0 or the error from conf_value_copy().
+ */
+static int conf_option_set_default(struct conf_option *opt,
+				   struct conf_value *default_value)
+{
+	struct conf_value *dflt = &opt->default_value;
+	int ret;
+
+	dflt->type = opt->type;
+
+	ret = conf_value_copy(opt, default_value, dflt);
+	if (ret == 0) {
+		opt->default_set = true;
+		opt->temporary_modified = false;
+	}
+
+	return ret;
+}
+
+/*
+ * Remember the caller-supplied location that should track this option.
+ */
+static void conf_option_set_ptr(struct conf_option *opt,
+				union conf_pointer *ptr)
+{
+	const union conf_pointer target = *ptr;
+
+	opt->ptr = target;
+}
+
+/*
+ * Run the option's string validation callback, passing the current
+ * and the proposed string. The caller must ensure opt->validate is set.
+ */
+static bool conf_option_validate_string(struct conf_option *opt,
+					struct conf_value *value,
+					enum conf_update_mode mode)
+{
+	conf_validate_string_option_fn fn;
+	const char *old_str = opt->value->data.string;
+	const char *new_str = value->data.string;
+
+	fn = (conf_validate_string_option_fn)opt->validate;
+
+	return fn(opt->name, old_str, new_str, mode);
+}
+
+/*
+ * Run the option's integer validation callback, passing the current
+ * and the proposed integer. The caller must ensure opt->validate is set.
+ */
+static bool conf_option_validate_integer(struct conf_option *opt,
+					 struct conf_value *value,
+					 enum conf_update_mode mode)
+{
+	conf_validate_integer_option_fn fn;
+	int old_int = opt->value->data.integer;
+	int new_int = value->data.integer;
+
+	fn = (conf_validate_integer_option_fn)opt->validate;
+
+	return fn(opt->name, old_int, new_int, mode);
+}
+
+/*
+ * Run the option's boolean validation callback, passing the current
+ * and the proposed boolean. The caller must ensure opt->validate is set.
+ */
+static bool conf_option_validate_boolean(struct conf_option *opt,
+					 struct conf_value *value,
+					 enum conf_update_mode mode)
+{
+	conf_validate_boolean_option_fn fn;
+	bool old_bool = opt->value->data.boolean;
+	bool new_bool = value->data.boolean;
+
+	fn = (conf_validate_boolean_option_fn)opt->validate;
+
+	return fn(opt->name, old_bool, new_bool, mode);
+}
+
+/*
+ * Validate a proposed value for an option.
+ *
+ * Options without a validation callback always validate. Otherwise the
+ * callback matching the option's type decides. An option whose type is
+ * not one of the known types fails validation.
+ *
+ * Returns true if the value is acceptable, false otherwise.
+ */
+static bool conf_option_validate(struct conf_option *opt,
+				 struct conf_value *value,
+				 enum conf_update_mode mode)
+{
+	if (opt->validate == NULL) {
+		return true;
+	}
+
+	switch (opt->type) {
+	case CONF_STRING:
+		return conf_option_validate_string(opt, value, mode);
+
+	case CONF_INTEGER:
+		return conf_option_validate_integer(opt, value, mode);
+
+	case CONF_BOOLEAN:
+		return conf_option_validate_boolean(opt, value, mode);
+
+	default:
+		break;
+	}
+
+	/*
+	 * Unknown type: fail. Returning an errno value here would convert
+	 * to true and silently accept the value.
+	 */
+	return false;
+}
+
+/*
+ * Report whether new_value equals the option's current value.
+ */
+static bool conf_option_same_value(struct conf_option *opt,
+				   struct conf_value *new_value)
+{
+	struct conf_value *current = opt->value;
+
+	return conf_value_compare(current, new_value);
+}
+
+/*
+ * Stage new_value as the option's pending value (opt->new_value).
+ *
+ * The staged value only becomes the current value once
+ * conf_option_update() is called; conf_option_reset() drops it.
+ *
+ * Passing &opt->default_value stages the default itself (no copy, no
+ * validation). Any other value is validated and then copied into a new
+ * talloc allocation owned by opt.
+ *
+ * Returns 0 on success, EINVAL if validation fails, ENOMEM on
+ * allocation failure, or the error from conf_value_copy().
+ */
+static int conf_option_new_value(struct conf_option *opt,
+ struct conf_value *new_value,
+ enum conf_update_mode mode)
+{
+ int ret;
+ bool ok;
+
+ /* Drop a previously staged value unless it aliases the default */
+ if (opt->new_value != &opt->default_value) {
+ TALLOC_FREE(opt->new_value);
+ }
+
+ if (new_value == &opt->default_value) {
+ /*
+ * This happens only during load/reload. Set the value to
+ * default value, so if the config option is dropped from
+ * config file, then it gets reset to default.
+ */
+ opt->new_value = &opt->default_value;
+ } else {
+ ok = conf_option_validate(opt, new_value, mode);
+ if (!ok) {
+ D_ERR("conf: validation for option \"%s\" failed\n",
+ opt->name);
+ return EINVAL;
+ }
+
+ opt->new_value = talloc_zero(opt, struct conf_value);
+ if (opt->new_value == NULL) {
+ return ENOMEM;
+ }
+
+ /* conf_value_copy() requires matching types on both sides */
+ opt->new_value->type = opt->value->type;
+ ret = conf_value_copy(opt, new_value, opt->new_value);
+ if (ret != 0) {
+ /*
+ * The partially filled staged value stays attached to
+ * opt here; the callers in this file follow a failure
+ * with conf_option_reset()/conf_all_reset().
+ */
+ return ret;
+ }
+ }
+
+ /*
+ * NOTE(review): this publishes opt->value (the current value), not
+ * the value just staged - confirm that is the intent.
+ */
+ conf_option_set_ptr_value(opt);
+
+ /* Only an explicit (non-default) value changes the temporary flag */
+ if (new_value != &opt->default_value) {
+ if (mode == CONF_MODE_API) {
+ opt->temporary_modified = true;
+ } else {
+ opt->temporary_modified = false;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Stage the option's own default as its pending value.
+ */
+static int conf_option_new_default_value(struct conf_option *opt,
+					 enum conf_update_mode mode)
+{
+	struct conf_value *dflt = &opt->default_value;
+
+	return conf_option_new_value(opt, dflt, mode);
+}
+
+/*
+ * Make the default the option's current value and publish it to the
+ * registered caller variable. Does nothing if no default has been set.
+ */
+static void conf_option_default(struct conf_option *opt)
+{
+	struct conf_value *dflt = &opt->default_value;
+
+	if (opt->default_set) {
+		/* Free a separately allocated current value, never the default */
+		if (opt->value != dflt) {
+			TALLOC_FREE(opt->value);
+		}
+
+		opt->value = dflt;
+		conf_option_set_ptr_value(opt);
+	}
+}
+
+/*
+ * Discard a separately allocated staged value and re-publish the
+ * current value. A staged value that aliases the default is left as is.
+ */
+static void conf_option_reset(struct conf_option *opt)
+{
+	bool staged_is_default = (opt->new_value == &opt->default_value);
+
+	if (!staged_is_default) {
+		TALLOC_FREE(opt->new_value);
+	}
+
+	conf_option_set_ptr_value(opt);
+}
+
+/*
+ * Commit the staged value: it becomes the current value, the previous
+ * current value is freed (unless it is the default) and the result is
+ * published. Does nothing if no value is staged.
+ */
+static void conf_option_update(struct conf_option *opt)
+{
+	struct conf_value *staged = opt->new_value;
+
+	if (staged != NULL) {
+		if (opt->value != &opt->default_value) {
+			TALLOC_FREE(opt->value);
+		}
+
+		opt->value = staged;
+		opt->new_value = NULL;
+
+		conf_option_set_ptr_value(opt);
+	}
+}
+
+/*
+ * Clear the option's "temporarily modified" marker.
+ */
+static void conf_option_reset_temporary(struct conf_option *opt)
+{
+	if (opt->temporary_modified) {
+		opt->temporary_modified = false;
+	}
+}
+
+/*
+ * Report whether the option's current value is the default itself.
+ * This is a pointer identity check, not a comparison of contents.
+ */
+static bool conf_option_is_default(struct conf_option *opt)
+{
+	const struct conf_value *dflt = &opt->default_value;
+
+	return (opt->value == dflt);
+}
+
+/*
+ * Write the option's current value to fp as one configuration line.
+ */
+static void conf_option_dump(struct conf_option *opt, FILE *fp)
+{
+	conf_value_dump(opt->name,
+			opt->value,
+			conf_option_is_default(opt),
+			opt->temporary_modified,
+			fp);
+}
+
+/*
+ * Functions related to conf_section
+ */
+
+/*
+ * Look up a section by name, ignoring case.
+ *
+ * Returns the section, or NULL if no section has that name.
+ */
+static struct conf_section *conf_section_find(struct conf_context *conf,
+					      const char *section)
+{
+	struct conf_section *cur = conf->section;
+
+	while (cur != NULL && strcasecmp(cur->name, section) != 0) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/*
+ * Create a section and append it to the context's section list.
+ *
+ * The section is allocated as a talloc child of conf. Returns 0 on
+ * success, EEXIST if a section with that name (ignoring case) already
+ * exists, ENOMEM on allocation failure.
+ */
+static int conf_section_add(struct conf_context *conf,
+			    const char *section,
+			    conf_validate_section_fn validate)
+{
+	struct conf_section *fresh;
+
+	if (conf_section_find(conf, section) != NULL) {
+		return EEXIST;
+	}
+
+	fresh = talloc_zero(conf, struct conf_section);
+	if (fresh == NULL) {
+		return ENOMEM;
+	}
+
+	fresh->name = talloc_strdup(fresh, section);
+	if (fresh->name == NULL) {
+		talloc_free(fresh);
+		return ENOMEM;
+	}
+
+	fresh->validate = validate;
+	DLIST_ADD_END(conf->section, fresh);
+
+	return 0;
+}
+
+/*
+ * Run the section's validation callback, if it has one.
+ *
+ * Returns true when there is no callback or the callback accepts the
+ * section; logs an error and returns false otherwise.
+ */
+static bool conf_section_validate(struct conf_context *conf,
+				  struct conf_section *s,
+				  enum conf_update_mode mode)
+{
+	if (s->validate == NULL) {
+		return true;
+	}
+
+	if (s->validate(conf, s->name, mode)) {
+		return true;
+	}
+
+	D_ERR("conf: validation for section [%s] failed\n", s->name);
+	return false;
+}
+
+/*
+ * Write the section header line ("[name]") to fp.
+ */
+static void conf_section_dump(struct conf_section *s, FILE *fp)
+{
+	const char *title = s->name;
+
+	fprintf(fp, "[%s]\n", title);
+}
+
+/*
+ * Functions related to conf_context
+ */
+
+/*
+ * Switch every option in every section back to its default value.
+ */
+static void conf_all_default(struct conf_context *conf)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		while (o != NULL) {
+			conf_option_default(o);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
+
+/*
+ * Stage the default as the pending value of every option.
+ *
+ * Stops at the first failure and returns its error; returns 0 when all
+ * options were staged.
+ */
+static int conf_all_temporary_default(struct conf_context *conf,
+				      enum conf_update_mode mode)
+{
+	struct conf_section *sec;
+	struct conf_option *o;
+
+	for (sec = conf->section; sec != NULL; sec = sec->next) {
+		o = sec->option;
+		while (o != NULL) {
+			int ret = conf_option_new_default_value(o, mode);
+
+			if (ret != 0) {
+				return ret;
+			}
+			o = o->next;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Drop the staged value of every option in every section.
+ */
+static void conf_all_reset(struct conf_context *conf)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		while (o != NULL) {
+			conf_option_reset(o);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
+
+/*
+ * Commit the staged value of every option and clear its temporary
+ * marker.
+ */
+static void conf_all_update(struct conf_context *conf)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		while (o != NULL) {
+			conf_option_update(o);
+			conf_option_reset_temporary(o);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
+
+/*
+ * API functions
+ */
+
+/*
+ * Allocate an empty configuration context on mem_ctx.
+ *
+ * Returns 0 and stores the context in *result, or ENOMEM.
+ */
+int conf_init(TALLOC_CTX *mem_ctx, struct conf_context **result)
+{
+	struct conf_context *ctx;
+
+	ctx = talloc_zero(mem_ctx, struct conf_context);
+	if (ctx != NULL) {
+		ctx->define_failed = false;
+		*result = ctx;
+		return 0;
+	}
+
+	return ENOMEM;
+}
+
+/*
+ * Define a new section.
+ *
+ * Does nothing if an earlier definition already failed. A NULL name or
+ * a failure to add the section marks the context as failed.
+ */
+void conf_define_section(struct conf_context *conf,
+			 const char *section,
+			 conf_validate_section_fn validate)
+{
+	if (conf->define_failed) {
+		return;
+	}
+
+	if (section == NULL ||
+	    conf_section_add(conf, section, validate) != 0) {
+		conf->define_failed = true;
+	}
+}
+
+/*
+ * Common part of the conf_define_<type>() functions: add option key of
+ * the given type to an existing section.
+ *
+ * Returns the new option, or NULL if the section is unknown, key is
+ * NULL or the option could not be added.
+ */
+static struct conf_option *conf_define(struct conf_context *conf,
+				       const char *section,
+				       const char *key,
+				       enum conf_type type,
+				       conf_validate_string_option_fn validate)
+{
+	struct conf_section *sec;
+	struct conf_option *created;
+
+	sec = conf_section_find(conf, section);
+	if (sec == NULL) {
+		D_ERR("conf: unknown section [%s]\n", section);
+		return NULL;
+	}
+
+	if (key == NULL) {
+		D_ERR("conf: option name null in section [%s]\n", section);
+		return NULL;
+	}
+
+	if (conf_option_add(sec, key, type, validate, &created) != 0) {
+		return NULL;
+	}
+
+	return created;
+}
+
+/*
+ * Finish defining an option: store its default and make that default
+ * the current value. A failure marks the context as failed.
+ */
+static void conf_define_post(struct conf_context *conf,
+			     struct conf_option *opt,
+			     struct conf_value *default_value)
+{
+	if (conf_option_set_default(opt, default_value) != 0) {
+		conf->define_failed = true;
+		return;
+	}
+
+	conf_option_default(opt);
+}
+
+/*
+ * Define a string option with the given default (which may be NULL) and
+ * optional validation callback.
+ *
+ * Does nothing if the context is already marked as failed; marks it as
+ * failed if the option cannot be defined.
+ */
+void conf_define_string(struct conf_context *conf,
+			const char *section,
+			const char *key,
+			const char *default_str_val,
+			conf_validate_string_option_fn validate)
+{
+	struct conf_value dflt;
+	struct conf_option *opt;
+
+	if (! conf_valid(conf)) {
+		return;
+	}
+
+	opt = conf_define(conf, section, key, CONF_STRING, validate);
+	if (opt != NULL) {
+		dflt.type = CONF_STRING;
+		dflt.data.string = default_str_val;
+		conf_define_post(conf, opt, &dflt);
+		return;
+	}
+
+	conf->define_failed = true;
+}
+
+/*
+ * Define an integer option with the given default and optional
+ * validation callback.
+ *
+ * Does nothing if the context is already marked as failed; marks it as
+ * failed if the option cannot be defined.
+ */
+void conf_define_integer(struct conf_context *conf,
+			 const char *section,
+			 const char *key,
+			 const int default_int_val,
+			 conf_validate_integer_option_fn validate)
+{
+	struct conf_value dflt;
+	struct conf_option *opt;
+
+	if (! conf_valid(conf)) {
+		return;
+	}
+
+	opt = conf_define(conf, section, key, CONF_INTEGER, (void *)validate);
+	if (opt != NULL) {
+		dflt.type = CONF_INTEGER;
+		dflt.data.integer = default_int_val;
+		conf_define_post(conf, opt, &dflt);
+		return;
+	}
+
+	conf->define_failed = true;
+}
+
+
+/*
+ * Define a boolean option with the given default and optional
+ * validation callback.
+ *
+ * Does nothing if the context is already marked as failed; marks it as
+ * failed if the option cannot be defined.
+ */
+void conf_define_boolean(struct conf_context *conf,
+			 const char *section,
+			 const char *key,
+			 const bool default_bool_val,
+			 conf_validate_boolean_option_fn validate)
+{
+	struct conf_value dflt;
+	struct conf_option *opt;
+
+	if (! conf_valid(conf)) {
+		return;
+	}
+
+	opt = conf_define(conf, section, key, CONF_BOOLEAN, (void *)validate);
+	if (opt != NULL) {
+		dflt.type = CONF_BOOLEAN;
+		dflt.data.boolean = default_bool_val;
+		conf_define_post(conf, opt, &dflt);
+		return;
+	}
+
+	conf->define_failed = true;
+}
+
+/*
+ * Find option key in the named section.
+ *
+ * Returns NULL if either the section or the option does not exist.
+ */
+static struct conf_option *_conf_option(struct conf_context *conf,
+					const char *section,
+					const char *key)
+{
+	struct conf_section *sec = conf_section_find(conf, section);
+
+	if (sec != NULL) {
+		return conf_option_find(sec, key);
+	}
+
+	return NULL;
+}
+
+/*
+ * Register str_ptr as the variable tracking a string option and store
+ * the option's current value in it right away.
+ *
+ * An unknown option or an option of another type marks the context as
+ * failed.
+ */
+void conf_assign_string_pointer(struct conf_context *conf,
+				const char *section,
+				const char *key,
+				const char **str_ptr)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+	union conf_pointer target;
+
+	if (opt == NULL) {
+		D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key);
+		conf->define_failed = true;
+		return;
+	}
+
+	if (opt->type == CONF_STRING) {
+		target.string = str_ptr;
+		conf_option_set_ptr(opt, &target);
+		conf_option_set_ptr_value(opt);
+		return;
+	}
+
+	conf->define_failed = true;
+}
+
+/*
+ * Register int_ptr as the variable tracking an integer option and
+ * store the option's current value in it right away.
+ *
+ * An unknown option or an option of another type marks the context as
+ * failed.
+ */
+void conf_assign_integer_pointer(struct conf_context *conf,
+				 const char *section,
+				 const char *key,
+				 int *int_ptr)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+	union conf_pointer target;
+
+	if (opt == NULL) {
+		D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key);
+		conf->define_failed = true;
+		return;
+	}
+
+	if (opt->type == CONF_INTEGER) {
+		target.integer = int_ptr;
+		conf_option_set_ptr(opt, &target);
+		conf_option_set_ptr_value(opt);
+		return;
+	}
+
+	conf->define_failed = true;
+}
+
+/*
+ * Register bool_ptr as the variable tracking a boolean option and
+ * store the option's current value in it right away.
+ *
+ * An unknown option or an option of another type marks the context as
+ * failed.
+ */
+void conf_assign_boolean_pointer(struct conf_context *conf,
+				 const char *section,
+				 const char *key,
+				 bool *bool_ptr)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+	union conf_pointer target;
+
+	if (opt == NULL) {
+		D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key);
+		conf->define_failed = true;
+		return;
+	}
+
+	if (opt->type == CONF_BOOLEAN) {
+		target.boolean = bool_ptr;
+		conf_option_set_ptr(opt, &target);
+		conf_option_set_ptr_value(opt);
+		return;
+	}
+
+	conf->define_failed = true;
+}
+
+/*
+ * Check whether an option exists and optionally report its type.
+ *
+ * Returns false if the context is marked as failed or the section or
+ * option is unknown. On success, *type is set when type is not NULL.
+ */
+bool conf_query(struct conf_context *conf,
+		const char *section,
+		const char *key,
+		enum conf_type *type)
+{
+	struct conf_option *found;
+
+	if (! conf_valid(conf)) {
+		return false;
+	}
+
+	found = _conf_option(conf, section, key);
+	if (found == NULL) {
+		return false;
+	}
+
+	if (type != NULL) {
+		*type = found->type;
+	}
+
+	return true;
+}
+
+/*
+ * Report whether all section/option definitions so far succeeded.
+ */
+bool conf_valid(struct conf_context *conf)
+{
+	return !conf->define_failed;
+}
+
+/*
+ * Reset every defined option to its default value.
+ */
+void conf_set_defaults(struct conf_context *conf)
+{
+	struct conf_context *ctx = conf;
+
+	conf_all_default(ctx);
+}
+
+/*
+ * State shared between conf_load_internal() and the parser callbacks
+ * conf_load_section() / conf_load_option().
+ */
+struct conf_load_state {
+ struct conf_context *conf;
+ /* Section currently being parsed; NULL before the first or if unknown */
+ struct conf_section *s;
+ /* CONF_MODE_LOAD or CONF_MODE_RELOAD */
+ enum conf_update_mode mode;
+ /* Error recorded by the callbacks; 0 while no error was seen */
+ int err;
+};
+
+static bool conf_load_section(const char *section, void *private_data);
+static bool conf_load_option(const char *name,
+ const char *value_str,
+ void *private_data);
+
+/*
+ * Load (or reload) conf->filename into the configuration.
+ *
+ * All options are first staged to their defaults, so options missing
+ * from the file fall back to the default. Values from the file are then
+ * staged on top, each completed section is validated, and only if
+ * everything succeeded are the staged values committed. On any failure
+ * the staged values are dropped and the current values stay in effect.
+ *
+ * Returns 0 on success, the errno from fopen() if the file cannot be
+ * opened, the error recorded by the parser callbacks, or EINVAL if the
+ * file cannot be parsed or a section fails validation.
+ */
+static int conf_load_internal(struct conf_context *conf)
+{
+	struct conf_load_state state;
+	FILE *fp;
+	int ret;
+	bool ok;
+
+	state = (struct conf_load_state) {
+		.conf = conf,
+		.mode = (conf->reload ? CONF_MODE_RELOAD : CONF_MODE_LOAD),
+	};
+
+	ret = conf_all_temporary_default(conf, state.mode);
+	if (ret != 0) {
+		return ret;
+	}
+
+	fp = fopen(conf->filename, "r");
+	if (fp == NULL) {
+		return errno;
+	}
+
+	ok = tini_parse(fp,
+			false,
+			conf_load_section,
+			conf_load_option,
+			&state);
+	fclose(fp);
+	if (!ok) {
+		goto fail;
+	}
+
+	/* Process the last section */
+	if (state.s != NULL) {
+		ok = conf_section_validate(conf, state.s, state.mode);
+		if (!ok) {
+			state.err = EINVAL;
+			goto fail;
+		}
+	}
+
+	if (state.err != 0) {
+		goto fail;
+	}
+
+	conf_all_update(conf);
+	return 0;
+
+fail:
+	conf_all_reset(conf);
+	/*
+	 * The parser can fail without a callback recording an error (for
+	 * example on malformed input). Never report that as success.
+	 */
+	if (state.err == 0) {
+		state.err = EINVAL;
+	}
+	return state.err;
+}
+
+/*
+ * Parser callback for a "[section]" line.
+ *
+ * Validates the section that has just ended (if any) and makes the
+ * named section current. Problems are recorded in state->err; the
+ * function always returns true so that parsing continues.
+ */
+static bool conf_load_section(const char *section, void *private_data)
+{
+	struct conf_load_state *st = (struct conf_load_state *)private_data;
+	struct conf_context *conf = st->conf;
+
+	if (st->s != NULL &&
+	    !conf_section_validate(conf, st->s, st->mode)) {
+		st->err = EINVAL;
+		return true;
+	}
+
+	st->s = conf_section_find(conf, section);
+	if (st->s != NULL) {
+		return true;
+	}
+
+	if (conf->ignore_unknown) {
+		D_DEBUG("conf: ignoring unknown section [%s]\n",
+			section);
+		return true;
+	}
+
+	D_ERR("conf: unknown section [%s]\n", section);
+	st->err = EINVAL;
+	return true;
+}
+
+/*
+ * Parser callback for a "name = value" line.
+ *
+ * Converts value_str to the option's type and stages it as the
+ * option's pending value, unless it equals the current value. Unknown
+ * sections/options are skipped when ignore_unknown is set and are
+ * errors otherwise; empty values are always errors.
+ *
+ * Errors are recorded in state->err. Returns false (stop parsing) only
+ * if the temporary talloc context cannot be allocated, true otherwise.
+ */
+static bool conf_load_option(const char *name,
+			     const char *value_str,
+			     void *private_data)
+{
+	struct conf_load_state *st = (struct conf_load_state *)private_data;
+	struct conf_option *opt;
+	struct conf_value parsed;
+	TALLOC_CTX *scratch;
+	int ret;
+
+	if (st->s == NULL) {
+		if (st->conf->ignore_unknown) {
+			D_DEBUG("conf: unknown section for option \"%s\"\n",
+				name);
+			return true;
+		}
+		D_ERR("conf: unknown section for option \"%s\"\n",
+		      name);
+		st->err = EINVAL;
+		return true;
+	}
+
+	opt = conf_option_find(st->s, name);
+	if (opt == NULL) {
+		if (st->conf->ignore_unknown) {
+			D_DEBUG("conf: unknown option [%s] -> \"%s\"\n",
+				st->s->name,
+				name);
+			return true;
+		}
+		D_ERR("conf: unknown option [%s] -> \"%s\"\n",
+		      st->s->name,
+		      name);
+		st->err = EINVAL;
+		return true;
+	}
+
+	if (strlen(value_str) == 0) {
+		D_ERR("conf: empty value [%s] -> \"%s\"\n",
+		      st->s->name,
+		      name);
+		st->err = EINVAL;
+		return true;
+	}
+
+	/* Holds the parsed string until it has been copied into the option */
+	scratch = talloc_new(st->conf);
+	if (scratch == NULL) {
+		st->err = ENOMEM;
+		return false;
+	}
+
+	parsed.type = opt->type;
+	ret = conf_value_from_string(scratch, value_str, &parsed);
+	if (ret != 0) {
+		D_ERR("conf: invalid value [%s] -> \"%s\" = \"%s\"\n",
+		      st->s->name,
+		      name,
+		      value_str);
+		st->err = ret;
+	} else if (!conf_option_same_value(opt, &parsed)) {
+		ret = conf_option_new_value(opt, &parsed, st->mode);
+		if (ret != 0) {
+			st->err = ret;
+		}
+	}
+
+	talloc_free(scratch);
+	return true;
+}
+
+/*
+ * Load the configuration from filename.
+ *
+ * The file name is remembered for conf_reload(). If ignore_unknown is
+ * true, unknown sections and options in the file are skipped instead of
+ * being treated as errors.
+ *
+ * Returns 0 on success, ENOMEM if the file name cannot be stored, or
+ * the error from loading the file.
+ */
+int conf_load(struct conf_context *conf,
+	      const char *filename,
+	      bool ignore_unknown)
+{
+	char *name_copy;
+
+	/*
+	 * Duplicate before releasing the old name: filename may be the
+	 * stored name itself, and a failed allocation must not lose it.
+	 */
+	name_copy = talloc_strdup(conf, filename);
+	if (name_copy == NULL) {
+		return ENOMEM;
+	}
+
+	/* Do not accumulate one copy of the name per conf_load() call */
+	if (conf->filename != NULL) {
+		talloc_free(discard_const(conf->filename));
+	}
+	conf->filename = name_copy;
+
+	conf->ignore_unknown = ignore_unknown;
+
+	D_NOTICE("Reading config file %s\n", filename);
+
+	return conf_load_internal(conf);
+}
+
+/*
+ * Re-read the file previously given to conf_load().
+ *
+ * Returns EPERM if no file has been loaded yet, otherwise the result
+ * of loading the file in reload mode.
+ */
+int conf_reload(struct conf_context *conf)
+{
+	int status = EPERM;
+
+	if (conf->filename != NULL) {
+		D_NOTICE("Re-reading config file %s\n", conf->filename);
+
+		/* The loader picks CONF_MODE_RELOAD while this flag is set */
+		conf->reload = true;
+		status = conf_load_internal(conf);
+		conf->reload = false;
+	}
+
+	return status;
+}
+
+/*
+ * Common part of the conf_set_<type>() functions.
+ *
+ * Stages value for the option with CONF_MODE_API, validates the whole
+ * section and commits the value; on any failure the staged value is
+ * dropped. Setting a value equal to the current one is a no-op.
+ *
+ * Returns 0 on success, EINVAL for an unknown section/option, a type
+ * mismatch or failed section validation, or the error from staging.
+ */
+static int conf_set(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    struct conf_value *value)
+{
+	struct conf_section *sec;
+	struct conf_option *opt;
+	int ret;
+
+	sec = conf_section_find(conf, section);
+	if (sec == NULL) {
+		return EINVAL;
+	}
+
+	opt = conf_option_find(sec, key);
+	if (opt == NULL || opt->type != value->type) {
+		return EINVAL;
+	}
+
+	if (conf_option_same_value(opt, value)) {
+		return 0;
+	}
+
+	ret = conf_option_new_value(opt, value, CONF_MODE_API);
+	if (ret == 0 && !conf_section_validate(conf, sec, CONF_MODE_API)) {
+		ret = EINVAL;
+	}
+
+	if (ret != 0) {
+		conf_option_reset(opt);
+		return ret;
+	}
+
+	conf_option_update(opt);
+	return 0;
+}
+
+/*
+ * Set a string option through the API. See conf_set() for the result.
+ */
+int conf_set_string(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    const char *str_val)
+{
+	struct conf_value wanted = {
+		.type = CONF_STRING,
+		.data.string = str_val,
+	};
+
+	return conf_set(conf, section, key, &wanted);
+}
+
+/*
+ * Set an integer option through the API. See conf_set() for the result.
+ */
+int conf_set_integer(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     int int_val)
+{
+	struct conf_value wanted = {
+		.type = CONF_INTEGER,
+		.data.integer = int_val,
+	};
+
+	return conf_set(conf, section, key, &wanted);
+}
+
+/*
+ * Set a boolean option through the API. See conf_set() for the result.
+ */
+int conf_set_boolean(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     bool bool_val)
+{
+	struct conf_value wanted = {
+		.type = CONF_BOOLEAN,
+		.data.boolean = bool_val,
+	};
+
+	return conf_set(conf, section, key, &wanted);
+}
+
+/*
+ * Common part of the conf_get_<type>() functions.
+ *
+ * Returns the option's current value through *value and, if is_default
+ * is not NULL, whether that value is the default. Returns EINVAL if the
+ * section or option is unknown or the option has a different type.
+ */
+static int conf_get(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    enum conf_type type,
+		    const struct conf_value **value,
+		    bool *is_default)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+
+	if (opt == NULL || opt->type != type) {
+		return EINVAL;
+	}
+
+	*value = opt->value;
+	if (is_default != NULL) {
+		*is_default = conf_option_is_default(opt);
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch the current value of a string option.
+ *
+ * The returned string is the one held by the configuration, not a
+ * copy. str_val is only written on success (return value 0).
+ */
+int conf_get_string(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    const char **str_val,
+		    bool *is_default)
+{
+	const struct conf_value *cur;
+	int ret;
+
+	ret = conf_get(conf, section, key, CONF_STRING, &cur, is_default);
+	if (ret == 0) {
+		*str_val = cur->data.string;
+	}
+
+	return ret;
+}
+
+/*
+ * Fetch the current value of an integer option.
+ *
+ * int_val is only written on success (return value 0).
+ */
+int conf_get_integer(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     int *int_val,
+		     bool *is_default)
+{
+	const struct conf_value *cur;
+	int ret;
+
+	ret = conf_get(conf, section, key, CONF_INTEGER, &cur, is_default);
+	if (ret == 0) {
+		*int_val = cur->data.integer;
+	}
+
+	return ret;
+}
+
+/*
+ * Fetch the current value of a boolean option.
+ *
+ * bool_val is only written on success (return value 0).
+ */
+int conf_get_boolean(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     bool *bool_val,
+		     bool *is_default)
+{
+	const struct conf_value *cur;
+	int ret;
+
+	ret = conf_get(conf, section, key, CONF_BOOLEAN, &cur, is_default);
+	if (ret == 0) {
+		*bool_val = cur->data.boolean;
+	}
+
+	return ret;
+}
+
+/*
+ * Write the whole configuration to fp: each section header followed by
+ * one line per option, in definition order.
+ */
+void conf_dump(struct conf_context *conf, FILE *fp)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		conf_section_dump(sec, fp);
+		while (o != NULL) {
+			conf_option_dump(o, fp);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
diff --git a/ctdb/common/conf.h b/ctdb/common/conf.h
new file mode 100644
index 0000000..4dbf9c3
--- /dev/null
+++ b/ctdb/common/conf.h
@@ -0,0 +1,473 @@
+/*
+ Configuration file handling on top of tini
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CONF_H__
+#define __CTDB_CONF_H__
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <talloc.h>
+
+/**
+ * @file conf.h
+ *
+ * @brief Configuration file handling with sections and key-value pairs
+ *
+ * CTDB settings can be written in a configuration file ctdb.conf (similar to
+ * samba's smb.conf). Various daemons and tools will consult the configuration
+ * file for runtime settings.
+ *
+ * The configuration will be organized in sections depending on various
+ * components. Each section will have various configuration options in the form
+ * of key-value pairs.
+ *
+ * [section1]
+ * key1 = value1
+ * ...
+ *
+ * [section2]
+ * key2 = value2
+ * ...
+ *
+ * ...
+ *
+ */
+
+/**
+ * @brief Abstract data structure holding the configuration options
+ */
+struct conf_context;
+
+/**
+ * @brief configuration option update mode
+ *
+ * When a value of configuration option is changed, update mode is set
+ * appropriately.
+ *
+ * CONF_MODE_API - value modified using set functions
+ * CONF_MODE_LOAD - value modified via conf_load
+ * CONF_MODE_RELOAD - value modified via conf_reload
+ */
+enum conf_update_mode {
+ CONF_MODE_API,
+ CONF_MODE_LOAD,
+ CONF_MODE_RELOAD,
+};
+
+/**
+ * @brief configuration option type
+ *
+ * CONF_STRING - option holds a string value
+ * CONF_INTEGER - option holds an integer value
+ * CONF_BOOLEAN - option holds a boolean value
+ */
+enum conf_type {
+ CONF_STRING,
+ CONF_INTEGER,
+ CONF_BOOLEAN,
+};
+
+/**
+ * @brief Configuration section validation function
+ *
+ * Check if all the configuration options are consistent with each-other
+ */
+typedef bool (*conf_validate_section_fn)(struct conf_context *conf,
+ const char *section,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for string
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_string_option_fn)(const char *key,
+ const char *old_value,
+ const char *new_value,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for integer
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_integer_option_fn)(const char *key,
+ int old_value,
+ int new_value,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for boolean
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_boolean_option_fn)(const char *key,
+ bool old_value,
+ bool new_value,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Initialize configuration option database
+ *
+ * This returns a new configuration options context. Freeing this context will
+ * free up all the memory associated with the configuration options.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] result The new configuration options context
+ * @return 0 on success, errno on failure
+ */
+int conf_init(TALLOC_CTX *mem_ctx, struct conf_context **result);
+
+/**
+ * @brief Define a section for organizing configuration options
+ *
+ * This function creates a section to organize configuration options. The
+ * section names are case-insensitive and are always stored in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] validate The validation function for configuration options
+ */
+void conf_define_section(struct conf_context *conf,
+ const char *section,
+ conf_validate_section_fn validate);
+
+/**
+ * @brief Define a configuration option which has a string value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_string(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char *default_value,
+ conf_validate_string_option_fn validate);
+
+/**
+ * @brief Define a configuration option which has an integer value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_integer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const int default_value,
+ conf_validate_integer_option_fn validate);
+
+/**
+ * @brief Define a configuration option which has a boolean value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_boolean(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const bool default_value,
+ conf_validate_boolean_option_fn validate);
+
+/**
+ * @brief Assign user-accessible pointer for string option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_string_pointer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char **ptr);
+
+/**
+ * @brief Assign user-accessible pointer for integer option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_integer_pointer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ int *ptr);
+
+/**
+ * @brief Assign user-accessible pointer for boolean option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_boolean_pointer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ bool *ptr);
+
+/**
+ * @brief Query a configuration option
+ *
+ * This function checks if a configuration option is defined or not.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[out] type The type of the configuration option
+ * @return true on success, false if section/option is not defined
+ */
+bool conf_query(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ enum conf_type *type);
+
+/**
+ * @brief Check if the defined configuration options are valid
+ *
+ * This function must be called after creating configuration options
+ * to confirm that all the option definitions are valid.
+ *
+ * @param[in] conf The configuration options context
+ * @return true on success, false on failure
+ */
+bool conf_valid(struct conf_context *conf);
+
+/**
+ * @brief Set the default values for all configuration options
+ *
+ * This function resets all the configuration options to their default values.
+ *
+ * @param[in] conf The configuration options context
+ */
+void conf_set_defaults(struct conf_context *conf);
+
+/**
+ * @brief Load the values of configuration options from a file
+ *
+ * This function will update the values of the configuration options from those
+ * specified in a file. This function will fail in case it encounters an
+ * undefined option. Any sections which are not defined, will be ignored.
+ *
+ * This function will call validation function (if specified) before updating
+ * the value of a configuration option. After updating all the values for a
+ * section, the validation for section (if specified) will be called. If any
+ * of the validation functions return error, then all the configuration
+ * options will be reset to their previous values.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] filename The configuration file
+ * @param[in] ignore_unknown Whether unknown config options should be ignored
+ * @return 0 on success, errno on failure
+ */
+int conf_load(struct conf_context *conf,
+ const char *filename,
+ bool ignore_unknown);
+
+/**
+ * @brief Reload the values for configuration options
+ *
+ * This function will re-load the values of the configuration options. This
+ * function can be called only after successful call to conf_load().
+ *
+ * @see conf_load
+ *
+ * @param[in] conf The configuration options context
+ * @return 0 on success, errno on failure.
+ */
+int conf_reload(struct conf_context *conf);
+
+/**
+ * @brief Set the string value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] str_val The string value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_string(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char *str_val);
+
+/**
+ * @brief Set the integer value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] int_val The integer value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_integer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ int int_val);
+
+/**
+ * @brief Set the boolean value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] bool_val The boolean value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_boolean(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ bool bool_val);
+
+/**
+ * @brief Get the string value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] str_val The string value of the configuration option
+ * @param[out] is_default True if the value is default value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_string(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char **str_val,
+ bool *is_default);
+
+/**
+ * @brief Get the integer value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] int_val The integer value of the configuration option
+ * @param[out] is_default True if the value is default value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_integer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ int *int_val,
+ bool *is_default);
+
+/**
+ * @brief Get the boolean value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] bool_val The boolean value of the configuration option
+ * @param[out] is_default True if the value is default value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_boolean(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ bool *bool_val,
+ bool *is_default);
+
+/**
+ * @brief Dump the configuration in a file
+ *
+ * All the configuration options are dumped with their current values.
+ * If an option has a default value, then it is commented.
+ *
+ * Here is a sample output:
+ *
+ * [section1]
+ * key1 = value1
+ * key2 = value2
+ * # key3 = default_value3
+ * [section2]
+ * key4 = value4
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] fp File pointer
+ */
+void conf_dump(struct conf_context *conf, FILE *fp);
+
+#endif /* __CTDB_CONF_H__ */
diff --git a/ctdb/common/conf_tool.c b/ctdb/common/conf_tool.c
new file mode 100644
index 0000000..2d0543d
--- /dev/null
+++ b/ctdb/common/conf_tool.c
@@ -0,0 +1,321 @@
+/*
+ Config options tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/cmdline.h"
+#include "common/conf.h"
+#include "common/path.h"
+
+#include "common/logging_conf.h"
+#include "cluster/cluster_conf.h"
+#include "database/database_conf.h"
+#include "event/event_conf.h"
+#include "failover/failover_conf.h"
+#include "server/legacy_conf.h"
+
+#include "common/conf_tool.h"
+
+/* State shared by conf_tool_init(), conf_tool_run() and the tool commands */
+struct conf_tool_context {
+ /* Command-line parser, created by cmdline_init() in conf_tool_init() */
+ struct cmdline_context *cmdline;
+ /* Config file path, set from path_config() in conf_tool_run() */
+ const char *conf_file;
+ /* Configuration options, created by conf_init() in conf_tool_run() */
+ struct conf_context *conf;
+};
+
+/*
+ * "dump" command: load the config file and print all options to stdout.
+ *
+ * Takes no arguments.  The file is loaded with unknown options ignored, and
+ * ENOENT from conf_load() is tolerated, so the dump still happens in that
+ * case.  Returns 0 on success, EINVAL on bad usage or the conf_load() error.
+ */
+static int conf_tool_dump(TALLOC_CTX *mem_ctx,
+			  int argc,
+			  const char **argv,
+			  void *private_data)
+{
+	struct conf_tool_context *tool = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	int rc;
+
+	if (argc != 0) {
+		cmdline_usage(tool->cmdline, "dump");
+		return EINVAL;
+	}
+
+	rc = conf_load(tool->conf, tool->conf_file, true);
+	if (rc == 0 || rc == ENOENT) {
+		conf_dump(tool->conf, stdout);
+		return 0;
+	}
+
+	D_ERR("Failed to load config file %s\n", tool->conf_file);
+	return rc;
+}
+
+/*
+ * "get" command: print the value of one option to stdout.
+ *
+ * Expects exactly two arguments, <section> and <key>.  The option must be
+ * defined (ENOENT otherwise); the config file is then loaded with unknown
+ * options ignored and ENOENT from conf_load() tolerated.  A NULL string
+ * value is printed as an empty line, booleans as "true"/"false".
+ */
+static int conf_tool_get(TALLOC_CTX *mem_ctx,
+			 int argc,
+			 const char **argv,
+			 void *private_data)
+{
+	struct conf_tool_context *tool = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	const char *section, *option;
+	const char *str_value = NULL;
+	int int_value = 0;
+	bool bool_value = false;
+	enum conf_type type;
+	int rc;
+
+	if (argc != 2) {
+		cmdline_usage(tool->cmdline, "get");
+		return EINVAL;
+	}
+
+	section = argv[0];
+	option = argv[1];
+
+	if (!conf_query(tool->conf, section, option, &type)) {
+		D_ERR("Configuration option [%s] -> \"%s\" not defined\n",
+		      section, option);
+		return ENOENT;
+	}
+
+	rc = conf_load(tool->conf, tool->conf_file, true);
+	if (!(rc == 0 || rc == ENOENT)) {
+		D_ERR("Failed to load config file %s\n", tool->conf_file);
+		return rc;
+	}
+
+	/* Fetch the value with the getter matching the option's type */
+	if (type == CONF_STRING) {
+		rc = conf_get_string(tool->conf, section, option,
+				     &str_value, NULL);
+	} else if (type == CONF_INTEGER) {
+		rc = conf_get_integer(tool->conf, section, option,
+				      &int_value, NULL);
+	} else if (type == CONF_BOOLEAN) {
+		rc = conf_get_boolean(tool->conf, section, option,
+				      &bool_value, NULL);
+	} else {
+		D_ERR("Unknown configuration option type\n");
+		return EINVAL;
+	}
+
+	if (rc != 0) {
+		D_ERR("Failed to get configuration option value\n");
+		return rc;
+	}
+
+	/* Only print once the value is known to have been fetched */
+	if (type == CONF_STRING) {
+		printf("%s\n", str_value == NULL ? "" : str_value);
+	} else if (type == CONF_INTEGER) {
+		printf("%d\n", int_value);
+	} else if (type == CONF_BOOLEAN) {
+		printf("%s\n", bool_value ? "true" : "false");
+	}
+
+	return 0;
+}
+
+/*
+ * "validate" command: check that the config file loads cleanly.
+ *
+ * Takes no arguments.  Unlike "dump" and "get", the file is loaded without
+ * ignoring unknown options and any conf_load() error, including ENOENT, is
+ * reported as a failure.
+ */
+static int conf_tool_validate(TALLOC_CTX *mem_ctx,
+			      int argc,
+			      const char **argv,
+			      void *private_data)
+{
+	struct conf_tool_context *tool = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	int rc;
+
+	if (argc != 0) {
+		cmdline_usage(tool->cmdline, "validate");
+		return EINVAL;
+	}
+
+	rc = conf_load(tool->conf, tool->conf_file, false);
+	if (rc == 0) {
+		return 0;
+	}
+
+	D_ERR("Failed to load config file %s\n", tool->conf_file);
+	return rc;
+}
+
+/*
+ * Commands understood by the config tool.
+ *
+ * Each entry lists the command name, its handler, a one-line description and
+ * an argument synopsis; the synopsis is NULL for the commands whose handlers
+ * accept no arguments.
+ */
+struct cmdline_command conf_commands[] = {
+ { "dump", conf_tool_dump,
+ "Dump configuration", NULL },
+ { "get", conf_tool_get,
+ "Get a config value", "<section> <key>" },
+ { "validate", conf_tool_validate,
+ "Validate configuration file", NULL },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Allocate a config tool context and parse the command line.
+ *
+ * The context is allocated on mem_ctx and owns the command-line parser.  On
+ * any failure the context is freed again and *result is left untouched.
+ * Returns 0 on success, ENOMEM or the error from cmdline_init() /
+ * cmdline_parse() otherwise.
+ */
+int conf_tool_init(TALLOC_CTX *mem_ctx,
+		   const char *prog,
+		   struct poptOption *options,
+		   int argc,
+		   const char **argv,
+		   bool parse_options,
+		   struct conf_tool_context **result)
+{
+	struct conf_tool_context *tool;
+	int rc;
+
+	tool = talloc_zero(mem_ctx, struct conf_tool_context);
+	if (tool == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	rc = cmdline_init(tool, prog, options, NULL, conf_commands,
+			  &tool->cmdline);
+	if (rc != 0) {
+		D_ERR("Failed to initialize cmdline, ret=%d\n", rc);
+		goto fail;
+	}
+
+	rc = cmdline_parse(tool->cmdline, argc, argv, parse_options);
+	if (rc != 0) {
+		cmdline_usage(tool->cmdline, NULL);
+		goto fail;
+	}
+
+	*result = tool;
+	return 0;
+
+fail:
+	/* Single cleanup path: drop the half-built context */
+	talloc_free(tool);
+	return rc;
+}
+
+/*
+ * Set up the configuration options and run the selected command.
+ *
+ * Resolves the config file path, creates the configuration context, lets
+ * each subsystem define its sections/options and then dispatches through
+ * cmdline_run().  Returns ENOMEM, the conf_init() error, EINVAL if the option
+ * definitions are not valid, or whatever cmdline_run() returns.
+ */
+int conf_tool_run(struct conf_tool_context *ctx, int *result)
+{
+	int rc;
+
+	ctx->conf_file = path_config(ctx);
+	if (ctx->conf_file == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	rc = conf_init(ctx, &ctx->conf);
+	if (rc != 0) {
+		D_ERR("Failed to initialize config\n");
+		return rc;
+	}
+
+	/* Every subsystem registers its own sections and options */
+	logging_conf_init(ctx->conf, NULL);
+	cluster_conf_init(ctx->conf);
+	database_conf_init(ctx->conf);
+	event_conf_init(ctx->conf);
+	failover_conf_init(ctx->conf);
+	legacy_conf_init(ctx->conf);
+
+	if (!conf_valid(ctx->conf)) {
+		D_ERR("Failed to define configuration options\n");
+		return EINVAL;
+	}
+
+	return cmdline_run(ctx->cmdline, ctx, result);
+}
+
+#ifdef CTDB_CONF_TOOL
+
+/* Values of the command-line options declared in conf_options[] */
+static struct {
+ /* Debug level name, "ERROR" unless overridden with --debug/-d */
+ const char *debug;
+} conf_data = {
+ .debug = "ERROR",
+};
+
+/* popt option table of the standalone tool, passed to conf_tool_init() */
+struct poptOption conf_options[] = {
+ POPT_AUTOHELP
+ { "debug", 'd', POPT_ARG_STRING, &conf_data.debug, 0,
+ "debug level", "ERROR|WARNING|NOTICE|INFO|DEBUG" },
+ POPT_TABLEEND
+};
+
+/*
+ * Entry point of the standalone "ctdb-config" tool.
+ *
+ * Exits with 1 if setup or running the command fails, otherwise with the
+ * result stored by conf_tool_run().  An unparsable debug level falls back to
+ * DEBUG_ERR.
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct conf_tool_context *ctx;
+	int result;
+	int level;
+
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	if (conf_tool_init(mem_ctx,
+			   "ctdb-config",
+			   conf_options,
+			   argc,
+			   argv,
+			   true,
+			   &ctx) != 0) {
+		talloc_free(mem_ctx);
+		exit(1);
+	}
+
+	setup_logging("ctdb-config", DEBUG_STDERR);
+	if (!debug_level_parse(conf_data.debug, &level)) {
+		level = DEBUG_ERR;
+	}
+	debuglevel_set(level);
+
+	if (conf_tool_run(ctx, &result) != 0) {
+		result = 1;
+	}
+
+	talloc_free(mem_ctx);
+	exit(result);
+}
+
+#endif /* CTDB_CONF_TOOL */
diff --git a/ctdb/common/conf_tool.h b/ctdb/common/conf_tool.h
new file mode 100644
index 0000000..c77419f
--- /dev/null
+++ b/ctdb/common/conf_tool.h
@@ -0,0 +1,39 @@
+/*
+ Config options tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CONF_TOOL_H__
+#define __CTDB_CONF_TOOL_H__
+
+#include <stdbool.h>
+#include <popt.h>
+#include <talloc.h>
+
+struct conf_tool_context;
+
+/**
+ * @brief Initialize the config tool and parse its command line
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] prog Program name
+ * @param[in] options popt options table
+ * @param[in] argc Number of command-line arguments
+ * @param[in] argv Command-line arguments
+ * @param[in] parse_options Whether options should be parsed
+ *                          (NOTE(review): inferred from the name - confirm)
+ * @param[out] result The new config tool context
+ * @return 0 on success, errno on failure
+ */
+int conf_tool_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ int argc,
+ const char **argv,
+ bool parse_options,
+ struct conf_tool_context **result);
+
+/**
+ * @brief Run the config tool command given on the command line
+ *
+ * @param[in] ctx Config tool context created by conf_tool_init()
+ * @param[out] result Result of the command that was run
+ *                    (NOTE(review): presumably the handler's return value -
+ *                    confirm against cmdline_run())
+ * @return 0 on success, errno on failure
+ */
+int conf_tool_run(struct conf_tool_context *ctx, int *result);
+
+#endif /* __CTDB_CONF_TOOL_H__ */
diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c
new file mode 100644
index 0000000..bf8bc73
--- /dev/null
+++ b/ctdb/common/ctdb_io.c
@@ -0,0 +1,498 @@
+/*
+ ctdb database library
+ Utility functions to read/write blobs of data from a file descriptor
+ and handle the case where we might need multiple read/writes to get all the
+ data.
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/sys_rw.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/logging.h"
+#include "common/common.h"
+
+/* structures for packet queueing */
+
+/* Input buffer that accumulates bytes read from the queue's descriptor */
+struct ctdb_buffer {
+ /* talloc'ed storage, NULL until the first read and after being released */
+ uint8_t *data;
+ /* number of unprocessed bytes, starting at data + offset */
+ uint32_t length;
+ /* allocated size of data */
+ uint32_t size;
+ /* position in data where the next unprocessed packet starts */
+ uint32_t offset;
+};
+
+/* One outgoing packet waiting in a queue's out_queue list */
+struct ctdb_queue_pkt {
+ /* list linkage */
+ struct ctdb_queue_pkt *next, *prev;
+ /* next byte still to be written; advanced after a partial write */
+ uint8_t *data;
+ /* bytes still to be written; reduced after a partial write */
+ uint32_t length;
+ /* compared with length to detect a packet that was partly sent */
+ uint32_t full_length;
+ /* trailing storage allocated together with the struct */
+ /* NOTE(review): presumably the packet bytes that data points into - confirm */
+ uint8_t buf[];
+};
+
+/* A packet queue on top of a file descriptor */
+struct ctdb_queue {
+ /* ctdb context; supplies the event context (ev) and the flags */
+ struct ctdb_context *ctdb;
+ /* immediate event used to continue input processing or report a dead queue */
+ struct tevent_immediate *im;
+ struct ctdb_buffer buffer; /* input buffer */
+ /* outgoing packets; writes are served from the head (out_queue) */
+ struct ctdb_queue_pkt *out_queue, *out_queue_tail;
+ /* number of packets in out_queue, as returned by ctdb_queue_length() */
+ uint32_t out_queue_length;
+ /* fd event for fd; freed when a write fails fatally */
+ struct tevent_fd *fde;
+ /* descriptor being read/written; set to -1 when a write fails fatally */
+ int fd;
+ /* if non-zero, sent packet lengths are rounded up to a multiple of this */
+ size_t alignment;
+ /* opaque pointer handed back to callback */
+ void *private_data;
+ /* called with each complete packet, or with (NULL, 0) on failure */
+ ctdb_queue_cb_fn_t callback;
+ /* talloc context on which packets passed to callback are allocated */
+ TALLOC_CTX *data_pool;
+ /* NOTE(review): not referenced in the code visible here - presumably a label for diagnostics */
+ const char *name;
+ /* size of a freshly allocated input buffer; a larger buffer is released once drained */
+ uint32_t buffer_size;
+};
+
+
+
+/*
+  return the number of packets currently held in the outgoing queue
+*/
+uint32_t ctdb_queue_length(struct ctdb_queue *queue)
+{
+	uint32_t pending = queue->out_queue_length;
+
+	return pending;
+}
+
+static void queue_process(struct ctdb_queue *queue);
+
+/*
+  immediate event handler: continue processing the buffered input of the
+  queue passed as private_data
+*/
+static void queue_process_event(struct tevent_context *ev, struct tevent_immediate *im,
+				void *private_data)
+{
+	queue_process(talloc_get_type(private_data, struct ctdb_queue));
+}
+
+/*
+ * This function is used to process data in queue buffer.
+ *
+ * At most one packet is handed to the callback per call.  The queue callback
+ * can end up freeing the queue, so there must not be a loop processing
+ * packets from the queue buffer.  Instead an immediate event is scheduled to
+ * process the remaining packets, and the callback is invoked last, after all
+ * queue state has been updated.
+ *
+ * On a malformed packet (length 0) or inconsistent buffer accounting the
+ * callback is invoked with (NULL, 0) to report the failure.
+ */
+static void queue_process(struct ctdb_queue *queue)
+{
+	uint32_t pkt_size;
+	uint8_t *data = NULL;
+
+	/* Not even the length field has arrived yet, wait for more data */
+	if (queue->buffer.length < sizeof(pkt_size)) {
+		return;
+	}
+
+	/* A packet starts with its own total length as a 32-bit value */
+	pkt_size = *(uint32_t *)(queue->buffer.data + queue->buffer.offset);
+	if (pkt_size == 0) {
+		DEBUG(DEBUG_CRIT, ("Invalid packet of length 0\n"));
+		goto failed;
+	}
+
+	/* the buffer doesn't contain the full packet, return to get the rest */
+	if (queue->buffer.length < pkt_size) {
+		return;
+	}
+
+	/* Extract complete packet */
+	data = talloc_memdup(queue->data_pool,
+			     queue->buffer.data + queue->buffer.offset,
+			     pkt_size);
+
+	if (data == NULL) {
+		/* The packet stays in the buffer; nothing is consumed */
+		D_ERR("read error alloc failed for %u\n", pkt_size);
+		return;
+	}
+
+	queue->buffer.offset += pkt_size;
+	queue->buffer.length -= pkt_size;
+
+	/* Catch wrap-around of the offset and running past the allocation */
+	if (queue->buffer.offset < pkt_size ||
+	    queue->buffer.offset > queue->buffer.size) {
+		D_ERR("buffer offset overflow\n");
+		/*
+		 * The copy is not handed to the callback on this path, so
+		 * release it here instead of leaving it on data_pool.
+		 */
+		TALLOC_FREE(data);
+		TALLOC_FREE(queue->buffer.data);
+		memset(&queue->buffer, 0, sizeof(queue->buffer));
+		goto failed;
+	}
+
+	if (queue->buffer.length > 0) {
+		/* There is more data to be processed, schedule an event */
+		tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+					  queue_process_event, queue);
+	} else {
+		/* Buffer drained: release it if it grew beyond the default */
+		if (queue->buffer.size > queue->buffer_size) {
+			TALLOC_FREE(queue->buffer.data);
+			queue->buffer.size = 0;
+		}
+		queue->buffer.offset = 0;
+	}
+
+	/* It is the responsibility of the callback to free 'data' */
+	queue->callback(data, pkt_size, queue->private_data);
+	return;
+
+failed:
+	queue->callback(NULL, 0, queue->private_data);
+}
+
+/*
+ called when an incoming connection is readable
+ This function MUST be safe for reentry via the queue callback!
+
+ Reads as much of the available data as fits into the input buffer (growing
+ or compacting the buffer first when the pending packet needs it) and then
+ hands over to queue_process(). On failure the callback is invoked with
+ (NULL, 0).
+*/
+static void queue_io_read(struct ctdb_queue *queue)
+{
+ int num_ready = 0;
+ uint32_t pkt_size = 0;
+ uint32_t start_offset;
+ ssize_t nread;
+ uint8_t *data;
+
+ /* check how much data is available on the socket for immediately
+ guaranteed nonblocking access.
+ as long as we are careful never to try to read more than this
+ we know all reads will be successful and will neither block
+ nor fail with a "data not available right now" error
+ */
+ if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
+ /* the ioctl failed: give up on this event without reporting */
+ return;
+ }
+ if (num_ready == 0) {
+ /* the descriptor has been closed */
+ goto failed;
+ }
+
+ if (queue->buffer.data == NULL) {
+ /* starting fresh, allocate buf to read data */
+ queue->buffer.data = talloc_size(queue, queue->buffer_size);
+ if (queue->buffer.data == NULL) {
+ /* NOTE(review): this logs num_ready, but the size that failed
+ * to allocate is queue->buffer_size */
+ DEBUG(DEBUG_ERR, ("read error alloc failed for %u\n", num_ready));
+ goto failed;
+ }
+ queue->buffer.size = queue->buffer_size;
+ /* presumably offset and length are 0 here, so there is nothing
+ * to inspect or shift - TODO confirm */
+ goto data_read;
+ }
+
+ if (sizeof(pkt_size) > queue->buffer.length) {
+ /* data read is not sufficient to gather message size */
+ goto buffer_shift;
+ }
+
+ /* the length field of the pending packet is available: make sure the
+ * buffer can hold the whole packet */
+ pkt_size = *(uint32_t *)(queue->buffer.data + queue->buffer.offset);
+ if (pkt_size > queue->buffer.size) {
+ data = talloc_realloc_size(queue,
+ queue->buffer.data,
+ pkt_size);
+ if (data == NULL) {
+ DBG_ERR("read error realloc failed for %u\n", pkt_size);
+ goto failed;
+ }
+ queue->buffer.data = data;
+ queue->buffer.size = pkt_size;
+ /* fall through here as we might need to move the data as well */
+ }
+
+buffer_shift:
+ /* pkt_size is still 0 when this label is reached via the goto above */
+ if (sizeof(pkt_size) > queue->buffer.size - queue->buffer.offset ||
+ pkt_size > queue->buffer.size - queue->buffer.offset) {
+ /* Either the offset has progressed too far to host at least
+ * the size information or the remaining space in the buffer
+ * is not sufficient for the full message.
+ * Therefore, move the data and try again.
+ */
+ memmove(queue->buffer.data,
+ queue->buffer.data + queue->buffer.offset,
+ queue->buffer.length);
+ queue->buffer.offset = 0;
+ }
+
+data_read:
+ /* start_offset is the first free byte after the unprocessed data */
+ start_offset = queue->buffer.length + queue->buffer.offset;
+ if (start_offset < queue->buffer.length) {
+ /* the 32-bit addition wrapped around */
+ DBG_ERR("Buffer overflow\n");
+ goto failed;
+ }
+ if (start_offset > queue->buffer.size) {
+ DBG_ERR("Buffer overflow\n");
+ goto failed;
+ }
+
+ /* never read more than the free space left in the buffer */
+ num_ready = MIN(num_ready, queue->buffer.size - start_offset);
+
+ if (num_ready > 0) {
+ nread = sys_read(queue->fd,
+ queue->buffer.data +
+ queue->buffer.offset +
+ queue->buffer.length,
+ num_ready);
+ if (nread <= 0) {
+ DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
+ goto failed;
+ }
+ queue->buffer.length += nread;
+ }
+
+ /* also reached when the buffer had no free space and nothing was read */
+ queue_process(queue);
+ return;
+
+failed:
+ queue->callback(NULL, 0, queue->private_data);
+}
+
+
+/*
+  immediate event handler for a dead queue: report the failure to the queue
+  owner by invoking the callback with no data
+*/
+static void queue_dead(struct tevent_context *ev, struct tevent_immediate *im,
+		       void *private_data)
+{
+	struct ctdb_queue *dead = talloc_get_type(private_data, struct ctdb_queue);
+
+	dead->callback(NULL, 0, dead->private_data);
+}
+
+
+/*
+  called when the queue's descriptor is writeable
+
+  Writes queued packets from the head of the outgoing list until the list is
+  empty or a write cannot complete. When the list has been drained, write
+  readiness notification is switched off. A fatal write error marks the
+  queue dead (fd = -1) and schedules queue_dead().
+*/
+static void queue_io_write(struct ctdb_queue *queue)
+{
+	for (;;) {
+		struct ctdb_queue_pkt *head = queue->out_queue;
+		size_t chunk;
+		ssize_t written;
+
+		if (head == NULL) {
+			break;
+		}
+
+		/* In torture mode only a single byte is written per call */
+		if (queue->ctdb->flags & CTDB_FLAG_TORTURE) {
+			chunk = 1;
+		} else {
+			chunk = head->length;
+		}
+		written = write(queue->fd, head->data, chunk);
+
+		if (written == -1 &&
+		    !(errno == EAGAIN || errno == EWOULDBLOCK)) {
+			if (head->length != head->full_length) {
+				/* partial packet sent - we have to drop it */
+				DLIST_REMOVE(queue->out_queue, head);
+				queue->out_queue_length--;
+				talloc_free(head);
+			}
+			TALLOC_FREE(queue->fde);
+			queue->fd = -1;
+			tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+						  queue_dead, queue);
+			return;
+		}
+
+		/* Nothing written this time, wait for the next event */
+		if (written <= 0) {
+			return;
+		}
+
+		if (written != head->length) {
+			/* Short write: remember how far we got */
+			head->length -= written;
+			head->data += written;
+			return;
+		}
+
+		/* Packet fully written, drop it and carry on with the next */
+		DLIST_REMOVE(queue->out_queue, head);
+		queue->out_queue_length--;
+		talloc_free(head);
+	}
+
+	TEVENT_FD_NOT_WRITEABLE(queue->fde);
+}
+
+/*
+  fd event handler for a queue: a readable event is serviced by
+  queue_io_read(), any other event by queue_io_write().  Only one of the
+  two is run per invocation.
+*/
+static void queue_io_handler(struct tevent_context *ev, struct tevent_fd *fde,
+			     uint16_t flags, void *private_data)
+{
+	struct ctdb_queue *q = talloc_get_type(private_data, struct ctdb_queue);
+
+	if (!(flags & TEVENT_FD_READ)) {
+		queue_io_write(q);
+		return;
+	}
+
+	queue_io_read(q);
+}
+
+
+/*
+ queue a packet for sending
+
+ Returns 0 without doing anything when the queue has no valid fd, and 0
+ on every path visible here (including a hard write error, which is
+ reported through queue_dead() instead). Allocation failure is handled
+ by CTDB_NO_MEMORY(), presumably by returning an error - confirm against
+ the macro definition.
+
+ NOTE: when queue->alignment is set, the caller's buffer is modified:
+ its first 32-bit word is overwritten with the padded length and the
+ padding bytes after "length" are zeroed, so the buffer must have room
+ for the padded length. The rounding expression assumes alignment is a
+ power of two - TODO confirm with callers.
+*/
+int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
+{
+ struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
+ struct ctdb_queue_pkt *pkt;
+ uint32_t length2, full_length;
+
+ /* If the queue does not have valid fd, no point queueing a packet */
+ if (queue->fd == -1) {
+ return 0;
+ }
+
+ if (queue->alignment) {
+ /* enforce the length and alignment rules from the tcp packet allocator */
+ length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
+ *(uint32_t *)data = length2;
+ } else {
+ length2 = length;
+ }
+
+ /* zero the padding that was added by the rounding above */
+ if (length2 != length) {
+ memset(data+length, 0, length2-length);
+ }
+
+ /* remember the size of the complete packet; length2 shrinks below
+ if part of it can be written immediately */
+ full_length = length2;
+
+ /* if the queue is empty then try an immediate write, avoiding
+ queue overhead. This relies on non-blocking sockets */
+ if (queue->out_queue == NULL && queue->fd != -1 &&
+ !(queue->ctdb->flags & CTDB_FLAG_TORTURE)) {
+ ssize_t n = write(queue->fd, data, length2);
+ if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
+ TALLOC_FREE(queue->fde);
+ queue->fd = -1;
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_dead, queue);
+ /* yes, we report success, as the dead node is
+ handled via a separate event */
+ return 0;
+ }
+ if (n > 0) {
+ data += n;
+ length2 -= n;
+ }
+ if (length2 == 0) return 0;
+ }
+
+ /* copy whatever has not been written yet into a queued packet */
+ pkt = talloc_size(
+ queue, offsetof(struct ctdb_queue_pkt, buf) + length2);
+ CTDB_NO_MEMORY(queue->ctdb, pkt);
+ talloc_set_name_const(pkt, "struct ctdb_queue_pkt");
+
+ pkt->data = pkt->buf;
+ memcpy(pkt->data, data, length2);
+
+ pkt->length = length2;
+ pkt->full_length = full_length;
+
+ /* first packet on an empty list: start watching for writeability */
+ if (queue->out_queue == NULL && queue->fd != -1) {
+ TEVENT_FD_WRITEABLE(queue->fde);
+ }
+
+ DLIST_ADD_END(queue->out_queue, pkt);
+
+ queue->out_queue_length++;
+
+ /* optionally give the talloc chunk a descriptive name; hdr still
+ points at the start of the caller's buffer here */
+ if (queue->ctdb->tunable.verbose_memory_names != 0) {
+ switch (hdr->operation) {
+ case CTDB_REQ_CONTROL: {
+ struct ctdb_req_control_old *c = (struct ctdb_req_control_old *)hdr;
+ talloc_set_name(pkt, "ctdb_queue_pkt: %s control opcode=%u srvid=%llu datalen=%u",
+ queue->name, (unsigned)c->opcode, (unsigned long long)c->srvid, (unsigned)c->datalen);
+ break;
+ }
+ case CTDB_REQ_MESSAGE: {
+ struct ctdb_req_message_old *m = (struct ctdb_req_message_old *)hdr;
+ talloc_set_name(pkt, "ctdb_queue_pkt: %s message srvid=%llu datalen=%u",
+ queue->name, (unsigned long long)m->srvid, (unsigned)m->datalen);
+ break;
+ }
+ default:
+ talloc_set_name(pkt, "ctdb_queue_pkt: %s operation=%u length=%u src=%u dest=%u",
+ queue->name, (unsigned)hdr->operation, (unsigned)hdr->length,
+ (unsigned)hdr->srcnode, (unsigned)hdr->destnode);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+  Attach the queue to a (new) fd.
+
+  Any previous fd event is released first.  Passing fd == -1 merely
+  detaches the queue.  For a real fd a read event with auto-close is
+  registered, and writeability is requested as well when packets are
+  already waiting.  Returns 0 on success, -1 if the fd event could not
+  be created.
+ */
+int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd)
+{
+	queue->fd = fd;
+	TALLOC_FREE(queue->fde);
+
+	if (fd == -1) {
+		return 0;
+	}
+
+	queue->fde = tevent_add_fd(queue->ctdb->ev, queue, fd,
+				   TEVENT_FD_READ,
+				   queue_io_handler, queue);
+	if (queue->fde == NULL) {
+		return -1;
+	}
+	tevent_fd_set_auto_close(queue->fde);
+
+	if (queue->out_queue) {
+		/* pending output: ask to be told when we can write */
+		TEVENT_FD_WRITEABLE(queue->fde);
+	}
+
+	return 0;
+}
+
+/*
+  setup a packet queue on a socket
+
+  The queue is allocated on mem_ctx; its name (built from fmt and the
+  variadic arguments), its immediate event and its data pool are all
+  allocated as children of the queue, so freeing the queue releases
+  everything.  If fd is not -1 it is attached via ctdb_queue_set_fd().
+
+  Returns the new queue, or NULL on failure.  On every failure path the
+  partially constructed queue is freed.
+ */
+struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
+				    TALLOC_CTX *mem_ctx, int fd, int alignment,
+				    ctdb_queue_cb_fn_t callback,
+				    void *private_data, const char *fmt, ...)
+{
+	struct ctdb_queue *queue;
+	struct tevent_immediate *im;
+	char *name;
+	va_list ap;
+
+	queue = talloc_zero(mem_ctx, struct ctdb_queue);
+	CTDB_NO_MEMORY_NULL(ctdb, queue);
+
+	/* Parent the name on the queue so it can never outlive it */
+	va_start(ap, fmt);
+	name = talloc_vasprintf(queue, fmt, ap);
+	va_end(ap);
+	if (name == NULL) {
+		/* don't leak the queue; the macro below reports and
+		 * returns NULL because name is NULL */
+		TALLOC_FREE(queue);
+	}
+	CTDB_NO_MEMORY_NULL(ctdb, name);
+	queue->name = name;
+
+	im = tevent_create_immediate(queue);
+	if (im == NULL) {
+		/* same pattern as above: free first, then report */
+		TALLOC_FREE(queue);
+	}
+	CTDB_NO_MEMORY_NULL(ctdb, im);
+	queue->im = im;
+
+	queue->ctdb = ctdb;
+	queue->fd = fd;
+	queue->alignment = alignment;
+	queue->private_data = private_data;
+	queue->callback = callback;
+	if (fd != -1) {
+		if (ctdb_queue_set_fd(queue, fd) != 0) {
+			talloc_free(queue);
+			return NULL;
+		}
+	}
+
+	queue->buffer_size = ctdb->tunable.queue_buffer_size;
+	/* In client code, ctdb->tunable is not initialized.
+	 * This does not affect recovery daemon.
+	 */
+	if (queue->buffer_size == 0) {
+		queue->buffer_size = 1024;
+	}
+
+	queue->data_pool = talloc_pool(queue, queue->buffer_size);
+	if (queue->data_pool == NULL) {
+		TALLOC_FREE(queue);
+		return NULL;
+	}
+
+	return queue;
+}
diff --git a/ctdb/common/ctdb_ltdb.c b/ctdb/common/ctdb_ltdb.c
new file mode 100644
index 0000000..6634416
--- /dev/null
+++ b/ctdb/common/ctdb_ltdb.c
@@ -0,0 +1,430 @@
+/*
+ ctdb ltdb code
+
+ Copyright (C) Andrew Tridgell 2006
+ Copyright (C) Ronnie sahlberg 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+
+/*
+ * Work out the tdb open flags for a database of the given type.
+ *
+ * Persistent databases use TDB_DEFAULT.  Replicated and volatile
+ * databases use TDB_NOSYNC | TDB_CLEAR_IF_FIRST | TDB_INCOMPATIBLE_HASH;
+ * volatile databases additionally get TDB_MUTEX_LOCKING when it is
+ * compiled in, requested via with_mutex, and robust mutexes work at
+ * runtime.  TDB_DISALLOW_NESTING is always added, and TDB_NOMMAP is
+ * added when running under valgrind.
+ */
+int ctdb_db_tdb_flags(uint8_t db_flags, bool with_valgrind, bool with_mutex)
+{
+	int flags;
+
+	if (db_flags & CTDB_DB_FLAGS_PERSISTENT) {
+		flags = TDB_DEFAULT;
+	} else {
+		flags = TDB_NOSYNC |
+			TDB_CLEAR_IF_FIRST |
+			TDB_INCOMPATIBLE_HASH;
+
+#ifdef TDB_MUTEX_LOCKING
+		/* mutex locking is only considered for volatile databases */
+		if (!(db_flags & CTDB_DB_FLAGS_REPLICATED) &&
+		    with_mutex && tdb_runtime_check_for_robust_mutexes()) {
+			flags |= TDB_MUTEX_LOCKING;
+		}
+#endif
+	}
+
+	flags |= TDB_DISALLOW_NESTING;
+	if (with_valgrind) {
+		flags |= TDB_NOMMAP;
+	}
+
+	return flags;
+}
+
+/*
+  Look up an attached database by name.  Returns the matching
+  ctdb_db_context from ctdb->db_list, or NULL if no attached database
+  has that name.
+ */
+struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, const char *name)
+{
+	struct ctdb_db_context *db = ctdb->db_list;
+
+	while (db != NULL) {
+		if (strcmp(name, db->db_name) == 0) {
+			break;
+		}
+		db = db->next;
+	}
+
+	return db;
+}
+
+/* true if the database has the PERSISTENT flag set */
+bool ctdb_db_persistent(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_PERSISTENT) != 0;
+}
+
+/* true if the database has the REPLICATED flag set */
+bool ctdb_db_replicated(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_REPLICATED) != 0;
+}
+
+/* true if the database is neither persistent nor replicated */
+bool ctdb_db_volatile(struct ctdb_db_context *ctdb_db)
+{
+	const uint8_t non_volatile = CTDB_DB_FLAGS_PERSISTENT |
+				     CTDB_DB_FLAGS_REPLICATED;
+
+	return (ctdb_db->db_flags & non_volatile) == 0;
+}
+
+/* true if the database has the READONLY flag set */
+bool ctdb_db_readonly(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_READONLY) != 0;
+}
+
+/* turn the READONLY flag on, leaving all other flags untouched */
+void ctdb_db_set_readonly(struct ctdb_db_context *ctdb_db)
+{
+	ctdb_db->db_flags = ctdb_db->db_flags | CTDB_DB_FLAGS_READONLY;
+}
+
+/* turn the READONLY flag off, leaving all other flags untouched */
+void ctdb_db_reset_readonly(struct ctdb_db_context *ctdb_db)
+{
+	ctdb_db->db_flags = ctdb_db->db_flags & ~CTDB_DB_FLAGS_READONLY;
+}
+
+/* true if the database has the STICKY flag set */
+bool ctdb_db_sticky(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_STICKY) != 0;
+}
+
+/* turn the STICKY flag on, leaving all other flags untouched */
+void ctdb_db_set_sticky(struct ctdb_db_context *ctdb_db)
+{
+	ctdb_db->db_flags = ctdb_db->db_flags | CTDB_DB_FLAGS_STICKY;
+}
+
+/*
+  Return the lmaster for a key: the key's hash, reduced modulo the size
+  of the VNN map, selects the entry of the map to return.
+*/
+uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key)
+{
+	uint32_t slot = ctdb_hash(key) % ctdb->vnn_map->size;
+
+	return ctdb->vnn_map->map[slot];
+}
+
+
+/*
+  Fill in the header used for a record that has no ltdb header yet:
+  everything zeroed, dmaster set to the key's lmaster, and flags set to
+  CTDB_REC_FLAG_AUTOMATIC.
+*/
+static void ltdb_initial_header(struct ctdb_db_context *ctdb_db,
+				TDB_DATA key,
+				struct ctdb_ltdb_header *header)
+{
+	uint32_t lmaster;
+
+	ZERO_STRUCTP(header);
+
+	/* a brand new record starts out owned by its lmaster */
+	lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);
+	header->dmaster = lmaster;
+	header->flags = CTDB_REC_FLAG_AUTOMATIC;
+}
+
+/*
+ State shared between ctdb_ltdb_fetch() and its tdb_parse_record()
+ callback ctdb_ltdb_fetch_fn().
+*/
+struct ctdb_ltdb_fetch_state {
+ /* where the callback copies the record's ltdb header */
+ struct ctdb_ltdb_header *header;
+ /* talloc context the record body is duplicated onto */
+ TALLOC_CTX *mem_ctx;
+ /* optional output for the record body; may be NULL */
+ TDB_DATA *data;
+ /* set to -1 by the callback if duplicating the body fails */
+ int ret;
+ /* set by the callback once a record with a full header was seen */
+ bool found;
+};
+
+/*
+  tdb_parse_record() callback for ctdb_ltdb_fetch().
+
+  Records shorter than an ltdb header are ignored (state->found stays
+  unset).  Otherwise the header is copied out and, if the caller asked
+  for it, the body following the header is duplicated onto
+  state->mem_ctx.  A failed duplication is reported via state->ret; the
+  callback itself always returns 0.
+*/
+static int ctdb_ltdb_fetch_fn(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct ctdb_ltdb_fetch_state *state = private_data;
+	const size_t hdr_len = sizeof(struct ctdb_ltdb_header);
+	TDB_DATA *body = state->data;
+
+	if (data.dsize < hdr_len) {
+		return 0;
+	}
+
+	state->found = true;
+	memcpy(state->header, data.dptr, hdr_len);
+
+	if (body == NULL) {
+		/* caller is only interested in the header */
+		return 0;
+	}
+
+	body->dsize = data.dsize - hdr_len;
+	body->dptr = talloc_memdup(state->mem_ctx,
+				   data.dptr + hdr_len,
+				   body->dsize);
+	if (body->dptr == NULL) {
+		state->ret = -1;
+	}
+
+	return 0;
+}
+
+/*
+  Fetch a record from the ltdb, splitting it into its header and body.
+
+  If the record exists (with at least a full header), header is filled
+  in, the body is returned in *data (when data is not NULL, allocated on
+  mem_ctx) and 0 is returned.
+
+  If the record does not exist, *data is set to tdb_null.  In a client
+  (no vnn_map) header->dmaster is set to (uint32_t)-1 and -1 is
+  returned.  Otherwise a valid initial header is returned, and it is
+  also stored when the database is persistent or this node is the
+  record's dmaster; a failure of that store is only logged.
+
+  Returns -1 on tdb errors other than "record does not exist" and when
+  the body could not be duplicated.
+*/
+int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db,
+		    TDB_DATA key, struct ctdb_ltdb_header *header,
+		    TALLOC_CTX *mem_ctx, TDB_DATA *data)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_ltdb_fetch_state state = {
+		.header = header,
+		.mem_ctx = mem_ctx,
+		.data = data,
+		.found = false,
+	};
+	int ret;
+
+	ret = tdb_parse_record(
+		ctdb_db->ltdb->tdb, key, ctdb_ltdb_fetch_fn, &state);
+
+	/* a missing record is not an error, anything else is */
+	if (ret == -1 &&
+	    tdb_error(ctdb_db->ltdb->tdb) != TDB_ERR_NOEXIST) {
+		return -1;
+	}
+
+	if (state.ret != 0) {
+		DBG_DEBUG("ctdb_ltdb_fetch_fn failed\n");
+		return state.ret;
+	}
+
+	if (state.found) {
+		return 0;
+	}
+
+	/* From here on: there is no (usable) record for this key */
+
+	if (data != NULL) {
+		*data = tdb_null;
+	}
+
+	if (ctdb->vnn_map == NULL) {
+		/* called from the client */
+		header->dmaster = (uint32_t)-1;
+		return -1;
+	}
+
+	ltdb_initial_header(ctdb_db, key, header);
+
+	if (ctdb_db_persistent(ctdb_db) ||
+	    header->dmaster == ctdb_db->ctdb->pnn) {
+		int store_ret;
+
+		store_ret = ctdb_ltdb_store(ctdb_db, key, header, tdb_null);
+		if (store_ret != 0) {
+			DBG_NOTICE("failed to store initial header\n");
+		}
+	}
+
+	return 0;
+}
+
+/*
+  Write a record (ltdb header followed by data) to a normal database.
+
+  If the database has its own store function installed, that function
+  does all the work.  In torture mode the existing record is fetched
+  first and an RSN going backwards is logged.  Returns the result of
+  the store (0 on success).
+*/
+int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
+		    struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	uint32_t hsize = sizeof(struct ctdb_ltdb_header);
+	TDB_DATA rec[2];
+	int ret;
+
+	if (ctdb_db->ctdb_ltdb_store_fn) {
+		return ctdb_db->ctdb_ltdb_store_fn(ctdb_db, key, header, data);
+	}
+
+	if (ctdb->flags & CTDB_FLAG_TORTURE) {
+		TDB_DATA old = tdb_fetch(ctdb_db->ltdb->tdb, key);
+
+		if (old.dptr != NULL) {
+			struct ctdb_ltdb_header *prev =
+				(struct ctdb_ltdb_header *)old.dptr;
+
+			if (old.dsize >= hsize && prev->rsn > header->rsn) {
+				DEBUG(DEBUG_ERR,
+				      ("RSN regression! %"PRIu64" %"PRIu64"\n",
+				       prev->rsn, header->rsn));
+			}
+			/* tdb_fetch() result is malloc'ed */
+			free(old.dptr);
+		}
+	}
+
+	/* header first, then the payload */
+	rec[0].dptr = (uint8_t *)header;
+	rec[0].dsize = hsize;
+
+	rec[1].dptr = data.dptr;
+	rec[1].dsize = data.dsize;
+
+	ret = tdb_storev(ctdb_db->ltdb->tdb, key, rec, 2, TDB_REPLACE);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to store dynamic data\n"));
+	}
+
+	return ret;
+}
+
+/*
+  Take the tdb chain lock covering the given key in the ltdb.
+  Returns whatever tdb_chainlock() returns.
+ */
+int ctdb_ltdb_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
+
+	return tdb_chainlock(tdb, key);
+}
+
+/*
+  Release the tdb chain lock covering the given key in the ltdb.
+  A failure is logged; the tdb_chainunlock() result is returned.
+ */
+int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	int ret;
+
+	ret = tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+	if (ret == 0) {
+		return ret;
+	}
+
+	DEBUG(DEBUG_ERR,("tdb_chainunlock failed on db %s [%s]\n", ctdb_db->db_name, tdb_errorstr(ctdb_db->ltdb->tdb)));
+	return ret;
+}
+
+
+/*
+  Delete a record from a normal database.
+
+  Only volatile databases have records deleted; for any other database
+  the request is logged and ignored (returning 0).  Returns -1 if the
+  tdb delete fails.
+*/
+int ctdb_ltdb_delete(struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	if (ctdb_db_volatile(ctdb_db)) {
+		if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) {
+			DEBUG(DEBUG_ERR,("Failed to delete empty record.\n"));
+			return -1;
+		}
+		return 0;
+	}
+
+	DEBUG(DEBUG_WARNING,
+	      ("Ignored deletion of empty record from "
+	       "non-volatile database\n"));
+	return 0;
+}
+
+/*
+  Set the bit for node "pnn" in a tracking bitmap.
+
+  The bitmap lives in data->dptr/data->dsize as a malloc'ed byte array
+  with bit (pnn % 8) of byte (pnn / 8) representing the node.  If the
+  bitmap is too short it is replaced by a larger zero-filled buffer
+  holding the old contents, and the old buffer is freed.
+
+  Returns 0 on success, -1 if the bitmap could not be grown (the
+  existing bitmap is left untouched in that case).
+*/
+int ctdb_trackingdb_add_pnn(struct ctdb_context *ctdb, TDB_DATA *data, uint32_t pnn)
+{
+	unsigned int byte_pos = pnn / 8;
+	unsigned char bit_mask = 1 << (pnn % 8);
+
+	if (byte_pos + 1 > data->dsize) {
+		uint8_t *buf;
+
+		/* calloc() gives zeroed memory; check the result before
+		 * touching it */
+		buf = calloc(1, byte_pos + 1);
+		if (buf == NULL) {
+			DEBUG(DEBUG_ERR, ("Out of memory when allocating buffer of %u bytes for trackingdb\n", byte_pos + 1));
+			return -1;
+		}
+		if (data->dptr != NULL) {
+			memcpy(buf, data->dptr, data->dsize);
+			free(data->dptr);
+		}
+		data->dptr = buf;
+		data->dsize = byte_pos + 1;
+	}
+
+	data->dptr[byte_pos] |= bit_mask;
+	return 0;
+}
+
+/*
+  Walk a tracking bitmap and invoke cb(ctdb, pnn, private_data) for
+  every bit that is set, in ascending pnn order.  Bit b of byte i
+  corresponds to pnn i * 8 + b.
+*/
+void ctdb_trackingdb_traverse(struct ctdb_context *ctdb, TDB_DATA data, ctdb_trackingdb_cb cb, void *private_data)
+{
+	unsigned int byte_idx;
+	unsigned int bit;
+
+	for (byte_idx = 0; byte_idx < data.dsize; byte_idx++) {
+		for (bit = 0; bit < 8; bit++) {
+			if (((data.dptr[byte_idx] >> bit) & 1) == 0) {
+				continue;
+			}
+			cb(ctdb, byte_idx * 8 + bit, private_data);
+		}
+	}
+}
+
+/*
+  The dummy "null" call that every database supports: it does nothing
+  and always succeeds.
+*/
+int ctdb_null_func(struct ctdb_call_info *call)
+{
+	(void)call;
+	return 0;
+}
+
+/*
+  The plain fetch call that every database supports: the reply simply
+  refers to the record data already held in the call (no copy is made).
+*/
+int ctdb_fetch_func(struct ctdb_call_info *call)
+{
+	TDB_DATA *record = &call->record_data;
+
+	call->reply_data = record;
+	return 0;
+}
+
+/*
+  Fetch call that returns the full record including the ltdb header.
+
+  The reply is a freshly allocated TDB_DATA (talloc child of the call)
+  whose buffer holds the header followed by the record data.
+  Returns 0 on success, -1 on allocation failure.
+*/
+int ctdb_fetch_with_header_func(struct ctdb_call_info *call)
+{
+	const size_t hdr_len = sizeof(struct ctdb_ltdb_header);
+	TDB_DATA *reply;
+
+	reply = talloc(call, TDB_DATA);
+	call->reply_data = reply;
+	if (reply == NULL) {
+		return -1;
+	}
+
+	reply->dsize = hdr_len + call->record_data.dsize;
+	reply->dptr = talloc_size(reply, reply->dsize);
+	if (reply->dptr == NULL) {
+		return -1;
+	}
+
+	/* header first, record body right behind it */
+	memcpy(reply->dptr, call->header, hdr_len);
+	memcpy(&reply->dptr[hdr_len], call->record_data.dptr, call->record_data.dsize);
+
+	return 0;
+}
+
diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c
new file mode 100644
index 0000000..5c7731c
--- /dev/null
+++ b/ctdb/common/ctdb_util.c
@@ -0,0 +1,681 @@
+/*
+ ctdb utility code
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <tdb.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+
+#include "protocol/protocol_util.h"
+
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+  Return the message remembered by the last ctdb_set_error() call.
+*/
+const char *ctdb_errstr(struct ctdb_context *ctdb)
+{
+	const char *last_error = ctdb->err_msg;
+
+	return last_error;
+}
+
+
+/*
+  Remember a printf-style formatted error message in the ctdb context
+  (replacing any previous one) and log it at error level.
+*/
+void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
+{
+	va_list args;
+
+	/* the old message goes away before the new one is built */
+	talloc_free(ctdb->err_msg);
+
+	va_start(args, fmt);
+	ctdb->err_msg = talloc_vasprintf(ctdb, fmt, args);
+	va_end(args);
+
+	DEBUG(DEBUG_ERR,("ctdb error: %s\n", ctdb->err_msg));
+}
+
+/*
+  Report a fatal internal error and abort() the process.
+  This function does not return.
+*/
+void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
+{
+	(void)ctdb;
+
+	DEBUG(DEBUG_ALERT,("ctdb fatal error: %s\n", msg));
+	abort();
+}
+
+/*
+  Like ctdb_fatal(), but for errors where a core/backtrace would not be
+  useful: log the message and exit with status 1.
+*/
+void ctdb_die(struct ctdb_context *ctdb, const char *msg)
+{
+	(void)ctdb;
+
+	DEBUG(DEBUG_ALERT,("ctdb exiting with error: %s\n", msg));
+	exit(1);
+}
+
+/* Work out the path of a helper program.
+ *
+ * helper is assumed to point to a statically allocated array of size
+ * bytes, initialised to "".  If it is already non-empty nothing is
+ * done.  Otherwise the path comes from the environment variable envvar
+ * or, when that is unset, from dir/file.  With envvar unset, a NULL
+ * file means "no fallback" and a NULL dir is an error.  The resulting
+ * path must exist and be executable by its owner.  type is only used
+ * in log messages.
+ *
+ * Returns true if helper is set, either previously or this time. */
+bool ctdb_set_helper(const char *type, char *helper, size_t size,
+		     const char *envvar,
+		     const char *dir, const char *file)
+{
+	const char *from_env;
+	struct stat st;
+
+	if (helper[0] != '\0') {
+		/* Already set */
+		return true;
+	}
+
+	from_env = getenv(envvar);
+	if (from_env != NULL) {
+		if (strlen(from_env) >= size) {
+			DEBUG(DEBUG_ERR,
+			      ("Unable to set %s - path too long\n", type));
+			return false;
+		}
+
+		strncpy(helper, from_env, size);
+	} else {
+		int len;
+
+		if (file == NULL) {
+			return false;
+		}
+		if (dir == NULL) {
+			DEBUG(DEBUG_ERR,
+			      ("Unable to set %s - dir is NULL\n", type));
+			return false;
+		}
+
+		len = snprintf(helper, size, "%s/%s", dir, file);
+		if (len < 0 || (size_t)len >= size) {
+			DEBUG(DEBUG_ERR,
+			      ("Unable to set %s - path too long\n", type));
+			return false;
+		}
+	}
+
+	if (stat(helper, &st) != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Unable to set %s \"%s\" - %s\n",
+		       type, helper, strerror(errno)));
+		return false;
+	}
+	if ((st.st_mode & S_IXUSR) == 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Unable to set %s \"%s\" - not executable\n",
+		       type, helper));
+		return false;
+	}
+
+	DEBUG(DEBUG_NOTICE,
+	      ("Set %s to \"%s\"\n", type, helper));
+	return true;
+}
+
+/*
+  Parse an IP address string into a ctdb_sock_addr.
+
+  The port of the result is the "ctdb" tcp service port from the
+  services database, or CTDB_PORT if there is no such service entry.
+  Returns 0 on success, -1 if the string can not be parsed.
+*/
+int ctdb_parse_address(TALLOC_CTX *mem_ctx, const char *str,
+		       ctdb_sock_addr *address)
+{
+	struct servent *service;
+	int port;
+
+	setservent(0);
+	service = getservbyname("ctdb", "tcp");
+	endservent();
+
+	port = (service == NULL) ? CTDB_PORT : ntohs(service->s_port);
+
+	if (ctdb_sock_addr_from_string(str, address, false) != 0) {
+		return -1;
+	}
+	ctdb_sock_addr_set_port(address, port);
+
+	return 0;
+}
+
+
+/*
+  Check whether two addresses are the same, i.e. same IP and same port.
+*/
+bool ctdb_same_address(ctdb_sock_addr *a1, ctdb_sock_addr *a2)
+{
+	if (!ctdb_same_ip(a1, a2)) {
+		return false;
+	}
+
+	return ctdb_addr_to_port(a1) == ctdb_addr_to_port(a2);
+}
+
+
+/*
+  Hash function used for mapping data to a VNN - this is tdb's jenkins
+  hash of the key.
+*/
+uint32_t ctdb_hash(const TDB_DATA *key)
+{
+	uint32_t hash = tdb_jenkins_hash(discard_const(key));
+
+	return hash;
+}
+
+
+/*
+  Size in bytes of a marshalled record: the fixed part of
+  ctdb_rec_data_old, the key, the data and - if a header is supplied -
+  the ltdb header.
+*/
+static uint32_t ctdb_marshall_record_size(TDB_DATA key,
+					  struct ctdb_ltdb_header *header,
+					  TDB_DATA data)
+{
+	size_t header_len = 0;
+
+	if (header) {
+		header_len = sizeof(*header);
+	}
+
+	return offsetof(struct ctdb_rec_data_old, data) + key.dsize +
+	       data.dsize + header_len;
+}
+
+/*
+  Serialise one record into rec, which must have room for "length"
+  bytes: the fixed fields, then the key, then (if header is not NULL)
+  the ltdb header, then the data.  datalen covers header plus data.
+*/
+static void ctdb_marshall_record_copy(struct ctdb_rec_data_old *rec,
+				      uint32_t reqid,
+				      TDB_DATA key,
+				      struct ctdb_ltdb_header *header,
+				      TDB_DATA data,
+				      uint32_t length)
+{
+	uint32_t pos = 0;
+
+	rec->length = length;
+	rec->reqid = reqid;
+	rec->keylen = key.dsize;
+	rec->datalen = data.dsize;
+
+	memcpy(&rec->data[pos], key.dptr, key.dsize);
+	pos += key.dsize;
+
+	if (header) {
+		rec->datalen = data.dsize + sizeof(*header);
+		memcpy(&rec->data[pos], header, sizeof(*header));
+		pos += sizeof(*header);
+	}
+
+	memcpy(&rec->data[pos], data.dptr, data.dsize);
+}
+
+/*
+  Build a ctdb_rec_data record from a key/data pair, allocated on
+  mem_ctx.
+
+  header may be NULL.  If it is not NULL it is included in the data
+  portion of the record.  Returns NULL on allocation failure.
+ */
+struct ctdb_rec_data_old *ctdb_marshall_record(TALLOC_CTX *mem_ctx,
+					       uint32_t reqid,
+					       TDB_DATA key,
+					       struct ctdb_ltdb_header *header,
+					       TDB_DATA data)
+{
+	struct ctdb_rec_data_old *rec;
+	size_t rec_len = ctdb_marshall_record_size(key, header, data);
+
+	rec = (struct ctdb_rec_data_old *)talloc_size(mem_ctx, rec_len);
+	if (rec != NULL) {
+		ctdb_marshall_record_copy(rec, reqid, key, header, data,
+					  rec_len);
+	}
+
+	return rec;
+}
+
+
+/*
+  Helper for marshalling multiple records: append one record to the
+  marshall buffer m, creating the buffer (with the given db_id) when m
+  is NULL.
+
+  Returns the possibly relocated buffer.  On allocation failure the
+  existing buffer is freed and NULL is returned.
+*/
+struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
+					       struct ctdb_marshall_buffer *m,
+					       uint32_t db_id,
+					       uint32_t reqid,
+					       TDB_DATA key,
+					       struct ctdb_ltdb_header *header,
+					       TDB_DATA data)
+{
+	const bool is_new = (m == NULL);
+	struct ctdb_marshall_buffer *buf;
+	struct ctdb_rec_data_old *rec;
+	uint32_t rec_len, offset;
+
+	rec_len = ctdb_marshall_record_size(key, header, data);
+
+	if (is_new) {
+		/* fresh, zeroed buffer: the record follows the fixed part */
+		offset = offsetof(struct ctdb_marshall_buffer, data);
+		buf = talloc_zero_size(mem_ctx, offset + rec_len);
+	} else {
+		/* grow the buffer: the record goes at the current end */
+		offset = talloc_get_size(m);
+		buf = talloc_realloc_size(mem_ctx, m, offset + rec_len);
+	}
+	if (buf == NULL) {
+		TALLOC_FREE(m);
+		return NULL;
+	}
+
+	if (is_new) {
+		buf->db_id = db_id;
+	}
+
+	rec = (struct ctdb_rec_data_old *)((uint8_t *)buf + offset);
+	ctdb_marshall_record_copy(rec, reqid, key, header, data, rec_len);
+	buf->count++;
+
+	return buf;
+}
+
+/*
+  Marshalling is finished: return a data blob describing the buffer.
+  The blob points at the buffer itself (nothing is copied) and its size
+  is the talloc size of the buffer.
+*/
+TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
+{
+	TDB_DATA blob = {
+		.dptr = (uint8_t *)m,
+		.dsize = talloc_get_size(m),
+	};
+
+	return blob;
+}
+
+/*
+  loop over a marshalling buffer
+
+     - pass r==NULL to start
+     - loop the number of times indicated by m->count
+
+  Returns the next record and fills in whichever of reqid, key, data
+  and header are not NULL.  key and data point into the buffer, nothing
+  is copied except the header.  When header is requested, data refers
+  to the bytes following the header.
+
+  Returns NULL if a header is requested but the record's data is too
+  short to contain one; in that case data and header are left
+  untouched (reqid and key are still filled in).
+*/
+struct ctdb_rec_data_old *ctdb_marshall_loop_next(
+				struct ctdb_marshall_buffer *m,
+				struct ctdb_rec_data_old *r,
+				uint32_t *reqid,
+				struct ctdb_ltdb_header *header,
+				TDB_DATA *key, TDB_DATA *data)
+{
+	if (r == NULL) {
+		r = (struct ctdb_rec_data_old *)&m->data[0];
+	} else {
+		r = (struct ctdb_rec_data_old *)(r->length + (uint8_t *)r);
+	}
+
+	if (reqid != NULL) {
+		*reqid = r->reqid;
+	}
+
+	if (key != NULL) {
+		key->dptr = &r->data[0];
+		key->dsize = r->keylen;
+	}
+
+	/*
+	 * Validate before the header size is subtracted below,
+	 * otherwise data->dsize would wrap around for short records.
+	 */
+	if (header != NULL && r->datalen < sizeof(*header)) {
+		return NULL;
+	}
+
+	if (data != NULL) {
+		data->dptr = &r->data[r->keylen];
+		data->dsize = r->datalen;
+		if (header != NULL) {
+			data->dptr += sizeof(*header);
+			data->dsize -= sizeof(*header);
+		}
+	}
+
+	if (header != NULL) {
+		memcpy(header, &r->data[r->keylen], sizeof(*header));
+	}
+
+	return r;
+}
+
+/*
+  Canonicalize a ctdb_sock_addr structure into cip.
+
+  cip is zeroed first.  IPv4 addresses and plain IPv6 addresses are
+  copied (family, port, address).  IPv4-mapped IPv6 addresses
+  (::ffff:a.b.c.d) are converted to the corresponding IPv4 address.
+  For any other address family cip is left zeroed.
+*/
+void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip)
+{
+	ZERO_STRUCTP(cip);
+
+	switch (ip->sa.sa_family) {
+	case AF_INET6: {
+		const char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
+
+		if (memcmp(&ip->ip6.sin6_addr, prefix, sizeof(prefix)) != 0) {
+			/* Genuine IPv6 address: straight copy */
+			cip->ip6.sin6_family = AF_INET6;
+#ifdef HAVE_SOCK_SIN6_LEN
+			cip->ip6.sin6_len = sizeof(ctdb_sock_addr);
+#endif
+			cip->ip6.sin6_port = ip->ip6.sin6_port;
+			memcpy(&cip->ip6.sin6_addr,
+			       &ip->ip6.sin6_addr,
+			       sizeof(cip->ip6.sin6_addr));
+			break;
+		}
+
+		/* Copy IPv4-mapped IPv6 addresses as IPv4 */
+		cip->ip.sin_family = AF_INET;
+#ifdef HAVE_SOCK_SIN_LEN
+		cip->ip.sin_len = sizeof(ctdb_sock_addr);
+#endif
+		cip->ip.sin_port = ip->ip6.sin6_port;
+		memcpy(&cip->ip.sin_addr,
+		       &ip->ip6.sin6_addr.s6_addr[12],
+		       sizeof(cip->ip.sin_addr));
+		break;
+	}
+
+	case AF_INET:
+		cip->ip.sin_family = AF_INET;
+#ifdef HAVE_SOCK_SIN_LEN
+		cip->ip.sin_len = sizeof(ctdb_sock_addr);
+#endif
+		cip->ip.sin_port = ip->ip.sin_port;
+		memcpy(&cip->ip.sin_addr,
+		       &ip->ip.sin_addr,
+		       sizeof(ip->ip.sin_addr));
+		break;
+
+	default:
+		/* unknown family: result stays all zero */
+		break;
+	}
+}
+
+/*
+  Canonicalize an address in place: the canonical form is built in a
+  temporary and then copied back over the input.
+*/
+void ctdb_canonicalize_ip_inplace(ctdb_sock_addr *ip)
+{
+	ctdb_sock_addr canonical;
+
+	ctdb_canonicalize_ip(ip, &canonical);
+	memcpy(ip, &canonical, sizeof(canonical));
+}
+
+/*
+  Compare the IP addresses (not the ports) of two ctdb_sock_addr
+  structures.  Both are canonicalized first, so an IPv4-mapped IPv6
+  address equals the corresponding IPv4 address.  Addresses of
+  different or unknown families never compare equal.
+*/
+bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2)
+{
+	ctdb_sock_addr a, b;
+
+	ctdb_canonicalize_ip(tip1, &a);
+	ctdb_canonicalize_ip(tip2, &b);
+
+	if (a.sa.sa_family != b.sa.sa_family) {
+		return false;
+	}
+
+	if (a.sa.sa_family == AF_INET) {
+		return a.ip.sin_addr.s_addr == b.ip.sin_addr.s_addr;
+	}
+
+	if (a.sa.sa_family == AF_INET6) {
+		return memcmp(&a.ip6.sin6_addr.s6_addr[0],
+			      &b.ip6.sin6_addr.s6_addr[0],
+			      16) == 0;
+	}
+
+	DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", a.sa.sa_family));
+	return false;
+}
+
+/*
+  Compare two ctdb_sock_addr structures: same IP (see ctdb_same_ip())
+  and same raw port value as read through the ip.sin_port member.
+ */
+bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2)
+{
+	if (!ctdb_same_ip(ip1, ip2)) {
+		return false;
+	}
+
+	return ip1->ip.sin_port == ip2->ip.sin_port;
+}
+
+/*
+  Convert the IP address of addr to its string form.
+
+  The result is held in a static buffer that is overwritten by the
+  next call.  For an unknown address family an error is logged and the
+  buffer is returned with whatever it contained before.
+*/
+char *ctdb_addr_to_str(ctdb_sock_addr *addr)
+{
+	static char cip[128] = "";
+
+	if (addr->sa.sa_family == AF_INET) {
+		inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr, cip, sizeof(cip));
+	} else if (addr->sa.sa_family == AF_INET6) {
+		inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr, cip, sizeof(cip));
+	} else {
+		DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
+	}
+
+	return cip;
+}
+
+/*
+  Return the port of addr in host byte order.  For an unknown address
+  family an error is logged and 0 is returned.
+*/
+unsigned ctdb_addr_to_port(ctdb_sock_addr *addr)
+{
+	if (addr->sa.sa_family == AF_INET) {
+		return ntohs(addr->ip.sin_port);
+	}
+
+	if (addr->sa.sa_family == AF_INET6) {
+		return ntohs(addr->ip6.sin6_port);
+	}
+
+	DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
+	return 0;
+}
+
+/*
+  Add a node to a node map with given address and flags.
+
+  nstr is parsed with ctdb_parse_address().  The new node is appended
+  to *node_map and gets the next free PNN (its index in the map).
+  *node_map may be relocated on success.
+
+  Returns true on success.  On failure false is returned and *node_map
+  is left unchanged and still valid, so the caller keeps ownership and
+  can free it.
+*/
+static bool node_map_add(TALLOC_CTX *mem_ctx,
+			 const char *nstr, uint32_t flags,
+			 struct ctdb_node_map_old **node_map)
+{
+	ctdb_sock_addr addr;
+	uint32_t num;
+	size_t s;
+	struct ctdb_node_map_old *grown;
+	struct ctdb_node_and_flags *n;
+
+	/* Might as well do this before trying to allocate memory */
+	if (ctdb_parse_address(mem_ctx, nstr, &addr) == -1) {
+		return false;
+	}
+
+	num = (*node_map)->num + 1;
+	s = offsetof(struct ctdb_node_map_old, nodes) +
+		num * sizeof(struct ctdb_node_and_flags);
+
+	/*
+	 * Realloc into a temporary: on failure the original map is
+	 * still allocated and must not be lost.
+	 */
+	grown = talloc_realloc_size(mem_ctx, *node_map, s);
+	if (grown == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+		return false;
+	}
+	*node_map = grown;
+
+	n = &grown->nodes[grown->num];
+	n->addr = addr;
+	n->pnn = grown->num;
+	n->flags = flags;
+
+	grown->num++;
+
+	return true;
+}
+
+/*
+  Read a nodes file into a node map allocated on mem_ctx.
+
+  Each line holds one node address.  Leading and trailing spaces/tabs
+  are stripped, empty lines are skipped and trailing empty lines are
+  ignored.  A line starting with '#' is a "deleted" node: it still
+  occupies a PNN but is recorded with address 0.0.0.0 and
+  NODE_FLAGS_DELETED.
+
+  Returns the node map, or NULL on any failure (out of memory,
+  unreadable file, unparsable address).  Nothing is leaked on failure.
+*/
+struct ctdb_node_map_old *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx,
+					       const char *nlist)
+{
+	char **lines;
+	int nlines;
+	int i;
+	struct ctdb_node_map_old *ret;
+
+	/* Allocate node map header */
+	ret = talloc_zero_size(mem_ctx, offsetof(struct ctdb_node_map_old, nodes));
+	if (ret == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+		return NULL;
+	}
+
+	lines = file_lines_load(nlist, &nlines, 0, mem_ctx);
+	if (lines == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to read nodes file \"%s\"\n", nlist));
+		/* don't leak the header allocated above */
+		TALLOC_FREE(ret);
+		return NULL;
+	}
+	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
+		nlines--;
+	}
+
+	for (i=0; i < nlines; i++) {
+		char *node;
+		uint32_t flags;
+		size_t len;
+
+		node = lines[i];
+		/* strip leading spaces */
+		while((*node == ' ') || (*node == '\t')) {
+			node++;
+		}
+
+		len = strlen(node);
+
+		/* strip trailing spaces */
+		while ((len > 1) &&
+		       ((node[len-1] == ' ') || (node[len-1] == '\t')))
+		{
+			node[len-1] = '\0';
+			len--;
+		}
+
+		if (len == 0) {
+			continue;
+		}
+		if (*node == '#') {
+			/* A "deleted" node is a node that is
+			   commented out in the nodes file.  This is
+			   used instead of removing a line, which
+			   would cause subsequent nodes to change
+			   their PNN. */
+			flags = NODE_FLAGS_DELETED;
+			node = discard_const("0.0.0.0");
+		} else {
+			flags = 0;
+		}
+		if (!node_map_add(mem_ctx, node, flags, &ret)) {
+			talloc_free(lines);
+			TALLOC_FREE(ret);
+			return NULL;
+		}
+	}
+
+	talloc_free(lines);
+	return ret;
+}
+
+/*
+  Build a node map (allocated on mem_ctx) from an array of num_nodes
+  node pointers, copying each node's address, PNN and flags.
+  Returns NULL on allocation failure.
+*/
+struct ctdb_node_map_old *
+ctdb_node_list_to_map(struct ctdb_node **nodes, uint32_t num_nodes,
+		      TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_node_map_old *map;
+	size_t map_size;
+	uint32_t idx;
+
+	map_size = offsetof(struct ctdb_node_map_old, nodes) +
+		num_nodes * sizeof(struct ctdb_node_and_flags);
+	map = (struct ctdb_node_map_old *)talloc_zero_size(mem_ctx, map_size);
+	if (map == NULL) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to allocate nodemap array\n"));
+		return NULL;
+	}
+
+	map->num = num_nodes;
+	for (idx = 0; idx < num_nodes; idx++) {
+		const struct ctdb_node *src = nodes[idx];
+		struct ctdb_node_and_flags *dst = &map->nodes[idx];
+
+		dst->addr = src->address;
+		dst->pnn = src->pnn;
+		dst->flags = src->flags;
+	}
+
+	return map;
+}
+
+/*
+ Names of the event script calls.
+ NOTE(review): presumably indexed by the event script call enum, so the
+ order here would have to match that enum - confirm against its
+ definition before reordering or adding entries.
+*/
+const char *ctdb_eventscript_call_names[] = {
+ "init",
+ "setup",
+ "startup",
+ "startrecovery",
+ "recovered",
+ "takeip",
+ "releaseip",
+ "stopped",
+ "monitor",
+ "status",
+ "shutdown",
+ "reload",
+ "updateip",
+ "ipreallocated"
+};
+
+/* Runstate handling */
+
+/*
+ Table mapping each runstate to its printable label. The table is
+ terminated by an entry with a NULL label. Entry 0 (UNKNOWN) doubles as
+ the fallback label returned by runstate_to_string().
+*/
+static struct {
+ enum ctdb_runstate runstate;
+ const char * label;
+} runstate_map[] = {
+ { CTDB_RUNSTATE_UNKNOWN, "UNKNOWN" },
+ { CTDB_RUNSTATE_INIT, "INIT" },
+ { CTDB_RUNSTATE_SETUP, "SETUP" },
+ { CTDB_RUNSTATE_FIRST_RECOVERY, "FIRST_RECOVERY" },
+ { CTDB_RUNSTATE_STARTUP, "STARTUP" },
+ { CTDB_RUNSTATE_RUNNING, "RUNNING" },
+ { CTDB_RUNSTATE_SHUTDOWN, "SHUTDOWN" },
+ /* terminator */
+ { -1, NULL },
+};
+
+/*
+  Return the label for a runstate.  A runstate that is not in
+  runstate_map yields the label of the first entry ("UNKNOWN").
+*/
+const char *runstate_to_string(enum ctdb_runstate runstate)
+{
+	int idx = 0;
+
+	while (runstate_map[idx].label != NULL) {
+		if (runstate_map[idx].runstate == runstate) {
+			return runstate_map[idx].label;
+		}
+		idx++;
+	}
+
+	return runstate_map[0].label;
+}
+
+/*
+  Return the runstate for a label, compared case-insensitively.
+  An unrecognised label yields CTDB_RUNSTATE_UNKNOWN.
+*/
+enum ctdb_runstate runstate_from_string(const char *label)
+{
+	int idx = 0;
+
+	while (runstate_map[idx].label != NULL) {
+		if (strcasecmp(runstate_map[idx].label, label) == 0) {
+			return runstate_map[idx].runstate;
+		}
+		idx++;
+	}
+
+	return CTDB_RUNSTATE_UNKNOWN;
+}
+
+/*
+  Move ctdb to a new runstate.  The change is logged first.  The
+  runstate may only ever increase: a new value that is not greater than
+  the current one is reported through ctdb_fatal().
+*/
+void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate)
+{
+	DEBUG(DEBUG_NOTICE,("Set runstate to %s (%d)\n",
+			    runstate_to_string(runstate), runstate));
+
+	if (!(runstate > ctdb->runstate)) {
+		ctdb_fatal(ctdb, "runstate must always increase");
+	}
+
+	ctdb->runstate = runstate;
+}
+
+/*
+  Convert arbitrary key data into a uint32 array padded to a 4-byte
+  boundary, allocated on mem_ctx.
+
+  Element 0 holds the total number of elements in the array.  The key
+  bytes follow from element 1 on, with the unused tail left zeroed.
+  Returns NULL on allocation failure.
+*/
+uint32_t *ctdb_key_to_idkey(TALLOC_CTX *mem_ctx, TDB_DATA key)
+{
+	uint32_t num_words;
+	uint32_t *idkey;
+
+	/* one word for the count plus the key rounded up to whole words */
+	num_words = 1 + (key.dsize + sizeof(uint32_t)-1) / sizeof(uint32_t);
+
+	idkey = talloc_zero_array(mem_ctx, uint32_t, num_words);
+	if (idkey != NULL) {
+		idkey[0] = num_words;
+		memcpy(&idkey[1], key.dptr, key.dsize);
+	}
+
+	return idkey;
+}
diff --git a/ctdb/common/db_hash.c b/ctdb/common/db_hash.c
new file mode 100644
index 0000000..8dd62c4
--- /dev/null
+++ b/ctdb/common/db_hash.c
@@ -0,0 +1,295 @@
+/*
+ Using tdb as a hash table
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "common/db_hash.h"
+
+/* Hash table context: wraps the tdb handle that holds the records */
+struct db_hash_context {
+ struct tdb_context *db;
+};
+
+
+/* Close the underlying tdb, if one is still open, and clear the handle */
+static int db_hash_destructor(struct db_hash_context *dh)
+{
+	struct tdb_context *db = dh->db;
+
+	if (db == NULL) {
+		return 0;
+	}
+
+	dh->db = NULL;
+	tdb_close(db);
+
+	return 0;
+}
+
+/*
+ * Create a hash table context backed by a tdb opened with TDB_INTERNAL.
+ *
+ * DB_HASH_COMPLEX additionally requests TDB_INCOMPATIBLE_HASH.  On success
+ * the new context (allocated on mem_ctx, with a destructor installed) is
+ * stored in *result and 0 is returned.  ENOMEM is returned if either the
+ * allocation or tdb_open() fails.
+ */
+int db_hash_init(TALLOC_CTX *mem_ctx, const char *name, int hash_size,
+		 enum db_hash_type type, struct db_hash_context **result)
+{
+	struct db_hash_context *ctx;
+	int flags;
+
+	ctx = talloc_zero(mem_ctx, struct db_hash_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	flags = TDB_INTERNAL | TDB_DISALLOW_NESTING;
+	if (type == DB_HASH_COMPLEX) {
+		flags |= TDB_INCOMPATIBLE_HASH;
+	}
+
+	ctx->db = tdb_open(name, hash_size, flags, O_RDWR|O_CREAT, 0);
+	if (ctx->db == NULL) {
+		talloc_free(ctx);
+		return ENOMEM;
+	}
+
+	talloc_set_destructor(ctx, db_hash_destructor);
+
+	*result = ctx;
+	return 0;
+}
+
+/*
+ * Translate the last tdb error recorded on the table into an errno value.
+ * Any tdb error without a specific mapping is reported as EIO.
+ */
+static int db_hash_map_tdb_error(struct db_hash_context *dh)
+{
+	switch (tdb_error(dh->db)) {
+	case TDB_SUCCESS:
+		return 0;
+	case TDB_ERR_OOM:
+		return ENOMEM;
+	case TDB_ERR_EXISTS:
+		return EEXIST;
+	case TDB_ERR_NOEXIST:
+		return ENOENT;
+	case TDB_ERR_EINVAL:
+		return EINVAL;
+	default:
+		return EIO;
+	}
+}
+
+/*
+ * Store a new record; fails if the key is already present (TDB_INSERT).
+ *
+ * Returns 0 on success, EINVAL if dh is NULL, otherwise the errno mapped
+ * from the tdb error.
+ */
+int db_hash_insert(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+		   uint8_t *databuf, size_t datalen)
+{
+	TDB_DATA k = { .dptr = keybuf, .dsize = keylen };
+	TDB_DATA d = { .dptr = databuf, .dsize = datalen };
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (tdb_store(dh->db, k, d, TDB_INSERT) == 0) {
+		return 0;
+	}
+
+	return db_hash_map_tdb_error(dh);
+}
+
+/*
+ * Store a record, replacing any existing record for the key (TDB_REPLACE).
+ *
+ * Returns 0 on success, EINVAL if dh is NULL, otherwise the errno mapped
+ * from the tdb error.
+ */
+int db_hash_add(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+		uint8_t *databuf, size_t datalen)
+{
+	TDB_DATA k = { .dptr = keybuf, .dsize = keylen };
+	TDB_DATA d = { .dptr = databuf, .dsize = datalen };
+	int rc;
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	rc = tdb_store(dh->db, k, d, TDB_REPLACE);
+
+	return (rc == 0) ? 0 : db_hash_map_tdb_error(dh);
+}
+
+/*
+ * Remove the record stored under the given key.
+ *
+ * Returns 0 on success, EINVAL if dh is NULL, otherwise the errno mapped
+ * from the tdb error.
+ */
+int db_hash_delete(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen)
+{
+	TDB_DATA k = { .dptr = keybuf, .dsize = keylen };
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (tdb_delete(dh->db, k) == 0) {
+		return 0;
+	}
+
+	return db_hash_map_tdb_error(dh);
+}
+
+/* Carries the caller's parser and its private data through a fetch */
+struct db_hash_fetch_state {
+	db_hash_record_parser_fn parser;
+	void *private_data;
+};
+
+/*
+ * Record callback used for fetches: unpack the key and data and forward
+ * them to the caller's parser, returning whatever that parser returns.
+ */
+static int db_hash_fetch_parser(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct db_hash_fetch_state *st = private_data;
+
+	return st->parser(key.dptr, key.dsize,
+			  data.dptr, data.dsize,
+			  st->private_data);
+}
+
+/*
+ * Look up a key and hand the matching record to parser.
+ *
+ * Returns EINVAL if dh or parser is NULL.  A result of -1 from
+ * tdb_parse_record() is reported as ENOENT; any other result is returned
+ * unchanged.
+ */
+int db_hash_fetch(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+		  db_hash_record_parser_fn parser, void *private_data)
+{
+	struct db_hash_fetch_state state = {
+		.parser = parser,
+		.private_data = private_data,
+	};
+	TDB_DATA k = { .dptr = keybuf, .dsize = keylen };
+	int rc;
+
+	if (dh == NULL || parser == NULL) {
+		return EINVAL;
+	}
+
+	rc = tdb_parse_record(dh->db, k, db_hash_fetch_parser, &state);
+
+	return (rc == -1) ? ENOENT : rc;
+}
+
+/*
+ * Check whether a record exists for the given key.
+ *
+ * Returns 0 if it does and EINVAL if dh is NULL.  Otherwise the errno mapped
+ * from the tdb error is returned, or ENOENT when tdb reports no error.
+ */
+int db_hash_exists(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen)
+{
+	TDB_DATA k = { .dptr = keybuf, .dsize = keylen };
+	int err;
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (tdb_exists(dh->db, k) == 1) {
+		/* Key found */
+		return 0;
+	}
+
+	err = db_hash_map_tdb_error(dh);
+
+	return (err != 0) ? err : ENOENT;
+}
+
+/* Carries the caller's parser and its private data through a traverse */
+struct db_hash_traverse_state {
+	db_hash_record_parser_fn parser;
+	void *private_data;
+};
+
+/*
+ * Record callback used for traverses: forward each key and data pair to the
+ * caller's parser and return its result.  The tdb argument is unused.
+ */
+static int db_hash_traverse_parser(struct tdb_context *tdb,
+				   TDB_DATA key, TDB_DATA data,
+				   void *private_data)
+{
+	struct db_hash_traverse_state *st = private_data;
+	int rc;
+
+	rc = st->parser(key.dptr, key.dsize, data.dptr, data.dsize,
+			st->private_data);
+	return rc;
+}
+
+/*
+ * Walk all records with tdb_traverse_read().
+ *
+ * A NULL parser is allowed and simply counts the records.  On success the
+ * value returned by the traverse is stored in *count (when count is not
+ * NULL) and 0 is returned.  Returns EINVAL if dh is NULL, or the errno mapped
+ * from the tdb error if the traverse returns -1.
+ */
+int db_hash_traverse(struct db_hash_context *dh,
+		     db_hash_record_parser_fn parser, void *private_data,
+		     int *count)
+{
+	struct db_hash_traverse_state state;
+	int nrec;
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (parser != NULL) {
+		state.parser = parser;
+		state.private_data = private_data;
+
+		nrec = tdb_traverse_read(dh->db,
+					 db_hash_traverse_parser,
+					 &state);
+	} else {
+		/* Special case, for counting records */
+		nrec = tdb_traverse_read(dh->db, NULL, NULL);
+	}
+
+	if (nrec == -1) {
+		return db_hash_map_tdb_error(dh);
+	}
+
+	if (count != NULL) {
+		*count = nrec;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk all records with tdb_traverse(), calling parser for each one.
+ *
+ * On success the value returned by the traverse is stored in *count (when
+ * count is not NULL) and 0 is returned.  Returns EINVAL if dh or parser is
+ * NULL, or the errno mapped from the tdb error if the traverse returns -1.
+ */
+int db_hash_traverse_update(struct db_hash_context *dh,
+			    db_hash_record_parser_fn parser,
+			    void *private_data, int *count)
+{
+	struct db_hash_traverse_state state = {
+		.parser = parser,
+		.private_data = private_data,
+	};
+	int nrec;
+
+	if (dh == NULL || parser == NULL) {
+		return EINVAL;
+	}
+
+	nrec = tdb_traverse(dh->db, db_hash_traverse_parser, &state);
+	if (nrec == -1) {
+		return db_hash_map_tdb_error(dh);
+	}
+
+	if (count != NULL) {
+		*count = nrec;
+	}
+
+	return 0;
+}
diff --git a/ctdb/common/db_hash.h b/ctdb/common/db_hash.h
new file mode 100644
index 0000000..67e2b85
--- /dev/null
+++ b/ctdb/common/db_hash.h
@@ -0,0 +1,174 @@
+/*
+ Using tdb as a hash table
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_DB_HASH_H__
+#define __CTDB_DB_HASH_H__
+
+#include <talloc.h>
+#include <tdb.h>
+
+/**
+ * @file db_hash.h
+ *
+ * @brief Use tdb database as a hash table
+ *
+ * This uses in-memory tdb databases to create a fixed sized hash table.
+ */
+
+/**
+ * @brief Hash type to indicate the hashing function to use.
+ *
+ * DB_HASH_SIMPLE uses default hashing function
+ * DB_HASH_COMPLEX uses jenkins hashing function
+ */
+enum db_hash_type {
+ /* Use the default hashing function */
+ DB_HASH_SIMPLE,
+ /* Use the jenkins hashing function */
+ DB_HASH_COMPLEX,
+};
+
+/**
+ * @brief Parser callback function called when fetching or traversing records
+ *
+ * This function is called for the record found by a fetch, and for each
+ * record visited by a traverse. This function should not modify key and
+ * data arguments.
+ *
+ * The function should return 0 on success and errno on error.
+ */
+typedef int (*db_hash_record_parser_fn)(uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen,
+ void *private_data);
+
+/**
+ * @brief Abstract structure representing tdb hash table
+ */
+struct db_hash_context;
+
+/**
+ * @brief Initialize tdb hash table
+ *
+ * This returns a new tdb hash table context which is a talloc context. Freeing
+ * this context will free all the memory associated with the hash table.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] name The name for the hash table
+ * @param[in] hash_size The size of the hash table
+ * @param[in] type The type of hashing function to use
+ * @param[out] result The new db_hash_context structure
+ * @return 0 on success, errno on failure
+ */
+int db_hash_init(TALLOC_CTX *mem_ctx, const char *name, int hash_size,
+ enum db_hash_type type, struct db_hash_context **result);
+
+/**
+ * @brief Insert a record into the hash table
+ *
+ * The key and data can be any binary data. Insert only if the record does not
+ * exist. If the record already exists, return error.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @param[in] databuf The data buffer
+ * @param[in] datalen The data length
+ * @return 0 on success, errno on failure
+ */
+int db_hash_insert(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen);
+
+/**
+ * @brief Add a record into the hash table
+ *
+ * The key and data can be any binary data. If the record does not exist,
+ * insert the record. If the record already exists, replace the record.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @param[in] databuf The data buffer
+ * @param[in] datalen The data length
+ * @return 0 on success, errno on failure
+ */
+int db_hash_add(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen);
+/**
+ * @brief Delete a record from the hash table
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @return 0 on success, errno on failure
+ */
+int db_hash_delete(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen);
+
+/**
+ * @brief Fetch a record from the hash table
+ *
+ * The key and data can be any binary data.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @param[in] parser Function called when the matching record is found
+ * @param[in] private_data Private data to parser function
+ * @return 0 on success, errno on failure
+ */
+int db_hash_fetch(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+ db_hash_record_parser_fn parser, void *private_data);
+
+/**
+ * @brief Check if a record exists in the hash table
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @return 0 if the record exists, ENOENT if it does not, other errno on failure
+ */
+int db_hash_exists(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen);
+
+/**
+ * @brief Traverse the database without modification
+ *
+ * The parser function should return non-zero value to stop traverse.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] parser Function called for each record
+ * @param[in] private_data Private data to parser function
+ * @param[out] count Number of records traversed
+ * @return 0 on success, errno on failure
+ */
+int db_hash_traverse(struct db_hash_context *dh,
+ db_hash_record_parser_fn parser, void *private_data,
+ int *count);
+
+/**
+ * @brief Traverse the database for modifications
+ *
+ * The parser function should return non-zero value to stop traverse.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] parser Function called for each record
+ * @param[in] private_data Private data to parser function
+ * @param[out] count Number of records traversed
+ * @return 0 on success, errno on failure
+ */
+int db_hash_traverse_update(struct db_hash_context *dh,
+ db_hash_record_parser_fn parser,
+ void *private_data, int *count);
+
+#endif /* __CTDB_DB_HASH_H__ */
diff --git a/ctdb/common/event_script.c b/ctdb/common/event_script.c
new file mode 100644
index 0000000..edd607f
--- /dev/null
+++ b/ctdb/common/event_script.c
@@ -0,0 +1,247 @@
+/*
+ Low level event script handling
+
+ Copyright (C) Amitay Isaacs 2017
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+#include "system/glob.h"
+
+#include <talloc.h>
+
+#include "common/event_script.h"
+
+/*
+ * Directory entry filter: return 1 for names of the form NN.<name>.script
+ * (two leading digits), 0 for anything else.
+ */
+static int script_filter(const struct dirent *de)
+{
+	/* Match a script pattern */
+	const char *pattern = "[0-9][0-9].*.script";
+
+	return (fnmatch(pattern, de->d_name, 0) == 0) ? 1 : 0;
+}
+
+/*
+ * Build a list of the event scripts found in script_dir.
+ *
+ * Directory entries are selected with script_filter() and sorted with
+ * alphasort().  For each one, name is the file name without the ".script"
+ * suffix, path is "script_dir/file name" and enabled is true when stat()
+ * succeeds and the owner-execute bit (S_IXUSR) is set.
+ *
+ * On success 0 is returned and *out holds the list, allocated on mem_ctx
+ * (an empty list if no scripts match).  On failure errno from scandir() or
+ * ENOMEM is returned and *out is not written.
+ */
+int event_script_get_list(TALLOC_CTX *mem_ctx,
+ const char *script_dir,
+ struct event_script_list **out)
+{
+ struct dirent **namelist = NULL;
+ struct event_script_list *script_list = NULL;
+ size_t ds_len;
+ int count, ret;
+ int i;
+
+ count = scandir(script_dir, &namelist, script_filter, alphasort);
+ if (count == -1) {
+ ret = errno;
+ goto done;
+ }
+
+ script_list = talloc_zero(mem_ctx, struct event_script_list);
+ if (script_list == NULL) {
+ goto nomem;
+ }
+
+ /* No matching scripts: hand back the empty (zeroed) list */
+ if (count == 0) {
+ ret = 0;
+ *out = script_list;
+ goto done;
+ }
+
+ script_list->num_scripts = count;
+ script_list->script = talloc_zero_array(script_list,
+ struct event_script *,
+ count);
+ if (script_list->script == NULL) {
+ goto nomem;
+ }
+
+ ds_len = strlen(".script");
+ for (i = 0; i < count; i++) {
+ struct event_script *s;
+ struct stat statbuf;
+
+ s = talloc_zero(script_list->script, struct event_script);
+ if (s == NULL) {
+ goto nomem;
+ }
+
+ script_list->script[i] = s;
+
+ /* script_filter() only passes names ending in ".script" */
+ s->name = talloc_strndup(script_list->script,
+ namelist[i]->d_name,
+ strlen(namelist[i]->d_name) - ds_len);
+ if (s->name == NULL) {
+ goto nomem;
+ }
+
+ s->path = talloc_asprintf(script_list->script,
+ "%s/%s",
+ script_dir,
+ namelist[i]->d_name);
+ if (s->path == NULL) {
+ goto nomem;
+ }
+
+ ret = stat(s->path, &statbuf);
+ if (ret == 0) {
+ /*
+ * If stat() had failed, this would be either a
+ * dangling symlink or a file that has just
+ * disappeared.  Either way it isn't executable,
+ * so enabled is left false in that case.
+ */
+ if (statbuf.st_mode & S_IXUSR) {
+ s->enabled = true;
+ }
+ }
+ }
+
+ /* ret may hold a stat() result at this point, so reset it */
+ *out = script_list;
+ ret = 0;
+ goto done;
+
+nomem:
+ ret = ENOMEM;
+ talloc_free(script_list);
+
+done:
+ /* The scandir() results are malloc'd: free each entry, then the array */
+ if (namelist != NULL && count != -1) {
+ for (i=0; i<count; i++) {
+ free(namelist[i]);
+ }
+ free(namelist);
+ }
+
+ return ret;
+}
+
+/*
+ * Set or clear the execute bits (user, group and other) on a script.
+ *
+ * script_name may be given with or without the ".script" suffix.  The file
+ * must appear as a directory entry of script_dir and must match the script
+ * name pattern checked by script_filter().
+ *
+ * Returns 0 on success, otherwise:
+ *   ENAMETOOLONG  a constructed file name or path does not fit in PATH_MAX
+ *   ENOENT        no directory entry with that name
+ *   EINVAL        the name is not a valid script name, or the inode of the
+ *                 opened file differs from that of the directory entry
+ *   errno         from opendir(), open(), fstat() or fchmod()
+ */
+int event_script_chmod(const char *script_dir,
+ const char *script_name,
+ bool enable)
+{
+ const char *dot_script = ".script";
+ size_t ds_len = strlen(dot_script);
+ size_t sn_len = strlen(script_name);
+ DIR *dirp;
+ struct dirent *de;
+ char buf[PATH_MAX];
+ const char *script_file;
+ int ret, new_mode;
+ char filename[PATH_MAX];
+ struct stat st;
+ bool found;
+ ino_t found_inode;
+ int fd = -1;
+
+ /* Allow script_name to already have ".script" suffix */
+ if (sn_len > ds_len &&
+ strcmp(&script_name[sn_len - ds_len], dot_script) == 0) {
+ script_file = script_name;
+ } else {
+ ret = snprintf(buf, sizeof(buf), "%s.script", script_name);
+ if (ret < 0 || (size_t)ret >= sizeof(buf)) {
+ return ENAMETOOLONG;
+ }
+ script_file = buf;
+ }
+
+ dirp = opendir(script_dir);
+ if (dirp == NULL) {
+ return errno;
+ }
+
+ /* Look for an exact name match and remember its inode number */
+ found = false;
+ while ((de = readdir(dirp)) != NULL) {
+ if (strcmp(de->d_name, script_file) == 0) {
+ /* check for valid script names */
+ ret = script_filter(de);
+ if (ret == 0) {
+ closedir(dirp);
+ return EINVAL;
+ }
+
+ found = true;
+ found_inode = de->d_ino;
+ break;
+ }
+ }
+ closedir(dirp);
+
+ if (! found) {
+ return ENOENT;
+ }
+
+ ret = snprintf(filename,
+ sizeof(filename),
+ "%s/%s",
+ script_dir,
+ script_file);
+ if (ret < 0 || (size_t)ret >= sizeof(filename)) {
+ return ENAMETOOLONG;
+ }
+
+ /* fstat() and fchmod() below both operate on this descriptor */
+ fd = open(filename, O_RDWR);
+ if (fd == -1) {
+ ret = errno;
+ goto done;
+ }
+
+ ret = fstat(fd, &st);
+ if (ret != 0) {
+ ret = errno;
+ goto done;
+ }
+
+ /*
+ * If the directory entry inode number doesn't match the one
+ * returned by fstat() then this is probably a symlink, so the
+ * caller should not be calling this function. Note that this
+ * is a cheap sanity check to catch most programming errors.
+ * This doesn't cost any extra system calls but can still miss
+ * the unlikely case where the symlink is to a file on a
+ * different filesystem with the same inode number as the
+ * symlink.
+ */
+ if (found && found_inode != st.st_ino) {
+ ret = EINVAL;
+ goto done;
+ }
+
+ /* Only the three execute bits change; other mode bits are kept */
+ if (enable) {
+ new_mode = st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH);
+ } else {
+ new_mode = st.st_mode & ~(S_IXUSR | S_IXGRP | S_IXOTH);
+ }
+
+ ret = fchmod(fd, new_mode);
+ if (ret != 0) {
+ ret = errno;
+ goto done;
+ }
+
+done:
+ if (fd != -1) {
+ close(fd);
+ }
+ return ret;
+}
diff --git a/ctdb/common/event_script.h b/ctdb/common/event_script.h
new file mode 100644
index 0000000..bf5a8fd
--- /dev/null
+++ b/ctdb/common/event_script.h
@@ -0,0 +1,72 @@
+/*
+ Low level event script handling
+
+ Copyright (C) Amitay Isaacs 2017
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SCRIPT_H__
+#define __CTDB_SCRIPT_H__
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+
+/**
+ * @file event_script.h
+ *
+ * @brief Script listing and manipulation
+ */
+
+
+/* A single event script found in a script directory */
+struct event_script {
+ /* File name with the trailing ".script" suffix removed */
+ char *name;
+ /* Script directory and file name joined with "/" */
+ char *path;
+ /* True if the file's owner-execute bit (S_IXUSR) is set */
+ bool enabled;
+};
+
+/* List of event scripts, as returned by event_script_get_list() */
+struct event_script_list {
+ /* Number of entries in script */
+ unsigned int num_scripts;
+ /* Array of num_scripts script pointers; NULL when the list is empty */
+ struct event_script **script;
+};
+
+
+/**
+ * @brief Retrieve a list of scripts
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] script_dir Directory containing scripts
+ * @param[out] out List of scripts
+ * @return 0 on success, errno on failure
+ */
+int event_script_get_list(TALLOC_CTX *mem_ctx,
+ const char *script_dir,
+ struct event_script_list **out);
+
+/**
+ * @brief Make a script executable or not executable
+ *
+ * @param[in] script_dir Directory containing script
+ * @param[in] script_name Name of the script, with or without ".script" suffix
+ * @param[in] executable True if script should be made executable
+ * @return 0 on success, errno on failure
+ */
+int event_script_chmod(const char *script_dir,
+ const char *script_name,
+ bool executable);
+
+#endif /* __CTDB_SCRIPT_H__ */
diff --git a/ctdb/common/hash_count.c b/ctdb/common/hash_count.c
new file mode 100644
index 0000000..f845016
--- /dev/null
+++ b/ctdb/common/hash_count.c
@@ -0,0 +1,219 @@
+/*
+ Using hash table for counting events
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/time.h"
+
+#include <tdb.h>
+
+#include "lib/util/time.h"
+
+#include "common/db_hash.h"
+#include "common/hash_count.h"
+
+/* Value stored for each key in the hash table */
+struct hash_count_value {
+ /* Time at which counter was last (re)set to 1 */
+ struct timeval update_time;
+ /* Number of increments since update_time */
+ uint64_t counter;
+};
+
+/* Hash count context */
+struct hash_count_context {
+ /* Hash table holding one struct hash_count_value per key */
+ struct db_hash_context *dh;
+ /* Length of the counting interval */
+ struct timeval update_interval;
+ /* Callback invoked on every increment, with its private data */
+ hash_count_update_handler_fn handler;
+ void *private_data;
+};
+
+/*
+ * Initialise hash count map
+ *
+ * Allocates the context on mem_ctx and creates its backing hash table as a
+ * child of the context.  Returns 0 and stores the context in *result on
+ * success, EINVAL if handler is NULL, ENOMEM if the allocation fails, or
+ * the error returned by db_hash_init().
+ */
+int hash_count_init(TALLOC_CTX *mem_ctx, struct timeval update_interval,
+		    hash_count_update_handler_fn handler, void *private_data,
+		    struct hash_count_context **result)
+{
+	struct hash_count_context *ctx;
+	int err;
+
+	if (handler == NULL) {
+		return EINVAL;
+	}
+
+	ctx = talloc_zero(mem_ctx, struct hash_count_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->update_interval = update_interval;
+	ctx->handler = handler;
+	ctx->private_data = private_data;
+
+	err = db_hash_init(ctx, "hash_count_db", 8192, DB_HASH_COMPLEX,
+			   &ctx->dh);
+	if (err != 0) {
+		talloc_free(ctx);
+		return err;
+	}
+
+	*result = ctx;
+	return 0;
+}
+
+/*
+ * Record parser used by hash_count_fetch(): copy the stored value out.
+ *
+ * private_data points to the struct hash_count_value to fill in.  Returns
+ * EIO if the stored record does not have the expected size, 0 otherwise.
+ */
+static int hash_count_fetch_parser(uint8_t *keybuf, size_t keylen,
+				   uint8_t *databuf, size_t datalen,
+				   void *private_data)
+{
+	struct hash_count_value *value =
+		(struct hash_count_value *)private_data;
+
+	if (datalen != sizeof(struct hash_count_value)) {
+		return EIO;
+	}
+
+	/*
+	 * databuf is a byte pointer with no alignment guarantee, so copy the
+	 * bytes rather than dereferencing it as a struct pointer.
+	 */
+	memcpy(value, databuf, sizeof(struct hash_count_value));
+	return 0;
+}
+
+/* Read the stored value for key into *value; returns db_hash_fetch()'s result */
+static int hash_count_fetch(struct hash_count_context *hcount, TDB_DATA key,
+			    struct hash_count_value *value)
+{
+	struct db_hash_context *dh = hcount->dh;
+
+	return db_hash_fetch(dh,
+			     key.dptr,
+			     key.dsize,
+			     hash_count_fetch_parser,
+			     value);
+}
+
+/* Store *value as a new record for key; returns db_hash_insert()'s result */
+static int hash_count_insert(struct hash_count_context *hcount, TDB_DATA key,
+			     struct hash_count_value *value)
+{
+	uint8_t *raw = (uint8_t *)value;
+	size_t rawlen = sizeof(struct hash_count_value);
+
+	return db_hash_insert(hcount->dh, key.dptr, key.dsize, raw, rawlen);
+}
+
+/* Store *value for key, replacing any record; returns db_hash_add()'s result */
+static int hash_count_update(struct hash_count_context *hcount, TDB_DATA key,
+			     struct hash_count_value *value)
+{
+	uint8_t *raw = (uint8_t *)value;
+	size_t rawlen = sizeof(struct hash_count_value);
+
+	return db_hash_add(hcount->dh, key.dptr, key.dsize, raw, rawlen);
+}
+
+/*
+ * Count one event for key and call the handler with the resulting counter.
+ *
+ * A key that is not yet stored starts at 1.  For a stored key the counter is
+ * incremented while the current time is before update_time plus the update
+ * interval; otherwise it restarts at 1 with update_time set to now.
+ *
+ * Returns EINVAL if hcount is NULL, otherwise the result of storing the
+ * value, or the fetch error if the fetch fails with anything but ENOENT.
+ */
+int hash_count_increment(struct hash_count_context *hcount, TDB_DATA key)
+{
+	struct timeval now = timeval_current();
+	struct hash_count_value value;
+	struct timeval window_end;
+	int ret;
+
+	if (hcount == NULL) {
+		return EINVAL;
+	}
+
+	ret = hash_count_fetch(hcount, key, &value);
+	if (ret == ENOENT) {
+		/* First event for this key */
+		value.update_time = now;
+		value.counter = 1;
+
+		hcount->handler(key, value.counter, hcount->private_data);
+		return hash_count_insert(hcount, key, &value);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+
+	window_end = timeval_sum(&value.update_time, &hcount->update_interval);
+	if (timeval_compare(&now, &window_end) >= 0) {
+		/* Interval has elapsed: start a new one */
+		value.update_time = now;
+		value.counter = 1;
+	} else {
+		value.counter += 1;
+	}
+
+	hcount->handler(key, value.counter, hcount->private_data);
+	return hash_count_update(hcount, key, &value);
+}
+
+/*
+ * Return tv1 - tv2.
+ *
+ * One second is borrowed from tv1 before subtracting, so that the
+ * microsecond field cannot go negative for a normalised tv2; the result is
+ * then normalised again.  If tv1 has no whole second to borrow
+ * (tv_sec < 1), tv1 is returned unchanged.
+ */
+static struct timeval timeval_subtract(const struct timeval *tv1,
+				       const struct timeval *tv2)
+{
+	struct timeval tv = *tv1;
+	const long million = 1000000;
+
+	/*
+	 * A single whole second is enough to borrow from.  Requiring more
+	 * than one would skip the subtraction when tv_sec is exactly 1.
+	 */
+	if (tv.tv_sec < 1) {
+		return tv;
+	}
+
+	tv.tv_sec -= 1;
+	tv.tv_usec += million;
+
+	tv.tv_sec -= tv2->tv_sec;
+	tv.tv_usec -= tv2->tv_usec;
+
+	/* Carry any whole second left in the microsecond field back over */
+	tv.tv_sec += tv.tv_usec / million;
+	tv.tv_usec = tv.tv_usec % million;
+
+	return tv;
+}
+
+/* State shared with hash_count_expire_parser() during an expiry traverse */
+struct hash_count_expire_state {
+ /* Hash table that records are deleted from */
+ struct db_hash_context *dh;
+ /* Records with update_time earlier than this are deleted */
+ struct timeval last_time;
+ /* Number of records deleted so far */
+ int count;
+};
+
+/*
+ * Traverse callback: delete the record if its update_time is earlier than
+ * the cut-off in the expire state, counting each successful deletion.
+ *
+ * Returns EIO if the record does not have the expected size, the error from
+ * db_hash_delete() if the deletion fails, and 0 otherwise.
+ */
+static int hash_count_expire_parser(uint8_t *keybuf, size_t keylen,
+				    uint8_t *databuf, size_t datalen,
+				    void *private_data)
+{
+	struct hash_count_expire_state *state =
+		(struct hash_count_expire_state *)private_data;
+	struct hash_count_value value;
+	int ret;
+
+	if (datalen != sizeof(struct hash_count_value)) {
+		return EIO;
+	}
+
+	/*
+	 * databuf is a byte pointer with no alignment guarantee, so copy the
+	 * value out instead of reading it through a cast struct pointer.
+	 */
+	memcpy(&value, databuf, sizeof(struct hash_count_value));
+
+	if (timeval_compare(&value.update_time, &state->last_time) >= 0) {
+		/* Still within the interval: keep the record */
+		return 0;
+	}
+
+	ret = db_hash_delete(state->dh, keybuf, keylen);
+	if (ret == 0) {
+		state->count += 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Delete every record whose update_time is earlier than the current time
+ * minus the update interval.
+ *
+ * The traverse result is ignored.  If delete_count is not NULL it receives
+ * the number of records deleted.
+ */
+void hash_count_expire(struct hash_count_context *hcount, int *delete_count)
+{
+	struct timeval now = timeval_current();
+	struct hash_count_expire_state state = {
+		.dh = hcount->dh,
+		.last_time = timeval_subtract(&now, &hcount->update_interval),
+		.count = 0,
+	};
+
+	(void) db_hash_traverse_update(hcount->dh,
+				       hash_count_expire_parser,
+				       &state,
+				       NULL);
+
+	if (delete_count == NULL) {
+		return;
+	}
+
+	*delete_count = state.count;
+}
diff --git a/ctdb/common/hash_count.h b/ctdb/common/hash_count.h
new file mode 100644
index 0000000..f14c82c
--- /dev/null
+++ b/ctdb/common/hash_count.h
@@ -0,0 +1,94 @@
+/*
+ Using hash table for counting events
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_HASH_COUNT_H__
+#define __CTDB_HASH_COUNT_H__
+
+/**
+ * @file hash_count.h
+ *
+ * @brief Count key-based events for specified interval
+ *
+ * This can be used to measure the rate of events based on any interval.
+ * For example, number of occurrences per second.
+ */
+
+/**
+ * @brief Handler callback function called when counter is incremented
+ *
+ * This function is called every time a counter is incremented for a key.
+ * The counter argument is the number of times the increment function is
+ * called during a count interval.
+ *
+ * This function should not modify the key argument.
+ */
+typedef void (*hash_count_update_handler_fn)(TDB_DATA key, uint64_t counter,
+ void *private_data);
+
+/**
+ * @brief Abstract structure representing hash based counting
+ */
+struct hash_count_context;
+
+/**
+ * @brief Initialize hash counting
+ *
+ * This returns a new hash count context which is a talloc context. Freeing
+ * this context will free all the memory associated with hash count.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] count_interval The time interval for counting events
+ * @param[in] handler Function called when counter is incremented
+ * @param[in] private_data Private data to handler function
+ * @param[out] result The new hash_count structure
+ * @return 0 on success, errno on failure
+ */
+int hash_count_init(TALLOC_CTX *mem_ctx, struct timeval count_interval,
+ hash_count_update_handler_fn handler, void *private_data,
+ struct hash_count_context **result);
+
+/**
+ * @brief Increment a counter for a key
+ *
+ * The first time this is called for a key, the corresponding counter is set
+ * to 1 and the start time is noted. All subsequent calls made during the
+ * count_interval (used in initializing the context) will increment the
+ * corresponding counter for the key. After the count_interval has elapsed,
+ * the next call resets the counter to 1.
+ *
+ * @param[in] hcount The hash count context
+ * @param[in] key The key for which counter is updated
+ * @return 0 on success, errno on failure
+ *
+ * This will result in a callback function being called.
+ */
+int hash_count_increment(struct hash_count_context *hcount, TDB_DATA key);
+
+/**
+ * @brief Remove keys for which count interval has elapsed
+ *
+ * This function is used to clean the database of keys for which there are
+ * no recent events.
+ *
+ * @param[in] hcount The hash count context
+ * @param[out] delete_count The number of keys deleted
+ */
+void hash_count_expire(struct hash_count_context *hcount, int *delete_count);
+
+#endif /* __CTDB_HASH_COUNT_H__ */
diff --git a/ctdb/common/line.c b/ctdb/common/line.c
new file mode 100644
index 0000000..c4c6726
--- /dev/null
+++ b/ctdb/common/line.c
@@ -0,0 +1,145 @@
+/*
+ Line based I/O over fds
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/sys_rw.h"
+
+#include "common/line.h"
+
+/* State carried across reads while splitting the input into lines */
+struct line_read_state {
+ /* Function called for each complete line, with its private data */
+ line_process_fn_t callback;
+ void *private_data;
+ /* Read buffer, grown in steps of hint bytes */
+ char *buf;
+ /* hint: growth step, len: allocated size, offset: bytes currently held */
+ size_t hint, len, offset;
+ /* Number of lines passed to callback so far */
+ int num_lines;
+};
+
+/*
+ * Find the first line terminator ('\n' or '\0') in buf[start..len).
+ *
+ * On success its index is stored in *pos and true is returned.  If there is
+ * none, false is returned and *pos is left untouched.
+ */
+static bool line_read_one(char *buf, size_t start, size_t len, size_t *pos)
+{
+	size_t idx = start;
+
+	while (idx < len) {
+		char ch = buf[idx];
+
+		if (ch == '\0' || ch == '\n') {
+			*pos = idx;
+			return true;
+		}
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Hand every complete line currently in the buffer to the callback.
+ *
+ * A line ends at '\n' or '\0'; the terminator is overwritten with '\0'
+ * before the callback sees the line.  The consumed lines are then removed
+ * from the front of the buffer, so only a trailing partial line (if any)
+ * is kept for the next read.
+ *
+ * Returns 0 on success, or the first non-zero value returned by the
+ * callback, in which case the buffer is left as it is.
+ */
+static int line_read_process(struct line_read_state *state)
+{
+	size_t start = 0;
+	size_t pos = 0;
+
+	while (1) {
+		int ret;
+		bool ok;
+
+		ok = line_read_one(state->buf, start, state->offset, &pos);
+		if (! ok) {
+			break;
+		}
+
+		state->buf[pos] = '\0';
+		state->num_lines += 1;
+
+		ret = state->callback(state->buf + start, state->private_data);
+		if (ret != 0) {
+			return ret;
+		}
+
+		start = pos+1;
+	}
+
+	/*
+	 * start is the offset just past the last consumed line.  Test start
+	 * rather than pos: an empty line at offset 0 leaves pos == 0, and it
+	 * must still be removed, otherwise it would be found and reported
+	 * again after the next read.
+	 */
+	if (start > 0) {
+		if (start < state->offset) {
+			memmove(state->buf,
+				state->buf + start,
+				state->offset - start);
+		}
+		state->offset -= start;
+	}
+
+	return 0;
+}
+
+/*
+ * Read from fd until end of file, passing each complete line to callback.
+ *
+ * The buffer is allocated on mem_ctx and grows in steps of length bytes
+ * (at least 32) whenever it is full.
+ *
+ * Returns 0 at end of file, ENOMEM if the buffer cannot be grown, errno if
+ * the read fails, or the non-zero value returned by the callback.
+ * *num_lines (when not NULL) is set at end of file and when the callback
+ * stops the read; it is not written on ENOMEM or on a read error.
+ */
+int line_read(int fd,
+	      size_t length,
+	      TALLOC_CTX *mem_ctx,
+	      line_process_fn_t callback,
+	      void *private_data,
+	      int *num_lines)
+{
+	struct line_read_state state = {
+		.callback = callback,
+		.private_data = private_data,
+		.hint = (length < 32) ? 32 : length,
+	};
+	int ret = 0;
+
+	for (;;) {
+		ssize_t nread;
+
+		/* Buffer full (or not yet allocated): grow it by one step */
+		if (state.offset == state.len) {
+			state.len += state.hint;
+			state.buf = talloc_realloc_size(mem_ctx,
+							state.buf,
+							state.len);
+			if (state.buf == NULL) {
+				return ENOMEM;
+			}
+		}
+
+		nread = sys_read(fd,
+				 state.buf + state.offset,
+				 state.len - state.offset);
+		if (nread < 0) {
+			return errno;
+		}
+		if (nread == 0) {
+			break;
+		}
+
+		state.offset += nread;
+
+		ret = line_read_process(&state);
+		if (ret != 0) {
+			break;
+		}
+	}
+
+	if (num_lines != NULL) {
+		*num_lines = state.num_lines;
+	}
+	return ret;
+}
diff --git a/ctdb/common/line.h b/ctdb/common/line.h
new file mode 100644
index 0000000..6b67f1e
--- /dev/null
+++ b/ctdb/common/line.h
@@ -0,0 +1,62 @@
+/*
+ Line based I/O over fds
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_LINE_H__
+#define __CTDB_LINE_H__
+
+#include <talloc.h>
+
+/**
+ * @file line.h
+ *
+ * @brief Line based I/O over pipes and sockets
+ */
+
+/**
+ * @brief The callback routine called to process a line
+ *
+ * The line passed in is NUL-terminated; the terminator that ended the
+ * line is replaced by '\0' before the callback is invoked.
+ *
+ * @param[in] line The line read
+ * @param[in] private_data Private data for callback
+ * @return 0 to continue processing lines, non-zero to stop reading
+ */
+typedef int (*line_process_fn_t)(char *line, void *private_data);
+
+/**
+ * @brief Read lines (terminated by \n or \0) from fd until end of file
+ *
+ * The callback is invoked once for every complete line.  Data at end of
+ * file that is not followed by a terminator is not passed to the
+ * callback.
+ *
+ * If there is any read error on fd, then errno will be returned.
+ * If the line buffer can not be allocated, then ENOMEM will be returned.
+ * If callback function returns a non-zero value, then that value will be
+ * returned.
+ *
+ * @param[in] fd The file descriptor
+ * @param[in] length The expected length of a line (this is only a hint,
+ *            values below 32 are treated as 32)
+ * @param[in] mem_ctx Talloc memory context used for the line buffer
+ * @param[in] callback Callback function called when a line is read
+ * @param[in] private_data Private data for callback
+ * @param[out] num_lines Number of lines read so far, may be NULL; only
+ *             updated at end of file or when the callback stops reading
+ * @return 0 on success, errno on failure
+ */
+int line_read(int fd,
+ size_t length,
+ TALLOC_CTX *mem_ctx,
+ line_process_fn_t callback,
+ void *private_data,
+ int *num_lines);
+
+#endif /* __CTDB_LINE_H__ */
diff --git a/ctdb/common/logging.c b/ctdb/common/logging.c
new file mode 100644
index 0000000..ad6d0c9
--- /dev/null
+++ b/ctdb/common/logging.c
@@ -0,0 +1,745 @@
+/*
+ Logging utilities
+
+ Copyright (C) Andrew Tridgell 2008
+ Copyright (C) Martin Schwenke 2014
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/locale.h"
+#include "system/time.h"
+#include "system/filesys.h"
+#include "system/syslog.h"
+#include "system/dir.h"
+
+#include "lib/util/time_basic.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+#include "lib/util/samba_util.h" /* get_myname() */
+
+#include "common/logging.h"
+
+/*
+ * Table mapping debug levels to their display names.
+ *
+ * Several levels share a name.  Lookups by name (debug_level_parse)
+ * return the first matching entry, so the named DEBUG_* constant listed
+ * first for each name is the level a name resolves to.  The number of
+ * entries also bounds the numeric levels accepted by
+ * debug_level_parse().
+ * NOTE(review): the bare numbers presumably fill the gaps between the
+ * DEBUG_* constants so every level 0..10 has a name - confirm against
+ * the DBGLVL_* definitions.
+ */
+struct {
+ int log_level;
+ const char *log_string;
+} log_string_map[] = {
+ { DEBUG_ERR, "ERROR" },
+ { DEBUG_WARNING, "WARNING" },
+ { 2, "WARNING" },
+ { DEBUG_NOTICE, "NOTICE" },
+ { 4, "NOTICE" },
+ { DEBUG_INFO, "INFO" },
+ { 6, "INFO" },
+ { 7, "INFO" },
+ { 8, "INFO" },
+ { 9, "INFO" },
+ { DEBUG_DEBUG, "DEBUG" },
+};
+
+/*
+ * Parse a debug level given either as a number or as a level name.
+ *
+ * A string starting with a digit is converted with atoi() and accepted
+ * if it is a valid index into log_string_map.  Otherwise the string is
+ * compared case-insensitively as a prefix of each level name and the
+ * first match wins (so "err" selects "ERROR").
+ *
+ * NOTE(review): because the comparison length is strlen(log_string),
+ * an empty string compares equal to the first entry - confirm whether
+ * callers rely on that.
+ *
+ * Returns true and sets *log_level on success, false if log_string is
+ * NULL or not recognised (*log_level is then left untouched).
+ */
+bool debug_level_parse(const char *log_string, int *log_level)
+{
+	size_t i;
+
+	if (log_string == NULL) {
+		return false;
+	}
+
+	/*
+	 * isdigit() is only defined for EOF and values representable as
+	 * unsigned char, so cast to avoid undefined behaviour for bytes
+	 * >= 0x80 on platforms where plain char is signed.
+	 */
+	if (isdigit((unsigned char)log_string[0])) {
+		int level = atoi(log_string);
+
+		if (level >= 0 && (size_t)level < ARRAY_SIZE(log_string_map)) {
+			*log_level = level;
+			return true;
+		}
+		return false;
+	}
+
+	for (i=0; i<ARRAY_SIZE(log_string_map); i++) {
+		if (strncasecmp(log_string_map[i].log_string,
+				log_string, strlen(log_string)) == 0) {
+			*log_level = log_string_map[i].log_level;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Return the display name for log_level, or "UNKNOWN" if the level has
+ * no entry in log_string_map.
+ */
+const char *debug_level_to_string(int log_level)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(log_string_map)) {
+		if (log_level == log_string_map[idx].log_level) {
+			return log_string_map[idx].log_string;
+		}
+		idx++;
+	}
+
+	return "UNKNOWN";
+}
+
+/*
+ * Convert a debug level string to its numeric level, falling back to
+ * DEBUG_ERR when the string can not be parsed.
+ */
+int debug_level_from_string(const char *log_string)
+{
+	int parsed;
+
+	if (! debug_level_parse(log_string, &parsed)) {
+		/* Default debug level */
+		return DEBUG_ERR;
+	}
+
+	return parsed;
+}
+
+/*
+ * file logging backend
+ */
+
+/*
+ * Validate the option of the "file" backend.
+ *
+ * NULL and "-" are always valid.  Any other value is treated as a file
+ * name and is valid if its parent directory exists and is a directory.
+ */
+static bool file_log_validate(const char *option)
+{
+	struct stat sb;
+	char *copy;
+	int rc;
+
+	if (option == NULL) {
+		return true;
+	}
+	if (strcmp(option, "-") == 0) {
+		return true;
+	}
+
+	/* dirname() may modify its argument, so work on a copy */
+	copy = strdup(option);
+	if (copy == NULL) {
+		return false;
+	}
+
+	rc = stat(dirname(copy), &sb);
+	free(copy);
+
+	return (rc == 0) && S_ISDIR(sb.st_mode);
+}
+
+/*
+ * Set up the "file" logging backend.
+ *
+ * With no option, or "-", nothing is done: logging to stderr has
+ * already been configured by logging init.  Otherwise option is used
+ * as the log file name.  Always returns 0.
+ */
+static int file_log_setup(TALLOC_CTX *mem_ctx,
+			  const char *option,
+			  const char *app_name)
+{
+	struct debug_settings settings = {
+		.debug_syslog_format = DEBUG_SYSLOG_FORMAT_ALWAYS,
+		.debug_hires_timestamp = true,
+		.debug_no_stderr_redirect = true,
+	};
+	const char *base = NULL;
+
+	if (option == NULL || strcmp(option, "-") == 0) {
+		return 0;
+	}
+
+	/*
+	 * In test mode log a fake hostname for local daemons: the last
+	 * path component of $CTDB_BASE.
+	 */
+	if (getenv("CTDB_TEST_MODE") != NULL) {
+		base = getenv("CTDB_BASE");
+	}
+	if (base != NULL) {
+		const char *name = base;
+		const char *slash = strrchr(base, '/');
+
+		if (slash != NULL) {
+			/* A trailing '/' leaves no component to use */
+			name = (slash[1] == '\0') ? "unknown" : slash + 1;
+		}
+
+		debug_set_hostname(name);
+	}
+
+	debug_set_settings(&settings, "file", 0, false);
+	debug_set_logfile(option);
+	setup_logging(app_name, DEBUG_FILE);
+
+	return 0;
+}
+
+/*
+ * syslog logging backend
+ */
+
+/*
+ * Map a debug level to a syslog() priority.
+ *
+ * Levels outside the table (negative or too large) map to LOG_DEBUG.
+ *
+ * Copied from lib/util/debug.c
+ */
+static int debug_level_to_priority(int level)
+{
+	static const int priority_map[] = {
+		LOG_ERR,     /* 0 */
+		LOG_WARNING, /* 1 */
+		LOG_NOTICE,  /* 2 */
+		LOG_NOTICE,  /* 3 */
+		LOG_NOTICE,  /* 4 */
+		LOG_NOTICE,  /* 5 */
+		LOG_INFO,    /* 6 */
+		LOG_INFO,    /* 7 */
+		LOG_INFO,    /* 8 */
+		LOG_INFO,    /* 9 */
+	};
+
+	if (level >= 0 && (size_t)level < ARRAY_SIZE(priority_map)) {
+		return priority_map[level];
+	}
+
+	return LOG_DEBUG;
+}
+
+/*
+ * State for the socket based syslog backends (nonblocking and UDP).
+ */
+struct syslog_log_state {
+ /* Socket connected to the syslog daemon, -1 when not open */
+ int fd;
+ /* Program name placed in each message */
+ const char *app_name;
+ /* Hostname used by the RFC5424 formatter; NULL for the Unix socket */
+ const char *hostname;
+ /*
+  * Formatter: writes the message into buf and returns its length,
+  * or -1 if the message can not be formatted
+  */
+ int (*format)(int dbglevel, struct syslog_log_state *state,
+ const char *str, char *buf, int bsize);
+ /* RFC3164 says: The total length of the packet MUST be 1024
+ bytes or less. */
+ char buffer[1024];
+ /* Number of messages dropped because the socket would block */
+ unsigned int dropped_count;
+};
+
+/* Format messages as per RFC3164
+ *
+ * It appears that some syslog daemon implementations do not allow a
+ * hostname when messages are sent via a Unix domain socket, so omit
+ * it. Similarly, syslogd on FreeBSD does not understand the hostname
+ * part of the header, even when logging via UDP. Note that most
+ * implementations will log messages against "localhost" when logging
+ * via UDP. A timestamp could be sent but rsyslogd on Linux limits
+ * the timestamp logged to the precision that was received on
+ * /dev/log. It seems sane to send degenerate RFC3164 messages
+ * without a header at all, so that the daemon will generate high
+ * resolution timestamps if configured.
+ */
+static int format_rfc3164(int dbglevel, struct syslog_log_state *state,
+			  const char *str, char *buf, int bsize)
+{
+	/*
+	 * Writes "<pri>app_name[pid]: str" into buf (truncated to fit)
+	 * and returns the number of bytes to send, or -1 if formatting
+	 * failed.
+	 */
+	int pri;
+	int len;
+
+	pri = LOG_DAEMON | debug_level_to_priority(dbglevel);
+	/* pid_t is signed, so cast to match the %u conversion */
+	len = snprintf(buf, bsize, "<%d>%s[%u]: %s",
+		       pri, state->app_name, (unsigned int)getpid(), str);
+	if (len < 0) {
+		/*
+		 * Output error: do not let a negative length reach
+		 * write(), where it would become a huge size
+		 */
+		return -1;
+	}
+	buf[bsize-1] = '\0';
+	len = MIN(len, bsize - 1);
+
+	return len;
+}
+
+/* Format messages as per RFC5424
+ *
+ * <165>1 2003-08-24T05:14:15.000003-07:00 192.0.2.1
+ * myproc 8710 - - %% It's time to make the do-nuts.
+ */
+static int format_rfc5424(int dbglevel, struct syslog_log_state *state,
+			  const char *str, char *buf, int bsize)
+{
+	/*
+	 * Writes an RFC5424 header followed by str into buf.  The
+	 * message part is truncated to fit; returns the number of bytes
+	 * to send, or -1 if the header does not fit or formatting
+	 * failed.
+	 */
+	int pri;
+	struct timeval tv;
+	struct timeval_buf tvbuf;
+	int len, s;
+
+	/* Header */
+	pri = LOG_DAEMON | debug_level_to_priority(dbglevel);
+	GetTimeOfDay(&tv);
+	/* pid_t is signed, so cast to match the %u conversion */
+	len = snprintf(buf, bsize,
+		       "<%d>1 %s %s %s %u - - ",
+		       pri, timeval_str_buf(&tv, true, true, &tvbuf),
+		       state->hostname, state->app_name,
+		       (unsigned int)getpid());
+	/*
+	 * A truncated header is not useful...  A negative result must
+	 * be rejected too, otherwise it would be used as an index
+	 * into buf below.
+	 */
+	if (len < 0 || len >= bsize) {
+		return -1;
+	}
+
+	/* Message */
+	s = snprintf(&buf[len], bsize - len, "%s", str);
+	if (s < 0) {
+		return -1;
+	}
+	buf[bsize-1] = '\0';
+	len = MIN(len + s, bsize - 1);
+
+	return len;
+}
+
+/*
+ * Debug callback for the plain syslog backend: pass msg to syslog()
+ * with the priority matching level.  private_data is not used.
+ */
+static void syslog_log(void *private_data, int level, const char *msg)
+{
+ syslog(debug_level_to_priority(level), "%s", msg);
+}
+
+/*
+ * Format msg into the state buffer and write it to the socket.
+ *
+ * Returns 0 on success, E2BIG if the message can not be formatted, or
+ * the errno from write().  Writes interrupted by a signal are retried.
+ */
+static int syslog_log_sock_maybe(struct syslog_log_state *state,
+				 int level, const char *msg)
+{
+	int len = state->format(level, state, msg, state->buffer,
+				sizeof(state->buffer));
+
+	if (len == -1) {
+		return E2BIG;
+	}
+
+	for (;;) {
+		ssize_t nwritten = write(state->fd, state->buffer, len);
+
+		if (nwritten != -1) {
+			return 0;
+		}
+		if (errno != EINTR) {
+			return errno;
+		}
+	}
+}
+/*
+ * Debug callback for the socket based syslog backends.
+ *
+ * Messages that can not be written because the socket would block are
+ * counted instead of being sent.  Before the next message is sent, a
+ * "[Dropped N log messages]" notice is sent first so the loss is
+ * visible in the log.
+ */
+static void syslog_log_sock(void *private_data, int level, const char *msg)
+{
+ struct syslog_log_state *state = talloc_get_type_abort(
+ private_data, struct syslog_log_state);
+ int ret;
+
+ if (state->dropped_count > 0) {
+ char t[64] = { 0 };
+ snprintf(t, sizeof(t),
+ "[Dropped %u log messages]\n",
+ state->dropped_count);
+ t[sizeof(t)-1] = '\0';
+ ret = syslog_log_sock_maybe(state, level, t);
+ if (ret == EAGAIN || ret == EWOULDBLOCK) {
+ state->dropped_count++;
+ /*
+ * If above failed then actually drop the
+ * message that would be logged below, since
+ * it would have been dropped anyway and it is
+ * also likely to fail. Falling through and
+ * attempting to log the message also means
+ * that the dropped message count will be
+ * logged out of order.
+ */
+ return;
+ }
+ if (ret != 0) {
+ /* Silent failure on any other error */
+ return;
+ }
+ /* The notice was delivered, start counting afresh */
+ state->dropped_count = 0;
+ }
+
+ ret = syslog_log_sock_maybe(state, level, msg);
+ if (ret == EAGAIN || ret == EWOULDBLOCK) {
+ state->dropped_count++;
+ }
+ /* Any other error from the write is ignored */
+}
+
+/*
+ * Set up logging via the C library syslog(): open the log with the
+ * program name, the process ID option and the daemon facility, then
+ * register syslog_log() as the debug callback.  mem_ctx is not used.
+ * Always returns 0.
+ */
+static int syslog_log_setup_syslog(TALLOC_CTX *mem_ctx, const char *app_name)
+{
+ openlog(app_name, LOG_PID, LOG_DAEMON);
+
+ debug_set_callback(NULL, syslog_log);
+
+ return 0;
+}
+
+/*
+ * Talloc destructor for the syslog state: close the socket if it is
+ * open.  Always returns 0.
+ */
+static int syslog_log_state_destructor(struct syslog_log_state *state)
+{
+	if (state->fd == -1) {
+		return 0;
+	}
+
+	close(state->fd);
+	state->fd = -1;
+
+	return 0;
+}
+
+/*
+ * Allocate and initialise the state shared by the socket based syslog
+ * backends.  The socket is not opened here (fd is set to -1), but the
+ * destructor that closes it is installed.
+ *
+ * Returns 0 and stores the new state in *result, or ENOMEM.
+ */
+static int syslog_log_setup_common(TALLOC_CTX *mem_ctx, const char *app_name,
+				   struct syslog_log_state **result)
+{
+	struct syslog_log_state *s = talloc_zero(mem_ctx,
+						 struct syslog_log_state);
+
+	if (s == NULL) {
+		return ENOMEM;
+	}
+
+	s->app_name = app_name;
+	s->fd = -1;
+	talloc_set_destructor(s, syslog_log_state_destructor);
+
+	*result = s;
+	return 0;
+}
+
+#ifdef _PATH_LOG
+/*
+ * Set up non-blocking logging to the local syslog daemon via the Unix
+ * datagram socket at _PATH_LOG, using degenerate RFC3164 messages.
+ *
+ * Returns 0 on success, otherwise ENOMEM or the errno of the failing
+ * call.  On failure the state (and its socket) is released.
+ */
+static int syslog_log_setup_nonblocking(TALLOC_CTX *mem_ctx,
+					const char *app_name)
+{
+	struct syslog_log_state *state = NULL;
+	/*
+	 * Zero-initialise the whole address: strncpy() below leaves the
+	 * last byte of sun_path untouched and any other members unset,
+	 * and the full structure is handed to connect().
+	 */
+	struct sockaddr_un dest = { .sun_family = AF_UNIX };
+	int ret;
+
+	ret = syslog_log_setup_common(mem_ctx, app_name, &state);
+	if (ret != 0) {
+		return ret;
+	}
+
+	state->fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (state->fd == -1) {
+		int save_errno = errno;
+		talloc_free(state);
+		return save_errno;
+	}
+
+	strncpy(dest.sun_path, _PATH_LOG, sizeof(dest.sun_path)-1);
+	ret = connect(state->fd,
+		      (struct sockaddr *)&dest, sizeof(dest));
+	if (ret == -1) {
+		int save_errno = errno;
+		talloc_free(state);
+		return save_errno;
+	}
+
+	ret = set_blocking(state->fd, false);
+	if (ret != 0) {
+		int save_errno = errno;
+		talloc_free(state);
+		return save_errno;
+	}
+
+	if (! set_close_on_exec(state->fd)) {
+		int save_errno = errno;
+		talloc_free(state);
+		return save_errno;
+	}
+
+	state->hostname = NULL; /* Make this explicit */
+	state->format = format_rfc3164;
+
+	debug_set_callback(state, syslog_log_sock);
+
+	return 0;
+}
+#endif /* _PATH_LOG */
+
+/*
+ * Set up logging to a syslog daemon via UDP on the loopback address,
+ * port 514.  Messages use RFC5424 format if rfc5424 is true, RFC3164
+ * otherwise.
+ *
+ * Returns 0 on success, otherwise ENOMEM or the errno of the failing
+ * call.  On failure the state (and its socket) is released.
+ */
+static int syslog_log_setup_udp(TALLOC_CTX *mem_ctx, const char *app_name,
+				bool rfc5424)
+{
+	struct syslog_log_state *state = NULL;
+	/*
+	 * Zero-initialise so padding and any platform specific members
+	 * are not passed to connect() as stack garbage.
+	 */
+	struct sockaddr_in dest = { .sin_family = AF_INET };
+	int ret;
+
+	ret = syslog_log_setup_common(mem_ctx, app_name, &state);
+	if (ret != 0) {
+		return ret;
+	}
+
+	state->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (state->fd == -1) {
+		int save_errno = errno;
+		talloc_free(state);
+		return save_errno;
+	}
+
+	dest.sin_port = htons(514);
+	dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	ret = connect(state->fd,
+		      (struct sockaddr *)&dest, sizeof(dest));
+	if (ret == -1) {
+		int save_errno = errno;
+		talloc_free(state);
+		return save_errno;
+	}
+
+	if (! set_close_on_exec(state->fd)) {
+		int save_errno = errno;
+		talloc_free(state);
+		return save_errno;
+	}
+
+	state->hostname = get_myname(state);
+	if (state->hostname == NULL) {
+		/* Use a fallback instead of failing initialisation */
+		state->hostname = "localhost";
+	}
+	if (rfc5424) {
+		state->format = format_rfc5424;
+	} else {
+		state->format = format_rfc3164;
+	}
+
+	debug_set_callback(state, syslog_log_sock);
+
+	return 0;
+}
+
+/*
+ * Validate the option of the "syslog" backend.
+ *
+ * Valid values are NULL (no option), "udp", "udp-rfc5424" and, when
+ * _PATH_LOG is defined, "nonblocking".
+ */
+static bool syslog_log_validate(const char *option)
+{
+	static const char *const known[] = {
+#ifdef _PATH_LOG
+		"nonblocking",
+#endif
+		"udp",
+		"udp-rfc5424",
+	};
+	size_t i;
+
+	if (option == NULL) {
+		return true;
+	}
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(option, known[i]) == 0) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Set up the "syslog" backend variant selected by option.
+ *
+ * Returns the result of the selected setup function, or EINVAL if the
+ * option is not recognised.
+ */
+static int syslog_log_setup(TALLOC_CTX *mem_ctx, const char *option,
+			    const char *app_name)
+{
+	if (option == NULL) {
+		return syslog_log_setup_syslog(mem_ctx, app_name);
+	}
+
+#ifdef _PATH_LOG
+	if (strcmp(option, "nonblocking") == 0) {
+		return syslog_log_setup_nonblocking(mem_ctx, app_name);
+	}
+#endif
+
+	if (strcmp(option, "udp") == 0) {
+		return syslog_log_setup_udp(mem_ctx, app_name, false);
+	}
+
+	if (strcmp(option, "udp-rfc5424") == 0) {
+		return syslog_log_setup_udp(mem_ctx, app_name, true);
+	}
+
+	return EINVAL;
+}
+
+/*
+ * A logging backend: its name as used in the "name[:option]" logging
+ * specification, a function to validate the option and a function to
+ * activate the backend.
+ */
+struct log_backend {
+ const char *name;
+ /* Return true if option is acceptable (option may be NULL) */
+ bool (*validate)(const char *option);
+ /* Activate the backend; returns 0 on success, errno-style otherwise */
+ int (*setup)(TALLOC_CTX *mem_ctx,
+ const char *option,
+ const char *app_name);
+};
+
+/* The backends known to log_backend_parse() */
+static struct log_backend log_backend[] = {
+ {
+ .name = "file",
+ .validate = file_log_validate,
+ .setup = file_log_setup,
+ },
+ {
+ .name = "syslog",
+ .validate = syslog_log_validate,
+ .setup = syslog_log_setup,
+ },
+};
+
+/*
+ * Split a "name[:option]" logging specification and look up the
+ * backend called name.
+ *
+ * On success *backend points to the backend and *backend_option is
+ * either NULL (no option given) or a copy of the option allocated on
+ * mem_ctx.  Only the first two ':' separated fields are used; anything
+ * after a further ':' is ignored.
+ *
+ * Returns 0 on success, ENOMEM on allocation failure, EINVAL if no
+ * backend name is present and ENOENT if the backend is unknown.  The
+ * output parameters are not reliably set on failure.
+ */
+static int log_backend_parse(TALLOC_CTX *mem_ctx,
+			     const char *logging,
+			     struct log_backend **backend,
+			     char **backend_option)
+{
+	struct log_backend *b = NULL;
+	char *t, *name, *option;
+	char *saveptr = NULL;
+	size_t i;
+
+	t = talloc_strdup(mem_ctx, logging);
+	if (t == NULL) {
+		return ENOMEM;
+	}
+
+	/*
+	 * Use the reentrant tokeniser: strtok() keeps hidden static
+	 * state and would clobber any tokenisation in progress
+	 * elsewhere in the process.
+	 */
+	name = strtok_r(t, ":", &saveptr);
+	if (name == NULL) {
+		talloc_free(t);
+		return EINVAL;
+	}
+	option = strtok_r(NULL, ":", &saveptr);
+
+	for (i=0; i<ARRAY_SIZE(log_backend); i++) {
+		if (strcmp(log_backend[i].name, name) == 0) {
+			b = &log_backend[i];
+			break;
+		}
+	}
+
+	if (b == NULL) {
+		talloc_free(t);
+		return ENOENT;
+	}
+
+	*backend = b;
+	if (option != NULL) {
+		/* option points into t, so copy it before t is freed */
+		*backend_option = talloc_strdup(mem_ctx, option);
+		if (*backend_option == NULL) {
+			talloc_free(t);
+			return ENOMEM;
+		}
+	} else {
+		*backend_option = NULL;
+	}
+
+	talloc_free(t);
+	return 0;
+}
+
+/*
+ * Check whether a "name[:option]" logging specification names a known
+ * backend and carries an option that backend accepts.
+ */
+bool logging_validate(const char *logging)
+{
+	struct log_backend *backend;
+	char *option;
+	bool ok = false;
+	TALLOC_CTX *frame = talloc_new(NULL);
+
+	if (frame == NULL) {
+		return false;
+	}
+
+	if (log_backend_parse(frame, logging, &backend, &option) == 0) {
+		ok = backend->validate(option);
+	}
+
+	/* Releases the option copy made by log_backend_parse() */
+	talloc_free(frame);
+	return ok;
+}
+
+/*
+ * Initialise logging.
+ *
+ * Logging first goes to stderr.  The debug level is taken from
+ * debug_level or, if that is NULL, from $CTDB_DEBUGLEVEL.  The backend
+ * is taken from logging or, if that is NULL, from $CTDB_LOGGING.
+ *
+ * Returns 0 on success, EINVAL if the debug level can not be parsed or
+ * no logging specification is available, otherwise the error from
+ * parsing the specification or setting up the backend.
+ */
+int logging_init(TALLOC_CTX *mem_ctx, const char *logging,
+		 const char *debug_level, const char *app_name)
+{
+	struct log_backend *backend = NULL;
+	char *option = NULL;
+	int level;
+	int ret;
+
+	setup_logging(app_name, DEBUG_DEFAULT_STDERR);
+
+	if (debug_level == NULL) {
+		debug_level = getenv("CTDB_DEBUGLEVEL");
+	}
+	if (! debug_level_parse(debug_level, &level)) {
+		return EINVAL;
+	}
+	debuglevel_set(level);
+
+	if (logging == NULL) {
+		logging = getenv("CTDB_LOGGING");
+	}
+	if (logging == NULL || logging[0] == '\0') {
+		return EINVAL;
+	}
+
+	ret = log_backend_parse(mem_ctx, logging, &backend, &option);
+	if (ret == 0) {
+		ret = backend->setup(mem_ctx, option, app_name);
+	} else if (ret == ENOENT) {
+		fprintf(stderr, "Invalid logging option \'%s\'\n",
+			logging);
+	}
+
+	talloc_free(option);
+	return ret;
+}
+
+/*
+ * Reopen the logs; returns the result of reopen_logs_internal().
+ */
+bool logging_reopen_logs(void)
+{
+	return reopen_logs_internal();
+}
+
+/*
+ * Optional hook, and the argument to call it with, that the SIGHUP
+ * handler runs before reopening the logs.
+ */
+struct logging_reopen_logs_data {
+ void (*hook)(void *private_data);
+ void *private_data;
+};
+
+/*
+ * SIGHUP handler: run the registered hook (if any), then reopen the
+ * logs and log the outcome.
+ */
+static void logging_sig_hup_handler(struct tevent_context *ev,
+				    struct tevent_signal *se,
+				    int signum,
+				    int count,
+				    void *dont_care,
+				    void *private_data)
+{
+	struct logging_reopen_logs_data *data = NULL;
+
+	if (private_data != NULL) {
+		data = talloc_get_type_abort(
+			private_data, struct logging_reopen_logs_data);
+	}
+
+	if (data != NULL && data->hook != NULL) {
+		data->hook(data->private_data);
+	}
+
+	if (! logging_reopen_logs()) {
+		D_WARNING("Failed to reopen logs\n");
+		return;
+	}
+
+	D_NOTICE("Reopened logs\n");
+}
+
+/*
+ * Install a SIGHUP handler that reopens the logs.
+ *
+ * If hook is not NULL it is called with private_data before the logs
+ * are reopened.  Returns false if memory allocation or installing the
+ * signal handler fails, true otherwise.
+ */
+bool logging_setup_sighup_handler(struct tevent_context *ev,
+				  TALLOC_CTX *talloc_ctx,
+				  void (*hook)(void *private_data),
+				  void *private_data)
+{
+	struct tevent_signal *sig;
+	struct logging_reopen_logs_data *hook_data = NULL;
+
+	/* The hook data is only needed when there is a hook to call */
+	if (hook != NULL) {
+		hook_data = talloc(talloc_ctx,
+				   struct logging_reopen_logs_data);
+		if (hook_data == NULL) {
+			return false;
+		}
+
+		hook_data->private_data = private_data;
+		hook_data->hook = hook;
+	}
+
+	sig = tevent_add_signal(ev,
+				talloc_ctx,
+				SIGHUP,
+				0,
+				logging_sig_hup_handler,
+				hook_data);
+	if (sig != NULL) {
+		return true;
+	}
+
+	talloc_free(hook_data);
+	return false;
+}
diff --git a/ctdb/common/logging.h b/ctdb/common/logging.h
new file mode 100644
index 0000000..542b4a3
--- /dev/null
+++ b/ctdb/common/logging.h
@@ -0,0 +1,51 @@
+/*
+ Logging utilities
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_LOGGING_H__
+#define __CTDB_LOGGING_H__
+
+#include <talloc.h>
+#include <tevent.h>
+#include "lib/util/debug.h"
+
+#define DEBUG_ERR DBGLVL_ERR
+#define DEBUG_WARNING DBGLVL_WARNING
+#define DEBUG_NOTICE DBGLVL_NOTICE
+#define DEBUG_INFO DBGLVL_INFO
+#define DEBUG_DEBUG DBGLVL_DEBUG
+
+/* These are used in many places, so define them here to avoid churn */
+#define DEBUG_ALERT DEBUG_ERR
+#define DEBUG_CRIT DEBUG_ERR
+
+/*
+ * Parse a debug level given as a number or as a (case-insensitive,
+ * possibly abbreviated) level name.  Returns false if log_string is
+ * NULL or not recognised.
+ */
+bool debug_level_parse(const char *log_string, int *log_level);
+/* Return the name of a debug level, "UNKNOWN" if it has none */
+const char *debug_level_to_string(int log_level);
+/* Like debug_level_parse() but returns DEBUG_ERR if parsing fails */
+int debug_level_from_string(const char *log_string);
+
+/*
+ * Check a "backend[:option]" logging specification, where backend is
+ * "file" or "syslog"
+ */
+bool logging_validate(const char *logging);
+/*
+ * Initialise logging.  A NULL logging or debuglevel is replaced by the
+ * value of $CTDB_LOGGING or $CTDB_DEBUGLEVEL respectively.  Returns 0
+ * on success, an errno-style value otherwise.
+ */
+int logging_init(TALLOC_CTX *mem_ctx, const char *logging,
+ const char *debuglevel, const char *app_name);
+
+/* Reopen the logs, returns false on failure */
+bool logging_reopen_logs(void);
+/*
+ * Install a SIGHUP handler that calls hook(private_data), if hook is
+ * not NULL, and then reopens the logs.  Returns false on failure.
+ */
+bool logging_setup_sighup_handler(struct tevent_context *ev,
+ TALLOC_CTX *talloc_ctx,
+ void (*hook)(void *private_data),
+ void *private_data);
+
+#endif /* __CTDB_LOGGING_H__ */
diff --git a/ctdb/common/logging_conf.c b/ctdb/common/logging_conf.c
new file mode 100644
index 0000000..38b3003
--- /dev/null
+++ b/ctdb/common/logging_conf.c
@@ -0,0 +1,127 @@
+/*
+ CTDB logging config handling
+
+ Copyright (C) Martin Schwenke 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "common/conf.h"
+#include "common/logging.h"
+#include "common/logging_conf.h"
+
+#define LOGGING_LOCATION_DEFAULT "file:" LOGDIR "/log.ctdb"
+#define LOGGING_LOG_LEVEL_DEFAULT "NOTICE"
+
+/*
+ * Config validation callback for the log level option: accept the new
+ * value if it parses as a debug level.  The key, the old value and the
+ * update mode are not examined.
+ */
+static bool logging_conf_validate_log_level(const char *key,
+					    const char *old_loglevel,
+					    const char *new_loglevel,
+					    enum conf_update_mode mode)
+{
+	int parsed;
+
+	return debug_level_parse(new_loglevel, &parsed);
+}
+
+/*
+ * Config validation callback for the log location option.
+ *
+ * The new value must be a valid logging specification.  On reload a
+ * changed location is rejected with a warning.
+ */
+static bool logging_conf_validate_location(const char *key,
+					   const char *old_location,
+					   const char *new_location,
+					   enum conf_update_mode mode)
+{
+	if (! logging_validate(new_location)) {
+		return false;
+	}
+
+	if (mode != CONF_MODE_RELOAD) {
+		return true;
+	}
+
+	if (strcmp(old_location, new_location) == 0) {
+		return true;
+	}
+
+	D_WARNING("Ignoring update of %s config option \"%s\"\n",
+		  LOGGING_CONF_SECTION, key);
+	return false;
+}
+
+/*
+ * Define the logging config section with its location and log level
+ * options.  default_log_level overrides the built-in default log
+ * level unless it is NULL.
+ */
+void logging_conf_init(struct conf_context *conf,
+		       const char *default_log_level)
+{
+	const char *log_level = default_log_level;
+
+	if (log_level == NULL) {
+		log_level = LOGGING_LOG_LEVEL_DEFAULT;
+	}
+
+	conf_define_section(conf, LOGGING_CONF_SECTION, NULL);
+
+	conf_define_string(conf,
+			   LOGGING_CONF_SECTION,
+			   LOGGING_CONF_LOCATION,
+			   LOGGING_LOCATION_DEFAULT,
+			   logging_conf_validate_location);
+
+	conf_define_string(conf,
+			   LOGGING_CONF_SECTION,
+			   LOGGING_CONF_LOG_LEVEL,
+			   log_level,
+			   logging_conf_validate_log_level);
+}
+
+/*
+ * Return the configured log location, or the built-in default if the
+ * option can not be read.
+ */
+const char *logging_conf_location(struct conf_context *conf)
+{
+	const char *value = NULL;
+
+	if (conf_get_string(conf,
+			    LOGGING_CONF_SECTION,
+			    LOGGING_CONF_LOCATION,
+			    &value,
+			    NULL) != 0) {
+		/* Can't really happen, but return default */
+		return LOGGING_LOCATION_DEFAULT;
+	}
+
+	return value;
+}
+
+/*
+ * Return the configured log level, or the built-in default if the
+ * option can not be read.
+ */
+const char *logging_conf_log_level(struct conf_context *conf)
+{
+	const char *value = NULL;
+
+	if (conf_get_string(conf,
+			    LOGGING_CONF_SECTION,
+			    LOGGING_CONF_LOG_LEVEL,
+			    &value,
+			    NULL) != 0) {
+		/* Can't really happen, but return default */
+		return LOGGING_LOG_LEVEL_DEFAULT;
+	}
+
+	return value;
+}
diff --git a/ctdb/common/logging_conf.h b/ctdb/common/logging_conf.h
new file mode 100644
index 0000000..fab478d
--- /dev/null
+++ b/ctdb/common/logging_conf.h
@@ -0,0 +1,36 @@
+/*
+ CTDB logging config handling
+
+ Copyright (C) Martin Schwenke 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __LOGGING_CONF_H__
+#define __LOGGING_CONF_H__
+
+#include "common/conf.h"
+
+#define LOGGING_CONF_SECTION "logging"
+
+#define LOGGING_CONF_LOCATION "location"
+#define LOGGING_CONF_LOG_LEVEL "log level"
+
+/*
+ * Define the logging section and its options in conf.  A NULL
+ * default_log_level selects the built-in default log level.
+ */
+void logging_conf_init(struct conf_context *conf,
+ const char *default_log_level);
+
+/* Return the configured value, or the built-in default if unavailable */
+const char *logging_conf_location(struct conf_context *conf);
+const char *logging_conf_log_level(struct conf_context *conf);
+
+#endif /* __LOGGING_CONF_H__ */
diff --git a/ctdb/common/path.c b/ctdb/common/path.c
new file mode 100644
index 0000000..ea3b08f
--- /dev/null
+++ b/ctdb/common/path.c
@@ -0,0 +1,211 @@
+/*
+ Construct runtime paths
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include "lib/util/debug.h"
+
+#include "common/path.h"
+
+#define CTDB_CONFIG_FILE "ctdb.conf"
+
+/*
+ * Cached runtime directories.
+ *
+ * The directories start out as the compile-time defaults.  In test mode
+ * each one is replaced, on first use, by a path below basedir.  The
+ * *_set flags record that a value has already been resolved.
+ */
+struct {
+ /* Value of $CTDB_BASE, only set in test mode */
+ char *basedir;
+ char datadir[PATH_MAX];
+ char etcdir[PATH_MAX];
+ char rundir[PATH_MAX];
+ char vardir[PATH_MAX];
+ /* True if $CTDB_TEST_MODE was set when basedir was resolved */
+ bool test_mode;
+ bool basedir_set;
+ bool datadir_set;
+ bool etcdir_set;
+ bool rundir_set;
+ bool vardir_set;
+} ctdb_paths = {
+ .datadir = CTDB_DATADIR,
+ .etcdir = CTDB_ETCDIR,
+ .rundir = CTDB_RUNDIR,
+ .vardir = CTDB_VARDIR,
+};
+
+/*
+ * Determine whether test mode is active and, if so, record the base
+ * directory from $CTDB_BASE.  Aborts if test mode is active but
+ * $CTDB_BASE is not set.
+ */
+static void path_set_basedir(void)
+{
+	if (getenv("CTDB_TEST_MODE") != NULL) {
+		ctdb_paths.test_mode = true;
+
+		ctdb_paths.basedir = getenv("CTDB_BASE");
+		if (ctdb_paths.basedir == NULL) {
+			D_ERR("Broken CTDB setup, CTDB_BASE not set in test mode\n");
+			abort();
+		}
+	}
+
+	ctdb_paths.basedir_set = true;
+}
+
+/*
+ * In test mode, overwrite path (a buffer of PATH_MAX bytes) with
+ * "<basedir>/<subdir>", or just "<basedir>" if subdir is NULL.
+ * Outside test mode path is left unchanged.
+ *
+ * Returns false if the resulting path does not fit, true otherwise.
+ */
+static bool path_construct(char *path, const char *subdir)
+{
+	char tmp[PATH_MAX];
+	int n;
+
+	if (! ctdb_paths.basedir_set) {
+		path_set_basedir();
+	}
+
+	if (! ctdb_paths.test_mode) {
+		return true;
+	}
+
+	if (subdir != NULL) {
+		n = snprintf(tmp,
+			     sizeof(tmp),
+			     "%s/%s",
+			     ctdb_paths.basedir,
+			     subdir);
+	} else {
+		n = snprintf(tmp, sizeof(tmp), "%s", ctdb_paths.basedir);
+	}
+
+	/* Build in a scratch buffer so path is untouched on overflow */
+	if ((size_t)n >= sizeof(tmp)) {
+		return false;
+	}
+
+	strncpy(path, tmp, PATH_MAX);
+	return true;
+}
+
+/*
+ * Return the data directory, resolving it on first use (in test mode
+ * this is the "share" directory below the base directory).
+ */
+const char *path_datadir(void)
+{
+	if (ctdb_paths.datadir_set) {
+		return ctdb_paths.datadir;
+	}
+
+	if (path_construct(ctdb_paths.datadir, "share")) {
+		ctdb_paths.datadir_set = true;
+	} else {
+		D_ERR("Failed to construct DATADIR\n");
+	}
+
+	return ctdb_paths.datadir;
+}
+
+/*
+ * Return the configuration directory, resolving it on first use (in
+ * test mode this is the base directory itself).
+ */
+const char *path_etcdir(void)
+{
+	if (ctdb_paths.etcdir_set) {
+		return ctdb_paths.etcdir;
+	}
+
+	if (path_construct(ctdb_paths.etcdir, NULL)) {
+		ctdb_paths.etcdir_set = true;
+	} else {
+		D_ERR("Failed to construct ETCDIR\n");
+	}
+
+	return ctdb_paths.etcdir;
+}
+
+/*
+ * Return the runtime directory, resolving it on first use (in test
+ * mode this is the "run" directory below the base directory).
+ */
+const char *path_rundir(void)
+{
+	if (ctdb_paths.rundir_set) {
+		return ctdb_paths.rundir;
+	}
+
+	if (path_construct(ctdb_paths.rundir, "run")) {
+		ctdb_paths.rundir_set = true;
+	} else {
+		D_ERR("Failed to construct RUNDIR\n");
+	}
+
+	return ctdb_paths.rundir;
+}
+
+/*
+ * Return the state directory, resolving it on first use (in test mode
+ * this is the "var" directory below the base directory).
+ */
+const char *path_vardir(void)
+{
+	if (ctdb_paths.vardir_set) {
+		return ctdb_paths.vardir;
+	}
+
+	if (path_construct(ctdb_paths.vardir, "var")) {
+		ctdb_paths.vardir_set = true;
+	} else {
+		D_ERR("Failed to construct VARDIR\n");
+	}
+
+	return ctdb_paths.vardir;
+}
+
+/*
+ * Return "<datadir>/<path>" as a new string allocated on mem_ctx (the
+ * result of talloc_asprintf(), presumably NULL on allocation failure).
+ */
+char *path_datadir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_datadir(), path);
+}
+
+/*
+ * Return "<etcdir>/<path>" as a new string allocated on mem_ctx (the
+ * result of talloc_asprintf(), presumably NULL on allocation failure).
+ */
+char *path_etcdir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_etcdir(), path);
+}
+
+/*
+ * Return "<rundir>/<path>" as a new string allocated on mem_ctx (the
+ * result of talloc_asprintf(), presumably NULL on allocation failure).
+ */
+char *path_rundir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_rundir(), path);
+}
+
+/*
+ * Return "<vardir>/<path>" as a new string allocated on mem_ctx (the
+ * result of talloc_asprintf(), presumably NULL on allocation failure).
+ */
+char *path_vardir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_vardir(), path);
+}
+
+/*
+ * Return the path of the CTDB configuration file (CTDB_CONFIG_FILE in
+ * the configuration directory) as a new string allocated on mem_ctx.
+ */
+char *path_config(TALLOC_CTX *mem_ctx)
+{
+ return path_etcdir_append(mem_ctx, CTDB_CONFIG_FILE);
+}
+
+/*
+ * Return the socket path for daemon as a new string allocated on
+ * mem_ctx: "<rundir>/<daemon>.socket".  For "ctdbd" the value of
+ * $CTDB_SOCKET, if set, takes precedence.
+ */
+char *path_socket(TALLOC_CTX *mem_ctx, const char *daemon)
+{
+	const char *override = NULL;
+
+	if (strcmp(daemon, "ctdbd") == 0) {
+		override = getenv("CTDB_SOCKET");
+	}
+
+	if (override != NULL) {
+		return talloc_strdup(mem_ctx, override);
+	}
+
+	return talloc_asprintf(mem_ctx,
+			       "%s/%s.socket",
+			       path_rundir(),
+			       daemon);
+}
+
+/*
+ * Return the PID file path for daemon, "<rundir>/<daemon>.pid", as a
+ * new string allocated on mem_ctx.
+ */
+char *path_pidfile(TALLOC_CTX *mem_ctx, const char *daemon)
+{
+ return talloc_asprintf(mem_ctx,
+ "%s/%s.pid",
+ path_rundir(),
+ daemon);
+}
diff --git a/ctdb/common/path.h b/ctdb/common/path.h
new file mode 100644
index 0000000..dcc6c20
--- /dev/null
+++ b/ctdb/common/path.h
@@ -0,0 +1,39 @@
+/*
+ Construct runtime paths
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PATH_H__
+#define __CTDB_PATH_H__
+
+#include <talloc.h>
+
+/*
+ * Runtime directories.  The returned strings are statically stored
+ * and must not be freed.  When $CTDB_TEST_MODE is set they are located
+ * below $CTDB_BASE instead of the compile-time defaults.
+ */
+const char *path_datadir(void);
+const char *path_etcdir(void);
+const char *path_rundir(void);
+const char *path_vardir(void);
+
+/* Return "<directory>/<path>" as a new string allocated on mem_ctx */
+char *path_datadir_append(TALLOC_CTX *mem_ctx, const char *path);
+char *path_etcdir_append(TALLOC_CTX *mem_ctx, const char *path);
+char *path_rundir_append(TALLOC_CTX *mem_ctx, const char *path);
+char *path_vardir_append(TALLOC_CTX *mem_ctx, const char *path);
+
+/*
+ * Paths of the configuration file and of the socket and PID file of a
+ * daemon, as new strings allocated on mem_ctx.  For "ctdbd",
+ * path_socket() returns $CTDB_SOCKET if that is set.
+ */
+char *path_config(TALLOC_CTX *mem_ctx);
+char *path_socket(TALLOC_CTX *mem_ctx, const char *daemon);
+char *path_pidfile(TALLOC_CTX *mem_ctx, const char *daemon);
+
+#endif /* __CTDB_PATH_H__ */
diff --git a/ctdb/common/path_tool.c b/ctdb/common/path_tool.c
new file mode 100644
index 0000000..44d29b6
--- /dev/null
+++ b/ctdb/common/path_tool.c
@@ -0,0 +1,384 @@
+/*
+ path tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/cmdline.h"
+#include "common/path.h"
+#include "common/path_tool.h"
+
+struct path_tool_context {
+ struct cmdline_context *cmdline; /* filled in by cmdline_init() in path_tool_init() */
+};
+
+/*
+ * "config" command: print the path of the CTDB config file.
+ *
+ * Takes no arguments.  Returns 0 on success, EINVAL (after printing
+ * usage) if any argument was given, and 1 if the path could not be
+ * allocated.
+ */
+static int path_tool_config(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct path_tool_context);
+	char *p;
+
+	if (argc != 0) {
+		cmdline_usage(ctx->cmdline, "config");
+		return EINVAL;
+	}
+
+	/*
+	 * path_config() allocates on mem_ctx, so check the result the same
+	 * way the other allocating commands do rather than handing a
+	 * possible NULL to printf("%s").
+	 */
+	p = path_config(mem_ctx);
+	if (p == NULL) {
+		D_ERR("Memory allocation error\n");
+		return 1;
+	}
+
+	printf("%s\n", p);
+
+	return 0;
+}
+
+/*
+ * "pidfile" command: print the pidfile path of the daemon named by the
+ * single argument.
+ *
+ * Returns 0 on success, EINVAL after printing usage when the argument
+ * count is not 1, and 1 when the path cannot be allocated.
+ */
+static int path_tool_pidfile(TALLOC_CTX *mem_ctx,
+			     int argc,
+			     const char **argv,
+			     void *private_data)
+{
+	struct path_tool_context *tool;
+	char *pidfile;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "pidfile");
+		return EINVAL;
+	}
+
+	pidfile = path_pidfile(mem_ctx, argv[0]);
+	if (pidfile != NULL) {
+		printf("%s\n", pidfile);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "socket" command: print the socket path of the daemon named by the
+ * single argument.
+ *
+ * Returns 0 on success, EINVAL after printing usage when the argument
+ * count is not 1, and 1 when the path cannot be allocated.
+ */
+static int path_tool_socket(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+	char *sockpath;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "socket");
+		return EINVAL;
+	}
+
+	sockpath = path_socket(mem_ctx, argv[0]);
+	if (sockpath != NULL) {
+		printf("%s\n", sockpath);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "datadir" command: print the CTDB DATADIR.
+ *
+ * Takes no arguments; prints usage and returns EINVAL otherwise.
+ */
+static int path_tool_datadir(TALLOC_CTX *mem_ctx,
+			     int argc,
+			     const char **argv,
+			     void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_datadir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "datadir");
+	return EINVAL;
+}
+
+/*
+ * "datadir append" command: print the given path relative to the CTDB
+ * DATADIR.
+ *
+ * Returns 0 on success, EINVAL after printing usage when the argument
+ * count is not 1, and 1 when the path cannot be allocated.
+ */
+static int path_tool_datadir_append(TALLOC_CTX *mem_ctx,
+				    int argc,
+				    const char **argv,
+				    void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "datadir append");
+		return EINVAL;
+	}
+
+	full = path_datadir_append(mem_ctx, argv[0]);
+	if (full != NULL) {
+		printf("%s\n", full);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "etcdir" command: print the CTDB ETCDIR.
+ *
+ * Takes no arguments; prints usage and returns EINVAL otherwise.
+ */
+static int path_tool_etcdir(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_etcdir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "etcdir");
+	return EINVAL;
+}
+
+/*
+ * "etcdir append" command: print the given path relative to the CTDB
+ * ETCDIR.
+ *
+ * Returns 0 on success, EINVAL after printing usage when the argument
+ * count is not 1, and 1 when the path cannot be allocated.
+ */
+static int path_tool_etcdir_append(TALLOC_CTX *mem_ctx,
+				   int argc,
+				   const char **argv,
+				   void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "etcdir append");
+		return EINVAL;
+	}
+
+	full = path_etcdir_append(mem_ctx, argv[0]);
+	if (full != NULL) {
+		printf("%s\n", full);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "rundir" command: print the CTDB RUNDIR.
+ *
+ * Takes no arguments; prints usage and returns EINVAL otherwise.
+ */
+static int path_tool_rundir(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_rundir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "rundir");
+	return EINVAL;
+}
+
+/*
+ * "rundir append" command: print the given path relative to the CTDB
+ * RUNDIR.
+ *
+ * Returns 0 on success, EINVAL after printing usage when the argument
+ * count is not 1, and 1 when the path cannot be allocated.
+ */
+static int path_tool_rundir_append(TALLOC_CTX *mem_ctx,
+				   int argc,
+				   const char **argv,
+				   void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "rundir append");
+		return EINVAL;
+	}
+
+	full = path_rundir_append(mem_ctx, argv[0]);
+	if (full != NULL) {
+		printf("%s\n", full);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "vardir" command: print the CTDB VARDIR.
+ *
+ * Takes no arguments; prints usage and returns EINVAL otherwise.
+ */
+static int path_tool_vardir(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_vardir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "vardir");
+	return EINVAL;
+}
+
+/*
+ * "vardir append" command: print the given path relative to the CTDB
+ * VARDIR.
+ *
+ * Returns 0 on success, EINVAL after printing usage when the argument
+ * count is not 1, and 1 when the path cannot be allocated.
+ */
+static int path_tool_vardir_append(TALLOC_CTX *mem_ctx,
+				   int argc,
+				   const char **argv,
+				   void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "vardir append");
+		return EINVAL;
+	}
+
+	full = path_vardir_append(mem_ctx, argv[0]);
+	if (full != NULL) {
+		printf("%s\n", full);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * Command table handed to cmdline_init() by path_tool_init().
+ *
+ * Each row is: command name, handler, help text, argument synopsis
+ * (NULL when the command takes no arguments).
+ *
+ * NOTE(review): every "<dir> append" row is listed before the plain
+ * "<dir>" row; presumably the order matters for command matching, so
+ * confirm against cmdline.c before reordering.
+ */
+struct cmdline_command path_commands[] = {
+	{ "config",
+	  path_tool_config,
+	  "Get path of CTDB config file",
+	  NULL },
+	{ "pidfile",
+	  path_tool_pidfile,
+	  "Get path of CTDB daemon pidfile",
+	  "<daemon>" },
+	{ "socket",
+	  path_tool_socket,
+	  "Get path of CTDB daemon socket",
+	  "<daemon>" },
+	{ "datadir append",
+	  path_tool_datadir_append,
+	  "Get path relative to CTDB DATADIR",
+	  "<path>" },
+	{ "datadir",
+	  path_tool_datadir,
+	  "Get path of CTDB DATADIR",
+	  NULL },
+	{ "etcdir append",
+	  path_tool_etcdir_append,
+	  "Get path relative to CTDB ETCDIR",
+	  "<path>" },
+	{ "etcdir",
+	  path_tool_etcdir,
+	  "Get path of CTDB ETCDIR",
+	  NULL },
+	{ "rundir append",
+	  path_tool_rundir_append,
+	  "Get path relative to CTDB RUNDIR",
+	  "<path>" },
+	{ "rundir",
+	  path_tool_rundir,
+	  "Get path of CTDB RUNDIR",
+	  NULL },
+	{ "vardir append",
+	  path_tool_vardir_append,
+	  "Get path relative to CTDB VARDIR",
+	  "<path>" },
+	{ "vardir",
+	  path_tool_vardir,
+	  "Get path of CTDB VARDIR",
+	  NULL },
+	CMDLINE_TABLEEND
+};
+
+/*
+ * Create a path tool context and parse the command line.
+ *
+ * The context is allocated on mem_ctx and owns the cmdline context.
+ * On success 0 is returned and the context is stored in *result.  On
+ * failure the context is freed, *result is left untouched, and ENOMEM
+ * or the error from cmdline_init()/cmdline_parse() is returned; usage
+ * is printed when parsing fails.
+ */
+int path_tool_init(TALLOC_CTX *mem_ctx,
+		   const char *prog,
+		   struct poptOption *options,
+		   int argc,
+		   const char **argv,
+		   bool parse_options,
+		   struct path_tool_context **result)
+{
+	struct path_tool_context *tool;
+	int err;
+
+	tool = talloc_zero(mem_ctx, struct path_tool_context);
+	if (tool == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	err = cmdline_init(tool, prog, options, NULL, path_commands,
+			   &tool->cmdline);
+	if (err != 0) {
+		D_ERR("Failed to initialize cmdline, ret=%d\n", err);
+		goto fail;
+	}
+
+	err = cmdline_parse(tool->cmdline, argc, argv, parse_options);
+	if (err != 0) {
+		cmdline_usage(tool->cmdline, NULL);
+		goto fail;
+	}
+
+	*result = tool;
+	return 0;
+
+fail:
+	talloc_free(tool);
+	return err;
+}
+
+/*
+ * Run the command selected during path_tool_init().
+ *
+ * The context itself is passed to the command handlers as private
+ * data; the return value and *result come straight from cmdline_run().
+ */
+int path_tool_run(struct path_tool_context *ctx, int *result)
+{
+	int ret;
+
+	ret = cmdline_run(ctx->cmdline, ctx, result);
+
+	return ret;
+}
+
+#ifdef CTDB_PATH_TOOL
+
+/*
+ * Entry point of the standalone "ctdb-path" tool.
+ *
+ * Exits with 1 on setup failure or when path_tool_run() itself fails,
+ * otherwise with the result reported by the executed command.
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *top;
+	struct path_tool_context *tool;
+	int status;
+	int err;
+
+	top = talloc_new(NULL);
+	if (top == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = path_tool_init(top, "ctdb-path", NULL, argc, argv, true, &tool);
+	if (err != 0) {
+		talloc_free(top);
+		exit(1);
+	}
+
+	setup_logging("ctdb-path", DEBUG_STDERR);
+	debuglevel_set(DEBUG_ERR);
+
+	err = path_tool_run(tool, &status);
+	if (err != 0) {
+		/* The command could not be run at all */
+		status = 1;
+	}
+
+	talloc_free(top);
+	exit(status);
+}
+
+#endif /* CTDB_PATH_TOOL */
diff --git a/ctdb/common/path_tool.h b/ctdb/common/path_tool.h
new file mode 100644
index 0000000..bc6ea62
--- /dev/null
+++ b/ctdb/common/path_tool.h
@@ -0,0 +1,38 @@
+/*
+ path tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PATH_TOOL__
+#define __CTDB_PATH_TOOL__
+
+#include <talloc.h>
+#include <popt.h>
+
+struct path_tool_context;
+/*
+ * Create a path tool context on mem_ctx and parse the command line.
+ *
+ * Returns 0 and stores the context in *result on success; returns a
+ * non-zero errno-style value (e.g. ENOMEM) on failure.
+ */
+int path_tool_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ int argc,
+ const char **argv,
+ bool parse_options,
+ struct path_tool_context **result);
+/*
+ * Run the command parsed by path_tool_init().  The return value and
+ * *result are those of cmdline_run().
+ */
+int path_tool_run(struct path_tool_context *ctx, int *result);
+
+#endif /* __CTDB_PATH_TOOL__ */
diff --git a/ctdb/common/pidfile.c b/ctdb/common/pidfile.c
new file mode 100644
index 0000000..47589f4
--- /dev/null
+++ b/ctdb/common/pidfile.c
@@ -0,0 +1,85 @@
+/*
+ Create and remove pidfile
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+
+#include "lib/util/blocking.h"
+#include "lib/util/pidfile.h"
+
+#include "common/pidfile.h"
+
+struct pidfile_context {
+ const char *pidfile; /* private copy of the pidfile path, unlinked by the destructor */
+ int fd; /* descriptor returned by pidfile_path_create() */
+ pid_t pid; /* getpid() at creation; the destructor is a no-op in any other process */
+};
+
+static int pidfile_context_destructor(struct pidfile_context *pid_ctx);
+
+/*
+ * Create a pidfile and a context that removes it again when freed.
+ *
+ * The context is allocated on mem_ctx, keeps its own copy of the
+ * pidfile path and records the creating process id.  On success 0 is
+ * returned and the context is stored in *result.  On failure ENOMEM
+ * or the error from pidfile_path_create() is returned, nothing is
+ * left allocated on mem_ctx and *result is not modified.
+ */
+int pidfile_context_create(TALLOC_CTX *mem_ctx, const char *pidfile,
+			   struct pidfile_context **result)
+{
+	struct pidfile_context *pid_ctx;
+	int fd, ret = 0;
+
+	pid_ctx = talloc_zero(mem_ctx, struct pidfile_context);
+	if (pid_ctx == NULL) {
+		return ENOMEM;
+	}
+
+	pid_ctx->pidfile = talloc_strdup(pid_ctx, pidfile);
+	if (pid_ctx->pidfile == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	pid_ctx->pid = getpid();
+
+	ret = pidfile_path_create(pid_ctx->pidfile, &fd, NULL);
+	if (ret != 0) {
+		/*
+		 * The destructor is not installed yet, so freeing the
+		 * context here only releases memory and cannot unlink a
+		 * pidfile owned by another process.
+		 */
+		goto fail;
+	}
+
+	pid_ctx->fd = fd;
+
+	/* From here on, freeing the context removes the pidfile */
+	talloc_set_destructor(pid_ctx, pidfile_context_destructor);
+
+	*result = pid_ctx;
+	return 0;
+
+fail:
+	talloc_free(pid_ctx);
+	return ret;
+}
+
+/*
+ * Talloc destructor: remove the pidfile and close its descriptor.
+ *
+ * Nothing is done when running in a process other than the one that
+ * created the context.  Always returns 0.
+ */
+static int pidfile_context_destructor(struct pidfile_context *pid_ctx)
+{
+	if (getpid() == pid_ctx->pid) {
+		/* Failure to unlink is deliberately ignored */
+		(void) unlink(pid_ctx->pidfile);
+
+		pidfile_fd_close(pid_ctx->fd);
+	}
+
+	return 0;
+}
diff --git a/ctdb/common/pidfile.h b/ctdb/common/pidfile.h
new file mode 100644
index 0000000..bc4e3a7
--- /dev/null
+++ b/ctdb/common/pidfile.h
@@ -0,0 +1,51 @@
+/*
+ Create and remove pidfile
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PIDFILE_H__
+#define __CTDB_PIDFILE_H__
+
+#include <talloc.h>
+
+/**
+ * @file pidfile.h
+ *
+ * @brief Routines to manage PID file
+ */
+
+/**
+ * @brief Abstract struct to store pidfile details
+ */
+struct pidfile_context;
+
+/**
+ * @brief Create a PID file
+ *
+ * This creates a PID file, locks it, and writes PID.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] pidfile Path of PID file
+ * @param[out] result Pidfile context
+ * @return 0 on success, errno on failure
+ *
+ * Freeing the pidfile_context will delete the pidfile.
+ */
+int pidfile_context_create(TALLOC_CTX *mem_ctx, const char *pidfile,
+ struct pidfile_context **result);
+
+#endif /* __CTDB_PIDFILE_H__ */
diff --git a/ctdb/common/pkt_read.c b/ctdb/common/pkt_read.c
new file mode 100644
index 0000000..212ace5
--- /dev/null
+++ b/ctdb/common/pkt_read.c
@@ -0,0 +1,190 @@
+/*
+ Reading packets using fixed and dynamic buffer
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* This is similar to read_packet abstraction. The main difference is that
+ * tevent fd event is created only once.
+ */
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "pkt_read.h"
+
+/*
+ * Read a packet using fixed buffer
+ */
+
+struct pkt_read_state {
+ int fd; /* descriptor to read from */
+ uint8_t *buf; /* caller's fixed buffer, or a talloc array owned by this state */
+ size_t buflen; /* capacity of buf */
+ size_t nread, total; /* bytes read so far / bytes wanted so far */
+ bool use_fixed; /* true while buf is the caller's fixed buffer */
+ ssize_t (*more)(uint8_t *buf, size_t buflen, void *private_data); /* returns extra bytes needed, 0 when complete, -1 if invalid */
+ void *private_data; /* passed through to more() */
+};
+
+/*
+ * Set up a request to read a packet from fd.
+ *
+ * "initial" bytes are read first; "more" (optional) is then asked how
+ * many further bytes are needed.  The caller's fixed buffer is used
+ * only if it can hold the initial read; otherwise (buf is NULL, buflen
+ * is 0, or buflen < initial) a buffer of "initial" bytes is allocated
+ * on the request state.
+ *
+ * Returns the new request, or NULL on allocation failure.  No fd event
+ * is created here; the caller drives the request by calling
+ * pkt_read_handler().
+ */
+struct tevent_req *pkt_read_send(TALLOC_CTX *mem_ctx,
+				 struct tevent_context *ev,
+				 int fd, size_t initial,
+				 uint8_t *buf, size_t buflen,
+				 ssize_t (*more)(uint8_t *buf,
+						 size_t buflen,
+						 void *private_data),
+				 void *private_data)
+{
+	struct tevent_req *req;
+	struct pkt_read_state *state;
+
+	req = tevent_req_create(mem_ctx, &state, struct pkt_read_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->fd = fd;
+
+	/*
+	 * The handler reads up to "initial" bytes into the buffer before
+	 * any size check, so a fixed buffer smaller than that must not be
+	 * used or the first read could overflow it.
+	 */
+	if (buf == NULL || buflen == 0 || buflen < initial) {
+		state->use_fixed = false;
+		state->buf = talloc_array(state, uint8_t, initial);
+		if (state->buf == NULL) {
+			talloc_free(req);
+			return NULL;
+		}
+		state->buflen = initial;
+	} else {
+		state->use_fixed = true;
+		state->buf = buf;
+		state->buflen = buflen;
+	}
+
+	state->nread = 0;
+	state->total = initial;
+
+	state->more = more;
+	state->private_data = private_data;
+
+	return req;
+}
+
+/*
+ * Read available data for a pending pkt_read request.
+ *
+ * Meant to be called when the fd is readable.  Each call performs one
+ * read(); the request completes once the wanted number of bytes has
+ * arrived and "more" (if set) reports that nothing further is needed.
+ *
+ * The request fails with the read() errno, with EPIPE when the fd is
+ * closed, with EIO when "more" rejects the packet (returns -1), with
+ * EMSGSIZE when the packet size would overflow, and with ENOMEM when
+ * the buffer cannot be grown.
+ */
+void pkt_read_handler(struct tevent_context *ev, struct tevent_fd *fde,
+		      uint16_t flags, struct tevent_req *req)
+{
+	struct pkt_read_state *state = tevent_req_data(
+		req, struct pkt_read_state);
+	ssize_t nread, more;
+	uint8_t *tmp;
+
+	nread = read(state->fd, state->buf + state->nread,
+		     state->total - state->nread);
+	if ((nread == -1) && (errno == EINTR)) {
+		/* retry */
+		return;
+	}
+	if (nread == -1) {
+		tevent_req_error(req, errno);
+		return;
+	}
+	if (nread == 0) {
+		/* fd closed */
+		tevent_req_error(req, EPIPE);
+		return;
+	}
+
+	state->nread += nread;
+	if (state->nread < state->total) {
+		/* come back later */
+		return;
+	}
+
+	/* Check if "more" asks for more data */
+	if (state->more == NULL) {
+		tevent_req_done(req);
+		return;
+	}
+
+	more = state->more(state->buf, state->nread, state->private_data);
+	if (more == -1) {
+		/* invalid packet */
+		tevent_req_error(req, EIO);
+		return;
+	}
+	if (more == 0) {
+		tevent_req_done(req);
+		return;
+	}
+
+	if (state->total + more < state->total) {
+		/* int wrapped */
+		tevent_req_error(req, EMSGSIZE);
+		return;
+	}
+
+	if (state->total + more <= state->buflen) {
+		/*
+		 * The packet still fits the current buffer, including an
+		 * exact fit, so no allocation or copy is needed.
+		 */
+		state->total += more;
+		return;
+	}
+
+	if (state->use_fixed) {
+		/* switch to dynamic buffer, keeping what was read so far */
+		tmp = talloc_array(state, uint8_t, state->total + more);
+		if (tevent_req_nomem(tmp, req)) {
+			return;
+		}
+
+		memcpy(tmp, state->buf, state->total);
+		state->use_fixed = false;
+	} else {
+		tmp = talloc_realloc(state, state->buf, uint8_t,
+				     state->total + more);
+		if (tevent_req_nomem(tmp, req)) {
+			return;
+		}
+	}
+
+	state->buf = tmp;
+	state->buflen = state->total + more;
+	state->total += more;
+}
+
+/*
+ * Collect the result of a pkt_read request.
+ *
+ * On error, -1 is returned with the errno in *perrno and the output
+ * pointers are left untouched.  Otherwise the packet size is returned
+ * and *pbuf points at the data: the caller's fixed buffer
+ * (*free_buf == false), or a dynamic buffer moved to mem_ctx
+ * (*free_buf == true).
+ */
+ssize_t pkt_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
+		      uint8_t **pbuf, bool *free_buf, int *perrno)
+{
+	struct pkt_read_state *state;
+
+	state = tevent_req_data(req, struct pkt_read_state);
+
+	if (tevent_req_is_unix_error(req, perrno)) {
+		return -1;
+	}
+
+	if (!state->use_fixed) {
+		/* Hand ownership of the dynamic buffer to the caller */
+		*pbuf = talloc_steal(mem_ctx, state->buf);
+		*free_buf = true;
+		return state->total;
+	}
+
+	*pbuf = state->buf;
+	*free_buf = false;
+	return state->total;
+}
diff --git a/ctdb/common/pkt_read.h b/ctdb/common/pkt_read.h
new file mode 100644
index 0000000..25d4a51
--- /dev/null
+++ b/ctdb/common/pkt_read.h
@@ -0,0 +1,98 @@
+/*
+ API for reading packets using fixed and dynamic buffer
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PKT_READ_H__
+#define __CTDB_PKT_READ_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file pkt_read.h
+ *
+ * @brief Read a packet using fixed size buffer or allocated memory.
+ *
+ * CTDB communication uses lots of small packets. This abstraction avoids the
+ * need to allocate memory for small packets. Only if the received packet is
+ * larger than the fixed memory buffer, use talloc to allocate memory.
+ */
+
+/**
+ * @brief Start async computation to read a packet
+ *
+ * This returns a tevent request to read a packet from given fd. The fd
+ * should be nonblocking. Freeing this request will free all the memory
+ * associated with the request.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd The non-blocking file/socket descriptor to read from
+ * @param[in] initial Initial amount of data to read
+ * @param[in] buf The static buffer to read data in
+ * @param[in] buflen The size of the static buffer
+ * @param[in] more The function to check if the bytes read forms a packet
+ * @param[in] private_data Private data to pass to more function
+ * @return new tevent request or NULL on failure
+ */
+struct tevent_req *pkt_read_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd, size_t initial,
+ uint8_t *buf, size_t buflen,
+ ssize_t (*more)(uint8_t *buf,
+ size_t buflen,
+ void *private_data),
+ void *private_data);
+
+/**
+ * @brief Function to actually read data from the socket
+ *
+ * This function should be called, when tevent fd event is triggered. This
+ * function has the syntax of tevent_fd_handler_t. The private_data for this
+ * function is the tevent request created by pkt_read_send function.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] fde Tevent fd context
+ * @param[in] flags Tevent fd flags
+ * @param[in] req The active tevent request
+ */
+void pkt_read_handler(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, struct tevent_req *req);
+
+/**
+ * @brief Retrieve a packet
+ *
+ * This function returns the pkt read from fd.
+ *
+ * @param[in] req Tevent request
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] pbuf The pointer to the buffer
+ * @param[out] free_buf Boolean to indicate that caller should free buffer
+ * @param[out] perrno errno in case of failure
+ * @return the size of the pkt, or -1 on failure
+ *
+ * If the pkt data is dynamically allocated, then it is moved under the
+ * specified talloc memory context and free_buf is set to true. It is the
+ * responsibility of the caller to free the memory returned.
+ *
+ * If the pkt data is stored in the fixed buffer, then free_buf is set to false.
+ */
+ssize_t pkt_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
+ uint8_t **pbuf, bool *free_buf, int *perrno);
+
+#endif /* __CTDB_PKT_READ_H__ */
diff --git a/ctdb/common/pkt_write.c b/ctdb/common/pkt_write.c
new file mode 100644
index 0000000..b1c1730
--- /dev/null
+++ b/ctdb/common/pkt_write.c
@@ -0,0 +1,101 @@
+/*
+ Write a packet
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "pkt_write.h"
+
+/*
+ * Write a packet
+ */
+
+struct pkt_write_state {
+ int fd; /* descriptor to write to */
+ uint8_t *buf; /* caller's data; not copied */
+ size_t buflen, offset; /* total bytes to write / bytes written so far */
+};
+
+/*
+ * Set up a request to write buflen bytes from buf to fd.
+ *
+ * The buffer is referenced, not copied.  No fd event is created here;
+ * the caller drives the request by calling pkt_write_handler().
+ * Returns the new request, or NULL on allocation failure.
+ */
+struct tevent_req *pkt_write_send(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  int fd, uint8_t *buf, size_t buflen)
+{
+	struct pkt_write_state *state;
+	struct tevent_req *req;
+
+	req = tevent_req_create(mem_ctx, &state, struct pkt_write_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	*state = (struct pkt_write_state) {
+		.fd = fd,
+		.buf = buf,
+		.buflen = buflen,
+		.offset = 0,
+	};
+
+	return req;
+}
+
+/*
+ * Write pending data for a pkt_write request.
+ *
+ * Each call performs one write() of the remaining bytes.  EINTR and
+ * zero-length writes leave the request pending, any other error fails
+ * the request with that errno, and the request completes once the
+ * whole buffer has been written.
+ */
+void pkt_write_handler(struct tevent_context *ev, struct tevent_fd *fde,
+		       uint16_t flags, struct tevent_req *req)
+{
+	struct pkt_write_state *state;
+	ssize_t nwritten;
+
+	state = tevent_req_data(req, struct pkt_write_state);
+
+	nwritten = write(state->fd, state->buf + state->offset,
+			 state->buflen - state->offset);
+	if (nwritten == -1) {
+		if (errno != EINTR) {
+			tevent_req_error(req, errno);
+		}
+		/* EINTR: retry on the next call */
+		return;
+	}
+	if (nwritten == 0) {
+		/* nothing written: retry on the next call */
+		return;
+	}
+
+	state->offset += nwritten;
+	if (state->offset >= state->buflen) {
+		tevent_req_done(req);
+	}
+	/* otherwise come back later for the remainder */
+}
+
+/*
+ * Collect the result of a pkt_write request.
+ *
+ * Returns the number of bytes written, or -1 with the errno stored in
+ * *perrno if the request failed.
+ */
+ssize_t pkt_write_recv(struct tevent_req *req, int *perrno)
+{
+	struct pkt_write_state *state;
+	bool failed;
+
+	state = tevent_req_data(req, struct pkt_write_state);
+
+	failed = tevent_req_is_unix_error(req, perrno);
+	if (failed) {
+		return -1;
+	}
+
+	return state->offset;
+}
diff --git a/ctdb/common/pkt_write.h b/ctdb/common/pkt_write.h
new file mode 100644
index 0000000..19d8045
--- /dev/null
+++ b/ctdb/common/pkt_write.h
@@ -0,0 +1,79 @@
+/*
+ API for writing a packet
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PKT_WRITE_H__
+#define __CTDB_PKT_WRITE_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file pkt_write.h
+ *
+ * @brief Write a packet.
+ *
+ * Write a complete packet with possibly multiple system calls.
+ */
+
+/**
+ * @brief Start async computation to write a packet
+ *
+ * This returns a tevent request to write a packet to given fd. The fd
+ * should be nonblocking. Freeing this request will free all the memory
+ * associated with the request.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd The non-blocking file/socket descriptor to write to
+ * @param[in] buf The data
+ * @param[in] buflen The size of the data
+ * @return new tevent request or NULL on failure
+ */
+struct tevent_req *pkt_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd, uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Function to actually write data to the socket
+ *
+ * This function should be called, when tevent fd event is triggered
+ * for TEVENT_FD_WRITE event. This function has the syntax of
+ * tevent_fd_handler_t. The private_data for this function is the tevent
+ * request created by pkt_write_send function.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] fde Tevent fd context
+ * @param[in] flags Tevent fd flags
+ * @param[in] req The active tevent request
+ */
+void pkt_write_handler(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, struct tevent_req *req);
+
+/**
+ * @brief Packet is sent
+ *
+ * This function returns the number of bytes written.
+ *
+ * @param[in] req Tevent request
+ * @param[out] perrno errno in case of failure
+ * @return the number of bytes written, or -1 on failure
+ */
+ssize_t pkt_write_recv(struct tevent_req *req, int *perrno);
+
+#endif /* __CTDB_PKT_WRITE_H__ */
diff --git a/ctdb/common/rb_tree.c b/ctdb/common/rb_tree.c
new file mode 100644
index 0000000..8e13dff
--- /dev/null
+++ b/ctdb/common/rb_tree.c
@@ -0,0 +1,1101 @@
+/*
+ a talloc based red-black tree
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/rb_tree.h"
+
+#define NO_MEMORY_FATAL(p) do { if (!(p)) { \
+ DEBUG(DEBUG_CRIT,("Out of memory for %s at %s\n", #p, __location__)); \
+ exit(10); \
+ }} while (0) /* logs the stringified expression p at DEBUG_CRIT and exits with status 10 when p is NULL/false */
+
+
+/*
+  Post-order walk used while a whole tree is being torn down: disarm the
+  per-node destructor of node and everything below it, and reparent each
+  of those nodes onto mem_ctx.
+ */
+static void
+tree_destructor_traverse_node(TALLOC_CTX *mem_ctx, trbt_node_t *node)
+{
+	trbt_node_t *kids[2];
+	int i;
+
+	/* the node destructor would unlink the nodes one at a time */
+	talloc_set_destructor(node, NULL);
+
+	kids[0] = node->left;
+	kids[1] = node->right;
+	for (i = 0; i < 2; i++) {
+		if (kids[i] != NULL) {
+			tree_destructor_traverse_node(mem_ctx, kids[i]);
+		}
+	}
+
+	/* children first, then the node itself */
+	talloc_steal(mem_ctx, node);
+}
+
+/*
+ destroy a tree and remove all its nodes
+ */
+static int tree_destructor(trbt_tree_t *tree)
+{
+	TALLOC_CTX *scratch;
+
+	/* nothing to release for a missing or empty tree */
+	if (tree == NULL || tree->root == NULL) {
+		return 0;
+	}
+
+	/* The regular node destructor removes nodes from the tree one by
+	   one and rebalances each time.  The whole tree is going away, so
+	   instead strip the destructor from every node, move all nodes to
+	   a scratch context and release them together; the tree may be
+	   inconsistent or unbalanced while that happens.
+	 */
+	scratch = talloc_new(NULL);
+	tree_destructor_traverse_node(scratch, tree->root);
+	talloc_free(scratch);
+
+	return 0;
+}
+
+
+/* create a red black tree */
+trbt_tree_t *
+trbt_create(TALLOC_CTX *memctx, uint32_t flags)
+{
+	trbt_tree_t *new_tree = talloc_zero(memctx, trbt_tree_t);
+
+	/* allocation failure is fatal (process exits) */
+	NO_MEMORY_FATAL(new_tree);
+
+	new_tree->flags = flags;
+
+	/* Freeing the tree releases the tree and all of its nodes via
+	   tree_destructor, but none of the data stored in the tree.
+	 */
+	talloc_set_destructor(new_tree, tree_destructor);
+
+	return new_tree;
+}
+
+static inline trbt_node_t *
+trbt_parent(trbt_node_t *node) /* returns node's parent link; node is dereferenced without a NULL check */
+{
+ return node->parent;
+}
+
+/* Return the parent of node's parent, or NULL when node has no parent. */
+static inline trbt_node_t *
+trbt_grandparent(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+
+	return (up != NULL) ? up->parent : NULL;
+}
+
+/* Return the sibling of node's parent, or NULL when node has no parent
+   or no grandparent (the uncle itself may also be NULL).
+ */
+static inline trbt_node_t *
+trbt_uncle(trbt_node_t *node)
+{
+	trbt_node_t *gp = trbt_grandparent(node);
+
+	if (gp == NULL) {
+		/* covers both "no parent" and "no grandparent" */
+		return NULL;
+	}
+
+	return (trbt_parent(node) == gp->left) ? gp->right : gp->left;
+}
+
+
+static inline void trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node);
+static inline void trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node);
+
+/* Rotate left around node: node's right child takes node's place and
+   node becomes that child's left child.  node->right must be non-NULL.
+ */
+static inline void
+trbt_rotate_left(trbt_node_t *node)
+{
+	trbt_node_t *pivot = node->right;
+	trbt_node_t *above = node->parent;
+
+	/* hook the pivot in where node used to hang */
+	if (above == NULL) {
+		node->tree->root = pivot;
+	} else if (above->left == node) {
+		above->left = pivot;
+	} else {
+		above->right = pivot;
+	}
+	pivot->parent = above;
+
+	/* node adopts the pivot's former left subtree as its right one */
+	node->parent = pivot;
+	node->right = pivot->left;
+	if (node->right != NULL) {
+		node->right->parent = node;
+	}
+	pivot->left = node;
+}
+
+/* Rotate right around node: node's left child takes node's place and
+   node becomes that child's right child.  node->left must be non-NULL.
+ */
+static inline void
+trbt_rotate_right(trbt_node_t *node)
+{
+	trbt_node_t *pivot = node->left;
+	trbt_node_t *above = node->parent;
+
+	/* hook the pivot in where node used to hang */
+	if (above == NULL) {
+		node->tree->root = pivot;
+	} else if (above->left == node) {
+		above->left = pivot;
+	} else {
+		above->right = pivot;
+	}
+	pivot->parent = above;
+
+	/* node adopts the pivot's former right subtree as its left one */
+	node->parent = pivot;
+	node->left = pivot->right;
+	if (node->left != NULL) {
+		node->left->parent = node;
+	}
+	pivot->right = node;
+}
+
+/* NULL nodes are black by definition */
+static inline int trbt_get_color(trbt_node_t *node)
+{
+	/* a missing node counts as a black leaf */
+	return (node != NULL) ? node->rb_color : TRBT_BLACK;
+}
+/* Color of node's left child; black when node or that child is NULL. */
+static inline int trbt_get_color_left(trbt_node_t *node)
+{
+	if (node == NULL || node->left == NULL) {
+		return TRBT_BLACK;
+	}
+	return node->left->rb_color;
+}
+/* Color of node's right child; black when node or that child is NULL. */
+static inline int trbt_get_color_right(trbt_node_t *node)
+{
+	if (node == NULL || node->right == NULL) {
+		return TRBT_BLACK;
+	}
+	return node->right->rb_color;
+}
+/* setting a NULL node to black is a nop */
+static inline void trbt_set_color(trbt_node_t *node, int color)
+{
+	/* NULL stands for a leaf, which has no color field to update */
+	if (node != NULL) {
+		node->rb_color = color;
+	}
+}
+/* Set the color of node's left child; no-op when node or the child is NULL. */
+static inline void trbt_set_color_left(trbt_node_t *node, int color)
+{
+	if (node != NULL && node->left != NULL) {
+		node->left->rb_color = color;
+	}
+}
+/* Set the color of node's right child; no-op when node or the child is NULL. */
+static inline void trbt_set_color_right(trbt_node_t *node, int color)
+{
+	if (node != NULL && node->right != NULL) {
+		node->right->rb_color = color;
+	}
+}
+
+/* Insert fix-up, final step: paint the parent black and the grandparent
+   red, then rotate around the grandparent.  Parent and grandparent must
+   both exist.  The tree argument is not used.
+ */
+static inline void
+trbt_insert_case5(trbt_tree_t *tree, trbt_node_t *node)
+{
+	trbt_node_t *p = trbt_parent(node);
+	trbt_node_t *gp = trbt_parent(p);
+	bool left_left = (node == p->left) && (p == gp->left);
+
+	p->rb_color = TRBT_BLACK;
+	gp->rb_color = TRBT_RED;
+
+	if (left_left) {
+		trbt_rotate_right(gp);
+	} else {
+		trbt_rotate_left(gp);
+	}
+}
+
+/* Insert fix-up: when node is an "inner" grandchild (right child of a
+   left parent or left child of a right parent) rotate it to the outside
+   first, then continue with case 5.  Does nothing without a grandparent.
+ */
+static inline void
+trbt_insert_case4(trbt_tree_t *tree, trbt_node_t *node)
+{
+	trbt_node_t *p = trbt_parent(node);
+	trbt_node_t *gp = trbt_parent(p);
+
+	if (gp == NULL) {
+		return;
+	}
+
+	if ((p == gp->left) && (node == p->right)) {
+		trbt_rotate_left(p);
+		/* the old parent is now node's left child; continue from it */
+		node = node->left;
+	} else if ((p == gp->right) && (node == p->left)) {
+		trbt_rotate_right(p);
+		/* the old parent is now node's right child; continue from it */
+		node = node->right;
+	}
+
+	trbt_insert_case5(tree, node);
+}
+
+/* Insert fix-up: with a red uncle, recolor parent and uncle black and
+   the grandparent red, then restart the fix-up at the grandparent.
+   Otherwise move on to case 4.
+ */
+static inline void
+trbt_insert_case3(trbt_tree_t *tree, trbt_node_t *node)
+{
+	trbt_node_t *unc = trbt_uncle(node);
+	trbt_node_t *gp;
+
+	if (unc == NULL || unc->rb_color != TRBT_RED) {
+		trbt_insert_case4(tree, node);
+		return;
+	}
+
+	trbt_parent(node)->rb_color = TRBT_BLACK;
+	unc->rb_color = TRBT_BLACK;
+
+	gp = trbt_grandparent(node);
+	gp->rb_color = TRBT_RED;
+	trbt_insert_case1(tree, gp);
+}
+
+/* Insert fix-up: a black parent means nothing is violated; otherwise
+   continue with case 3.  The caller guarantees a non-NULL parent.
+ */
+static inline void
+trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node)
+{
+	if (trbt_parent(node)->rb_color != TRBT_BLACK) {
+		trbt_insert_case3(tree, node);
+	}
+}
+
+/* Insert fix-up entry point: a node without a parent is the root and is
+   simply painted black; any other node goes on to case 2.
+ */
+static inline void
+trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node)
+{
+	if (trbt_parent(node) != NULL) {
+		trbt_insert_case2(tree, node);
+		return;
+	}
+
+	node->rb_color = TRBT_BLACK;
+}
+
+/* Return the other child of node's parent (possibly NULL), or NULL when
+   node has no parent.
+ */
+static inline trbt_node_t *
+trbt_sibling(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+
+	if (up == NULL) {
+		return NULL;
+	}
+
+	return (up->left == node) ? up->right : up->left;
+}
+
+/* Delete fix-up, final step: the sibling takes the parent's color, the
+   parent and the sibling's outer child become black, and the parent is
+   rotated towards node.  node must have a parent.
+ */
+static inline void
+trbt_delete_case6(trbt_node_t *node)
+{
+	trbt_node_t *sib = trbt_sibling(node);
+	trbt_node_t *up = trbt_parent(node);
+	bool node_is_left = (node == up->left);
+
+	trbt_set_color(sib, up->rb_color);
+	trbt_set_color(up, TRBT_BLACK);
+
+	if (node_is_left) {
+		trbt_set_color_right(sib, TRBT_BLACK);
+		trbt_rotate_left(up);
+	} else {
+		trbt_set_color_left(sib, TRBT_BLACK);
+		trbt_rotate_right(up);
+	}
+}
+
+
+/* Delete fix-up: when the black sibling has a red inner child and a
+   black outer child, rotate the sibling so that the red child ends up on
+   the outside.  Case 6 always follows.
+ */
+static inline void
+trbt_delete_case5(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+	trbt_node_t *sib = trbt_sibling(node);
+
+	if ((node == up->left)
+	    && (trbt_get_color(sib) == TRBT_BLACK)
+	    && (trbt_get_color_left(sib) == TRBT_RED)
+	    && (trbt_get_color_right(sib) == TRBT_BLACK)) {
+		/* node on the left: the sibling's red child is its left one */
+		trbt_set_color(sib, TRBT_RED);
+		trbt_set_color_left(sib, TRBT_BLACK);
+		trbt_rotate_right(sib);
+	} else if ((node == up->right)
+		   && (trbt_get_color(sib) == TRBT_BLACK)
+		   && (trbt_get_color_right(sib) == TRBT_RED)
+		   && (trbt_get_color_left(sib) == TRBT_BLACK)) {
+		/* mirror image of the case above */
+		trbt_set_color(sib, TRBT_RED);
+		trbt_set_color_right(sib, TRBT_BLACK);
+		trbt_rotate_left(sib);
+	}
+
+	trbt_delete_case6(node);
+}
+
+/* Delete fix-up: red parent with a black sibling whose children are both
+   black -> swap the colors of parent and sibling and stop.  Anything
+   else continues with case 5.
+ */
+static inline void
+trbt_delete_case4(trbt_node_t *node)
+{
+	trbt_node_t *sib = trbt_sibling(node);
+	bool swap_colors = (trbt_get_color(node->parent) == TRBT_RED)
+		&& (trbt_get_color(sib) == TRBT_BLACK)
+		&& (trbt_get_color_left(sib) == TRBT_BLACK)
+		&& (trbt_get_color_right(sib) == TRBT_BLACK);
+
+	if (!swap_colors) {
+		trbt_delete_case5(node);
+		return;
+	}
+
+	trbt_set_color(sib, TRBT_RED);
+	trbt_set_color(node->parent, TRBT_BLACK);
+}
+
+static void trbt_delete_case1(trbt_node_t *node);
+
+/* Delete fix-up: parent, sibling and both of the sibling's children are
+   black -> paint the sibling red and restart the fix-up one level up.
+   Anything else continues with case 4.
+ */
+static inline void
+trbt_delete_case3(trbt_node_t *node)
+{
+	trbt_node_t *sib = trbt_sibling(node);
+	bool all_black = (trbt_get_color(node->parent) == TRBT_BLACK)
+		&& (trbt_get_color(sib) == TRBT_BLACK)
+		&& (trbt_get_color_left(sib) == TRBT_BLACK)
+		&& (trbt_get_color_right(sib) == TRBT_BLACK);
+
+	if (!all_black) {
+		trbt_delete_case4(node);
+		return;
+	}
+
+	trbt_set_color(sib, TRBT_RED);
+	trbt_delete_case1(node->parent);
+}
+
+/* Delete fix-up: a red sibling is turned into a black one by recoloring
+   parent and sibling and rotating the parent towards node.  Case 3
+   always follows.
+ */
+static inline void
+trbt_delete_case2(trbt_node_t *node)
+{
+	if (trbt_get_color(trbt_sibling(node)) == TRBT_RED) {
+		trbt_node_t *sib = trbt_sibling(node);
+		trbt_node_t *up = node->parent;
+
+		trbt_set_color(up, TRBT_RED);
+		trbt_set_color(sib, TRBT_BLACK);
+		if (up->left == node) {
+			trbt_rotate_left(up);
+		} else {
+			trbt_rotate_right(up);
+		}
+	}
+
+	trbt_delete_case3(node);
+}
+
+/* Delete fix-up entry point: nothing to do once the root has been
+   reached (no parent); otherwise continue with case 2.
+ */
+static void
+trbt_delete_case1(trbt_node_t *node)
+{
+	if (node->parent != NULL) {
+		trbt_delete_case2(node);
+	}
+}
+
+/*
+  Remove node from its tree and rebalance.
+
+  from_destructor == true:  called from node_destructor while talloc is
+      already freeing node; node must only be unlinked, never freed here.
+  from_destructor == false: explicit delete (trbt_delete32); the node
+      that is physically removed from the tree is freed here.
+
+  The data a node points to is never released by this function.
+  If the tree has TRBT_AUTOFREE set and becomes empty, the tree itself
+  is freed.
+ */
+static void
+delete_node(trbt_node_t *node, bool from_destructor)
+{
+	trbt_node_t *parent, *child, dc;
+	trbt_node_t *temp = NULL;
+	/* remember the tree up front: node may be freed before the
+	   autofree check at the end and must not be dereferenced then */
+	trbt_tree_t *tree = node->tree;
+
+	/* This node has two child nodes, then just copy the content
+	   from the next smaller node with this node and delete the
+	   predecessor instead.
+	   The predecessor is guaranteed to have at most one child
+	   node since its right arm must be NULL
+	   (It must be NULL since we are its successor and we are above
+	    it in the tree)
+	 */
+	if (node->left != NULL && node->right != NULL) {
+		/* This node has two children, just copy the data */
+		/* find the predecessor */
+		temp = node->left;
+
+		while (temp->right != NULL) {
+			temp = temp->right;
+		}
+
+		/* swap the predecessor data and key with the node to
+		   be deleted.
+		 */
+		node->key32 = temp->key32;
+		node->data = temp->data;
+		/* now we let node hang off the new data */
+		talloc_steal(node->data, node);
+
+		temp->data = NULL;
+		temp->key32 = -1;
+		/* then delete the temp node.
+		   this node is guaranteed to have at least one leaf
+		   child */
+		delete_node(temp, from_destructor);
+		goto finished;
+	}
+
+
+	/* There is at most one child to this node to be deleted */
+	child = node->left;
+	if (node->right) {
+		child = node->right;
+	}
+
+	/* If the node to be deleted did not have any child at all we
+	   create a temporary dummy node for the child and mark it black.
+	   Once the delete of the node is finished, we remove this dummy
+	   node, which is simple to do since it is guaranteed that it will
+	   still not have any children after the delete operation.
+	   This is because we don't represent the leaf-nodes as actual nodes
+	   in this implementation.
+	 */
+	if (!child) {
+		child = &dc;
+		child->tree = tree;
+		child->left=NULL;
+		child->right=NULL;
+		child->rb_color=TRBT_BLACK;
+		child->data=NULL;
+	}
+
+	/* replace node with child */
+	parent = trbt_parent(node);
+	if (parent) {
+		if (parent->left == node) {
+			parent->left = child;
+		} else {
+			parent->right = child;
+		}
+	} else {
+		tree->root = child;
+	}
+	child->parent = node->parent;
+
+
+	if (node->rb_color == TRBT_BLACK) {
+		if (trbt_get_color(child) == TRBT_RED) {
+			child->rb_color = TRBT_BLACK;
+		} else {
+			trbt_delete_case1(child);
+		}
+	}
+
+	/* If we had to create a temporary dummy node to represent a black
+	   leaf child we now have to delete it.
+	   This is simple since this dummy node originally had no children
+	   and we are guaranteed that it will also not have any children
+	   after the node has been deleted and any possible rotations
+	   have occurred.
+
+	   The only special case is if this was the last node of the tree
+	   in which case we have to reset the root to NULL as well.
+	   Otherwise it is enough to just unlink the child from its new
+	   parent.
+	 */
+	if (child == &dc) {
+		if (child->parent == NULL) {
+			tree->root = NULL;
+		} else if (child == child->parent->left) {
+			child->parent->left = NULL;
+		} else {
+			child->parent->right = NULL;
+		}
+	}
+
+finished:
+	if (!from_destructor) {
+		if (temp != NULL) {
+			/* Explicit delete of a node with two children:
+			   the recursive call above already unlinked and
+			   freed the predecessor.  node stays in the tree
+			   and now carries the predecessor's key and data,
+			   so it must not be freed.  The tree cannot be
+			   empty here, so there is nothing to autofree.
+			 */
+			return;
+		}
+
+		/* node is already unlinked; disarm its destructor so that
+		   freeing it does not run delete_node a second time on a
+		   node that is no longer part of the tree */
+		talloc_set_destructor(node, NULL);
+		talloc_free(node);
+	}
+
+	/* if we came from a destructor and temp!=NULL this means we
+	   did the node-swap but now the tree still contains the old
+	   node which was freed in the destructor. Not good.
+	   Put temp (already unlinked by the recursive call) in node's
+	   place instead.
+	*/
+	if (from_destructor && temp) {
+		temp->key32 = node->key32;
+		temp->rb_color = node->rb_color;
+
+		temp->data = node->data;
+		talloc_steal(temp->data, temp);
+
+		temp->parent = node->parent;
+		if (temp->parent) {
+			if (temp->parent->left == node) {
+				temp->parent->left = temp;
+			} else {
+				temp->parent->right = temp;
+			}
+		}
+
+		temp->left = node->left;
+		if (temp->left) {
+			temp->left->parent = temp;
+		}
+		temp->right = node->right;
+		if (temp->right) {
+			temp->right->parent = temp;
+		}
+
+		if (temp->tree->root == node) {
+			temp->tree->root = temp;
+		}
+	}
+
+	/* use the cached tree pointer: node may have been freed above */
+	if ( (tree->flags & TRBT_AUTOFREE)
+	&& (tree->root == NULL) ) {
+		talloc_free(tree);
+	}
+
+	return;
+}
+
+/*
+ destroy a node and remove it from its tree
+ */
+static int node_destructor(trbt_node_t *node) /* talloc destructor installed on every node by trbt_create_node */
+{
+ delete_node(node, true); /* true: delete_node must not talloc_free the node itself */
+
+ return 0; /* always reports success */
+}
+
+/* Allocate a black, childless node for key/data with the given parent.
+   The node is not linked into the tree here; that is left to the caller.
+   Allocation failure is fatal.
+ */
+static inline trbt_node_t *
+trbt_create_node(trbt_tree_t *tree, trbt_node_t *parent, uint32_t key, void *data)
+{
+	trbt_node_t *fresh = talloc_zero(tree, trbt_node_t);
+
+	NO_MEMORY_FATAL(fresh);
+
+	fresh->tree = tree;
+	fresh->parent = parent;
+	fresh->left = NULL;
+	fresh->right = NULL;
+	fresh->rb_color = TRBT_BLACK;
+	fresh->key32 = key;
+	fresh->data = data;
+
+	/* Reparent the node onto its data: freeing the data then frees
+	   the node, whose destructor takes it out of the tree.
+	 */
+	talloc_steal(data, fresh);
+	talloc_set_destructor(fresh, node_destructor);
+
+	return fresh;
+}
+
+/* insert a new node in the tree.
+ if there is already a node with a matching key in the tree
+ we replace it with the new data and return a pointer to the old data
+ in case the caller wants to take any special action
+ */
+void *
+trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data)
+{
+	trbt_node_t *cur = tree->root;
+	trbt_node_t *fresh = NULL;
+
+	/* empty tree: the new node becomes the (black) root */
+	if (cur == NULL) {
+		tree->root = trbt_create_node(tree, NULL, key, data);
+		return NULL;
+	}
+
+	/* descend until the key is found or a free child slot is reached */
+	for (;;) {
+		trbt_node_t **slot;
+
+		if (key == cur->key32) {
+			/* key already present: swap in the new data, let
+			   the node hang off the new data and hand the old
+			   data back to the caller */
+			void *previous = cur->data;
+
+			cur->data = data;
+			talloc_steal(cur->data, cur);
+			return previous;
+		}
+
+		slot = (key < cur->key32) ? &cur->left : &cur->right;
+		if (*slot == NULL) {
+			fresh = trbt_create_node(tree, cur, key, data);
+			*slot = fresh;
+			break;
+		}
+		cur = *slot;
+	}
+
+	/* a new leaf starts out red and is then rebalanced */
+	fresh->rb_color = TRBT_RED;
+	trbt_insert_case1(tree, fresh);
+	return NULL;
+}
+
+/* Return the data stored under key, or NULL when the key is not in the
+   tree.  tree must be non-NULL.
+ */
+void *
+trbt_lookup32(trbt_tree_t *tree, uint32_t key)
+{
+	trbt_node_t *cur = tree->root;
+
+	while (cur != NULL) {
+		if (key == cur->key32) {
+			return cur->data;
+		}
+		/* smaller keys live on the left, larger ones on the right */
+		cur = (key < cur->key32) ? cur->left : cur->right;
+	}
+
+	return NULL;
+}
+
+
+/* This deletes a node from the tree.
+ Note that this does not release the data that the node points to
+*/
+void
+trbt_delete32(trbt_tree_t *tree, uint32_t key)
+{
+	trbt_node_t *cur;
+
+	/* plain binary search; an unknown key is silently ignored */
+	for (cur = tree->root; cur != NULL;
+	     cur = (key < cur->key32) ? cur->left : cur->right) {
+		if (key == cur->key32) {
+			delete_node(cur, false);
+			return;
+		}
+	}
+}
+
+
+/* Insert or update the entry for key, obtaining the data to store from
+   callback.  For a new key the callback is called with data == NULL;
+   for an existing key it is called with the current data and its return
+   value replaces that data.
+ */
+void
+trbt_insert32_callback(trbt_tree_t *tree, uint32_t key, void *(*callback)(void *param, void *data), void *param)
+{
+	trbt_node_t *cur = tree->root;
+	trbt_node_t *fresh = NULL;
+
+	/* empty tree: the new node becomes the (black) root */
+	if (cur == NULL) {
+		tree->root = trbt_create_node(tree, NULL, key,
+					      callback(param, NULL));
+		return;
+	}
+
+	/* descend until the key is found or a free child slot is reached */
+	for (;;) {
+		trbt_node_t **slot;
+
+		if (key == cur->key32) {
+			/* existing entry: let the callback decide what
+			   to store and hang the node off the result */
+			cur->data = callback(param, cur->data);
+			talloc_steal(cur->data, cur);
+			return;
+		}
+
+		slot = (key < cur->key32) ? &cur->left : &cur->right;
+		if (*slot == NULL) {
+			fresh = trbt_create_node(tree, cur, key,
+						 callback(param, NULL));
+			*slot = fresh;
+			break;
+		}
+		cur = *slot;
+	}
+
+	/* a new leaf starts out red and is then rebalanced */
+	fresh->rb_color = TRBT_RED;
+	trbt_insert_case1(tree, fresh);
+}
+
+
+struct trbt_array_param {
+ void *(*callback)(void *param, void *data); /* user callback, invoked once keylen has reached 0 */
+ void *param; /* opaque argument forwarded to callback */
+ uint32_t keylen; /* number of key elements still to be consumed */
+ uint32_t *key; /* the remaining key elements */
+ trbt_tree_t *tree; /* tree at the current level; talloc parent of a newly created sub-tree */
+};
+/* Callback handed to trbt_insert32_callback by
+   trbt_insertarray32_callback.  p is a struct trbt_array_param; data is
+   what is currently stored under the key element just consumed (NULL if
+   nothing).  Returns the value to store there: the user callback's
+   result at the last key element, otherwise the sub-tree for the next
+   level.
+ */
+static void *array_insert_callback(void *p, void *data)
+{
+	struct trbt_array_param *ctx = (struct trbt_array_param *)p;
+	trbt_tree_t *subtree;
+
+	/* whole key consumed: hand over to the user's callback */
+	if (ctx->keylen == 0) {
+		return ctx->callback(ctx->param, data);
+	}
+
+	/* keylen is not zero yet, so another level is needed.  Reuse the
+	   sub-tree already stored here, or create one hanging off the
+	   current tree; it is set to autofree so that it is freed when
+	   its last node has been released.
+	 */
+	subtree = (data != NULL)
+		? (trbt_tree_t *)data
+		: trbt_create(ctx->tree, TRBT_AUTOFREE);
+
+	trbt_insertarray32_callback(subtree, ctx->keylen, ctx->key, ctx->callback, ctx->param);
+
+	/* the caller stores this pointer as the data of its node */
+	return subtree;
+}
+
+
+
+/* insert into the tree using an array of uint32 as a key */
+/* Insert into a tree of trees keyed by an array of keylen uint32 values.
+   key[0] selects the entry in tree; the remaining elements are consumed
+   one per nested sub-tree.  cb is invoked for the final element as
+   described for trbt_insert32_callback.
+   A keylen of 0 is not a valid key and is ignored.
+ */
+void
+trbt_insertarray32_callback(trbt_tree_t *tree, uint32_t keylen, uint32_t *key, void *(*cb)(void *param, void *data), void *pm)
+{
+	struct trbt_array_param tap;
+
+	/* with keylen == 0, keylen-1 would wrap around to UINT32_MAX and
+	   key[0] would be read from an empty array */
+	if (keylen == 0) {
+		return;
+	}
+
+	/* keylen-1 and key[1] since the call to insert32 will consume the
+	   first part of the key.
+	 */
+	tap.callback= cb;
+	tap.param = pm;
+	tap.keylen = keylen-1;
+	tap.key = &key[1];
+	tap.tree = tree;
+
+	trbt_insert32_callback(tree, key[0], array_insert_callback, &tap);
+}
+
+/* lookup the tree using an array of uint32 as a key */
+/* Look up an entry in a tree of trees keyed by an array of keylen uint32
+   values.  Returns the stored data, or NULL when any part of the key is
+   missing.  A NULL tree or a keylen of 0 also yields NULL.
+ */
+void *
+trbt_lookuparray32(trbt_tree_t *tree, uint32_t keylen, uint32_t *key)
+{
+	/* same guards as trbt_findfirstarray32; without them keylen-1
+	   below would wrap around for keylen == 0 */
+	if (tree == NULL || keylen == 0) {
+		return NULL;
+	}
+
+	/* every key element but the last selects the next sub-tree */
+	while (keylen > 1) {
+		tree = trbt_lookup32(tree, key[0]);
+		if (tree == NULL) {
+			/* the key does not exist, return NULL */
+			return NULL;
+		}
+		key++;
+		keylen--;
+	}
+
+	/* the last element is a regular lookup in the innermost tree */
+	return trbt_lookup32(tree, key[0]);
+}
+
+
+/* traverse a tree starting at node */
+/* In-order traversal of the subtree rooted at node.  keylen is the
+   number of key levels still below this one: at 0 the callback is
+   called on the node's data, otherwise the data is traversed as a
+   sub-tree.  The first non-zero result aborts the traversal and is
+   returned.
+ */
+static int
+trbt_traversearray32_node(trbt_node_t *node, uint32_t keylen,
+	int (*callback)(void *param, void *data),
+	void *param)
+{
+	/* both child links are read before anything is called; presumably
+	   so that a callback may release node - confirm before relying on it */
+	trbt_node_t *smaller = node->left;
+	trbt_node_t *larger = node->right;
+	int rc;
+
+	if (smaller != NULL) {
+		rc = trbt_traversearray32_node(smaller, keylen, callback, param);
+		if (rc != 0) {
+			return rc;
+		}
+	}
+
+	rc = (keylen == 0)
+		? callback(param, node->data)
+		: trbt_traversearray32(node->data, keylen, callback, param);
+	if (rc != 0) {
+		return rc;
+	}
+
+	if (larger != NULL) {
+		return trbt_traversearray32_node(larger, keylen, callback, param);
+	}
+
+	return 0;
+}
+
+
+/* traverse the tree using an array of uint32 as a key */
+/* Traverse, in key order, a tree of trees keyed by arrays of keylen
+   uint32 values and call callback(param, data) for every stored entry.
+   Returns 0 when the traversal completed, or the first non-zero value
+   returned by the callback, which aborts the traversal.
+   A NULL or empty tree, or a keylen of 0, traverses nothing.
+ */
+int
+trbt_traversearray32(trbt_tree_t *tree, uint32_t keylen,
+	int (*callback)(void *param, void *data),
+	void *param)
+{
+	trbt_node_t *node;
+
+	/* keylen-1 below would wrap around to UINT32_MAX for keylen == 0
+	   and the stored data would be walked as if it were sub-trees */
+	if (keylen == 0) {
+		return 0;
+	}
+
+	if (tree == NULL) {
+		return 0;
+	}
+
+	node=tree->root;
+	if (node == NULL) {
+		return 0;
+	}
+
+	return trbt_traversearray32_node(node, keylen-1, callback, param);
+}
+
+
+/* this function will return the first node in a tree where
+ the key is an array of uint32_t
+*/
+void *
+trbt_findfirstarray32(trbt_tree_t *tree, uint32_t keylen)
+{
+	/* one pass per key level; stops on keylen 0, a missing tree or
+	   an empty tree */
+	while (keylen >= 1 && tree != NULL && tree->root != NULL) {
+		trbt_node_t *lowest = tree->root;
+
+		/* the smallest key is the leftmost node */
+		while (lowest->left != NULL) {
+			lowest = lowest->left;
+		}
+
+		/* last level: this is the entry we are looking for */
+		if (keylen == 1) {
+			return lowest->data;
+		}
+
+		/* otherwise the data is the sub-tree of the next level */
+		tree = lowest->data;
+		keylen--;
+	}
+
+	return NULL;
+}
+
+
+#ifdef TEST_RB_TREE
+/* Debug helper: print the subtree rooted at node in key order, one line
+   per node, indented by one space per level.  NULL prints nothing.
+ */
+static void printtree(trbt_node_t *node, int levels)
+{
+	int i;
+
+	if (node == NULL) {
+		return;
+	}
+	printtree(node->left, levels+1);
+
+	for (i = 0; i < levels; i++) {
+		printf(" ");
+	}
+	/* key32 is unsigned, and %p requires void pointers */
+	printf("key:%u COLOR:%s (node:%p parent:%p left:%p right:%p)\n",
+	       (unsigned int)node->key32,
+	       node->rb_color==TRBT_BLACK?"BLACK":"RED",
+	       (void *)node, (void *)node->parent,
+	       (void *)node->left, (void *)node->right);
+
+	printtree(node->right, levels+1);
+	printf("\n");
+}
+
+/* Debug helper: dump a whole tree to stdout, framed by "---" and "===".
+   tree must be non-NULL.
+ */
+void print_tree(trbt_tree_t *tree)
+{
+	trbt_node_t *root = tree->root;
+
+	if (root == NULL) {
+		printf("tree is empty\n");
+		return;
+	}
+	printf("---\n");
+	printtree(root->left, 1);
+	/* key32 is unsigned, and %p requires void pointers */
+	printf("root node key:%u COLOR:%s (node:%p left:%p right:%p)\n",
+	       (unsigned int)root->key32,
+	       root->rb_color==TRBT_BLACK?"BLACK":"RED",
+	       (void *)root, (void *)root->left, (void *)root->right);
+	printtree(root->right, 1);
+	printf("===\n");
+}
+
+void
+test_tree(void) /* manual stress test: random inserts and deletes of keys 0..19, printing the tree after every step */
+{
+ trbt_tree_t *tree;
+ char *str; /* NOTE(review): str, ret and NUM are not used anywhere in this function */
+ int i, ret; /* this i is only used by the disabled "#if 0" section below */
+ int NUM=15;
+ int cnt=0; /* iteration counter, printed each round */
+
+ tree=trbt_create(talloc_new(NULL), 0); /* flags 0: no TRBT_AUTOFREE */
+#if 0
+ for(i=0;i<10;i++){
+ printf("adding node %i\n",i);
+ trbt_insert32(tree, i, NULL);
+ print_tree(tree);
+ }
+ printf("deleting node %i\n",3);
+ trbt_delete32(tree, 3);
+ print_tree(tree);
+ for(i=0;i<10;i++){
+ printf("deleting node %i\n",i);
+ trbt_delete32(tree, i);
+ print_tree(tree);
+ }
+exit(0);
+#endif
+ while(++cnt){ /* the body has no break or return; the loop only ends if ++cnt evaluates to 0 */
+ int i; /* shadows the outer i */
+ printf("iteration : %d\n",cnt);
+ i=random()%20; /* key to insert, stored with NULL data */
+ printf("adding node %i\n",i);
+ trbt_insert32(tree, i, NULL);
+ print_tree(tree);
+
+ i=random()%20; /* independently chosen key to delete; it may not be in the tree */
+ printf("deleting node %i\n",i);
+ trbt_delete32(tree, i);
+ print_tree(tree);
+ }
+
+}
+
+#endif /* TEST_RB_TREE */
diff --git a/ctdb/common/rb_tree.h b/ctdb/common/rb_tree.h
new file mode 100644
index 0000000..59e7ccc
--- /dev/null
+++ b/ctdb/common/rb_tree.h
@@ -0,0 +1,90 @@
+/*
+ a talloc based red-black tree
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _RB_TREE_H
+#define _RB_TREE_H
+
+#define TRBT_RED 0x00
+#define TRBT_BLACK 0x01
+typedef struct trbt_node {
+ struct trbt_tree *tree; /* the tree this node belongs to */
+ struct trbt_node *parent; /* NULL for the root node */
+ struct trbt_node *left; /* child with a smaller key, or NULL */
+ struct trbt_node *right; /* child with a larger key, or NULL */
+ uint32_t rb_color; /* TRBT_RED or TRBT_BLACK */
+ uint32_t key32; /* the 32-bit key of this node */
+ void *data; /* stored payload; with array keys, a sub-tree on every level but the last */
+} trbt_node_t;
+
+typedef struct trbt_tree {
+ trbt_node_t *root; /* NULL while the tree is empty */
+/* automatically free the tree when the last node has been deleted */
+#define TRBT_AUTOFREE 0x00000001
+ uint32_t flags; /* bitmask given to trbt_create(), e.g. TRBT_AUTOFREE */
+} trbt_tree_t;
+
+
+
+/* Create a RB tree */
+trbt_tree_t *trbt_create(TALLOC_CTX *memctx, uint32_t flags);
+
+/* Lookup a node in the tree and return a pointer to data or NULL */
+void *trbt_lookup32(trbt_tree_t *tree, uint32_t key);
+
+/* Insert a new node into the tree. If there was already a node with this
+ key the pointer to the previous data is returned.
+ The tree node is talloc_steal()ed onto the inserted data, so freeing the
+ data also removes its node from the tree. The data itself is not reparented.
+*/
+void *trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data);
+
+/* Insert a new node into the tree.
+ If this is a new node:
+ callback is called with data==NULL and param=param
+ the value returned by the callback is stored in the tree and the tree
+ node is talloc_steal()ed onto it.
+ If a node already exists for this key then:
+ callback is called with data==existing data and param=param
+ the returned value replaces the stored data and the node is talloc_steal()ed onto it
+*/
+void trbt_insert32_callback(trbt_tree_t *tree, uint32_t key, void *(*callback)(void *param, void *data), void *param);
+
+/* Delete a node from the tree. Only the tree node is freed; the data it pointed to is not released */
+void trbt_delete32(trbt_tree_t *tree, uint32_t key);
+
+
+/* insert into the tree with a key based on an array of uint32 */
+void trbt_insertarray32_callback(trbt_tree_t *tree, uint32_t keylen, uint32_t *key, void *(*callback)(void *param, void *data), void *param);
+
+/* Lookup a node in the tree with a key based on an array of uint32
+ and return a pointer to data or NULL */
+void *trbt_lookuparray32(trbt_tree_t *tree, uint32_t keylen, uint32_t *key);
+
+/* Traverse a tree with a key based on an array of uint32
+ returns 0 if traverse completed
+ !0 if the traverse was aborted
+
+ If the callback returns !0 the traverse will be aborted
+*/
+int trbt_traversearray32(trbt_tree_t *tree, uint32_t keylen, int (*callback)(void *param, void *data), void *param);
+
+/* Lookup the first node in the tree with a key based on an array of uint32
+ and return a pointer to data or NULL */
+void *trbt_findfirstarray32(trbt_tree_t *tree, uint32_t keylen);
+
+#endif /* _RB_TREE_H */
diff --git a/ctdb/common/reqid.c b/ctdb/common/reqid.c
new file mode 100644
index 0000000..0e651cf
--- /dev/null
+++ b/ctdb/common/reqid.c
@@ -0,0 +1,89 @@
+/*
+ ctdb request id handling code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/idtree.h"
+#include "reqid.h"
+
+struct reqid_context {
+ struct idr_context *idr; /* id allocator, created with idr_init() in reqid_init() */
+ uint32_t lastid; /* most recently issued id; reqid_new() first searches above it */
+};
+
+/* Create a request id context on mem_ctx and return it through *result.
+   A start_id of 0 or less is replaced by 1.
+   Returns 0 on success or ENOMEM when an allocation fails; *result is
+   only written on success.
+ */
+int reqid_init(TALLOC_CTX *mem_ctx, int start_id,
+	struct reqid_context **result)
+{
+	struct reqid_context *ctx = talloc_zero(mem_ctx, struct reqid_context);
+
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	/* the id tree is a child of the context and is freed with it */
+	ctx->idr = idr_init(ctx);
+	if (ctx->idr == NULL) {
+		talloc_free(ctx);
+		return ENOMEM;
+	}
+
+	ctx->lastid = (start_id > 0) ? start_id : 1;
+
+	*result = ctx;
+	return 0;
+}
+
+/* Allocate a new request id and associate private_data with it.
+   Ids above the last one issued are tried first; when none is available
+   the search restarts from the bottom of the id space.
+   Returns the new id, or REQID_INVALID when no id could be allocated.
+ */
+uint32_t reqid_new(struct reqid_context *reqid_ctx, void *private_data)
+{
+	int id;
+
+	id = idr_get_new_above(reqid_ctx->idr, private_data,
+			       reqid_ctx->lastid+1, INT_MAX);
+	if (id < 0) {
+		/* reqid wrapped */
+		id = idr_get_new(reqid_ctx->idr, private_data, INT_MAX);
+	}
+	/* treat every negative value as failure, matching the check
+	   above, so that it is never stored in lastid or returned as an
+	   id */
+	if (id < 0) {
+		return REQID_INVALID;
+	}
+
+	reqid_ctx->lastid = id;
+	return id;
+}
+
+void *_reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid) /* untyped lookup of the data registered for reqid */
+{
+ return idr_find(reqid_ctx->idr, reqid); /* the result of idr_find() is passed through unchanged */
+}
+
+/* Drop reqid from the request id database.
+   Returns 0 on success, or ENOENT when idr_remove() reports a failure.
+ */
+int reqid_remove(struct reqid_context *reqid_ctx, uint32_t reqid)
+{
+	return (idr_remove(reqid_ctx->idr, reqid) < 0) ? ENOENT : 0;
+}
diff --git a/ctdb/common/reqid.h b/ctdb/common/reqid.h
new file mode 100644
index 0000000..d6d3936
--- /dev/null
+++ b/ctdb/common/reqid.h
@@ -0,0 +1,89 @@
+/*
+ Request id database
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_REQID_H__
+#define __CTDB_REQID_H__
+
+#include <talloc.h>
+
+/**
+ * @file reqid.h
+ *
+ * @brief Request id database
+ *
+ * CTDB tracks messages using request id. CTDB stores client state for each
+ * request id to process the replies correctly.
+ */
+
+/**
+ * @brief Abstract struct to store request id database
+ */
+struct reqid_context;
+
+#define REQID_INVALID 0xffffffff
+
+/**
+ * @brief Initialize request id database
+ *
+ * This returns a new request id context. Freeing this context will free
+ * all the memory associated with request id database.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] start_id The initial id
+ * @param[out] result The new request id context
+ * @return 0 on success, errno on failure
+ */
+int reqid_init(TALLOC_CTX *mem_ctx, int start_id,
+ struct reqid_context **result);
+
+/**
+ * @brief Generate new request id and associate given data with the request id
+ *
+ * @param[in] reqid_ctx The request id context
+ * @param[in] private_data The state to associate with new request id
+ * @return new request id, REQID_INVALID on failure
+ */
+uint32_t reqid_new(struct reqid_context *reqid_ctx, void *private_data);
+
+#ifdef DOXYGEN
+/**
+ * @brief Fetch the data associated with the request id
+ *
+ * @param[in] reqid_ctx The request id context
+ * @param[in] reqid The request id
+ * @param[in] type The data type of the stored data
+ * @return the data stored for the reqid, NULL on failure
+ */
+type *reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid, #type);
+#else
+void *_reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid);
+#define reqid_find(ctx, reqid, type) \
+ (type *)talloc_check_name(_reqid_find(ctx, reqid), #type)
+#endif
+
+/**
+ * @brief Remove the data associated with the request id
+ *
+ * @param[in] reqid_ctx The request id context
+ * @param[in] reqid The request id
+ * @return 0 on success, errno on failure
+ */
+int reqid_remove(struct reqid_context *reqid_ctx, uint32_t reqid);
+
+#endif /* __CTDB_REQID_H__ */
diff --git a/ctdb/common/run_event.c b/ctdb/common/run_event.c
new file mode 100644
index 0000000..d283664
--- /dev/null
+++ b/ctdb/common/run_event.c
@@ -0,0 +1,829 @@
+/*
+ Run scripts in a directory with specific event arguments
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+#include "system/glob.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/run_proc.h"
+#include "common/event_script.h"
+
+#include "common/run_event.h"
+
+/*
+ * Utility functions
+ */
+
+/*
+ * Build a run_event_script_list describing the scripts in script_dir.
+ *
+ * Returns 0 on success with *out set to a list allocated on mem_ctx, or to
+ * NULL when the directory contains no scripts.  Disabled scripts stay in
+ * the list with their summary preset to -ENOEXEC.  On failure the error
+ * from event_script_get_list() or ENOMEM is returned and *out is untouched.
+ */
+static int get_script_list(TALLOC_CTX *mem_ctx,
+ const char *script_dir,
+ struct run_event_script_list **out)
+{
+ struct event_script_list *s_list;
+ struct run_event_script_list *script_list;
+ unsigned int i;
+ int ret;
+
+ ret = event_script_get_list(mem_ctx, script_dir, &s_list);
+ if (ret != 0) {
+ if (ret == ENOENT) {
+ D_WARNING("event script dir %s removed\n", script_dir);
+ } else {
+ D_WARNING("failed to get script list for %s, ret=%d\n",
+ script_dir, ret);
+ }
+ return ret;
+ }
+
+ /* An empty directory is not an error; it is reported as a NULL list */
+ if (s_list->num_scripts == 0) {
+ *out = NULL;
+ talloc_free(s_list);
+ return 0;
+ }
+
+ script_list = talloc_zero(mem_ctx, struct run_event_script_list);
+ if (script_list == NULL) {
+ talloc_free(s_list);
+ return ENOMEM;
+ }
+
+ script_list->num_scripts = s_list->num_scripts;
+ script_list->script = talloc_zero_array(script_list,
+ struct run_event_script,
+ script_list->num_scripts);
+ if (script_list->script == NULL) {
+ talloc_free(s_list);
+ talloc_free(script_list);
+ return ENOMEM;
+ }
+
+ for (i = 0; i < s_list->num_scripts; i++) {
+ struct event_script *s = s_list->script[i];
+ struct run_event_script *script = &script_list->script[i];
+
+ /* Re-parent the name so it survives talloc_free(s_list) below */
+ script->name = talloc_steal(script_list->script, s->name);
+
+ if (! s->enabled) {
+ script->summary = -ENOEXEC;
+ }
+ }
+
+ talloc_free(s_list);
+ *out = script_list;
+ return 0;
+}
+
+/*
+ * Build the argv array used to execute event scripts.
+ *
+ * argv[0] is left NULL (the caller fills in the script name before each
+ * run), argv[1] is event_str (borrowed, not copied), followed by a copy of
+ * every space separated token of arg_str (if given) and a terminating NULL.
+ *
+ * Returns 0 on success with *out set to an array allocated on mem_ctx, or
+ * ENOMEM on allocation failure.  On failure nothing is left allocated on
+ * mem_ctx and *out is untouched.
+ */
+static int script_args(TALLOC_CTX *mem_ctx, const char *event_str,
+		       const char *arg_str, const char ***out)
+{
+	const char **argv;
+	size_t argc;
+	size_t len;
+
+	/* Preallocate argv array to avoid reallocation. */
+	len = 8;
+	argv = talloc_array(mem_ctx, const char *, len);
+	if (argv == NULL) {
+		return ENOMEM;
+	}
+
+	argv[0] = NULL; /* script name */
+	argv[1] = event_str;
+	argc = 2;
+
+	if (arg_str != NULL) {
+		char *str, *t, *tok;
+
+		/* strtok() modifies its input, so tokenise a private copy */
+		str = talloc_strdup(argv, arg_str);
+		if (str == NULL) {
+			talloc_free(argv);
+			return ENOMEM;
+		}
+
+		t = str;
+		while ((tok = strtok(t, " ")) != NULL) {
+			argv[argc] = talloc_strdup(argv, tok);
+			if (argv[argc] == NULL) {
+				talloc_free(argv);
+				return ENOMEM;
+			}
+			argc += 1;
+			/* Always keep one free slot for the NULL terminator */
+			if (argc >= len) {
+				const char **tmp;
+
+				/*
+				 * Use a temporary so that the original
+				 * array (and its children) can still be
+				 * freed if the reallocation fails.
+				 */
+				tmp = talloc_realloc(mem_ctx, argv,
+						     const char *, len + 8);
+				if (tmp == NULL) {
+					talloc_free(argv);
+					return ENOMEM;
+				}
+				argv = tmp;
+				len += 8;
+			}
+			t = NULL;
+		}
+
+		talloc_free(str);
+	}
+
+	argv[argc] = NULL;
+	/* argc += 1 */
+
+	*out = argv;
+	return 0;
+}
+
+/*
+ * State shared by all events run against one script directory.
+ */
+struct run_event_context {
+ /* Context handed to run_proc_send() for scripts and the debug program */
+ struct run_proc_context *run_proc_ctx;
+ /* Private copy of the directory containing the event scripts */
+ const char *script_dir;
+ /* Optional program run when a script times out; NULL if not configured */
+ const char *debug_prog;
+ /* True while an instance of debug_prog is in flight */
+ bool debug_running;
+
+ /* Queue that serialises events so only one runs at a time */
+ struct tevent_queue *queue;
+ /* Request whose scripts are currently executing, NULL if none */
+ struct tevent_req *current_req;
+ /* True if current_req is a "monitor" event */
+ bool monitor_running;
+};
+
+
+/*
+ * Create a run_event context for script_dir.
+ *
+ * script_dir must exist and be a directory; otherwise the errno from
+ * stat() or ENOTDIR is returned.  script_dir and debug_prog (if not NULL)
+ * are copied onto the new context.  Returns 0 and sets *out on success,
+ * ENOMEM on allocation failure.
+ */
+int run_event_init(TALLOC_CTX *mem_ctx, struct run_proc_context *run_proc_ctx,
+ const char *script_dir, const char *debug_prog,
+ struct run_event_context **out)
+{
+ struct run_event_context *run_ctx;
+ struct stat st;
+ int ret;
+
+ run_ctx = talloc_zero(mem_ctx, struct run_event_context);
+ if (run_ctx == NULL) {
+ return ENOMEM;
+ }
+
+ run_ctx->run_proc_ctx = run_proc_ctx;
+
+ ret = stat(script_dir, &st);
+ if (ret != 0) {
+ /* Save errno before talloc_free() has a chance to change it */
+ ret = errno;
+ talloc_free(run_ctx);
+ return ret;
+ }
+
+ if (! S_ISDIR(st.st_mode)) {
+ talloc_free(run_ctx);
+ return ENOTDIR;
+ }
+
+ run_ctx->script_dir = talloc_strdup(run_ctx, script_dir);
+ if (run_ctx->script_dir == NULL) {
+ talloc_free(run_ctx);
+ return ENOMEM;
+ }
+
+ if (debug_prog != NULL) {
+ run_ctx->debug_prog = talloc_strdup(run_ctx, debug_prog);
+ if (run_ctx->debug_prog == NULL) {
+ talloc_free(run_ctx);
+ return ENOMEM;
+ }
+ }
+
+ run_ctx->debug_running = false;
+
+ /* All events are funnelled through this queue, one at a time */
+ run_ctx->queue = tevent_queue_create(run_ctx, "run event queue");
+ if (run_ctx->queue == NULL) {
+ talloc_free(run_ctx);
+ return ENOMEM;
+ }
+
+ run_ctx->monitor_running = false;
+
+ *out = run_ctx;
+ return 0;
+}
+
+/* Accessor: the run_proc context used to spawn scripts for this run_ctx */
+static struct run_proc_context *
+run_event_run_proc_context(struct run_event_context *run_ctx)
+{
+ return run_ctx->run_proc_ctx;
+}
+
+/* Accessor: the directory holding the event scripts */
+static const char *run_event_script_dir(struct run_event_context *run_ctx)
+{
+ return run_ctx->script_dir;
+}
+
+/* Accessor: path of the debug program, or NULL if none was configured */
+static const char *run_event_debug_prog(struct run_event_context *run_ctx)
+{
+ return run_ctx->debug_prog;
+}
+
+/* Accessor: the queue on which run_event requests wait for their turn */
+static struct tevent_queue *run_event_queue(struct run_event_context *run_ctx)
+{
+ return run_ctx->queue;
+}
+
+/*
+ * Record req as the event currently executing scripts and whether it is a
+ * "monitor" event.
+ */
+static void run_event_start_running(struct run_event_context *run_ctx,
+ struct tevent_req *req, bool is_monitor)
+{
+ run_ctx->current_req = req;
+ run_ctx->monitor_running = is_monitor;
+}
+
+/* Forget the currently running event; no event is marked as running */
+static void run_event_stop_running(struct run_event_context *run_ctx)
+{
+ run_ctx->current_req = NULL;
+ run_ctx->monitor_running = false;
+}
+
+/*
+ * Return the request currently executing scripts (NULL if none) and store
+ * in *is_monitor whether it is a "monitor" event.
+ */
+static struct tevent_req *run_event_get_running(
+ struct run_event_context *run_ctx,
+ bool *is_monitor)
+{
+ *is_monitor = run_ctx->monitor_running;
+ return run_ctx->current_req;
+}
+
+/*
+ * Collapse a script's run_proc result into a single summary value:
+ *   - killed by a signal       -> -EINTR
+ *   - failed with EACCES       -> -ENOEXEC
+ *   - failed with another errno -> -errno
+ *   - otherwise                -> the script's exit status
+ */
+static int run_event_script_status(struct run_event_script *script)
+{
+	const struct run_proc_result *res = &script->result;
+
+	if (res->sig > 0) {
+		return -EINTR;
+	}
+
+	if (res->err > 0) {
+		/* Map EACCES to ENOEXEC */
+		return (res->err == EACCES) ? -ENOEXEC : -res->err;
+	}
+
+	return res->status;
+}
+
+/*
+ * Return the list of scripts in the context's script directory.
+ *
+ * On success returns 0 and sets *output to a list allocated on mem_ctx, or
+ * to NULL if there are no scripts.  Disabled scripts are included with
+ * summary set to -ENOEXEC.  On failure returns the error from
+ * event_script_get_list() or ENOMEM, and *output is untouched.
+ */
+int run_event_list(struct run_event_context *run_ctx,
+		   TALLOC_CTX *mem_ctx,
+		   struct run_event_script_list **output)
+{
+	struct event_script_list *s_list = NULL;
+	struct run_event_script_list *script_list = NULL;
+	unsigned int i;
+	int ret;
+
+	ret = event_script_get_list(mem_ctx,
+				    run_event_script_dir(run_ctx),
+				    &s_list);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (s_list->num_scripts == 0) {
+		*output = NULL;
+		talloc_free(s_list);
+		return 0;
+	}
+
+	script_list = talloc_zero(mem_ctx, struct run_event_script_list);
+	if (script_list == NULL) {
+		/* Do not leave the intermediate list behind on mem_ctx */
+		talloc_free(s_list);
+		return ENOMEM;
+	}
+
+	script_list->num_scripts = s_list->num_scripts;
+	script_list->script = talloc_zero_array(script_list,
+						struct run_event_script,
+						script_list->num_scripts);
+	if (script_list->script == NULL) {
+		talloc_free(s_list);
+		talloc_free(script_list);
+		return ENOMEM;
+	}
+
+	for (i=0; i < s_list->num_scripts; i++) {
+		struct event_script *s = s_list->script[i];
+		struct run_event_script *script = &script_list->script[i];
+
+		/* Re-parent the name so it outlives s_list */
+		script->name = talloc_steal(script_list->script, s->name);
+
+		if (! s->enabled) {
+			script->summary = -ENOEXEC;
+		}
+	}
+
+	talloc_free(s_list);
+	*output = script_list;
+	return 0;
+}
+
+/*
+ * Enable script_name in this context's script directory by delegating to
+ * event_script_chmod(); its return value is passed through unchanged.
+ */
+int run_event_script_enable(struct run_event_context *run_ctx,
+			    const char *script_name)
+{
+	const char *dir = run_event_script_dir(run_ctx);
+
+	return event_script_chmod(dir, script_name, true);
+}
+
+/*
+ * Disable script_name in this context's script directory by delegating to
+ * event_script_chmod(); its return value is passed through unchanged.
+ */
+int run_event_script_disable(struct run_event_context *run_ctx,
+			     const char *script_name)
+{
+	const char *dir = run_event_script_dir(run_ctx);
+
+	return event_script_chmod(dir, script_name, false);
+}
+
+/*
+ * Run debug program to diagnose hung scripts
+ */
+
+/*
+ * Build the argument vector for the debug program:
+ *   { path, "<pid>", event_str, NULL }
+ *
+ * path and event_str are referenced, not copied; the pid string is
+ * allocated as a child of the array.  Returns 0 and sets *out on success,
+ * ENOMEM on allocation failure.
+ */
+static int debug_args(TALLOC_CTX *mem_ctx, const char *path,
+		      const char *event_str, pid_t pid, const char ***out)
+{
+	const char **args;
+	char *pid_str;
+
+	args = talloc_array(mem_ctx, const char *, 4);
+	if (args == NULL) {
+		return ENOMEM;
+	}
+
+	pid_str = talloc_asprintf(args, "%d", pid);
+	if (pid_str == NULL) {
+		talloc_free(args);
+		return ENOMEM;
+	}
+
+	args[0] = path;
+	args[1] = pid_str;
+	args[2] = event_str;
+	args[3] = NULL;
+
+	*out = args;
+	return 0;
+}
+
+/*
+ * Log output one line at a time at the given level, each line prefixed
+ * with "log_prefix: ".  If a scratch copy cannot be allocated, the whole
+ * output is logged as a single message instead.
+ */
+static void debug_log(int loglevel, const char *output, const char *log_prefix)
+{
+	char *copy, *cur;
+
+	/* strtok() writes into its argument, so work on a private copy */
+	copy = strdup(output);
+	if (copy == NULL) {
+		DEBUG(loglevel, ("%s: %s\n", log_prefix, output));
+		return;
+	}
+
+	for (cur = strtok(copy, "\n");
+	     cur != NULL;
+	     cur = strtok(NULL, "\n")) {
+		DEBUG(loglevel, ("%s: %s\n", log_prefix, cur));
+	}
+
+	free(copy);
+}
+
+/* Per-request state for running the debug program */
+struct run_debug_state {
+ /* Owning context; its debug_running flag is cleared on completion */
+ struct run_event_context *run_ctx;
+ /* Pid of the script being debugged; its process group is sent SIGTERM */
+ pid_t pid;
+};
+
+static void run_debug_done(struct tevent_req *subreq);
+
+/*
+ * Start the configured debug program against the script with the given pid.
+ *
+ * The request completes immediately (successfully, without running
+ * anything) when no debug program is configured, when a debug program is
+ * already running, or when pid is -1.  Otherwise the program is started as
+ * "<debug_prog> <pid> <event_str>" with no timeout.
+ *
+ * Returns NULL only if the request itself cannot be allocated.
+ */
+static struct tevent_req *run_debug_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct run_event_context *run_ctx,
+ const char *event_str, pid_t pid)
+{
+ struct tevent_req *req, *subreq;
+ struct run_debug_state *state;
+ const char **argv;
+ const char *debug_prog;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state, struct run_debug_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->run_ctx = run_ctx;
+ state->pid = pid;
+
+ debug_prog = run_event_debug_prog(run_ctx);
+ if (debug_prog == NULL) {
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ /* Only one debug program at a time */
+ if (run_ctx->debug_running) {
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ if (pid == -1) {
+ D_DEBUG("Event script terminated, nothing to debug\n");
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ ret = debug_args(state, debug_prog, event_str, pid, &argv);
+ if (ret != 0) {
+ D_ERR("debug_args() failed\n");
+ tevent_req_error(req, ret);
+ return tevent_req_post(req, ev);
+ }
+
+ D_DEBUG("Running debug %s with args \"%s %s\"\n",
+ debug_prog, argv[1], argv[2]);
+
+ /* A zero timeout means the debug program is not time limited */
+ subreq = run_proc_send(state, ev, run_event_run_proc_context(run_ctx),
+ debug_prog, argv, -1, tevent_timeval_zero());
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, run_debug_done, req);
+
+ run_ctx->debug_running = true;
+
+ /* run_proc_send() has already forked the child, argv is done with */
+ talloc_free(argv);
+ return req;
+}
+
+/*
+ * Completion callback for the debug program.
+ *
+ * Clears the debug_running flag, logs whatever the debug program printed,
+ * sends SIGTERM to the process group of the script that was being debugged
+ * and completes the request.  A failure to run the debug program is logged
+ * but does not fail the request.
+ */
+static void run_debug_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct run_debug_state *state = tevent_req_data(
+		req, struct run_debug_state);
+	/*
+	 * run_proc_recv() does not touch the output pointer when it fails,
+	 * so it must start out as NULL.
+	 */
+	char *output = NULL;
+	int ret;
+	bool status;
+
+	state->run_ctx->debug_running = false;
+
+	status = run_proc_recv(subreq, &ret, NULL, NULL, state, &output);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("Running debug failed, ret=%d\n", ret);
+	}
+
+	/* Log output */
+	if (output != NULL) {
+		debug_log(DEBUG_ERR, output, "event_debug");
+		talloc_free(output);
+	}
+
+	/* Negative pid: signal the whole process group of the script */
+	kill(-state->pid, SIGTERM);
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of run_debug_send().
+ *
+ * Returns true on success.  On failure returns false and, if perr is not
+ * NULL, stores the unix error in *perr.
+ */
+static bool run_debug_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Run a single event
+ */
+
+/* Per-request state for running one event across all scripts */
+struct run_event_state {
+ struct tevent_context *ev;
+ struct run_event_context *run_ctx;
+ /* Private copies of the event name and its optional argument string */
+ const char *event_str;
+ const char *arg_str;
+ /* Timeout passed to run_proc_send() for each script */
+ struct timeval timeout;
+ /* If false, stop at the first script that fails */
+ bool continue_on_failure;
+
+ /* Scripts and their results; handed to the caller by run_event_recv() */
+ struct run_event_script_list *script_list;
+ /* Shared argv; argv[0] is rewritten with each script's name */
+ const char **argv;
+ /* In-flight run_proc request for the current script, NULL if none */
+ struct tevent_req *script_subreq;
+ /* Index into script_list->script of the current script */
+ unsigned int index;
+ /* Set by run_event_cancel() */
+ bool cancelled;
+};
+
+static void run_event_cancel(struct tevent_req *req);
+static void run_event_trigger(struct tevent_req *req, void *private_data);
+static struct tevent_req *run_event_run_script(struct tevent_req *req);
+static void run_event_next_script(struct tevent_req *subreq);
+static void run_event_debug(struct tevent_req *req, pid_t pid);
+static void run_event_debug_done(struct tevent_req *subreq);
+
+/*
+ * Start running event_str (with optional arg_str) across all scripts.
+ *
+ * The request is added to the context's queue and its scripts start when
+ * it reaches the head of the queue.  A running "monitor" event is
+ * cancelled in favour of the new event; a new "monitor" event requested
+ * while some other event is running completes immediately with the list
+ * summary set to -ECANCELED.
+ *
+ * Returns NULL only if the request itself cannot be allocated.
+ */
+struct tevent_req *run_event_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct run_event_context *run_ctx,
+ const char *event_str,
+ const char *arg_str,
+ struct timeval timeout,
+ bool continue_on_failure)
+{
+ struct tevent_req *req, *current_req;
+ struct run_event_state *state;
+ bool monitor_running, status;
+
+ req = tevent_req_create(mem_ctx, &state, struct run_event_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->run_ctx = run_ctx;
+ state->event_str = talloc_strdup(state, event_str);
+ if (tevent_req_nomem(state->event_str, req)) {
+ return tevent_req_post(req, ev);
+ }
+ if (arg_str != NULL) {
+ state->arg_str = talloc_strdup(state, arg_str);
+ if (tevent_req_nomem(state->arg_str, req)) {
+ return tevent_req_post(req, ev);
+ }
+ }
+ state->timeout = timeout;
+ state->continue_on_failure = continue_on_failure;
+ state->cancelled = false;
+
+ /*
+ * Placeholder list so that run_event_recv() has something to return
+ * if the event finishes before any scripts are listed.
+ */
+ state->script_list = talloc_zero(state, struct run_event_script_list);
+ if (tevent_req_nomem(state->script_list, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ /*
+ * If monitor event is running,
+ * cancel the running monitor event and run new event
+ *
+ * If any other event is running,
+ * if new event is monitor, cancel that event
+ * else add new event to the queue
+ */
+
+ current_req = run_event_get_running(run_ctx, &monitor_running);
+ if (current_req != NULL) {
+ if (monitor_running) {
+ run_event_cancel(current_req);
+ } else if (strcmp(event_str, "monitor") == 0) {
+ state->script_list->summary = -ECANCELED;
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+ }
+
+ status = tevent_queue_add(run_event_queue(run_ctx), ev, req,
+ run_event_trigger, NULL);
+ if (! status) {
+ tevent_req_error(req, ENOMEM);
+ return tevent_req_post(req, ev);
+ }
+
+ return req;
+}
+
+/*
+ * Cancel a running event: clear the context's running state, mark the
+ * script list as -ECANCELED, drop the in-flight script request and
+ * complete req successfully.
+ */
+static void run_event_cancel(struct tevent_req *req)
+{
+ struct run_event_state *state = tevent_req_data(
+ req, struct run_event_state);
+
+ run_event_stop_running(state->run_ctx);
+
+ state->script_list->summary = -ECANCELED;
+ state->cancelled = true;
+
+ /* Freeing the run_proc request ends the wait for the current script */
+ TALLOC_FREE(state->script_subreq);
+
+ tevent_req_done(req);
+}
+
+/*
+ * Queue trigger: called when req reaches the head of the event queue.
+ *
+ * Lists the scripts, builds the shared argv, starts the first script and
+ * registers req as the running event.  With no scripts to run the request
+ * completes immediately, keeping the empty list set up by run_event_send().
+ */
+static void run_event_trigger(struct tevent_req *req, void *private_data)
+{
+ struct tevent_req *subreq;
+ struct run_event_state *state = tevent_req_data(
+ req, struct run_event_state);
+ struct run_event_script_list *script_list;
+ int ret;
+ bool is_monitor = false;
+
+ D_DEBUG("Running event %s with args \"%s\"\n", state->event_str,
+ state->arg_str == NULL ? "(null)" : state->arg_str);
+
+ ret = get_script_list(state,
+ run_event_script_dir(state->run_ctx),
+ &script_list);
+ if (ret != 0) {
+ D_ERR("get_script_list() failed, ret=%d\n", ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* No scripts */
+ if (script_list == NULL || script_list->num_scripts == 0) {
+ tevent_req_done(req);
+ return;
+ }
+
+ /* Replace the placeholder list with the real one */
+ talloc_free(state->script_list);
+ state->script_list = script_list;
+
+ ret = script_args(state, state->event_str, state->arg_str,
+ &state->argv);
+ if (ret != 0) {
+ D_ERR("script_args() failed, ret=%d\n", ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ state->index = 0;
+
+ subreq = run_event_run_script(req);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, run_event_next_script, req);
+
+ state->script_subreq = subreq;
+
+ /* Only mark the event as running once the first script was started */
+ if (strcmp(state->event_str, "monitor") == 0) {
+ is_monitor = true;
+ }
+ run_event_start_running(state->run_ctx, req, is_monitor);
+}
+
+/*
+ * Start the script at state->index.
+ *
+ * The script is executed as "<script_dir>/<name>.script" with argv[0] set
+ * to the script name, and its begin time is recorded.  Returns the
+ * run_proc request, or NULL if the path or the request cannot be allocated.
+ */
+static struct tevent_req *run_event_run_script(struct tevent_req *req)
+{
+ struct run_event_state *state = tevent_req_data(
+ req, struct run_event_state);
+ struct run_event_script *script;
+ struct tevent_req *subreq;
+ char *path;
+
+ script = &state->script_list->script[state->index];
+
+ path = talloc_asprintf(state, "%s/%s.script",
+ run_event_script_dir(state->run_ctx),
+ script->name);
+ if (path == NULL) {
+ return NULL;
+ }
+
+ state->argv[0] = script->name;
+ script->begin = tevent_timeval_current();
+
+ D_DEBUG("Running %s with args \"%s %s\"\n",
+ path, state->argv[0], state->argv[1]);
+
+ subreq = run_proc_send(state, state->ev,
+ run_event_run_proc_context(state->run_ctx),
+ path, state->argv, -1, state->timeout);
+
+ /* The child has been forked inside run_proc_send(); path is done with */
+ talloc_free(path);
+
+ return subreq;
+}
+
+/*
+ * Completion callback for one script.
+ *
+ * Records the script's end time, result and output, logs the output and
+ * computes the script summary.  If the script failed (anything other than
+ * 0 or -ENOEXEC) the list summary is set; unless continue_on_failure is
+ * set, the list is truncated at this script, the debug program is started
+ * for a timed out script, and the event completes.  Otherwise the next
+ * script is started, or the event completes once all scripts have run.
+ *
+ * Whenever the event completes here, the context's running state is
+ * cleared first so that it never refers to a finished request.
+ */
+static void run_event_next_script(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct run_event_state *state = tevent_req_data(
+		req, struct run_event_state);
+	struct run_event_script *script;
+	pid_t pid;
+	int ret;
+	bool status;
+
+	script = &state->script_list->script[state->index];
+	script->end = tevent_timeval_current();
+
+	status = run_proc_recv(subreq, &ret, &script->result, &pid,
+			       state->script_list, &script->output);
+	TALLOC_FREE(subreq);
+	state->script_subreq = NULL;
+	if (! status) {
+		D_ERR("run_proc failed for %s, ret=%d\n", script->name, ret);
+		run_event_stop_running(state->run_ctx);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	if (state->cancelled) {
+		return;
+	}
+
+	/* Log output */
+	if (script->output != NULL) {
+		debug_log(DEBUG_ERR, script->output, script->name);
+	}
+
+	D_DEBUG("Script %s finished sig=%d, err=%d, status=%d\n",
+		script->name, script->result.sig, script->result.err,
+		script->result.status);
+
+
+	/* If a script fails, stop running */
+	script->summary = run_event_script_status(script);
+	if (script->summary != 0 && script->summary != -ENOEXEC) {
+		state->script_list->summary = script->summary;
+
+		if (! state->continue_on_failure) {
+			/* Report only the scripts that were actually run */
+			state->script_list->num_scripts = state->index + 1;
+
+			if (script->summary == -ETIMEDOUT && pid != -1) {
+				run_event_debug(req, pid);
+			}
+			D_NOTICE("%s event %s\n", state->event_str,
+				 (script->summary == -ETIMEDOUT) ?
+				 "timed out" :
+				 "failed");
+			run_event_stop_running(state->run_ctx);
+			tevent_req_done(req);
+			return;
+		}
+	}
+
+	state->index += 1;
+
+	/* All scripts executed */
+	if (state->index >= state->script_list->num_scripts) {
+		run_event_stop_running(state->run_ctx);
+		tevent_req_done(req);
+		return;
+	}
+
+	subreq = run_event_run_script(req);
+	if (subreq == NULL) {
+		/*
+		 * The event is about to fail with ENOMEM.  Clear the running
+		 * state before the request completes: completing it may run
+		 * the caller's callback, which can free req and state, and
+		 * the context must not keep pointing at a finished request.
+		 */
+		run_event_stop_running(state->run_ctx);
+	}
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, run_event_next_script, req);
+
+	state->script_subreq = subreq;
+}
+
+/*
+ * Fire off the debug program for the timed out script with the given pid.
+ * Failure to start it is only logged; the event itself is not affected.
+ */
+static void run_event_debug(struct tevent_req *req, pid_t pid)
+{
+ struct run_event_state *state = tevent_req_data(
+ req, struct run_event_state);
+ struct tevent_req *subreq;
+
+ /*
+ * The debug request is allocated on run_ctx rather than on state, so
+ * it is not freed together with the event request.
+ */
+ subreq = run_debug_send(state->run_ctx, state->ev, state->run_ctx,
+ state->event_str, pid);
+ if (subreq == NULL) {
+ /* If run debug fails, it's not an error */
+ D_NOTICE("Failed to run event debug\n");
+ return;
+ }
+ /* No callback data: the event request may be gone by then */
+ tevent_req_set_callback(subreq, run_event_debug_done, NULL);
+}
+
+/*
+ * Completion callback for the debug program started by run_event_debug().
+ * Reaps the request and logs a notice if it failed.
+ */
+static void run_event_debug_done(struct tevent_req *subreq)
+{
+	int err = 0;
+	bool ok;
+
+	ok = run_debug_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		return;
+	}
+
+	D_NOTICE("run_debug() failed, ret=%d\n", err);
+}
+
+/*
+ * Collect the result of run_event_send().
+ *
+ * On failure returns false and, if perr is not NULL, stores the unix error
+ * in *perr.  On success returns true and, if script_list is not NULL,
+ * moves the list of scripts and their results onto mem_ctx.
+ */
+bool run_event_recv(struct tevent_req *req, int *perr,
+		    TALLOC_CTX *mem_ctx,
+		    struct run_event_script_list **script_list)
+{
+	struct run_event_state *state = tevent_req_data(
+		req, struct run_event_state);
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		if (script_list != NULL) {
+			*script_list = talloc_steal(mem_ctx,
+						    state->script_list);
+		}
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
diff --git a/ctdb/common/run_event.h b/ctdb/common/run_event.h
new file mode 100644
index 0000000..f53bca3
--- /dev/null
+++ b/ctdb/common/run_event.h
@@ -0,0 +1,150 @@
+/*
+ Run scripts in a directory with specific event arguments
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_RUN_EVENT_H__
+#define __CTDB_RUN_EVENT_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/run_proc.h"
+
+/**
+ * @file run_event.h
+ *
+ * @brief Run scripts in a directory with specific event arguments.
+ *
+ * This abstraction allows one to execute multiple scripts in a directory
+ * (specified by script_dir) with given event and arguments.
+ *
+ * At one time, only one event can be run. Multiple run_event calls
+ * will cause events to be queued up. They will be run sequentially.
+ *
+ * A "monitor" event is special and has special semantics.
+ *
+ * If a monitor event is running and another event is scheduled, the
+ * currently running monitor event is cancelled.
+ *
+ * If an event (not monitor) is running and monitor event is scheduled,
+ * then the monitor event will be cancelled immediately.
+ */
+
+/**
+ * @brief The run event context
+ */
+struct run_event_context;
+
+/**
+ * @brief Result of running a single script
+ */
+struct run_event_script {
+ /* Script name, without the directory or the ".script" suffix */
+ char *name;
+ /* Times at which the script was started and at which it completed */
+ struct timeval begin, end;
+ /* Raw result as reported by run_proc */
+ struct run_proc_result result;
+ /*
+ * Condensed status: the exit status on a normal exit, or a negative
+ * errno (-ENOEXEC for a disabled script, -EINTR if killed by a signal)
+ */
+ int summary;
+ /* Captured output of the script, NULL if there was none */
+ char *output;
+};
+
+/**
+ * @brief List of scripts together with the overall result of an event
+ */
+struct run_event_script_list {
+ /* Number of entries in script */
+ uint32_t num_scripts;
+ /* Array of num_scripts entries */
+ struct run_event_script *script;
+ /* Summary of a failed script, -ECANCELED if cancelled, 0 otherwise */
+ int summary;
+};
+
+
+/**
+ * @brief Initialize the context for running events
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] run_proc_ctx Run_proc context used to execute the scripts
+ * @param[in] script_dir Directory containing script to run
+ * @param[in] debug_prog Path of a program to run if a script hangs
+ * @param[out] result New run_event context
+ * @return 0 on success, errno on error
+ */
+int run_event_init(TALLOC_CTX *mem_ctx, struct run_proc_context *run_proc_ctx,
+ const char *script_dir, const char *debug_prog,
+ struct run_event_context **result);
+
+/**
+ * @brief Get a list of scripts
+ *
+ * @param[in] run_ctx Run_event context
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] output List of valid scripts
+ * @return 0 on success, errno on failure
+ */
+int run_event_list(struct run_event_context *run_ctx,
+ TALLOC_CTX *mem_ctx,
+ struct run_event_script_list **output);
+
+/**
+ * @brief Enable a script
+ *
+ * @param[in] run_ctx Run_event context
+ * @param[in] script_name Name of the script to enable
+ * @return 0 on success, errno on failure
+ */
+int run_event_script_enable(struct run_event_context *run_ctx,
+ const char *script_name);
+
+/**
+ * @brief Disable a script
+ *
+ * @param[in] run_ctx Run_event context
+ * @param[in] script_name Name of the script to disable
+ * @return 0 on success, errno on failure
+ */
+int run_event_script_disable(struct run_event_context *run_ctx,
+ const char *script_name);
+
+/**
+ * @brief Async computation start to run an event
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] run_ctx Run_event context
+ * @param[in] event_str The event argument to the script
+ * @param[in] arg_str Event arguments to the script
+ * @param[in] timeout How long to wait for execution
+ * @param[in] continue_on_failure Whether to continue to run events on failure
+ * @return new tevent request, or NULL on failure
+ *
+ * arg_str contains optional arguments for an event.
+ */
+struct tevent_req *run_event_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct run_event_context *run_ctx,
+ const char *event_str,
+ const char *arg_str,
+ struct timeval timeout,
+ bool continue_on_failure);
+
+/**
+ * @brief Async computation end to run an event
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] output List of scripts executed and their status
+ * @return true on success, false on failure
+ */
+bool run_event_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx,
+ struct run_event_script_list **output);
+
+#endif /* __CTDB_RUN_EVENT_H__ */
+
diff --git a/ctdb/common/run_proc.c b/ctdb/common/run_proc.c
new file mode 100644
index 0000000..84bc343
--- /dev/null
+++ b/ctdb/common/run_proc.c
@@ -0,0 +1,503 @@
+/*
+ Run a child process and collect the output
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/blocking.h"
+#include "lib/util/dlinklist.h"
+
+#include "common/run_proc.h"
+
+/*
+ * Process abstraction
+ */
+
+struct run_proc_context;
+
+/* Bookkeeping for one child process */
+struct proc_context {
+ /* Links for run_proc_context.plist */
+ struct proc_context *prev, *next;
+
+ /* Pid of the child, -1 if not started or already terminated */
+ pid_t pid;
+
+ /* Read end of the pipe carrying the child's stdout/stderr, -1 if closed */
+ int fd;
+ struct tevent_fd *fde;
+
+ /* NUL terminated output collected so far, NULL if none */
+ char *output;
+ struct run_proc_result result;
+
+ /* Request waiting for this process, NULL if nobody is waiting */
+ struct tevent_req *req;
+};
+
+static int proc_destructor(struct proc_context *proc);
+
+/*
+ * Allocate a proc_context on mem_ctx with pid and fd marked as unset (-1)
+ * and proc_destructor installed.  Returns NULL on allocation failure.
+ * The run_ctx argument is currently not used.
+ */
+static struct proc_context *proc_new(TALLOC_CTX *mem_ctx,
+				     struct run_proc_context *run_ctx)
+{
+	struct proc_context *p = talloc_zero(mem_ctx, struct proc_context);
+
+	if (p != NULL) {
+		p->pid = -1;
+		p->fd = -1;
+
+		talloc_set_destructor(p, proc_destructor);
+	}
+
+	return p;
+}
+
+static void run_proc_kill(struct tevent_req *req);
+
+/*
+ * talloc destructor for a proc_context.
+ *
+ * Completes any request still waiting on the process (reported as killed
+ * by SIGKILL), drops the fd event and sends SIGKILL to the child's process
+ * group if the child has not been reaped yet.
+ */
+static int proc_destructor(struct proc_context *proc)
+{
+ if (proc->req != NULL) {
+ run_proc_kill(proc->req);
+ }
+
+ talloc_free(proc->fde);
+ if (proc->pid != -1) {
+ /* Negative pid: signal the child's whole process group */
+ kill(-proc->pid, SIGKILL);
+ }
+
+ return 0;
+}
+
+static void proc_read_handler(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags,
+ void *private_data);
+
+/*
+ * Fork and exec path with argv, capturing stdout and stderr via a pipe.
+ *
+ * In the child, stdout and stderr are redirected to the pipe, stdin is
+ * replaced by stdin_fd (unless it is -1) and the child is moved into its
+ * own process group.  Any failure in the child is reported to the parent
+ * as exit code 64 + errno.
+ *
+ * In the parent, the read end of the pipe is registered with ev so that
+ * proc_read_handler() collects the output.
+ *
+ * Returns 0 on success, errno on failure.
+ */
+static int proc_start(struct proc_context *proc, struct tevent_context *ev,
+		      const char *path, const char **argv, int stdin_fd)
+{
+	int fd[2];
+	int ret;
+
+	ret = pipe(fd);
+	if (ret != 0) {
+		/* pipe() returns -1; callers expect an errno value */
+		return errno;
+	}
+
+	proc->pid = fork();
+	if (proc->pid == -1) {
+		ret = errno;
+		close(fd[0]);
+		close(fd[1]);
+		return ret;
+	}
+
+	if (proc->pid == 0) {
+		close(fd[0]);
+
+		ret = dup2(fd[1], STDOUT_FILENO);
+		if (ret == -1) {
+			exit(64 + errno);
+		}
+		ret = dup2(fd[1], STDERR_FILENO);
+		if (ret == -1) {
+			exit(64 + errno);
+		}
+
+		close(fd[1]);
+
+		if (stdin_fd != -1) {
+			ret = dup2(stdin_fd, STDIN_FILENO);
+			if (ret == -1) {
+				exit(64 + errno);
+			}
+		}
+
+		/* Own process group, so the parent can signal it as a whole */
+		ret = setpgid(0, 0);
+		if (ret != 0) {
+			exit(64 + errno);
+		}
+
+		ret = execv(path, discard_const(argv));
+		if (ret != 0) {
+			exit(64 + errno);
+		}
+
+		exit(64 + ENOEXEC);
+	}
+
+	close(fd[1]);
+
+	proc->fde = tevent_add_fd(ev, proc, fd[0], TEVENT_FD_READ,
+				  proc_read_handler, proc);
+	if (proc->fde == NULL) {
+		/* proc->fd stays -1, it must never refer to a closed fd */
+		close(fd[0]);
+		return ENOMEM;
+	}
+	proc->fd = fd[0];
+
+	/* The pipe is closed automatically when the fd event is freed */
+	tevent_fd_set_auto_close(proc->fde);
+
+	return 0;
+}
+
+/*
+ * fd event handler: append whatever the child has written to proc->output.
+ *
+ * The number of pending bytes is obtained with FIONREAD; zero pending
+ * bytes is treated as the pipe having been closed.  On any failure the
+ * child's process group is killed.  In both cases the fd event is freed
+ * and proc->fd is reset to -1.
+ */
+static void proc_read_handler(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags,
+ void *private_data)
+{
+ struct proc_context *proc = talloc_get_type_abort(
+ private_data, struct proc_context);
+ size_t offset;
+ ssize_t nread;
+ int len = 0;
+ int ret;
+
+ ret = ioctl(proc->fd, FIONREAD, &len);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ if (len == 0) {
+ /* pipe closed */
+ goto close;
+ }
+
+ /* output is kept NUL terminated, so strlen() gives the append offset */
+ offset = (proc->output == NULL) ? 0 : strlen(proc->output);
+
+ /* Room for the new data plus the terminating NUL */
+ proc->output = talloc_realloc(proc, proc->output, char, offset+len+1);
+ if (proc->output == NULL) {
+ goto fail;
+ }
+
+ nread = sys_read(proc->fd, proc->output + offset, len);
+ if (nread == -1) {
+ goto fail;
+ }
+ proc->output[offset+nread] = '\0';
+ return;
+
+fail:
+ if (proc->pid != -1) {
+ kill(-proc->pid, SIGKILL);
+ proc->pid = -1;
+ }
+close:
+ TALLOC_FREE(proc->fde);
+ proc->fd = -1;
+}
+
+
+/*
+ * Run proc abstraction
+ */
+
+/* Context shared by all processes started through run_proc_send() */
+struct run_proc_context {
+ struct tevent_context *ev;
+ /* SIGCHLD handler used to reap children */
+ struct tevent_signal *se;
+ /* List of processes that have not been reaped yet */
+ struct proc_context *plist;
+};
+
+static void run_proc_signal_handler(struct tevent_context *ev,
+ struct tevent_signal *se,
+ int signum, int count, void *siginfo,
+ void *private_data);
+static int run_proc_context_destructor(struct run_proc_context *run_ctx);
+static void run_proc_done(struct tevent_req *req);
+
+/*
+ * Create a run_proc context on mem_ctx and install its SIGCHLD handler on
+ * ev.  Returns 0 and sets *result on success, ENOMEM on failure.
+ */
+int run_proc_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		  struct run_proc_context **result)
+{
+	struct run_proc_context *ctx;
+	struct tevent_signal *sig;
+
+	ctx = talloc_zero(mem_ctx, struct run_proc_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	sig = tevent_add_signal(ev, ctx, SIGCHLD, 0,
+				run_proc_signal_handler, ctx);
+	if (sig == NULL) {
+		talloc_free(ctx);
+		return ENOMEM;
+	}
+
+	ctx->ev = ev;
+	ctx->se = sig;
+
+	talloc_set_destructor(ctx, run_proc_context_destructor);
+
+	*result = ctx;
+	return 0;
+}
+
+/*
+ * SIGCHLD handler: reap every child that has terminated.
+ *
+ * For each reaped child that is on the process list, the exit status is
+ * decoded into proc->result, any remaining output is read from the pipe,
+ * the process is taken off the list and the waiting request (if any) is
+ * completed.  A process nobody is waiting for is simply freed.  Children
+ * that are not on the list are reaped and ignored.
+ */
+static void run_proc_signal_handler(struct tevent_context *ev,
+ struct tevent_signal *se,
+ int signum, int count, void *siginfo,
+ void *private_data)
+{
+ struct run_proc_context *run_ctx = talloc_get_type_abort(
+ private_data, struct run_proc_context);
+ struct proc_context *proc;
+ pid_t pid = -1;
+ int status;
+
+ /* Loop until waitpid() has nothing more to report */
+again:
+ pid = waitpid(-1, &status, WNOHANG);
+ if (pid == -1) {
+ return;
+ }
+
+ if (pid == 0) {
+ return;
+ }
+
+ for (proc = run_ctx->plist; proc != NULL; proc = proc->next) {
+ if (proc->pid == pid) {
+ break;
+ }
+ }
+
+ if (proc == NULL) {
+ /* unknown process */
+ goto again;
+ }
+
+ /* Mark the process as terminated */
+ proc->pid = -1;
+
+ /* Update process status */
+ if (WIFEXITED(status)) {
+ int pstatus = WEXITSTATUS(status);
+ /*
+ * NOTE(review): WIFSIGNALED() inside WIFEXITED() looks unreachable,
+ * a status is normally one or the other - confirm before removing.
+ */
+ if (WIFSIGNALED(status)) {
+ proc->result.sig = WTERMSIG(status);
+ } else if (pstatus >= 64 && pstatus < 255) {
+ /* Exit codes of 64 + errno come from the child in proc_start() */
+ proc->result.err = pstatus-64;
+ } else {
+ proc->result.status = pstatus;
+ }
+ } else if (WIFSIGNALED(status)) {
+ proc->result.sig = WTERMSIG(status);
+ }
+
+ /* Confirm that all data has been read from the pipe */
+ if (proc->fd != -1) {
+ proc_read_handler(ev, proc->fde, 0, proc);
+ TALLOC_FREE(proc->fde);
+ proc->fd = -1;
+ }
+
+ DLIST_REMOVE(run_ctx->plist, proc);
+
+ /* Active run_proc request */
+ if (proc->req != NULL) {
+ run_proc_done(proc->req);
+ } else {
+ talloc_free(proc);
+ }
+
+ goto again;
+}
+
+/*
+ * talloc destructor for a run_proc context: remove the SIGCHLD handler,
+ * then unlink and free every process still on the list (freeing a process
+ * kills it, see proc_destructor).
+ */
+static int run_proc_context_destructor(struct run_proc_context *run_ctx)
+{
+	/* Get rid of signal handler */
+	TALLOC_FREE(run_ctx->se);
+
+	/* Kill any pending processes */
+	for (;;) {
+		struct proc_context *head = run_ctx->plist;
+
+		if (head == NULL) {
+			break;
+		}
+
+		DLIST_REMOVE(run_ctx->plist, head);
+		talloc_free(head);
+	}
+
+	return 0;
+}
+
+/* Per-request state for run_proc_send() */
+struct run_proc_state {
+ struct tevent_context *ev;
+ struct run_proc_context *run_ctx;
+ /* The process being waited for; set to NULL if it was destroyed */
+ struct proc_context *proc;
+
+ /* Values handed back to the caller by run_proc_recv() */
+ struct run_proc_result result;
+ char *output;
+ /* Pid of a process that is still running after a timeout, else -1 */
+ pid_t pid;
+};
+
+static int run_proc_state_destructor(struct run_proc_state *state);
+static void run_proc_timedout(struct tevent_req *subreq);
+
+/*
+ * Start path with argv as a child process and wait for it to finish.
+ *
+ * If path cannot be stat()ed or is not executable by its owner, the
+ * request completes immediately and successfully with result.err set to
+ * the errno or to EACCES.  Otherwise the process is started; if timeout is
+ * non-zero and expires first, the request completes with result.err set
+ * to ETIMEDOUT and the pid of the still running process.
+ *
+ * Returns NULL only if the request itself cannot be allocated.
+ */
+struct tevent_req *run_proc_send(TALLOC_CTX *mem_ctx,
+				 struct tevent_context *ev,
+				 struct run_proc_context *run_ctx,
+				 const char *path, const char **argv,
+				 int stdin_fd, struct timeval timeout)
+{
+	struct tevent_req *req;
+	struct run_proc_state *state;
+	struct stat st;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state, struct run_proc_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->run_ctx = run_ctx;
+	state->pid = -1;
+
+	ret = stat(path, &st);
+	if (ret != 0) {
+		state->result.err = errno;
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	if (! (st.st_mode & S_IXUSR)) {
+		state->result.err = EACCES;
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	/*
+	 * The process is owned by run_ctx, not by the request, so that it
+	 * can outlive the request after a timeout.
+	 */
+	state->proc = proc_new(run_ctx, run_ctx);
+	if (tevent_req_nomem(state->proc, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	state->proc->req = req;
+	DLIST_ADD(run_ctx->plist, state->proc);
+
+	ret = proc_start(state->proc, ev, path, argv, stdin_fd);
+	if (ret != 0) {
+		/*
+		 * Nothing will ever complete this process.  Take it off the
+		 * list and free it now, so that it neither leaks nor keeps
+		 * a pointer to a request that is about to go away.
+		 */
+		DLIST_REMOVE(run_ctx->plist, state->proc);
+		state->proc->req = NULL;
+		TALLOC_FREE(state->proc);
+
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	talloc_set_destructor(state, run_proc_state_destructor);
+
+	if (! tevent_timeval_is_zero(&timeout)) {
+		struct tevent_req *subreq;
+
+		subreq = tevent_wakeup_send(state, ev, timeout);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, run_proc_timedout, req);
+	}
+
+	return req;
+}
+
+/*
+ * talloc destructor for the request state.
+ *
+ * If the request goes away while it is still attached to its process
+ * (proc->req is set), the process is unlinked and freed, which kills it.
+ * After a timeout proc->req has already been cleared, so the process is
+ * left alone.
+ */
+static int run_proc_state_destructor(struct run_proc_state *state)
+{
+ /* Do not get rid of the child process if timeout has occurred */
+ if ((state->proc != NULL) && (state->proc->req != NULL)) {
+ /* Clear req first so proc_destructor does not complete this request */
+ state->proc->req = NULL;
+ DLIST_REMOVE(state->run_ctx->plist, state->proc);
+ TALLOC_FREE(state->proc);
+ }
+
+ return 0;
+}
+
+/*
+ * Complete a request whose process has terminated: copy the result, take
+ * over the collected output and the process itself, then mark the request
+ * as done.
+ */
+static void run_proc_done(struct tevent_req *req)
+{
+ struct run_proc_state *state = tevent_req_data(
+ req, struct run_proc_state);
+
+ state->proc->req = NULL;
+
+ state->result = state->proc->result;
+ if (state->proc->output != NULL) {
+ state->output = talloc_move(state, &state->proc->output);
+ }
+ /* From here on the process is freed together with the request */
+ talloc_steal(state, state->proc);
+
+ tevent_req_done(req);
+}
+
+/*
+ * Complete a request whose process is being destroyed (called from
+ * proc_destructor): detach request and process from each other and report
+ * the process as killed by SIGKILL.
+ */
+static void run_proc_kill(struct tevent_req *req)
+{
+ struct run_proc_state *state = tevent_req_data(
+ req, struct run_proc_state);
+
+ state->proc->req = NULL;
+ /* The process is going away; the state must not refer to it any more */
+ state->proc = NULL;
+
+ state->result.sig = SIGKILL;
+
+ tevent_req_done(req);
+}
+
+/*
+ * Timeout callback: complete the request with result.err = ETIMEDOUT,
+ * handing over the output collected so far and the pid of the process,
+ * which is left running.
+ */
+static void run_proc_timedout(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct run_proc_state *state = tevent_req_data(
+ req, struct run_proc_state);
+ bool status;
+
+ /*
+ * Detach the request from the process; with req cleared the state
+ * destructor will not free (and thereby kill) the process.
+ */
+ state->proc->req = NULL;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ state->result.err = ETIMEDOUT;
+ if (state->proc->output != NULL) {
+ state->output = talloc_move(state, &state->proc->output);
+ }
+ state->pid = state->proc->pid;
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of run_proc_send().
+ *
+ * On failure returns false and, if perr is not NULL, stores the unix error
+ * in *perr; no other output argument is touched.  On success returns true
+ * and fills in whichever of result, pid and output are not NULL; the
+ * output string is moved onto mem_ctx.
+ */
+bool run_proc_recv(struct tevent_req *req, int *perr,
+		   struct run_proc_result *result, pid_t *pid,
+		   TALLOC_CTX *mem_ctx, char **output)
+{
+	struct run_proc_state *state = tevent_req_data(
+		req, struct run_proc_state);
+	int err;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (result != NULL) {
+		*result = state->result;
+	}
+	if (pid != NULL) {
+		*pid = state->pid;
+	}
+	if (output != NULL) {
+		*output = talloc_move(mem_ctx, &state->output);
+	}
+
+	return true;
+}
diff --git a/ctdb/common/run_proc.h b/ctdb/common/run_proc.h
new file mode 100644
index 0000000..7b06dad
--- /dev/null
+++ b/ctdb/common/run_proc.h
@@ -0,0 +1,100 @@
+/*
+ Run a child process and collect the output
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_RUN_PROC_H__
+#define __CTDB_RUN_PROC_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file run_proc.h
+ *
+ * @brief Run a process and capture the output
+ *
+ * This abstraction allows one to execute scripts with arguments.
+ */
+
+/**
+ * @brief The run process context
+ */
+struct run_proc_context;
+
+/**
+ * @brief The exit status structure
+ *
+ * If the process is terminated due to a signal, sig is set.
+ * If the process is terminated due to an error, err is set.
+ * If the process terminates normally, status is set.
+ */
+struct run_proc_result {
+ int sig;
+ int err;
+ int status;
+};
+
+/**
+ * @brief Initialize the context for running processes
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[out] result New run_proc context
+ * @return 0 on success, errno on error
+ */
+int run_proc_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct run_proc_context **result);
+
+/**
+ * @brief Async computation start to run an executable
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] run_ctx Run_proc context
+ * @param[in] prog The path to the executable
+ * @param[in] argv Arguments to the executable
+ * @param[in] stdin_fd Assign stdin_fd as stdin for the process, -1 if not
+ * @param[in] timeout How long to wait for execution
+ * @return new tevent request, or NULL on failure
+ *
+ * argv must include program name as argv[0] and must be null terminated.
+ */
+struct tevent_req *run_proc_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct run_proc_context *run_ctx,
+ const char *prog, const char **argv,
+ int stdin_fd, struct timeval timeout);
+
+/**
+ * @brief Async computation end to run an executable
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[out] result The exit status of the executable
+ * @param[out] pid The pid of the child process (still running)
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] output The output from the executable (stdout + stderr)
+ * @return true on success, false on failure
+ *
+ * The returned pid is -1 if the process has terminated.
+ */
+bool run_proc_recv(struct tevent_req *req, int *perr,
+ struct run_proc_result *result, pid_t *pid,
+ TALLOC_CTX *mem_ctx, char **output);
+
+#endif /* __CTDB_RUN_PROC_H__ */
diff --git a/ctdb/common/sock_client.c b/ctdb/common/sock_client.c
new file mode 100644
index 0000000..75f471f
--- /dev/null
+++ b/ctdb/common/sock_client.c
@@ -0,0 +1,334 @@
+/*
+ A client based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/tevent_unix.h"
+
+#include "common/logging.h"
+#include "common/reqid.h"
+#include "common/comm.h"
+#include "common/sock_client.h"
+
+/*
+ * State for one client connection to a unix domain socket daemon.
+ */
+struct sock_client_context {
+ /* Protocol marshalling callbacks and the private data passed to them */
+ struct sock_client_proto_funcs *funcs;
+ void *private_data;
+
+ /* Optional callback invoked when the connection is found dead */
+ void (*disconnect_callback)(void *private_data);
+ void *disconnect_data;
+
+ /* Connected socket, closed by the destructor */
+ int fd;
+ /* Packet transport set up on fd */
+ struct comm_context *comm;
+ /* Allocator mapping request ids to pending request state */
+ struct reqid_context *idr;
+};
+
+/*
+ * Create a stream socket and connect it to the unix domain socket at
+ * sockpath.
+ *
+ * Returns the connected file descriptor, or -1 on failure (path too
+ * long for sun_path, socket() failure, or connect() failure).  Each
+ * failure is logged.
+ */
+
+static int socket_connect(const char *sockpath)
+{
+	struct sockaddr_un addr;
+	size_t copied;
+	int sock_fd;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	/* strlcpy() returns the source length, so >= size means truncation */
+	copied = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+	if (copied >= sizeof(addr.sun_path)) {
+		D_ERR("socket path too long: %s\n", sockpath);
+		return -1;
+	}
+
+	sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sock_fd == -1) {
+		D_ERR("socket create failed - %s\n", sockpath);
+		return -1;
+	}
+
+	if (connect(sock_fd, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
+		return sock_fd;
+	}
+
+	D_ERR("socket connect failed - %s\n", sockpath);
+	close(sock_fd);
+	return -1;
+}
+
+/*
+ * Socket client
+ */
+
+static int sock_client_context_destructor(struct sock_client_context *sockc);
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data);
+static void sock_client_dead_handler(void *private_data);
+
+static void sock_client_msg_reply(struct sock_client_context *sockc,
+ uint8_t *buf, size_t buflen);
+
+/*
+ * Create a client context connected to the daemon socket at sockpath.
+ *
+ * All three protocol callbacks in funcs are mandatory.  Returns 0 and
+ * stores the new context in *result on success.  On failure returns
+ * EINVAL (bad arguments), ENOMEM, EIO (connect failed) or the error
+ * from comm_setup()/reqid_init(); nothing is stored in *result.
+ */
+int sock_client_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ const char *sockpath,
+ struct sock_client_proto_funcs *funcs,
+ void *private_data,
+ struct sock_client_context **result)
+{
+ struct sock_client_context *sockc;
+ int ret;
+
+ if (sockpath == NULL) {
+ return EINVAL;
+ }
+
+ if (funcs == NULL || funcs->request_push == NULL ||
+ funcs->reply_pull == NULL || funcs->reply_reqid == NULL) {
+ return EINVAL;
+ }
+
+ sockc = talloc_zero(mem_ctx, struct sock_client_context);
+ if (sockc == NULL) {
+ return ENOMEM;
+ }
+
+ sockc->funcs = funcs;
+ sockc->private_data = private_data;
+
+ sockc->fd = socket_connect(sockpath);
+ if (sockc->fd == -1) {
+ talloc_free(sockc);
+ return EIO;
+ }
+
+ ret = comm_setup(sockc, ev, sockc->fd,
+ sock_client_read_handler, sockc,
+ sock_client_dead_handler, sockc,
+ &sockc->comm);
+ if (ret != 0) {
+ D_ERR("comm_setup() failed, ret=%d\n", ret);
+ /* The destructor is not installed yet, so close fd by hand */
+ close(sockc->fd);
+ talloc_free(sockc);
+ return ret;
+ }
+
+ ret = reqid_init(sockc, INT_MAX-200, &sockc->idr);
+ if (ret != 0) {
+ D_ERR("reqid_init() failed, ret=%d\n", ret);
+ close(sockc->fd);
+ talloc_free(sockc);
+ return ret;
+ }
+
+ /* From here on, freeing sockc closes the socket */
+ talloc_set_destructor(sockc, sock_client_context_destructor);
+
+ *result = sockc;
+ return 0;
+}
+
+/*
+ * Talloc destructor: free the comm context, then close the socket if
+ * it is still open.  Always returns 0.
+ */
+static int sock_client_context_destructor(struct sock_client_context *sockc)
+{
+ TALLOC_FREE(sockc->comm);
+ if (sockc->fd != -1) {
+ close(sockc->fd);
+ sockc->fd = -1;
+ }
+ return 0;
+}
+
+
+/*
+ * Read callback registered with comm_setup(): forwards each received
+ * buffer to sock_client_msg_reply() for matching with a pending request.
+ */
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ struct sock_client_context *sockc = talloc_get_type_abort(
+ private_data, struct sock_client_context);
+
+ sock_client_msg_reply(sockc, buf, buflen);
+}
+
+/*
+ * Dead-connection callback registered with comm_setup().
+ *
+ * If a disconnect callback has been registered it is called and the
+ * client context is then freed.  Without a callback the process logs a
+ * notice and exits with status 1.
+ */
+static void sock_client_dead_handler(void *private_data)
+{
+ struct sock_client_context *sockc = talloc_get_type_abort(
+ private_data, struct sock_client_context);
+
+ if (sockc->disconnect_callback != NULL) {
+ sockc->disconnect_callback(sockc->disconnect_data);
+ /* sockc is freed here, after the callback has returned */
+ talloc_free(sockc);
+ return;
+ }
+
+ D_NOTICE("connection to daemon closed, exiting\n");
+ exit(1);
+}
+
+/*
+ * Register (or replace) the callback invoked by the dead handler when
+ * the connection is lost.  Passing a NULL callback restores the default
+ * behaviour of exiting the process.
+ */
+void sock_client_set_disconnect_callback(struct sock_client_context *sockc,
+ sock_client_callback_func_t callback,
+ void *private_data)
+{
+ sockc->disconnect_callback = callback;
+ sockc->disconnect_data = private_data;
+}
+
+
+/*
+ * Per-request state for sock_client_msg_send()/sock_client_msg_recv().
+ */
+struct sock_client_msg_state {
+ /* Client context the request was sent on */
+ struct sock_client_context *sockc;
+ /* Request id registered in sockc->idr; released by the destructor */
+ uint32_t reqid;
+ /* Back-pointer so the reply path can complete the request */
+ struct tevent_req *req;
+ /* Unmarshalled reply, filled in by sock_client_msg_reply() */
+ void *reply;
+};
+
+static int sock_client_msg_state_destructor(
+ struct sock_client_msg_state *state);
+static void sock_client_msg_done(struct tevent_req *subreq);
+
+/*
+ * Start sending a request to the daemon.
+ *
+ * A request id is allocated and tied to the request state, the request
+ * is marshalled with funcs->request_push() and queued for writing.  The
+ * tevent request completes when sock_client_msg_reply() sees a reply
+ * with the same request id, or fails on a write error or timeout.  A
+ * zero timeout means no end time is set.
+ *
+ * Returns NULL if the request or the request id cannot be allocated.
+ */
+struct tevent_req *sock_client_msg_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *sockc,
+ struct timeval timeout,
+ void *request)
+{
+ struct tevent_req *req, *subreq;
+ struct sock_client_msg_state *state;
+ uint8_t *buf;
+ size_t buflen;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state, struct sock_client_msg_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->sockc = sockc;
+
+ state->reqid = reqid_new(sockc->idr, state);
+ if (state->reqid == REQID_INVALID) {
+ talloc_free(req);
+ return NULL;
+ }
+
+ state->req = req;
+
+ /* Installed only after reqid_new() succeeded: it removes the reqid */
+ talloc_set_destructor(state, sock_client_msg_state_destructor);
+
+ ret = sockc->funcs->request_push(request, state->reqid, state,
+ &buf, &buflen, sockc->private_data);
+ if (ret != 0) {
+ tevent_req_error(req, ret);
+ return tevent_req_post(req, ev);
+ }
+
+ subreq = comm_write_send(state, ev, sockc->comm, buf, buflen);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, sock_client_msg_done, req);
+
+ if (! timeval_is_zero(&timeout)) {
+ if (!tevent_req_set_endtime(req, ev, timeout)) {
+ return tevent_req_post(req, ev);
+ }
+ }
+
+ return req;
+}
+
+/*
+ * Talloc destructor for the request state: release the request id so a
+ * later reply with that id no longer finds this state.
+ */
+static int sock_client_msg_state_destructor(
+ struct sock_client_msg_state *state)
+{
+ reqid_remove(state->sockc->idr, state->reqid);
+ return 0;
+}
+
+/*
+ * Callback for the write subrequest.  A write failure fails the
+ * request; a successful write leaves the request pending until the
+ * reply arrives or the end time is reached.
+ */
+static void sock_client_msg_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ int ret;
+ bool status;
+
+ status = comm_write_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* wait for the reply or timeout */
+}
+
+/*
+ * Match a received packet with its pending request and complete it.
+ *
+ * The request id is extracted with funcs->reply_reqid().  Packets that
+ * cannot be parsed are logged and dropped; packets whose request id has
+ * no pending request are dropped silently.  Otherwise the reply is
+ * unmarshalled onto the request state with funcs->reply_pull() and the
+ * request is completed (or failed with the reply_pull() error).
+ */
+static void sock_client_msg_reply(struct sock_client_context *sockc,
+ uint8_t *buf, size_t buflen)
+{
+ struct sock_client_msg_state *state;
+ uint32_t reqid;
+ int ret;
+
+ ret = sockc->funcs->reply_reqid(buf, buflen, &reqid,
+ sockc->private_data);
+ if (ret != 0) {
+ D_WARNING("Invalid packet received, ret=%d\n", ret);
+ return;
+ }
+
+ state = reqid_find(sockc->idr, reqid, struct sock_client_msg_state);
+ if (state == NULL) {
+ return;
+ }
+
+ /* Double-check that the state found really owns this request id */
+ if (reqid != state->reqid) {
+ return;
+ }
+
+ ret = sockc->funcs->reply_pull(buf, buflen, state, &state->reply,
+ sockc->private_data);
+ if (ret != 0) {
+ tevent_req_error(state->req, ret);
+ return;
+ }
+
+ tevent_req_done(state->req);
+}
+
+/*
+ * Collect the result of sock_client_msg_send().
+ *
+ * Returns false on failure, storing the errno in *perr when perr is not
+ * NULL.  On success, if reply is not NULL it is treated as a pointer to
+ * a pointer and receives the reply, reparented onto mem_ctx.
+ */
+bool sock_client_msg_recv(struct tevent_req *req, int *perr,
+			  TALLOC_CTX *mem_ctx, void *reply)
+{
+	struct sock_client_msg_state *state = tevent_req_data(
+		req, struct sock_client_msg_state);
+	void **reply_out = (void **)reply;
+	int err;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (reply_out == NULL) {
+		return true;
+	}
+
+	*reply_out = talloc_steal(mem_ctx, state->reply);
+	return true;
+}
diff --git a/ctdb/common/sock_client.h b/ctdb/common/sock_client.h
new file mode 100644
index 0000000..49a0a52
--- /dev/null
+++ b/ctdb/common/sock_client.h
@@ -0,0 +1,129 @@
+/*
+ A client based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SOCK_CLIENT_H__
+#define __CTDB_SOCK_CLIENT_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file sock_client.h
+ *
+ * @brief A framework for a client based on unix-domain sockets.
+ *
+ * This abstraction allows one to build clients that communicate using
+ * unix-domain sockets. It takes care of the common boilerplate.
+ */
+
+/**
+ * @brief The abstract socket client context
+ */
+struct sock_client_context;
+
+/**
+ * @brief callback function
+ *
+ * This function can be registered to be called in case daemon goes away.
+ */
+typedef void (*sock_client_callback_func_t)(void *private_data);
+
+/**
+ * @brief Protocol marshalling functions
+ *
+ * The typical protocol packet will have a header and a payload.
+ * Header will contain at least 2 fields: length and reqid
+ *
+ * request_push() is called when the request packet needs to be marshalled
+ *
+ * reply_pull() is called to unmarshall data into a reply packet
+ *
+ * reply_reqid() is called to extract request id from a reply packet
+ */
+struct sock_client_proto_funcs {
+ int (*request_push)(void *request, uint32_t reqid,
+ TALLOC_CTX *mem_ctx,
+ uint8_t **buf, size_t *buflen,
+ void *private_data);
+
+ int (*reply_pull)(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx, void **reply,
+ void *private_data);
+
+ int (*reply_reqid)(uint8_t *buf, size_t buflen,
+ uint32_t *reqid, void *private_data);
+};
+
+/**
+ * @brief Create a new socket client
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockpath Unix domain socket path
+ * @param[in] funcs Protocol marshalling functions
+ * @param[in] private_data Private data for protocol functions
+ * @param[out] result New socket client context
+ * @return 0 on success, errno on failure
+ */
+int sock_client_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ const char *sockpath,
+ struct sock_client_proto_funcs *funcs,
+ void *private_data,
+ struct sock_client_context **result);
+
+/**
+ * @brief Register a callback in case of client disconnection
+ *
+ * @param[in] sockc Socket client context
+ * @param[in] callback Callback function
+ * @param[in] private_data Private data for callback function
+ */
+void sock_client_set_disconnect_callback(struct sock_client_context *sockc,
+ sock_client_callback_func_t callback,
+ void *private_data);
+
+/**
+ * @brief Async computation to send data to the daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockc The socket client context
+ * @param[in] timeout How long to wait for
+ * @param[in] request Request packet to be sent
+ * @return new tevent request, or NULL on failure
+ */
+struct tevent_req *sock_client_msg_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *sockc,
+ struct timeval timeout,
+ void *request);
+
+/**
+ * @brief Async computation end to send data to the daemon
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] reply Reply received from server
+ * @return true on success, false on failure
+ */
+bool sock_client_msg_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx, void *reply);
+
+#endif /* __CTDB_SOCK_CLIENT_H__ */
diff --git a/ctdb/common/sock_daemon.c b/ctdb/common/sock_daemon.c
new file mode 100644
index 0000000..e31a364
--- /dev/null
+++ b/ctdb/common/sock_daemon.c
@@ -0,0 +1,1100 @@
+/*
+ A server based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/async_req/async_sock.h"
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/become_daemon.h"
+#include "lib/util/sys_rw.h"
+
+#include "common/logging.h"
+#include "common/reqid.h"
+#include "common/comm.h"
+#include "common/pidfile.h"
+#include "common/system.h"
+#include "common/sock_daemon.h"
+
+/*
+ * One listening unix domain socket served by the daemon.
+ */
+struct sock_socket {
+ /* Links for the daemon's socket_list */
+ struct sock_socket *prev, *next;
+
+ /* Filesystem path of the socket; unlinked by the destructor */
+ const char *sockpath;
+ /* Per-socket callbacks and the private data passed to them */
+ struct sock_socket_funcs *funcs;
+ void *private_data;
+
+ /* Listening fd, -1 until the socket has been started */
+ int fd;
+ /* The running sock_socket_start request, if any */
+ struct tevent_req *req;
+};
+
+/*
+ * List entry for one accepted client connection.
+ */
+struct sock_client {
+ /* Links for the client_list of the socket's start request */
+ struct sock_client *prev, *next;
+
+ /* The sock_socket_start request whose state holds the client list */
+ struct tevent_req *req;
+ /* Connection state; allocated as a talloc child of this entry */
+ struct sock_client_context *client_ctx;
+};
+
+/*
+ * Connection state for one accepted client.
+ */
+struct sock_client_context {
+ struct tevent_context *ev;
+ /* The listening socket this client connected to */
+ struct sock_socket *sock;
+ /* Accepted connection fd, closed by the destructor */
+ int fd;
+ /* Packet transport set up on fd */
+ struct comm_context *comm;
+
+ /* Back-pointer to the owning list entry */
+ struct sock_client *client;
+};
+
+/*
+ * Top-level state of a socket daemon.
+ */
+struct sock_daemon_context {
+ /* Daemon-level callbacks and the private data passed to them */
+ struct sock_daemon_funcs *funcs;
+ void *private_data;
+
+ /* Pidfile handle, created in sock_daemon_run_send() if requested */
+ struct pidfile_context *pid_ctx;
+ /* Sockets added with sock_daemon_add_unix() */
+ struct sock_socket *socket_list;
+ /* Fd set by sock_daemon_set_startup_fd(), -1 if unset */
+ int startup_fd;
+};
+
+/*
+ * Process a single client
+ */
+
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data);
+static void sock_client_read_done(struct tevent_req *subreq);
+static void sock_client_dead_handler(void *private_data);
+static int sock_client_context_destructor(
+ struct sock_client_context *client_ctx);
+
+/*
+ * Create the connection state for a newly accepted client.
+ *
+ * This function takes ownership of client_fd: on every path where no
+ * context is returned the descriptor is closed here, because the caller
+ * does not close it after a failure.
+ *
+ * Returns ENOMEM or the comm_setup() error on failure.  Returns 0 with
+ * *result set on success.  Also returns 0 with *result left untouched
+ * when the socket's connect() callback rejects the client.
+ */
+static int sock_client_context_init(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct sock_socket *sock,
+				    int client_fd,
+				    struct sock_client *client,
+				    struct sock_client_context **result)
+{
+	struct sock_client_context *client_ctx;
+	int ret;
+
+	client_ctx = talloc_zero(mem_ctx, struct sock_client_context);
+	if (client_ctx == NULL) {
+		close(client_fd);
+		return ENOMEM;
+	}
+
+	client_ctx->ev = ev;
+	client_ctx->sock = sock;
+	client_ctx->fd = client_fd;
+	client_ctx->client = client;
+
+	ret = comm_setup(client_ctx, ev, client_fd,
+			 sock_client_read_handler, client_ctx,
+			 sock_client_dead_handler, client_ctx,
+			 &client_ctx->comm);
+	if (ret != 0) {
+		/* The destructor is not installed yet, so close fd by hand */
+		talloc_free(client_ctx);
+		close(client_fd);
+		return ret;
+	}
+
+	if (sock->funcs->connect != NULL) {
+		/*
+		 * The return value is deliberately ignored, so give pid
+		 * a defined value in case the lookup fails without
+		 * writing to it.
+		 */
+		pid_t pid = -1;
+		bool status;
+
+		(void) ctdb_get_peer_pid(client_fd, &pid);
+
+		status = sock->funcs->connect(client_ctx,
+					      pid,
+					      sock->private_data);
+		if (! status) {
+			/* Client rejected: not an error for the caller */
+			talloc_free(client_ctx);
+			close(client_fd);
+			return 0;
+		}
+	}
+
+	/* From here on, freeing client_ctx closes the connection */
+	talloc_set_destructor(client_ctx, sock_client_context_destructor);
+
+	*result = client_ctx;
+	return 0;
+}
+
+/*
+ * Read callback registered with comm_setup(): hand each received buffer
+ * to the socket's read_send() callback.  If no request can be created
+ * the client connection is dropped.
+ */
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ struct sock_client_context *client_ctx = talloc_get_type_abort(
+ private_data, struct sock_client_context);
+ struct sock_socket *sock = client_ctx->sock;
+ struct tevent_req *subreq;
+
+ subreq = sock->funcs->read_send(client_ctx, client_ctx->ev,
+ client_ctx, buf, buflen,
+ sock->private_data);
+ if (subreq == NULL) {
+ talloc_free(client_ctx);
+ return;
+ }
+ tevent_req_set_callback(subreq, sock_client_read_done, client_ctx);
+}
+
+/*
+ * Callback for the request created by the socket's read_send().
+ *
+ * The result is collected with read_recv() and the finished subrequest
+ * is then freed, so that completed requests do not pile up for as long
+ * as the client stays connected.  A failed read drops the client.
+ */
+static void sock_client_read_done(struct tevent_req *subreq)
+{
+	struct sock_client_context *client_ctx = tevent_req_callback_data(
+		subreq, struct sock_client_context);
+	struct sock_socket *sock = client_ctx->sock;
+	int ret = 0;
+	bool status;
+
+	status = sock->funcs->read_recv(subreq, &ret);
+	/* As elsewhere in this file, the callback frees its subrequest */
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("client read failed with ret=%d\n", ret);
+		talloc_free(client_ctx);
+	}
+}
+
+/*
+ * Dead-connection callback registered with comm_setup(): notify the
+ * socket's optional disconnect() callback, then free the client state.
+ */
+static void sock_client_dead_handler(void *private_data)
+{
+ struct sock_client_context *client_ctx = talloc_get_type_abort(
+ private_data, struct sock_client_context);
+ struct sock_socket *sock = client_ctx->sock;
+
+ if (sock->funcs->disconnect != NULL) {
+ sock->funcs->disconnect(client_ctx, sock->private_data);
+ }
+
+ talloc_free(client_ctx);
+}
+
+/*
+ * Talloc destructor for the connection state: free the owning list
+ * entry and the comm context, then close the connection fd.
+ *
+ * NOTE(review): client_ctx->client is the talloc parent of client_ctx
+ * (see sock_socket_start_new_client()), and its destructor in turn
+ * frees client_ctx - presumably talloc's re-entrancy protection makes
+ * this safe; confirm before changing the order.
+ */
+static int sock_client_context_destructor(
+ struct sock_client_context *client_ctx)
+{
+ TALLOC_FREE(client_ctx->client);
+ TALLOC_FREE(client_ctx->comm);
+ if (client_ctx->fd != -1) {
+ close(client_ctx->fd);
+ client_ctx->fd = -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Process a single listening socket
+ */
+
+/*
+ * Create a non-blocking unix domain stream socket bound to sockpath
+ * and start listening on it (backlog 10).
+ *
+ * If remove_before_use is true any existing file at sockpath is
+ * unlinked before bind().  Returns the listening fd, or -1 on failure;
+ * each failure is logged.
+ */
+static int socket_setup(const char *sockpath, bool remove_before_use)
+{
+ struct sockaddr_un addr;
+ size_t len;
+ int ret, fd;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+
+ /* strlcpy() returns the source length, so >= size means truncation */
+ len = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+ if (len >= sizeof(addr.sun_path)) {
+ D_ERR("socket path too long: %s\n", sockpath);
+ return -1;
+ }
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd == -1) {
+ D_ERR("socket create failed - %s\n", sockpath);
+ return -1;
+ }
+
+ ret = set_blocking(fd, false);
+ if (ret != 0) {
+ D_ERR("socket set nonblocking failed - %s\n", sockpath);
+ close(fd);
+ return -1;
+ }
+
+ if (remove_before_use) {
+ unlink(sockpath);
+ }
+
+ ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret != 0) {
+ D_ERR("socket bind failed - %s\n", sockpath);
+ close(fd);
+ return -1;
+ }
+
+ ret = listen(fd, 10);
+ if (ret != 0) {
+ D_ERR("socket listen failed - %s\n", sockpath);
+ close(fd);
+ return -1;
+ }
+
+ D_NOTICE("listening on %s\n", sockpath);
+
+ return fd;
+}
+
+static int sock_socket_destructor(struct sock_socket *sock);
+
+/*
+ * Allocate the record for a listening socket.  The socket itself is
+ * not created here; fd stays -1 until the socket is started.
+ *
+ * funcs must provide both read_send and read_recv.  Returns 0 with the
+ * record in *result, EINVAL for bad callbacks, or ENOMEM.
+ */
+static int sock_socket_init(TALLOC_CTX *mem_ctx, const char *sockpath,
+ struct sock_socket_funcs *funcs,
+ void *private_data,
+ struct sock_socket **result)
+{
+ struct sock_socket *sock;
+
+ if (funcs == NULL) {
+ return EINVAL;
+ }
+ if (funcs->read_send == NULL || funcs->read_recv == NULL) {
+ return EINVAL;
+ }
+
+ sock = talloc_zero(mem_ctx, struct sock_socket);
+ if (sock == NULL) {
+ return ENOMEM;
+ }
+
+ sock->sockpath = talloc_strdup(sock, sockpath);
+ if (sock->sockpath == NULL) {
+ talloc_free(sock);
+ return ENOMEM;
+ }
+ sock->funcs = funcs;
+ sock->private_data = private_data;
+ sock->fd = -1;
+
+ talloc_set_destructor(sock, sock_socket_destructor);
+
+ *result = sock;
+ return 0;
+}
+
+/*
+ * Talloc destructor for a listening socket: free the start request,
+ * close the listening fd if open, and unlink the socket path.
+ *
+ * Note that the path is unlinked unconditionally, even if this socket
+ * was never successfully bound.
+ */
+static int sock_socket_destructor(struct sock_socket *sock)
+{
+ TALLOC_FREE(sock->req);
+
+ if (sock->fd != -1) {
+ close(sock->fd);
+ sock->fd = -1;
+ }
+
+ unlink(sock->sockpath);
+ return 0;
+}
+
+
+/*
+ * State of the long-running request that accepts clients on a socket.
+ */
+struct sock_socket_start_state {
+ struct tevent_context *ev;
+ /* The listening socket being served */
+ struct sock_socket *sock;
+
+ /* Currently connected clients; emptied by the state destructor */
+ struct sock_client *client_list;
+};
+
+static int sock_socket_start_state_destructor(
+ struct sock_socket_start_state *state);
+static void sock_socket_start_new_client(struct tevent_req *subreq);
+static int sock_socket_start_client_destructor(struct sock_client *client);
+
+/*
+ * Start serving a socket: create the listening socket and queue the
+ * first accept.  The request fails with EIO if the socket cannot be
+ * set up; otherwise it keeps running, re-arming accept for each new
+ * client.  The request is recorded in sock->req.
+ */
+static struct tevent_req *sock_socket_start_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_socket *sock,
+ bool remove_before_use)
+{
+ struct tevent_req *req, *subreq;
+ struct sock_socket_start_state *state;
+
+ req = tevent_req_create(mem_ctx, &state,
+ struct sock_socket_start_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->sock = sock;
+
+ sock->fd = socket_setup(sock->sockpath, remove_before_use);
+ if (sock->fd == -1) {
+ tevent_req_error(req, EIO);
+ return tevent_req_post(req, ev);
+ }
+
+ /* Ensures all clients are freed when the request state goes away */
+ talloc_set_destructor(state, sock_socket_start_state_destructor);
+
+ subreq = accept_send(state, ev, sock->fd);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, sock_socket_start_new_client, req);
+
+ sock->req = req;
+
+ return req;
+}
+
+/*
+ * Talloc destructor for the start request state: free every client
+ * still on the list.
+ */
+static int sock_socket_start_state_destructor(
+ struct sock_socket_start_state *state)
+{
+ struct sock_client *client;
+
+ /*
+ * The loop terminates because each client's own destructor removes
+ * it from client_list, advancing the list head.
+ */
+ while ((client = state->client_list) != NULL) {
+ talloc_free(client);
+ }
+
+ return 0;
+}
+
+/*
+ * Callback for accept: re-arm accept for the next connection, then set
+ * up state for the client that has just connected.
+ *
+ * A failed accept is logged and otherwise ignored.  Failure to set up
+ * an individual client drops that client only; the listening request
+ * itself fails only when memory for the next accept or for the client
+ * entry cannot be allocated.
+ */
+static void sock_socket_start_new_client(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_socket_start_state *state = tevent_req_data(
+		req, struct sock_socket_start_state);
+	struct sock_client *client;
+	int client_fd, ret;
+
+	client_fd = accept_recv(subreq, NULL, NULL, &ret);
+	TALLOC_FREE(subreq);
+	if (client_fd == -1) {
+		D_ERR("failed to accept new connection\n");
+	}
+
+	/* Always keep an accept pending, even if this one failed */
+	subreq = accept_send(state, state->ev, state->sock->fd);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, sock_socket_start_new_client, req);
+
+	if (client_fd == -1) {
+		return;
+	}
+
+	client = talloc_zero(state, struct sock_client);
+	if (tevent_req_nomem(client, req)) {
+		close(client_fd);
+		return;
+	}
+
+	client->req = req;
+
+	ret = sock_client_context_init(client, state->ev, state->sock,
+				       client_fd, client, &client->client_ctx);
+	if (ret != 0) {
+		talloc_free(client);
+		return;
+	}
+
+	if (client->client_ctx == NULL) {
+		/*
+		 * The connect callback rejected this client:
+		 * sock_client_context_init() returned 0 without creating
+		 * a context and has already closed the fd.  Drop the
+		 * empty entry instead of keeping it on the list until
+		 * the daemon shuts down.
+		 */
+		talloc_free(client);
+		return;
+	}
+
+	talloc_set_destructor(client, sock_socket_start_client_destructor);
+	DLIST_ADD(state->client_list, client);
+}
+
+/*
+ * Talloc destructor for a client list entry: unlink it from the start
+ * request's client list and free its connection state.
+ */
+static int sock_socket_start_client_destructor(struct sock_client *client)
+{
+ struct sock_socket_start_state *state = tevent_req_data(
+ client->req, struct sock_socket_start_state);
+
+ DLIST_REMOVE(state->client_list, client);
+ TALLOC_FREE(client->client_ctx);
+
+ return 0;
+}
+
+/*
+ * Collect the result of sock_socket_start_send().
+ *
+ * Clears sock->req, since the request is finished.  Returns false with
+ * the errno in *perr (if not NULL) on failure.  On success, if sockpath
+ * is not NULL the socket's path string is reparented onto mem_ctx and
+ * returned through it.
+ */
+static bool sock_socket_start_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx, const char **sockpath)
+{
+ struct sock_socket_start_state *state = tevent_req_data(
+ req, struct sock_socket_start_state);
+ int ret;
+
+ state->sock->req = NULL;
+
+ if (tevent_req_is_unix_error(req, &ret)) {
+ if (perr != NULL) {
+ *perr = ret;
+ }
+ return false;
+ }
+
+ if (sockpath != NULL) {
+ *sockpath = talloc_steal(mem_ctx, state->sock->sockpath);
+ }
+
+ return true;
+}
+
+/*
+ * Send message to a client
+ */
+
+/*
+ * Queue buf for writing to a client.  This is a thin wrapper that
+ * forwards to comm_write_send() on the client's comm context and
+ * returns whatever it returns.
+ */
+struct tevent_req *sock_socket_write_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct sock_client_context *client_ctx,
+					  uint8_t *buf, size_t buflen)
+{
+	return comm_write_send(mem_ctx, ev, client_ctx->comm, buf, buflen);
+}
+
+/*
+ * Collect the result of sock_socket_write_send().  Returns the status
+ * reported by comm_write_recv(); on failure the error code is stored
+ * in *perr when perr is not NULL.
+ */
+bool sock_socket_write_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (comm_write_recv(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Socket daemon
+ */
+
+/*
+ * Allocate a daemon context and initialise logging for it.
+ *
+ * Returns 0 with the new context in *out, ENOMEM if the context cannot
+ * be allocated, or the error from logging_init().  On failure nothing
+ * is stored in *out and the partially built context is released.
+ */
+int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name,
+		      const char *logging, const char *debug_level,
+		      struct sock_daemon_funcs *funcs,
+		      void *private_data,
+		      struct sock_daemon_context **out)
+{
+	struct sock_daemon_context *sockd;
+	int ret;
+
+	sockd = talloc_zero(mem_ctx, struct sock_daemon_context);
+	if (sockd == NULL) {
+		return ENOMEM;
+	}
+
+	sockd->funcs = funcs;
+	sockd->private_data = private_data;
+	sockd->startup_fd = -1;
+
+	ret = logging_init(sockd, logging, debug_level, daemon_name);
+	if (ret != 0) {
+		fprintf(stderr,
+			"Failed to initialize logging, logging=%s, debug=%s\n",
+			logging, debug_level);
+		/* Do not leave the unused context hanging off mem_ctx */
+		talloc_free(sockd);
+		return ret;
+	}
+
+	*out = sockd;
+	return 0;
+}
+
+/*
+ * Register a unix domain socket with the daemon.  Only the record is
+ * created and added to the daemon's socket list; no socket is opened
+ * here.  Returns 0 on success or the sock_socket_init() error.
+ */
+int sock_daemon_add_unix(struct sock_daemon_context *sockd,
+ const char *sockpath,
+ struct sock_socket_funcs *funcs,
+ void *private_data)
+{
+ struct sock_socket *sock;
+ int ret;
+
+ ret = sock_socket_init(sockd, sockpath, funcs, private_data, &sock);
+ if (ret != 0) {
+ return ret;
+ }
+
+
+ DLIST_ADD(sockd->socket_list, sock);
+ return 0;
+}
+
+/*
+ * Record the startup fd for the daemon.  The fd is first marked
+ * close-on-exec; if that fails the fd is not recorded and false is
+ * returned.
+ */
+bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd)
+{
+ if (! set_close_on_exec(fd)) {
+ D_ERR("Failed to set close-on-exec on startup fd\n");
+ return false;
+ }
+
+ sockd->startup_fd = fd;
+ return true;
+}
+
+/*
+ * Run socket daemon
+ */
+
+/*
+ * State of the request that runs the daemon main loop.
+ */
+struct sock_daemon_run_state {
+ struct tevent_context *ev;
+ struct sock_daemon_context *sockd;
+ /* Pid to watch; watching is only armed when this is greater than 1 */
+ pid_t pid_watch;
+
+ /* Initialised to -1 in sock_daemon_run_send() */
+ int fd;
+ /* Set to EINTR when SIGINT/SIGTERM triggers shutdown */
+ int exit_code;
+};
+
+static void sock_daemon_run_started(struct tevent_req *subreq);
+static void sock_daemon_run_startup_done(struct tevent_req *subreq);
+static void sock_daemon_run_signal_handler(struct tevent_context *ev,
+ struct tevent_signal *se,
+ int signum, int count, void *siginfo,
+ void *private_data);
+static void sock_daemon_run_reconfigure(struct tevent_req *req);
+static void sock_daemon_run_reconfigure_done(struct tevent_req *subreq);
+static void sock_daemon_run_reopen_logs(struct tevent_req *req);
+static void sock_daemon_run_reopen_logs_done(struct tevent_req *subreq);
+static void sock_daemon_run_shutdown(struct tevent_req *req);
+static void sock_daemon_run_shutdown_done(struct tevent_req *subreq);
+static void sock_daemon_run_exit(struct tevent_req *req);
+static bool sock_daemon_run_socket_listen(struct tevent_req *req);
+static void sock_daemon_run_socket_fail(struct tevent_req *subreq);
+static void sock_daemon_run_watch_pid(struct tevent_req *subreq);
+static void sock_daemon_run_wait(struct tevent_req *req);
+static void sock_daemon_run_wait_done(struct tevent_req *subreq);
+static void sock_daemon_startup_notify(struct sock_daemon_context *sockd);
+
+/*
+ * Start running the daemon.
+ *
+ * The process is daemonised via become_daemon() according to do_fork
+ * and create_session, and the pidfile (if any) is created.  An
+ * immediate wakeup then schedules sock_daemon_run_started(), handlers
+ * are installed for SIGHUP, SIGUSR1, SIGINT and SIGTERM, and if
+ * pid_watch is greater than 1 a one second timer starts the pid watch.
+ *
+ * Failure to create the pidfile is always reported as EEXIST,
+ * whatever error pidfile_context_create() returned.
+ */
+struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_daemon_context *sockd,
+ const char *pidfile,
+ bool do_fork, bool create_session,
+ pid_t pid_watch)
+{
+ struct tevent_req *req, *subreq;
+ struct sock_daemon_run_state *state;
+ struct tevent_signal *se;
+
+ req = tevent_req_create(mem_ctx, &state,
+ struct sock_daemon_run_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ become_daemon(do_fork, !create_session, false);
+
+ if (pidfile != NULL) {
+ int ret = pidfile_context_create(sockd, pidfile,
+ &sockd->pid_ctx);
+ if (ret != 0) {
+ tevent_req_error(req, EEXIST);
+ return tevent_req_post(req, ev);
+ }
+ }
+
+ state->ev = ev;
+ state->sockd = sockd;
+ state->pid_watch = pid_watch;
+ state->fd = -1;
+
+ /* Zero-delay wakeup: continue startup from the event loop */
+ subreq = tevent_wakeup_send(state, ev,
+ tevent_timeval_current_ofs(0, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, sock_daemon_run_started, req);
+
+ se = tevent_add_signal(ev, state, SIGHUP, 0,
+ sock_daemon_run_signal_handler, req);
+ if (tevent_req_nomem(se, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ se = tevent_add_signal(ev, state, SIGUSR1, 0,
+ sock_daemon_run_signal_handler, req);
+ if (tevent_req_nomem(se, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ se = tevent_add_signal(ev, state, SIGINT, 0,
+ sock_daemon_run_signal_handler, req);
+ if (tevent_req_nomem(se, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ se = tevent_add_signal(ev, state, SIGTERM, 0,
+ sock_daemon_run_signal_handler, req);
+ if (tevent_req_nomem(se, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ if (pid_watch > 1) {
+ subreq = tevent_wakeup_send(state, ev,
+ tevent_timeval_current_ofs(1,0));
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, sock_daemon_run_watch_pid,
+ req);
+ }
+
+ return req;
+}
+
+/*
+ * First step of the daemon run, triggered by the zero-delay wakeup.
+ *
+ * If the daemon provides both startup_send and startup_recv, the
+ * asynchronous startup is used and the remaining steps happen in
+ * sock_daemon_run_startup_done().  Otherwise the optional synchronous
+ * startup() is called, after which the sockets are started, the wait
+ * step is entered and startup is notified.  A startup failure fails
+ * the request with EIO.
+ */
+static void sock_daemon_run_started(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct sock_daemon_run_state *state = tevent_req_data(
+ req, struct sock_daemon_run_state);
+ struct sock_daemon_context *sockd = state->sockd;
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ D_NOTICE("daemon started, pid=%u\n", getpid());
+
+ /* The async startup takes precedence over the synchronous one */
+ if (sockd->funcs != NULL && sockd->funcs->startup_send != NULL &&
+ sockd->funcs->startup_recv != NULL) {
+ subreq = sockd->funcs->startup_send(state, state->ev,
+ sockd->private_data);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, sock_daemon_run_startup_done,
+ req);
+ return;
+ }
+
+ if (sockd->funcs != NULL && sockd->funcs->startup != NULL) {
+ int ret;
+
+ ret = sockd->funcs->startup(sockd->private_data);
+ if (ret != 0) {
+ D_ERR("startup failed, ret=%d\n", ret);
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ D_NOTICE("startup completed successfully\n");
+ }
+
+ status = sock_daemon_run_socket_listen(req);
+ if (! status) {
+ return;
+ }
+ sock_daemon_run_wait(req);
+
+ sock_daemon_startup_notify(sockd);
+}
+
+/*
+ * Completion callback for the async startup hook (startup_send()).
+ *
+ * Collects the result with startup_recv().  On failure the daemon request
+ * is finished with EIO.  On success the sockets are started, the wait
+ * computation is kicked off and startup is notified.
+ */
+static void sock_daemon_run_startup_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	/*
+	 * Initialised so that the error log below is well defined even if
+	 * startup_recv() fails without filling in an error code.
+	 */
+	int ret = 0;
+	bool status;
+
+	status = sockd->funcs->startup_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("startup failed, ret=%d\n", ret);
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	D_NOTICE("startup completed successfully\n");
+
+	status = sock_daemon_run_socket_listen(req);
+	if (! status) {
+		return;
+	}
+	sock_daemon_run_wait(req);
+
+	sock_daemon_startup_notify(sockd);
+}
+
+/*
+ * tevent signal handler for the daemon.
+ *
+ * SIGUSR1 triggers a reconfigure, SIGHUP reopens the logs, and
+ * SIGINT/SIGTERM record EINTR as the exit code and start shutdown.
+ * Any other signal is only logged.
+ */
+static void sock_daemon_run_signal_handler(struct tevent_context *ev,
+					   struct tevent_signal *se,
+					   int signum, int count, void *siginfo,
+					   void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+
+	D_NOTICE("Received signal %d\n", signum);
+
+	switch (signum) {
+	case SIGUSR1:
+		sock_daemon_run_reconfigure(req);
+		break;
+
+	case SIGHUP:
+		sock_daemon_run_reopen_logs(req);
+		break;
+
+	case SIGINT:
+	case SIGTERM:
+		state->exit_code = EINTR;
+		sock_daemon_run_shutdown(req);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Run the daemon's reconfigure hook.
+ *
+ * The async pair reconfigure_send()/reconfigure_recv() is used when both
+ * are set; otherwise the sync reconfigure() is called if present.  A
+ * failing sync hook is only logged.
+ */
+static void sock_daemon_run_reconfigure(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	struct tevent_req *subreq;
+	int rc;
+
+	if (sockd->funcs == NULL) {
+		return;
+	}
+
+	if (sockd->funcs->reconfigure_send != NULL &&
+	    sockd->funcs->reconfigure_recv != NULL) {
+		subreq = sockd->funcs->reconfigure_send(
+			state, state->ev, sockd->private_data);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(
+			subreq, sock_daemon_run_reconfigure_done, req);
+		return;
+	}
+
+	if (sockd->funcs->reconfigure == NULL) {
+		return;
+	}
+
+	rc = sockd->funcs->reconfigure(sockd->private_data);
+	if (rc != 0) {
+		D_ERR("reconfigure failed, ret=%d\n", rc);
+		return;
+	}
+
+	D_NOTICE("reconfigure completed successfully\n");
+}
+
+/*
+ * Completion callback for the async reconfigure hook.
+ *
+ * Collects the result with reconfigure_recv() and logs the outcome.  A
+ * failure does not finish the daemon request.
+ */
+static void sock_daemon_run_reconfigure_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	/*
+	 * Initialised so that the error log below is well defined even if
+	 * reconfigure_recv() fails without filling in an error code.
+	 */
+	int ret = 0;
+	bool status;
+
+	status = sockd->funcs->reconfigure_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("reconfigure failed, ret=%d\n", ret);
+		return;
+	}
+
+	D_NOTICE("reconfigure completed successfully\n");
+}
+
+/*
+ * Run the daemon's reopen_logs hook.
+ *
+ * The async pair reopen_logs_send()/reopen_logs_recv() is used when both
+ * are set; otherwise the sync reopen_logs() is called if present.  A
+ * failing sync hook is only logged.
+ */
+static void sock_daemon_run_reopen_logs(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	struct tevent_req *subreq;
+	int rc;
+
+	if (sockd->funcs == NULL) {
+		return;
+	}
+
+	if (sockd->funcs->reopen_logs_send != NULL &&
+	    sockd->funcs->reopen_logs_recv != NULL) {
+		subreq = sockd->funcs->reopen_logs_send(
+			state, state->ev, sockd->private_data);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(
+			subreq, sock_daemon_run_reopen_logs_done, req);
+		return;
+	}
+
+	if (sockd->funcs->reopen_logs == NULL) {
+		return;
+	}
+
+	rc = sockd->funcs->reopen_logs(sockd->private_data);
+	if (rc != 0) {
+		D_ERR("reopen logs, ret=%d\n", rc);
+		return;
+	}
+
+	D_NOTICE("reopen logs completed successfully\n");
+}
+
+/*
+ * Completion callback for the async reopen_logs hook.
+ *
+ * Collects the result with reopen_logs_recv() and logs the outcome.  A
+ * failure does not finish the daemon request.
+ */
+static void sock_daemon_run_reopen_logs_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	/*
+	 * Initialised so that the error log below is well defined even if
+	 * reopen_logs_recv() fails without filling in an error code.
+	 */
+	int ret = 0;
+	bool status;
+
+	status = sockd->funcs->reopen_logs_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("reopen logs failed, ret=%d\n", ret);
+		return;
+	}
+
+	D_NOTICE("reopen logs completed successfully\n");
+}
+
+/*
+ * Begin daemon shutdown.
+ *
+ * Removes and frees every socket on the daemon's socket list, then runs
+ * the shutdown hook.  With an async hook the exit is deferred to
+ * sock_daemon_run_shutdown_done(); if shutdown_send() returns NULL, or
+ * only a sync hook (or none) exists, the daemon exits immediately.
+ */
+static void sock_daemon_run_shutdown(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	struct tevent_req *subreq;
+
+	D_NOTICE("Shutting down\n");
+
+	while (sockd->socket_list != NULL) {
+		struct sock_socket *head = sockd->socket_list;
+
+		DLIST_REMOVE(sockd->socket_list, head);
+		TALLOC_FREE(head);
+	}
+
+	if (sockd->funcs != NULL) {
+		if (sockd->funcs->shutdown_send != NULL &&
+		    sockd->funcs->shutdown_recv != NULL) {
+			subreq = sockd->funcs->shutdown_send(
+				state, state->ev, sockd->private_data);
+			if (subreq != NULL) {
+				tevent_req_set_callback(
+					subreq,
+					sock_daemon_run_shutdown_done,
+					req);
+				return;
+			}
+			/* Async hook could not be started, exit right away */
+		} else if (sockd->funcs->shutdown != NULL) {
+			sockd->funcs->shutdown(sockd->private_data);
+		}
+	}
+
+	sock_daemon_run_exit(req);
+}
+
+/*
+ * Completion callback for the async shutdown hook: collect the result
+ * with shutdown_recv() and finish the daemon request.
+ */
+static void sock_daemon_run_shutdown_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+
+	state->sockd->funcs->shutdown_recv(subreq);
+	TALLOC_FREE(subreq);
+
+	sock_daemon_run_exit(req);
+}
+
+/*
+ * Finish the daemon request: free the PID context and complete req with
+ * success when the recorded exit code is 0, or with that code as the
+ * error otherwise.
+ */
+static void sock_daemon_run_exit(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+
+	TALLOC_FREE(state->sockd->pid_ctx);
+
+	if (state->exit_code != 0) {
+		tevent_req_error(req, state->exit_code);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Start a sock_socket_start_send() request for every socket on the
+ * daemon's socket list.
+ *
+ * remove_before_use is passed as true when the daemon has a PID context.
+ * Returns false (with req already failed) if a request cannot be
+ * created, true otherwise.
+ */
+static bool sock_daemon_run_socket_listen(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	bool remove_before_use = (sockd->pid_ctx != NULL);
+	struct sock_socket *sock = sockd->socket_list;
+
+	while (sock != NULL) {
+		struct tevent_req *subreq;
+
+		subreq = sock_socket_start_send(state, state->ev, sock,
+						remove_before_use);
+		if (tevent_req_nomem(subreq, req)) {
+			return false;
+		}
+		tevent_req_set_callback(subreq, sock_daemon_run_socket_fail,
+					req);
+
+		sock = sock->next;
+	}
+
+	return true;
+}
+
+/*
+ * Completion callback for a socket request started by
+ * sock_daemon_run_socket_listen().
+ *
+ * Records the socket's error (or 0 on success) as the exit code and
+ * starts daemon shutdown.
+ */
+static void sock_daemon_run_socket_fail(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	const char *sockpath = "INVALID";
+	int err = 0;
+	bool ok;
+
+	ok = sock_socket_start_recv(subreq, &err, state, &sockpath);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		D_ERR("socket %s closed unexpectedly\n", sockpath);
+	}
+	state->exit_code = ok ? 0 : err;
+
+	sock_daemon_run_shutdown(req);
+}
+
+/*
+ * Periodic check that the watched PID still exists.
+ *
+ * Runs as the completion callback of a wakeup timer.  If kill(pid, 0)
+ * reports ESRCH, ESRCH is recorded as the exit code and shutdown is
+ * started.  Any other kill() error is logged.  In all non-ESRCH cases
+ * the check is re-armed to run again in 5 seconds.
+ */
+static void sock_daemon_run_watch_pid(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	bool ok;
+
+	ok = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	if (kill(state->pid_watch, 0) == -1) {
+		int saved_errno = errno;
+
+		if (saved_errno == ESRCH) {
+			D_ERR("PID %d gone away, exiting\n", state->pid_watch);
+			state->exit_code = ESRCH;
+			sock_daemon_run_shutdown(req);
+			return;
+		}
+
+		D_ERR("Failed to check PID status %d, ret=%d\n",
+		      state->pid_watch, saved_errno);
+	}
+
+	subreq = tevent_wakeup_send(state, state->ev,
+				    tevent_timeval_current_ofs(5,0));
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, sock_daemon_run_watch_pid, req);
+}
+
+/*
+ * Start the optional wait computation.
+ *
+ * Does nothing unless both wait_send() and wait_recv() are set.  When
+ * the wait request completes, sock_daemon_run_wait_done() is called.
+ */
+static void sock_daemon_run_wait(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	struct tevent_req *subreq;
+
+	if (sockd->funcs == NULL ||
+	    sockd->funcs->wait_send == NULL ||
+	    sockd->funcs->wait_recv == NULL) {
+		return;
+	}
+
+	subreq = sockd->funcs->wait_send(state, state->ev,
+					 sockd->private_data);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, sock_daemon_run_wait_done, req);
+}
+
+/*
+ * Completion callback for the wait computation: record its error (or 0
+ * on success) as the exit code and start daemon shutdown.
+ */
+static void sock_daemon_run_wait_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	int err = 0;
+	bool ok;
+
+	ok = state->sockd->funcs->wait_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	state->exit_code = ok ? 0 : err;
+
+	sock_daemon_run_shutdown(req);
+}
+
+/*
+ * Signal successful startup by writing an unsigned int 0 to the startup
+ * file descriptor, if one has been set (startup_fd != -1).  A short or
+ * failed write is only logged.
+ */
+static void sock_daemon_startup_notify(struct sock_daemon_context *sockd)
+{
+	unsigned int zero = 0;
+	ssize_t nwritten;
+
+	if (sockd->startup_fd == -1) {
+		return;
+	}
+
+	nwritten = sys_write(sockd->startup_fd, &zero, sizeof(zero));
+	if (nwritten != sizeof(zero)) {
+		D_WARNING("Failed to write zero to pipe FD\n");
+	}
+}
+
+/*
+ * Collect the result of sock_daemon_run_send().
+ *
+ * Returns true on success.  On failure returns false and, if perr is not
+ * NULL, stores the unix error code in *perr.
+ */
+bool sock_daemon_run_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Synchronous wrapper around sock_daemon_run_send()/_recv().
+ *
+ * Creates the daemon request on ev, polls the event loop until it
+ * finishes and returns 0 on success or the error code on failure
+ * (ENOMEM if the request cannot be created).
+ */
+int sock_daemon_run(struct tevent_context *ev,
+		    struct sock_daemon_context *sockd,
+		    const char *pidfile,
+		    bool do_fork, bool create_session,
+		    pid_t pid_watch)
+{
+	struct tevent_req *req;
+	int err;
+	bool ok;
+
+	req = sock_daemon_run_send(ev, ev, sockd,
+				   pidfile, do_fork, create_session, pid_watch);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	ok = sock_daemon_run_recv(req, &err);
+	TALLOC_FREE(req);
+
+	return ok ? 0 : err;
+}
diff --git a/ctdb/common/sock_daemon.h b/ctdb/common/sock_daemon.h
new file mode 100644
index 0000000..85ed961
--- /dev/null
+++ b/ctdb/common/sock_daemon.h
@@ -0,0 +1,283 @@
+/*
+ A server based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SOCK_DAEMON_H__
+#define __CTDB_SOCK_DAEMON_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/logging.h"
+
+/**
+ * @file sock_daemon.h
+ *
+ * @brief A framework for a server based on unix-domain sockets.
+ *
+ * This abstraction allows one to build simple servers that communicate using
+ * unix-domain sockets. It takes care of the common boilerplate.
+ */
+
+/**
+ * @brief The abstract socket daemon context
+ */
+struct sock_daemon_context;
+
+/**
+ * @brief The abstract socket client context
+ */
+struct sock_client_context;
+
+/**
+ * @brief The callback routines called during daemon life cycle
+ *
+ * startup() is called when the daemon starts running
+ * either via sock_daemon_run() or via sock_daemon_run_send()
+ * startup() should return 0 for success, non-zero value on failure
+ * On failure, sock_daemon_run() will return error.
+ *
+ * startup_send()/startup_recv() is the async version of startup()
+ *
+ * reconfigure() is called when the daemon receives SIGUSR1
+ * reconfigure() should return 0 for success, non-zero value on failure
+ * On failure, sock_daemon_run() will continue to run.
+ *
+ * reconfigure_send()/reconfigure_recv() is the async version of reconfigure()
+ *
+ * reopen_logs() is called when the daemon receives SIGHUP
+ * reopen_logs() should return 0 for success, non-zero value on failure
+ * On failure, sock_daemon_run() will continue to run.
+ *
+ * reopen_logs_send()/reopen_logs_recv() is the async version of reopen_logs()
+ *
+ * shutdown() is called when process receives SIGINT or SIGTERM or
+ * when wait computation has finished
+ *
+ * shutdown_send()/shutdown_recv() is the async version of shutdown()
+ *
+ * Please note that only one (sync or async) version of these functions
+ * will be called. If both versions are defined, then only async function
+ * will be called.
+ *
+ * wait_send() starts the async computation to keep running the daemon
+ * wait_recv() ends the async computation to keep running the daemon
+ *
+ * If wait_send()/wait_recv() is NULL, then daemon will keep running forever.
+ * If wait_send() returns req, then when req is over, daemon will shutdown.
+ */
+struct sock_daemon_funcs {
+ int (*startup)(void *private_data); /* sync startup hook: 0 on success, non-zero fails the daemon run */
+
+ struct tevent_req * (*startup_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data); /* async startup; used instead of startup() when the _send/_recv pair is set */
+ bool (*startup_recv)(struct tevent_req *req, int *perr);
+
+ int (*reconfigure)(void *private_data); /* called on SIGUSR1; failure is logged and the daemon keeps running */
+
+ struct tevent_req * (*reconfigure_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data); /* async variant of reconfigure() */
+ bool (*reconfigure_recv)(struct tevent_req *req, int *perr);
+
+ int (*reopen_logs)(void *private_data); /* called on SIGHUP; failure is logged and the daemon keeps running */
+
+ struct tevent_req * (*reopen_logs_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data); /* async variant of reopen_logs() */
+ bool (*reopen_logs_recv)(struct tevent_req *req, int *perr);
+
+ void (*shutdown)(void *private_data); /* called on SIGINT/SIGTERM or when the wait computation finishes */
+
+ struct tevent_req * (*shutdown_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data); /* async variant of shutdown() */
+ void (*shutdown_recv)(struct tevent_req *req);
+
+ struct tevent_req * (*wait_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data); /* optional: when this request completes, the daemon shuts down */
+ bool (*wait_recv)(struct tevent_req *req, int *perr); /* on failure *perr becomes the daemon's exit code */
+};
+
+/**
+ * @brief The callback routines called for an unix-domain socket
+ *
+ * connect() is called when there is a new connection
+ *
+ * @param[in] client The new socket client context
+ * @param[in] pid The pid of the new client process, or -1 if unknown
+ * @param[in] private_data Private data set with the socket
+ * @return true if connection should be accepted, false otherwise
+ *
+ *
+ * disconnect() is called when client closes connection
+ *
+ * @param[in] client The socket client context
+ * @param[in] private_data Private data associated with the socket
+ *
+ *
+ * read_send() starts the async computation to process data on the socket
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client The socket client context
+ * @param[in] buf Data received from the client
+ * @param[in] buflen Length of the data
+ * @param[in] private_data Private data associated with the socket
+ * @return new tevent request, or NULL on failure
+ *
+ *
+ * read_recv() ends the async computation to process data on the socket
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ *
+ */
+struct sock_socket_funcs {
+ bool (*connect)(struct sock_client_context *client,
+ pid_t pid,
+ void *private_data); /* new connection; return true to accept it */
+ void (*disconnect)(struct sock_client_context *client,
+ void *private_data); /* client closed the connection */
+
+ struct tevent_req * (*read_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *client,
+ uint8_t *buf, size_t buflen,
+ void *private_data); /* start processing buflen bytes received from the client */
+ bool (*read_recv)(struct tevent_req *req, int *perr); /* finish processing; *perr is errno on failure */
+};
+
+/**
+ * @brief Async computation to send data to the client
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client The socket client context
+ * @param[in] buf Data to be sent to the client
+ * @param[in] buflen Length of the data
+ * @return new tevent request, or NULL on failure
+ */
+struct tevent_req *sock_socket_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *client,
+ uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Async computation end to send data to client
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool sock_socket_write_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Create a new socket daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] daemon_name Name of the daemon, used for logging
+ * @param[in] logging Logging setup string
+ * @param[in] debug_level Debug level to log at
+ * @param[in] funcs Socket daemon callback routines
+ * @param[in] private_data Private data associated with callback routines
+ * @param[out] result New socket daemon context
+ * @return 0 on success, errno on failure
+ */
+int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name,
+ const char *logging, const char *debug_level,
+ struct sock_daemon_funcs *funcs,
+ void *private_data,
+ struct sock_daemon_context **result);
+
+/**
+ * @brief Create and listen to the unix domain socket
+ *
+ * @param[in] sockd Socket daemon context
+ * @param[in] sockpath Unix domain socket path
+ * @param[in] funcs socket callback routines
+ * @param[in] private_data Private data associated with callback routines
+ * @return 0 on success, errno on failure
+ */
+int sock_daemon_add_unix(struct sock_daemon_context *sockd,
+ const char *sockpath,
+ struct sock_socket_funcs *funcs,
+ void *private_data);
+
+/**
+ * @brief Set file descriptor for indicating startup success
+ *
+ * On successful completion, 0 (unsigned int) will be written to the fd.
+ *
+ * @param[in] sockd Socket daemon context
+ * @param[in] fd File descriptor
+ * @return true on success, false on error
+ */
+bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd);
+
+/**
+ * @brief Async computation start to run a socket daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockd The socket daemon context
+ * @param[in] pidfile PID file to create, NULL if no PID file required
+ * @param[in] do_fork Whether the daemon should fork on startup
+ * @param[in] create_session Whether the daemon should create a new session
+ * @param[in] pid_watch PID to watch. If PID goes away, shutdown.
+ * @return new tevent request, NULL on failure
+ */
+struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_daemon_context *sockd,
+ const char *pidfile,
+ bool do_fork, bool create_session,
+ pid_t pid_watch);
+
+/**
+ * @brief Async computation end to run a socket daemon
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool sock_daemon_run_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync way to start a daemon
+ *
+ * @param[in] ev Tevent context
+ * @param[in] sockd The socket daemon context
+ * @param[in] pidfile PID file to create, NULL if no PID file required
+ * @param[in] do_fork Whether the daemon should fork on startup
+ * @param[in] create_session Whether the daemon should create a new session
+ * @param[in] pid_watch PID to watch. If PID goes away, shutdown.
+ * @return 0 on success, errno on failure
+ *
+ * This call will return only on shutdown of the daemon
+ */
+int sock_daemon_run(struct tevent_context *ev,
+ struct sock_daemon_context *sockd,
+ const char *pidfile,
+ bool do_fork, bool create_session,
+ pid_t pid_watch);
+
+#endif /* __CTDB_SOCK_DAEMON_H__ */
diff --git a/ctdb/common/sock_io.c b/ctdb/common/sock_io.c
new file mode 100644
index 0000000..81e82c5
--- /dev/null
+++ b/ctdb/common/sock_io.c
@@ -0,0 +1,328 @@
+/*
+ Generic Unix-domain Socket I/O
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/sys_rw.h"
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+
+#include "common/logging.h"
+#include "common/sock_io.h"
+
+/*
+ * Remove a stale socket file at sockpath.
+ *
+ * Returns true if the file was removed or did not exist (ENOENT), false
+ * if unlink() failed for any other reason.
+ */
+bool sock_clean(const char *sockpath)
+{
+	if (unlink(sockpath) == 0) {
+		D_WARNING("Removed stale socket %s\n", sockpath);
+		return true;
+	}
+
+	if (errno == ENOENT) {
+		/* Nothing to clean up */
+		return true;
+	}
+
+	D_ERR("Failed to remove stale socket %s\n", sockpath);
+	return false;
+}
+
+/*
+ * Connect to the unix domain stream socket at sockpath.
+ *
+ * Returns the connected file descriptor, or -1 on failure (NULL path,
+ * path too long for sun_path, socket() or connect() error).  Every
+ * failure is logged.
+ */
+int sock_connect(const char *sockpath)
+{
+	struct sockaddr_un addr;
+	size_t copied;
+	int fd;
+
+	if (sockpath == NULL) {
+		D_ERR("Invalid socket path\n");
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	/* strlcpy() returns the source length: >= size means truncation */
+	copied = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+	if (copied >= sizeof(addr.sun_path)) {
+		D_ERR("Socket path too long, len=%zu\n", strlen(sockpath));
+		return -1;
+	}
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd == -1) {
+		D_ERR("socket() failed, errno=%d\n", errno);
+		return -1;
+	}
+
+	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) != -1) {
+		return fd;
+	}
+
+	D_ERR("connect() failed, errno=%d\n", errno);
+	close(fd);
+	return -1;
+}
+
+struct sock_queue { /* packet queue state for one socket file descriptor */
+ struct tevent_context *ev; /* event context for the fd, immediate and write-queue events */
+ sock_queue_callback_fn_t callback; /* called with each complete packet, or with (NULL, 0) on error */
+ void *private_data; /* passed through to callback */
+ int fd; /* socket descriptor, -1 if none */
+
+ struct tevent_immediate *im; /* schedules processing of further buffered packets */
+ struct tevent_queue *queue; /* "out-queue": pending write requests */
+ struct tevent_fd *fde; /* read event on fd */
+ uint8_t *buf; /* receive buffer, freed when fully consumed */
+ size_t buflen, begin, end; /* allocated size; offset of next unprocessed byte; offset past last received byte */
+};
+
+/*
+ * The reserved talloc headers, SOCK_QUEUE_OBJ_COUNT,
+ * and the pre-allocated pool-memory SOCK_QUEUE_POOL_SIZE,
+ * are used for the sub-objects queue->im, queue->queue, queue->fde
+ * and queue->buf.
+ * If the memory allocating sub-objects of struct sock_queue change,
+ * those values need to be adjusted.
+ */
+#define SOCK_QUEUE_OBJ_COUNT 4
+#define SOCK_QUEUE_POOL_SIZE 2048
+
+static bool sock_queue_set_fd(struct sock_queue *queue, int fd);
+static void sock_queue_handler(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags,
+ void *private_data);
+static void sock_queue_process(struct sock_queue *queue);
+static void sock_queue_process_event(struct tevent_context *ev,
+ struct tevent_immediate *im,
+ void *private_data);
+
+/*
+ * Create a packet queue for file descriptor fd.
+ *
+ * The queue is allocated as a talloc pooled object under mem_ctx and
+ * owns its immediate event, write queue and fd event.  callback is
+ * invoked with private_data for received packets and errors.
+ *
+ * Returns the new queue, or NULL on failure.
+ */
+struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    int fd,
+				    sock_queue_callback_fn_t callback,
+				    void *private_data)
+{
+	struct sock_queue *queue;
+
+	queue = talloc_pooled_object(mem_ctx, struct sock_queue,
+				     SOCK_QUEUE_OBJ_COUNT,
+				     SOCK_QUEUE_POOL_SIZE);
+	if (queue == NULL) {
+		return NULL;
+	}
+	memset(queue, 0, sizeof(*queue));
+
+	queue->ev = ev;
+	queue->callback = callback;
+	queue->private_data = private_data;
+
+	queue->im = tevent_create_immediate(queue);
+	if (queue->im == NULL) {
+		goto fail;
+	}
+
+	queue->queue = tevent_queue_create(queue, "out-queue");
+	if (queue->queue == NULL) {
+		goto fail;
+	}
+
+	if (! sock_queue_set_fd(queue, fd)) {
+		goto fail;
+	}
+
+	return queue;
+
+fail:
+	talloc_free(queue);
+	return NULL;
+}
+
+/*
+ * Attach file descriptor fd to the queue, replacing any existing fd
+ * event.
+ *
+ * For fd != -1 the descriptor is made non-blocking and a read event with
+ * auto-close is registered.  Returns false if either step fails.
+ */
+static bool sock_queue_set_fd(struct sock_queue *queue, int fd)
+{
+	TALLOC_FREE(queue->fde);
+	queue->fd = fd;
+
+	if (fd == -1) {
+		/* Nothing to monitor */
+		return true;
+	}
+
+	if (set_blocking(fd, false) != 0) {
+		return false;
+	}
+
+	queue->fde = tevent_add_fd(queue->ev, queue, fd, TEVENT_FD_READ,
+				   sock_queue_handler, queue);
+	if (queue->fde == NULL) {
+		return false;
+	}
+	tevent_fd_set_auto_close(queue->fde);
+
+	return true;
+}
+
+/*
+ * Read event handler for the queue's file descriptor.
+ *
+ * Asks the kernel how many bytes are pending (FIONREAD), grows the
+ * receive buffer if needed, appends the data and processes any complete
+ * packet.  Zero pending bytes is treated as a closed descriptor; that,
+ * an allocation failure or a read error is reported to the callback as
+ * (NULL, 0).  A failing ioctl() is ignored.
+ */
+static void sock_queue_handler(struct tevent_context *ev,
+			       struct tevent_fd *fde, uint16_t flags,
+			       void *private_data)
+{
+	struct sock_queue *queue = talloc_get_type_abort(
+		private_data, struct sock_queue);
+	int pending;
+	ssize_t nread;
+
+	if (ioctl(queue->fd, FIONREAD, &pending) != 0) {
+		/* Ignore */
+		return;
+	}
+
+	if (pending == 0) {
+		/* descriptor has been closed */
+		queue->callback(NULL, 0, queue->private_data);
+		return;
+	}
+
+	if ((size_t)pending > queue->buflen - queue->end) {
+		size_t wanted = queue->end + pending;
+
+		queue->buf = talloc_realloc_size(queue, queue->buf, wanted);
+		if (queue->buf == NULL) {
+			queue->callback(NULL, 0, queue->private_data);
+			return;
+		}
+		queue->buflen = wanted;
+	}
+
+	nread = sys_read(queue->fd, queue->buf + queue->end, pending);
+	if (nread < 0) {
+		queue->callback(NULL, 0, queue->private_data);
+		return;
+	}
+	queue->end += nread;
+
+	sock_queue_process(queue);
+}
+
+/*
+ * Deliver at most one complete packet from the receive buffer.
+ *
+ * Each packet starts with a uint32_t holding the packet size; the same
+ * size is used to advance past the packet.  If a full packet is
+ * buffered it is passed to the callback.  If more data remains, another
+ * processing round is scheduled via an immediate event; otherwise the
+ * buffer is freed and the offsets are reset.  A size of 0 is reported to
+ * the callback as an error (NULL, 0).
+ */
+static void sock_queue_process(struct sock_queue *queue)
+{
+	uint32_t pkt_size;
+
+	if ((queue->end - queue->begin) < sizeof(uint32_t)) {
+		/* not enough data */
+		return;
+	}
+
+	/*
+	 * queue->begin can be any byte offset, so the length prefix may
+	 * not be suitably aligned for a direct uint32_t load.  Copy it out
+	 * instead of dereferencing a cast pointer.
+	 */
+	memcpy(&pkt_size, queue->buf + queue->begin, sizeof(pkt_size));
+	if (pkt_size == 0) {
+		D_ERR("Invalid packet of length 0\n");
+		queue->callback(NULL, 0, queue->private_data);
+		return;
+	}
+
+	if ((queue->end - queue->begin) < pkt_size) {
+		/* not enough data */
+		return;
+	}
+
+	queue->callback(queue->buf + queue->begin, pkt_size,
+			queue->private_data);
+	queue->begin += pkt_size;
+
+	if (queue->begin < queue->end) {
+		/* more data to be processed */
+		tevent_schedule_immediate(queue->im, queue->ev,
+					  sock_queue_process_event, queue);
+	} else {
+		TALLOC_FREE(queue->buf);
+		queue->buflen = 0;
+		queue->begin = 0;
+		queue->end = 0;
+	}
+}
+
+/*
+ * Immediate event handler: continue processing buffered packets for the
+ * queue passed as private_data.
+ */
+static void sock_queue_process_event(struct tevent_context *ev,
+				     struct tevent_immediate *im,
+				     void *private_data)
+{
+	sock_queue_process(
+		talloc_get_type_abort(private_data, struct sock_queue));
+}
+
+struct sock_queue_write_state { /* state of one queued write request */
+ uint8_t *pkt; /* caller's buffer to send; the pointer is stored, the data is not copied */
+ uint32_t pkt_size; /* number of bytes to write from pkt */
+};
+
+static void sock_queue_trigger(struct tevent_req *req, void *private_data);
+
+/*
+ * Queue buf (buflen bytes) for writing to the queue's file descriptor.
+ *
+ * Only the pointer is stored, so buf must stay valid until the write has
+ * been performed.  Returns 0 if the write was queued, -1 if buflen is
+ * INT32_MAX or larger or on allocation failure.
+ */
+int sock_queue_write(struct sock_queue *queue, uint8_t *buf, size_t buflen)
+{
+	struct sock_queue_write_state *state;
+	struct tevent_queue_entry *entry;
+	struct tevent_req *req;
+
+	if (buflen >= INT32_MAX) {
+		return -1;
+	}
+
+	req = tevent_req_create(queue, &state, struct sock_queue_write_state);
+	if (req == NULL) {
+		return -1;
+	}
+
+	state->pkt_size = (uint32_t)buflen;
+	state->pkt = buf;
+
+	entry = tevent_queue_add_entry(queue->queue, queue->ev, req,
+				       sock_queue_trigger, queue);
+	if (entry != NULL) {
+		return 0;
+	}
+
+	talloc_free(req);
+	return -1;
+}
+
+/*
+ * Write-queue trigger: write the whole packet of a queued request to the
+ * queue's file descriptor.
+ *
+ * Keeps calling sys_write() until all bytes are written, then completes
+ * and frees the request.  On a write error the callback is invoked with
+ * (NULL, 0) and the request is left as is.
+ */
+static void sock_queue_trigger(struct tevent_req *req, void *private_data)
+{
+	struct sock_queue *queue = talloc_get_type_abort(
+		private_data, struct sock_queue);
+	struct sock_queue_write_state *state = tevent_req_data(
+		req, struct sock_queue_write_state);
+	size_t done = 0;
+
+	/* At least one write is always attempted, even for an empty packet */
+	for (;;) {
+		ssize_t n = sys_write(queue->fd, state->pkt + done,
+				      state->pkt_size - done);
+
+		if (n < 0) {
+			queue->callback(NULL, 0, queue->private_data);
+			return;
+		}
+		done += n;
+
+		if (done >= state->pkt_size) {
+			break;
+		}
+	}
+
+	tevent_req_done(req);
+	talloc_free(req);
+}
diff --git a/ctdb/common/sock_io.h b/ctdb/common/sock_io.h
new file mode 100644
index 0000000..8b6e4eb
--- /dev/null
+++ b/ctdb/common/sock_io.h
@@ -0,0 +1,39 @@
+/*
+ Generic Socket I/O
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SOCK_IO_H__
+#define __CTDB_SOCK_IO_H__
+
+typedef void (*sock_queue_callback_fn_t)(uint8_t *buf, size_t buflen,
+ void *private_data);
+
+struct sock_queue;
+
+bool sock_clean(const char *sockpath);
+int sock_connect(const char *sockpath);
+
+struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd,
+ sock_queue_callback_fn_t callback,
+ void *private_data);
+
+int sock_queue_write(struct sock_queue *queue, uint8_t *buf, size_t buflen);
+
+#endif /* __CTDB_SOCK_IO_H__ */
diff --git a/ctdb/common/srvid.c b/ctdb/common/srvid.c
new file mode 100644
index 0000000..3304994
--- /dev/null
+++ b/ctdb/common/srvid.c
@@ -0,0 +1,280 @@
+/*
+ Message handler database based on srvid
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+
+#include "lib/util/dlinklist.h"
+#include "common/db_hash.h"
+#include "common/srvid.h"
+
+struct srvid_handler_list;
+
+/* Top-level state of the srvid message handler database. */
+struct srvid_context {
+ /* Hash db keyed by the 64-bit srvid; each record holds a pointer to
+  * the srvid_handler_list for that srvid (see srvid_insert()). */
+ struct db_hash_context *dh;
+ /* Doubly linked list of every srvid_handler_list owned by this
+  * context. */
+ struct srvid_handler_list *list;
+};
+
+/* One registered handler for a srvid. */
+struct srvid_handler {
+ /* Links within the owning list's handler chain. */
+ struct srvid_handler *prev, *next;
+ /* Back pointer to the per-srvid list this handler belongs to. */
+ struct srvid_handler_list *list;
+ /* Callback invoked by srvid_dispatch(). */
+ srvid_handler_fn handler;
+ /* Opaque pointer passed to the callback; also used as the match key
+  * by srvid_deregister() and srvid_exists(). */
+ void *private_data;
+};
+
+/* All handlers registered for a single srvid. */
+struct srvid_handler_list {
+ /* Links within srvid_context.list. */
+ struct srvid_handler_list *prev, *next;
+ /* Owning context, needed by the destructor to remove the db record. */
+ struct srvid_context *srv;
+ /* The srvid this list is stored under in the hash db. */
+ uint64_t srvid;
+ /* Head of the chain of handlers for this srvid. */
+ struct srvid_handler *h;
+};
+
+
+/*
+ * Initialise message srvid context and database
+ */
+int srvid_init(TALLOC_CTX *mem_ctx, struct srvid_context **result)
+{
+	/*
+	 * Allocate the context first so that the hash database can be
+	 * created as its talloc child; *result is only written on success.
+	 */
+	struct srvid_context *ctx = talloc_zero(mem_ctx, struct srvid_context);
+	int status;
+
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	status = db_hash_init(ctx, "messagedb", 8192, DB_HASH_SIMPLE, &ctx->dh);
+	if (status == 0) {
+		*result = ctx;
+		return 0;
+	}
+
+	/* Database setup failed: release the half-built context */
+	talloc_free(ctx);
+	return status;
+}
+
+/*
+ * Wrapper functions to insert/delete/fetch srvid_handler_list
+ */
+
+static int srvid_insert(struct srvid_context *srv, uint64_t srvid,
+			struct srvid_handler_list *list)
+{
+	/* The record is the list pointer itself, keyed by the raw srvid */
+	uint8_t *key = (uint8_t *)&srvid;
+	uint8_t *value = (uint8_t *)&list;
+
+	return db_hash_insert(srv->dh, key, sizeof(srvid), value, sizeof(list));
+}
+
+static int srvid_delete(struct srvid_context *srv, uint64_t srvid)
+{
+	/* Drop the db record stored under the raw 64-bit srvid */
+	uint8_t *key = (uint8_t *)&srvid;
+
+	return db_hash_delete(srv->dh, key, sizeof(srvid));
+}
+
+/*
+ * db_hash_fetch() parser: copy the stored list pointer out of a record.
+ *
+ * private_data points at the caller's "struct srvid_handler_list *",
+ * which is overwritten only when the record has exactly pointer size.
+ * Returns 0 on success, EIO if the record size is unexpected.
+ */
+static int srvid_fetch_parser(uint8_t *keybuf, size_t keylen,
+			      uint8_t *databuf, size_t datalen,
+			      void *private_data)
+{
+	struct srvid_handler_list **list =
+		(struct srvid_handler_list **)private_data;
+
+	if (datalen != sizeof(*list)) {
+		return EIO;
+	}
+
+	/*
+	 * The record buffer is a plain byte array with no alignment
+	 * guarantee, so copy the pointer bytes instead of dereferencing
+	 * databuf through a cast.
+	 */
+	memcpy(list, databuf, sizeof(*list));
+	return 0;
+}
+
+static int srvid_fetch(struct srvid_context *srv, uint64_t srvid,
+		       struct srvid_handler_list **list)
+{
+	/* Look up the record for srvid; the parser fills in *list */
+	uint8_t *key = (uint8_t *)&srvid;
+
+	return db_hash_fetch(srv->dh, key, sizeof(srvid),
+			     srvid_fetch_parser, list);
+}
+
+/*
+ * When a handler is freed, remove it from the list
+ */
+static int srvid_handler_destructor(struct srvid_handler *h)
+{
+ /* Talloc destructor installed on every handler by srvid_register(). */
+ struct srvid_handler_list *list = h->list;
+
+ /* Unlink this handler from its per-srvid chain. */
+ DLIST_REMOVE(list->h, h);
+ if (list->h == NULL) {
+ /* Last handler for this srvid: free the list as well, which runs
+  * srvid_handler_list_destructor() and drops the db record. */
+ talloc_free(list);
+ }
+ /* 0 tells talloc to go ahead and free the handler. */
+ return 0;
+}
+
+/*
+ * When a list is freed, remove all handlers and remove db entry
+ */
+static int srvid_handler_list_destructor(struct srvid_handler_list *list)
+{
+ /* Talloc destructor installed on each list by srvid_register(). */
+ struct srvid_handler *h;
+
+ /* Free every remaining handler, unlinking each one first. */
+ while (list->h != NULL) {
+ h = list->h;
+ DLIST_REMOVE(list->h, h);
+ /* NOTE(review): freeing h runs srvid_handler_destructor(), which may
+  * see list->h == NULL and call talloc_free(list) while this
+  * destructor is still running; presumably talloc ignores that
+  * nested free - confirm. */
+ TALLOC_FREE(h);
+ }
+
+ /* Remove the srvid record from the hash db; the result is ignored. */
+ srvid_delete(list->srv, list->srvid);
+ /* Unlink this list from the context's list of lists. */
+ DLIST_REMOVE(list->srv->list, list);
+ return 0;
+}
+
+/*
+ * Register a message handler
+ */
+int srvid_register(struct srvid_context *srv, TALLOC_CTX *mem_ctx,
+		   uint64_t srvid, srvid_handler_fn handler,
+		   void *private_data)
+{
+	struct srvid_handler_list *hlist;
+	struct srvid_handler *entry;
+	int status;
+
+	if (srv == NULL) {
+		return EINVAL;
+	}
+
+	/* The handler lives on the caller's context, not on srv */
+	entry = talloc_zero(mem_ctx, struct srvid_handler);
+	if (entry == NULL) {
+		return ENOMEM;
+	}
+	entry->handler = handler;
+	entry->private_data = private_data;
+
+	status = srvid_fetch(srv, srvid, &hlist);
+	if (status != 0) {
+		/* First registration for this srvid: create its list */
+		hlist = talloc_zero(srv, struct srvid_handler_list);
+		if (hlist == NULL) {
+			talloc_free(entry);
+			return ENOMEM;
+		}
+		hlist->srv = srv;
+		hlist->srvid = srvid;
+
+		status = srvid_insert(srv, srvid, hlist);
+		if (status != 0) {
+			talloc_free(entry);
+			talloc_free(hlist);
+			return status;
+		}
+
+		/*
+		 * Only arm the destructor once the list is in the db and
+		 * linked, so the failure path above frees plain memory.
+		 */
+		DLIST_ADD(srv->list, hlist);
+		talloc_set_destructor(hlist, srvid_handler_list_destructor);
+	}
+
+	/* Attach the handler; its destructor unlinks it again when freed */
+	entry->list = hlist;
+	DLIST_ADD(hlist->h, entry);
+	talloc_set_destructor(entry, srvid_handler_destructor);
+
+	return 0;
+}
+
+/*
+ * Deregister a message handler
+ */
+int srvid_deregister(struct srvid_context *srv, uint64_t srvid,
+		     void *private_data)
+{
+	struct srvid_handler_list *list;
+	struct srvid_handler *h;
+	int ret;
+
+	/*
+	 * Reject a missing context the same way srvid_register() does,
+	 * instead of dereferencing it during the fetch.
+	 */
+	if (srv == NULL) {
+		return EINVAL;
+	}
+
+	ret = srvid_fetch(srv, srvid, &list);
+	if (ret != 0) {
+		/* Nothing registered for this srvid */
+		return ret;
+	}
+
+	/*
+	 * Remove the first handler registered with this private_data.
+	 * Freeing it may free the whole list (via the destructors), so
+	 * return straight away without touching h or list again.
+	 */
+	for (h = list->h; h != NULL; h = h->next) {
+		if (h->private_data == private_data) {
+			talloc_free(h);
+			return 0;
+		}
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Check if a message handler exists
+ */
+int srvid_exists(struct srvid_context *srv, uint64_t srvid, void *private_data)
+{
+	struct srvid_handler_list *list;
+	struct srvid_handler *h;
+	int ret;
+
+	/*
+	 * Reject a missing context the same way srvid_register() does,
+	 * instead of dereferencing it during the fetch.
+	 */
+	if (srv == NULL) {
+		return EINVAL;
+	}
+
+	ret = srvid_fetch(srv, srvid, &list);
+	if (ret != 0) {
+		return ret;
+	}
+	if (list->h == NULL) {
+		return ENOENT;
+	}
+
+	/* NULL private_data: any registration for the srvid is enough */
+	if (private_data == NULL) {
+		return 0;
+	}
+
+	/* Otherwise require a handler registered with this private_data */
+	for (h = list->h; h != NULL; h = h->next) {
+		if (h->private_data == private_data) {
+			return 0;
+		}
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Send a message to registered srvid and srvid_all
+ */
+/*
+ * Call every handler on a list with the given srvid and data.
+ *
+ * The next pointer is read before the handler runs, so a handler may
+ * free its own registration (and with it the list, if it was the last
+ * one) without the loop touching freed memory.  A handler that frees
+ * the registration following its own is still not supported.
+ */
+static void srvid_call_handlers(struct srvid_handler_list *list,
+				uint64_t srvid, TDB_DATA data)
+{
+	struct srvid_handler *h, *next;
+
+	for (h = list->h; h != NULL; h = next) {
+		next = h->next;
+		h->handler(srvid, data, h->private_data);
+	}
+}
+
+int srvid_dispatch(struct srvid_context *srv, uint64_t srvid,
+		   uint64_t srvid_all, TDB_DATA data)
+{
+	struct srvid_handler_list *list;
+	int ret;
+
+	/* Same contract as srvid_register() for a missing context */
+	if (srv == NULL) {
+		return EINVAL;
+	}
+
+	ret = srvid_fetch(srv, srvid, &list);
+	if (ret == 0) {
+		srvid_call_handlers(list, srvid, data);
+	}
+
+	/* srvid_all == 0 means "no catch-all delivery" */
+	if (srvid_all == 0) {
+		return ret;
+	}
+
+	/*
+	 * Catch-all handlers receive the original srvid.  As before, the
+	 * result of this second lookup is what gets returned.
+	 */
+	ret = srvid_fetch(srv, srvid_all, &list);
+	if (ret == 0) {
+		srvid_call_handlers(list, srvid, data);
+	}
+
+	return ret;
+}
diff --git a/ctdb/common/srvid.h b/ctdb/common/srvid.h
new file mode 100644
index 0000000..c0c2b30
--- /dev/null
+++ b/ctdb/common/srvid.h
@@ -0,0 +1,121 @@
+/*
+ Message handler database based on srvid
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SRVID_H__
+#define __CTDB_SRVID_H__
+
+#include <talloc.h>
+#include <tdb.h>
+
+/**
+ * @file srvid.h
+ *
+ * @brief Database of message handlers based on srvid
+ *
+ * CTDB can be used to send messages between clients across nodes using
+ * CTDB_REQ_MESSAGE. Clients register for messages based on srvid. CTDB itself
+ * uses a small set of srvid messages. A large range (2^56) of srvid messages
+ * is reserved for Samba.
+ */
+
+/**
+ * @brief Message handler function
+ *
+ * To receive messages for a specific srvid, register a message handler function
+ * for the srvid.
+ */
+typedef void (*srvid_handler_fn)(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+
+/**
+ * @brief Abstract struct to store srvid message handler database
+ */
+struct srvid_context;
+
+/**
+ * @brief Initialize srvid message handler database
+ *
+ * This returns a new srvid message handler database context. Freeing
+ * this context will free all the memory associated with the hash table.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] result The new srvid_context structure
+ * @return 0 on success, errno on failure
+ */
+int srvid_init(TALLOC_CTX *mem_ctx, struct srvid_context **result);
+
+/**
+ * @brief Register a message handler for a srvid
+ *
+ * The message handler is allocated using the specified talloc context. Freeing
+ * this talloc context, removes the message handler.
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] mem_ctx Talloc memory context for message handler
+ * @param[in] srvid The srvid
+ * @param[in] handler The message handler function for srvid
+ * @param[in] private_data Private data for message handler function
+ * @return 0 on success, errno on failure
+ */
+int srvid_register(struct srvid_context *srv, TALLOC_CTX *mem_ctx,
+ uint64_t srvid, srvid_handler_fn handler,
+ void *private_data);
+
+/**
+ * @brief Unregister a message handler for a srvid
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] srvid The srvid
+ * @param[in] private_data Private data of message handler function
+ * @return 0 on success, errno on failure
+ */
+int srvid_deregister(struct srvid_context *srv, uint64_t srvid,
+ void *private_data);
+
+/**
+ * @brief Check if any message handler is registered for srvid
+ *
+ * If private_data is NULL, then check if there is any registration
+ * for the specified srvid. If private_data is not NULL, then check for
+ * registration that matches the specified private data.
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] srvid The srvid
+ * @param[in] private_data Private data
+ * @return 0 on success, errno on failure
+ */
+int srvid_exists(struct srvid_context *srv, uint64_t srvid,
+ void *private_data);
+
+/**
+ * @brief Call message handlers for given srvid
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] srvid The srvid
+ * @param[in] srvid_all The srvid that gets all messages
+ * @param[in] data The data passed to each message handler
+ * @return 0 on success, errno on failure
+ *
+ * If srvid_all passed is 0, the message is not sent to message handlers
+ * registered with special srvid to receive all messages.
+ */
+int srvid_dispatch(struct srvid_context *srv, uint64_t srvid,
+ uint64_t srvid_all, TDB_DATA data);
+
+#endif /* __CTDB_SRVID_H__ */
diff --git a/ctdb/common/system.c b/ctdb/common/system.c
new file mode 100644
index 0000000..05a9564
--- /dev/null
+++ b/ctdb/common/system.c
@@ -0,0 +1,237 @@
+/*
+ common system utilities
+
+ Copyright (C) Amitay Isaacs 2014
+ Copyright (C) Martin Schwenke 2014
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/shmem.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <libgen.h>
+
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+
+#include "common/logging.h"
+#include "common/system.h"
+
+#ifdef HAVE_SCHED_H
+#include <sched.h>
+#endif
+
+#ifdef HAVE_PROCINFO_H
+#include <procinfo.h>
+#endif
+
+/*
+ if possible, make this task real time
+ */
+bool set_scheduler(void)
+{
+	/*
+	 * Returns true only when a real-time policy was applied.  When no
+	 * mechanism is compiled in, control falls through to the final
+	 * "No way" message.
+	 */
+#ifdef _AIX_
+#ifdef HAVE_THREAD_SETSCHED
+	struct thrdentry64 te;
+	tid64_t ti = 0ULL;
+
+	if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) {
+		DEBUG(DEBUG_ERR, ("Unable to get thread information\n"));
+		return false;
+	}
+
+	if (thread_setsched(te.ti_tid, 0, SCHED_RR) == -1) {
+		DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_RR (%s)\n",
+				  strerror(errno)));
+		return false;
+	}
+
+	return true;
+#endif
+#else /* no AIX */
+#ifdef HAVE_SCHED_SETSCHEDULER
+	struct sched_param p;
+
+	p.sched_priority = 1;
+
+	if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) {
+		DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n",
+				  strerror(errno)));
+		return false;
+	}
+
+	return true;
+#endif
+#endif
+	DEBUG(DEBUG_CRIT,("No way to set real-time priority.\n"));
+	return false;
+}
+
+/*
+ reset scheduler from real-time to normal scheduling
+ */
+void reset_scheduler(void)
+{
+	/*
+	 * Best effort: failures are logged and nothing is reported to the
+	 * caller.
+	 */
+#ifdef _AIX_
+#ifdef HAVE_THREAD_SETSCHED
+	struct thrdentry64 te;
+	tid64_t ti;
+
+	ti = 0ULL;
+	if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) {
+		DEBUG(DEBUG_ERR, ("Unable to get thread information\n"));
+		/*
+		 * te was not filled in, so te.ti_tid is indeterminate and
+		 * must not be handed to thread_setsched().
+		 */
+		return;
+	}
+	if (thread_setsched(te.ti_tid, 0, SCHED_OTHER) == -1) {
+		DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n"));
+	}
+#endif
+#else /* no AIX */
+#ifdef HAVE_SCHED_SETSCHEDULER
+	struct sched_param p;
+
+	/* Priority 0 together with SCHED_OTHER */
+	p.sched_priority = 0;
+	if (sched_setscheduler(0, SCHED_OTHER, &p) == -1) {
+		DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n"));
+	}
+#endif
+#endif
+}
+
+/* we don't lock future pages here; it would increase the chance that
+ * we'd fail to mmap later on. */
+void lockdown_memory(bool valgrinding)
+{
+ /*
+  * Lock the pages currently mapped by this process (MCL_CURRENT only).
+  * Does nothing when valgrinding is true, when not running as root, or
+  * when built without mlockall() or for AIX.  Failure is only logged.
+  */
+#if defined(HAVE_MLOCKALL) && !defined(_AIX_)
+ /* Extra stack, please! */
+ char dummy[10000];
+ /* Touch the whole array so the stack pages exist before locking. */
+ memset(dummy, 0, sizeof(dummy));
+
+ if (valgrinding) {
+ return;
+ }
+
+ /* Ignore when running in local daemons mode */
+ if (getuid() != 0) {
+ return;
+ }
+
+ /* Avoid compiler optimizing out dummy. */
+ /* The mlock() result is deliberately not checked. */
+ mlock(dummy, sizeof(dummy));
+ if (mlockall(MCL_CURRENT) != 0) {
+ DEBUG(DEBUG_WARNING,("Failed to lockdown memory: %s'\n",
+ strerror(errno)));
+ }
+#endif
+}
+
+/*
+ * Block until the given process no longer exists, probing with signal 0
+ * every 5 seconds.  Only ESRCH ends the wait; any other kill() failure
+ * is treated as "still there".
+ */
+void ctdb_wait_for_process_to_exit(pid_t pid)
+{
+	for (;;) {
+		if (kill(pid, 0) != 0 && errno == ESRCH) {
+			break;
+		}
+		sleep(5);
+	}
+}
+
+#ifdef HAVE_IF_NAMEINDEX
+
+/*
+ * Return true if an interface called iface appears in the table
+ * returned by if_nameindex(); false if it does not or if the table
+ * cannot be retrieved.
+ */
+bool ctdb_sys_check_iface_exists(const char *iface)
+{
+	struct if_nameindex *table = if_nameindex();
+	struct if_nameindex *entry;
+	bool present = false;
+
+	if (table == NULL) {
+		DBG_ERR("Failed to retrieve interface list\n");
+		return false;
+	}
+
+	/* The table ends with an entry whose index is 0 and name is NULL */
+	entry = table;
+	while (!(entry->if_index == 0 && entry->if_name == NULL)) {
+		if (strcmp(iface, entry->if_name) == 0) {
+			present = true;
+			break;
+		}
+		entry++;
+	}
+
+	if_freenameindex(table);
+
+	return present;
+}
+
+#else /* HAVE_IF_NAMEINDEX */
+
+/*
+ * Fallback without if_nameindex(): no check is possible, so every
+ * interface is reported as present.
+ */
+bool ctdb_sys_check_iface_exists(const char *iface)
+{
+	/* Not implemented: Interface always considered present */
+	return true;
+}
+
+#endif /* HAVE_IF_NAMEINDEX */
+
+#ifdef HAVE_PEERCRED
+
+/*
+ * Fetch the pid of the peer of socket fd via SO_PEERCRED.
+ * Returns the getsockopt() result; *peer_pid is -1 on failure.
+ */
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
+{
+	struct ucred cred;
+	socklen_t credlen = sizeof(struct ucred);
+	int rc = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &credlen);
+
+	*peer_pid = (rc == 0) ? cred.pid : -1;
+	return rc;
+}
+
+#else /* HAVE_PEERCRED */
+
+#ifdef _AIX_
+
+/*
+ * AIX variant: same contract, using SO_PEERID.
+ */
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
+{
+	struct peercred_struct cred;
+	socklen_t credlen = sizeof(struct peercred_struct);
+	int rc = getsockopt(fd, SOL_SOCKET, SO_PEERID, &cred, &credlen);
+
+	*peer_pid = (rc == 0) ? cred.pid : -1;
+	return rc;
+}
+
+#else /* _AIX_ */
+
+/*
+ * No peer credential support: always fails with ENOSYS.
+ */
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
+{
+	/* Not implemented */
+	*peer_pid = -1;
+	return ENOSYS;
+}
+
+#endif /* _AIX_ */
+
+#endif /* HAVE_PEERCRED */
diff --git a/ctdb/common/system.h b/ctdb/common/system.h
new file mode 100644
index 0000000..042e7cc
--- /dev/null
+++ b/ctdb/common/system.h
@@ -0,0 +1,37 @@
+/*
+ System specific code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SYSTEM_H__
+#define __CTDB_SYSTEM_H__
+
+#include <talloc.h>
+
+/* From system.c */
+
+bool set_scheduler(void);
+void reset_scheduler(void);
+
+void lockdown_memory(bool valgrinding);
+
+void ctdb_wait_for_process_to_exit(pid_t pid);
+
+bool ctdb_sys_check_iface_exists(const char *iface);
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid);
+
+#endif /* __CTDB_SYSTEM_H__ */
diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c
new file mode 100644
index 0000000..273b9c3
--- /dev/null
+++ b/ctdb/common/system_socket.c
@@ -0,0 +1,1168 @@
+/*
+ ctdb system specific code to manage raw sockets on linux
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Marc Dequènes (Duck) 2009
+ Copyright (C) Volker Lendecke 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+/*
+ * Use BSD struct tcphdr field names for portability. Modern glibc
+ * makes them available by default via <netinet/tcp.h> but older glibc
+ * requires __FAVOR_BSD to be defined.
+ *
+ * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
+ * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
+ * set. Including "replace.h" above causes <features.h> to be
+ * indirectly included and this will not set __FAVOR_BSD because
+ * _GNU_SOURCE is set in Samba's "config.h" (which is included by
+ * "replace.h").
+ *
+ * Therefore, set __FAVOR_BSD by hand below.
+ */
+#define __FAVOR_BSD 1
+#include "system/network.h"
+
+#ifdef HAVE_NETINET_IF_ETHER_H
+#include <netinet/if_ether.h>
+#endif
+#ifdef HAVE_NETINET_IP6_H
+#include <netinet/ip6.h>
+#endif
+#ifdef HAVE_NETINET_ICMP6_H
+#include <netinet/icmp6.h>
+#endif
+#ifdef HAVE_LINUX_IF_PACKET_H
+#include <linux/if_packet.h>
+#endif
+
+#ifndef ETHERTYPE_IP6
+#define ETHERTYPE_IP6 0x86dd
+#endif
+
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+
+#include "protocol/protocol.h"
+
+#include "common/logging.h"
+#include "common/system_socket.h"
+
+/*
+ uint16 checksum for n bytes
+ */
+static uint32_t uint16_checksum(uint8_t *data, size_t n)
+{
+	/*
+	 * Sum the buffer as a sequence of big-endian 16-bit words and
+	 * return the unfolded 32-bit total.  Words are assembled with
+	 * shifts, so the result does not depend on host byte order or on
+	 * the alignment of data.
+	 */
+	uint32_t sum = 0;
+
+	while (n >= 2) {
+		sum += ((uint32_t)data[0] << 8) | (uint32_t)data[1];
+		data += 2;
+		n -= 2;
+	}
+
+	if (n == 1) {
+		/*
+		 * A trailing odd byte is the high octet of a word padded
+		 * with a zero octet.  ntohs() on the bare byte only gave
+		 * this result on little-endian hosts.
+		 */
+		sum += (uint32_t)data[0] << 8;
+	}
+
+	return sum;
+}
+
+/*
+ * See if the given IP is currently on an interface
+ */
+bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
+{
+	/*
+	 * Probe by binding a throwaway TCP socket to the address with the
+	 * port cleared; the bind only succeeds if the address is local.
+	 * Work on a copy so the caller's address is left untouched.
+	 */
+	ctdb_sock_addr local = *_addr;
+	socklen_t local_len = 0;
+	int fd;
+	int rc;
+
+	if (local.sa.sa_family == AF_INET) {
+		local.ip.sin_port = 0;
+		local_len = sizeof(struct sockaddr_in);
+	} else if (local.sa.sa_family == AF_INET6) {
+		local.ip6.sin6_port = 0;
+		local_len = sizeof(struct sockaddr_in6);
+	}
+
+	fd = socket(local.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
+	if (fd == -1) {
+		return false;
+	}
+
+	rc = bind(fd, (struct sockaddr *)&local, local_len);
+	close(fd);
+
+	return rc == 0;
+}
+
+/*
+ * simple TCP checksum - assumes data is multiple of 2 bytes long
+ */
+static uint16_t ip_checksum(uint8_t *data, size_t n, struct ip *ip)
+{
+	uint32_t acc;
+	uint16_t csum;
+
+	/* Payload, then the pseudo-header pieces taken from the IP header */
+	acc = uint16_checksum(data, n);
+	acc += uint16_checksum((uint8_t *)&ip->ip_src, sizeof(ip->ip_src));
+	acc += uint16_checksum((uint8_t *)&ip->ip_dst, sizeof(ip->ip_dst));
+	acc += ip->ip_p + n;
+
+	/* Fold the carries back into the low 16 bits, twice */
+	acc = (acc >> 16) + (acc & 0xFFFF);
+	acc = (acc >> 16) + (acc & 0xFFFF);
+
+	/* One's complement in network byte order; 0 is sent as 0xFFFF */
+	csum = htons(acc);
+	csum = ~csum;
+
+	return (csum == 0) ? 0xFFFF : csum;
+}
+
+/*
+ * Checksum over n bytes of upper-layer data plus a pseudo-header made
+ * of the source and destination addresses, the length n and the next
+ * header value taken from ip6.
+ */
+static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6)
+{
+	uint16_t pseudo[3];
+	uint32_t nlen = htonl(n);
+	uint32_t acc;
+	uint16_t csum;
+
+	/* Length as two 16-bit halves, followed by the next-header value */
+	pseudo[0] = nlen & UINT16_MAX;
+	pseudo[1] = (nlen >> 16) & UINT16_MAX;
+	/* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
+	pseudo[2] = htons(ip6->ip6_nxt);
+
+	acc = uint16_checksum((uint8_t *)&ip6->ip6_src, 16);
+	acc += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16);
+	acc += uint16_checksum((uint8_t *)pseudo, sizeof(pseudo));
+	acc += uint16_checksum(data, n);
+
+	/* Fold the carries back into the low 16 bits, twice */
+	acc = (acc >> 16) + (acc & 0xFFFF);
+	acc = (acc >> 16) + (acc & 0xFFFF);
+
+	/* One's complement in network byte order; 0 is sent as 0xFFFF */
+	csum = htons(acc);
+	csum = ~csum;
+
+	return (csum == 0) ? 0xFFFF : csum;
+}
+
+/*
+ * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
+ */
+
+#ifdef HAVE_PACKETSOCKET
+
+/*
+ * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
+ * packets
+ */
+
+/*
+ * Exact on-wire sizes of the frames built below.  The sums are
+ * parenthesised so the macros stay correct inside larger expressions
+ * such as multiplication or comparison.
+ */
+#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
+			 sizeof(struct ether_arp))
+
+#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
+			    sizeof(struct ip6_hdr) + \
+			    sizeof(struct nd_neighbor_advert) + \
+			    sizeof(struct nd_opt_hdr) + \
+			    sizeof(struct ether_addr))
+
+/* Buffer sizes: the frame size, but never less than 64 bytes */
+#define ARP_BUFFER_SIZE MAX(ARP_STRUCT_SIZE, 64)
+
+#define IP6_NA_BUFFER_SIZE MAX(IP6_NA_STRUCT_SIZE, 64)
+
+/*
+ * Fill buffer with a broadcast ethernet frame carrying an ARP request
+ * (reply == false) or ARP reply (reply == true) announcing addr at
+ * hwaddr.
+ *
+ * On success *ether_dhost points at the destination MAC inside buffer
+ * and *len is the frame length (ARP_BUFFER_SIZE).  Returns EINVAL for a
+ * non-IPv4 address and EMSGSIZE when buffer is too small.
+ */
+static int arp_build(uint8_t *buffer,
+		     size_t buflen,
+		     const struct sockaddr_in *addr,
+		     const struct ether_addr *hwaddr,
+		     bool reply,
+		     struct ether_addr **ether_dhost,
+		     size_t *len)
+{
+	const size_t framelen = ARP_BUFFER_SIZE;
+	struct ether_header *eth;
+	struct ether_arp *arp;
+	struct arphdr *hdr;
+
+	if (addr->sin_family != AF_INET) {
+		return EINVAL;
+	}
+
+	if (buflen < framelen) {
+		return EMSGSIZE;
+	}
+
+	memset(buffer, 0 , framelen);
+
+	/* Ethernet header: broadcast destination, our MAC as source */
+	eth = (struct ether_header *)buffer;
+	memset(eth->ether_dhost, 0xff, ETH_ALEN);
+	memcpy(eth->ether_shost, hwaddr, ETH_ALEN);
+	eth->ether_type = htons(ETHERTYPE_ARP);
+
+	/* Fixed part of the ARP header */
+	arp = (struct ether_arp *)(buffer + sizeof(struct ether_header));
+	hdr = &arp->ea_hdr;
+	hdr->ar_hrd = htons(ARPHRD_ETHER);
+	hdr->ar_pro = htons(ETH_P_IP);
+	hdr->ar_hln = ETH_ALEN;
+	hdr->ar_pln = sizeof(arp->arp_spa);
+	hdr->ar_op = htons(reply ? ARPOP_REPLY : ARPOP_REQUEST);
+
+	/* Sender and target protocol address are both the announced IP */
+	memcpy(arp->arp_sha, hwaddr, ETH_ALEN);
+	memcpy(arp->arp_spa, &addr->sin_addr, sizeof(arp->arp_spa));
+	if (reply) {
+		/* A reply names our own MAC as the target hardware address */
+		memcpy(arp->arp_tha, hwaddr, ETH_ALEN);
+	} else {
+		/* A request leaves the target hardware address zeroed */
+		memset(arp->arp_tha, 0, ETH_ALEN);
+	}
+	memcpy(arp->arp_tpa, &addr->sin_addr, sizeof(arp->arp_tpa));
+
+	*ether_dhost = (struct ether_addr *)eth->ether_dhost;
+	*len = framelen;
+	return 0;
+}
+
+/*
+ * Fill buffer with an ethernet frame carrying an IPv6 neighbour
+ * advertisement for addr with target link-layer address hwaddr.
+ *
+ * On success *ether_dhost points at the destination MAC inside buffer
+ * and *len is the frame length (IP6_NA_BUFFER_SIZE).  Returns EINVAL
+ * for a non-IPv6 address, EMSGSIZE when buffer is too small and EIO if
+ * the all-nodes address cannot be parsed.
+ */
+static int ip6_na_build(uint8_t *buffer,
+ size_t buflen,
+ const struct sockaddr_in6 *addr,
+ const struct ether_addr *hwaddr,
+ struct ether_addr **ether_dhost,
+ size_t *len)
+{
+ size_t l = IP6_NA_BUFFER_SIZE;
+ struct ether_header *eh;
+ struct ip6_hdr *ip6;
+ struct nd_neighbor_advert *nd_na;
+ struct nd_opt_hdr *nd_oh;
+ struct ether_addr *ea;
+ int ret;
+
+ if (addr->sin6_family != AF_INET6) {
+ return EINVAL;
+ }
+
+ if (buflen < l) {
+ return EMSGSIZE;
+ }
+
+ /* Zero the frame: fields not set explicitly below stay 0 */
+ memset(buffer, 0 , l);
+
+ eh = (struct ether_header *)buffer;
+ /*
+ * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
+ * section 7) - note memset 0 above!
+ */
+ eh->ether_dhost[0] = 0x33;
+ eh->ether_dhost[1] = 0x33;
+ eh->ether_dhost[5] = 0x01;
+ memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
+ eh->ether_type = htons(ETHERTYPE_IP6);
+
+ /* IPv6 header follows the ethernet header */
+ ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header));
+ ip6->ip6_vfc = 6 << 4;
+ /* Payload: advertisement + option header + link-layer address */
+ ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) +
+ sizeof(struct nd_opt_hdr) +
+ ETH_ALEN);
+ ip6->ip6_nxt = IPPROTO_ICMPV6;
+ ip6->ip6_hlim = 255;
+ ip6->ip6_src = addr->sin6_addr;
+ /* all-nodes multicast */
+
+ ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
+ if (ret != 1) {
+ return EIO;
+ }
+
+ /* Neighbour advertisement follows the IPv6 header */
+ nd_na = (struct nd_neighbor_advert *)(buffer +
+ sizeof(struct ether_header) +
+ sizeof(struct ip6_hdr));
+ nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
+ nd_na->nd_na_code = 0;
+ nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
+ nd_na->nd_na_target = addr->sin6_addr;
+
+ /* Option: Target link-layer address */
+ nd_oh = (struct nd_opt_hdr *)(buffer +
+ sizeof(struct ether_header) +
+ sizeof(struct ip6_hdr) +
+ sizeof(struct nd_neighbor_advert));
+ nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+ nd_oh->nd_opt_len = 1; /* multiple of 8 octets */
+
+ /* The option payload is the MAC address itself */
+ ea = (struct ether_addr *)(buffer +
+ sizeof(struct ether_header) +
+ sizeof(struct ip6_hdr) +
+ sizeof(struct nd_neighbor_advert) +
+ sizeof(struct nd_opt_hdr));
+ memcpy(ea, hwaddr, ETH_ALEN);
+
+ /* Checksum covers the whole payload; the checksum field is still 0
+  * from the memset above while it is computed */
+ nd_na->nd_na_cksum = ip6_checksum((uint8_t *)nd_na,
+ ntohs(ip6->ip6_plen),
+ ip6);
+
+ *ether_dhost = (struct ether_addr *)eh->ether_dhost;
+ *len = l;
+ return 0;
+}
+
+/*
+ * Copy the link-layer destination into sall and transmit one prepared
+ * frame on packet socket s.  Returns 0 on success, errno if sendto()
+ * fails.  Logging is left to the caller.
+ */
+static int arp_send_frame(int s,
+			  struct sockaddr_ll *sall,
+			  const uint8_t *buffer,
+			  size_t len,
+			  const struct ether_addr *ether_dhost)
+{
+	ssize_t nsent;
+
+	memcpy(&sall->sll_addr[0], ether_dhost, sall->sll_halen);
+
+	nsent = sendto(s,
+		       buffer,
+		       len,
+		       0,
+		       (struct sockaddr *)sall,
+		       sizeof(*sall));
+	if (nsent < 0) {
+		return errno;
+	}
+
+	return 0;
+}
+
+/*
+ * Announce addr on interface iface: for IPv4 send a gratuitous ARP
+ * request followed by an unsolicited ARP reply, for IPv6 send a
+ * neighbour advertisement.
+ *
+ * Returns 0 on success and when iface is a loopback device (nothing is
+ * sent), otherwise an errno value.
+ */
+int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
+{
+	int s;
+	struct sockaddr_ll sall = {0};
+	struct ifreq if_hwaddr = {
+		.ifr_ifru = {
+			.ifru_flags = 0
+		},
+	};
+	uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
+	struct ifreq ifr = {
+		.ifr_ifru = {
+			.ifru_flags = 0
+		},
+	};
+	struct ether_addr *hwaddr = NULL;
+	struct ether_addr *ether_dhost = NULL;
+	size_t len = 0;
+	int ret = 0;
+
+	s = socket(AF_PACKET, SOCK_RAW, 0);
+	if (s == -1) {
+		ret = errno;
+		DBG_ERR("Failed to open raw socket\n");
+		return ret;
+	}
+	DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
+
+	/* Find interface */
+	strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
+	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
+		ret = errno;
+		DBG_ERR("Interface '%s' not found\n", iface);
+		goto done;
+	}
+
+	/* Get MAC address */
+	strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
+	ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
+	if ( ret < 0 ) {
+		ret = errno;
+		DBG_ERR("ioctl failed\n");
+		goto done;
+	}
+	if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
+		/* Nothing to announce on loopback; this is not an error */
+		ret = 0;
+		D_DEBUG("Ignoring loopback arp request\n");
+		goto done;
+	}
+	if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
+		ret = EINVAL;
+		DBG_ERR("Not an ethernet address family (0x%x)\n",
+			if_hwaddr.ifr_hwaddr.sa_family);
+		goto done;
+	}
+
+	/* Set up most of destination address structure */
+	sall.sll_family = AF_PACKET;
+	sall.sll_halen = sizeof(struct ether_addr);
+	sall.sll_protocol = htons(ETH_P_ALL);
+	sall.sll_ifindex = ifr.ifr_ifindex;
+
+	/* For clarity */
+	hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
+
+	switch (addr->ip.sin_family) {
+	case AF_INET:
+		/* Send gratuitous ARP */
+		ret = arp_build(buffer,
+				sizeof(buffer),
+				&addr->ip,
+				hwaddr,
+				false,
+				&ether_dhost,
+				&len);
+		if (ret != 0) {
+			DBG_ERR("Failed to build ARP request\n");
+			goto done;
+		}
+
+		ret = arp_send_frame(s, &sall, buffer, len, ether_dhost);
+		if (ret != 0) {
+			DBG_ERR("Failed sendto\n");
+			goto done;
+		}
+
+		/* Send unsolicited ARP reply */
+		ret = arp_build(buffer,
+				sizeof(buffer),
+				&addr->ip,
+				hwaddr,
+				true,
+				&ether_dhost,
+				&len);
+		if (ret != 0) {
+			DBG_ERR("Failed to build ARP reply\n");
+			goto done;
+		}
+
+		ret = arp_send_frame(s, &sall, buffer, len, ether_dhost);
+		if (ret != 0) {
+			DBG_ERR("Failed sendto\n");
+			goto done;
+		}
+		break;
+
+	case AF_INET6:
+		ret = ip6_na_build(buffer,
+				   sizeof(buffer),
+				   &addr->ip6,
+				   hwaddr,
+				   &ether_dhost,
+				   &len);
+		if (ret != 0) {
+			DBG_ERR("Failed to build IPv6 neighbor advertisement\n");
+			goto done;
+		}
+
+		ret = arp_send_frame(s, &sall, buffer, len, ether_dhost);
+		if (ret != 0) {
+			DBG_ERR("Failed sendto\n");
+			goto done;
+		}
+		break;
+
+	default:
+		ret = EINVAL;
+		DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
+			addr->ip.sin_family);
+		goto done;
+	}
+
+	/* Success: ret is 0 here because every send above returned 0 */
+
+done:
+	/* Single exit so the socket is closed exactly once on every path */
+	close(s);
+	return ret;
+}
+
+#else /* HAVE_PACKETSOCKET */
+
+int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
+{
+	/*
+	 * Not implemented: without packet socket support nothing can be
+	 * sent, so both arguments are ignored.
+	 */
+	(void)addr;
+	(void)iface;
+
+	return ENOSYS;
+}
+
+#endif /* HAVE_PACKETSOCKET */
+
+
+/*
+ * Sizes of the bare IP + TCP header packets built below.  The sums are
+ * parenthesised so the macros stay correct inside larger expressions.
+ */
+#define IP4_TCP_BUFFER_SIZE (sizeof(struct ip) + \
+			     sizeof(struct tcphdr))
+
+#define IP6_TCP_BUFFER_SIZE (sizeof(struct ip6_hdr) + \
+			     sizeof(struct tcphdr))
+
+/*
+ * Build an IPv4 packet consisting of an IP header and a TCP header with
+ * no payload in buf.
+ *
+ * The segment always has ACK set, plus RST when rst is non-zero.  seq
+ * and ack are stored into the TCP header unchanged (presumably the
+ * caller supplies them in network byte order - confirm).
+ *
+ * On success *len is the packet length.  Returns EMSGSIZE when buf is
+ * too small or the header structs do not pack as expected.
+ */
+static int tcp4_build(uint8_t *buf,
+		      size_t buflen,
+		      const struct sockaddr_in *src,
+		      const struct sockaddr_in *dst,
+		      uint32_t seq,
+		      uint32_t ack,
+		      int rst,
+		      size_t *len)
+{
+	size_t l = IP4_TCP_BUFFER_SIZE;
+	struct {
+		struct ip ip;
+		struct tcphdr tcp;
+	} *ip4pkt;
+
+	/* Guard against padding between the two headers */
+	if (l != sizeof(*ip4pkt)) {
+		return EMSGSIZE;
+	}
+
+	if (buflen < l) {
+		return EMSGSIZE;
+	}
+
+	ip4pkt = (void *)buf;
+	memset(ip4pkt, 0, l);
+
+	ip4pkt->ip.ip_v = 4;
+	ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t);
+	/*
+	 * Total length is the size of the packet, not of the pointer to
+	 * it: sizeof(ip4pkt) would give the pointer size.
+	 */
+	ip4pkt->ip.ip_len = htons(sizeof(*ip4pkt));
+	ip4pkt->ip.ip_ttl = 255;
+	ip4pkt->ip.ip_p = IPPROTO_TCP;
+	ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
+	ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
+	ip4pkt->ip.ip_sum = 0;
+
+	ip4pkt->tcp.th_sport = src->sin_port;
+	ip4pkt->tcp.th_dport = dst->sin_port;
+	ip4pkt->tcp.th_seq = seq;
+	ip4pkt->tcp.th_ack = ack;
+	ip4pkt->tcp.th_flags = 0;
+	ip4pkt->tcp.th_flags |= TH_ACK;
+	if (rst) {
+		ip4pkt->tcp.th_flags |= TH_RST;
+	}
+	ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
+	/* this makes it easier to spot in a sniffer */
+	ip4pkt->tcp.th_win = htons(1234);
+	/* th_sum is still 0 from the memset while the checksum is taken */
+	ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp,
+					 sizeof(ip4pkt->tcp),
+					 &ip4pkt->ip);
+
+	*len = l;
+	return 0;
+}
+
+/*
+ * Build an IPv6 packet consisting of an IPv6 header and a TCP header
+ * with no payload in buf.  ACK is always set, RST only when rst is
+ * non-zero.  On success *len is the packet length; EMSGSIZE is returned
+ * when buf is too small or the headers do not pack as expected.
+ */
+static int tcp6_build(uint8_t *buf,
+		      size_t buflen,
+		      const struct sockaddr_in6 *src,
+		      const struct sockaddr_in6 *dst,
+		      uint32_t seq,
+		      uint32_t ack,
+		      int rst,
+		      size_t *len)
+{
+	const size_t pktlen = IP6_TCP_BUFFER_SIZE;
+	struct {
+		struct ip6_hdr ip6;
+		struct tcphdr tcp;
+	} *pkt;
+
+	/* Reject header padding as well as a short buffer */
+	if (pktlen != sizeof(*pkt) || buflen < pktlen) {
+		return EMSGSIZE;
+	}
+
+	pkt = (void *)buf;
+	memset(pkt, 0, pktlen);
+
+	/* IPv6 header */
+	pkt->ip6.ip6_vfc = 6 << 4;
+	pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr));
+	pkt->ip6.ip6_nxt = IPPROTO_TCP;
+	pkt->ip6.ip6_hlim = 64;
+	pkt->ip6.ip6_src = src->sin6_addr;
+	pkt->ip6.ip6_dst = dst->sin6_addr;
+
+	/* TCP header */
+	pkt->tcp.th_sport = src->sin6_port;
+	pkt->tcp.th_dport = dst->sin6_port;
+	pkt->tcp.th_seq = seq;
+	pkt->tcp.th_ack = ack;
+	pkt->tcp.th_flags = rst ? (TH_ACK | TH_RST) : TH_ACK;
+	pkt->tcp.th_off = sizeof(pkt->tcp)/sizeof(uint32_t);
+	/* this makes it easier to spot in a sniffer */
+	pkt->tcp.th_win = htons(1234);
+	pkt->tcp.th_sum = ip6_checksum((uint8_t *)&pkt->tcp,
+				       sizeof(pkt->tcp),
+				       &pkt->ip6);
+
+	*len = pktlen;
+	return 0;
+}
+
+/*
+ * Send tcp segment from the specified IP/port to the specified
+ * destination IP/port.
+ *
+ * This is used to trigger the receiving host into sending its own ACK,
+ * which should trigger early detection of TCP reset by the client
+ * after IP takeover
+ *
+ * This can also be used to send RST segments (if rst is true) and also
+ * if correct seq and ack numbers are provided.
+ */
+int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
+ const ctdb_sock_addr *src,
+ uint32_t seq,
+ uint32_t ack,
+ int rst)
+{
+ /*
+  * Return values: 0 on success, the error from tcp4_build()/tcp6_build()
+  * if the packet cannot be built, and -1 for socket, setsockopt and
+  * sendto failures or an unsupported address family.
+  */
+ uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
+ size_t len = 0;
+ int ret;
+ int s;
+ uint32_t one = 1;
+ struct sockaddr_in6 tmpdest = { 0 };
+ int saved_errno;
+
+ /* The source address family selects the packet type */
+ switch (src->ip.sin_family) {
+ case AF_INET:
+ ret = tcp4_build(buf,
+ sizeof(buf),
+ &src->ip,
+ &dest->ip,
+ seq,
+ ack,
+ rst,
+ &len);
+ if (ret != 0) {
+ DBG_ERR("Failed to build TCP packet (%d)\n", ret);
+ return ret;
+ }
+
+ /* open a raw socket to send this segment from */
+ s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+ if (s == -1) {
+ DBG_ERR("Failed to open raw socket (%s)\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /* buf already starts with an IP header */
+ ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
+ if (ret != 0) {
+ DBG_ERR("Failed to setup IP headers (%s)\n",
+ strerror(errno));
+ close(s);
+ return -1;
+ }
+
+ ret = sendto(s,
+ buf,
+ len,
+ 0,
+ (const struct sockaddr *)&dest->ip,
+ sizeof(dest->ip));
+ /* Save errno before close() can overwrite it */
+ saved_errno = errno;
+ close(s);
+ if (ret == -1) {
+ D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
+ return -1;
+ }
+ if ((size_t)ret != len) {
+ DBG_ERR("Failed sendto - didn't send full packet\n");
+ return -1;
+ }
+ break;
+
+ case AF_INET6:
+ ret = tcp6_build(buf,
+ sizeof(buf),
+ &src->ip6,
+ &dest->ip6,
+ seq,
+ ack,
+ rst,
+ &len);
+ if (ret != 0) {
+ DBG_ERR("Failed to build TCP packet (%d)\n", ret);
+ return ret;
+ }
+
+ s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
+ if (s == -1) {
+ DBG_ERR("Failed to open sending socket\n");
+ return -1;
+
+ }
+ /*
+ * sendto() on an IPv6 raw socket requires the port to
+ * be either 0 or a protocol value
+ */
+ tmpdest = dest->ip6;
+ tmpdest.sin6_port = 0;
+
+ ret = sendto(s,
+ buf,
+ len,
+ 0,
+ (const struct sockaddr *)&tmpdest,
+ sizeof(tmpdest));
+ /* Save errno before close() can overwrite it */
+ saved_errno = errno;
+ close(s);
+ if (ret == -1) {
+ D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
+ return -1;
+ }
+ if ((size_t)ret != len) {
+ DBG_ERR("Failed sendto - didn't send full packet\n");
+ return -1;
+ }
+ break;
+
+ default:
+ DBG_ERR("Not an ipv4/v6 address\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Extract addresses, ports and selected TCP header fields from a
+ * captured IPv4 packet.
+ *
+ * ip_pkt points at the start of the IP header and pktlen is the
+ * number of bytes available from there.  Ports, sequence numbers and
+ * the window are copied exactly as they appear in the packet (no byte
+ * order conversion).  rst and window are optional and may be NULL;
+ * *rst receives th_flags & TH_RST.
+ *
+ * Returns 0 on success, EMSGSIZE if the packet is too short to hold
+ * the required fields, or ENOMSG if the packet is not an unfragmented
+ * IPv4 TCP packet with a sane header length.
+ */
+static int tcp4_extract(const uint8_t *ip_pkt,
+			size_t pktlen,
+			struct sockaddr_in *src,
+			struct sockaddr_in *dst,
+			uint32_t *ack_seq,
+			uint32_t *seq,
+			int *rst,
+			uint16_t *window)
+{
+	const struct ip *ip;
+	const struct tcphdr *tcp;
+	size_t ip_hdr_len;
+
+	if (pktlen < sizeof(struct ip)) {
+		return EMSGSIZE;
+	}
+
+	ip = (const struct ip *)ip_pkt;
+
+	/* IPv4 only */
+	if (ip->ip_v != 4) {
+		return ENOMSG;
+	}
+	/* Don't look at fragments */
+	if ((ntohs(ip->ip_off)&0x1fff) != 0) {
+		return ENOMSG;
+	}
+	/* TCP only */
+	if (ip->ip_p != IPPROTO_TCP) {
+		return ENOMSG;
+	}
+
+	/*
+	 * A header length below the fixed IPv4 header size is bogus:
+	 * without this check the "TCP header" would be parsed from
+	 * inside the IP header itself.
+	 */
+	ip_hdr_len = ip->ip_hl * sizeof(uint32_t);
+	if (ip_hdr_len < sizeof(struct ip)) {
+		return ENOMSG;
+	}
+
+	/* Ensure there is enough of the packet to gather required fields */
+	if (pktlen < ip_hdr_len + offsetof(struct tcphdr, th_sum)) {
+		return EMSGSIZE;
+	}
+
+	tcp = (const struct tcphdr *)(ip_pkt + ip_hdr_len);
+
+	src->sin_family = AF_INET;
+	src->sin_addr.s_addr = ip->ip_src.s_addr;
+	src->sin_port = tcp->th_sport;
+
+	dst->sin_family = AF_INET;
+	dst->sin_addr.s_addr = ip->ip_dst.s_addr;
+	dst->sin_port = tcp->th_dport;
+
+	*ack_seq = tcp->th_ack;
+	*seq = tcp->th_seq;
+	if (window != NULL) {
+		*window = tcp->th_win;
+	}
+	if (rst != NULL) {
+		*rst = tcp->th_flags & TH_RST;
+	}
+
+	return 0;
+}
+
+/*
+ * Extract addresses, ports and selected TCP header fields from a
+ * captured IPv6 packet whose next header is TCP.
+ *
+ * Values are copied exactly as they appear in the packet.  rst and
+ * window are optional and may be NULL.
+ *
+ * Returns 0 on success, EMSGSIZE if the packet is too short, or
+ * ENOMSG if it is not IPv6 or the next header is not TCP.
+ */
+static int tcp6_extract(const uint8_t *ip_pkt,
+			size_t pktlen,
+			struct sockaddr_in6 *src,
+			struct sockaddr_in6 *dst,
+			uint32_t *ack_seq,
+			uint32_t *seq,
+			int *rst,
+			uint16_t *window)
+{
+	/* Bytes needed: fixed IPv6 header + TCP header up to th_sum */
+	const size_t needed =
+		sizeof(struct ip6_hdr) + offsetof(struct tcphdr, th_sum);
+	const struct ip6_hdr *hdr = (const struct ip6_hdr *)ip_pkt;
+	const struct tcphdr *th =
+		(const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr));
+
+	if (pktlen < needed) {
+		return EMSGSIZE;
+	}
+
+	/* IPv6 carrying TCP directly, nothing else */
+	if ((hdr->ip6_vfc >> 4) != 6 || hdr->ip6_nxt != IPPROTO_TCP) {
+		return ENOMSG;
+	}
+
+	src->sin6_family = AF_INET6;
+	src->sin6_addr = hdr->ip6_src;
+	src->sin6_port = th->th_sport;
+
+	dst->sin6_family = AF_INET6;
+	dst->sin6_addr = hdr->ip6_dst;
+	dst->sin6_port = th->th_dport;
+
+	*seq = th->th_seq;
+	*ack_seq = th->th_ack;
+	if (rst != NULL) {
+		*rst = th->th_flags & TH_RST;
+	}
+	if (window != NULL) {
+		*window = th->th_win;
+	}
+
+	return 0;
+}
+
+/*
+ * Packet capture
+ *
+ * If AF_PACKET is available then use a raw socket otherwise use pcap.
+ * wscript has checked to make sure that pcap is available if needed.
+ */
+
+#if defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP)
+
+/*
+ * Open a non-blocking, close-on-exec AF_PACKET raw socket that
+ * receives all traffic.  iface and private_data are not used by this
+ * implementation.
+ *
+ * Returns the socket file descriptor, or -1 on failure.
+ */
+int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
+{
+	int fd;
+
+	/* Open a socket to capture all traffic */
+	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (fd == -1) {
+		DBG_ERR("Failed to open raw socket\n");
+		return -1;
+	}
+
+	DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", fd);
+
+	if (set_blocking(fd, false) != 0) {
+		DBG_ERR("Failed to set socket non-blocking (%s)\n",
+			strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	set_close_on_exec(fd);
+
+	return fd;
+}
+
+/*
+ * Additional cleanup hook for a capture socket.  The AF_PACKET
+ * implementation keeps no private state, so there is nothing to do.
+ * Note that the socket itself is closed automatically in the caller.
+ *
+ * Always returns 0.
+ */
+int ctdb_sys_close_capture_socket(void *private_data)
+{
+	/* No private state to release */
+	return 0;
+}
+
+
+/*
+ * Called when the raw socket becomes readable.
+ *
+ * Reads one frame from s, treats it as Ethernet and hands the payload
+ * to the IPv4 or IPv6 TCP extractor depending on the ether type.
+ * private_data is not used by this implementation.
+ *
+ * Returns 0 on success, errno from recv() on read failure, EMSGSIZE
+ * for a frame shorter than an Ethernet header, ENOMSG for any other
+ * ether type, or the error from the extractor.
+ */
+int ctdb_sys_read_tcp_packet(int s, void *private_data,
+			     ctdb_sock_addr *src,
+			     ctdb_sock_addr *dst,
+			     uint32_t *ack_seq,
+			     uint32_t *seq,
+			     int *rst,
+			     uint16_t *window)
+{
+	uint8_t frame[100]; /* Large enough for simple ACK/RST packets */
+	const size_t eth_len = sizeof(struct ether_header);
+	const struct ether_header *eth_hdr;
+	ssize_t got;
+
+	got = recv(s, frame, sizeof(frame), MSG_TRUNC);
+	if (got == -1) {
+		return errno;
+	}
+	if ((size_t)got < eth_len) {
+		return EMSGSIZE;
+	}
+
+	ZERO_STRUCTP(src);
+	ZERO_STRUCTP(dst);
+
+	/* Ethernet */
+	eth_hdr = (const struct ether_header *)frame;
+
+	/* we want either IPv4 or IPv6 */
+	switch (ntohs(eth_hdr->ether_type)) {
+	case ETHERTYPE_IP:
+		return tcp4_extract(frame + eth_len,
+				    (size_t)got - eth_len,
+				    &src->ip,
+				    &dst->ip,
+				    ack_seq,
+				    seq,
+				    rst,
+				    window);
+	case ETHERTYPE_IP6:
+		return tcp6_extract(frame + eth_len,
+				    (size_t)got - eth_len,
+				    &src->ip6,
+				    &dst->ip6,
+				    ack_seq,
+				    seq,
+				    rst,
+				    window);
+	default:
+		break;
+	}
+
+	return ENOMSG;
+}
+
+#else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
+
+#include <pcap.h>
+
+/*
+ * Assume this exists if pcap.h exists - it has been around for a
+ * while
+ */
+#include <pcap/sll.h>
+
+/*
+ * Open a pcap capture on iface for TCP tickle processing.
+ *
+ * The capture is configured with a 100 byte snapshot length, no
+ * promiscuous mode, no timeout and (where available) immediate mode.
+ * Only Ethernet and Linux "cooked" (SLL/SLL2) link types are
+ * accepted.
+ *
+ * On success the pcap handle is stored via private_data (which must
+ * point to a pcap_t *) and a selectable file descriptor is returned.
+ * On failure the handle is closed, private_data is left untouched and
+ * -1 is returned.
+ */
+int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
+{
+	char errbuf[PCAP_ERRBUF_SIZE];
+	pcap_t *pt;
+	int pcap_packet_type;
+	const char *t = NULL;
+	int fd;
+	int ret;
+
+	pt = pcap_create(iface, errbuf);
+	if (pt == NULL) {
+		DBG_ERR("Failed to open pcap capture device %s (%s)\n",
+			iface,
+			errbuf);
+		return -1;
+	}
+	/*
+	 * pcap isn't very clear about defaults...
+	 */
+	ret = pcap_set_snaplen(pt, 100);
+	if (ret < 0) {
+		DBG_ERR("Failed to set snaplen for pcap capture\n");
+		goto fail;
+	}
+	ret = pcap_set_promisc(pt, 0);
+	if (ret < 0) {
+		DBG_ERR("Failed to unset promiscuous mode for pcap capture\n");
+		goto fail;
+	}
+	ret = pcap_set_timeout(pt, 0);
+	if (ret < 0) {
+		DBG_ERR("Failed to set timeout for pcap capture\n");
+		goto fail;
+	}
+#ifdef HAVE_PCAP_SET_IMMEDIATE_MODE
+	ret = pcap_set_immediate_mode(pt, 1);
+	if (ret < 0) {
+		DBG_ERR("Failed to set immediate mode for pcap capture\n");
+		goto fail;
+	}
+#endif
+	ret = pcap_activate(pt);
+	if (ret < 0) {
+		DBG_ERR("Failed to activate pcap capture\n");
+		goto fail;
+	}
+
+	pcap_packet_type = pcap_datalink(pt);
+	switch (pcap_packet_type) {
+	case DLT_EN10MB:
+		t = "DLT_EN10MB";
+		break;
+	case DLT_LINUX_SLL:
+		t = "DLT_LINUX_SLL";
+		break;
+#ifdef DLT_LINUX_SLL2
+	case DLT_LINUX_SLL2:
+		t = "DLT_LINUX_SLL2";
+		break;
+#endif /* DLT_LINUX_SLL2 */
+	default:
+		DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type);
+		goto fail;
+	}
+
+	fd = pcap_get_selectable_fd(pt);
+	if (fd < 0) {
+		/*
+		 * Without a selectable fd the capture can't be
+		 * monitored.  Close the handle here so it isn't
+		 * leaked when the caller sees the failure.
+		 */
+		DBG_ERR("Failed to get selectable fd for pcap capture\n");
+		goto fail;
+	}
+	DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n",
+		  t,
+		  fd);
+
+	*((pcap_t **)private_data) = pt;
+	return fd;
+
+fail:
+	pcap_close(pt);
+	return -1;
+}
+
+/*
+ * Close the pcap capture handle passed as private_data.
+ *
+ * Always returns 0.
+ */
+int ctdb_sys_close_capture_socket(void *private_data)
+{
+	pcap_close((pcap_t *)private_data);
+
+	return 0;
+}
+
+/*
+ * Read one packet from the pcap handle in private_data and extract
+ * TCP connection details from it.  s is not used by this
+ * implementation.
+ *
+ * The link-layer header (Ethernet, SLL or SLL2) is parsed to find the
+ * ether type, then the payload is handed to the IPv4 or IPv6
+ * extractor.
+ *
+ * Returns 0 on success, ENOMSG if no packet is available, EMSGSIZE if
+ * the captured data is shorter than the link-layer header,
+ * EPROTONOSUPPORT for unsupported link/hardware types, EPROTO for
+ * other ether types (including ARP), or the error from the extractor.
+ */
+int ctdb_sys_read_tcp_packet(int s,
+			     void *private_data,
+			     ctdb_sock_addr *src,
+			     ctdb_sock_addr *dst,
+			     uint32_t *ack_seq,
+			     uint32_t *seq,
+			     int *rst,
+			     uint16_t *window)
+{
+	int ret;
+	struct pcap_pkthdr pkthdr;
+	const u_char *buffer;
+	pcap_t *pt = (pcap_t *)private_data;
+	int pcap_packet_type;
+	uint16_t ether_type;
+	size_t ll_hdr_len;
+
+	buffer = pcap_next(pt, &pkthdr);
+	if (buffer == NULL) {
+		return ENOMSG;
+	}
+
+	ZERO_STRUCTP(src);
+	ZERO_STRUCTP(dst);
+
+	/*
+	 * In each case below the captured length is checked before
+	 * the link-layer header is dereferenced.  This also
+	 * guarantees that (caplen - ll_hdr_len) can't wrap around
+	 * when calculating the payload length.
+	 */
+	pcap_packet_type = pcap_datalink(pt);
+	switch (pcap_packet_type) {
+	case DLT_EN10MB: {
+		const struct ether_header *eth =
+			(const struct ether_header *)buffer;
+		ll_hdr_len = sizeof(struct ether_header);
+		if (pkthdr.caplen < ll_hdr_len) {
+			return EMSGSIZE;
+		}
+		ether_type = ntohs(eth->ether_type);
+		break;
+	}
+	case DLT_LINUX_SLL: {
+		const struct sll_header *sll =
+			(const struct sll_header *)buffer;
+		uint16_t arphrd_type;
+		ll_hdr_len = SLL_HDR_LEN;
+		if (pkthdr.caplen < ll_hdr_len) {
+			return EMSGSIZE;
+		}
+		arphrd_type = ntohs(sll->sll_hatype);
+		switch (arphrd_type) {
+		case ARPHRD_ETHER:
+		case ARPHRD_INFINIBAND:
+			break;
+		default:
+			DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n",
+				  arphrd_type);
+			return EPROTONOSUPPORT;
+		}
+		ether_type = ntohs(sll->sll_protocol);
+		break;
+	}
+#ifdef DLT_LINUX_SLL2
+	case DLT_LINUX_SLL2: {
+		const struct sll2_header *sll2 =
+			(const struct sll2_header *)buffer;
+		uint16_t arphrd_type;
+		ll_hdr_len = SLL2_HDR_LEN;
+		if (pkthdr.caplen < ll_hdr_len) {
+			return EMSGSIZE;
+		}
+		arphrd_type = ntohs(sll2->sll2_hatype);
+		switch (arphrd_type) {
+		case ARPHRD_ETHER:
+		case ARPHRD_INFINIBAND:
+			break;
+		default:
+			DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n",
+				  arphrd_type);
+			return EPROTONOSUPPORT;
+		}
+		ether_type = ntohs(sll2->sll2_protocol);
+		break;
+	}
+#endif /* DLT_LINUX_SLL2 */
+	default:
+		DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type);
+		return EPROTONOSUPPORT;
+	}
+
+	switch (ether_type) {
+	case ETHERTYPE_IP:
+		ret = tcp4_extract(buffer + ll_hdr_len,
+				   (size_t)pkthdr.caplen - ll_hdr_len,
+				   &src->ip,
+				   &dst->ip,
+				   ack_seq,
+				   seq,
+				   rst,
+				   window);
+		break;
+	case ETHERTYPE_IP6:
+		ret = tcp6_extract(buffer + ll_hdr_len,
+				   (size_t)pkthdr.caplen - ll_hdr_len,
+				   &src->ip6,
+				   &dst->ip6,
+				   ack_seq,
+				   seq,
+				   rst,
+				   window);
+		break;
+	case ETHERTYPE_ARP:
+		/* Silently ignore ARP packets */
+		return EPROTO;
+	default:
+		DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type);
+		return EPROTO;
+	}
+
+	return ret;
+}
+
+#endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
diff --git a/ctdb/common/system_socket.h b/ctdb/common/system_socket.h
new file mode 100644
index 0000000..065c53c
--- /dev/null
+++ b/ctdb/common/system_socket.h
@@ -0,0 +1,46 @@
+/*
+ System specific network code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SYSTEM_SOCKET_H__
+#define __CTDB_SYSTEM_SOCKET_H__
+
+#include "protocol/protocol.h"
+
+/*
+ * NOTE(review): implementation not visible here - presumably reports
+ * whether addr is configured on this host; confirm against
+ * system_socket.c.
+ */
+bool ctdb_sys_have_ip(ctdb_sock_addr *addr);
+
+/*
+ * NOTE(review): implementation not visible here - presumably sends an
+ * ARP (or IPv6 equivalent) announcement for addr on iface; confirm
+ * against system_socket.c.
+ */
+int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface);
+
+/*
+ * Send a TCP segment (ACK, plus RST if rst is non-zero) from src to
+ * dest via a raw socket.  Returns 0 on success, non-zero on failure.
+ */
+int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
+ const ctdb_sock_addr *src,
+ uint32_t seq,
+ uint32_t ack,
+ int rst);
+
+/*
+ * Packet capture, implemented using either an AF_PACKET raw socket or
+ * pcap.
+ *
+ * ctdb_sys_open_capture_socket() returns a file descriptor to monitor
+ * for readability, or -1 on failure.  The pcap implementation stores
+ * its capture handle via private_data; that value must then be passed
+ * to the other 2 functions.
+ *
+ * ctdb_sys_read_tcp_packet() returns 0 when TCP details have been
+ * extracted from a captured packet, otherwise an errno-style value.
+ * rst and window may be NULL.
+ */
+int ctdb_sys_open_capture_socket(const char *iface, void **private_data);
+int ctdb_sys_close_capture_socket(void *private_data);
+int ctdb_sys_read_tcp_packet(int s,
+ void *private_data,
+ ctdb_sock_addr *src,
+ ctdb_sock_addr *dst,
+ uint32_t *ack_seq,
+ uint32_t *seq,
+ int *rst,
+ uint16_t *window);
+
+#endif /* __CTDB_SYSTEM_SOCKET_H__ */
diff --git a/ctdb/common/tmon.c b/ctdb/common/tmon.c
new file mode 100644
index 0000000..04bad1f
--- /dev/null
+++ b/ctdb/common/tmon.c
@@ -0,0 +1,602 @@
+/*
+ Trivial FD monitoring
+
+ Copyright (C) Martin Schwenke & Amitay Isaacs, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <ctype.h>
+
+#include "lib/util/blocking.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/util.h"
+#include "lib/util/smb_strtox.h"
+
+#include "lib/async_req/async_sock.h"
+
+#include "common/tmon.h"
+
+
+/*
+ * Packet types.  The type is sent on the wire as a 16-bit value in
+ * network byte order.
+ */
+enum tmon_message_type {
+ /* Handled internally by the reader: request completes successfully */
+ TMON_MSG_EXIT = 1,
+ /* Handled internally by the reader: request fails with the value */
+ TMON_MSG_ERRNO,
+ /* Ping, value must be 0 */
+ TMON_MSG_PING,
+ /* Value is a single ASCII character */
+ TMON_MSG_ASCII,
+ /* Value is an arbitrary uint16_t */
+ TMON_MSG_CUSTOM,
+};
+
+/* In-memory (host byte order) representation of a packet */
+struct tmon_pkt {
+ enum tmon_message_type type;
+ uint16_t val;
+};
+
+/*
+ * Wire representation of a packet: 2 bytes of type followed by 2
+ * bytes of value, both in network byte order
+ */
+struct tmon_buf {
+ uint8_t data[4];
+};
+
+/* Marshall pkt into buf: type then value, each 16 bits, network order */
+static void tmon_packet_push(struct tmon_pkt *pkt,
+			     struct tmon_buf *buf)
+{
+	uint16_t wire[2];
+
+	wire[0] = htons(pkt->type);
+	wire[1] = htons(pkt->val);
+
+	memcpy(&buf->data[0], &wire[0], sizeof(wire[0]));
+	memcpy(&buf->data[2], &wire[1], sizeof(wire[1]));
+}
+
+/* Unmarshall buf into pkt, converting both fields to host byte order */
+static void tmon_packet_pull(struct tmon_buf *buf,
+			     struct tmon_pkt *pkt)
+{
+	uint16_t wire[2];
+
+	memcpy(&wire[0], &buf->data[0], sizeof(wire[0]));
+	memcpy(&wire[1], &buf->data[2], sizeof(wire[1]));
+
+	pkt->type = ntohs(wire[0]);
+	pkt->val = ntohs(wire[1]);
+}
+
+/*
+ * Marshall pkt and write it to fd.
+ *
+ * Returns 0 on success, errno if the write fails, or EIO if fewer
+ * bytes than a whole packet were written.
+ */
+static int tmon_packet_write(int fd, struct tmon_pkt *pkt)
+{
+	struct tmon_buf buf;
+	ssize_t n;
+
+	tmon_packet_push(pkt, &buf);
+
+	n = sys_write(fd, &buf.data[0], sizeof(buf.data));
+	if (n == -1) {
+		return errno;
+	}
+	if ((size_t)n != sizeof(buf.data)) {
+		/*
+		 * A partial packet leaves the reader out of sync (it
+		 * fails short reads with EPROTO), so don't report
+		 * success
+		 */
+		return EIO;
+	}
+	return 0;
+}
+
+/* Fill in pkt as an exit packet (value 0).  Always returns true. */
+bool tmon_set_exit(struct tmon_pkt *pkt)
+{
+	pkt->type = TMON_MSG_EXIT;
+	pkt->val = 0;
+
+	return true;
+}
+
+/*
+ * Fill in pkt as an errno packet carrying err.  Returns false,
+ * leaving pkt untouched, unless err is in the range 1..UINT16_MAX.
+ */
+bool tmon_set_errno(struct tmon_pkt *pkt, int err)
+{
+	bool representable = (err > 0) && (err <= UINT16_MAX);
+
+	if (!representable) {
+		return false;
+	}
+
+	pkt->type = TMON_MSG_ERRNO;
+	pkt->val = (uint16_t)err;
+
+	return true;
+}
+
+/* Fill in pkt as a ping packet (value 0).  Always returns true. */
+bool tmon_set_ping(struct tmon_pkt *pkt)
+{
+	pkt->type = TMON_MSG_PING;
+	pkt->val = 0;
+
+	return true;
+}
+
+/*
+ * Fill in pkt as an ASCII packet carrying c.  Returns false, leaving
+ * pkt untouched, if c is not an ASCII character.
+ */
+bool tmon_set_ascii(struct tmon_pkt *pkt, char c)
+{
+	if (isascii(c)) {
+		pkt->type = TMON_MSG_ASCII;
+		pkt->val = (uint16_t)c;
+		return true;
+	}
+
+	return false;
+}
+
+/* Fill in pkt as a custom packet carrying val.  Always returns true. */
+bool tmon_set_custom(struct tmon_pkt *pkt, uint16_t val)
+{
+	pkt->type = TMON_MSG_CUSTOM;
+	pkt->val = val;
+
+	return true;
+}
+
+/* An exit packet is valid only if it has the exit type and value 0 */
+static bool tmon_parse_exit(struct tmon_pkt *pkt)
+{
+	return (pkt->type == TMON_MSG_EXIT) && (pkt->val == 0);
+}
+
+/*
+ * Validate an errno packet and parse out the error.
+ *
+ * Returns false, leaving *err untouched, if pkt is not an errno
+ * packet or if it carries 0.  tmon_set_errno() never produces 0 and
+ * passing 0 to tevent_req_error() does not complete the request, so
+ * accepting it would leave the computation stalled.
+ */
+static bool tmon_parse_errno(struct tmon_pkt *pkt, int *err)
+{
+	if (pkt->type != TMON_MSG_ERRNO) {
+		return false;
+	}
+	if (pkt->val == 0) {
+		return false;
+	}
+	*err = (int)pkt->val;
+
+	return true;
+}
+
+/* A ping packet is valid only if it has the ping type and value 0 */
+bool tmon_parse_ping(struct tmon_pkt *pkt)
+{
+	return (pkt->type == TMON_MSG_PING) && (pkt->val == 0);
+}
+
+/*
+ * Validate an ASCII packet and store its character in *c.  Returns
+ * false, leaving *c untouched, for any other packet type or a value
+ * that is not ASCII.
+ */
+bool tmon_parse_ascii(struct tmon_pkt *pkt, char *c)
+{
+	bool valid = (pkt->type == TMON_MSG_ASCII) &&
+		     isascii((int)pkt->val);
+
+	if (valid) {
+		*c = (char)pkt->val;
+	}
+
+	return valid;
+}
+
+/*
+ * Validate a custom packet and store its value in *val.  Returns
+ * false, leaving *val untouched, for any other packet type.
+ */
+bool tmon_parse_custom(struct tmon_pkt *pkt, uint16_t *val)
+{
+	if (pkt->type == TMON_MSG_CUSTOM) {
+		*val = pkt->val;
+		return true;
+	}
+
+	return false;
+}
+
+/* Per-request state for the tmon_send()/tmon_recv() computation */
+struct tmon_state {
+ /* Monitored file descriptor, set to -1 after a failed write */
+ int fd;
+ /* Bitmask of TMON_FD_READ / TMON_FD_WRITE */
+ int direction;
+ struct tevent_context *ev;
+ /* NOTE(review): not referenced by the code visible here */
+ bool monitor_close;
+ /* Seconds between write_callback invocations, 0 to disable */
+ unsigned long write_interval;
+ /* Seconds without a packet before timing out, 0 to disable */
+ unsigned long read_timeout;
+ /* Copy of the caller's callbacks, all NULL if none were given */
+ struct tmon_actions actions;
+ /* Pending read timeout timer, if any */
+ struct tevent_timer *timer;
+ /* Passed to every callback */
+ void *private_data;
+};
+
+/* Forward declarations for handlers that are referenced by tmon_send() */
+static void tmon_readable(struct tevent_req *subreq);
+static bool tmon_set_timeout(struct tevent_req *req,
+ struct tevent_context *ev);
+static void tmon_timedout(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval now,
+ void *private_data);
+static void tmon_write_loop(struct tevent_req *subreq);
+
+/*
+ * Start monitoring fd.
+ *
+ * Validates the combination of direction, callbacks and intervals
+ * (failing the request with EINVAL if it is inconsistent), sets
+ * close-on-exec on fd and then, as requested, waits for fd to become
+ * readable, arms the read timeout and schedules the first periodic
+ * write.
+ *
+ * Returns a new tevent request, or NULL if the request can not be
+ * allocated.  Other failures are reported via the request.
+ */
+struct tevent_req *tmon_send(TALLOC_CTX *mem_ctx,
+			     struct tevent_context *ev,
+			     int fd,
+			     int direction,
+			     unsigned long read_timeout,
+			     unsigned long write_interval,
+			     struct tmon_actions *actions,
+			     void *private_data)
+{
+	struct tevent_req *req, *subreq;
+	struct tmon_state *state;
+	bool status;
+
+	req = tevent_req_create(mem_ctx, &state, struct tmon_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	if (actions != NULL) {
+		/* If FD isn't readable then read actions are invalid */
+		if (!(direction & TMON_FD_READ) &&
+		    (actions->timeout_callback != NULL ||
+		     actions->read_callback != NULL ||
+		     read_timeout != 0)) {
+			tevent_req_error(req, EINVAL);
+			return tevent_req_post(req, ev);
+		}
+		/* If FD isn't writeable then write actions are invalid */
+		if (!(direction & TMON_FD_WRITE) &&
+		    (actions->write_callback != NULL ||
+		     write_interval != 0)) {
+			tevent_req_error(req, EINVAL);
+			return tevent_req_post(req, ev);
+		}
+	}
+
+	/*
+	 * Can't specify write interval without a callback.
+	 *
+	 * This must test the arguments: state has not been filled in
+	 * yet.  It also covers actions == NULL, where the write loop
+	 * would otherwise call a NULL callback.
+	 */
+	if (write_interval != 0 &&
+	    (actions == NULL || actions->write_callback == NULL)) {
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
+	state->fd = fd;
+	state->direction = direction;
+	state->ev = ev;
+	state->write_interval = write_interval;
+	state->read_timeout = read_timeout;
+	state->private_data = private_data;
+
+	if (actions != NULL) {
+		state->actions = *actions;
+	}
+
+	status = set_close_on_exec(fd);
+	if (!status) {
+		tevent_req_error(req, errno);
+		return tevent_req_post(req, ev);
+	}
+
+	if (direction & TMON_FD_READ) {
+		subreq = wait_for_read_send(state, ev, fd, true);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, tmon_readable, req);
+	}
+
+	if (state->read_timeout != 0) {
+		status = tmon_set_timeout(req, state->ev);
+		if (!status) {
+			tevent_req_error(req, ENOMEM);
+			return tevent_req_post(req, ev);
+		}
+	}
+
+	if (state->write_interval != 0) {
+		subreq = tevent_wakeup_send(
+			state,
+			state->ev,
+			tevent_timeval_current_ofs(state->write_interval, 0));
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, state->ev);
+		}
+		tevent_req_set_callback(subreq, tmon_write_loop, req);
+	}
+
+	return req;
+}
+
+/*
+ * Completion handler for wait_for_read_send(): either the FD is
+ * readable or the wait failed.
+ *
+ * Reads exactly one packet.  Exit and errno packets complete the
+ * request here, anything else is passed to read_callback.  If the
+ * request is still active afterwards then the read wait and the read
+ * timeout are re-armed.
+ */
+static void tmon_readable(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tmon_state *state = tevent_req_data( req, struct tmon_state);
+ struct tmon_buf buf;
+ struct tmon_pkt pkt;
+ ssize_t nread;
+ bool status;
+ int err;
+ int ret;
+
+ status = wait_for_read_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (!status) {
+ /* close_callback may override the EPIPE error */
+ if (ret == EPIPE && state->actions.close_callback != NULL) {
+ ret = state->actions.close_callback(state->private_data);
+ if (ret == TMON_STATUS_EXIT) {
+ ret = 0;
+ }
+ }
+ if (ret == 0) {
+ tevent_req_done(req);
+ } else {
+ tevent_req_error(req, ret);
+ }
+ return;
+ }
+
+ nread = sys_read(state->fd, buf.data, sizeof(buf.data));
+ if (nread == -1) {
+ tevent_req_error(req, errno);
+ return;
+ }
+ if (nread == 0) {
+ /* Can't happen, treat like EPIPE, above */
+ tevent_req_error(req, EPIPE);
+ return;
+ }
+ /* Packets are fixed size, a short read is a protocol error */
+ if (nread != sizeof(buf.data)) {
+ tevent_req_error(req, EPROTO);
+ return;
+ }
+
+ tmon_packet_pull(&buf, &pkt);
+
+ /* Exit and errno packets are never passed to read_callback */
+ switch (pkt.type) {
+ case TMON_MSG_EXIT:
+ status = tmon_parse_exit(&pkt);
+ if (!status) {
+ tevent_req_error(req, EPROTO);
+ return;
+ }
+ tevent_req_done(req);
+ return;
+ case TMON_MSG_ERRNO:
+ status = tmon_parse_errno(&pkt, &err);
+ if (!status) {
+ err = EPROTO;
+ }
+ tevent_req_error(req, err);
+ return;
+ default:
+ break;
+ }
+
+ if (state->actions.read_callback == NULL) {
+ /* Shouldn't happen, other end should not write */
+ tevent_req_error(req, EIO);
+ return;
+ }
+ ret = state->actions.read_callback(state->private_data, &pkt);
+ if (ret == TMON_STATUS_EXIT) {
+ tevent_req_done(req);
+ return;
+ }
+ if (ret != 0) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* Packet handled, wait for the next one */
+ subreq = wait_for_read_send(state, state->ev, state->fd, true);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, tmon_readable, req);
+
+ /* Reset read timeout */
+ if (state->read_timeout != 0) {
+ status = tmon_set_timeout(req, state->ev);
+ if (!status) {
+ tevent_req_error(req, ENOMEM);
+ return;
+ }
+ }
+}
+
+/*
+ * (Re)arm the read timeout, replacing any pending timer.
+ *
+ * Returns false if the timer can not be allocated.  The request is
+ * deliberately NOT completed here: callers fail it with ENOMEM, and
+ * completing it in both places would finish the request twice.
+ */
+static bool tmon_set_timeout(struct tevent_req *req,
+			     struct tevent_context *ev)
+{
+	struct tmon_state *state = tevent_req_data(
+		req, struct tmon_state);
+	struct timeval endtime =
+		tevent_timeval_current_ofs(state->read_timeout, 0);
+
+	TALLOC_FREE(state->timer);
+
+	state->timer = tevent_add_timer(ev, req, endtime, tmon_timedout, req);
+	if (state->timer == NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Read timeout handler.  Completes the request with ETIMEDOUT unless
+ * a timeout_callback is defined, in which case its return value
+ * decides the result (0 or TMON_STATUS_EXIT means success).
+ */
+static void tmon_timedout(struct tevent_context *ev,
+			  struct tevent_timer *te,
+			  struct timeval now,
+			  void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct tmon_state *state = tevent_req_data(req, struct tmon_state);
+	int result = ETIMEDOUT;
+
+	TALLOC_FREE(state->timer);
+
+	if (state->actions.timeout_callback != NULL) {
+		result = state->actions.timeout_callback(state->private_data);
+		if (result == TMON_STATUS_EXIT) {
+			result = 0;
+		}
+	}
+
+	if (result != 0) {
+		tevent_req_error(req, result);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Periodic write handler.  Asks write_callback for a packet, writes
+ * it unless the callback asked to skip, then schedules the next
+ * wakeup after write_interval seconds.
+ */
+static void tmon_write_loop(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct tmon_state *state = tevent_req_data(
+		req, struct tmon_state);
+	struct tmon_pkt pkt;
+	int ret;
+
+	/* Any error from the wakeup is ignored */
+	(void)tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+
+	ret = state->actions.write_callback(state->private_data, &pkt);
+	switch (ret) {
+	case TMON_STATUS_EXIT:
+		tevent_req_done(req);
+		return;
+	case TMON_STATUS_SKIP:
+		/* Nothing to send this time around */
+		break;
+	case 0:
+		if (!tmon_write(req, &pkt)) {
+			return;
+		}
+		break;
+	default:
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	subreq = tevent_wakeup_send(
+		state,
+		state->ev,
+		tevent_timeval_current_ofs(state->write_interval, 0));
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, tmon_write_loop, req);
+}
+
+/*
+ * Write pkt to the monitored FD.
+ *
+ * Returns true if the packet was written.  Returns false if the FD
+ * has already been invalidated, if the request was not set up for
+ * writing (request fails with EINVAL) or if the write fails.  On
+ * write failure the request is completed - close_callback may
+ * override an EPIPE error - and the FD is invalidated.
+ */
+bool tmon_write(struct tevent_req *req, struct tmon_pkt *pkt)
+{
+	struct tmon_state *state = tevent_req_data(
+		req, struct tmon_state);
+	int err;
+
+	if (state->fd == -1) {
+		return false;
+	}
+
+	if ((state->direction & TMON_FD_WRITE) == 0) {
+		tevent_req_error(req, EINVAL);
+		return false;
+	}
+
+	err = tmon_packet_write(state->fd, pkt);
+	if (err == 0) {
+		return true;
+	}
+
+	if (err == EPIPE && state->actions.close_callback != NULL) {
+		err = state->actions.close_callback(state->private_data);
+		if (err == TMON_STATUS_EXIT) {
+			err = 0;
+		}
+	}
+
+	if (err != 0) {
+		tevent_req_error(req, err);
+	} else {
+		tevent_req_done(req);
+	}
+	state->fd = -1;
+
+	return false;
+}
+
+/*
+ * Collect the result of tmon_send().  Returns true on success,
+ * otherwise false with the error stored via perr.
+ */
+bool tmon_recv(struct tevent_req *req, int *perr)
+{
+	bool failed = tevent_req_is_unix_error(req, perr);
+
+	return !failed;
+}
+
+/* write_callback for ping monitoring: always sends a ping packet */
+static int ping_writer(void *private_data, struct tmon_pkt *pkt)
+{
+	(void)tmon_set_ping(pkt);
+
+	return 0;
+}
+
+/*
+ * read_callback for ping monitoring: anything other than a valid ping
+ * packet is a protocol error
+ */
+static int ping_reader(void *private_data, struct tmon_pkt *pkt)
+{
+	/* Only expect pings */
+	return tmon_parse_ping(pkt) ? 0 : EPROTO;
+}
+
+/*
+ * Start ping monitoring on fd.
+ *
+ * Thin wrapper around tmon_send(): the ping writer is installed only
+ * when fd is writeable and interval is non-zero, the ping reader only
+ * when fd is readable and timeout is non-zero.
+ */
+struct tevent_req *tmon_ping_send(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  int fd,
+				  int direction,
+				  unsigned long timeout,
+				  unsigned long interval)
+{
+	struct tmon_actions actions = {
+		.write_callback = NULL,
+	};
+	bool send_pings = (direction & TMON_FD_WRITE) && interval != 0;
+	bool expect_pings = (direction & TMON_FD_READ) && timeout != 0;
+
+	if (send_pings) {
+		actions.write_callback = ping_writer;
+	}
+	if (expect_pings) {
+		actions.read_callback = ping_reader;
+	}
+
+	return tmon_send(mem_ctx,
+			 ev,
+			 fd,
+			 direction,
+			 timeout,
+			 interval,
+			 &actions,
+			 NULL);
+}
+
+/* Collect the result of tmon_ping_send(), see tmon_recv() */
+bool tmon_ping_recv(struct tevent_req *req, int *perr)
+{
+	return tmon_recv(req, perr);
+}
diff --git a/ctdb/common/tmon.h b/ctdb/common/tmon.h
new file mode 100644
index 0000000..7cbfbbd
--- /dev/null
+++ b/ctdb/common/tmon.h
@@ -0,0 +1,218 @@
+/*
+ Trivial FD monitoring
+
+ Copyright (C) Martin Schwenke & Amitay Isaacs, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_TMON_H__
+#define __CTDB_TMON_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file tmon.h
+ *
+ * @brief Interprocess file descriptor (pipe and socketpair) monitoring
+ *
+ * Assumes 2 processes connected by a pipe(2) or a socketpair(2). A
+ * simple protocol is defined to allow sending various types of status
+ * information. When a pipe(2) is used the reader can monitor for
+ * close and read packets, while the sender can write packets. When a
+ * socketpair(2) is used then both ends can monitor for close, and
+ * read and write packets. A read timeout can be specified,
+ * terminating the computation if no packets are received.
+ *
+ * A simplified interface is provided to monitor for close and allow
+ * sending/monitoring of one-way ping packets. A ping timeout occurs
+ * when one end is expecting pings but none are received during the
+ * timeout interval - no response is sent to pings, they merely reset
+ * a timer on the receiving end.
+ */
+
+struct tmon_pkt;
+
+/*
+ * Callbacks used by tmon_send().  Any of them may be NULL.  Each is
+ * passed the private_data given to tmon_send().
+ */
+struct tmon_actions {
+ /* Called every write_interval seconds to fill in a packet to send */
+ int (*write_callback)(void *private_data, struct tmon_pkt *pkt);
+ /* Called when no packet is received for read_timeout seconds */
+ int (*timeout_callback)(void *private_data);
+ /* Called for each received packet, except exit and errno packets */
+ int (*read_callback)(void *private_data, struct tmon_pkt *pkt);
+ /* Called when waiting for read or writing fails with EPIPE */
+ int (*close_callback)(void *private_data);
+};
+
+/*
+ * Return value from write_callback() and read_callback() to cause the
+ * computation to exit successfully. For consistency this can also be
+ * used with timeout_callback() and close_callback().
+ */
+#define TMON_STATUS_EXIT (-1)
+
+/* Return value from write_callback() to skip write */
+#define TMON_STATUS_SKIP (-2)
+
+/* For direction, below */
+#define TMON_FD_READ 0x1
+#define TMON_FD_WRITE 0x2
+#define TMON_FD_BOTH (TMON_FD_READ | TMON_FD_WRITE)
+
+/**
+ * @brief Async computation to start FD monitoring
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd File descriptor for "this" end of pipe/socketpair
+ * @param[in] direction Read, write or both - for sanity checking
+ * @param[in] read_timeout Seconds to trigger timeout when no packets received
+ * @param[in] write_interval Seconds to trigger write_callback
+ * @param[in] actions struct containing callbacks
+ * @param[in] private_data Passed to callbacks
+ * @return new tevent request or NULL on failure
+ *
+ * @note read_timeout implies monitor_close
+ *
+ * @note The computation will complete when:
+ *
+ * - The writing end closes (e.g. writer process terminates) - EPIPE
+ * - read_timeout is non-zero and timeout occurs - ETIMEDOUT
+ * - Packets received with no read_callback defined - EIO
+ * - Invalid or unexpected packet received - EPROTO
+ * - File descriptor readable but no bytes to read - EPIPE
+ * - Invalid combination of direction, callbacks, timeouts - EINVAL
+ * - An unexpected error occurs - other
+ *
+ * @note action callbacks return an int that can be used to trigger
+ * other errors or override an error. For example:
+ *
+ * - write_callback() can return non-zero errno, causing an error
+ * - close_callback() can return zero, overriding the default EPIPE error
+ * - timeout_callback() can return something other than ETIMEDOUT
+ * - read_callback() can return EPROTO for unexpected packet types
+ *
+ * Reading of exit and errno packets is handled internally (read
+ * callback is never called). Write callback can return special
+ * value TMON_STATUS_SKIP to avoid sending any data.
+ */
+struct tevent_req *tmon_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd,
+ int direction,
+ unsigned long read_timeout,
+ unsigned long write_interval,
+ struct tmon_actions *actions,
+ void *private_data);
+
+/**
+ * @brief Async computation to end FD monitoring
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool tmon_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Fill in an exit packet
+ *
+ * @param[in,out] pkt An exit packet
+ * @return true on success, false on failure
+ */
+bool tmon_set_exit(struct tmon_pkt *pkt);
+/**
+ * @brief Fill in an errno packet
+ *
+ * @param[in,out] pkt An errno packet
+ * @param[in] err An errno to send in packet
+ * @return true on success, false on failure
+ */
+bool tmon_set_errno(struct tmon_pkt *pkt, int err);
+/**
+ * @brief Fill in a ping packet
+ *
+ * @param[in,out] pkt A ping packet
+ * @return true on success, false on failure
+ */
+bool tmon_set_ping(struct tmon_pkt *pkt);
+/**
+ * @brief Fill in an ASCII packet
+ *
+ * @param[in,out] pkt An ASCII packet
+ * @param[in] c An ASCII character to send in packet
+ * @return true on success, false on failure
+ */
+bool tmon_set_ascii(struct tmon_pkt *pkt, char c);
+/**
+ * @brief Fill in a custom packet
+ *
+ * @param[in,out] pkt A custom packet
+ * @param[in] val A uint16_t to send in a custom packet
+ * @return true on success, false on failure
+ */
+bool tmon_set_custom(struct tmon_pkt *pkt, uint16_t val);
+
+/**
+ * @brief Validate a ping packet
+ *
+ * @param[in] pkt A ping packet
+ * @return true on success, false on failure
+ */
+bool tmon_parse_ping(struct tmon_pkt *pkt);
+
+/**
+ * @brief Validate ASCII packet and parse out character
+ *
+ * @param[in] pkt An ASCII packet
+ * @param[out] c An ASCII character value from packet
+ * @return true on success, false on failure
+ */
+bool tmon_parse_ascii(struct tmon_pkt *pkt, char *c);
+
+/**
+ * @brief Validate custom packet and parse out value
+ *
+ * @param[in] pkt A custom packet
+ * @param[out] val A uint16_t value from packet
+ * @return true on success, false on failure
+ */
+bool tmon_parse_custom(struct tmon_pkt *pkt, uint16_t *val);
+
+/**
+ * @brief Write a packet
+ *
+ * @param[in] req Tevent request created by tmon_send
+ * @param[in] pkt Packet to write
+ * @return true on success, false on failure
+ */
+bool tmon_write(struct tevent_req *req, struct tmon_pkt *pkt);
+
+/**
+ * @brief Async computation to start ping monitoring
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd File descriptor for "this" end of pipe/socketpair
+ * @param[in] direction Read, write or both - for sanity checking
+ * @param[in] timeout Seconds without a ping before the receiving end
+ * times out, 0 for no ping monitoring
+ * @param[in] interval Send a ping packet every interval seconds, 0 to
+ * send no pings
+ * @return new tevent request or NULL on failure
+ */
+struct tevent_req *tmon_ping_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd,
+ int direction,
+ unsigned long timeout,
+ unsigned long interval);
+
+/**
+ * @brief Async computation to end ping monitoring
+ *
+ * @param[in] req Tevent request created by tmon_ping_send
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool tmon_ping_recv(struct tevent_req *req, int *perr);
+
+#endif /* __CTDB_TMON_H__ */
diff --git a/ctdb/common/tunable.c b/ctdb/common/tunable.c
new file mode 100644
index 0000000..f366f23
--- /dev/null
+++ b/ctdb/common/tunable.c
@@ -0,0 +1,401 @@
+/*
+ Tunables utilities
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/locale.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/smb_strtox.h"
+#include "lib/util/tini.h"
+
+#include "protocol/protocol.h"
+
+#include "common/tunable.h"
+
+/*
+ * Table of all known tunables.
+ *
+ * Each entry maps a tunable's label to its default value, an obsolete
+ * flag and the byte offset of the matching field in struct
+ * ctdb_tunable_list (the field is accessed as a uint32_t).  The table
+ * is terminated by an entry with a NULL label, which is what the
+ * lookup loops in this file test for.
+ */
+static struct {
+ const char *label; /* tunable name, compared case-insensitively */
+ uint32_t value; /* default applied by ctdb_tunable_set_defaults() */
+ bool obsolete; /* omitted from name listings, rejected in files */
+ size_t offset; /* offset of field in struct ctdb_tunable_list */
+} tunable_map[] = {
+ { "MaxRedirectCount", 3, true,
+ offsetof(struct ctdb_tunable_list, max_redirect_count) },
+ { "SeqnumInterval", 1000, false,
+ offsetof(struct ctdb_tunable_list, seqnum_interval) },
+ { "ControlTimeout", 60, false,
+ offsetof(struct ctdb_tunable_list, control_timeout) },
+ { "TraverseTimeout", 20, false,
+ offsetof(struct ctdb_tunable_list, traverse_timeout) },
+ { "KeepaliveInterval", 5, false,
+ offsetof(struct ctdb_tunable_list, keepalive_interval) },
+ { "KeepaliveLimit", 5, false,
+ offsetof(struct ctdb_tunable_list, keepalive_limit) },
+ { "RecoverTimeout", 30, false,
+ offsetof(struct ctdb_tunable_list, recover_timeout) },
+ { "RecoverInterval", 1, false,
+ offsetof(struct ctdb_tunable_list, recover_interval) },
+ { "ElectionTimeout", 3, false,
+ offsetof(struct ctdb_tunable_list, election_timeout) },
+ { "TakeoverTimeout", 9, false,
+ offsetof(struct ctdb_tunable_list, takeover_timeout) },
+ { "MonitorInterval", 15, false,
+ offsetof(struct ctdb_tunable_list, monitor_interval) },
+ { "TickleUpdateInterval", 20, false,
+ offsetof(struct ctdb_tunable_list, tickle_update_interval) },
+ { "EventScriptTimeout", 30, false,
+ offsetof(struct ctdb_tunable_list, script_timeout) },
+ { "MonitorTimeoutCount", 20, false,
+ offsetof(struct ctdb_tunable_list, monitor_timeout_count) },
+ { "EventScriptUnhealthyOnTimeout", 0, true,
+ offsetof(struct ctdb_tunable_list, script_unhealthy_on_timeout) },
+ { "RecoveryGracePeriod", 120, false,
+ offsetof(struct ctdb_tunable_list, recovery_grace_period) },
+ { "RecoveryBanPeriod", 300, false,
+ offsetof(struct ctdb_tunable_list, recovery_ban_period) },
+ { "DatabaseHashSize", 100001, false,
+ offsetof(struct ctdb_tunable_list, database_hash_size) },
+ { "DatabaseMaxDead", 5, false,
+ offsetof(struct ctdb_tunable_list, database_max_dead) },
+ { "RerecoveryTimeout", 10, false,
+ offsetof(struct ctdb_tunable_list, rerecovery_timeout) },
+ { "EnableBans", 1, false,
+ offsetof(struct ctdb_tunable_list, enable_bans) },
+ { "DeterministicIPs", 0, true,
+ offsetof(struct ctdb_tunable_list, deterministic_public_ips) },
+ { "LCP2PublicIPs", 1, true,
+ offsetof(struct ctdb_tunable_list, lcp2_public_ip_assignment) },
+ { "ReclockPingPeriod", 60, true,
+ offsetof(struct ctdb_tunable_list, reclock_ping_period) },
+ { "NoIPFailback", 0, false,
+ offsetof(struct ctdb_tunable_list, no_ip_failback) },
+ { "DisableIPFailover", 0, true,
+ offsetof(struct ctdb_tunable_list, disable_ip_failover) },
+ { "VerboseMemoryNames", 0, false,
+ offsetof(struct ctdb_tunable_list, verbose_memory_names) },
+ { "RecdPingTimeout", 60, false,
+ offsetof(struct ctdb_tunable_list, recd_ping_timeout) },
+ { "RecdFailCount", 10, false,
+ offsetof(struct ctdb_tunable_list, recd_ping_failcount) },
+ { "LogLatencyMs", 0, false,
+ offsetof(struct ctdb_tunable_list, log_latency_ms) },
+ { "RecLockLatencyMs", 1000, false,
+ offsetof(struct ctdb_tunable_list, reclock_latency_ms) },
+ { "RecoveryDropAllIPs", 120, false,
+ offsetof(struct ctdb_tunable_list, recovery_drop_all_ips) },
+ { "VerifyRecoveryLock", 1, true,
+ offsetof(struct ctdb_tunable_list, verify_recovery_lock) },
+ { "VacuumInterval", 10, false,
+ offsetof(struct ctdb_tunable_list, vacuum_interval) },
+ { "VacuumMaxRunTime", 120, false,
+ offsetof(struct ctdb_tunable_list, vacuum_max_run_time) },
+ { "RepackLimit", 10*1000, false,
+ offsetof(struct ctdb_tunable_list, repack_limit) },
+ { "VacuumLimit", 5*1000, true,
+ offsetof(struct ctdb_tunable_list, vacuum_limit) },
+ { "VacuumFastPathCount", 60, false,
+ offsetof(struct ctdb_tunable_list, vacuum_fast_path_count) },
+ { "MaxQueueDropMsg", 1000*1000, false,
+ offsetof(struct ctdb_tunable_list, max_queue_depth_drop_msg) },
+ { "AllowUnhealthyDBRead", 0, false,
+ offsetof(struct ctdb_tunable_list, allow_unhealthy_db_read) },
+ { "StatHistoryInterval", 1, false,
+ offsetof(struct ctdb_tunable_list, stat_history_interval) },
+ { "DeferredAttachTO", 120, false,
+ offsetof(struct ctdb_tunable_list, deferred_attach_timeout) },
+ { "AllowClientDBAttach", 1, false,
+ offsetof(struct ctdb_tunable_list, allow_client_db_attach) },
+ { "RecoverPDBBySeqNum", 1, true,
+ offsetof(struct ctdb_tunable_list, recover_pdb_by_seqnum) },
+ { "DeferredRebalanceOnNodeAdd", 300, true,
+ offsetof(struct ctdb_tunable_list, deferred_rebalance_on_node_add) },
+ { "FetchCollapse", 1, false,
+ offsetof(struct ctdb_tunable_list, fetch_collapse) },
+ { "HopcountMakeSticky", 50, false,
+ offsetof(struct ctdb_tunable_list, hopcount_make_sticky) },
+ { "StickyDuration", 600, false,
+ offsetof(struct ctdb_tunable_list, sticky_duration) },
+ { "StickyPindown", 200, false,
+ offsetof(struct ctdb_tunable_list, sticky_pindown) },
+ { "NoIPTakeover", 0, false,
+ offsetof(struct ctdb_tunable_list, no_ip_takeover) },
+ { "DBRecordCountWarn", 100*1000, false,
+ offsetof(struct ctdb_tunable_list, db_record_count_warn) },
+ { "DBRecordSizeWarn", 10*1000*1000, false,
+ offsetof(struct ctdb_tunable_list, db_record_size_warn) },
+ { "DBSizeWarn", 100*1000*1000, false,
+ offsetof(struct ctdb_tunable_list, db_size_warn) },
+ { "PullDBPreallocation", 10*1024*1024, false,
+ offsetof(struct ctdb_tunable_list, pulldb_preallocation_size) },
+ { "NoIPHostOnAllDisabled", 1, true,
+ offsetof(struct ctdb_tunable_list, no_ip_host_on_all_disabled) },
+ { "Samba3AvoidDeadlocks", 0, true,
+ offsetof(struct ctdb_tunable_list, samba3_hack) },
+ { "TDBMutexEnabled", 1, true,
+ offsetof(struct ctdb_tunable_list, mutex_enabled) },
+ { "LockProcessesPerDB", 200, false,
+ offsetof(struct ctdb_tunable_list, lock_processes_per_db) },
+ { "RecBufferSizeLimit", 1000*1000, false,
+ offsetof(struct ctdb_tunable_list, rec_buffer_size_limit) },
+ { "QueueBufferSize", 1024, false,
+ offsetof(struct ctdb_tunable_list, queue_buffer_size) },
+ { "IPAllocAlgorithm", 2, false,
+ offsetof(struct ctdb_tunable_list, ip_alloc_algorithm) },
+ { "AllowMixedVersions", 0, false,
+ offsetof(struct ctdb_tunable_list, allow_mixed_versions) },
+ /* Sentinel: label is left NULL, which ends iteration over the table */
+ { .obsolete = true, }
+};
+
+/*
+ * Store the default value of every entry in tunable_map (obsolete ones
+ * included) into the corresponding field of tun_list.
+ */
+void ctdb_tunable_set_defaults(struct ctdb_tunable_list *tun_list)
+{
+	uint8_t *base = (uint8_t *)tun_list;
+	int idx = 0;
+
+	/* Walk the table up to the NULL-label sentinel */
+	while (tunable_map[idx].label != NULL) {
+		uint32_t *field;
+
+		field = (uint32_t *)(base + tunable_map[idx].offset);
+		*field = tunable_map[idx].value;
+		idx++;
+	}
+}
+
+/*
+ * Fetch the current value of the tunable named tunable_str.
+ *
+ * The name is compared case-insensitively against tunable_map.  On a
+ * match the field is copied into *value and true is returned; false is
+ * returned (and *value left untouched) for an unknown name.
+ */
+bool ctdb_tunable_get_value(struct ctdb_tunable_list *tun_list,
+			    const char *tunable_str, uint32_t *value)
+{
+	int idx;
+
+	for (idx = 0; tunable_map[idx].label != NULL; idx++) {
+		uint32_t *field;
+
+		if (strcasecmp(tunable_map[idx].label, tunable_str) != 0) {
+			continue;
+		}
+
+		field = (uint32_t *)((uint8_t *)tun_list +
+				     tunable_map[idx].offset);
+		*value = *field;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Set the tunable named tunable_str to value.
+ *
+ * The name is compared case-insensitively against tunable_map.  On a
+ * match the value is stored (even for an obsolete tunable) and, when
+ * obsolete is not NULL, *obsolete reports the entry's obsolete flag.
+ * Returns false without touching anything if the name is unknown.
+ */
+bool ctdb_tunable_set_value(struct ctdb_tunable_list *tun_list,
+			    const char *tunable_str, uint32_t value,
+			    bool *obsolete)
+{
+	int idx;
+
+	for (idx = 0; tunable_map[idx].label != NULL; idx++) {
+		uint32_t *field;
+
+		if (strcasecmp(tunable_map[idx].label, tunable_str) != 0) {
+			continue;
+		}
+
+		field = (uint32_t *)((uint8_t *)tun_list +
+				     tunable_map[idx].offset);
+		*field = value;
+
+		if (obsolete != NULL) {
+			*obsolete = tunable_map[idx].obsolete;
+		}
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Build a list of the names of all non-obsolete tunables.
+ *
+ * The list is allocated on mem_ctx; the name array and the copied
+ * names are talloc children of the list.  Returns NULL if any
+ * allocation fails, in which case nothing is left allocated.
+ */
+struct ctdb_var_list *ctdb_tunable_names(TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_var_list *names;
+	int idx;
+
+	names = talloc_zero(mem_ctx, struct ctdb_var_list);
+	if (names == NULL) {
+		return NULL;
+	}
+
+	for (idx = 0; tunable_map[idx].label != NULL; idx++) {
+		if (tunable_map[idx].obsolete) {
+			continue;
+		}
+
+		/* Grow the array by one slot for this name */
+		names->var = talloc_realloc(names, names->var, const char *,
+					    names->count + 1);
+		if (names->var == NULL) {
+			/* Freeing the list also frees all of its children */
+			TALLOC_FREE(names);
+			return NULL;
+		}
+
+		names->var[names->count] = talloc_strdup(names,
+							 tunable_map[idx].label);
+		if (names->var[names->count] == NULL) {
+			TALLOC_FREE(names);
+			return NULL;
+		}
+
+		names->count += 1;
+	}
+
+	return names;
+}
+
+/*
+ * Build a single string listing the names of all non-obsolete tunables.
+ *
+ * Each name is preceded by ':' and there is no trailing ':', e.g.
+ * ":SeqnumInterval:ControlTimeout".  The string is allocated on
+ * mem_ctx.  Returns NULL on allocation failure, in which case the
+ * partially built string is freed rather than left on mem_ctx.
+ */
+char *ctdb_tunable_names_to_string(TALLOC_CTX *mem_ctx)
+{
+	char *str = NULL;
+	int i;
+
+	str = talloc_strdup(mem_ctx, ":");
+	if (str == NULL) {
+		return NULL;
+	}
+
+	for (i=0; tunable_map[i].label != NULL; i++) {
+		char *tmp;
+
+		if (tunable_map[i].obsolete) {
+			continue;
+		}
+
+		/*
+		 * Append via a temporary: on failure the original
+		 * string is still allocated and must be freed here,
+		 * otherwise it would be orphaned on mem_ctx.
+		 */
+		tmp = talloc_asprintf_append(str, "%s:",
+					     tunable_map[i].label);
+		if (tmp == NULL) {
+			TALLOC_FREE(str);
+			return NULL;
+		}
+		str = tmp;
+	}
+
+	/* Remove the last ':' (str always holds at least the initial ":") */
+	str[strlen(str)-1] = '\0';
+
+	return str;
+}
+
+/*
+ * State shared between ctdb_tunable_load_file() and the callbacks it
+ * passes to tini_parse().
+ */
+struct tunable_load_state {
+ struct ctdb_tunable_list *tun_list; /* tunable list being filled in */
+ bool status; /* set to false by a callback when a line is rejected */
+ const char *func; /* function name used to prefix error messages */
+};
+
+/*
+ * tini_parse() callback for "[section]" lines.
+ *
+ * Tunables files have no sections, so log the offending section and
+ * record the failure in the load state.  Always returns true; the
+ * failure is reported through state->status instead.
+ */
+static bool tunable_section(const char *section, void *private_data)
+{
+	struct tunable_load_state *load_state = private_data;
+
+	load_state->status = false;
+	D_ERR("%s: Invalid line for section [%s] - sections not supported \n",
+	      load_state->func,
+	      section);
+
+	return true;
+}
+
+/*
+ * tini_parse() callback for "name = value" lines.
+ *
+ * Validates the value and stores it in the tunable list held in the
+ * load state.  A line is rejected (state->status set to false and an
+ * error logged) when the value is empty, is not a number, does not fit
+ * in the 32 bits used to store a tunable, or when the tunable is
+ * unknown or obsolete.
+ *
+ * Always returns true; failures are reported via state->status so that
+ * every bad line in a file gets logged.
+ */
+static bool tunable_option(const char *name,
+			   const char *value,
+			   void *private_data)
+{
+	struct tunable_load_state *state =
+		(struct tunable_load_state *)private_data;
+	unsigned long num;
+	bool obsolete;
+	bool ok;
+	int ret;
+
+	if (value[0] == '\0') {
+		D_ERR("%s: Invalid line containing \"%s\"\n", state->func, name);
+		state->status = false;
+		return true;
+	}
+
+	num = smb_strtoul(value, NULL, 0, &ret, SMB_STR_FULL_STR_CONV);
+	/*
+	 * Tunables are stored as uint32_t.  Where unsigned long is wider
+	 * than that, reject values that would otherwise be silently
+	 * truncated by the cast below (e.g. 4294967296 becoming 0).
+	 */
+	if (ret != 0 || (unsigned long)(uint32_t)num != num) {
+		D_ERR("%s: Invalid value \"%s\" for tunable \"%s\"\n",
+		      state->func,
+		      value,
+		      name);
+		state->status = false;
+		return true;
+	}
+
+	ok = ctdb_tunable_set_value(state->tun_list,
+				    name,
+				    (uint32_t)num,
+				    &obsolete);
+	if (!ok) {
+		D_ERR("%s: Unknown tunable \"%s\"\n", state->func, name);
+		state->status = false;
+		return true;
+	}
+	/* obsolete is only valid once ctdb_tunable_set_value() succeeded */
+	if (obsolete) {
+		D_ERR("%s: Obsolete tunable \"%s\"\n", state->func, name);
+		state->status = false;
+		return true;
+	}
+
+	return true;
+}
+
+/*
+ * Load tunables from a file into tun_list.
+ *
+ * tun_list is first reset to the built-in defaults.  A missing file is
+ * not an error and leaves the defaults in place.  Returns false if the
+ * file can not be opened for another reason, if it can not be parsed,
+ * or if any line was rejected by the parse callbacks.  mem_ctx is not
+ * used by this function.
+ */
+bool ctdb_tunable_load_file(TALLOC_CTX *mem_ctx,
+			    struct ctdb_tunable_list *tun_list,
+			    const char *file)
+{
+	struct tunable_load_state state = {
+		.func = __FUNCTION__,
+		.status = true,
+		.tun_list = tun_list,
+	};
+	bool parsed;
+	FILE *stream;
+
+	ctdb_tunable_set_defaults(tun_list);
+
+	stream = fopen(file, "r");
+	if (stream == NULL) {
+		if (errno != ENOENT) {
+			DBG_ERR("Failed to open %s\n", file);
+			return false;
+		}
+
+		/* Doesn't need to exist */
+		return true;
+	}
+
+	D_NOTICE("Loading tunables from %s\n", file);
+	/*
+	 * Passing allow_empty_value=true looks odd but is deliberate:
+	 * with false, a tunable with no equals sign or value would be
+	 * treated as empty and silently skipped.  With true the empty
+	 * value reaches tunable_option(), which reports it.
+	 *
+	 * Both callbacks return true and flag problems through
+	 * state.status instead, so every error in an otherwise
+	 * well-formed file is reported in a single pass.
+	 */
+	parsed = tini_parse(stream,
+			    true,
+			    tunable_section,
+			    tunable_option,
+			    &state);
+
+	fclose(stream);
+
+	if (!parsed) {
+		DBG_ERR("Syntax error\n");
+		return false;
+	}
+
+	return state.status;
+}
diff --git a/ctdb/common/tunable.h b/ctdb/common/tunable.h
new file mode 100644
index 0000000..89f99f1
--- /dev/null
+++ b/ctdb/common/tunable.h
@@ -0,0 +1,35 @@
+/*
+ Tunable utilities
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_TUNABLE_H__
+#define __CTDB_TUNABLE_H__
+
+/**
+ * @brief Reset every tunable in a list to its built-in default
+ *
+ * @param[out] tun_list Tunable list to initialise
+ */
+void ctdb_tunable_set_defaults(struct ctdb_tunable_list *tun_list);
+
+/**
+ * @brief Look up the current value of a tunable
+ *
+ * The tunable name is matched case-insensitively.
+ *
+ * @param[in] tun_list Tunable list to read from
+ * @param[in] tunable_str Name of the tunable
+ * @param[out] value Current value, written only on success
+ * @return true if the tunable is known, false otherwise
+ */
+bool ctdb_tunable_get_value(struct ctdb_tunable_list *tun_list,
+ const char *tunable_str, uint32_t *value);
+
+/**
+ * @brief Set the value of a tunable
+ *
+ * The tunable name is matched case-insensitively.  The value is stored
+ * even if the tunable is obsolete.
+ *
+ * @param[in,out] tun_list Tunable list to update
+ * @param[in] tunable_str Name of the tunable
+ * @param[in] value New value
+ * @param[out] obsolete If not NULL, set on success to whether the
+ *             tunable is obsolete
+ * @return true if the tunable is known, false otherwise
+ */
+bool ctdb_tunable_set_value(struct ctdb_tunable_list *tun_list,
+ const char *tunable_str, uint32_t value,
+ bool *obsolete);
+
+/**
+ * @brief List the names of all non-obsolete tunables
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @return new list allocated on mem_ctx, NULL on allocation failure
+ */
+struct ctdb_var_list *ctdb_tunable_names(TALLOC_CTX *mem_ctx);
+
+/**
+ * @brief Format the names of all non-obsolete tunables as one string
+ *
+ * Each name is preceded by ':' and there is no trailing ':'.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @return new string allocated on mem_ctx, NULL on allocation failure
+ */
+char *ctdb_tunable_names_to_string(TALLOC_CTX *mem_ctx);
+
+/**
+ * @brief Load tunables from a file
+ *
+ * The list is reset to defaults before the file is read.  A file that
+ * does not exist is not an error.  Unknown or obsolete tunables,
+ * invalid values and section headers all cause failure.
+ *
+ * @param[in] mem_ctx Talloc memory context (not used by the current
+ *            implementation)
+ * @param[out] tun_list Tunable list to fill in
+ * @param[in] file Path of the tunables file
+ * @return true on success, false on failure
+ */
+bool ctdb_tunable_load_file(TALLOC_CTX *mem_ctx,
+ struct ctdb_tunable_list *tun_list,
+ const char *file);
+
+#endif /* __CTDB_TUNABLE_H__ */
diff --git a/ctdb/config/README b/ctdb/config/README
new file mode 100644
index 0000000..d28f4f0
--- /dev/null
+++ b/ctdb/config/README
@@ -0,0 +1,31 @@
+This directory contains run-time support scripts for CTDB.
+
+Selected highlights:
+
+ ctdb.init
+
+ An initscript for starting ctdbd at boot time.
+
+ events/
+
+ Eventscripts. See events/README for more details.
+
+ functions
+
+ Support functions, sourced by eventscripts and other scripts.
+
+ statd-callout
+
+ rpc.statd high-availability callout to support lock migration on
+ failover.
+
+Notes:
+
+* All of these scripts are written in POSIX Bourne shell. Please
+ avoid bash-isms, including the use of "local" variables (which are
+ not available in POSIX shell).
+
+* Do not use absolute paths for commands. Unit tests attempt to
+ replace many commands with stubs and can not do this if commands are
+ specified with absolute paths. The functions file controls $PATH so
+ absolute paths should not be required.
diff --git a/ctdb/config/ctdb-crash-cleanup.sh b/ctdb/config/ctdb-crash-cleanup.sh
new file mode 100755
index 0000000..95cfd75
--- /dev/null
+++ b/ctdb/config/ctdb-crash-cleanup.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This script can be called from a cronjob to automatically drop/release
+# all public ip addresses if CTDBD has crashed or stopped running.
+#
+
+# Default CTDB_BASE to the directory containing this script
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+# If ctdb is running, just exit
+if service ctdb status >/dev/null 2>&1 ; then
+ exit 0
+fi
+
+# Presumably makes the failover and NAT gateway options (such as
+# CTDB_NATGW_PUBLIC_IP, tested below) available - confirm against
+# load_script_options in the functions file
+load_script_options "failover" "11.natgw"
+
+if [ ! -f "$CTDB_BASE/public_addresses" ] ; then
+ die "No public addresses file found. Can't clean up."
+fi
+
+drop_all_public_ips 2>&1 | script_log "ctdb-crash-cleanup.sh"
+
+# Also drop the NAT gateway public IP, if one is configured
+if [ -n "$CTDB_NATGW_PUBLIC_IP" ] ; then
+ drop_ip "$CTDB_NATGW_PUBLIC_IP" "ctdb-crash-cleanup.sh"
+fi
diff --git a/ctdb/config/ctdb.conf b/ctdb/config/ctdb.conf
new file mode 100644
index 0000000..8e1b376
--- /dev/null
+++ b/ctdb/config/ctdb.conf
@@ -0,0 +1,22 @@
+# See ctdb.conf(5) for documentation
+#
+# See ctdb-script.options(5) for documentation about event script
+# options
+
+[logging]
+ # Enable logging to syslog
+ # location = syslog
+
+ # Default log level
+ # log level = NOTICE
+
+[cluster]
+ # Shared cluster lock file to avoid split brain. Daemon
+ # default is no cluster lock. Do NOT run CTDB without a
+ # cluster lock file unless you know exactly what you are
+ # doing.
+ #
+ # Please see the CLUSTER LOCK section in ctdb(7) for more
+ # details.
+ #
+ # cluster lock = !/bin/false CLUSTER LOCK NOT CONFIGURED
diff --git a/ctdb/config/ctdb.init b/ctdb/config/ctdb.init
new file mode 100755
index 0000000..6a7f781
--- /dev/null
+++ b/ctdb/config/ctdb.init
@@ -0,0 +1,161 @@
+#!/bin/sh
+
+# Start and stop CTDB (Clustered TDB daemon)
+#
+# chkconfig: - 90 01
+#
+# description: Starts and stops CTDB
+# pidfile: /var/run/ctdb/ctdbd.pid
+# config: /etc/sysconfig/ctdb
+
+### BEGIN INIT INFO
+# Provides: ctdb
+# Required-Start: $local_fs $syslog $network $remote_fs
+# Required-Stop: $local_fs $syslog $network $remote_fs
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: start and stop ctdb service
+# Description: Start and stop CTDB (Clustered TDB daemon)
+### END INIT INFO
+
+# Source function library.
+if [ -f /etc/init.d/functions ] ; then
+ # Red Hat
+ . /etc/init.d/functions
+elif [ -f /etc/rc.d/init.d/functions ] ; then
+ # Red Hat
+ . /etc/rc.d/init.d/functions
+elif [ -f /etc/rc.status ] ; then
+ # SUSE
+ . /etc/rc.status
+ rc_reset
+ LC_ALL=en_US.UTF-8
+elif [ -f /lib/lsb/init-functions ] ; then
+ # Debian
+ . /lib/lsb/init-functions
+fi
+
+# Avoid using root's TMPDIR
+unset TMPDIR
+
+[ -n "$CTDB_BASE" ] || export CTDB_BASE="/etc/ctdb"
+
+. "${CTDB_BASE}/functions"
+
+load_system_config "network"
+
+# check networking is up (for redhat)
+if [ "$NETWORKING" = "no" ] ; then
+ exit 0
+fi
+
+load_system_config "ctdb"
+
+detect_init_style
+export CTDB_INIT_STYLE
+
+ctdbd="${CTDBD:-/usr/sbin/ctdbd}"
+ctdb="${CTDB:-/usr/bin/ctdb}"
+pidfile="/var/run/ctdb/ctdbd.pid"
+
+############################################################
+
+start()
+{
+	# Start ctdbd using the mechanism that matches the detected init
+	# style.  $ctdbd is always passed as a single, quoted word.
+	printf "Starting ctdbd service: "
+
+	case "$CTDB_INIT_STYLE" in
+	suse)
+		startproc "$ctdbd"
+		rc_status -v
+		;;
+	redhat)
+		daemon --pidfile "$pidfile" "$ctdbd"
+		RETVAL=$?
+		echo
+		# Create the subsys lock only if the daemon started; a
+		# failure of either step is reported as status 1
+		[ $RETVAL -eq 0 ] && touch /var/lock/subsys/ctdb || RETVAL=1
+		return $RETVAL
+		;;
+	debian)
+		# No eval here: it would re-parse the expanded path, so
+		# whitespace or shell metacharacters in $ctdbd would be
+		# interpreted instead of being passed through verbatim
+		start-stop-daemon --start --quiet --background --exec "$ctdbd"
+		;;
+	esac
+}
+
+stop()
+{
+ # Ask the daemon to shut down via the ctdb CLI, then report the
+ # result using the helpers of the detected init style.
+ printf "Shutting down ctdbd service: "
+
+ case "$CTDB_INIT_STYLE" in
+ suse)
+ "$ctdb" "shutdown"
+ rc_status -v
+ ;;
+ redhat)
+ "$ctdb" "shutdown"
+ RETVAL=$?
+ # Common idiom in Red Hat init scripts - success() always
+ # succeeds so this does behave like if-then-else
+ # shellcheck disable=SC2015
+ [ $RETVAL -eq 0 ] && success || failure
+ echo ""
+ # Keep the subsys lock if the shutdown command failed
+ [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/ctdb
+ return $RETVAL
+ ;;
+ debian)
+ "$ctdb" "shutdown"
+ log_end_msg $?
+ ;;
+ esac
+}
+
+restart()
+{
+ # Stop then start; start runs even if stop reported a failure
+ stop
+ start
+}
+
+check_status ()
+{
+ # Check on ctdbd, identified by $pidfile and $ctdbd, using the
+ # process status helper of the detected init style.
+ case "$CTDB_INIT_STYLE" in
+ suse)
+ checkproc -p "$pidfile" "$ctdbd"
+ rc_status -v
+ ;;
+ redhat)
+ status -p "$pidfile" -l "ctdb" "$ctdbd"
+ ;;
+ debian)
+ status_of_proc -p "$pidfile" "$ctdbd" "ctdb"
+ ;;
+ esac
+}
+
+############################################################
+
+# Dispatch on the action given as the first argument
+case "$1" in
+ start)
+ start
+ ;;
+ stop)
+ stop
+ ;;
+ restart|reload|force-reload)
+ # reload and force-reload are implemented as a full restart
+ restart
+ ;;
+ status)
+ check_status
+ ;;
+ condrestart|try-restart)
+ # Only restart if the status check succeeds
+ if check_status >/dev/null ; then
+ restart
+ fi
+ ;;
+ cron)
+ # used from cron to auto-restart ctdb
+ check_status >/dev/null 2>&1 || restart
+ ;;
+ *)
+ echo "Usage: $0 {start|stop|restart|reload|force-reload|status|cron|condrestart|try-restart}"
+ exit 1
+esac
diff --git a/ctdb/config/ctdb.sudoers b/ctdb/config/ctdb.sudoers
new file mode 100644
index 0000000..1c23818
--- /dev/null
+++ b/ctdb/config/ctdb.sudoers
@@ -0,0 +1,3 @@
+Defaults!/usr/local/etc/ctdb/statd-callout !requiretty
+
+rpcuser ALL=(ALL) NOPASSWD: /usr/local/etc/ctdb/statd-callout
diff --git a/ctdb/config/ctdb.sysconfig b/ctdb/config/ctdb.sysconfig
new file mode 100644
index 0000000..fc57929
--- /dev/null
+++ b/ctdb/config/ctdb.sysconfig
@@ -0,0 +1,11 @@
+# If using SYSV init, install this as /etc/sysconfig/ctdb,
+# /etc/default/ctdb or similar
+
+# Allow 1M open files
+ulimit -n 1048576
+
+# Allow core files to be created
+ulimit -c unlimited
+
+# Useful if default detection doesn't work
+# CTDB_INIT_STYLE=debian
diff --git a/ctdb/config/ctdb.tunables b/ctdb/config/ctdb.tunables
new file mode 100644
index 0000000..b99e5cd
--- /dev/null
+++ b/ctdb/config/ctdb.tunables
@@ -0,0 +1,2 @@
+# Set some CTDB tunable variables during CTDB startup?
+# MonitorInterval=20
diff --git a/ctdb/config/debug-hung-script.sh b/ctdb/config/debug-hung-script.sh
new file mode 100755
index 0000000..c1ac0f1
--- /dev/null
+++ b/ctdb/config/debug-hung-script.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+# This script only works on Linux.  Please modify (and submit patches)
+# for other operating systems.
+#
+# Usage: debug-hung-script.sh <pid> <event>
+#
+# Logs the process tree below <pid>, kernel stacks of any interesting
+# processes in that tree and, except for the "init" event, the output
+# of "ctdb scriptstatus <event>".
+
+[ -n "$CTDB_BASE" ] || \
+	CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+# Testing hook
+if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then
+	tmp="${CTDB_DEBUG_HUNG_SCRIPT_LOGFILE}.part"
+	exec >>"$tmp" 2>&1
+fi
+
+(
+	# No use running several of these in parallel if, say, "releaseip"
+	# event hangs for multiple IPs.  In that case the output would be
+	# interleaved in the log and would just be confusing.
+	flock --wait 2 9 || exit 1
+
+	echo "===== Start of hung script debug for PID=\"$1\", event=\"$2\" ====="
+
+	echo "pstree -p -a ${1}:"
+	out=$(pstree -p -a "$1")
+	echo "$out"
+
+	# Check for processes matching a regular expression and print
+	# stack traces.  This could help confirm that certain processes
+	# are stuck in certain places such as the cluster filesystem.  The
+	# regexp must separate items with "|" and must not contain
+	# parentheses.  The default pattern can be replaced for testing.
+	default_pat='exportfs|rpcinfo'
+	pat="${CTDB_DEBUG_HUNG_SCRIPT_STACKPAT:-${default_pat}}"
+	# read -r keeps any backslashes in the process name intact
+	echo "$out" |
+	sed -r -n "s@.*-(.*(${pat}).*),([0-9]*).*@\\3 \\1@p" |
+	while read -r pid name ; do
+		trace=$(cat "/proc/${pid}/stack" 2>/dev/null)
+		# No! Checking the exit code afterwards is actually clearer...
+		# shellcheck disable=SC2181
+		if [ $? -eq 0 ] ; then
+			echo "---- Stack trace of interesting process ${pid}[${name}] ----"
+			echo "$trace"
+		fi
+	done
+
+	if [ "$2" != "init" ] ; then
+		echo "---- ctdb scriptstatus ${2}: ----"
+		$CTDB scriptstatus "$2"
+	fi
+
+	echo "===== End of hung script debug for PID=\"$1\", event=\"$2\" ====="
+
+	# Testing hook: publish the completed log under its final name
+	if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then
+		mv "$tmp" "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE"
+	fi
+
+) 9>"${CTDB_SCRIPT_VARDIR}/debug-hung-script.lock"
diff --git a/ctdb/config/debug_locks.sh b/ctdb/config/debug_locks.sh
new file mode 100755
index 0000000..6c730ee
--- /dev/null
+++ b/ctdb/config/debug_locks.sh
@@ -0,0 +1,218 @@
+#!/bin/sh
+
+# This script attempts to find processes holding locks on a particular
+# CTDB database and dumps a stack trace for each such process.
+#
+# There are 2 cases:
+#
+# * Samba is configured to use fcntl locks
+#
+# In this case /proc/locks is parsed to find potential lock holders
+#
+# * Samba is configured to use POSIX robust mutexes
+#
+# In this case the helper program tdb_mutex_check is used to find
+# potential lock holders.
+#
+# This helper program uses a private glibc struct field, so is
+# neither portable nor supported. If this field is not available
+# then the helper is not built. Unexpected changes in internal
+# glibc structures may cause unexpected results, including crashes.
+# Bug reports for this helper program are not accepted without an
+# accompanying patch.
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+if [ $# -ne 4 ] ; then
+ die "usage: $0 <pid> { DB | RECORD } <tdb_path> { FCNTL | MUTEX }"
+fi
+
+lock_helper_pid="$1"
+# lock_scope is unused for now
+# shellcheck disable=SC2034
+lock_scope="$2"
+tdb_path="$3"
+lock_type="$4"
+
+# type is at least mentioned in POSIX and is more portable than which(1)
+# shellcheck disable=SC2039
+if ! type gstack >/dev/null 2>&1 ; then
+ # Fallback used only when no gstack command is available: run gdb
+ # in batch mode against the process to get a backtrace of every
+ # thread, keeping only the frame ("#...") and "Thread " lines.
+ gstack ()
+ {
+ _pid="$1"
+
+ gdb -batch --quiet -nx "/proc/${_pid}/exe" "$_pid" \
+ -ex "thread apply all bt" 2>/dev/null |
+ grep '^\(#\|Thread \)'
+ }
+fi
+
+# Load/cache database options from configuration file
+ctdb_get_db_options
+
+dump_stack ()
+{
+ # Print a stack trace for the process with the given PID: a
+ # userspace trace via gstack, or the kernel stack if the process
+ # is in D state.
+ _pid="$1"
+
+ echo "----- Stack trace for PID=${_pid} -----"
+ # First character of the ps state column
+ _state=$(ps -p "$_pid" -o state= | cut -c 1)
+ if [ "$_state" = "D" ] ; then
+ # Don't run gstack on a process in D state since
+ # gstack will hang until the process exits D state.
+ # Although it is possible for a process to transition
+ # to D state after this check, it is unlikely because
+ # if a process is stuck in D state then it is probably
+ # the reason why this script was called. Note that a
+ # kernel stack almost certainly won't help diagnose a
+ # deadlock... but it will probably give us someone to
+ # blame!
+ echo "----- Process in D state, printing kernel stack only"
+ get_proc "${_pid}/stack"
+ else
+ gstack "$_pid"
+ fi
+}
+
+dump_stacks ()
+{
+ # Print a stack trace for each distinct PID in the
+ # whitespace-separated list given as the first argument.
+ _pids="$1"
+
+ # Use word splitting to squash whitespace
+ # shellcheck disable=SC2086
+ _pids=$(echo $_pids | tr ' ' '\n' | sort -u)
+
+ for _pid in $_pids; do
+ dump_stack "$_pid"
+ done
+}
+
+get_tdb_file_id ()
+{
+	# Print the ID of $tdb_path as "MAJOR:MINOR:INODE", with the
+	# device numbers in hex, for matching against /proc/locks.
+	if ! _device_inode=$(stat -c "%d:%i" "$tdb_path" 2>/dev/null) ; then
+		die "Unable to stat \"${tdb_path}\""
+	fi
+	_device="${_device_inode%%:*}"
+	# Decode the Linux st_dev encoding: the major number is bits
+	# 8-19, the minor number is bits 0-7 plus bits 20 and up.  A
+	# plain 8-bit split is wrong for minor numbers above 255.
+	_device_major=$(( (_device >> 8) & 0xfff ))
+	_device_minor=$(( (_device & 0xff) | ((_device >> 12) & 0xfff00) ))
+	_inode="${_device_inode#*:}"
+	printf '%02x:%02x:%u\n' "$_device_major" "$_device_minor" "$_inode"
+}
+
+debug_via_proc_locks ()
+{
+ # Use /proc/locks to log the lock that $lock_helper_pid is waiting
+ # for and the processes holding write locks on $tdb_path. PIDs of
+ # the holders are appended to $lock_holder_pids.
+
+ # Get file ID to match relevant column in /proc/locks
+ _file_id=$(get_tdb_file_id)
+
+ # Log information from /proc/locks about the waiting process
+ _tdb=$(basename "$tdb_path")
+ _comm=$(ps -p "$lock_helper_pid" -o comm=)
+ # Only lines with "->" in the second field are considered here, so
+ # every later field is one column to the right compared with the
+ # holder lines matched further down
+ _out=$(get_proc "locks" |
+ awk -v pid="$lock_helper_pid" \
+ -v file_id="$_file_id" \
+ -v file="$_tdb" \
+ -v comm="$_comm" \
+ '$2 == "->" &&
+ $3 == "POSIX" &&
+ $4 == "ADVISORY" &&
+ $5 == "WRITE" &&
+ $6 == pid &&
+ $7 == file_id { print $6, comm, file, $8, $9 }')
+ if [ -n "$_out" ] ; then
+ echo "Waiter:"
+ echo "$_out"
+ fi
+
+ # Parse /proc/locks and find process holding locks on $tdb_path
+ # extract following information
+ # pid process_name tdb_name offsets
+ _out=$(get_proc "locks" |
+ awk -v pid="$lock_helper_pid" \
+ -v file_id="$_file_id" \
+ -v file="$_tdb" \
+ '$2 == "POSIX" &&
+ $3 == "ADVISORY" &&
+ $4 == "WRITE" &&
+ $5 != pid &&
+ $6 == file_id { print $5, file, $7, $8 }' |
+ while read -r _pid _rest ; do
+ _pname=$(ps -p "$_pid" -o comm=)
+ echo "$_pid $_pname $_rest"
+ done)
+
+ # Nothing else to report if no other process holds a lock
+ if [ -z "$_out" ]; then
+ return
+ fi
+
+ # Log information about locks
+ echo "Lock holders:"
+ echo "$_out"
+
+ _pids=$(echo "$_out" | awk '{ print $1 }')
+
+ lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}"
+}
+
+debug_via_tdb_mutex ()
+{
+ # Use the tdb_mutex_check helper to log the processes holding
+ # mutexes in $tdb_path. PIDs of the holders, other than
+ # $lock_helper_pid, are appended to $lock_holder_pids.
+ _helper="${CTDB_HELPER_BINDIR}/tdb_mutex_check"
+ if [ ! -x "$_helper" ] ; then
+ # Mutex helper not available - not supported?
+ # Avoid not found error...
+ return
+ fi
+
+ # Helper should always succeed
+ if ! _t=$("$_helper" "$tdb_path") ; then
+ return
+ fi
+
+ # Rewrite helper lines of the form "[X] pid=Y" as "Y X"
+ _out=$(echo "$_t" | sed -n -e 's#^\[\(.*\)\] pid=\(.*\)#\2 \1#p')
+
+ if [ -z "$_out" ]; then
+ # No holders found, but still pass on any trylock failures
+ if [ -n "$_t" ] ; then
+ echo "$_t" | grep -F 'trylock failed'
+ fi
+ return
+ fi
+
+ # Get process names, append $tdb_path
+ _out=$(echo "$_out" |
+ while read -r _pid _rest ; do
+ _pname=$(ps -p "$_pid" -o comm=)
+ _tdb=$(basename "$tdb_path")
+ echo "${_pid} ${_pname} ${_tdb} ${_rest}"
+ done)
+
+ # Log information about locks
+ echo "Lock holders:"
+ echo "$_out"
+
+ # Get PIDs of processes that are holding locks
+ _pids=$(echo "$_out" |
+ awk -v pid="$lock_helper_pid" '$1 != pid {print $1}')
+
+ lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}"
+}
+
+(
+ # Take the lock on fd 9 without blocking, giving up straight away
+ # if another instance of this script already holds it
+ flock -n 9 || exit 1
+
+ echo "===== Start of debug locks PID=$$ ====="
+
+ lock_holder_pids=""
+
+ # /proc/locks is consulted for both lock types; the mutex helper is
+ # only used in addition when the lock type is MUTEX
+ debug_via_proc_locks
+
+ if [ "$lock_type" = "MUTEX" ] ; then
+ debug_via_tdb_mutex
+ fi
+
+ dump_stacks "$lock_holder_pids"
+
+ echo "===== End of debug locks PID=$$ ====="
+)9>"${CTDB_SCRIPT_VARDIR}/debug_locks.lock" | script_log "ctdbd-lock"
+
+exit 0
diff --git a/ctdb/config/events/README b/ctdb/config/events/README
new file mode 100644
index 0000000..6553830
--- /dev/null
+++ b/ctdb/config/events/README
@@ -0,0 +1,193 @@
+The events/ directory contains event scripts used by CTDB. Event
+scripts are triggered on certain events, such as startup, monitoring
+or public IP allocation. Scripts may be specific to services,
+networking or internal CTDB operations.
+
+Scripts are divided into subdirectories for different CTDB components.
+Right now the only component is "legacy".
+
+All event scripts start with the prefix 'NN.' where N is a digit. The
+event scripts are run in sequence based on NN. Thus 10.interface will
+be run before 60.nfs. It is recommended to keep each NN unique.
+However, scripts with the same NN prefix will be executed in
+alphanumeric sort order.
+
+As a special case, any eventscript that ends with a '~' character will be
+ignored since this is a common postfix that some editors will append to
+older versions of a file. Similarly, any eventscript with multiple '.'s
+will be ignored as package managers can create copies with additional
+suffix starting with '.' (e.g. .rpmnew, .dpkg-dist).
+
+Only executable event scripts are run by CTDB. Any event script that
+does not have execute permission is ignored.
+
+The eventscripts are called with varying number of arguments. The
+first argument is the event name and the rest of the arguments depend
+on the event name.
+
+Event scripts must return 0 for success and non-zero for failure.
+
+Output of event scripts is logged. On failure the output of the
+failing event script is included in the output of "ctdb scriptstatus".
+
+The following events are supported (with arguments shown):
+
+init
+
+ This event is triggered once when CTDB is starting up. This
+ event is used to do some basic cleanup and initialisation.
+
+ During the "init" event CTDB is not listening on its Unix
+ domain socket, so the "ctdb" CLI will not work.
+
+ Failure of this event will cause CTDB to terminate.
+
+ Example: 00.ctdb creates $CTDB_SCRIPT_VARDIR
+
+setup
+
+ This event is triggered once, after the "init" event has
+ completed.
+
+ For this and any subsequent events the CTDB Unix domain socket
+ is available, so the "ctdb" CLI will work.
+
+ Failure of this event will cause CTDB to terminate.
+
+ Example: 11.natgw checks that it has valid configuration
+
+startup
+
+ This event is triggered after the "setup" event has completed
+ and CTDB has finished its initial database recovery.
+
+ This event starts all services that are managed by CTDB. Each
+ service that is managed by CTDB should implement this event
+ and use it to (re)start the service.
+
+ If the "startup" event fails then CTDB will retry it until it
+ succeeds. There is no limit on the number of retries.
+
+ Example: 50.samba uses this event to start the Samba daemon.
+
+shutdown
+
+ This event is triggered when CTDB is shutting down.
+
+ This event shuts down all services that are managed by CTDB.
+ Each service that is managed by CTDB should implement this
+ event and use it to stop the service.
+
+ Example: 50.samba uses this event to shut down the Samba
+ daemon.
+
+monitor
+
+ This event is run periodically. The interval between
+ successive "monitor" events is configured using the
+ MonitorInterval tunable, which defaults to 15 seconds.
+
+ This event is triggered by CTDB to continuously monitor that
+ all managed services are healthy. If all event scripts
+ complete the monitor event successfully then the node is
+ marked HEALTHY. If any event script fails then no subsequent
+ scripts will be run for that event and the node is marked
+ UNHEALTHY.
+
+ Each service that is managed by CTDB should implement this
+ event and use it to monitor the service.
+
+ Example: 10.interface checks that each configured interface
+ for public IP addresses has a physical link established.
+
+startrecovery
+
+ This event is triggered every time a database recovery process
+ is started.
+
+ This is rarely used.
+
+recovered
+
+ This event is triggered every time a database recovery process
+ is completed.
+
+ This is rarely used.
+
+takeip <interface> <ip-address> <netmask-bits>
+
+ This event is triggered for each public IP address taken by a
+ node during IP address (re)assignment. Multiple "takeip"
+ events can be run in parallel if multiple IP addresses are
+ being assigned.
+
+ Example: In 10.interface the "ip" command (from the Linux
+ iproute2 package) is used to add the specified public IP
+ address to the specified interface. The "ip" command can
+ safely be run concurrently. However, the "iptables" command
+ cannot be run concurrently so a wrapper is used to serialise
+ runs using exclusive locking.
+
+ If substantial work is required to reconfigure a service when
+ a public IP address is taken over it can be better to defer
+ service reconfiguration to the "ipreallocated" event, after
+ all IP addresses have been assigned.
+
+ Example: 60.nfs uses ctdb_service_set_reconfigure() to flag
+ that public IP addresses have changed so that service
+ reconfiguration will occur in the "ipreallocated" event.
+
+releaseip <interface> <ip-address> <netmask-bits>
+
+ This event is triggered for each public IP address released by
+ a node during IP address (re)assignment. Multiple "releaseip"
+ events can be run in parallel if multiple IP addresses are
+ being unassigned.
+
+ In all other regards, this event is analogous to the "takeip"
+ event above.
+
+updateip <old-interface> <new-interface> <ip-address> <netmask-bits>
+
+ This event is triggered for each public IP address moved
+ between interfaces on a node during IP address (re)assignment.
+ Multiple "updateip" events can be run in parallel if multiple
+ IP addresses are being moved.
+
+ This event is only used if multiple interfaces are capable of
+ hosting an IP address, as specified in the public addresses
+ configuration file.
+
+ This event is similar to the "takeip" event above.
+
+ipreallocated
+
+ This event is triggered on all nodes as the last step of
+ public IP address (re)assignment. It is unconditionally
+ triggered after any "releaseip", "takeip" and "updateip"
+ events, even though these events may not run on some nodes if
+ there are no relevant changes. That is, the "ipreallocated"
+ event is triggered unconditionally, even on nodes where public
+ IP address assignments have not changed.
+
+ This event is used to reconfigure services.
+
+ Since "ipreallocated" is always run, this allows
+ reconfiguration to depend on the states of other nodes rather
+ than just IP addresses.
+
+ Example: 11.natgw recalculates the NAT gateway master and
+ updates the relevant network configuration on each node if the
+ NAT gateway master has changed.
+
+Additional notes for "takeip", "releaseip", "updateip",
+"ipreallocated":
+
+* Failure of any of these events causes IP allocation to be retried.
+
+* An event script can use ctdb_service_set_reconfigure() in "takeip",
+ "releaseip" or "updateip" events to flag that its service needs to
+ be reconfigured. The "ipreallocated" event can then use
+ ctdb_service_needs_reconfigure() to test if there were public IP
+ changes to determine what type of reconfiguration (if any) is
+ needed.
diff --git a/ctdb/config/events/legacy/00.ctdb.script b/ctdb/config/events/legacy/00.ctdb.script
new file mode 100755
index 0000000..81c16af
--- /dev/null
+++ b/ctdb/config/events/legacy/00.ctdb.script
@@ -0,0 +1,130 @@
+#!/bin/sh
+
+# Event script for ctdb-specific setup and other things that don't fit
+# elsewhere.
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+############################################################
+
+# type is commonly supported and more portable than which(1)
+# shellcheck disable=SC2039
+select_tdb_checker ()
+{
+	# Find the best TDB consistency check available.
+	#
+	# Sets $use_tdb_tool_check to "true" when 'tdbtool check' can be
+	# used; otherwise it stays "false" so that check_tdb() falls
+	# back to 'tdbdump'.  Returns 1 when no checker is available.
+	use_tdb_tool_check=false
+
+	# These flags are executed as commands below, so they must
+	# always hold "true" or "false".  An unset flag would expand to
+	# an empty command, which succeeds, making a missing tool look
+	# like it had been found.
+	found_tdbtool=false
+	found_tdbdump=false
+	type tdbtool >/dev/null 2>&1 && found_tdbtool=true
+	type tdbdump >/dev/null 2>&1 && found_tdbdump=true
+
+	if $found_tdbtool && echo "help" | tdbtool | grep -q check ; then
+		use_tdb_tool_check=true
+	elif $found_tdbtool && $found_tdbdump ; then
+		cat <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+EOF
+	elif $found_tdbdump ; then
+		cat <<EOF
+WARNING: 'tdbtool' is not available.
+ Using 'tdbdump' to check the databases.
+ Consider installing a recent 'tdbtool' for better checks!
+EOF
+	else
+		cat <<EOF
+WARNING: Cannot check databases since neither
+ 'tdbdump' nor 'tdbtool check' is available.
+ Consider installing tdbtool or at least tdbdump!
+EOF
+		return 1
+	fi
+}
+
+check_tdb ()
+{
+	# Check the integrity of the TDB file given as $1.
+	# Returns 0 if the database looks OK, non-zero otherwise.
+	_db="$1"
+
+	if $use_tdb_tool_check ; then
+		# tdbtool always exits with 0 :-(
+		# So the verdict has to be scraped from its output.
+		timeout 10 tdbtool "$_db" check 2>/dev/null |
+			grep -q "Database integrity is OK" || return 1
+		return 0
+	fi
+
+	# Fallback: the database is OK if tdbdump can traverse it
+	timeout 10 tdbdump "$_db" >/dev/null 2>/dev/null
+}
+
+check_persistent_databases ()
+{
+	# Check every readable persistent database and refuse to carry
+	# on (via die) as soon as one of them fails check_tdb().
+	_dir="${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}"
+	if [ ! -d "$_dir" ] ; then
+		# No directory means no databases to check
+		return 0
+	fi
+
+	for _db in "$_dir/"*.tdb.*[0-9] ; do
+		if [ -r "$_db" ] && ! check_tdb "$_db" ; then
+			die "Persistent database $_db is corrupted! CTDB will not start."
+		fi
+	done
+}
+
+check_non_persistent_databases ()
+{
+ _dir="${CTDB_DBDIR:-${CTDB_VARDIR}}"
+ [ -d "$_dir" ] || return 0
+
+ for _db in "${_dir}/"*.tdb.*[0-9] ; do
+ [ -r "$_db" ] || continue
+ check_tdb "$_db" || {
+ _backup="${_db}.$(date +'%Y%m%d.%H%M%S').corrupt"
+ cat <<EOF
+WARNING: database ${_db} is corrupted.
+ Moving to backup ${_backup} for later analysis.
+EOF
+ mv "$_db" "$_backup"
+
+ # Now remove excess backups
+ _max="${CTDB_MAX_CORRUPT_DB_BACKUPS:-10}"
+ _bdb="${_db##*/}" # basename
+ find "$_dir" -name "${_bdb}.*.corrupt" |
+ sort -r |
+ tail -n +$((_max + 1)) |
+ xargs rm -f
+ }
+ done
+}
+
+############################################################
+
+ctdb_check_args "$@"
+
+case "$1" in
+init)
+ # make sure we have a blank state directory for the scripts to work with
+ rm -rf "$CTDB_SCRIPT_VARDIR"
+ mkdir -p "$CTDB_SCRIPT_VARDIR" || \
+ die "mkdir -p ${CTDB_SCRIPT_VARDIR} - failed - $?" $?
+
+ # Load/cache database options from configuration file
+ ctdb_get_db_options
+
+ if select_tdb_checker ; then
+ check_persistent_databases || exit $?
+ check_non_persistent_databases
+ fi
+ ;;
+
+startup)
+ $CTDB attach ctdb.tdb persistent
+ ;;
+esac
+
+# all OK
+exit 0
diff --git a/ctdb/config/events/legacy/01.reclock.script b/ctdb/config/events/legacy/01.reclock.script
new file mode 100755
index 0000000..0406875
--- /dev/null
+++ b/ctdb/config/events/legacy/01.reclock.script
@@ -0,0 +1,34 @@
+#!/bin/sh
+# script to check accessibility to the reclock file on a node
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+case "$1" in
+init)
+ recovery_lock=$("${CTDB_HELPER_BINDIR}/ctdb-config" \
+ get cluster "recovery lock")
+ # xshellcheck disable=SC2181
+ # Above is already complicated enough without embedding into "if"
+ case $? in
+ 0) : ;;
+ 2) exit 0 ;; # ENOENT: not configured
+ *) die "Unexpected error getting recovery lock configuration"
+ esac
+
+ if [ -z "$recovery_lock" ] ; then
+ exit 0
+ fi
+
+ # If a helper is specified then exit because this script can't
+ # do anything useful
+ case "$recovery_lock" in
+ !*) exit 0 ;;
+ esac
+
+ d=$(dirname "$recovery_lock")
+ mkdir -p "$d"
+ ;;
+esac
diff --git a/ctdb/config/events/legacy/05.system.script b/ctdb/config/events/legacy/05.system.script
new file mode 100755
index 0000000..bf36dd2
--- /dev/null
+++ b/ctdb/config/events/legacy/05.system.script
@@ -0,0 +1,198 @@
+#!/bin/sh
+# ctdb event script for checking local file system utilization
+
+[ -n "$CTDB_BASE" ] ||
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+ctdb_setup_state_dir "service" "system-monitoring"
+
+validate_percentage()
+{
+	# $1: candidate percentage, $2: name of the check (for the warning)
+	# Succeeds only for an integer 0-100.  An empty value fails
+	# silently; any other invalid value fails with a warning.
+	case "$1" in
+	"")
+		# A failure that doesn't need a warning
+		return 1
+		;;
+	100 | [0-9][0-9] | [0-9])
+		return 0
+		;;
+	esac
+
+	echo "WARNING: ${1} is an invalid percentage in \"${2}\" check"
+	return 1
+}
+
+check_thresholds()
+{
+ _thing="$1"
+ _thresholds="$2"
+ _usage="$3"
+ _unhealthy_callout="$4"
+
+ case "$_thresholds" in
+ *:*)
+ _warn_threshold="${_thresholds%:*}"
+ _unhealthy_threshold="${_thresholds#*:}"
+ ;;
+ *)
+ _warn_threshold="$_thresholds"
+ _unhealthy_threshold=""
+ ;;
+ esac
+
+ _t=$(echo "$_thing" | sed -e 's@/@SLASH_@g' -e 's@ @_@g')
+ # script_state_dir set by ctdb_setup_state_dir()
+ # shellcheck disable=SC2154
+ _cache="${script_state_dir}/cache_${_t}"
+ if [ -r "$_cache" ]; then
+ read -r _prev <"$_cache"
+ else
+ _prev=0
+ fi
+ if validate_percentage "$_unhealthy_threshold" "$_thing"; then
+ if [ "$_usage" -ge "$_unhealthy_threshold" ]; then
+ printf 'ERROR: %s utilization %d%% >= threshold %d%%\n' \
+ "$_thing" \
+ "$_usage" \
+ "$_unhealthy_threshold"
+ # Only run unhealthy callout if passing the
+ # unhealthy threshold. That is, if the
+ # previous usage was below the threshold.
+ if [ "$_prev" -lt "$_unhealthy_threshold" ]; then
+ eval "$_unhealthy_callout"
+ fi
+ echo "$_usage" >"$_cache"
+ exit 1
+ fi
+ fi
+
+ if validate_percentage "$_warn_threshold" "$_thing"; then
+ if [ "$_usage" -ge "$_warn_threshold" ]; then
+ if [ "$_usage" = "$_prev" ]; then
+ return
+ fi
+ printf 'WARNING: %s utilization %d%% >= threshold %d%%\n' \
+ "$_thing" \
+ "$_usage" \
+ "$_warn_threshold"
+ echo "$_usage" >"$_cache"
+ else
+ if [ ! -r "$_cache" ]; then
+ return
+ fi
+ printf 'NOTICE: %s utilization %d%% < threshold %d%%\n' \
+ "$_thing" \
+ "$_usage" \
+ "$_warn_threshold"
+ rm -f "$_cache"
+ fi
+ fi
+}
+
+set_monitor_filsystem_usage_defaults()
+{
+ _fs_defaults_cache="${script_state_dir}/cache_filsystem_usage_defaults"
+
+ if [ ! -r "$_fs_defaults_cache" ]; then
+ # Determine filesystem for each database directory, generate
+ # an entry to warn at 90%, de-duplicate entries, put all items
+ # on 1 line (so the read below gets everything)
+ for _t in "${CTDB_DBDIR:-${CTDB_VARDIR}}" \
+ "${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" \
+ "${CTDB_DBDIR_STATE:-${CTDB_VARDIR}/state}"; do
+ df -kP "$_t" | awk 'NR == 2 { printf "%s:90\n", $6 }'
+ done | sort -u | xargs >"$_fs_defaults_cache"
+ fi
+
+ read -r CTDB_MONITOR_FILESYSTEM_USAGE <"$_fs_defaults_cache"
+}
+
+monitor_filesystem_usage()
+{
+ if [ -z "$CTDB_MONITOR_FILESYSTEM_USAGE" ]; then
+ set_monitor_filsystem_usage_defaults
+ fi
+
+ # Check each specified filesystem, specified in format
+ # <fs_mount>:<fs_warn_threshold>[:fs_unhealthy_threshold]
+ for _fs in $CTDB_MONITOR_FILESYSTEM_USAGE; do
+ _fs_mount="${_fs%%:*}"
+ _fs_thresholds="${_fs#*:}"
+
+ if [ ! -d "$_fs_mount" ]; then
+ echo "WARNING: Directory ${_fs_mount} does not exist"
+ continue
+ fi
+
+ # Get current utilization
+ _fs_usage=$(df -kP "$_fs_mount" |
+ sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p')
+ if [ -z "$_fs_usage" ]; then
+ printf 'WARNING: Unable to get FS utilization for %s\n' \
+ "$_fs_mount"
+ continue
+ fi
+
+ check_thresholds "Filesystem ${_fs_mount}" \
+ "$_fs_thresholds" \
+ "$_fs_usage"
+ done
+}
+
+# shellcheck disable=SC2317
+# Called indirectly via check_thresholds()
+dump_memory_info()
+{
+ get_proc "meminfo"
+ ps auxfww
+ set_proc "sysrq-trigger" "m"
+}
+
+monitor_memory_usage()
+{
+ # Defaults
+ if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ]; then
+ CTDB_MONITOR_MEMORY_USAGE=80
+ fi
+
+ _meminfo=$(get_proc "meminfo")
+ # Intentional word splitting here
+ # shellcheck disable=SC2046
+ set -- $(echo "$_meminfo" | awk '
+$1 == "MemAvailable:" { memavail += $2 }
+$1 == "MemFree:" { memfree += $2 }
+$1 == "Cached:" { memfree += $2 }
+$1 == "Buffers:" { memfree += $2 }
+$1 == "MemTotal:" { memtotal = $2 }
+$1 == "SwapFree:" { swapfree = $2 }
+$1 == "SwapTotal:" { swaptotal = $2 }
+END {
+ if (memavail != 0) { memfree = memavail ; }
+ if (memtotal + swaptotal != 0) {
+ usedtotal = memtotal - memfree + swaptotal - swapfree
+ print int(usedtotal / (memtotal + swaptotal) * 100)
+ } else {
+ print 0
+ }
+}')
+ _mem_usage="$1"
+
+ check_thresholds "System memory" \
+ "$CTDB_MONITOR_MEMORY_USAGE" \
+ "$_mem_usage" \
+ dump_memory_info
+}
+
+case "$1" in
+monitor)
+ # Load/cache database options from configuration file
+ ctdb_get_db_options
+
+ rc=0
+ monitor_filesystem_usage || rc=$?
+ monitor_memory_usage || rc=$?
+ exit $rc
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/06.nfs.script b/ctdb/config/events/legacy/06.nfs.script
new file mode 100755
index 0000000..b937d43
--- /dev/null
+++ b/ctdb/config/events/legacy/06.nfs.script
@@ -0,0 +1,39 @@
+#!/bin/sh
+# script to manage nfs in a clustered environment
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+service_name="nfs"
+
+load_script_options "service" "60.nfs"
+
+ctdb_setup_state_dir "service" "$service_name"
+
+######################################################################
+
+nfs_callout_pre ()
+{
+ _event="$1"
+ shift
+
+ nfs_callout "${_event}-pre" "$@"
+}
+
+######################################################################
+
+# script_state_dir set by ctdb_setup_state_dir()
+# shellcheck disable=SC2154
+nfs_callout_init "$script_state_dir"
+
+case "$1" in
+takeip)
+ nfs_callout_pre "$@"
+ ;;
+
+releaseip)
+ nfs_callout_pre "$@"
+ ;;
+esac
diff --git a/ctdb/config/events/legacy/10.interface.script b/ctdb/config/events/legacy/10.interface.script
new file mode 100755
index 0000000..fead88c
--- /dev/null
+++ b/ctdb/config/events/legacy/10.interface.script
@@ -0,0 +1,262 @@
+#!/bin/sh
+
+#################################
+# interface event script for ctdb
+# this adds/removes IPs from your
+# public interface
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+ctdb_public_addresses="${CTDB_BASE}/public_addresses"
+
+if [ ! -f "$ctdb_public_addresses" ]; then
+ if [ "$1" = "init" ] ; then
+ echo "No public addresses file found"
+ fi
+ exit 0
+fi
+
+# This sets $all_interfaces as a side-effect.
+get_all_interfaces ()
+{
+ # Get all the interfaces listed in the public_addresses file
+ all_interfaces=$(sed -e '/^#.*/d' \
+ -e 's/^[^\t ]*[\t ]*//' \
+ -e 's/,/ /g' \
+ -e 's/[\t ]*$//' "$ctdb_public_addresses")
+
+ # Get the interfaces for which CTDB has public IPs configured.
+ # That is, for all but the 1st line, get the 1st field.
+ ctdb_ifaces=$($CTDB -X ifaces | sed -e '1d' -e 's@^|@@' -e 's@|.*@@')
+
+ # Add $ctdb_ifaces and make $all_interfaces unique
+ # Use word splitting to squash whitespace
+ # shellcheck disable=SC2086
+ all_interfaces=$(echo $all_interfaces $ctdb_ifaces | tr ' ' '\n' | sort -u)
+}
+
+monitor_interfaces()
+{
+	# Sets $all_interfaces
+	get_all_interfaces
+
+	up_interfaces_found=false
+	down_interfaces_found=false
+
+	# Every interface has to be visited - there must be no early
+	# exit from this loop - so that the link state of each
+	# interface is recorded in CTDB using setifacelink.
+	for _iface in $all_interfaces ; do
+		if ! interface_monitor "$_iface" ; then
+			down_interfaces_found=true
+			$CTDB setifacelink "$_iface" down >/dev/null 2>&1
+			continue
+		fi
+		up_interfaces_found=true
+		$CTDB setifacelink "$_iface" up >/dev/null 2>&1
+	done
+
+	# No interface is down: success
+	$down_interfaces_found || return 0
+
+	# Some interface is down and none is up: failure
+	$up_interfaces_found || return 1
+
+	# A mix of up and down interfaces is only acceptable when
+	# partially online interfaces are explicitly allowed
+	[ "$CTDB_PARTIALLY_ONLINE_INTERFACES" = "yes" ]
+}
+
+# Sets: iface, ip, maskbits
+get_iface_ip_maskbits ()
+{
+ _iface_in="$1"
+ ip="$2"
+ _maskbits_in="$3"
+
+ # Intentional word splitting here
+ # shellcheck disable=SC2046
+ set -- $(ip_maskbits_iface "$ip")
+ if [ -n "$1" ] ; then
+ maskbits="$1"
+ iface="$2"
+
+ if [ "$iface" != "$_iface_in" ] ; then
+ printf \
+ 'WARNING: Public IP %s hosted on interface %s but VNN says %s\n' \
+ "$ip" "$iface" "$_iface_in"
+ fi
+ if [ "$maskbits" != "$_maskbits_in" ] ; then
+ printf \
+ 'WARNING: Public IP %s has %s bit netmask but VNN says %s\n' \
+ "$ip" "$maskbits" "$_maskbits_in"
+ fi
+ else
+ die "ERROR: Unable to determine interface for IP ${ip}"
+ fi
+}
+
+ip_block ()
+{
+ _ip="$1"
+ _iface="$2"
+
+ case "$_ip" in
+ *:*) _family="inet6" ;;
+ *) _family="inet" ;;
+ esac
+
+ # Extra delete copes with previously killed script
+ iptables_wrapper "$_family" \
+ -D INPUT -i "$_iface" -d "$_ip" -j DROP 2>/dev/null
+ iptables_wrapper "$_family" \
+ -I INPUT -i "$_iface" -d "$_ip" -j DROP
+}
+
+ip_unblock ()
+{
+ _ip="$1"
+ _iface="$2"
+
+ case "$_ip" in
+ *:*) _family="inet6" ;;
+ *) _family="inet" ;;
+ esac
+
+ iptables_wrapper "$_family" \
+ -D INPUT -i "$_iface" -d "$_ip" -j DROP 2>/dev/null
+}
+
+ctdb_check_args "$@"
+
+case "$1" in
+init)
+ # make sure that we only respond to ARP messages from the NIC where
+ # a particular ip address is associated.
+ get_proc sys/net/ipv4/conf/all/arp_filter >/dev/null 2>&1 && {
+ set_proc sys/net/ipv4/conf/all/arp_filter 1
+ }
+
+ _promote="sys/net/ipv4/conf/all/promote_secondaries"
+ get_proc "$_promote" >/dev/null 2>&1 || \
+ die "Public IPs only supported if promote_secondaries is available"
+
+ # make sure we drop any ips that might still be held if
+ # previous instance of ctdb got killed with -9 or similar
+ drop_all_public_ips
+ ;;
+
+startup)
+ monitor_interfaces
+ ;;
+
+shutdown)
+ drop_all_public_ips
+ ;;
+
+takeip)
+ iface=$2
+ ip=$3
+ maskbits=$4
+
+ add_ip_to_iface "$iface" "$ip" "$maskbits" || {
+ exit 1;
+ }
+
+ # In case a previous "releaseip" for this IP was killed...
+ ip_unblock "$ip" "$iface"
+
+ flush_route_cache
+ ;;
+
+releaseip)
+ # releasing an IP is a bit more complex than it seems. Once the IP
+ # is released, any open tcp connections to that IP on this host will end
+ # up being stuck. Some of them (such as NFS connections) will be unkillable
+ # so we need to use the killtcp ctdb function to kill them off. We also
+ # need to make sure that no new connections get established while we are
+ # doing this! So what we do is this:
+ # 1) firewall this IP, so no new external packets arrive for it
+ # 2) find existing connections, and kill them
+ # 3) remove the IP from the interface
+ # 4) remove the firewall rule
+ shift
+ get_iface_ip_maskbits "$@"
+
+ ip_block "$ip" "$iface"
+
+ kill_tcp_connections "$iface" "$ip"
+
+ delete_ip_from_iface "$iface" "$ip" "$maskbits" || {
+ ip_unblock "$ip" "$iface"
+ exit 1
+ }
+
+ ip_unblock "$ip" "$iface"
+
+ flush_route_cache
+ ;;
+
+updateip)
+ # moving an IP is a bit more complex than it seems.
+ # First we drop all traffic on the old interface.
+ # Then we remove the ip from the old interface before we finally
+ # add it to the new interface.
+ #
+ # 1) firewall this IP, so no new external packets arrive for it
+ # 2) remove the IP from the old interface (and new interface, to be sure)
+ # 3) add the IP to the new interface
+ # 4) remove the firewall rule
+ # 5) use ctdb gratarp to propagate the new mac address
+ # 6) use netstat -tn to find existing connections, and tickle them
+ _oiface=$2
+ niface=$3
+ _ip=$4
+ _maskbits=$5
+
+ get_iface_ip_maskbits "$_oiface" "$_ip" "$_maskbits"
+ oiface="$iface"
+
+ # Could check maskbits too. However, that should never change
+ # so we want to notice if it does.
+ if [ "$oiface" = "$niface" ] ; then
+ echo "Redundant \"updateip\" - ${ip} already on ${niface}"
+ exit 0
+ fi
+
+ ip_block "$ip" "$oiface"
+
+ delete_ip_from_iface "$oiface" "$ip" "$maskbits" 2>/dev/null
+ delete_ip_from_iface "$niface" "$ip" "$maskbits" 2>/dev/null
+
+ add_ip_to_iface "$niface" "$ip" "$maskbits" || {
+ ip_unblock "$ip" "$oiface"
+ exit 1
+ }
+
+ ip_unblock "$ip" "$oiface"
+
+ flush_route_cache
+
+ # propagate the new mac address
+ $CTDB gratarp "$ip" "$niface"
+
+ # tickle all existing connections, so that dropped packets
+ # are retransmitted and the tcp streams work
+ tickle_tcp_connections "$ip"
+ ;;
+
+monitor)
+ monitor_interfaces || exit 1
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/11.natgw.script b/ctdb/config/events/legacy/11.natgw.script
new file mode 100755
index 0000000..fb93dea
--- /dev/null
+++ b/ctdb/config/events/legacy/11.natgw.script
@@ -0,0 +1,242 @@
+#!/bin/sh
+# Script to set up one of the nodes as a NAT gateway for all other nodes.
+# This is used to ensure that all nodes in the cluster can still originate
+# traffic to the external network even if there are no public addresses
+# available.
+#
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+service_name="natgw"
+
+load_script_options
+
+[ -n "$CTDB_NATGW_NODES" ] || exit 0
+export CTDB_NATGW_NODES
+
+ctdb_setup_state_dir "failover" "$service_name"
+
+# script_state_dir set by ctdb_setup_state_dir()
+# shellcheck disable=SC2154
+natgw_cfg_new="${script_state_dir}/cfg_new"
+natgw_cfg_old="${script_state_dir}/cfg_old"
+natgw_leader_old="${script_state_dir}/leader_old"
+
+ctdb_natgw_follower_only ()
+{
+ _ip_address=$(ctdb_get_ip_address)
+
+ awk -v my_ip="$_ip_address" \
+ '$1 == my_ip { if ($2 ~ "follower-only") { exit 0 } else { exit 1 } }' \
+ "$CTDB_NATGW_NODES"
+}
+
+natgw_check_config ()
+{
+ [ -r "$CTDB_NATGW_NODES" ] || \
+ die "error: CTDB_NATGW_NODES=${CTDB_NATGW_NODES} unreadable"
+ if ! ctdb_natgw_follower_only ; then
+ [ -n "$CTDB_NATGW_PUBLIC_IP" ] || \
+ die "Invalid configuration: CTDB_NATGW_PUBLIC_IP not set"
+ [ -n "$CTDB_NATGW_PUBLIC_IFACE" ] || \
+ die "Invalid configuration: CTDB_NATGW_PUBLIC_IFACE not set"
+ fi
+ [ -n "$CTDB_NATGW_PRIVATE_NETWORK" ] || \
+ die "Invalid configuration: CTDB_NATGW_PRIVATE_NETWORK not set"
+
+ # The default is to create a single default route
+ [ -n "$CTDB_NATGW_STATIC_ROUTES" ] || CTDB_NATGW_STATIC_ROUTES="0.0.0.0/0"
+}
+
+natgw_write_config ()
+{
+ _f="$1"
+
+ cat >"$_f" <<EOF
+CTDB_NATGW_NODES="$CTDB_NATGW_NODES"
+CTDB_NATGW_PUBLIC_IP="$CTDB_NATGW_PUBLIC_IP"
+CTDB_NATGW_PUBLIC_IFACE="$CTDB_NATGW_PUBLIC_IFACE"
+CTDB_NATGW_DEFAULT_GATEWAY="$CTDB_NATGW_DEFAULT_GATEWAY"
+CTDB_NATGW_PRIVATE_NETWORK="$CTDB_NATGW_PRIVATE_NETWORK"
+CTDB_NATGW_STATIC_ROUTES="$CTDB_NATGW_STATIC_ROUTES"
+EOF
+}
+
+natgw_config_has_changed ()
+{
+ natgw_write_config "$natgw_cfg_new"
+
+ # Non-existent old returns true, no log message
+ if [ ! -f "$natgw_cfg_old" ] ; then
+ return 0
+ fi
+
+ # Handle no change
+ if cmp "$natgw_cfg_old" "$natgw_cfg_new" >/dev/null 2>&1 ; then
+ return 1
+ fi
+
+ echo "NAT gateway configuration has changed"
+ return 0
+}
+
+_natgw_clear ()
+{
+ _ip="${CTDB_NATGW_PUBLIC_IP%/*}"
+ _maskbits="${CTDB_NATGW_PUBLIC_IP#*/}"
+
+ delete_ip_from_iface \
+ "$CTDB_NATGW_PUBLIC_IFACE" "$_ip" "$_maskbits" >/dev/null 2>&1
+ for _net_gw in $CTDB_NATGW_STATIC_ROUTES ; do
+ _net="${_net_gw%@*}"
+ ip route del "$_net" metric 10 >/dev/null 2>/dev/null
+ done
+
+ # Delete the masquerading setup from a previous iteration where we
+ # were the NAT-GW
+ iptables -D POSTROUTING -t nat \
+ -s "$CTDB_NATGW_PRIVATE_NETWORK" ! -d "$CTDB_NATGW_PRIVATE_NETWORK" \
+ -j MASQUERADE >/dev/null 2>/dev/null
+
+ iptables -D INPUT -p tcp --syn -d "${_ip}/32" -j REJECT 2>/dev/null
+}
+
+natgw_clear ()
+{
+ if [ -r "$natgw_cfg_old" ] ; then
+ (. "$natgw_cfg_old" ; _natgw_clear)
+ else
+ _natgw_clear
+ fi
+}
+
+natgw_set_leader ()
+{
+ set_proc sys/net/ipv4/ip_forward 1
+ iptables -A POSTROUTING -t nat \
+ -s "$CTDB_NATGW_PRIVATE_NETWORK" ! -d "$CTDB_NATGW_PRIVATE_NETWORK" \
+ -j MASQUERADE
+
+ # block all incoming connections to the NATGW IP address
+ ctdb_natgw_public_ip_host="${CTDB_NATGW_PUBLIC_IP%/*}/32"
+ iptables -D INPUT -p tcp --syn \
+ -d "$ctdb_natgw_public_ip_host" -j REJECT 2>/dev/null
+ iptables -I INPUT -p tcp --syn \
+ -d "$ctdb_natgw_public_ip_host" -j REJECT 2>/dev/null
+
+ ip addr add "$CTDB_NATGW_PUBLIC_IP" dev "$CTDB_NATGW_PUBLIC_IFACE"
+ for _net_gw in $CTDB_NATGW_STATIC_ROUTES ; do
+ _net="${_net_gw%@*}"
+ if [ "$_net" != "$_net_gw" ] ; then
+ _gw="${_net_gw#*@}"
+ else
+ _gw="$CTDB_NATGW_DEFAULT_GATEWAY"
+ fi
+
+ [ -n "$_gw" ] || continue
+ ip route add "$_net" metric 10 via "$_gw"
+ done
+}
+
+natgw_set_follower ()
+{
+ _natgwip="$1"
+
+ for _net_gw in $CTDB_NATGW_STATIC_ROUTES ; do
+ _net="${_net_gw%@*}"
+ ip route add "$_net" via "$_natgwip" metric 10
+ done
+}
+
+natgw_ensure_leader ()
+{
+ # Intentional word splitting here
+ # shellcheck disable=SC2046
+ set -- $("${CTDB_HELPER_BINDIR}/ctdb_natgw" leader)
+ natgwleader="${1:--1}" # Default is -1, for failure above
+ natgwip="$2"
+
+ if [ "$natgwleader" = "-1" ]; then
+ # Fail...
+ die "There is no NATGW leader node"
+ fi
+}
+
+natgw_leader_has_changed ()
+{
+	# Succeeds if $natgwleader differs from the leader recorded in
+	# $natgw_leader_old.  A missing or unreadable state file counts
+	# as an empty previous leader.
+	_old_natgwleader=""
+	if [ -r "$natgw_leader_old" ] ; then
+		read _old_natgwleader <"$natgw_leader_old"
+	fi
+
+	[ "$_old_natgwleader" != "$natgwleader" ]
+}
+
+natgw_save_state ()
+{
+ echo "$natgwleader" >"$natgw_leader_old"
+ # Created by natgw_config_has_changed()
+ mv "$natgw_cfg_new" "$natgw_cfg_old"
+}
+
+
+case "$1" in
+setup)
+ natgw_check_config
+ ;;
+
+startup)
+ natgw_check_config
+
+ # Error if CTDB_NATGW_PUBLIC_IP is listed in public addresses
+ ip_pat=$(echo "$CTDB_NATGW_PUBLIC_IP" | sed -e 's@\.@\\.@g')
+ ctdb_public_addresses="${CTDB_BASE}/public_addresses"
+ if grep -q "^${ip_pat}[[:space:]]" "$ctdb_public_addresses" ; then
+ die "ERROR: CTDB_NATGW_PUBLIC_IP same as a public address"
+ fi
+
+ # do not send out arp requests from loopback addresses
+ set_proc sys/net/ipv4/conf/all/arp_announce 2
+ ;;
+
+updatenatgw|ipreallocated)
+ natgw_check_config
+
+ natgw_ensure_leader
+
+ natgw_config_has_changed || natgw_leader_has_changed || exit 0
+
+ natgw_clear
+
+ pnn=$(ctdb_get_pnn)
+ if [ "$pnn" = "$natgwleader" ]; then
+ natgw_set_leader
+ else
+ natgw_set_follower "$natgwip"
+ fi
+
+ # flush our route cache
+ set_proc sys/net/ipv4/route/flush 1
+
+ # Only update saved state when NATGW successfully updated
+ natgw_save_state
+ ;;
+
+shutdown|removenatgw)
+ natgw_check_config
+ natgw_clear
+ ;;
+
+monitor)
+ natgw_check_config
+
+ if [ -n "$CTDB_NATGW_PUBLIC_IFACE" ] ; then
+ interface_monitor "$CTDB_NATGW_PUBLIC_IFACE" || exit 1
+ fi
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/11.routing.script b/ctdb/config/events/legacy/11.routing.script
new file mode 100755
index 0000000..7ba7f3b
--- /dev/null
+++ b/ctdb/config/events/legacy/11.routing.script
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# Attempt to add a set of static routes.
+#
+# Do this in "ipreallocated" rather than just "startup" because some
+# of the routes might be missing because the corresponding interface
+# has not previously had any IPs assigned or IPs were previously
+# released and corresponding routes were dropped.
+#
+# Addition of some routes might fail, errors go to /dev/null.
+#
+# Routes to add are defined in $CTDB_BASE/static-routes. Syntax is:
+#
+# IFACE NET/MASK GATEWAY
+#
+# Example:
+#
+# bond1 10.3.3.0/24 10.0.0.1
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+[ -f "${CTDB_BASE}/static-routes" ] || {
+ exit 0
+}
+
+case "$1" in
+ipreallocated)
+ while read iface dest gw; do
+ ip route add "$dest" via "$gw" dev "$iface" >/dev/null 2>&1
+ done <"${CTDB_BASE}/static-routes"
+ ;;
+
+updateip)
+ oiface=$2
+ niface=$3
+ while read iface dest gw; do
+ if [ "$niface" = "$iface" ] || [ "$oiface" = "$iface" ] ; then
+ ip route add "$dest" via "$gw" dev "$iface" >/dev/null 2>&1
+ fi
+ done <"${CTDB_BASE}/static-routes"
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/13.per_ip_routing.script b/ctdb/config/events/legacy/13.per_ip_routing.script
new file mode 100755
index 0000000..d7949c6
--- /dev/null
+++ b/ctdb/config/events/legacy/13.per_ip_routing.script
@@ -0,0 +1,438 @@
+#!/bin/sh
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+service_name="per_ip_routing"
+
+# Do nothing if unconfigured
+[ -n "$CTDB_PER_IP_ROUTING_CONF" ] || exit 0
+
+table_id_prefix="ctdb."
+
+[ -n "$CTDB_PER_IP_ROUTING_RULE_PREF" ] || \
+ die "error: CTDB_PER_IP_ROUTING_RULE_PREF not configured"
+
+[ "$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" -lt "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] 2>/dev/null || \
+ die "error: CTDB_PER_IP_ROUTING_TABLE_ID_LOW[$CTDB_PER_IP_ROUTING_TABLE_ID_LOW] and/or CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH] improperly configured"
+
+# The configured range must not overlap table ids 253-255 at all.  Two
+# ranges overlap when each one starts no later than the other one
+# ends, so test LOW against 255 and HIGH against 253.  Testing LOW
+# against 253 and HIGH against 255 would only catch a range that
+# contains all three ids and would let, say, 250-254 through.
+if [ "$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" -le 255 ] && \
+	[ 253 -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; then
+	die "error: range CTDB_PER_IP_ROUTING_TABLE_ID_LOW[$CTDB_PER_IP_ROUTING_TABLE_ID_LOW]..CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH] must not include 253-255"
+fi
+
+have_link_local_config ()
+{
+ [ "$CTDB_PER_IP_ROUTING_CONF" = "__auto_link_local__" ]
+}
+
+if ! have_link_local_config && [ ! -r "$CTDB_PER_IP_ROUTING_CONF" ] ; then
+ die "error: CTDB_PER_IP_ROUTING_CONF=$CTDB_PER_IP_ROUTING_CONF file not found"
+fi
+
+ctdb_setup_state_dir "failover" "$service_name"
+
+######################################################################
+
+ipv4_is_valid_addr()
+{
+	# Succeeds if $1 is a dotted-quad IPv4 address, that is, it
+	# consists of exactly 4 octets, each an integer from 0 to 255.
+	_ip="$1"
+
+	_count=0
+	# Get the shell to break up the address into 1 word per octet
+	# Intentional word splitting here
+	# shellcheck disable=SC2086
+	for _o in $(export IFS="." ; echo $_ip) ; do
+		# Both range tests are grouped so that the negation and
+		# the redirect apply to the check as a whole.  The
+		# 2>/dev/null stops output from failures where an
+		# "octet" is not numeric.  The test will still fail.
+		if ! { [ 0 -le "$_o" ] && [ "$_o" -le 255 ] ; } 2>/dev/null ; then
+			return 1
+		fi
+		_count=$((_count + 1))
+	done
+
+	# A valid IPv4 address has 4 octets
+	[ $_count -eq 4 ]
+}
+
+ensure_ipv4_is_valid_addr ()
+{
+	# $1: event name (only used in the message), $2: IP address
+	# If the address is not IPv4 then say so and end the script
+	# successfully - there is nothing for this script to do.
+	_event="$1"
+	_ip="$2"
+
+	if ! ipv4_is_valid_addr "$_ip" ; then
+		echo "$0: $_event not an ipv4 address skipping IP:$_ip"
+		exit 0
+	fi
+}
+
+ipv4_host_addr_to_net ()
+{
+ _host="$1"
+ _maskbits="$2"
+
+ # Convert the host address to an unsigned long by splitting out
+ # the octets and doing the math.
+ _host_ul=0
+ # Intentional word splitting here
+ # shellcheck disable=SC2086
+ for _o in $(export IFS="." ; echo $_host) ; do
+ _host_ul=$(( (_host_ul << 8) + _o)) # work around Emacs color bug
+ done
+
+ # Calculate the mask and apply it.
+ _mask_ul=$(( 0xffffffff << (32 - _maskbits) ))
+ _net_ul=$(( _host_ul & _mask_ul ))
+
+ # Now convert to a network address one byte at a time.
+ _net=""
+ for _o in $(seq 1 4) ; do
+ _net="$((_net_ul & 255))${_net:+.}${_net}"
+ _net_ul=$((_net_ul >> 8))
+ done
+
+ echo "${_net}/${_maskbits}"
+}
+
+######################################################################
+
+ensure_rt_tables ()
+{
+ rt_tables="$CTDB_SYS_ETCDIR/iproute2/rt_tables"
+ # script_state_dir set by ctdb_setup_state_dir()
+ # shellcheck disable=SC2154
+ rt_tables_lock="${script_state_dir}/rt_tables_lock"
+
+ # This file should always exist. Even if this didn't exist on the
+ # system, adding a route will have created it. What if we startup
+ # and immediately shutdown? Let's be sure.
+ if [ ! -f "$rt_tables" ] ; then
+ mkdir -p "${rt_tables%/*}" # dirname
+ touch "$rt_tables"
+ fi
+}
+
+# Setup a table id to use for the given IP. We don't need to know it,
+# it just needs to exist in /etc/iproute2/rt_tables. Fail if no free
+# table id could be found in the configured range.
+ensure_table_id_for_ip ()
+{
+	# $1: IP address
+	# Succeeds if rt_tables already has, or can be given, an entry
+	# labelled for this IP.  Fails if the lock can not be taken or
+	# if no table id is left in the configured range.
+	_ip=$1
+
+	# Sets $rt_tables and $rt_tables_lock
+	ensure_rt_tables
+
+	# Maintain a table id for each IP address we've ever seen in
+	# rt_tables. We use a "ctdb." prefix on the label.
+	_label="${table_id_prefix}${_ip}"
+
+	# This finds either the table id corresponding to the label or a
+	# new unused one (that is greater than all the used ones in the
+	# range).
+	(
+		# Note that die() just gets us out of the subshell...
+		flock --timeout 30 9 || \
+			die "ensure_table_id_for_ip: failed to lock file $rt_tables"
+
+		_new="$CTDB_PER_IP_ROUTING_TABLE_ID_LOW"
+		while read -r _t _l ; do
+			# Skip comments
+			case "$_t" in
+			\#*) continue ;;
+			esac
+			# Found existing: done
+			if [ "$_l" = "$_label" ] ; then
+				return 0
+			fi
+			# Potentially update the new table id to be used.
+			# The tests are grouped so that the redirect covers
+			# both of them, which stops error spam for a
+			# non-numeric (or empty) value in either test.
+			if { [ "$_new" -le "$_t" ] && \
+				[ "$_t" -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; } \
+				2>/dev/null ; then
+				_new=$((_t + 1))
+			fi
+		done <"$rt_tables"
+
+		# If the new table id is legal then add it to the file.
+		if [ "$_new" -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; then
+			printf '%d\t%s\n' "$_new" "$_label" >>"$rt_tables"
+			return 0
+		else
+			return 1
+		fi
+	) 9>"$rt_tables_lock"
+}
+
+# Clean up all the table ids that we might own.
+clean_up_table_ids ()
+{
+ ensure_rt_tables
+
+ (
+ # Note that die() just gets us out of the subshell...
+ flock --timeout 30 9 || \
+ die "clean_up_table_ids: failed to lock file $rt_tables"
+
+ # Delete any items from the file that have a table id in our
+ # range or a label matching our label. Preserve comments.
+ _tmp="${rt_tables}.$$.ctdb"
+ awk -v min="$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" \
+ -v max="$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" \
+ -v pre="$table_id_prefix" \
+ '/^#/ ||
+ !(min <= $1 && $1 <= max) &&
+ !(index($2, pre) == 1) {
+ print $0 }' "$rt_tables" >"$_tmp"
+
+ mv "$_tmp" "$rt_tables"
+ ) 9>"$rt_tables_lock"
+}
+
+######################################################################
+
+# This prints the config for an IP, which is either relevant entries
+# from the config file or, if set to the magic link local value, some
+# link local routing config for the IP.
+get_config_for_ip ()
+{
+ _ip="$1"
+
+ if have_link_local_config ; then
+ # When parsing public_addresses also split on '/'. This means
+ # that we get the maskbits as item #2 without further parsing.
+ while IFS="/$IFS" read _i _maskbits _x ; do
+ if [ "$_ip" = "$_i" ] ; then
+ printf "%s" "$_ip "; ipv4_host_addr_to_net "$_ip" "$_maskbits"
+ fi
+ done <"${CTDB_BASE}/public_addresses"
+ else
+ while read _i _rest ; do
+ if [ "$_ip" = "$_i" ] ; then
+ printf '%s\t%s\n' "$_ip" "$_rest"
+ fi
+ done <"$CTDB_PER_IP_ROUTING_CONF"
+ fi
+}
+
+ip_has_configuration ()
+{
+ _ip="$1"
+
+ _conf=$(get_config_for_ip "$_ip")
+ [ -n "$_conf" ]
+}
+
+add_routing_for_ip ()
+{
+ _iface="$1"
+ _ip="$2"
+
+ # Do nothing if no config for this IP.
+ ip_has_configuration "$_ip" || return 0
+
+ ensure_table_id_for_ip "$_ip" || \
+ die "add_routing_for_ip: out of table ids in range $CTDB_PER_IP_ROUTING_TABLE_ID_LOW - $CTDB_PER_IP_ROUTING_TABLE_ID_HIGH"
+
+ _pref="$CTDB_PER_IP_ROUTING_RULE_PREF"
+ _table_id="${table_id_prefix}${_ip}"
+
+ del_routing_for_ip "$_ip" >/dev/null 2>&1
+
+ ip rule add from "$_ip" pref "$_pref" table "$_table_id" || \
+ die "add_routing_for_ip: failed to add rule for $_ip"
+
+ # Add routes to table for any lines matching the IP.
+ get_config_for_ip "$_ip" |
+ while read _i _dest _gw ; do
+ _r="$_dest ${_gw:+via} $_gw dev $_iface table $_table_id"
+ # Intentionally unquoted multi-word value here
+ # shellcheck disable=SC2086
+ ip route add $_r || \
+ die "add_routing_for_ip: failed to add route: $_r"
+ done
+}
+
+del_routing_for_ip ()
+{
+ _ip="$1"
+
+ _pref="$CTDB_PER_IP_ROUTING_RULE_PREF"
+ _table_id="${table_id_prefix}${_ip}"
+
+ # Do this unconditionally since we own any matching table ids.
+ # However, print a meaningful message if something goes wrong.
+ _cmd="ip rule del from $_ip pref $_pref table $_table_id"
+ _out=$($_cmd 2>&1) || \
+ cat <<EOF
+WARNING: Failed to delete policy routing rule
+ Command "$_cmd" failed:
+ $_out
+EOF
+ # This should never usually fail, so don't redirect output.
+ # However, it can fail when deleting a rogue IP, since there will
+ # be no routes for that IP. In this case it should only fail when
+ # the rule deletion above has already failed because the table id
+ # is invalid. Therefore, go to a little bit of trouble to indent
+ # the failure message so that it is associated with the above
+ # warning message and doesn't look too nasty.
+ ip route flush table "$_table_id" 2>&1 | sed -e 's@^.@ &@'
+}
+
+######################################################################
+
+flush_rules_and_routes ()
+{
+ ip rule show |
+ while read _p _x _i _x _t ; do
+ # Remove trailing colon after priority/preference.
+ _p="${_p%:}"
+ # Only remove rules that match our priority/preference.
+ [ "$CTDB_PER_IP_ROUTING_RULE_PREF" = "$_p" ] || continue
+
+ echo "Removing ip rule for public address $_i for routing table $_t"
+ ip rule del from "$_i" table "$_t" pref "$_p"
+ ip route flush table "$_t" 2>/dev/null
+ done
+}
+
+# Add any missing routes. Some might have gone missing if, for
+# example, all IPs on the network were removed (possibly if the
+# primary was removed). If $1 is "force" then (re-)add all the
+# routes.
+add_missing_routes ()
+{
+ $CTDB ip -v -X | {
+ read _x # skip header line
+
+ # Read the rest of the lines. We're only interested in the
+ # "IP" and "ActiveInterface" columns. The latter is only set
+ # for addresses local to this node, making it easy to skip
+ # non-local addresses. For each local IP address we check if
+ # the relevant routing table is populated and populate it if
+ # not.
+ while IFS="|" read _x _ip _x _iface _x ; do
+ [ -n "$_iface" ] || continue
+
+ _table_id="${table_id_prefix}${_ip}"
+ if [ -z "$(ip route show table "$_table_id" 2>/dev/null)" ] || \
+ [ "$1" = "force" ] ; then
+ add_routing_for_ip "$_iface" "$_ip"
+ fi
+ done
+ } || exit $? # a failing pipeline terminates the script with its status
+}
+
+# Remove rules/routes for addresses that we're not hosting. If a
+# releaseip event failed in an earlier script then we might not have
+# had a chance to remove the corresponding rules/routes.
+remove_bogus_routes ()
+{
+ # Get the IPs currently hosted by this node, each anchored with '@'.
+ _ips=$($CTDB ip -v -X | awk -F'|' 'NR > 1 && $4 != "" {printf "@%s@\n", $2}')
+
+ # _x is intentionally ignored
+ # shellcheck disable=SC2034
+ ip rule show |
+ while read _p _x _i _x _t ; do
+ # Remove trailing colon after priority/preference.
+ _p="${_p%:}"
+ # Only remove rules that match our priority/preference.
+ [ "$CTDB_PER_IP_ROUTING_RULE_PREF" = "$_p" ] || continue
+ # Only remove rules for which we don't have an IP. This could
+ # be done with grep, but let's do it with shell prefix removal
+ # to avoid unnecessary processes. This falls through if
+ # "@${_i}@" isn't present in $_ips.
+ [ "$_ips" = "${_ips#*@"${_i}"@}" ] || continue
+
+ echo "Removing ip rule/routes for unhosted public address $_i"
+ del_routing_for_ip "$_i"
+ done
+}
+
+######################################################################
+
+ctdb_check_args "$@"
+
+case "$1" in
+startup)
+ flush_rules_and_routes
+
+ # make sure that we only respond to ARP messages from the NIC
+ # where a particular ip address is associated.
+ get_proc sys/net/ipv4/conf/all/arp_filter >/dev/null 2>&1 && {
+ set_proc sys/net/ipv4/conf/all/arp_filter 1
+ }
+ ;;
+
+shutdown)
+ flush_rules_and_routes
+ clean_up_table_ids
+ ;;
+
+takeip)
+ iface=$2
+ ip=$3
+ # maskbits included here so argument order is obvious
+ # shellcheck disable=SC2034
+ maskbits=$4
+
+ ensure_ipv4_is_valid_addr "$1" "$ip"
+ add_routing_for_ip "$iface" "$ip"
+
+ # flush our route cache
+ set_proc sys/net/ipv4/route/flush 1
+
+ $CTDB gratarp "$ip" "$iface"
+ ;;
+
+updateip)
+ # oiface, maskbits included here so argument order is obvious
+ # shellcheck disable=SC2034
+ oiface=$2
+ niface=$3
+ ip=$4
+ # shellcheck disable=SC2034
+ maskbits=$5
+
+ ensure_ipv4_is_valid_addr "$1" "$ip"
+ add_routing_for_ip "$niface" "$ip"
+
+ # flush our route cache
+ set_proc sys/net/ipv4/route/flush 1
+
+ $CTDB gratarp "$ip" "$niface"
+ tickle_tcp_connections "$ip"
+ ;;
+
+releaseip)
+ iface=$2
+ ip=$3
+ # maskbits included here so argument order is obvious
+ # shellcheck disable=SC2034
+ maskbits=$4
+
+ ensure_ipv4_is_valid_addr "$1" "$ip"
+ del_routing_for_ip "$ip"
+ ;;
+
+ipreallocated)
+ add_missing_routes
+ remove_bogus_routes
+ ;;
+
+reconfigure)
+ echo "Reconfiguring service \"${service_name}\"..."
+
+ add_missing_routes "force"
+ remove_bogus_routes
+
+ # flush our route cache
+ set_proc sys/net/ipv4/route/flush 1
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/20.multipathd.script b/ctdb/config/events/legacy/20.multipathd.script
new file mode 100755
index 0000000..a420251
--- /dev/null
+++ b/ctdb/config/events/legacy/20.multipathd.script
@@ -0,0 +1,83 @@
+#!/bin/sh
+# ctdb event script for monitoring the multipath daemon
+#
+# Configure monitoring of multipath devices by listing the device serials
+# in /etc/ctdb/multipathd :
+# CTDB_MONITOR_MPDEVICES="device1 device2 ..."
+#
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+service_name="multipathd"
+
+load_script_options
+
+[ -n "$CTDB_MONITOR_MPDEVICES" ] || exit 0
+
+ctdb_setup_state_dir "service" "$service_name"
+
+# script_state_dir set by ctdb_setup_state_dir()
+# shellcheck disable=SC2154
+multipath_fail="${script_state_dir}/fail"
+
+multipathd_check_background()
+{
+ for _device in $CTDB_MONITOR_MPDEVICES; do
+ # Check multipath knows about the device
+ _out=$(multipath -ll "$_device")
+ if [ -z "$_out" ] ; then
+ echo "ERROR: device \"${_device}\" not known to multipathd" \
+ >"$multipath_fail"
+ exit 1
+ fi
+
+ # Check for at least 1 active path
+ if ! echo "$_out" | grep 'prio=.* status=active' >/dev/null 2>&1 ; then
+ echo "ERROR: multipath device \"${_device}\" has no active paths" \
+ >"$multipath_fail"
+ exit 1
+ fi
+ done
+ exit 0
+}
+
+multipathd_check()
+{
+ # Run the actual check in the background since the call to
+ # multipath may block
+ multipathd_check_background </dev/null >/dev/null 2>&1 &
+ _pid="$!"
+ _timeleft=10
+
+ while [ $_timeleft -gt 0 ]; do
+ _timeleft=$((_timeleft - 1))
+
+ # see if the process still exists
+ kill -0 $_pid >/dev/null 2>&1 || {
+ if wait $_pid ; then
+ return 0
+ else
+ cat "$multipath_fail"
+ rm -f "$multipath_fail"
+ return 1
+ fi
+ }
+ sleep 1
+ done
+
+ echo "ERROR: callout to multipath checks hung"
+ # If hung then this probably won't work, but worth trying...
+ kill -9 $_pid >/dev/null 2>&1
+ return 1
+}
+
+case "$1" in
+monitor)
+ multipathd_check || die "multipath monitoring failed"
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/31.clamd.script b/ctdb/config/events/legacy/31.clamd.script
new file mode 100755
index 0000000..5d60fe3
--- /dev/null
+++ b/ctdb/config/events/legacy/31.clamd.script
@@ -0,0 +1,37 @@
+#!/bin/sh
+# event script to manage clamd in a cluster environment
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+detect_init_style
+
+case $CTDB_INIT_STYLE in
+redhat)
+ service_name="clamd"
+ ;;
+*)
+ service_name="clamav"
+ ;;
+esac
+
+load_script_options
+
+case "$1" in
+startup) # stop any instance already running, then start; fail the event if start fails
+ service "$service_name" stop > /dev/null 2>&1
+ service "$service_name" start || exit $?
+ ;;
+
+shutdown) # stop the service; the action is a separate argument to service
+ service "$service_name" stop
+ ;;
+
+monitor) # exit non-zero if the check of $CTDB_CLAMD_SOCKET fails
+ ctdb_check_unix_socket "$CTDB_CLAMD_SOCKET" || exit $?
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/40.vsftpd.script b/ctdb/config/events/legacy/40.vsftpd.script
new file mode 100755
index 0000000..2d2aac4
--- /dev/null
+++ b/ctdb/config/events/legacy/40.vsftpd.script
@@ -0,0 +1,57 @@
+#!/bin/sh
+# event script to manage vsftpd in a cluster environment
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+service_name="vsftpd"
+
+service_reconfigure ()
+{
+ # shellcheck disable=SC2317
+ # Called indirectly via ctdb_service_reconfigure()
+ service "$service_name" restart
+}
+
+load_script_options
+
+ctdb_setup_state_dir "service" "$service_name"
+
+port_21="vsftpd listening on TCP port 21"
+
+case "$1" in
+startup)
+ service "$service_name" stop > /dev/null 2>&1
+ service "$service_name" start
+ failcount_init "$port_21"
+ ;;
+
+shutdown)
+ service "$service_name" stop
+ ;;
+
+takeip|releaseip)
+ ctdb_service_set_reconfigure
+ ;;
+
+ipreallocated)
+ if ctdb_service_needs_reconfigure ; then
+ ctdb_service_reconfigure
+ fi
+ ;;
+
+monitor)
+ if ctdb_check_tcp_ports 21 ; then
+ failcount_reset "$port_21"
+ else
+ # Set defaults, if unset
+ : "${CTDB_VSFTPD_MONITOR_THRESHOLDS:=1:2}"
+
+ failcount_incr "$port_21" "$CTDB_VSFTPD_MONITOR_THRESHOLDS"
+ fi
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/41.httpd.script b/ctdb/config/events/legacy/41.httpd.script
new file mode 100755
index 0000000..dd90aed
--- /dev/null
+++ b/ctdb/config/events/legacy/41.httpd.script
@@ -0,0 +1,78 @@
+#!/bin/sh
+# event script to manage httpd in a cluster environment
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+detect_init_style
+
+case $CTDB_INIT_STYLE in
+redhat)
+ service_name="httpd"
+ ;;
+suse|debian|*)
+ service_name="apache2"
+ ;;
+esac
+
+load_script_options
+
+ctdb_setup_state_dir "service" "$service_name"
+
+# RHEL5 sometimes uses SIGKILL to terminate httpd, which then leaks semaphores.
+# This is a hack to clean them up; it only acts when the killall -0 probe fails.
+cleanup_httpd_semaphore_leak() {
+ killall -q -0 "$service_name" ||
+ for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do
+ ipcrm -s "$i"
+ done
+}
+
+##########
+
+service_start () # clean up leaked semaphores, then start the web server
+{
+ cleanup_httpd_semaphore_leak
+ service "$service_name" start
+}
+service_stop () # stop the web server, then SIGKILL anything left; always succeeds
+{
+ service "$service_name" stop
+ killall -q -9 "$service_name" || true
+}
+
+case "$1" in
+startup)
+ service_start
+ ctdb_counter_init
+ ;;
+
+shutdown)
+ service_stop
+ ;;
+
+monitor)
+ if ctdb_check_tcp_ports 80 >/dev/null 2>/dev/null ; then
+ ctdb_counter_init
+ else
+ ctdb_counter_incr
+ num_fails=$(ctdb_counter_get)
+ if [ "$num_fails" -eq 2 ] ; then
+ echo "HTTPD is not running. Trying to restart HTTPD."
+ service_stop
+ service_start
+ exit 0
+ elif [ "$num_fails" -ge 5 ] ; then
+ echo "HTTPD is not running. Trying to restart HTTPD."
+ service_stop
+ service_start
+ exit 1
+ fi
+ fi
+ ;;
+esac
+
+exit 0
+
diff --git a/ctdb/config/events/legacy/47.samba-dcerpcd.script b/ctdb/config/events/legacy/47.samba-dcerpcd.script
new file mode 100755
index 0000000..9492d55
--- /dev/null
+++ b/ctdb/config/events/legacy/47.samba-dcerpcd.script
@@ -0,0 +1,66 @@
+#!/bin/sh
+# ctdb event script for SAMBA DCERPCD Services
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+detect_init_style
+
+case $CTDB_INIT_STYLE in
+ *)
+ # distributions don't have this yet,
+ # but assume samba-dcerpcd as service name
+ CTDB_SERVICE_SAMBA_DCERPCD=${CTDB_SERVICE_SAMBA_DCERPCD:-samba-dcerpcd}
+ ;;
+esac
+
+load_script_options
+
+service_start ()
+{
+ # make sure samba-dcerpcd is not already started
+ service "$CTDB_SERVICE_SAMBA_DCERPCD" stop > /dev/null 2>&1
+ killall -0 -q samba-dcerpcd && {
+ sleep 1
+ # make absolutely sure samba-dcerpcd is dead
+ killall -q -9 samba-dcerpcd
+ }
+
+ # start Samba dcerpcd service. Start it reniced, as under very heavy load
+ # the number of smbd processes will mean that it leaves few cycles
+ # for anything else
+ nice_service "$CTDB_SERVICE_SAMBA_DCERPCD" start || die "Failed to start samba-dcerpcd"
+}
+
+service_stop ()
+{
+ service "$CTDB_SERVICE_SAMBA_DCERPCD" stop
+}
+
+service_status ()
+{
+ service "$CTDB_SERVICE_SAMBA_DCERPCD" status > /dev/null
+ test $? = 0 && return 0
+ service "$CTDB_SERVICE_SAMBA_DCERPCD" status
+}
+
+###########################
+
+case "$1" in
+startup)
+ service_start
+ ;;
+
+shutdown)
+ service_stop
+ ;;
+
+monitor)
+ service_status
+ ;;
+
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/48.netbios.script b/ctdb/config/events/legacy/48.netbios.script
new file mode 100755
index 0000000..1531e49
--- /dev/null
+++ b/ctdb/config/events/legacy/48.netbios.script
@@ -0,0 +1,75 @@
+#!/bin/sh
+# ctdb event script for Netbios Name Services
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+detect_init_style
+
+case $CTDB_INIT_STYLE in
+ suse)
+ CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-nmb}
+ ;;
+ debian)
+ CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-nmbd}
+ ;;
+ *)
+ # Use redhat style as default:
+ CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-nmb}
+ ;;
+esac
+
+service_name="netbios"
+
+load_script_options
+
+ctdb_setup_state_dir "service" "$service_name"
+
+service_start ()
+{
+ # make sure nmbd is not already started
+ service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1
+ killall -0 -q nmbd && {
+ sleep 1
+ # make absolutely sure nmbd is dead
+ killall -q -9 nmbd
+ }
+
+ # start Samba nmbd service. Start it reniced, as under very heavy load
+ # the number of smbd processes will mean that it leaves few cycles
+ # for anything else
+ nice_service "$CTDB_SERVICE_NMB" start || die "Failed to start nmbd"
+}
+
+service_stop ()
+{
+ service "$CTDB_SERVICE_NMB" stop
+}
+
+service_status ()
+{
+ service "$CTDB_SERVICE_NMB" status > /dev/null
+ test $? = 0 && return 0
+ service "$CTDB_SERVICE_NMB" status
+}
+
+###########################
+
+case "$1" in
+startup)
+ service_start
+ ;;
+
+shutdown)
+ service_stop
+ ;;
+
+monitor)
+ service_status
+ ;;
+
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/49.winbind.script b/ctdb/config/events/legacy/49.winbind.script
new file mode 100755
index 0000000..852b541
--- /dev/null
+++ b/ctdb/config/events/legacy/49.winbind.script
@@ -0,0 +1,55 @@
+#!/bin/sh
+# ctdb event script for winbind
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind}
+
+# service_name is used by various functions
+# shellcheck disable=SC2034
+service_name="winbind"
+
+load_script_options
+
+service_start ()
+{
+ service "$CTDB_SERVICE_WINBIND" stop >/dev/null 2>&1
+ killall -0 -q winbindd && {
+ sleep 1
+ # make absolutely sure winbindd is dead
+ killall -q -9 winbindd
+ }
+
+ service "$CTDB_SERVICE_WINBIND" start || \
+ die "Failed to start winbind"
+}
+
+service_stop ()
+{
+ service "$CTDB_SERVICE_WINBIND" stop
+}
+
+###########################
+
+case "$1" in
+startup)
+ service_start
+ ;;
+
+shutdown)
+ service_stop
+ ;;
+
+monitor)
+ if ! out=$(wbinfo -p 2>&1) ; then
+ echo "ERROR: wbinfo -p returned error"
+ echo "$out"
+ exit 1
+ fi
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/50.samba.script b/ctdb/config/events/legacy/50.samba.script
new file mode 100755
index 0000000..84600e2
--- /dev/null
+++ b/ctdb/config/events/legacy/50.samba.script
@@ -0,0 +1,166 @@
+#!/bin/sh
+# ctdb event script for Samba
+
+[ -n "$CTDB_BASE" ] ||
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+detect_init_style
+
+case $CTDB_INIT_STYLE in
+suse)
+ CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
+ ;;
+debian)
+ CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smbd}
+ ;;
+*)
+ # Use redhat style as default:
+ CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
+ ;;
+esac
+
+service_name="samba"
+
+load_script_options
+
+ctdb_setup_state_dir "service" "$service_name"
+
+service_start()
+{
+ # make sure samba is not already started
+ service "$CTDB_SERVICE_SMB" stop >/dev/null 2>&1
+ killall -0 -q smbd && {
+ sleep 1
+ # make absolutely sure samba is dead
+ killall -q -9 smbd
+ }
+ # start Samba service. Start it reniced, as under very heavy load
+ # the number of smbd processes will mean that it leaves few cycles
+ # for anything else
+ nice_service "$CTDB_SERVICE_SMB" start || die "Failed to start samba"
+}
+
+service_stop()
+{
+ service "$CTDB_SERVICE_SMB" stop
+ program_stack_traces "smbd" 5
+}
+
+######################################################################
+# Show the testparm output using a cached smb.conf to avoid delays due
+# to registry access.
+
+# script_state_dir set by ctdb_setup_state_dir()
+# shellcheck disable=SC2154
+smbconf_cache="$script_state_dir/smb.conf.cache"
+
+testparm_foreground_update()
+{
+ _timeout="$1"
+
+ # No need to remove these temporary files, since there are only 2
+ # of them.
+ _out="${smbconf_cache}.out"
+ _err="${smbconf_cache}.err"
+
+ timeout "$_timeout" testparm -v -s >"$_out" 2>"$_err"
+ case $? in
+ 0) : ;;
+ 124)
+ if [ -f "$smbconf_cache" ]; then
+ echo "WARNING: smb.conf cache update timed out - using old cache file"
+ return 1
+ else
+ echo "ERROR: smb.conf cache create failed - testparm command timed out"
+ exit 1
+ fi
+ ;;
+ *)
+ if [ -f "$smbconf_cache" ]; then
+ echo "WARNING: smb.conf cache update failed - using old cache file"
+ cat "$_err"
+ return 1
+ else
+ echo "ERROR: smb.conf cache create failed - testparm failed with:"
+ cat "$_err"
+ exit 1
+ fi
+ ;;
+ esac
+
+ # Only using $$ here to avoid a collision. This is written into
+ # CTDB's own state directory so there is no real need for a secure
+ # temporary file.
+ _tmpfile="${smbconf_cache}.$$"
+ # Patterns to exclude...
+ _pat='^[[:space:]]+(registry[[:space:]]+shares|include|copy|winbind[[:space:]]+separator)[[:space:]]+='
+ grep -Ev "$_pat" <"$_out" >"$_tmpfile"
+ mv "$_tmpfile" "$smbconf_cache" # atomic
+
+ return 0
+}
+
+testparm_background_update()
+{
+ _timeout="$1"
+
+ testparm_foreground_update "$_timeout" >/dev/null 2>&1 </dev/null &
+}
+
+testparm_get ()
+{
+ _param="$1"
+
+ sed -n \
+ -e "s|^[[:space:]]*${_param}[[:space:]]*=[[:space:]]\(..*\)|\1|p" \
+ "$smbconf_cache"
+
+}
+
+list_samba_shares()
+{
+ testparm_get "path" | sed -e 's/"//g'
+}
+
+list_samba_ports()
+{
+ testparm_get "smb ports"
+}
+
+###########################
+
+case "$1" in
+startup)
+ service_start
+ ;;
+
+shutdown)
+ service_stop
+ ;;
+
+monitor)
+ testparm_foreground_update 10
+ ret=$?
+
+ smb_ports="$CTDB_SAMBA_CHECK_PORTS"
+ if [ -z "$smb_ports" ]; then
+ smb_ports=$(list_samba_ports)
+ [ -n "$smb_ports" ] || die "Failed to set smb ports"
+ fi
+ # Intentionally unquoted multi-word value here
+ # shellcheck disable=SC2086
+ ctdb_check_tcp_ports $smb_ports || exit $?
+
+ if [ "$CTDB_SAMBA_SKIP_SHARE_CHECK" != "yes" ]; then
+ list_samba_shares | ctdb_check_directories || exit $?
+ fi
+
+ if [ $ret -ne 0 ]; then
+ testparm_background_update 10
+ fi
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/60.nfs.script b/ctdb/config/events/legacy/60.nfs.script
new file mode 100755
index 0000000..b7ae074
--- /dev/null
+++ b/ctdb/config/events/legacy/60.nfs.script
@@ -0,0 +1,301 @@
+#!/bin/sh
+# script to manage nfs in a clustered environment
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+service_name="nfs"
+
+load_system_config "nfs"
+
+load_script_options
+
+ctdb_setup_state_dir "service" "$service_name"
+
+######################################################################
+
+service_reconfigure ()
+{
+ # Restart lock manager, notify clients
+ # shellcheck disable=SC2317
+ # Called indirectly via check_thresholds()
+ if [ -x "${CTDB_BASE}/statd-callout" ] ; then
+ "${CTDB_BASE}/statd-callout" notify &
+ fi >/dev/null 2>&1
+}
+
+######################################################################
+
+######################################################
+# Check the health of NFS services
+#
+# Use .check files in $CTDB_NFS_CHECKS_DIR.
+# Default is "${CTDB_BASE}/nfs-checks.d/"
+######################################################
+nfs_check_services ()
+{
+ _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"
+
+ # Files must end with .check - avoids editor backups, RPM fu, ...
+ for _f in "$_dir"/[0-9][0-9].*.check ; do
+ [ -r "$_f" ] || continue
+
+ _t="${_f%.check}"
+ _progname="${_t##*/[0-9][0-9].}"
+
+ nfs_check_service "$_progname" <"$_f"
+ done
+}
+
+######################################################
+# Check the health of an NFS service
+#
+# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
+#
+# Reads variables from stdin
+#
+# Variables are:
+#
+# * family - "tcp" or "udp" or space separated list
+# default: tcp, not used with "service_check_cmd"
+# * version - optional, RPC service version number
+# default is to omit to check for any version,
+# not used with "service_check_cmd"
+# * unhealthy_after - number of check fails before unhealthy
+# default: 1
+# * restart_every - number of check fails before restart
+# default: 0, meaning no restart
+# * service_stop_cmd - command to stop service
+# default: no default, must be provided if
+# restart_every > 0
+# * service_start_cmd - command to start service
+# default: no default, must be provided if
+# restart_every > 0
+# * service_check_cmd - command to check health of service
+# default is to check RPC service using rpcinfo
+# * service_debug_cmd - command to debug a service after trying to stop it;
+# for example, it can be useful to print stack
+# traces of threads that have not exited, since
+# they may be stuck doing I/O;
+# no default, see also function program_stack_traces()
+#
+# Quoting in values is not preserved
+#
+######################################################
+nfs_check_service ()
+{
+ _progname="$1"
+
+ # This sub-shell is created to intentionally limit the scope of
+ # variable values read from the .check files.
+ # shellcheck disable=SC2030
+ (
+ # Subshell to restrict scope variables...
+
+ # Defaults
+ family="tcp"
+ version=""
+ unhealthy_after=1
+ restart_every=0
+ service_stop_cmd=""
+ service_start_cmd=""
+ service_check_cmd=""
+ service_debug_cmd=""
+
+ # Eval line-by-line. Expands variable references in values.
+ # Also allows variable name checking, which seems useful.
+ while read _line ; do
+ case "$_line" in
+ \#*|"") : ;; # Ignore comments, blank lines
+
+ family=*|version=*|\
+ unhealthy_after=*|restart_every=*|\
+ service_stop_cmd=*|service_start_cmd=*|\
+ service_check_cmd=*|service_debug_cmd=*)
+
+ eval "$_line"
+ ;;
+ *)
+ echo "ERROR: Unknown variable for ${_progname}: ${_line}"
+ exit 1
+ esac
+ done
+
+ _ok=false
+ if [ -n "$service_check_cmd" ] ; then
+ # Using eval means variables can contain semicolon separated commands
+ if eval "$service_check_cmd" ; then
+ _ok=true
+ else
+ _err="monitoring service \"${_progname}\" failed"
+ fi
+ else
+ if nfs_check_rpcinfo \
+ "$_progname" "$version" "$family" >/dev/null ; then
+ _ok=true
+ else
+ _err="$ctdb_check_rpc_out"
+ fi
+ fi
+
+ if $_ok ; then
+ if [ $unhealthy_after -ne 1 ] || [ $restart_every -ne 0 ] ; then
+ ctdb_counter_init "$_progname"
+ fi
+ exit 0
+ fi
+
+ ctdb_counter_incr "$_progname"
+ _failcount=$(ctdb_counter_get "$_progname")
+
+ _unhealthy=false
+ if [ "$unhealthy_after" -gt 0 ] ; then
+ if [ "$_failcount" -ge "$unhealthy_after" ] ; then
+ _unhealthy=true
+ echo "ERROR: $_err"
+ fi
+ fi
+
+ if [ "$restart_every" -gt 0 ] ; then
+ if [ $((_failcount % restart_every)) -eq 0 ] ; then
+ if ! $_unhealthy ; then
+ echo "WARNING: $_err"
+ fi
+ nfs_restart_service
+ fi
+ fi
+
+ if $_unhealthy ; then
+ exit 1
+ fi
+
+ return 0
+ ) || exit 1
+}
+
+# Uses: service_stop_cmd, service_start_cmd, service_debug_cmd
+# This function is called within the sub-shell that shellcheck thinks
+# loses the above variable values.
+# shellcheck disable=SC2031
+nfs_restart_service ()
+{
+ if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then
+ die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
+ fi
+
+ echo "Trying to restart service \"${_progname}\"..."
+ # Using eval means variables can contain semicolon separated commands
+ eval "$service_stop_cmd"
+ if [ -n "$service_debug_cmd" ] ; then
+ eval "$service_debug_cmd"
+ fi
+ background_with_logging eval "$service_start_cmd"
+}
+
+######################################################
+# Check an RPC service with rpcinfo
+######################################################
+ctdb_check_rpc ()
+{
+ _progname="$1" # passed to rpcinfo (looked up in /etc/rpc)
+ _version="$2" # optional, not passed if empty/unset
+ _family="${3:-tcp}" # optional, default is "tcp"
+
+ case "$_family" in
+ tcp6|udp6)
+ _localhost="${CTDB_RPCINFO_LOCALHOST6:-::1}"
+ ;;
+ *)
+ _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
+ esac
+
+ # $_version is not quoted because it is optional
+ # shellcheck disable=SC2086
+ if ! ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \
+ "$_progname" $_version 2>&1) ; then
+ ctdb_check_rpc_out="$_progname failed RPC check:
+$ctdb_check_rpc_out"
+ echo "$ctdb_check_rpc_out"
+ return 1
+ fi
+}
+
+nfs_check_rpcinfo ()
+{
+ _progname="$1" # passed to rpcinfo (looked up in /etc/rpc)
+ _versions="$2" # optional, space separated, not passed if empty/unset
+ _families="${3:-tcp}" # optional, space separated, default is "tcp"
+
+ for _family in $_families ; do
+ if [ -n "$_versions" ] ; then
+ for _version in $_versions ; do
+ ctdb_check_rpc "$_progname" "$_version" "$_family" || return $?
+ done
+ else
+ ctdb_check_rpc "$_progname" "" "$_family" || return $?
+ fi
+ done
+}
+
+##################################################################
+# use statd-callout to update NFS lock info
+##################################################################
+nfs_update_lock_info ()
+{
+ if [ -x "$CTDB_BASE/statd-callout" ] ; then
+ "$CTDB_BASE/statd-callout" update
+ fi
+}
+
+######################################################################
+
+# script_state_dir set by ctdb_setup_state_dir()
+# shellcheck disable=SC2154
+nfs_callout_init "$script_state_dir"
+
+case "$1" in
+startup)
+ nfs_callout "$@" || exit $?
+ ;;
+
+shutdown)
+ nfs_callout "$@" || exit $?
+ ;;
+
+takeip)
+ nfs_callout "$@" || exit $?
+ ctdb_service_set_reconfigure
+ ;;
+
+releaseip)
+ nfs_callout "$@" || exit $?
+ ctdb_service_set_reconfigure
+ ;;
+
+ipreallocated)
+ if ctdb_service_needs_reconfigure ; then
+ ctdb_service_reconfigure
+ fi
+ ;;
+
+monitor)
+ nfs_callout "monitor-pre" || exit $?
+
+ # Check that directories for shares actually exist
+ if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
+ nfs_callout "monitor-list-shares" | ctdb_check_directories || \
+ exit $?
+ fi
+
+ update_tickles 2049
+ nfs_update_lock_info
+
+ nfs_check_services
+
+ nfs_callout "monitor-post" || exit $?
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/70.iscsi.script b/ctdb/config/events/legacy/70.iscsi.script
new file mode 100755
index 0000000..e74651d
--- /dev/null
+++ b/ctdb/config/events/legacy/70.iscsi.script
@@ -0,0 +1,87 @@
+#!/bin/sh
+
+# CTDB event script for TGTD based iSCSI
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+# service_name is used by various functions
+# shellcheck disable=SC2034
+service_name="iscsi"
+
+load_script_options
+
+[ -z "$CTDB_START_ISCSI_SCRIPTS" ] && {
+ echo "No iscsi start script directory found"
+ exit 0
+}
+
+case "$1" in
+ipreallocated)
+ all_ips=$($CTDB -X ip | tail -n +2)
+
+ # Block the iSCSI port. Only block for the address families
+ # we have configured. This copes with, for example, ip6tables
+ # being unavailable on an IPv4-only system.
+ have_ipv4=false
+ have_ipv6=false
+ # x is intentionally ignored
+ # shellcheck disable=SC2034
+ while IFS='|' read x ip pnn x ; do
+ case "$ip" in
+ *:*) have_ipv6=true ;;
+ *) have_ipv4=true ;;
+ esac
+ done <<EOF
+$all_ips
+EOF
+ if $have_ipv4 ; then
+ iptables -I INPUT 1 -p tcp --dport 3260 -j DROP
+ fi
+ if $have_ipv6 ; then
+ ip6tables -I INPUT 1 -p tcp --dport 3260 -j DROP
+ fi
+
+ # Stop iSCSI daemon
+ killall -9 tgtd >/dev/null 2>/dev/null
+
+ pnn=$(ctdb_get_pnn)
+ [ -n "$pnn" ] || die "Failed to get node pnn"
+
+ # Start iSCSI daemon
+ tgtd >/dev/null 2>&1
+
+ # Run a script for each currently hosted public IP address
+ ips=$(echo "$all_ips" | awk -F'|' -v pnn="$pnn" '$3 == pnn {print $2}')
+ for ip in $ips ; do
+ script="${CTDB_START_ISCSI_SCRIPTS}/${ip}.sh"
+ if [ -x "$script" ] ; then
+ echo "Starting iSCSI service for public address ${ip}"
+ "$script"
+ fi
+ done
+
+ # Unblock iSCSI port. These can be unconditional (compared to
+ # blocking above), since errors are redirected.
+ while iptables -D INPUT -p tcp --dport 3260 -j DROP >/dev/null 2>&1 ; do
+ :
+ done
+ while ip6tables -D INPUT -p tcp --dport 3260 -j DROP >/dev/null 2>&1 ; do
+ :
+ done
+
+ ;;
+
+shutdown)
+ # Shutdown iSCSI daemon when ctdb goes down
+ killall -9 tgtd >/dev/null 2>&1
+ ;;
+
+monitor)
+ ctdb_check_tcp_ports 3260 || exit $?
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/events/legacy/91.lvs.script b/ctdb/config/events/legacy/91.lvs.script
new file mode 100755
index 0000000..8855068
--- /dev/null
+++ b/ctdb/config/events/legacy/91.lvs.script
@@ -0,0 +1,124 @@
+#!/bin/sh
+# script to manage the lvs ip multiplexer for a single public address cluster
+
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+[ -n "$CTDB_LVS_NODES" ] || exit 0
+export CTDB_LVS_NODES
+
+# type is commonly supported and more portable than which(1)
+# shellcheck disable=SC2039
+if ! type ipvsadm >/dev/null 2>&1 ; then
+ echo "LVS configured but ipvsadm not found"
+ exit 0
+fi
+
+
+lvs_follower_only ()
+{
+ _ip_address=$(ctdb_get_ip_address)
+ awk -v my_ip="$_ip_address" \
+ '$1 == my_ip { if ($2 ~ "follower-only") { exit 0 } else { exit 1 } }' \
+ "$CTDB_LVS_NODES"
+}
+
+lvs_check_config ()
+{
+ [ -r "$CTDB_LVS_NODES" ] || \
+ die "error: CTDB_LVS_NODES=${CTDB_LVS_NODES} unreadable"
+ [ -n "$CTDB_LVS_PUBLIC_IP" ] || \
+ die "Invalid configuration: CTDB_LVS_PUBLIC_IP not set"
+ if ! lvs_follower_only ; then
+ [ -n "$CTDB_LVS_PUBLIC_IFACE" ] || \
+ die "Invalid configuration: CTDB_LVS_PUBLIC_IFACE not set"
+ fi
+}
+
+case "$1" in
+setup)
+ lvs_check_config
+ ;;
+startup)
+ lvs_check_config
+
+ ipvsadm -D -t "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1
+ ipvsadm -D -u "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1
+
+ ip addr add "${CTDB_LVS_PUBLIC_IP}/32" dev lo scope host
+
+ # do not respond to ARPs that are for ip addresses with scope 'host'
+ set_proc_maybe sys/net/ipv4/conf/all/arp_ignore 3
+ # do not send out arp requests from loopback addresses
+ set_proc_maybe sys/net/ipv4/conf/all/arp_announce 2
+ ;;
+
+shutdown)
+ lvs_check_config
+
+ ipvsadm -D -t "$CTDB_LVS_PUBLIC_IP"
+ ipvsadm -D -u "$CTDB_LVS_PUBLIC_IP"
+
+ ip addr del "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1
+
+ flush_route_cache
+ ;;
+
+ipreallocated)
+ lvs_check_config
+
+ # Kill connections
+ ipvsadm -D -t "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1
+ ipvsadm -D -u "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1
+ kill_tcp_connections_local_only \
+ "$CTDB_LVS_PUBLIC_IFACE" "$CTDB_LVS_PUBLIC_IP"
+
+ pnn=$(ctdb_get_pnn)
+ lvsleader=$("${CTDB_HELPER_BINDIR}/ctdb_lvs" leader)
+ if [ "$pnn" != "$lvsleader" ] ; then
+ # This node is not the LVS leader so change the IP address
+ # to have scope "host" so this node won't respond to ARPs
+ ip addr del "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1
+ ip addr add "${CTDB_LVS_PUBLIC_IP}/32" dev lo scope host
+ exit 0
+ fi
+
+ # Change the scope so this node starts responding to ARPs
+ ip addr del "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1
+ ip addr add "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1
+
+ ipvsadm -A -t "$CTDB_LVS_PUBLIC_IP" -p 1999999 -s lc
+ ipvsadm -A -u "$CTDB_LVS_PUBLIC_IP" -p 1999999 -s lc
+
+ # Add all nodes (except this node) as LVS servers
+ "${CTDB_HELPER_BINDIR}/ctdb_lvs" list |
+ awk -v pnn="$pnn" '$1 != pnn { print $2 }' |
+ while read ip ; do
+ ipvsadm -a -t "$CTDB_LVS_PUBLIC_IP" -r "$ip" -g
+ ipvsadm -a -u "$CTDB_LVS_PUBLIC_IP" -r "$ip" -g
+ done
+
+ # Add localhost too...
+ ipvsadm -a -t "$CTDB_LVS_PUBLIC_IP" -r 127.0.0.1
+ ipvsadm -a -u "$CTDB_LVS_PUBLIC_IP" -r 127.0.0.1
+
+ $CTDB gratarp \
+ "$CTDB_LVS_PUBLIC_IP" "$CTDB_LVS_PUBLIC_IFACE" >/dev/null 2>&1
+
+ flush_route_cache
+ ;;
+
+monitor)
+ lvs_check_config
+
+ if [ -n "$CTDB_LVS_PUBLIC_IFACE" ] ; then
+ interface_monitor "$CTDB_LVS_PUBLIC_IFACE" || exit 1
+ fi
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/config/functions b/ctdb/config/functions
new file mode 100755
index 0000000..a40b276
--- /dev/null
+++ b/ctdb/config/functions
@@ -0,0 +1,1172 @@
+# Hey Emacs, this is a -*- shell-script -*- !!!
+
+# utility functions for ctdb event scripts
+
+if [ -z "$CTDB_BASE" ]; then
+ echo 'CTDB_BASE unset in CTDB functions file'
+ exit 1
+fi
+export CTDB_BASE
+
+# CTDB_VARDIR is used elsewhere
+# shellcheck disable=SC2034
+CTDB_VARDIR="/usr/local/var/lib/ctdb"
+
+CTDB="${CTDB:-/usr/local/bin/ctdb}"
+
+# Only (and always) override these variables in test code
+
+if [ -z "$CTDB_SCRIPT_VARDIR" ]; then
+ CTDB_SCRIPT_VARDIR="/usr/local/var/lib/ctdb/scripts"
+fi
+
+if [ -z "$CTDB_SYS_ETCDIR" ]; then
+ CTDB_SYS_ETCDIR="/etc"
+fi
+
+if [ -z "$CTDB_HELPER_BINDIR" ]; then
+ CTDB_HELPER_BINDIR="/usr/local/libexec/ctdb"
+fi
+
+#######################################
+# pull in a system config file, if any
+
+# Source the first system config file found for any of the given
+# names.  For each name, "sysconfig" is tried before "default" under
+# $CTDB_SYS_ETCDIR; the search stops at the first file sourced.
+load_system_config()
+{
+	for _i in "$@"; do
+		for _lsc_dir in sysconfig default; do
+			_lsc_file="${CTDB_SYS_ETCDIR}/${_lsc_dir}/${_i}"
+			if [ -f "$_lsc_file" ]; then
+				. "$_lsc_file"
+				return
+			fi
+		done
+	done
+}
+
+# load_script_options [ component script ]
+# script is an event script name relative to a component
+# component is currently ignored
+load_script_options()
+{
+	# Either no arguments or exactly "component script"
+	case $# in
+	0) _script="" ;;
+	2) _script="$2" ;;
+	*) die "usage: load_script_options [ component script ]" ;;
+	esac
+
+	# Options shared by all scripts
+	_options="${CTDB_BASE}/script.options"
+	[ ! -r "$_options" ] || . "$_options"
+
+	# Per-script options: named script, or else derived from $0
+	if [ -z "$_script" ]; then
+		_s="${0%.script}"
+	else
+		_s="${CTDB_BASE}/events/legacy/${_script}"
+	fi
+	_options="${_s}.options"
+	[ ! -r "$_options" ] || . "$_options"
+}
+
+##############################################################
+
+die()
+{
+ _msg="$1"
+ _rc="${2:-1}"
+
+ echo "$_msg" >&2
+ exit "$_rc"
+}
+
+# Log given message or stdin to either syslog or a CTDB log file
+# $1 is the tag passed to logger if syslog is in use.
+script_log()
+{
+	_tag="$1"
+	shift
+
+	case "$CTDB_LOGGING" in
+	file:)
+		# "file:" with no name: the message goes to stderr
+		if [ -z "$*" ]; then
+			cat
+		else
+			echo "$*"
+		fi >&2
+		;;
+	file:* | "")
+		# Named log file, or the default one if logging is unset
+		_file="${CTDB_LOGGING#file:}"
+		: "${_file:=/usr/local/var/log/log.ctdb}"
+		if [ -z "$*" ]; then
+			cat
+		else
+			echo "$*"
+		fi >>"$_file"
+		;;
+	*)
+		# Handle all syslog:* variants here too.  There's no tool to do
+		# the lossy things, so just use logger.
+		logger -t "ctdbd: ${_tag}" "$@"
+		;;
+	esac
+}
+
+# When things are run in the background in an eventscript then logging
+# output might get lost. This is the "solution". :-)
+# Run the given command in the background with stdin detached, and
+# pass its stdout and stderr to script_log tagged "<script_name>&".
+# Always returns 0 without waiting for the command.
+background_with_logging()
+{
+	{
+		"$@" </dev/null 2>&1 | script_log "${script_name}&"
+	} &
+
+	return 0
+}
+
+##############################################################
+# check number of args for different events
+# Validate the argument count for the event named in $1.  Events
+# other than takeip, releaseip and updateip are not checked.  Prints
+# an error and exits with status 1 on a mismatch.
+ctdb_check_args()
+{
+	if [ "$1" = "takeip" ] || [ "$1" = "releaseip" ]; then
+		# event + interface + IP + maskbits
+		if [ $# -ne 4 ]; then
+			echo "ERROR: must supply interface, IP and maskbits"
+			exit 1
+		fi
+	elif [ "$1" = "updateip" ]; then
+		# event + old interface + new interface + IP + maskbits
+		if [ $# -ne 5 ]; then
+			echo "ERROR: must supply old interface, new interface, IP and maskbits"
+			exit 1
+		fi
+	fi
+}
+
+##############################################################
+# determine on what type of system (init style) we are running
+# Set CTDB_INIT_STYLE from the cache file
+# ${CTDB_SCRIPT_VARDIR}/init-style.  If that file does not exist it
+# is created first, either from a preset $CTDB_INIT_STYLE or from
+# ID/ID_LIKE in ${CTDB_SYS_ETCDIR}/os-release.
+detect_init_style()
+{
+	_init_style_file="${CTDB_SCRIPT_VARDIR}/init-style"
+
+	if [ ! -f "$_init_style_file" ]; then
+		# An explicitly configured style wins and is cached as-is
+		if [ -n "$CTDB_INIT_STYLE" ]; then
+			echo "$CTDB_INIT_STYLE" >"$_init_style_file"
+			return
+		fi
+
+		# Subshell to contain variables in os-release file
+		(
+			_os_release="${CTDB_SYS_ETCDIR}/os-release"
+			if [ -f "$_os_release" ]; then
+				. "$_os_release"
+				case "$ID" in
+				centos | fedora | rhel)
+					echo "redhat"
+					;;
+				debian | ubuntu)
+					echo "debian"
+					;;
+				sles | suse)
+					echo "suse"
+					;;
+				*)
+					# Unrecognised ID: fall back to the family
+					# hint, otherwise use the ID itself
+					case "$ID_LIKE" in
+					*centos* | *rhel*)
+						echo "redhat"
+						;;
+					*)
+						echo "$ID"
+						;;
+					esac
+					;;
+				esac
+			else
+				# os-release was not read, so $ID is not set
+				# here: name the missing file instead of
+				# printing an empty distribution name
+				echo "WARNING: unknown distribution, ${_os_release} not found" >&2
+				echo "unknown"
+			fi
+		) >"$_init_style_file"
+	fi
+
+	read -r CTDB_INIT_STYLE <"$_init_style_file"
+}
+
+######################################################
+# simulate /sbin/service on platforms that don't have it
+# _service() makes it easier to hook the service() function for
+# testing.
+# Run operation $2 on service $1 using the first mechanism found:
+# /sbin/service, /usr/sbin/service, /bin/systemctl, then init scripts
+# under $CTDB_SYS_ETCDIR.  The command is prefixed with $_nice, which
+# the calling wrapper sets.
+_service()
+{
+	_service_name="$1"
+	_op="$2"
+
+	# do nothing, when no service was specified
+	if [ -z "$_service_name" ]; then
+		return
+	fi
+
+	for _service_cmd in /sbin/service /usr/sbin/service; do
+		if [ -x "$_service_cmd" ]; then
+			$_nice "$_service_cmd" "$_service_name" "$_op"
+			return
+		fi
+	done
+
+	# systemctl takes the operation before the unit name
+	if [ -x /bin/systemctl ]; then
+		$_nice /bin/systemctl "$_op" "$_service_name"
+		return
+	fi
+
+	for _service_cmd in \
+		"${CTDB_SYS_ETCDIR}/init.d/${_service_name}" \
+		"${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}"; do
+		if [ -x "$_service_cmd" ]; then
+			$_nice "$_service_cmd" "$_op"
+			return
+		fi
+	done
+}
+
+# Run a service operation at normal priority
+service()
+{
+	_nice=
+	_service "$@"
+}
+
+######################################################
+# simulate /sbin/service (niced) on platforms that don't have it
+nice_service()
+{
+ _nice="nice"
+ _service "$@"
+}
+
+######################################################
+# Cached retrieval of PNN from local node. This never changes so why
+# open a client connection to the server each time this is needed?
+# Print the PNN of the local node, cached in
+# ${CTDB_SCRIPT_VARDIR}/my-pnn.
+#
+# The cache is only written when "$CTDB pnn" succeeds with non-empty
+# output, and an empty cache file is treated as missing.  Otherwise a
+# single failed query would leave an empty file behind that is served
+# as the PNN on every later call.  Returns non-zero, printing
+# nothing, if the PNN can not be determined.
+ctdb_get_pnn()
+{
+	_pnn_file="${CTDB_SCRIPT_VARDIR}/my-pnn"
+	if [ ! -s "$_pnn_file" ]; then
+		_pnn_value=$($CTDB pnn) || return $?
+		[ -n "$_pnn_value" ] || return 1
+		echo "$_pnn_value" >"$_pnn_file"
+	fi
+
+	cat "$_pnn_file"
+}
+
+# Cached retrieval of private IP address from local node. This never
+# changes.
+# Print the address of the local node, cached in
+# ${CTDB_SCRIPT_VARDIR}/my-ip-address.  The value is the 3rd
+# '|'-separated field on line 2 of "$CTDB -X nodestatus" output.
+#
+# The cache is only written when a non-empty address is obtained, and
+# an empty cache file is treated as missing, so a failed query is
+# retried on the next call rather than cached.  Returns 1, printing
+# nothing, if no address can be determined.
+ctdb_get_ip_address()
+{
+	_ip_addr_file="${CTDB_SCRIPT_VARDIR}/my-ip-address"
+	if [ ! -s "$_ip_addr_file" ]; then
+		_ip_addr_value=$($CTDB -X nodestatus |
+			awk -F '|' 'NR == 2 { print $3 }')
+		[ -n "$_ip_addr_value" ] || return 1
+		echo "$_ip_addr_value" >"$_ip_addr_file"
+	fi
+
+	cat "$_ip_addr_file"
+}
+
+# Cached retrieval of database options for use by event scripts.
+#
+# If the variables are already set then they should not be overwritten
+# - this should only happen during event script testing.
+ctdb_get_db_options()
+{
+ _db_opts_file="${CTDB_SCRIPT_VARDIR}/db_options.cache"
+
+ if [ ! -f "$_db_opts_file" ]; then
+ {
+ ctdb_translate_option "database" \
+ "volatile database directory" \
+ "CTDB_DBDIR"
+ ctdb_translate_option "database" \
+ "persistent database directory" \
+ "CTDB_DBDIR_PERSISTENT"
+ ctdb_translate_option "database" \
+ "state database directory" \
+ "CTDB_DBDIR_STATE"
+ } >"$_db_opts_file"
+ fi
+
+ . "$_db_opts_file"
+}
+
+ctdb_translate_option()
+{
+ _section="$1"
+ _opt="$2"
+ _variable="$3"
+
+ # ctdb-config already prints an error if something goes wrong
+ _t=$("${CTDB_HELPER_BINDIR}/ctdb-config" get "$_section" "$_opt") ||
+ exit $?
+ echo "${_variable}=\"${_t}\""
+}
+
+######################################################
+# wrapper around /proc/ settings to allow them to be hooked
+# for testing
+# 1st arg is relative path under /proc/, 2nd arg is value to set
+# Write value $2 to the file at path $1 below /proc/
+set_proc()
+{
+	_set_proc_file="/proc/$1"
+	echo "$2" >"$_set_proc_file"
+}
+
+set_proc_maybe()
+{
+ if [ -w "/proc/$1" ]; then
+ set_proc "$1" "$2"
+ fi
+}
+
+######################################################
+# wrapper around getting file contents from /proc/ to allow
+# this to be hooked for testing
+# 1st arg is relative path under /proc/
+# Print the contents of the file at path $1 below /proc/
+get_proc()
+{
+	_get_proc_file="/proc/$1"
+	cat "$_get_proc_file"
+}
+
+######################################################
+# Print up to $_max kernel stack traces for processes named $_program
+program_stack_traces()
+{
+ _prog="$1"
+ _max="${2:-1}"
+
+ _count=1
+ for _pid in $(pidof "$_prog"); do
+ [ "$_count" -le "$_max" ] || break
+
+ # Do this first to avoid racing with process exit
+ _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
+ if [ -n "$_stack" ]; then
+ echo "Stack trace for ${_prog}[${_pid}]:"
+ echo "$_stack"
+ _count=$((_count + 1))
+ fi
+ done
+}
+
+######################################################
+# Ensure $service_name is set
+assert_service_name()
+{
+ # service_name is set by the event script
+ # shellcheck disable=SC2154
+ [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
+}
+
+######################################################
+# check a set of directories is available
+# return 1 on a missing directory
+# directories are read from stdin
+######################################################
+# Read directory names from stdin, one per line, and return 1 at the
+# first one that is not a directory.  Lines containing '%' are not
+# checked.  The name last read is left in $d for the caller.
+ctdb_check_directories_probe()
+{
+	while IFS="" read -r d; do
+		# Skip entries containing a '%'
+		if [ "${d#*%}" != "$d" ]; then
+			continue
+		fi
+		if [ ! -d "${d}/." ]; then
+			return 1
+		fi
+	done
+}
+
+######################################################
+# check a set of directories is available
+# directories are read from stdin
+######################################################
+# Check the directories listed on stdin; on the first unavailable one
+# print an error naming it and exit with status 1.
+ctdb_check_directories()
+{
+	# $d is left set to the failing directory by the probe
+	if ! ctdb_check_directories_probe; then
+		echo "ERROR: $service_name directory \"$d\" not available"
+		exit 1
+	fi
+}
+
+######################################################
+# check a set of tcp ports
+# usage: ctdb_check_tcp_ports <ports...>
+######################################################
+
+# Check whether something is listening on all of the given TCP ports
+# using the "ctdb checktcpport" command.
+# Run "$CTDB checktcpport" for every port given as an argument.
+# Exit status 98 from the tool counts as "something is listening";
+# status 0 means nothing is listening and makes this function return
+# 1; any other status is reported and returned unchanged.
+ctdb_check_tcp_ports()
+{
+	[ -n "$1" ] || {
+		echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
+		exit 1
+	}
+
+	for _p in "$@"; do
+		_cmd="$CTDB checktcpport $_p"
+		_out=$($_cmd 2>&1)
+		_ret=$?
+
+		if [ "$_ret" -eq 98 ]; then
+			# Couldn't bind, something already listening, next port
+			continue
+		elif [ "$_ret" -eq 0 ]; then
+			echo "$service_name not listening on TCP port $_p"
+			return 1
+		fi
+
+		echo "unexpected error (${_ret}) running \"${_cmd}\""
+		[ -z "$_out" ] || echo "$_out"
+		return $_ret
+	done
+
+	# All ports listening
+	return 0
+}
+
+######################################################
+# check a unix socket
+# usage: ctdb_check_unix_socket SOCKPATH
+######################################################
+ctdb_check_unix_socket()
+{
+ _sockpath="$1"
+
+ if [ -z "$_sockpath" ]; then
+ echo "ERROR: ctdb_check_unix_socket() requires socket path"
+ return 1
+ fi
+
+ _out=$(ss -l -x "src ${_sockpath}" | tail -n +2)
+ if [ -z "$_out" ]; then
+ echo "ERROR: ${service_name} not listening on ${_sockpath}"
+ return 1
+ fi
+}
+
+################################################
+# kill off any TCP connections with the given IP
+################################################
+# Kill established TCP connections for an IP address.
+#   $1 - interface handed to the ctdb_killtcp helper
+#   $2 - IP address whose connections are looked up
+#   $3 - optional; "oneway" puts each connection on the kill list in
+#        one direction only
+# Prints a summary of how many connections were killed and lists any
+# that remain.
+kill_tcp_connections()
+{
+ _iface="$1"
+ _ip="$2"
+
+ _oneway=false
+ if [ "$3" = "oneway" ]; then
+ _oneway=true
+ fi
+
+ # Everything below runs on the receiving end of a pipeline, so
+ # "return" only leaves this group
+ get_tcp_connections_for_ip "$_ip" | {
+ _killcount=0
+ _connections=""
+ _nl="
+"
+ # Build a newline-separated kill list; each input line is read
+ # as 2 fields
+ while read -r _dst _src; do
+ _destport="${_dst##*:}"
+ __oneway=$_oneway
+ case $_destport in
+ # we only do one-way killtcp for CIFS
+ 139 | 445) __oneway=true ;;
+ esac
+
+ _connections="${_connections}${_nl}${_src} ${_dst}"
+ if ! $__oneway; then
+ _connections="${_connections}${_nl}${_dst} ${_src}"
+ fi
+
+ _killcount=$((_killcount + 1))
+ done
+
+ # No connections found: nothing to do
+ if [ $_killcount -eq 0 ]; then
+ return
+ fi
+
+ # CTDB_KILLTCP_DEBUGLEVEL, if set, overrides CTDB_DEBUGLEVEL for
+ # the helper only
+ if [ -n "$CTDB_KILLTCP_DEBUGLEVEL" ]; then
+ _debuglevel="$CTDB_KILLTCP_DEBUGLEVEL"
+ else
+ _debuglevel="$CTDB_DEBUGLEVEL"
+ fi
+ echo "$_connections" |
+ CTDB_DEBUGLEVEL="$_debuglevel" \
+ "${CTDB_HELPER_BINDIR}/ctdb_killtcp" "$_iface" || {
+ echo "Failed to kill TCP connections"
+ return
+ }
+
+ # Look again to count what survived
+ _connections=$(get_tcp_connections_for_ip "$_ip")
+ if [ -z "$_connections" ]; then
+ _remaining=0
+ else
+ _remaining=$(echo "$_connections" | wc -l)
+ fi
+
+ _actually_killed=$((_killcount - _remaining))
+
+ _t="${_actually_killed}/${_killcount}"
+ echo "Killed ${_t} TCP connections to released IP $_ip"
+
+ if [ -n "$_connections" ]; then
+ echo "Remaining connections:"
+ echo "$_connections" | sed -e 's|^| |'
+ fi
+ }
+}
+
+##################################################################
+# kill off the local end for any TCP connections with the given IP
+##################################################################
+kill_tcp_connections_local_only()
+{
+ kill_tcp_connections "$@" "oneway"
+}
+
+##################################################################
+# tickle any TCP connections with the given IP
+##################################################################
+# Tickle the established TCP connections of IP address $1.
+#
+# Every connection is listed in both directions, logged, and then
+# passed on stdin to "$CTDB tickle".  Returns 0 without doing
+# anything when the address has no connections.
+tickle_tcp_connections()
+{
+	_ip="$1"
+
+	# Get connections, both directions
+	_conns=$(get_tcp_connections_for_ip "$_ip" |
+		awk '{ print $1, $2 ; print $2, $1 }')
+
+	# Without this an empty list still reaches awk and the tool as a
+	# single blank line, logging a "Tickle TCP connection" message
+	# that names no connection
+	[ -n "$_conns" ] || return 0
+
+	echo "$_conns" | awk '{ print "Tickle TCP connection", $1, $2 }'
+	# Use $CTDB like the rest of this file, not whatever "ctdb"
+	# happens to be first in $PATH
+	echo "$_conns" | $CTDB tickle
+}
+
+get_tcp_connections_for_ip()
+{
+ _ip="$1"
+
+ ss -tn state established "src [$_ip]" | awk 'NR > 1 {print $3, $4}'
+}
+
+########################################################
+
+# Add an address to an interface.
+#   $1 - interface, brought up first
+#   $2 - IP address; one containing ':' is treated as IPv6
+#   $3 - mask bits
+# Dies if the interface can not be brought up.  Returns 1 if the
+# address can not be added or, for IPv6, does not end up usable.
+add_ip_to_iface()
+{
+ _iface=$1
+ _ip=$2
+ _maskbits=$3
+
+ # Ensure interface is up
+ ip link set "$_iface" up ||
+ die "Failed to bringup interface $_iface"
+
+ # Only need to define broadcast for IPv4
+ case "$_ip" in
+ *:*) _bcast="" ;;
+ *) _bcast="brd +" ;;
+ esac
+
+ # Intentionally unquoted multi-word value here
+ # shellcheck disable=SC2086
+ ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
+ echo "Failed to add $_ip/$_maskbits on dev $_iface"
+ return 1
+ }
+
+ # Wait 5 seconds for IPv6 addresses to stop being tentative...
+ # (an empty $_bcast is how the IPv6 case was marked above; the
+ # loop polls up to 10 times, 0.5s apart)
+ if [ -z "$_bcast" ]; then
+ for _x in $(seq 1 10); do
+ ip addr show to "${_ip}/128" | grep -q "tentative" || break
+ sleep 0.5
+ done
+
+ # If the address was a duplicate then it won't be on the
+ # interface so flag an error.
+ _t=$(ip addr show to "${_ip}/128")
+ case "$_t" in
+ "")
+ echo "Failed to add $_ip/$_maskbits on dev $_iface"
+ return 1
+ ;;
+ *tentative* | *dadfailed*)
+ # Still listed but not usable: remove it again
+ echo "Failed to add $_ip/$_maskbits on dev $_iface"
+ ip addr del "$_ip/$_maskbits" dev "$_iface"
+ return 1
+ ;;
+ esac
+ fi
+}
+
+# Remove address $2/$3 from interface $1.  Returns 1, after printing
+# a message, if the address can not be deleted.
+delete_ip_from_iface()
+{
+	_iface=$1
+	_ip=$2
+	_maskbits=$3
+
+	# promote_secondaries could be enabled once for all interfaces,
+	# but to avoid surprises it is limited to the interfaces where
+	# CTDB has public IP addresses.  There is no more convenient
+	# place for this, so it is simply set on every call, which is
+	# much cheaper than remembering and re-adding secondaries.
+	set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1
+
+	if ! ip addr del "$_ip/$_maskbits" dev "$_iface"; then
+		echo "Failed to del $_ip on dev $_iface"
+		return 1
+	fi
+}
+
+# If the given IP is hosted then print 2 items: maskbits and iface
+# Look up address $1 with "ip addr show" and, when it is found, print
+# its mask bits followed by the interface name
+ip_maskbits_iface()
+{
+	_addr="$1"
+
+	# An address containing ':' is matched as /128, anything else /32
+	if [ "${_addr#*:}" != "$_addr" ]; then
+		_bits=128
+	else
+		_bits=32
+	fi
+
+	ip addr show to "${_addr}/${_bits}" 2>/dev/null |
+		awk 'NR == 1 { iface = $2; sub(":$", "", iface) ;
+ sub("@.*", "", iface) }
+ $1 ~ /inet/ { mask = $2; sub(".*/", "", mask);
+ print mask, iface }'
+}
+
+drop_ip()
+{
+ _addr="${1%/*}" # Remove optional maskbits
+
+ # Intentional word splitting here
+ # shellcheck disable=SC2046
+ set -- $(ip_maskbits_iface "$_addr")
+ if [ -n "$1" ]; then
+ _maskbits="$1"
+ _iface="$2"
+ echo "Removing public address $_addr/$_maskbits from device $_iface"
+ delete_ip_from_iface "$_iface" "$_addr" "$_maskbits" >/dev/null 2>&1
+ fi
+}
+
+drop_all_public_ips()
+{
+ # _x is intentionally ignored
+ # shellcheck disable=SC2034
+ while read -r _ip _x; do
+ case "$_ip" in
+ \#*) continue ;;
+ esac
+ drop_ip "$_ip"
+ done <"${CTDB_BASE}/public_addresses"
+}
+
+# Write 1 to the IPv4 and IPv6 route/flush files under /proc/sys/net,
+# where they are writable
+flush_route_cache()
+{
+	for _flush_af in ipv4 ipv6; do
+		set_proc_maybe "sys/net/${_flush_af}/route/flush" 1
+	done
+}
+
+########################################################
+# Interface monitoring
+
+# If the interface is a virtual one (e.g. VLAN) then get the
+# underlying interface
+# Print the part of interface name $1 after the last '@', or the
+# whole name if it contains no '@'
+interface_get_real()
+{
+	_iface="$1"
+
+	case "$_iface" in
+	*@*)
+		# e.g. a VLAN: strip everything up to and including the '@'
+		echo "${_iface##*@}"
+		;;
+	*)
+		echo "$_iface"
+		;;
+	esac
+}
+
+# Check whether an interface is operational
+# Check whether interface $1 is operational.
+# Returns 0 if it is considered healthy; otherwise prints an ERROR
+# message and returns 1.
+interface_monitor()
+{
+ _iface="$1"
+
+ _iface_info=$(ip -br link show "$_iface" 2>&1) || {
+ echo "ERROR: Monitored interface ${_iface} does not exist"
+ return 1
+ }
+
+ # If the interface is a virtual one (e.g. VLAN) then get the
+ # underlying interface.
+ # (the name is the first space-delimited word of the ip output)
+ _realiface=$(interface_get_real "${_iface_info%% *}")
+
+ # A readable /proc/net/bonding/<name> file marks the interface
+ # as a bond
+ if _bi=$(get_proc "net/bonding/${_realiface}" 2>/dev/null); then
+ # This is a bond: various monitoring strategies
+ echo "$_bi" | grep -q 'Currently Active Slave: None' && {
+ echo "ERROR: No active slaves for bond device ${_realiface}"
+ return 1
+ }
+ echo "$_bi" | grep -q '^MII Status: up' || {
+ echo "ERROR: public network interface ${_realiface} is down"
+ return 1
+ }
+ echo "$_bi" | grep -q '^Bonding Mode: IEEE 802.3ad Dynamic link aggregation' && {
+ # This works around a bug in the driver where the
+ # overall bond status can be up but none of the actual
+ # physical interfaces have a link.
+ # (the first "MII Status:" line is skipped; at least one
+ # of the rest must be up)
+ echo "$_bi" | grep 'MII Status:' | tail -n +2 | grep -q '^MII Status: up' || {
+ echo "ERROR: No active slaves for 802.ad bond device ${_realiface}"
+ return 1
+ }
+ }
+
+ return 0
+ else
+ # Not a bond
+ # NOTE(review): this branch tests $_iface, not $_realiface -
+ # confirm that is intended for VLAN interfaces
+ case "$_iface" in
+ lo*)
+ # loopback is always working
+ return 0
+ ;;
+ ib*)
+ # we don't know how to test ib links
+ return 0
+ ;;
+ *)
+ ethtool "$_iface" | grep -q 'Link detected: yes' || {
+ # On some systems, this is not successful when a
+ # cable is plugged but the interface has not been
+ # brought up previously. Bring the interface up
+ # and try again...
+ ip link set "$_iface" up
+ ethtool "$_iface" | grep -q 'Link detected: yes' || {
+ echo "ERROR: No link on the public network interface ${_iface}"
+ return 1
+ }
+ }
+ return 0
+ ;;
+ esac
+ fi
+}
+
+########################################################
+# Simple counters
+_ctdb_counter_common()
+{
+ [ $# -le 1 ] || die "usage: _ctdb_counter_common [name]"
+
+ if [ $# -eq 1 ]; then
+ _counter_name="${1}.failcount"
+ else
+ _counter_name="failcount"
+ fi
+
+ if [ -z "$script_state_dir" ]; then
+ die "ctdb_counter_* functions need ctdb_setup_state_dir()"
+ fi
+
+ _counter_file="${script_state_dir}/${_counter_name}"
+}
+# Some code passes an argument
+# shellcheck disable=SC2120
+# Reset the counter named $1 by truncating its file
+ctdb_counter_init()
+{
+	_ctdb_counter_common "$1"
+	: >"${_counter_file}"
+}
+# Increment the counter named $1
+ctdb_counter_incr()
+{
+	_ctdb_counter_common "$1"
+
+	# The count is kept in unary: one newline per increment
+	printf '\n' >>"$_counter_file"
+}
+# Print the value of the counter named $1; 0 if its file can not be
+# read
+ctdb_counter_get()
+{
+	_ctdb_counter_common "$1"
+
+	# The value is the size of the file (unary counting).  Word
+	# splitting strips the leading spaces some versions of wc print
+	# (e.g. on freebsd).
+	# shellcheck disable=SC2046
+	set -- $(wc -c 2>/dev/null <"$_counter_file" || echo 0)
+	echo "$@"
+}
+
+#
+# Fail counter/threshold combination to control warnings and node unhealthy
+#
+
+# Return 0 if threshold $1 consists only of digits.  An empty
+# threshold fails silently; any other invalid value also prints a
+# warning naming check $2.
+_failcount_validate_threshold()
+{
+	# A failure that doesn't need a warning
+	if [ -z "$1" ]; then
+		return 1
+	fi
+
+	if echo "$1" | grep -qx '[0-9]*'; then
+		return 0
+	fi
+
+	echo "WARNING: ${1} is an invalid threshold in \"${2}\" check"
+	return 1
+}
+
+# Set $_counter to a counter name derived from $1: every '/' becomes
+# "_SLASH_" and every space becomes '_'
+_failcount_common()
+{
+	_thing="$1"
+
+	_counter=$(
+		echo "$_thing" |
+			sed -e 's@/@_SLASH_@g' -e 's@ @_@g'
+	)
+}
+
+# Reset the fail counter for $1
+failcount_init()
+{
+	# Sets both $_thing and $_counter
+	_failcount_common "$1"
+	ctdb_counter_init "$_counter"
+}
+
+# Reset the fail counter for $1, printing a notice if it was non-zero
+failcount_reset()
+{
+	# Sets both $_thing and $_counter
+	_failcount_common "$1"
+
+	_failcount=$(ctdb_counter_get "$_counter")
+	if ! [ "$_failcount" -eq 0 ]; then
+		printf 'NOTICE: %s: no longer failing\n' "$_thing"
+		ctdb_counter_init "$_counter"
+	fi
+}
+
+# Increment the fail counter for a check and react to the new count.
+#   $1 - name of the failing thing
+#   $2 - thresholds: "WARN:UNHEALTHY", "WARN" alone, or empty for a
+#        warning threshold of 1 and no unhealthy threshold
+#   $3 - optional output, printed only when the count is equal to a
+#        threshold
+# Exits with status 1 once the count reaches the unhealthy threshold.
+# Prints a warning once the count reaches the warning threshold.  An
+# empty or invalid threshold disables the corresponding action.
+failcount_incr()
+{
+	_thing="$1"
+	_thresholds="$2"
+	_output="$3"
+
+	_failcount_common "$_thing"
+
+	ctdb_counter_incr "$_counter"
+	_failcount=$(ctdb_counter_get "$_counter")
+
+	case "$_thresholds" in
+	*:*)
+		_warn_threshold="${_thresholds%:*}"
+		_unhealthy_threshold="${_thresholds#*:}"
+		;;
+	"")
+		_warn_threshold=1
+		_unhealthy_threshold=""
+		;;
+	*)
+		_warn_threshold="$_thresholds"
+		_unhealthy_threshold=""
+		;;
+	esac
+
+	if _failcount_validate_threshold "$_unhealthy_threshold" "$_thing"; then
+		if [ "$_failcount" -ge "$_unhealthy_threshold" ]; then
+			printf 'ERROR: %s: fail count %d >= threshold %d\n' \
+				"$_thing" \
+				"$_failcount" \
+				"$_unhealthy_threshold"
+			# Only print output when exceeding the
+			# unhealthy threshold
+			if [ "$_failcount" -eq "$_unhealthy_threshold" ] &&
+				[ -n "$_output" ]; then
+				echo "$_output"
+			fi
+			exit 1
+		fi
+	fi
+
+	# Without a valid warning threshold there is no number to compare
+	# with or to print, so stop here.  Falling through would hand an
+	# empty or non-numeric value to printf %d and to [ -eq ].
+	_failcount_validate_threshold "$_warn_threshold" "$_thing" || return 0
+
+	if [ "$_failcount" -lt "$_warn_threshold" ]; then
+		return 0
+	fi
+
+	printf 'WARNING: %s: fail count %d >= threshold %d\n' \
+		"$_thing" \
+		"$_failcount" \
+		"$_warn_threshold"
+	if [ "$_failcount" -eq "$_warn_threshold" ] && [ -n "$_output" ]; then
+		# Only print output when exceeding the warning threshold
+		echo "$_output"
+	fi
+}
+
+########################################################
+
+# ctdb_setup_state_dir <type> <name>
+# Sets $script_state_dir and creates that directory
+# Point $script_state_dir at ${CTDB_SCRIPT_VARDIR}/<type>/<name> and
+# create the directory, dying if that fails
+ctdb_setup_state_dir()
+{
+	if [ $# -ne 2 ]; then
+		die "usage: ctdb_setup_state_dir <type> <name>"
+	fi
+
+	_type="$1"
+	_name="$2"
+
+	script_state_dir="${CTDB_SCRIPT_VARDIR}/${_type}/${_name}"
+
+	if ! mkdir -p "$script_state_dir"; then
+		die "Error creating script state dir \"${script_state_dir}\""
+	fi
+}
+
+##################################################################
+# Reconfigure a service on demand
+
+# Set $_ctdb_service_reconfigure_flag to the path of the flag file in
+# $script_state_dir; dies if the state directory has not been set up
+_ctdb_service_reconfigure_common()
+{
+	[ -n "$script_state_dir" ] ||
+		die "ctdb_service_*_reconfigure() needs ctdb_setup_state_dir()"
+
+	_ctdb_service_reconfigure_flag="${script_state_dir}/need_reconfigure"
+}
+
+# Succeed if the reconfigure flag file exists
+ctdb_service_needs_reconfigure()
+{
+	_ctdb_service_reconfigure_common
+	test -e "$_ctdb_service_reconfigure_flag"
+}
+
+ctdb_service_set_reconfigure()
+{
+ _ctdb_service_reconfigure_common
+ : >"$_ctdb_service_reconfigure_flag"
+}
+
+# Remove the reconfigure flag file, if there is one
+ctdb_service_unset_reconfigure()
+{
+	_ctdb_service_reconfigure_common
+	rm -f "${_ctdb_service_reconfigure_flag}"
+}
+
+ctdb_service_reconfigure()
+{
+ echo "Reconfiguring service \"${service_name}\"..."
+ ctdb_service_unset_reconfigure
+ service_reconfigure || return $?
+ # Intentionally have this use $service_name as default
+ # shellcheck disable=SC2119
+ ctdb_counter_init
+}
+
+# Default service_reconfigure() function does nothing.
+# Placeholder that succeeds without doing anything
+service_reconfigure()
+{
+	return 0
+}
+
+# Default service_start() and service_stop() functions.
+
+# These may be overridden in an eventscript.
+service_start()
+{
+ service "$service_name" start
+}
+
+service_stop()
+{
+ service "$service_name" stop
+}
+
+##################################################################
+
+# This exists only for backward compatibility with 3rd party scripts
+# that call it
+# Does nothing and succeeds
+ctdb_standard_event_handler()
+{
+	return 0
+}
+
+iptables_wrapper()
+{
+ _family="$1"
+ shift
+ if [ "$_family" = "inet6" ]; then
+ _iptables_cmd="ip6tables"
+ else
+ _iptables_cmd="iptables"
+ fi
+
+ # iptables doesn't like being re-entered, so flock-wrap it.
+ flock -w 30 "${CTDB_SCRIPT_VARDIR}/iptables.flock" "$_iptables_cmd" "$@"
+}
+
+# AIX (and perhaps others?) doesn't have mktemp
+# type is commonly supported and more portable than which(1)
+# shellcheck disable=SC2039
+if ! type mktemp >/dev/null 2>&1; then
+	# Minimal stand-in for mktemp(1).  Creates an empty file, or a
+	# directory with -d, under ${TMPDIR:-/tmp} using umask 077 and
+	# prints its name.  Any other arguments are ignored.
+	#
+	# Returns 1 without printing a name if creation fails.  The file
+	# is created with noclobber set so that an existing file or
+	# symlink planted under the same name is never reused.
+	mktemp()
+	{
+		_dir=false
+		if [ "$1" = "-d" ]; then
+			_dir=true
+			shift
+		fi
+		_d="${TMPDIR:-/tmp}"
+		# Checksum of random data as the unique part of the name
+		_rand=$(dd if=/dev/urandom count=20 2>/dev/null |
+			cksum |
+			awk '{print $1}')
+		_t="${_d}/tmp.${_rand}"
+		(
+			umask 077
+			if $_dir; then
+				mkdir "$_t"
+			else
+				set -C
+				: >"$_t"
+			fi
+		) || return 1
+		echo "$_t"
+	}
+fi
+
+######################################################################
+# NFS callout handling
+
+# Set up and export the NFS callout environment.  $1 is the state
+# directory under which the callout cache paths are placed.
+nfs_callout_init()
+{
+	_state_dir="$1"
+
+	# Default callout; always export, for statd callout
+	: "${CTDB_NFS_CALLOUT:=${CTDB_BASE}/nfs-linux-kernel-callout}"
+	export CTDB_NFS_CALLOUT
+
+	# If the callout wants to use this then it must create it
+	CTDB_NFS_CALLOUT_STATE_DIR="${_state_dir}/callout-state"
+	export CTDB_NFS_CALLOUT_STATE_DIR
+
+	# Export, if set, for use by clustered NFS callouts
+	[ -z "$CTDB_NFS_STATE_FS_TYPE" ] || export CTDB_NFS_STATE_FS_TYPE
+	[ -z "$CTDB_NFS_STATE_MNT" ] || export CTDB_NFS_STATE_MNT
+
+	nfs_callout_cache="${_state_dir}/nfs_callout_cache"
+	nfs_callout_cache_callout="${nfs_callout_cache}/CTDB_NFS_CALLOUT"
+	nfs_callout_cache_ops="${nfs_callout_cache}/ops"
+}
+
+# Rebuild the cache of operations handled by $CTDB_NFS_CALLOUT.  One
+# empty file is created per line printed by "callout register"; if it
+# prints nothing a single file named ALL is created instead.
+nfs_callout_register()
+{
+	# Start from an empty ops directory
+	mkdir -p "$nfs_callout_cache_ops"
+	rm -f "$nfs_callout_cache_ops"/*
+
+	# Record which callout the cache was built for
+	echo "$CTDB_NFS_CALLOUT" >"$nfs_callout_cache_callout"
+
+	_t=$("$CTDB_NFS_CALLOUT" "register")
+	if [ -z "$_t" ]; then
+		touch "${nfs_callout_cache_ops}/ALL"
+	else
+		echo "$_t" |
+			while IFS="" read -r _op; do
+				touch "${nfs_callout_cache_ops}/${_op}"
+			done
+	fi
+}
+
+# Run "$CTDB_NFS_CALLOUT <args>" if operation $1, or ALL, is in the
+# cache of registered operations; otherwise do nothing
+nfs_callout()
+{
+	# Re-run registration if $CTDB_NFS_CALLOUT has changed
+	_prev=""
+	[ ! -r "$nfs_callout_cache_callout" ] ||
+		read -r _prev <"$nfs_callout_cache_callout"
+	[ "$CTDB_NFS_CALLOUT" = "$_prev" ] || nfs_callout_register
+
+	# Run the operation if it is registered...
+	for _nfs_callout_op in \
+		"${nfs_callout_cache_ops}/${1}" \
+		"${nfs_callout_cache_ops}/ALL"; do
+		if [ -e "$_nfs_callout_op" ]; then
+			"$CTDB_NFS_CALLOUT" "$@"
+			return
+		fi
+	done
+}
+
+########################################################
+# tickle handling
+########################################################
+
+# Bring the tickle list for port $1 in line with the established
+# connections on that port to the public IPs held by this node:
+# connections without a tickle are added, tickles without a
+# connection are deleted.
+update_tickles()
+{
+ _port="$1"
+
+ tickledir="${CTDB_SCRIPT_VARDIR}/tickles"
+ mkdir -p "$tickledir"
+
+ # What public IPs do I hold?
+ # (field 2 of the lines whose field 3 is this node's PNN)
+ _pnn=$(ctdb_get_pnn)
+ _ips=$($CTDB -X ip | awk -F'|' -v pnn="$_pnn" '$3 == pnn {print $2}')
+
+ # IPs and port as ss filters
+ _ip_filter=""
+ for _ip in $_ips; do
+ _ip_filter="${_ip_filter}${_ip_filter:+ || }src [${_ip}]"
+ done
+ _port_filter="sport == :${_port}"
+
+ # Record connections to our public IPs in a temporary file.
+ # This temporary file is in CTDB's private state directory and
+ # $$ is used to avoid a very rare race involving CTDB's script
+ # debugging. No security issue, nothing to see here...
+ _my_connections="${tickledir}/${_port}.connections.$$"
+ # Parentheses are needed around the filters for precedence but
+ # the parentheses can't be empty!
+ #
+ # Recent versions of ss print square brackets around IPv6
+ # addresses. While it is desirable to update CTDB's address
+ # parsing and printing code, something needs to be done here
+ # for backward compatibility, so just delete the brackets.
+ ss -tn state established \
+ "${_ip_filter:+( ${_ip_filter} )}" \
+ "${_port_filter:+( ${_port_filter} )}" |
+ awk 'NR > 1 {print $4, $3}' |
+ tr -d '][' |
+ sort >"$_my_connections"
+
+ # Record our current tickles in a temporary file
+ # (sorted, like the connections file, because comm(1) needs
+ # sorted input)
+ _my_tickles="${tickledir}/${_port}.tickles.$$"
+ for _i in $_ips; do
+ $CTDB -X gettickles "$_i" "$_port" |
+ awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
+ done |
+ sort >"$_my_tickles"
+
+ # Add tickles for connections that we haven't already got tickles for
+ comm -23 "$_my_connections" "$_my_tickles" |
+ $CTDB addtickle
+
+ # Remove tickles for connections that are no longer there
+ comm -13 "$_my_connections" "$_my_tickles" |
+ $CTDB deltickle
+
+ rm -f "$_my_connections" "$_my_tickles"
+
+ # Remove stale files from killed scripts
+ # (anything in the tickle directory older than 10 minutes)
+ # Files can't have spaces in name, more portable than -print0/-0
+ # shellcheck disable=SC2038
+ (cd "$tickledir" && find . -type f -mmin +10 | xargs -r rm)
+}
+
+########################################################
+# load a site local config file
+########################################################
+
+[ -x "${CTDB_BASE}/rc.local" ] && {
+ . "${CTDB_BASE}/rc.local"
+}
+
+[ -d "${CTDB_BASE}/rc.local.d" ] && {
+ for i in "${CTDB_BASE}/rc.local.d"/*; do
+ [ -x "$i" ] && . "$i"
+ done
+}
+
+script_name="${0##*/}" # basename
diff --git a/ctdb/config/nfs-checks.d/00.portmapper.check b/ctdb/config/nfs-checks.d/00.portmapper.check
new file mode 100644
index 0000000..24def35
--- /dev/null
+++ b/ctdb/config/nfs-checks.d/00.portmapper.check
@@ -0,0 +1,2 @@
+# portmapper
+unhealthy_after=1
diff --git a/ctdb/config/nfs-checks.d/10.status.check b/ctdb/config/nfs-checks.d/10.status.check
new file mode 100644
index 0000000..b8ce1e0
--- /dev/null
+++ b/ctdb/config/nfs-checks.d/10.status.check
@@ -0,0 +1,7 @@
+# status
+version="1"
+restart_every=2
+unhealthy_after=6
+service_stop_cmd="$CTDB_NFS_CALLOUT stop status"
+service_start_cmd="$CTDB_NFS_CALLOUT start status"
+service_debug_cmd="program_stack_traces rpc.statd 5"
diff --git a/ctdb/config/nfs-checks.d/20.nfs.check b/ctdb/config/nfs-checks.d/20.nfs.check
new file mode 100644
index 0000000..dad1cdc
--- /dev/null
+++ b/ctdb/config/nfs-checks.d/20.nfs.check
@@ -0,0 +1,7 @@
+# nfs
+version="3"
+restart_every=10
+unhealthy_after=2
+service_stop_cmd="$CTDB_NFS_CALLOUT stop nfs"
+service_start_cmd="$CTDB_NFS_CALLOUT start nfs"
+service_debug_cmd="program_stack_traces nfsd 5"
diff --git a/ctdb/config/nfs-checks.d/30.nlockmgr.check b/ctdb/config/nfs-checks.d/30.nlockmgr.check
new file mode 100644
index 0000000..6660ca0
--- /dev/null
+++ b/ctdb/config/nfs-checks.d/30.nlockmgr.check
@@ -0,0 +1,6 @@
+# nlockmgr
+version="4"
+restart_every=2
+unhealthy_after=6
+service_stop_cmd="$CTDB_NFS_CALLOUT stop nlockmgr"
+service_start_cmd="$CTDB_NFS_CALLOUT start nlockmgr"
diff --git a/ctdb/config/nfs-checks.d/40.mountd.check b/ctdb/config/nfs-checks.d/40.mountd.check
new file mode 100644
index 0000000..bfe4c27
--- /dev/null
+++ b/ctdb/config/nfs-checks.d/40.mountd.check
@@ -0,0 +1,7 @@
+# mountd
+version="1"
+restart_every=2
+unhealthy_after=6
+service_stop_cmd="$CTDB_NFS_CALLOUT stop mountd"
+service_start_cmd="$CTDB_NFS_CALLOUT start mountd"
+service_debug_cmd="program_stack_traces rpc.mountd 5"
diff --git a/ctdb/config/nfs-checks.d/50.rquotad.check b/ctdb/config/nfs-checks.d/50.rquotad.check
new file mode 100644
index 0000000..98bd8d9
--- /dev/null
+++ b/ctdb/config/nfs-checks.d/50.rquotad.check
@@ -0,0 +1,7 @@
+# rquotad
+version="1"
+restart_every=2
+unhealthy_after=6
+service_stop_cmd="$CTDB_NFS_CALLOUT stop rquotad"
+service_start_cmd="$CTDB_NFS_CALLOUT start rquotad"
+service_debug_cmd="program_stack_traces rpc.rquotad 5"
diff --git a/ctdb/config/nfs-checks.d/README b/ctdb/config/nfs-checks.d/README
new file mode 100644
index 0000000..044067a
--- /dev/null
+++ b/ctdb/config/nfs-checks.d/README
@@ -0,0 +1,31 @@
+NFS check configuration files.
+
+Files are named NN.RPCSERVICE.check. Files without a .check suffix
+are ignored.
+
+Supported variables are:
+
+* family - "tcp" or "udp" or space separated list
+ default: tcp, not used with "service_check_cmd"
+* version - optional, RPC service version number
+ default: omitted, in which case any version is accepted,
+ not used with "service_check_cmd"
+* unhealthy_after - number of check fails before unhealthy
+ default: 1
+* restart_every - number of check fails before restart
+ default: 0, meaning no restart
+* service_stop_cmd - command to stop service
+ default: no default, must be provided if
+ restart_every > 0
+* service_start_cmd - command to start service
+ default: no default, must be provided if
+ restart_every > 0
+* service_check_cmd - command to check health of service
+ default is to check RPC service using rpcinfo
+* service_debug_cmd - command to debug a service after trying to stop it;
+ for example, it can be useful to print stack
+ traces of threads that have not exited, since
+ they may be stuck doing I/O;
+ no default, see also function program_stack_traces()
+
+Quoting inside values is not preserved.
diff --git a/ctdb/config/nfs-linux-kernel-callout b/ctdb/config/nfs-linux-kernel-callout
new file mode 100755
index 0000000..f2f3e38
--- /dev/null
+++ b/ctdb/config/nfs-linux-kernel-callout
@@ -0,0 +1,441 @@
+#!/bin/sh
+
+# Exit on 1st error
+set -e
+
+# NFS exports file. Some code below keeps a cache of output derived
+# from exportfs(8). When this file is updated the cache is invalid
+# and needs to be regenerated.
+#
+# To change the file, edit the default value below. Do not set
+# CTDB_NFS_EXPORTS_FILE - it isn't a configuration variable, just a
+# hook for testing.
+nfs_exports_file="${CTDB_NFS_EXPORTS_FILE:-/var/lib/nfs/etab}"
+
+# As above, edit the default value below. CTDB_NFS_DISTRO_STYLE is a
+# test variable only.
+nfs_distro_style="${CTDB_NFS_DISTRO_STYLE:-systemd-redhat}"
+
+# As above, edit the default value below. CTDB_SYS_ETCDIR is a
+# test variable only.
+etc_dir="${CTDB_SYS_ETCDIR:-/etc}"
+
+# A value of "AUTO" for any service means that service is usually
+# automatically started and stopped by one of the other services.
+# Such services will still be restarted by hand on failure, if
+# configured to do so. This allows services that should not be
+# running to be set to "".
+
+# Select service names and configuration file locations. The part of
+# $nfs_distro_style before the "-" selects the defaults and the part
+# after it selects distro-specific overrides. Any unrecognised value
+# is reported as an internal error and the script exits with status 1.
+case "$nfs_distro_style" in
+systemd-*)
+ # Defaults
+ nfs_service="nfs-server"
+ nfs_lock_service="rpc-statd"
+ nfs_mountd_service="nfs-mountd"
+ nfs_status_service="rpc-statd"
+ nfs_rquotad_service="rpc-rquotad"
+ nfs_config="${etc_dir}/sysconfig/nfs"
+ nfs_rquotad_config="" # Not used with systemd, restart via service
+
+ # Distro-specific overrides of the systemd defaults
+ case "$nfs_distro_style" in
+ *-redhat | *-suse)
+ : # Defaults only
+ ;;
+ *-debian)
+ nfs_rquotad_service="quotarpc"
+ ;;
+ *)
+ echo "Internal error"
+ exit 1
+ ;;
+ esac
+ ;;
+
+sysvinit-*)
+ # Defaults
+ nfs_service="nfs"
+ nfs_lock_service="AUTO"
+ nfs_mountd_service="AUTO"
+ nfs_status_service="AUTO"
+ nfs_rquotad_service="AUTO"
+ nfs_config="${etc_dir}/sysconfig/nfs"
+ nfs_rquotad_config="$nfs_config"
+
+ # Distro-specific overrides of the sysvinit defaults
+ case "$nfs_distro_style" in
+ *-redhat)
+ nfs_lock_service="nfslock"
+ ;;
+ *-suse)
+ nfs_service="nfsserver"
+ ;;
+ *-debian)
+ nfs_service="nfs-kernel-server"
+ nfs_config="${etc_dir}/default/nfs-kernel-server"
+ nfs_rquotad_config="${etc_dir}/default/quota"
+ ;;
+ *)
+ echo "Internal error"
+ exit 1
+ ;;
+ esac
+ ;;
+
+*)
+ echo "Internal error"
+ exit 1
+ ;;
+esac
+
+# Override for unit testing
+if [ -z "$PROCFS_PATH" ]; then
+ PROCFS_PATH="/proc"
+fi
+
+##################################################
+
+usage()
+{
+ _c=$(basename "$0")
+ cat <<EOF
+usage: $_c { shutdown | startup }
+ $_c { stop | start } { nfs | nlockmgr }
+ $_c { monitor-list-shares | monitor-post }
+ $_c { register }
+EOF
+ exit 1
+}
+
+##################################################
+
+# Source an NFS configuration file into the current shell.
+#
+# $1 - optional path of the file to load; defaults to $nfs_config
+#
+# A file that is missing or unreadable is silently skipped.
+nfs_load_config()
+{
+	_config="${1:-${nfs_config}}"
+
+	[ ! -r "$_config" ] || . "$_config"
+}
+
+##################################################
+
+# Succeed if and only if $1 is exactly the marker value "AUTO".
+service_is_auto_started()
+{
+	case "$1" in
+	AUTO)
+		return 0
+		;;
+	*)
+		return 1
+		;;
+	esac
+}
+
+# Succeed if $1 names a real system service, that is, it is neither
+# empty nor the "AUTO" marker.
+service_is_defined()
+{
+	_service="$1"
+
+	if [ -z "$_service" ]; then
+		return 1
+	fi
+
+	if service_is_auto_started "$_service"; then
+		return 1
+	fi
+
+	return 0
+}
+
+service_if_defined()
+{
+ _service="$1"
+ _action="$2"
+
+ if service_is_defined "$_service"; then
+ service "$_service" "$_action"
+ fi
+}
+
+##################################################
+# Overall NFS service stop and start
+
+# Stop the overall NFS service: rquotad first (if it is a defined
+# service), then the main NFS service, then the lock service (if it
+# is a defined service). This is the reverse of the order used by
+# nfs_service_start().
+nfs_service_stop()
+{
+ service_if_defined "$nfs_rquotad_service" stop
+
+ service "$nfs_service" stop
+
+ service_if_defined "$nfs_lock_service" stop
+}
+
+# Start the overall NFS service: the lock service first (if it is a
+# defined service), then the main NFS service, then rquotad (if it is
+# a defined service).
+nfs_service_start()
+{
+ service_if_defined "$nfs_lock_service" start
+
+ service "$nfs_service" start
+
+ service_if_defined "$nfs_rquotad_service" start
+}
+
+##################################################
+# service "stop" and "start" options for restarting
+
+manual_stop()
+{
+ case "$1" in
+ mountd)
+ killall -q -9 rpc.mountd
+ ;;
+ rquotad)
+ killall -q -9 rpc.rquotad
+ ;;
+ status)
+ killall -q -9 rpc.statd
+ ;;
+ *)
+ echo "$0: Internal error - invalid call to manual_stop()"
+ exit 1
+ ;;
+ esac
+}
+
+service_or_manual_stop()
+{
+ _rpc_service="$1"
+ _system_service="$2"
+
+ if service_is_defined "$_system_service"; then
+ service "$_system_service" stop
+ elif service_is_auto_started "$_system_service"; then
+ manual_stop "$_rpc_service"
+ fi
+}
+
+# Stop the RPC service named by $1 (the "stop" sub-command).
+#
+# $1 - one of: nfs, nlockmgr, mountd, rquotad, status
+#
+# Any other value prints the usage message and exits via usage().
+service_stop()
+{
+ _rpc_service="$1"
+
+ case "$_rpc_service" in
+ nfs)
+ # Write 0 to the nfsd threads file, stop the services
+ # (ignoring output and failures), then SIGKILL anything
+ # still matching "nfsd"
+ echo 0 >"${PROCFS_PATH}/fs/nfsd/threads"
+ nfs_service_stop >/dev/null 2>&1 || true
+ pkill -9 nfsd
+ ;;
+ nlockmgr)
+ # Without a separately defined lock service, stop the
+ # main NFS service instead; failures are ignored
+ if service_is_defined "$nfs_lock_service" ; then
+ service "$nfs_lock_service" stop >/dev/null 2>&1 || true
+ else
+ service "$nfs_service" stop >/dev/null 2>&1 || true
+ fi
+ ;;
+ mountd)
+ service_or_manual_stop "$_rpc_service" "$nfs_mountd_service"
+ ;;
+ rquotad)
+ service_or_manual_stop "$_rpc_service" "$nfs_rquotad_service"
+ ;;
+ status)
+ service_or_manual_stop "$_rpc_service" "$nfs_status_service"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+}
+
+manual_start()
+{
+ case "$1" in
+ mountd)
+ nfs_load_config
+ if [ -z "$RPCMOUNTDOPTS" ]; then
+ RPCMOUNTDOPTS="${MOUNTD_PORT:+-p }$MOUNTD_PORT"
+ fi
+ # shellcheck disable=SC2086
+ rpc.mountd $RPCMOUNTDOPTS
+ ;;
+ rquotad)
+ nfs_load_config "$nfs_rquotad_config"
+ if [ -z "$RPCRQUOTADOPTS" ]; then
+ RPCRQUOTADOPTS="${RQUOTAD_PORT:+-p }$RQUOTAD_PORT"
+ fi
+ # shellcheck disable=SC2086
+ rpc.rquotad $RPCRQUOTADOPTS
+ ;;
+ status)
+ nfs_load_config
+ # Red Hat uses STATDARG, Debian uses STATDOPTS
+ opts="${STATDARG:-${STATDOPTS:-''}}"
+ if [ -z "$opts" ]; then
+ # shellcheck disable=SC2086
+ set -- \
+ ${STATD_HA_CALLOUT:+-H} $STATD_HA_CALLOUT \
+ ${STATD_HOSTNAME:+-n} $STATD_HOSTNAME \
+ ${STATD_PORT:+-p} $STATD_PORT \
+ ${STATD_OUTGOING_PORT:+-o} $STATD_OUTGOING_PORT
+ opts="$*"
+ fi
+ # shellcheck disable=SC2086
+ rpc.statd $opts
+ ;;
+ *)
+ echo "$0: Internal error - invalid call to manual_start()"
+ exit 1
+ ;;
+ esac
+}
+
+service_or_manual_start()
+{
+ _rpc_service="$1"
+ _system_service="$2"
+
+ if service_is_defined "$_system_service"; then
+ service "$_system_service" start
+ elif service_is_auto_started "$_system_service"; then
+ manual_start "$_rpc_service"
+ fi
+}
+
+# Start the RPC service named by $1 (the "start" sub-command).
+#
+# $1 - one of: nfs, nlockmgr, mountd, rquotad, status
+#
+# Any other value prints the usage message and exits via usage().
+service_start()
+{
+ _rpc_service="$1"
+
+ case "$_rpc_service" in
+ nfs)
+ nfs_service_start
+ ;;
+ nlockmgr)
+ # Without a separately defined lock service, start the
+ # main NFS service instead
+ if service_is_defined "$nfs_lock_service" ; then
+ service "$nfs_lock_service" start
+ else
+ service "$nfs_service" start
+ fi
+ ;;
+ mountd)
+ service_or_manual_start "$_rpc_service" "$nfs_mountd_service"
+ ;;
+ rquotad)
+ service_or_manual_start "$_rpc_service" "$nfs_rquotad_service"
+ ;;
+ status)
+ service_or_manual_start "$_rpc_service" "$nfs_status_service"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+}
+
+##################################################
+# service init startup and final shutdown
+
+# Handler for the "shutdown" sub-command: stop the overall NFS service.
+nfs_shutdown()
+{
+ nfs_service_stop
+}
+
+nfs_startup()
+{
+ nfs_service_stop || true
+ nfs_service_start
+ _f="${PROCFS_PATH}/sys/net/ipv4/tcp_tw_recycle"
+ if [ -f "$_f" ]; then
+ echo 1 >"$_f"
+ fi
+}
+
+##################################################
+# monitor-post support
+
+# Handler for the "monitor-post" sub-command: make the number of
+# running nfsd threads match the configured number.
+#
+# Returns 0 without doing anything when no thread count is
+# configured. If the threads file cannot be read, prints a warning
+# and exits the whole script with status 0.
+nfs_check_thread_count()
+{
+ # Load NFS configuration to get desired number of threads.
+ nfs_load_config
+
+ # If $RPCNFSDCOUNT/$USE_KERNEL_NFSD_NUMBER isn't set then we could
+ # guess the default from the initscript. However, let's just
+ # assume that those using the default don't care about the number
+ # of threads and that they have switched on this feature in error.
+ _configured_threads="${RPCNFSDCOUNT:-${USE_KERNEL_NFSD_NUMBER}}"
+ # Fall back to asking nfsconf, if that command is available;
+ # "|| true" stops a failing lookup from tripping "set -e"
+ if [ -z "$_configured_threads" ] && type nfsconf >/dev/null 2>&1; then
+ _configured_threads=$(nfsconf --get nfsd threads) || true
+ fi
+ [ -n "$_configured_threads" ] || return 0
+
+ _threads_file="${PROCFS_PATH}/fs/nfsd/threads"
+
+ # nfsd should be running the configured number of threads. If
+ # there are a different number of threads then tell nfsd the
+ # correct number.
+ read -r _running_threads <"$_threads_file" || {
+ echo "WARNING: Reading \"${_threads_file}\" unexpectedly failed"
+ exit 0
+ }
+
+ # Intentionally not arithmetic comparison - avoids extra errors
+ # when above read fails in an unexpected way...
+ if [ "$_running_threads" != "$_configured_threads" ]; then
+ echo "Attempting to correct number of nfsd threads from ${_running_threads} to ${_configured_threads}"
+ echo "$_configured_threads" >"$_threads_file"
+ fi
+}
+
+##################################################
+# list share directories
+
+# Handler for the "monitor-list-shares" sub-command: print the
+# exported directories, one per line, sorted and without duplicates.
+#
+# The list is cached in $CTDB_NFS_CALLOUT_STATE_DIR and regenerated
+# from "exportfs -v" when the exports file or the cache is unreadable,
+# or when the exports file is newer than the cache. If exportfs
+# fails, a warning is written to stderr and nothing is printed.
+nfs_monitor_list_shares()
+{
+ _cache_file="${CTDB_NFS_CALLOUT_STATE_DIR}/list_shares_cache"
+ # -nt operator is well supported in Linux: dash, bash, ksh, ...
+ # shellcheck disable=SC2039,SC3013
+ if [ ! -r "$nfs_exports_file" ] || [ ! -r "$_cache_file" ] ||
+ [ "$nfs_exports_file" -nt "$_cache_file" ]; then
+ mkdir -p "$CTDB_NFS_CALLOUT_STATE_DIR"
+ # We could just use the contents of $nfs_exports_file.
+ # However, let's regard that file as internal to NFS and use
+ # exportfs, which is the public API.
+ if ! _exports=$(exportfs -v); then
+ echo "WARNING: failed to run exportfs to list NFS shares" >&2
+ return
+ fi
+
+ # Keep only lines starting with "/" and strip a trailing
+ # whitespace-separated "word(word)" field from each line
+ echo "$_exports" |
+ grep '^/' |
+ sed -e 's@[[:space:]][[:space:]]*[^[:space:]()][^[:space:]()]*([^[:space:]()][^[:space:]()]*)$@@' |
+ sort -u >"$_cache_file"
+ fi
+
+ cat "$_cache_file"
+}
+
+##################################################
+
+# Handler for the "register" sub-command: print, one per line, the
+# names of the sub-commands that this callout implements.
+nfs_register()
+{
+ cat <<EOF
+shutdown
+startup
+stop
+start
+monitor-list-shares
+monitor-post
+EOF
+}
+
+##################################################
+
+# Dispatch on the sub-command given as the first argument. For
+# "stop" and "start" the second argument names the RPC service.
+case "$1" in
+shutdown)
+ nfs_shutdown
+ ;;
+startup)
+ nfs_startup
+ ;;
+stop)
+ service_stop "$2"
+ ;;
+start)
+ service_start "$2"
+ ;;
+monitor-list-shares)
+ nfs_monitor_list_shares
+ ;;
+monitor-post)
+ nfs_check_thread_count
+ ;;
+register)
+ nfs_register
+ ;;
+monitor-pre | releaseip | takeip | releaseip-pre | takeip-pre)
+ # Not required/implemented
+ :
+ ;;
+*)
+ # Unknown sub-command: print usage and exit with status 1
+ usage
+ ;;
+esac
diff --git a/ctdb/config/notification.README b/ctdb/config/notification.README
new file mode 100755
index 0000000..16b632f
--- /dev/null
+++ b/ctdb/config/notification.README
@@ -0,0 +1,36 @@
+This directory should contain executable programs ending in ".script"
+to handle CTDB event notifications. The first and only argument
+passed to each program is the event, which is one of:
+
+ init, setup, startup, unhealthy, healthy
+
+An example script that sends SNMP traps for unhealthy/healthy might
+look like this:
+
+ #!/bin/sh
+
+ case "$1" in
+ unhealthy)
+ # Send an SNMP trap saying that the node is unhealthy:
+ snmptrap -m ALL -v 1 -c public 10.1.1.105 ctdb \
+ $(hostname) 0 0 $(date +"%s") ctdb.nodeHealth.0 i 1
+ ;;
+ healthy)
+ # Send an SNMP trap saying that the node is healthy again:
+ snmptrap -m ALL -v 1 -c public 10.1.1.105 ctdb \
+ $(hostname) 0 0 $(date +"%s") ctdb.nodeHealth.0 i 0
+ ;;
+ esac
+
+Alternatively, email could be sent:
+
+ #!/bin/sh
+
+ case "$1" in
+ unhealthy)
+ mail -s "$(hostname) is UNHEALTHY" foo@example.com </dev/null >/dev/null 2>&1
+ ;;
+ healthy)
+ mail -s "$(hostname) is HEALTHY" foo@example.com </dev/null >/dev/null 2>&1
+ ;;
+ esac
diff --git a/ctdb/config/notify.sh b/ctdb/config/notify.sh
new file mode 100755
index 0000000..db69afc
--- /dev/null
+++ b/ctdb/config/notify.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+# This script is invoked from ctdb when certain events happen. See
+# /etc/ctdb/events/notification/README for more details.
+#
+# It runs every executable "*.script" file found in the
+# events/notification directory next to this script, passing the
+# first argument through, and fails if any of them fails.
+
+d=$(dirname "$0")
+nd="${d}/events/notification"
+
+# Becomes "false" as soon as any notification script fails
+ok=true
+
+for i in "${nd}/"*.script ; do
+ # Files must be executable
+ [ -x "$i" ] || continue
+
+ # Flag failures
+ "$i" "$1" || ok=false
+done
+
+# Run "true" or "false" so that it becomes the script's exit status
+$ok
diff --git a/ctdb/config/script.options b/ctdb/config/script.options
new file mode 100644
index 0000000..79e82af
--- /dev/null
+++ b/ctdb/config/script.options
@@ -0,0 +1,16 @@
+# For now, use script.options to demonstrate these options. See
+# *.options examples for more specific examples.
+
+#
+# Samba configuration
+#
+
+# 50.samba.options
+# CTDB_SAMBA_SKIP_SHARE_CHECK=yes
+
+#
+# NFS configuration
+#
+
+# 60.nfs.options
+CTDB_RPCINFO_LOCALHOST="127.0.0.1"
diff --git a/ctdb/config/statd-callout b/ctdb/config/statd-callout
new file mode 100755
index 0000000..38c155e
--- /dev/null
+++ b/ctdb/config/statd-callout
@@ -0,0 +1,254 @@
+#!/bin/sh
+
+# This must run as root as CTDB tool commands need to access CTDB socket
+# (when not root, re-execute this script via sudo with the same arguments)
+[ "$(id -u)" -eq 0 ] || exec sudo "$0" "$@"
+
+# statd must be configured to use this script as its high availability call-out.
+#
+# In most Linux versions this can be done using something like the following...
+#
+# /etc/sysconfig/nfs (Red Hat) or /etc/default/nfs-common (Debian):
+# NFS_HOSTNAME=myhostname
+# STATD_HOSTNAME="${NFS_HOSTNAME} -H /etc/ctdb/statd-callout"
+#
+# Newer Red Hat Linux variants instead use /etc/nfs.conf:
+# [statd]
+# name = myhostname
+# ha-callout = /etc/ctdb/statd-callout
+
+# Default CTDB_BASE to the physical directory containing this script
+[ -n "$CTDB_BASE" ] || \
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+# Overwrite this so we get some logging: pass the given message
+# arguments to script_log, tagged "statd-callout", then exit with
+# status 1.
+die ()
+{
+ script_log "statd-callout" "$@"
+ exit 1
+}
+
+# Try different variables to find config file for NFS_HOSTNAME
+load_system_config "nfs" "nfs-common"
+
+# If NFS_HOSTNAME not set then try to pull it out of /etc/nfs.conf
+if [ -z "$NFS_HOSTNAME" ]; then
+ if type nfsconf >/dev/null 2>&1; then
+ NFS_HOSTNAME=$(nfsconf --get statd name)
+ elif type git >/dev/null 2>&1; then
+ # git to the rescue!
+ # (git config is used here only as a parser for the file)
+ NFS_HOSTNAME=$(git config --file=/etc/nfs.conf statd.name)
+ fi
+fi
+
+[ -n "$NFS_HOSTNAME" ] || \
+ die "NFS_HOSTNAME is not configured. statd-callout failed"
+
+############################################################
+
+ctdb_setup_state_dir "service" "nfs"
+
+# script_state_dir set by ctdb_setup_state_dir()
+# shellcheck disable=SC2154
+d="${script_state_dir}/statd-callout"
+
+# All state files below are created relative to this directory
+mkdir -p "$d" || die "Failed to create directory \"${d}\""
+cd "$d" || die "Failed to change directory to \"${d}\""
+
+pnn=$(ctdb_get_pnn)
+
+############################################################
+
+# Send statd notifications for each "server-IP client-IP" pair read
+# from standard input, one pair per line.
+#
+# For every pair, four smnotify calls are made: a "shutdown" (even
+# state) and a "startup" (odd state) notification, each sent once
+# with the server IP as the server name and once with $NFS_HOSTNAME.
+send_notifies ()
+{
+ _smnotify="${CTDB_HELPER_BINDIR}/smnotify"
+
+ # State must monotonically increase, across the entire
+ # cluster. Use seconds since epoch and hope the time is in
+ # sync across nodes. Even numbers mean service is shut down,
+ # odd numbers mean service is started.
+
+ # Intentionally round to an even number
+ # shellcheck disable=SC2017
+ _state_even=$(( $(date '+%s') / 2 * 2))
+
+ # NOTE(review): _prev is never updated inside the loop, so the
+ # comparison below is true for every non-empty server IP and
+ # the state value is reset on every line - confirm intent
+ _prev=""
+ while read _sip _cip ; do
+ # NOTE: Consider optimising smnotify to read all the
+ # data from stdin and then run it in the background.
+
+ # Reset stateval for each serverip
+ if [ "$_sip" != "$_prev" ] ; then
+ _stateval="$_state_even"
+ fi
+
+ # Send notifies for server shutdown
+ "$_smnotify" --client="$_cip" --ip="$_sip" \
+ --server="$_sip" --stateval="$_stateval"
+ "$_smnotify" --client="$_cip" --ip="$_sip" \
+ --server="$NFS_HOSTNAME" --stateval="$_stateval"
+
+ # Send notifies for server startup
+ _stateval=$((_stateval + 1))
+ "$_smnotify" --client="$_cip" --ip="$_sip" \
+ --server="$_sip" --stateval="$_stateval"
+ "$_smnotify" --client="$_cip" --ip="$_sip" \
+ --server="$NFS_HOSTNAME" --stateval="$_stateval"
+ done
+}
+
+# For each "server-IP client-IP" pair read from standard input, emit
+# the matching "statd-state@..." key with an empty value, and feed the
+# resulting lines to "ctdb ptrans" on ctdb.tdb.
+delete_records ()
+{
+ while read _sip _cip ; do
+ _key="statd-state@${_sip}@${_cip}"
+ echo "\"${_key}\" \"\""
+ done | $CTDB ptrans "ctdb.tdb"
+}
+
+############################################################
+
+case "$1" in
+ # Keep a single file to keep track of the last "add-client" or
+ # "del-client". These get pushed to ctdb.tdb during "update",
+ # which will generally be run once each "monitor" cycle. In this
+ # way we avoid scalability problems with flood of persistent
+ # transactions after a "notify" when all the clients re-take their
+ # locks.
+
+ add-client)
+ # statd does not tell us to which IP the client connected so
+ # we must add it to all the IPs that we serve
+ cip="$2"
+ date=$(date '+%s')
+ # x is intentionally ignored
+ # shellcheck disable=SC2034
+ $CTDB ip -X |
+ tail -n +2 |
+ while IFS="|" read x sip node x ; do
+ [ "$node" = "$pnn" ] || continue # not us
+ key="statd-state@${sip}@${cip}"
+ echo "\"${key}\" \"${date}\"" >"$key"
+ done
+ ;;
+
+ del-client)
+ # statd does not tell us from which IP the client disconnected
+ # so we must write an empty record for all the IPs that we serve
+ cip="$2"
+ # x is intentionally ignored
+ # shellcheck disable=SC2034
+ $CTDB ip -X |
+ tail -n +2 |
+ while IFS="|" read x sip node x ; do
+ [ "$node" = "$pnn" ] || continue # not us
+ key="statd-state@${sip}@${cip}"
+ echo "\"${key}\" \"\"" >"$key"
+ done
+ ;;
+
+ update)
+ files=$(echo statd-state@*)
+ if [ "$files" = "statd-state@*" ] ; then
+ # No files!
+ exit 0
+ fi
+ # Filter out lines for any IP addresses that are not currently
+ # hosted public IP addresses.
+ ctdb_ips=$($CTDB ip | tail -n +2)
+ sed_expr=$(echo "$ctdb_ips" |
+ awk -v pnn="$pnn" 'pnn == $2 {
+ ip = $1; gsub(/\./, "\\.", ip);
+ printf "/statd-state@%s@/p\n", ip }')
+ # Intentional multi-word expansion for multiple files
+ # shellcheck disable=SC2086
+ items=$(sed -n "$sed_expr" $files)
+ if [ -n "$items" ] ; then
+ if echo "$items" | $CTDB ptrans "ctdb.tdb" ; then
+ # shellcheck disable=SC2086
+ rm $files
+ fi
+ fi
+ ;;
+
+ notify)
+ # we must restart the lockmanager (on all nodes) so that we get
+ # a clusterwide grace period (so other clients don't take out
+ # conflicting locks through other nodes before all locks have been
+ # reclaimed)
+
+ # we need these settings to make sure that no tcp connections survive
+ # across a very fast failover/failback
+ #echo 10 > /proc/sys/net/ipv4/tcp_fin_timeout
+ #echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets
+ #echo 0 > /proc/sys/net/ipv4/tcp_max_orphans
+
+ # Delete the notification list for statd, we don't want it to
+ # ping any clients
+ rm -f /var/lib/nfs/statd/sm/*
+ rm -f /var/lib/nfs/statd/sm.bak/*
+
+ # We must also let some time pass between stopping and
+ # restarting the lock manager. Otherwise there is a window
+ # where the lock manager will respond "strangely" immediately
+ # after restarting it, which causes clients to fail to reclaim
+ # their locks.
+ nfs_callout_init
+ "$CTDB_NFS_CALLOUT" "stop" "nlockmgr" >/dev/null 2>&1
+ sleep 2
+ "$CTDB_NFS_CALLOUT" "start" "nlockmgr" >/dev/null 2>&1
+
+ # we now need to send out additional statd notifications to ensure
+ # that clients understand that the lockmanager has restarted.
+ # we have three cases:
+ # 1, clients that ignore the ip address the stat notification came from
+ # and ONLY care about the 'name' in the notify packet.
+ # these clients ONLY work with lock failover IFF that name
+ # can be resolved into an ipaddress that matches the one used
+ # to mount the share. (==linux clients)
+ # This is handled when starting lockmanager above, but those
+ # packets are sent from the "wrong" ip address, something linux
+ # clients are ok with, but other clients will barf at.
+ # 2, Some clients only accept statd packets IFF they come from the
+ # 'correct' ip address.
+ # 2a,Send out the notification using the 'correct' ip address and also
+ # specify the 'correct' hostname in the statd packet.
+ # Some clients require both the correct source address and also the
+ # correct name. (these clients also ONLY work if the ip addresses
+ # used to map the share can be resolved into the name returned in
+ # the notify packet.)
+ # 2b,Other clients require that the source ip address of the notify
+ # packet matches the ip address used to take out the lock.
+ # I.e. that the correct source address is used.
+ # These clients also require that the statd notify packet contains
+ # the name as the ip address used when the lock was taken out.
+ #
+ # Both 2a and 2b are commonly used in lockmanagers since they maximize
+ # probability that the client will accept the statd notify packet and
+ # not just ignore it.
+ # For all IPs we serve, collect info and push to the config database
+
+ # Construct a sed expression to take catdb output and produce pairs of:
+ # server-IP client-IP
+ # but only for the server-IPs that are hosted on this node.
+ ctdb_all_ips=$($CTDB ip all | tail -n +2)
+ sed_expr=$(echo "$ctdb_all_ips" |
+ awk -v pnn="$pnn" 'pnn == $2 {
+ ip = $1; gsub(/\./, "\\.", ip);
+ printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }')
+
+ statd_state=$($CTDB catdb ctdb.tdb | sed -n "$sed_expr" | sort)
+ [ -n "$statd_state" ] || exit 0
+
+ echo "$statd_state" | send_notifies
+ echo "$statd_state" | delete_records
+
+ # Remove any stale touch files (i.e. for IPs not currently
+ # hosted on this node and created since the last "update").
+ # There's nothing else we can do with them at this stage.
+ echo "$ctdb_all_ips" |
+ awk -v pnn="$pnn" 'pnn != $2 { print $1 }' |
+ while read sip ; do
+ rm -f "statd-state@${sip}@"*
+ done
+ ;;
+esac
diff --git a/ctdb/configure b/ctdb/configure
new file mode 100755
index 0000000..48b786b
--- /dev/null
+++ b/ctdb/configure
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# Directory containing this script. Quoted so that a path containing
+# whitespace or glob characters is handled correctly.
+PREVPATH=$(dirname "$0")
+
+# Prefer waf from this directory, falling back to the parent directory
+WAF=buildtools/bin/waf
+[ -x "$WAF" ] || WAF=../buildtools/bin/waf
+
+# using JOBS=1 gives maximum compatibility with
+# systems like AIX which have broken threading in python
+JOBS=1
+export JOBS
+
+# Make sure we don't have any library preloaded.
+unset LD_PRELOAD
+
+# Make sure we get stable hashes
+PYTHONHASHSEED=1
+export PYTHONHASHSEED
+
+cd . || exit 1
+# $PYTHON is deliberately left unquoted: when it is unset or empty it
+# expands to nothing and waf is executed directly.
+# shellcheck disable=SC2086
+$PYTHON "$WAF" configure "$@" || exit 1
+cd "$PREVPATH"
diff --git a/ctdb/configure.rpm b/ctdb/configure.rpm
new file mode 100755
index 0000000..e8ec3b2
--- /dev/null
+++ b/ctdb/configure.rpm
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+if gcc -dM -E - </dev/null | grep -Eq '__(x86_64|powerpc64)__' ; then
+ _libdir=/usr/lib64
+else
+ _libdir=/usr/lib
+fi
+
+CFLAGS="-Wall -g -D_GNU_SOURCE" ./configure \
+ --builtin-libraries=replace,popt \
+ --bundled-libraries=!talloc,!tevent,!tdb \
+ --minimum-library-version=talloc:2.1.14,tdb:1.3.17,tevent:0.9.37 \
+ --prefix=/usr \
+ --includedir=/usr/include/ctdb \
+ --libdir=${_libdir} \
+ --libexecdir=/usr/libexec \
+ --sysconfdir=/etc \
+ --mandir=/usr/man \
+ --localstatedir=/var \
+ $*
diff --git a/ctdb/database/database_conf.c b/ctdb/database/database_conf.c
new file mode 100644
index 0000000..4c7cb2d
--- /dev/null
+++ b/ctdb/database/database_conf.c
@@ -0,0 +1,165 @@
+/*
+ CTDB database config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ database_conf_validate_lock_debug_script() based on
+ event_conf_validate_debug_script():
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "common/conf.h"
+#include "common/path.h"
+
+#include "database_conf.h"
+
+#define DATABASE_CONF_VOLATILE_DB_DIR_DEFAULT CTDB_VARDIR "/volatile"
+#define DATABASE_CONF_PERSISTENT_DB_DIR_DEFAULT CTDB_VARDIR "/persistent"
+#define DATABASE_CONF_STATE_DB_DIR_DEFAULT CTDB_VARDIR "/state"
+
+/*
+ * Validation callback for string options that can not be changed
+ * while running.
+ *
+ * On a configuration reload, logs a warning if the value differs
+ * from the old one.  The update is never rejected: this always
+ * returns true.
+ */
+static bool check_static_string_change(const char *key,
+				       const char *old_value,
+				       const char *new_value,
+				       enum conf_update_mode mode)
+{
+	if (mode != CONF_MODE_RELOAD) {
+		return true;
+	}
+
+	if (strcmp(old_value, new_value) == 0) {
+		return true;
+	}
+
+	D_WARNING("Ignoring update of [%s] -> %s\n",
+		  DATABASE_CONF_SECTION,
+		  key);
+	return true;
+}
+
+/*
+ * Validation callback for boolean options that can not be changed
+ * while running.
+ *
+ * When the update comes from a reload or via the API, logs a warning
+ * if the value differs from the old one.  The update is never
+ * rejected: this always returns true.
+ */
+static bool check_static_boolean_change(const char *key,
+					bool old_value,
+					bool new_value,
+					enum conf_update_mode mode)
+{
+	/*
+	 * Compare mode against both constants.  Using the bare
+	 * CONF_MODE_API constant as the right-hand operand would make
+	 * the test depend on that constant's numeric value, not on mode.
+	 */
+	if (mode == CONF_MODE_RELOAD || mode == CONF_MODE_API) {
+		if (old_value != new_value) {
+			D_WARNING("Ignoring update of [%s] -> %s\n",
+				  DATABASE_CONF_SECTION,
+				  key);
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Validation callback for the lock debug script option.
+ *
+ * A NULL value is accepted.  Otherwise the base name of the given
+ * script is looked up in the directory returned by path_etcdir()
+ * and must be an existing regular file that is executable by its
+ * owner.  Returns true if valid, false (after logging the reason)
+ * if not.
+ */
+static bool database_conf_validate_lock_debug_script(const char *key,
+						     const char *old_script,
+						     const char *new_script,
+						     enum conf_update_mode mode)
+{
+	char name_buf[PATH_MAX];
+	char full_path[PATH_MAX];
+	struct stat sb;
+	int n;
+
+	/* No script configured: nothing to check */
+	if (new_script == NULL) {
+		return true;
+	}
+
+	/*
+	 * Take a private copy for basename(), which POSIX allows to
+	 * modify its argument
+	 */
+	if (strlcpy(name_buf, new_script, sizeof(name_buf)) >=
+	    sizeof(name_buf)) {
+		D_ERR("lock debug script name too long\n");
+		return false;
+	}
+
+	n = snprintf(full_path,
+		     sizeof(full_path),
+		     "%s/%s",
+		     path_etcdir(),
+		     basename(name_buf));
+	if (n < 0 || (size_t)n >= sizeof(full_path)) {
+		D_ERR("lock debug script path too long\n");
+		return false;
+	}
+
+	if (stat(full_path, &sb) == -1) {
+		D_ERR("lock debug script %s does not exist\n", full_path);
+		return false;
+	}
+
+	if (! S_ISREG(sb.st_mode)) {
+		D_ERR("lock debug script %s is not a file\n", full_path);
+		return false;
+	}
+
+	if ((sb.st_mode & S_IXUSR) == 0) {
+		D_ERR("lock debug script %s is not executable\n", full_path);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Validation callback for the database directory options.
+ *
+ * The new directory must exist, otherwise an error is logged and
+ * false is returned.  For an existing directory the result is that
+ * of check_static_string_change().
+ */
+static bool database_conf_validate_db_dir(const char *key,
+					  const char *old_dir,
+					  const char *new_dir,
+					  enum conf_update_mode mode)
+{
+	if (directory_exist(new_dir)) {
+		/* This sometimes warns but always returns true */
+		return check_static_string_change(key,
+						  old_dir,
+						  new_dir,
+						  mode);
+	}
+
+	D_ERR("%s \"%s\" does not exist\n", key, new_dir);
+	return false;
+}
+
+/*
+ * Register the [database] section and its options with the given
+ * configuration context.
+ *
+ * Each option is defined with its default value and the callback
+ * used to validate updates to it.
+ */
+void database_conf_init(struct conf_context *conf)
+{
+ conf_define_section(conf, DATABASE_CONF_SECTION, NULL);
+
+ /* The three database directories share one validation callback */
+ conf_define_string(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_VOLATILE_DB_DIR,
+ DATABASE_CONF_VOLATILE_DB_DIR_DEFAULT,
+ database_conf_validate_db_dir);
+ conf_define_string(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_PERSISTENT_DB_DIR,
+ DATABASE_CONF_PERSISTENT_DB_DIR_DEFAULT,
+ database_conf_validate_db_dir);
+ conf_define_string(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_STATE_DB_DIR,
+ DATABASE_CONF_STATE_DB_DIR_DEFAULT,
+ database_conf_validate_db_dir);
+ /* The lock debug script has no default (NULL) */
+ conf_define_string(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_LOCK_DEBUG_SCRIPT,
+ NULL,
+ database_conf_validate_lock_debug_script);
+ /* TDB mutexes default to enabled */
+ conf_define_boolean(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_TDB_MUTEXES,
+ true,
+ check_static_boolean_change);
+}
diff --git a/ctdb/database/database_conf.h b/ctdb/database/database_conf.h
new file mode 100644
index 0000000..6fa579c
--- /dev/null
+++ b/ctdb/database/database_conf.h
@@ -0,0 +1,35 @@
+/*
+ CTDB database config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_DATABASE_CONF_H__
+#define __CTDB_DATABASE_CONF_H__
+
+#include "common/conf.h"
+
+#define DATABASE_CONF_SECTION "database"
+
+#define DATABASE_CONF_VOLATILE_DB_DIR "volatile database directory"
+#define DATABASE_CONF_PERSISTENT_DB_DIR "persistent database directory"
+#define DATABASE_CONF_STATE_DB_DIR "state database directory"
+#define DATABASE_CONF_LOCK_DEBUG_SCRIPT "lock debug script"
+#define DATABASE_CONF_TDB_MUTEXES "tdb mutexes"
+
+void database_conf_init(struct conf_context *conf);
+
+#endif /* __CTDB_DATABASE_CONF_H__ */
diff --git a/ctdb/doc/cluster_mutex_helper.txt b/ctdb/doc/cluster_mutex_helper.txt
new file mode 100644
index 0000000..4ee018f
--- /dev/null
+++ b/ctdb/doc/cluster_mutex_helper.txt
@@ -0,0 +1,80 @@
+Writing CTDB cluster mutex helpers
+==================================
+
+CTDB uses cluster-wide mutexes to protect against a "split brain",
+which could occur if the cluster becomes partitioned due to network
+failure or similar.
+
+CTDB uses a cluster-wide mutex for its "cluster lock", which is used
+to ensure that only one database recovery can happen at a time. For
+an overview of cluster lock configuration see the CLUSTER LOCK
+section in ctdb(7). CTDB tries to ensure correct operation of the
+cluster lock by attempting to take the cluster lock when CTDB knows
+that it should already be held.
+
+By default, CTDB uses a supplied mutex helper that uses a fcntl(2)
+lock on a specified file in the cluster filesystem.
+
+However, a user supplied mutex helper can be used as an alternative.
+The rest of this document describes the API for mutex helpers.
+
+A mutex helper is an external executable
+----------------------------------------
+
+A mutex helper is an external executable that can be run by CTDB.
+There are no CTDB-specific compilation dependencies. This means that
+a helper could easily be scripted around existing commands. Mutex
+helpers are run relatively rarely and are not time critical.
+Therefore, reliability is preferred over high performance.
+
+Taking a mutex with a helper
+----------------------------
+
+1. Helper is executed with helper-specific arguments
+
+2. Helper attempts to take mutex
+
+3. On success, the helper writes ASCII 0 to standard output
+
+4. Helper stays running, holding mutex, awaiting termination by CTDB
+
+5. When a helper receives SIGTERM it must release any mutex it is
+ holding and then exit.
+
+Status codes
+------------
+
+CTDB ignores the exit code of a helper. Instead, CTDB reacts to a
+single ASCII character that is sent to it via a helper's standard
+output.
+
+Valid status codes are:
+
+0 - The helper took the mutex and is holding it, awaiting termination.
+
+1 - The helper was unable to take the mutex due to contention.
+
+2 - The helper took too long to take the mutex.
+
+ Helpers do not need to implement this status code. CTDB
+ already implements any required timeout handling.
+
+3 - An unexpected error occurred.
+
+If a 0 status code is sent then the helper should periodically
+check if the (original) parent process still exists while awaiting
+termination. If the parent process disappears then the helper should
+release the mutex and exit. This avoids stale mutexes. Note that a
+helper should never wait for parent process ID 1!
+
+If a non-0 status code is sent then the helper can exit immediately.
+However, if the helper does not exit then it must terminate if it
+receives SIGTERM.
+
+Logging
+-------
+
+Anything written to standard error by a helper is incorporated into
+CTDB's logs. A helper should generally only output to stderr for
+unexpected errors and avoid output to stderr on success or on mutex
+contention.
diff --git a/ctdb/doc/ctdb-etcd.7.xml b/ctdb/doc/ctdb-etcd.7.xml
new file mode 100644
index 0000000..af343db
--- /dev/null
+++ b/ctdb/doc/ctdb-etcd.7.xml
@@ -0,0 +1,119 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="ctdb-etcd.7">
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Jose A. Rivera
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2016</year>
+ <holder>Jose A. Rivera</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>ctdb-etcd</refentrytitle>
+ <manvolnum>7</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb-etcd</refname>
+ <refpurpose>CTDB etcd integration</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ctdb_etcd_lock</command>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+ <para>
+ ctdb_etcd_lock is intended to be run as a mutex helper for CTDB. It
+ will try to connect to an existing etcd cluster and grab a lock in that
+ cluster to function as CTDB's cluster lock. Please see
+ <emphasis>ctdb/doc/cluster_mutex_helper.txt</emphasis> for details on
+ the mutex helper API. To use this, include the following line in
+ the <literal>[cluster]</literal> section of
+ <citerefentry><refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>:
+ </para>
+ <screen format="linespecific">
+cluster lock = !/usr/local/usr/libexec/ctdb/ctdb_etcd_lock
+ </screen>
+ <para>
+ You can also pass "-v", "-vv", or "-vvv" to include verbose output in
+ the CTDB log. Additional "v"s indicate increases in verbosity.
+ </para>
+ <para>
+ This mutex helper expects the system Python interpreter to have access
+ to the etcd Python module. It also expects an etcd cluster to be
+ configured and running. To integrate with this, there is an optional
+ config file of the following format:
+ </para>
+ <screen format="linespecific">
+key = value
+ </screen>
+ <para>
+ The following configuration parameters (and their defaults) are defined
+ for use by ctdb_etcd_lock:
+ </para>
+ <screen format="linespecific">
+port = 2379 # connecting port for the etcd cluster
+lock_ttl = 9 # seconds for TTL
+refresh = 2 # seconds between attempts to maintain lock
+locks_dir = _ctdb # where to store CTDB locks in etcd
+ # The final etcd directory for any given lock looks like:
+ # /_locks/{locks_dir}/{netbios name}/
+ </screen>
+ <para>
+ In addition, any keyword parameter that can be used to configure an
+ etcd client may be specified and modified here. For more documentation
+ on these parameters, see here: https://github.com/jplana/python-etcd/
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+
+</refentry>
diff --git a/ctdb/doc/ctdb-script.options.5.xml b/ctdb/doc/ctdb-script.options.5.xml
new file mode 100644
index 0000000..a01b10a
--- /dev/null
+++ b/ctdb/doc/ctdb-script.options.5.xml
@@ -0,0 +1,1137 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<refentry id="ctdb-script.options.5">
+
+ <refmeta>
+ <refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb-script.options</refname>
+ <refpurpose>CTDB scripts configuration files</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <refsect2>
+ <title>Location</title>
+ <para>
+ Each CTDB script has 2 possible locations for its configuration options:
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ <filename>/usr/local/etc/ctdb/script.options</filename>
+ </term>
+ <listitem>
+ <para>
+ This is a catch-all global file for general purpose
+ scripts and for options that are used in multiple event
+ scripts.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <parameter>SCRIPT</parameter>.options
+ </term>
+ <listitem>
+ <para>
+ That is, options for
+ <filename><parameter>SCRIPT</parameter></filename> are
+ placed in a file alongside the script, with a ".options"
+ suffix added. This style is usually recommended for event
+ scripts.
+ </para>
+
+ <para>
+ Options in this script-specific file override those in
+ the global file.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>Contents</title>
+
+ <para>
+ These files should include simple shell-style variable
+ assignments and shell-style comments.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Monitoring Thresholds</title>
+
+ <para>
+ Event scripts can monitor resources or services. When a
+ problem is detected, it may be better to warn about a problem
+ rather than to immediately fail monitoring and mark a node as
+ unhealthy. CTDB provides support for event scripts to do
+ threshold-based monitoring.
+ </para>
+
+ <para>
+ A threshold setting looks like
+ <parameter>WARNING_THRESHOLD<optional>:ERROR_THRESHOLD</optional></parameter>.
+ If the number of problems is ≥ WARNING_THRESHOLD then the
+ script will log a warning and continue. If the number of
+ problems is ≥ ERROR_THRESHOLD then the script will log an
+ error and exit with failure, causing monitoring to fail. Note
+ that ERROR_THRESHOLD is optional, and follows the optional
+ colon (:) separator.
+ </para>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>NETWORK CONFIGURATION</title>
+
+ <refsect2>
+ <title>10.interface</title>
+
+ <para>
+ This event script handles monitoring of interfaces used by
+ public IP addresses.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_PARTIALLY_ONLINE_INTERFACES=yes|no
+ </term>
+ <listitem>
+ <para>
+ Whether one or more offline interfaces should cause a
+ monitor event to fail if there are other interfaces that
+ are up. If this is "yes" and a node has some interfaces
+ that are down then <command>ctdb status</command> will
+ display the node as "PARTIALLYONLINE".
+ </para>
+
+ <para>
+ Note that CTDB_PARTIALLY_ONLINE_INTERFACES=yes is not
+ generally compatible with NAT gateway or LVS. NAT
+ gateway relies on the interface configured by
+ CTDB_NATGW_PUBLIC_IFACE to be up and LVS relies on
+ CTDB_LVS_PUBLIC_IFACE to be up. CTDB does not check if
+ these options are set in an incompatible way so care is
+ needed to understand the interaction.
+ </para>
+
+ <para>
+ Default is "no".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>11.natgw</title>
+
+ <para>
+ Provides CTDB's NAT gateway functionality.
+ </para>
+
+ <para>
+ NAT gateway is used to configure fallback routing for nodes
+ when they do not host any public IP addresses. For example,
+ it allows unhealthy nodes to reliably communicate with
+ external infrastructure. One node in a NAT gateway group will
+ be designated as the NAT gateway leader node and other (follower)
+ nodes will be configured with fallback routes via the NAT
+ gateway leader node. For more information, see the
+ <citetitle>NAT GATEWAY</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>CTDB_NATGW_DEFAULT_GATEWAY=<parameter>IPADDR</parameter></term>
+ <listitem>
+ <para>
+ IPADDR is an alternate network gateway to use on the NAT
+ gateway leader node. If set, a fallback default route
+ is added via this network gateway.
+ </para>
+ <para>
+ No default. Setting this variable is optional - if not
+ set then no route is created on the NAT gateway leader
+ node.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>CTDB_NATGW_NODES=<parameter>FILENAME</parameter></term>
+ <listitem>
+ <para>
+ FILENAME contains the list of nodes that belong to the
+ same NAT gateway group.
+ </para>
+ <para>
+ File format:
+ <screen>
+<parameter>IPADDR</parameter> <optional>follower-only</optional>
+ </screen>
+ </para>
+ <para>
+ IPADDR is the private IP address of each node in the NAT
+ gateway group.
+ </para>
+ <para>
+ If "follower-only" is specified then the corresponding node
+ can not be the NAT gateway leader node. In this case
+ <varname>CTDB_NATGW_PUBLIC_IFACE</varname> and
+ <varname>CTDB_NATGW_PUBLIC_IP</varname> are optional and
+ unused.
+ </para>
+ <para>
+ No default, usually
+ <filename>/usr/local/etc/ctdb/natgw_nodes</filename> when enabled.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>CTDB_NATGW_PRIVATE_NETWORK=<parameter>IPADDR/MASK</parameter></term>
+ <listitem>
+ <para>
+ IPADDR/MASK is the private sub-network that is
+ internally routed via the NAT gateway leader node. This
+ is usually the private network that is used for node
+ addresses.
+ </para>
+ <para>
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>CTDB_NATGW_PUBLIC_IFACE=<parameter>IFACE</parameter></term>
+ <listitem>
+ <para>
+ IFACE is the network interface on which the
+ CTDB_NATGW_PUBLIC_IP will be configured.
+ </para>
+ <para>
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>CTDB_NATGW_PUBLIC_IP=<parameter>IPADDR/MASK</parameter></term>
+ <listitem>
+ <para>
+ IPADDR/MASK indicates the IP address that is used for
+ outgoing traffic (originating from
+ CTDB_NATGW_PRIVATE_NETWORK) on the NAT gateway leader
+ node. This <emphasis>must not</emphasis> be a
+ configured public IP address.
+ </para>
+ <para>
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>CTDB_NATGW_STATIC_ROUTES=<parameter>IPADDR/MASK[@GATEWAY]</parameter> ...</term>
+ <listitem>
+ <para>
+ Each IPADDR/MASK identifies a network or host to which
+ NATGW should create a fallback route, instead of
+ creating a single default route. This can be used when
+ there is already a default route, via an interface that
+ can not reach required infrastructure, that overrides
+ the NAT gateway default route.
+ </para>
+ <para>
+ If GATEWAY is specified then the corresponding route on
+ the NATGW leader node will be via GATEWAY. Such routes
+ are created even if
+ <varname>CTDB_NATGW_DEFAULT_GATEWAY</varname> is not
+ specified. If GATEWAY is not specified for some
+ networks then routes are only created on the NATGW
+ leader node for those networks if
+ <varname>CTDB_NATGW_DEFAULT_GATEWAY</varname> is
+ specified.
+ </para>
+ <para>
+ This should be used with care to avoid causing traffic
+ to unnecessarily double-hop through the NAT gateway
+ leader, even when a node is hosting public IP addresses.
+ Each specified network or host should probably have a
+ corresponding automatically created link route or static
+ route to avoid this.
+ </para>
+ <para>
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <refsect3>
+ <title>Example</title>
+ <screen>
+CTDB_NATGW_NODES=/usr/local/etc/ctdb/natgw_nodes
+CTDB_NATGW_PRIVATE_NETWORK=192.168.1.0/24
+CTDB_NATGW_DEFAULT_GATEWAY=10.0.0.1
+CTDB_NATGW_PUBLIC_IP=10.0.0.227/24
+CTDB_NATGW_PUBLIC_IFACE=eth0
+ </screen>
+
+ <para>
+ A variation that ensures that infrastructure (ADS, DNS, ...)
+ directly attached to the public network (10.0.0.0/24) is
+ always reachable would look like this:
+ </para>
+ <screen>
+CTDB_NATGW_NODES=/usr/local/etc/ctdb/natgw_nodes
+CTDB_NATGW_PRIVATE_NETWORK=192.168.1.0/24
+CTDB_NATGW_PUBLIC_IP=10.0.0.227/24
+CTDB_NATGW_PUBLIC_IFACE=eth0
+CTDB_NATGW_STATIC_ROUTES=10.0.0.0/24
+ </screen>
+ <para>
+ Note that <varname>CTDB_NATGW_DEFAULT_GATEWAY</varname> is
+ not specified.
+ </para>
+ </refsect3>
+
+ </refsect2>
+
+ <refsect2>
+ <title>13.per_ip_routing</title>
+
+ <para>
+ Provides CTDB's policy routing functionality.
+ </para>
+
+ <para>
+ A node running CTDB may be a component of a complex network
+ topology. In particular, public addresses may be spread
+ across several different networks (or VLANs) and it may not be
+ possible to route packets from these public addresses via the
+ system's default route. Therefore, CTDB has support for
+ policy routing via the <filename>13.per_ip_routing</filename>
+ eventscript. This allows routing to be specified for packets
+ sourced from each public address. The routes are added and
+ removed as CTDB moves public addresses between nodes.
+ </para>
+
+ <para>
+ For more information, see the <citetitle>POLICY
+ ROUTING</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>CTDB_PER_IP_ROUTING_CONF=<parameter>FILENAME</parameter></term>
+ <listitem>
+ <para>
+ FILENAME contains elements for constructing the desired
+ routes for each source address.
+ </para>
+
+ <para>
+ The special FILENAME value
+ <constant>__auto_link_local__</constant> indicates that no
+ configuration file is provided and that CTDB should
+ generate reasonable link-local routes for each public IP
+ address.
+ </para>
+
+ <para>
+ File format:
+ <screen>
+ <parameter>IPADDR</parameter> <parameter>DEST-IPADDR/MASK</parameter> <optional><parameter>GATEWAY-IPADDR</parameter></optional>
+ </screen>
+ </para>
+
+ <para>
+ No default, usually
+ <filename>/usr/local/etc/ctdb/policy_routing</filename>
+ when enabled.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_PER_IP_ROUTING_RULE_PREF=<parameter>NUM</parameter>
+ </term>
+ <listitem>
+ <para>
+ NUM sets the priority (or preference) for the routing
+ rules that are added by CTDB.
+ </para>
+
+ <para>
+ This should be (strictly) greater than 0 and (strictly)
+ less than 32766. A priority of 100 is recommended, unless
+ this conflicts with a priority already in use on the
+ system. See
+ <citerefentry><refentrytitle>ip</refentrytitle>
+ <manvolnum>8</manvolnum></citerefentry>, for more details.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_PER_IP_ROUTING_TABLE_ID_LOW=<parameter>LOW-NUM</parameter>,
+ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=<parameter>HIGH-NUM</parameter>
+ </term>
+ <listitem>
+ <para>
+ CTDB determines a unique routing table number to use for
+ the routing related to each public address. LOW-NUM and
+ HIGH-NUM indicate the minimum and maximum routing table
+ numbers that are used.
+ </para>
+
+ <para>
+ <citerefentry><refentrytitle>ip</refentrytitle>
+ <manvolnum>8</manvolnum></citerefentry> uses some
+ reserved routing table numbers below 255. Therefore,
+ CTDB_PER_IP_ROUTING_TABLE_ID_LOW should be (strictly)
+ greater than 255.
+ </para>
+
+ <para>
+ CTDB uses the standard file
+ <filename>/etc/iproute2/rt_tables</filename> to maintain
+ a mapping between the routing table numbers and labels.
+ The label for a public address
+ <replaceable>ADDR</replaceable> will look like
+ ctdb.<replaceable>ADDR</replaceable>. This means that
+ the associated rules and routes are easy to read (and
+ manipulate).
+ </para>
+
+ <para>
+ No default, usually 1000 and 9000.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <refsect3>
+ <title>Example</title>
+ <screen>
+CTDB_PER_IP_ROUTING_CONF=/usr/local/etc/ctdb/policy_routing
+CTDB_PER_IP_ROUTING_RULE_PREF=100
+CTDB_PER_IP_ROUTING_TABLE_ID_LOW=1000
+CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000
+ </screen>
+ </refsect3>
+
+ </refsect2>
+
+ <refsect2>
+ <title>91.lvs</title>
+
+ <para>
+ Provides CTDB's LVS functionality.
+ </para>
+
+ <para>
+ For a general description see the <citetitle>LVS</citetitle>
+ section in <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_LVS_NODES=<parameter>FILENAME</parameter>
+ </term>
+ <listitem>
+ <para>
+ FILENAME contains the list of nodes that belong to the
+ same LVS group.
+ </para>
+ <para>
+ File format:
+ <screen>
+<parameter>IPADDR</parameter> <optional>follower-only</optional>
+ </screen>
+ </para>
+ <para>
+ IPADDR is the private IP address of each node in the LVS
+ group.
+ </para>
+ <para>
+ If "follower-only" is specified then the corresponding node
+ can not be the LVS leader node. In this case
+ <varname>CTDB_LVS_PUBLIC_IFACE</varname> and
+ <varname>CTDB_LVS_PUBLIC_IP</varname> are optional and
+ unused.
+ </para>
+ <para>
+ No default, usually
+ <filename>/usr/local/etc/ctdb/lvs_nodes</filename> when enabled.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_LVS_PUBLIC_IFACE=<parameter>INTERFACE</parameter>
+ </term>
+ <listitem>
+ <para>
+ INTERFACE is the network interface that clients will use
+ to connect to <varname>CTDB_LVS_PUBLIC_IP</varname>.
+ This is optional for follower-only nodes.
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_LVS_PUBLIC_IP=<parameter>IPADDR</parameter>
+ </term>
+ <listitem>
+ <para>
+ CTDB_LVS_PUBLIC_IP is the LVS public address. No
+ default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>SERVICE CONFIGURATION</title>
+
+ <para>
+ CTDB can be configured to manage and/or monitor various NAS (and
+ other) services via its eventscripts.
+ </para>
+
+ <para>
+ In the simplest case CTDB will manage a service. This means the
+ service will be started and stopped along with CTDB, CTDB will
+ monitor the service and CTDB will do any required
+ reconfiguration of the service when public IP addresses are
+ failed over.
+ </para>
+
+ <refsect2>
+ <title>20.multipathd</title>
+
+ <para>
+ Provides CTDB's Linux multipathd service management.
+ </para>
+
+ <para>
+ It can monitor multipath devices to ensure that active paths
+ are available.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>
+ CTDB_MONITOR_MPDEVICES=<parameter>MP-DEVICE-LIST</parameter>
+ </term>
+ <listitem>
+ <para>
+ MP-DEVICE-LIST is a list of multipath devices for CTDB to monitor.
+ </para>
+ <para>
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>31.clamd</title>
+
+ <para>
+ This event script provides CTDB's ClamAV anti-virus service
+ management.
+ </para>
+
+ <para>
+ This eventscript is not enabled by default. Use <command>ctdb
+ enablescript</command> to enable it.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_CLAMD_SOCKET=<parameter>FILENAME</parameter>
+ </term>
+ <listitem>
+ <para>
+ FILENAME is the socket to monitor ClamAV.
+ </para>
+ <para>
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>40.vsftpd</title>
+
+ <para>
+ Provides CTDB's vsftpd service management.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>
+ CTDB_VSFTPD_MONITOR_THRESHOLDS=<parameter>THRESHOLDS</parameter>
+ </term>
+ <listitem>
+ <para>
+ THRESHOLDS indicates how many consecutive monitoring
+ attempts need to report that vsftpd is not listening on
+ TCP port 21 before a warning is logged and before
+ monitoring fails. See the <citetitle>Monitoring
+ Thresholds</citetitle> section for a description of how
+ monitoring thresholds work.
+ </para>
+ <para>
+ Default is 1:2.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>48.netbios</title>
+
+ <para>
+ Provides CTDB's NetBIOS service management.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>
+ CTDB_SERVICE_NMB=<parameter>SERVICE</parameter>
+ </term>
+ <listitem>
+ <para>
+ Distribution specific SERVICE for managing nmbd.
+ </para>
+ <para>
+ Default is distribution-dependent.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>49.winbind</title>
+
+ <para>
+ Provides CTDB's Samba winbind service management.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_SERVICE_WINBIND=<parameter>SERVICE</parameter>
+ </term>
+ <listitem>
+ <para>
+ Distribution specific SERVICE for managing winbindd.
+ </para>
+ <para>
+ Default is "winbind".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>50.samba</title>
+
+ <para>
+ Provides the core of CTDB's Samba file service management.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_SAMBA_CHECK_PORTS=<parameter>PORT-LIST</parameter>
+ </term>
+ <listitem>
+ <para>
+ When monitoring Samba, check TCP ports in
+ space-separated PORT-LIST.
+ </para>
+ <para>
+ Default is to monitor ports that Samba is configured to listen on.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_SAMBA_SKIP_SHARE_CHECK=yes|no
+ </term>
+ <listitem>
+ <para>
+ As part of monitoring, should CTDB skip the check for
+ the existence of each directory configured as a share in
+ Samba. This may be desirable if there is a large number
+ of shares.
+ </para>
+ <para>
+ Default is no.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_SERVICE_SMB=<parameter>SERVICE</parameter>
+ </term>
+ <listitem>
+ <para>
+ Distribution specific SERVICE for managing smbd.
+ </para>
+ <para>
+ Default is distribution-dependent.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>60.nfs</title>
+
+ <para>
+ This event script (along with 06.nfs) provides CTDB's NFS
+ service management.
+ </para>
+
+ <para>
+ This includes parameters for the kernel NFS server.
+ Alternative NFS subsystems (such as <ulink
+ url="https://github.com/nfs-ganesha/nfs-ganesha/wiki">NFS-Ganesha</ulink>)
+ can be integrated using <varname>CTDB_NFS_CALLOUT</varname>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_NFS_CALLOUT=<parameter>COMMAND</parameter>
+ </term>
+ <listitem>
+ <para>
+ COMMAND specifies the path to a callout to handle
+ interactions with the configured NFS system, including
+ startup, shutdown, monitoring.
+ </para>
+ <para>
+ Default is the included
+ <command>nfs-linux-kernel-callout</command>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_NFS_CHECKS_DIR=<parameter>DIRECTORY</parameter>
+ </term>
+ <listitem>
+ <para>
+ Specifies the path to a DIRECTORY containing files that
+ describe how to monitor the responsiveness of NFS RPC
+ services. See the README file for this directory for an
+ explanation of the contents of these "check" files.
+ </para>
+ <para>
+ CTDB_NFS_CHECKS_DIR can be used to point to different
+ sets of checks for different NFS servers.
+ </para>
+ <para>
+ One way of using this is to have it point to, say,
+ <filename>/usr/local/etc/ctdb/nfs-checks-enabled.d</filename>
+ and populate it with symbolic links to the desired check
+ files. This avoids duplication and is upgrade-safe.
+ </para>
+ <para>
+ Default is
+ <filename>/usr/local/etc/ctdb/nfs-checks.d</filename>,
+ which contains NFS RPC checks suitable for Linux kernel
+ NFS.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_NFS_SKIP_SHARE_CHECK=yes|no
+ </term>
+ <listitem>
+ <para>
+ As part of monitoring, should CTDB skip the check for
+ the existence of each directory exported via NFS. This
+ may be desirable if there is a large number of exports.
+ </para>
+ <para>
+ Default is no.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_RPCINFO_LOCALHOST=<parameter>IPADDR</parameter>|<parameter>HOSTNAME</parameter>
+ </term>
+ <listitem>
+ <para>
+ IPADDR or HOSTNAME indicates the address that
+ <command>rpcinfo</command> should connect to when doing
+ <command>rpcinfo</command> check on IPv4 RPC service during
+ monitoring. Optimally this would be "localhost".
+ However, this can add some performance overheads.
+ </para>
+ <para>
+ Default is "127.0.0.1".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_RPCINFO_LOCALHOST6=<parameter>IPADDR</parameter>|<parameter>HOSTNAME</parameter>
+ </term>
+ <listitem>
+ <para>
+ IPADDR or HOSTNAME indicates the address that
+ <command>rpcinfo</command> should connect to when doing
+ <command>rpcinfo</command> check on IPv6 RPC service
+ during monitoring. Optimally this would be "localhost6"
+ (or similar). However, this can add some performance
+ overheads.
+ </para>
+ <para>
+ Default is "::1".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_NFS_STATE_FS_TYPE=<parameter>TYPE</parameter>
+ </term>
+ <listitem>
+ <para>
+ The type of filesystem used for a clustered NFS' shared
+ state. No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_NFS_STATE_MNT=<parameter>DIR</parameter>
+ </term>
+ <listitem>
+ <para>
+ The directory where a clustered NFS' shared state will be
+ located. No default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>70.iscsi</title>
+
+ <para>
+ Provides CTDB's Linux iSCSI tgtd service management.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_START_ISCSI_SCRIPTS=<parameter>DIRECTORY</parameter>
+ </term>
+ <listitem>
+ <para>
+ DIRECTORY on shared storage containing scripts to start
+ tgtd for each public IP address.
+ </para>
+ <para>
+ No default.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>
+ DATABASE SETUP
+ </title>
+
+ <para>
+ CTDB checks the consistency of databases during startup.
+ </para>
+
+ <refsect2>
+ <title>00.ctdb</title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>CTDB_MAX_CORRUPT_DB_BACKUPS=<parameter>NUM</parameter></term>
+ <listitem>
+ <para>
+ NUM is the maximum number of volatile TDB database
+ backups to be kept (for each database) when a corrupt
+ database is found during startup. Volatile TDBs are
+ zeroed during startup so backups are needed to debug
+ any corruption that occurs before a restart.
+ </para>
+ <para>
+ Default is 10.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>SYSTEM RESOURCE MONITORING</title>
+
+ <refsect2>
+ <title>
+ 05.system
+ </title>
+
+ <para>
+ Provides CTDB's filesystem and memory usage monitoring.
+ </para>
+
+ <para>
+ CTDB can experience seemingly random (performance and other)
+ issues if system resources become too constrained. Options in
+ this section can be enabled to allow certain system resources
+ to be checked. They allow warnings to be logged and nodes to
+ be marked unhealthy when system resource usage reaches the
+ configured thresholds.
+ </para>
+
+ <para>
+ Some checks are enabled by default. It is recommended that
+ these checks remain enabled or are augmented by extra checks.
+ There is no supported way of completely disabling the checks.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ CTDB_MONITOR_FILESYSTEM_USAGE=<parameter>FS-LIMIT-LIST</parameter>
+ </term>
+ <listitem>
+ <para>
+ FS-LIMIT-LIST is a space-separated list of
+ <parameter>FILESYSTEM</parameter>:<parameter>WARN_LIMIT</parameter><optional>:<parameter>UNHEALTHY_LIMIT</parameter></optional>
+ triples indicating that warnings should be logged if the
+ space used on FILESYSTEM reaches WARN_LIMIT%. If usage
+ reaches UNHEALTHY_LIMIT then the node should be flagged
+ unhealthy. Either WARN_LIMIT or UNHEALTHY_LIMIT may be
+ left blank, meaning that check will be omitted.
+ </para>
+
+ <para>
+ Default is to warn for each filesystem containing a
+ database directory
+ (<literal>volatile&nbsp;database&nbsp;directory</literal>,
+ <literal>persistent&nbsp;database&nbsp;directory</literal>,
+ <literal>state&nbsp;database&nbsp;directory</literal>)
+ with a threshold of 90%.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ CTDB_MONITOR_MEMORY_USAGE=<parameter>MEM-LIMITS</parameter>
+ </term>
+ <listitem>
+ <para>
+ MEM-LIMITS takes the form
+ <parameter>WARN_LIMIT</parameter><optional>:<parameter>UNHEALTHY_LIMIT</parameter></optional>
+ indicating that warnings should be logged if memory
+ usage reaches WARN_LIMIT%. If usage reaches
+ UNHEALTHY_LIMIT then the node should be flagged
+ unhealthy. Either WARN_LIMIT or UNHEALTHY_LIMIT may be
+ left blank, meaning that check will be omitted.
+ </para>
+ <para>
+ Default is 80, so warnings will be logged when memory
+ usage reaches 80%.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect2>
+
+ </refsect1>
+
+
+ <refsect1>
+ <title>EVENT SCRIPT DEBUGGING</title>
+
+ <refsect2>
+ <title>
+ debug-hung-script.sh
+ </title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>CTDB_DEBUG_HUNG_SCRIPT_STACKPAT=<parameter>REGEXP</parameter></term>
+ <listitem>
+ <para>
+ REGEXP specifies interesting processes for which stack
+ traces should be logged when debugging hung eventscripts
+ and those processes are matched in pstree output.
+ REGEXP is an extended regexp so choices are separated by
+ pipes ('|'). However, REGEXP should not contain
+ parentheses. See also the <citerefentry><refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>
+ [event] "debug&nbsp;script" option.
+ </para>
+ <para>
+ Default is "exportfs|rpcinfo".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>FILES</title>
+
+ <simplelist>
+ <member><filename>/usr/local/etc/ctdb/script.options</filename></member>
+ </simplelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Amitay Isaacs,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ctdb-statistics.7.xml b/ctdb/doc/ctdb-statistics.7.xml
new file mode 100644
index 0000000..0d10484
--- /dev/null
+++ b/ctdb/doc/ctdb-statistics.7.xml
@@ -0,0 +1,689 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<refentry id="ctdb-statistics.7">
+
+ <refmeta>
+ <refentrytitle>ctdb-statistics</refentrytitle>
+ <manvolnum>7</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb-statistics</refname>
+ <refpurpose>CTDB statistics output</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>OVERALL STATISTICS</title>
+
+ <para>
+ CTDB maintains information about various messages communicated
+ and some of the important operations per node. See the
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry> commands
+ <command>statistics</command> and <command>statisticsreset</command>
+ for displaying statistics.
+ </para>
+
+ <refsect2>
+ <title>Example: ctdb statistics</title>
+ <screen>
+CTDB version 1
+Current time of statistics : Fri Sep 12 13:32:32 2014
+Statistics collected since : (000 01:49:20) Fri Sep 12 11:43:12 2014
+ num_clients 6
+ frozen 0
+ recovering 0
+ num_recoveries 2
+ client_packets_sent 281293
+ client_packets_recv 296317
+ node_packets_sent 452387
+ node_packets_recv 182394
+ keepalive_packets_sent 3927
+ keepalive_packets_recv 3928
+ node
+ req_call 48605
+ reply_call 1
+ req_dmaster 23404
+ reply_dmaster 24917
+ reply_error 0
+ req_message 958
+ req_control 197513
+ reply_control 153705
+ client
+ req_call 130866
+ req_message 770
+ req_control 168921
+ timeouts
+ call 0
+ control 0
+ traverse 0
+ locks
+ num_calls 220
+ num_current 0
+ num_pending 0
+ num_failed 0
+ total_calls 130866
+ pending_calls 0
+ childwrite_calls 1
+ pending_childwrite_calls 0
+ memory_used 334490
+ max_hop_count 18
+ total_ro_delegations 2
+ total_ro_revokes 2
+ hop_count_buckets: 42816 5464 26 1 0 0 0 0 0 0 0 0 0 0 0 0
+ lock_buckets: 9 165 14 15 7 2 2 0 0 0 0 0 0 0 0 0
+ locks_latency MIN/AVG/MAX 0.000685/0.160302/6.369342 sec out of 214
+ reclock_ctdbd MIN/AVG/MAX 0.004940/0.004969/0.004998 sec out of 2
+ reclock_recd MIN/AVG/MAX 0.000000/0.000000/0.000000 sec out of 0
+ call_latency MIN/AVG/MAX 0.000006/0.000719/4.562991 sec out of 126626
+ childwrite_latency MIN/AVG/MAX 0.014527/0.014527/0.014527 sec out of 1
+ </screen>
+ </refsect2>
+
+ <refsect2>
+ <title>CTDB version</title>
+ <para>
+ Version of the ctdb protocol used by the node.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Current time of statistics</title>
+ <para>
+ Time when the statistics are generated.
+ </para>
+ <para>
+ This is useful when collecting statistics output periodically
+ for post-processing.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Statistics collected since</title>
+ <para>
+ Time when ctdb was started or the last time statistics were reset.
+ The output shows the duration and the timestamp.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>num_clients</title>
+ <para>
+ Number of processes currently connected to CTDB's unix socket.
+ This includes recovery daemon, ctdb tool and samba processes
+ (smbd, winbindd).
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>frozen</title>
+ <para>
+ 1 if the databases are currently frozen, 0 otherwise.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>recovering</title>
+ <para>
+ 1 if recovery is active, 0 otherwise.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>num_recoveries</title>
+ <para>
+ Number of recoveries since the start of ctdb or since the last
+ statistics reset.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>client_packets_sent</title>
+ <para>
+ Number of packets sent to client processes via unix domain socket.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>client_packets_recv</title>
+ <para>
+ Number of packets received from client processes via unix domain socket.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>node_packets_sent</title>
+ <para>
+ Number of packets sent to the other nodes in the cluster via TCP.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>node_packets_recv</title>
+ <para>
+ Number of packets received from the other nodes in the cluster via TCP.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>keepalive_packets_sent</title>
+ <para>
+ Number of keepalive messages sent to other nodes.
+ </para>
+ <para>
+ CTDB periodically sends keepalive messages to other nodes.
+ See <citetitle>KeepaliveInterval</citetitle> tunable in
+ <citerefentry><refentrytitle>ctdb-tunables</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>keepalive_packets_recv</title>
+ <para>
+ Number of keepalive messages received from other nodes.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>node</title>
+ <para>
+ This section lists various types of messages processed which
+ originated from other nodes via TCP.
+ </para>
+
+ <refsect3>
+ <title>req_call</title>
+ <para>
+ Number of REQ_CALL messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>reply_call</title>
+ <para>
+ Number of REPLY_CALL messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>req_dmaster</title>
+ <para>
+ Number of REQ_DMASTER messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>reply_dmaster</title>
+ <para>
+ Number of REPLY_DMASTER messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>reply_error</title>
+ <para>
+ Number of REPLY_ERROR messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>req_message</title>
+ <para>
+ Number of REQ_MESSAGE messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>req_control</title>
+ <para>
+ Number of REQ_CONTROL messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>reply_control</title>
+ <para>
+ Number of REPLY_CONTROL messages from the other nodes.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>req_tunnel</title>
+ <para>
+ Number of REQ_TUNNEL messages from the other nodes.
+ </para>
+ </refsect3>
+
+ </refsect2>
+
+ <refsect2>
+ <title>client</title>
+ <para>
+ This section lists various types of messages processed which
+ originated from clients via unix domain socket.
+ </para>
+
+ <refsect3>
+ <title>req_call</title>
+ <para>
+ Number of REQ_CALL messages from the clients.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>req_message</title>
+ <para>
+ Number of REQ_MESSAGE messages from the clients.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>req_control</title>
+ <para>
+ Number of REQ_CONTROL messages from the clients.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>req_tunnel</title>
+ <para>
+ Number of REQ_TUNNEL messages from the clients.
+ </para>
+ </refsect3>
+
+ </refsect2>
+
+ <refsect2>
+ <title>timeouts</title>
+ <para>
+ This section lists timeouts that occurred when sending various messages.
+ </para>
+
+ <refsect3>
+ <title>call</title>
+ <para>
+ Number of timeouts for REQ_CALL messages.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>control</title>
+ <para>
+ Number of timeouts for REQ_CONTROL messages.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>traverse</title>
+ <para>
+ Number of timeouts for database traverse operations.
+ </para>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>locks</title>
+ <para>
+ This section lists locking statistics.
+ </para>
+
+ <refsect3>
+ <title>num_calls</title>
+ <para>
+ Number of completed lock calls. This includes database locks
+ and record locks.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>num_current</title>
+ <para>
+ Number of scheduled lock calls. This includes database locks
+ and record locks.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>num_pending</title>
+ <para>
+ Number of queued lock calls. This includes database locks and
+ record locks.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>num_failed</title>
+ <para>
+ Number of failed lock calls. This includes database locks and
+ record locks.
+ </para>
+ </refsect3>
+
+ </refsect2>
+
+ <refsect2>
+ <title>total_calls</title>
+ <para>
+ Number of req_call messages processed from clients. This number
+ should be the same as client --&gt; req_call.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>pending_calls</title>
+ <para>
+ Number of req_call messages which are currently being processed.
+ This number indicates the number of record migrations in flight.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>childwrite_calls</title>
+ <para>
+ Number of record update calls. Record update calls are used to
+ update a record under a transaction.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>pending_childwrite_calls</title>
+ <para>
+ Number of record update calls currently active.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>memory_used</title>
+ <para>
+ The amount of memory in bytes currently used by CTDB using
+ talloc. This includes all the memory used for CTDB's internal
+ data structures. This does not include the memory mapped TDB
+ databases.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>max_hop_count</title>
+ <para>
+ The maximum number of hops required for a record migration request
+ to obtain the record. High numbers indicate record contention.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>total_ro_delegations</title>
+ <para>
+ Number of readonly delegations created.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>total_ro_revokes</title>
+ <para>
+ Number of readonly delegations that were revoked. The difference
+ between total_ro_revokes and total_ro_delegations gives the
+ number of currently active readonly delegations.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>hop_count_buckets</title>
+ <para>
+ Distribution of migration requests based on hop count values.
+ Buckets are 0, &lt;&nbsp;2, &lt;&nbsp;4, &lt;&nbsp;8,
+ &lt;&nbsp;16, &lt;&nbsp;32, &lt;&nbsp;64, &lt;&nbsp;128,
+ &lt;&nbsp;256, &lt;&nbsp;512, &lt;&nbsp;1024, &lt;&nbsp;2048,
+ &lt;&nbsp;4096, &lt;&nbsp;8192, &lt;&nbsp;16384, &ge;&nbsp;16384.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>lock_buckets</title>
+ <para>
+ Distribution of record lock requests based on time required to
+ obtain locks. Buckets are &lt;&nbsp;1ms, &lt;&nbsp;10ms,
+ &lt;&nbsp;100ms, &lt;&nbsp;1s, &lt;&nbsp;2s, &lt;&nbsp;4s,
+ &lt;&nbsp;8s, &lt;&nbsp;16s, &lt;&nbsp;32s, &lt;&nbsp;64s,
+ &ge;&nbsp;64s.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>locks_latency</title>
+ <para>
+ The minimum, the average and the maximum time (in seconds)
+ required to obtain record locks.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>reclock_ctdbd</title>
+ <para>
+ The minimum, the average and the maximum time (in seconds)
+ required to check if recovery lock is still held by recovery
+ daemon when recovery mode is changed. This check is done in ctdb daemon.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>reclock_recd</title>
+ <para>
+ The minimum, the average and the maximum time (in seconds)
+ required to check if recovery lock is still held by recovery
+ daemon during recovery. This check is done in recovery daemon.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>call_latency</title>
+ <para>
+ The minimum, the average and the maximum time (in seconds) required
+ to process a REQ_CALL message from client. This includes the time
+ required to migrate a record from remote node, if the record is
+ not available on the local node.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>childwrite_latency</title>
+ <para>
+ The minimum, the average and the maximum time
+ (in seconds) required to update records
+ under a transaction.
+ </para>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>DATABASE STATISTICS</title>
+
+ <para>
+ CTDB maintains per database statistics about important operations.
+ See the <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry> command
+ <command>dbstatistics</command> for displaying database statistics.
+ </para>
+
+ <refsect2>
+ <title>Example: ctdb dbstatistics notify_index.tdb</title>
+ <screen>
+DB Statistics: notify_index.tdb
+ ro_delegations 0
+ ro_revokes 0
+ locks
+ total 131
+ failed 0
+ current 0
+ pending 0
+ hop_count_buckets: 9890 5454 26 1 0 0 0 0 0 0 0 0 0 0 0 0
+ lock_buckets: 4 117 10 0 0 0 0 0 0 0 0 0 0 0 0 0
+ locks_latency MIN/AVG/MAX 0.000683/0.004198/0.014730 sec out of 131
+ Num Hot Keys: 3
+ Count:7 Key:2f636c75737465726673
+ Count:18 Key:2f636c757374657266732f64617461
+ Count:7 Key:2f636c757374657266732f646174612f636c69656e7473
+ </screen>
+ </refsect2>
+
+ <refsect2>
+ <title>DB Statistics</title>
+ <para>
+ Name of the database.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>ro_delegations</title>
+ <para>
+ Number of readonly delegations created in the database.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>ro_revokes</title>
+ <para>
+ Number of readonly delegations revoked. The difference between
+ ro_delegations and ro_revokes indicates the number of currently
+ active readonly delegations.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>locks</title>
+ <para>
+ This section lists locking statistics.
+ </para>
+
+ <refsect3>
+ <title>total</title>
+ <para>
+ Number of completed lock calls. This includes database locks
+ and record locks.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>failed</title>
+ <para>
+ Number of failed lock calls. This includes database locks and
+ record locks.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>current</title>
+ <para>
+ Number of scheduled lock calls. This includes database locks
+ and record locks.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>pending</title>
+ <para>
+ Number of queued lock calls. This includes database locks and
+ record locks.
+ </para>
+ </refsect3>
+
+ </refsect2>
+
+ <refsect2>
+ <title>hop_count_buckets</title>
+ <para>
+ Distribution of migration requests based on hop count values.
+ Buckets are 0, &lt;&nbsp;2, &lt;&nbsp;4, &lt;&nbsp;8,
+ &lt;&nbsp;16, &lt;&nbsp;32, &lt;&nbsp;64, &lt;&nbsp;128,
+ &lt;&nbsp;256, &lt;&nbsp;512, &lt;&nbsp;1024, &lt;&nbsp;2048,
+ &lt;&nbsp;4096, &lt;&nbsp;8192, &lt;&nbsp;16384, &ge;&nbsp;16384.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>lock_buckets</title>
+ <para>
+ Distribution of record lock requests based on time required to
+ obtain locks. Buckets are &lt;&nbsp;1ms, &lt;&nbsp;10ms,
+ &lt;&nbsp;100ms, &lt;&nbsp;1s, &lt;&nbsp;2s, &lt;&nbsp;4s,
+ &lt;&nbsp;8s, &lt;&nbsp;16s, &lt;&nbsp;32s, &lt;&nbsp;64s,
+ &ge;&nbsp;64s.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>locks_latency</title>
+ <para>
+ The minimum, the average and the maximum time (in seconds)
+ required to obtain record locks.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Num Hot Keys</title>
+ <para>
+ Number of contended records determined by hop count. CTDB keeps
+ track of top 10 hot records and the output shows hex encoded
+ keys for the hot records.
+ </para>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-tunables</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Amitay Isaacs,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
+
diff --git a/ctdb/doc/ctdb-tunables.7.xml b/ctdb/doc/ctdb-tunables.7.xml
new file mode 100644
index 0000000..766213e
--- /dev/null
+++ b/ctdb/doc/ctdb-tunables.7.xml
@@ -0,0 +1,783 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<refentry id="ctdb-tunables.7">
+
+ <refmeta>
+ <refentrytitle>ctdb-tunables</refentrytitle>
+ <manvolnum>7</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb-tunables</refname>
+ <refpurpose>CTDB tunable configuration variables</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>
+ CTDB's behaviour can be configured by setting run-time tunable
+ variables. This lists and describes all tunables. See the
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>
+ <command>listvars</command>, <command>setvar</command> and
+ <command>getvar</command> commands for more details.
+ </para>
+
+ <para>
+ Unless otherwise stated, tunables should be set to the same
+ value on all nodes. Setting tunables to different values across
+ nodes may produce unexpected results. Future releases may set
+ (some or most) tunables globally across the cluster but doing so
+ is currently a manual process.
+ </para>
+
+ <para>
+ Tunables can be set at startup from the
+ <filename>/usr/local/etc/ctdb/ctdb.tunables</filename>
+ configuration file.
+
+ <literallayout>
+<replaceable>TUNABLE</replaceable>=<replaceable>VALUE</replaceable>
+ </literallayout>
+
+ Comment lines beginning with '#' are permitted. Whitespace may
+ be used for formatting/alignment. VALUE must be a non-negative
+ integer and must be the last thing on a line (i.e. no trailing
+ garbage, trailing comments are not permitted).
+ </para>
+
+ <para>
+ For example:
+
+ <screen format="linespecific">
+MonitorInterval=20
+ </screen>
+ </para>
+
+ <para>
+ The available tunable variables are listed alphabetically below.
+ </para>
+
+ <refsect2>
+ <title>AllowClientDBAttach</title>
+ <para>Default: 1</para>
+ <para>
+ When set to 0, clients are not allowed to attach to any databases.
+ This can be used to temporarily block any new processes from
+ attaching to and accessing the databases. This is mainly used
+ for detaching a volatile database using 'ctdb detach'.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>AllowMixedVersions</title>
+ <para>Default: 0</para>
+ <para>
+ CTDB will not allow incompatible versions to co-exist in
+ a cluster. If a version mismatch is found, then the losing CTDB
+ will shut down. To disable the incompatible version check,
+ set this tunable to 1.
+ </para>
+ <para>
+ For version checking, CTDB uses major and minor version.
+ For example, CTDB 4.6.1 and CTDB 4.6.2 are matching versions;
+ CTDB 4.5.x and CTDB 4.6.y do not match.
+ </para>
+ <para>
+ CTDB with version check support will lose to CTDB without
+ version check support. Between two different CTDB versions with
+ version check support, one running for less time will lose.
+ If the running time for both CTDB versions with version check
+ support is equal (to seconds), then the older version will lose.
+ The losing CTDB daemon will shut down.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>AllowUnhealthyDBRead</title>
+ <para>Default: 0</para>
+ <para>
+ When set to 1, ctdb allows database traverses to read unhealthy
+ databases. By default, ctdb does not allow reading records from
+ unhealthy databases.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>ControlTimeout</title>
+ <para>Default: 60</para>
+ <para>
+ This is the default timeout used when sending a
+ control message to either the local or a remote ctdb daemon.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>DatabaseHashSize</title>
+ <para>Default: 100001</para>
+ <para>
+ Number of the hash chains for the local store of the tdbs that
+ ctdb manages.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>DatabaseMaxDead</title>
+ <para>Default: 5</para>
+ <para>
+ Maximum number of dead records per hash chain for the tdb databases
+ managed by ctdb.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>DBRecordCountWarn</title>
+ <para>Default: 100000</para>
+ <para>
+ When set to non-zero, ctdb will log a warning during recovery if
+ a database has more than this many records. This will produce a
+ warning if a database grows uncontrollably with orphaned records.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>DBRecordSizeWarn</title>
+ <para>Default: 10000000</para>
+ <para>
+ When set to non-zero, ctdb will log a warning during recovery
+ if a single record is bigger than this size. This will produce
+ a warning if a database record grows uncontrollably.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>DBSizeWarn</title>
+ <para>Default: 1000000000</para>
+ <para>
+ When set to non-zero, ctdb will log a warning during recovery if
+ a database size is bigger than this. This will produce a warning
+ if a database grows uncontrollably.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>DeferredAttachTO</title>
+ <para>Default: 120</para>
+ <para>
+ When databases are frozen we do not allow clients to attach to
+ the databases. Instead of returning an error immediately to the
+ client, the attach request from the client is deferred until
+ the database becomes available again at which stage we respond
+ to the client.
+ </para>
+ <para>
+ This timeout controls how long we will defer the request from the
+ client before timing it out and returning an error to the client.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>ElectionTimeout</title>
+ <para>Default: 3</para>
+ <para>
+ The number of seconds to wait for the election of recovery
+ master to complete. If the election is not completed during this
+ interval, then that round of election fails and ctdb starts a
+ new election.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>EnableBans</title>
+ <para>Default: 1</para>
+ <para>
+ This parameter allows ctdb to ban a node if the node is misbehaving.
+ </para>
+ <para>
+ When set to 0, this disables banning completely in the cluster
+ and thus nodes can not get banned, even if they break. Don't
+ set to 0 unless you know what you are doing.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>EventScriptTimeout</title>
+ <para>Default: 30</para>
+ <para>
+ Maximum time in seconds to allow an event to run before timing
+ out. This is the total time for all enabled scripts that are
+ run for an event, not just a single event script.
+ </para>
+ <para>
+ Note that timeouts are ignored for some events ("takeip",
+ "releaseip", "startrecovery", "recovered") and converted to
+ success. The logic here is that the callers of these events
+ implement their own additional timeout.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>FetchCollapse</title>
+ <para>Default: 1</para>
+ <para>
+ This parameter is used to avoid multiple migration requests for
+ the same record from a single node. All the record requests for
+ the same record are queued up and processed when the record is
+ migrated to the current node.
+ </para>
+ <para>
+ When many clients across many nodes try to access the same record
+ at the same time this can lead to a fetch storm where the record
+ becomes very active and bounces between nodes very fast. This
+ leads to high CPU utilization of the ctdbd daemon, trying to
+ bounce that record around very fast, and poor performance.
+ This can improve performance and reduce CPU utilization for
+ certain workloads.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>HopcountMakeSticky</title>
+ <para>Default: 50</para>
+ <para>
+ For database(s) marked STICKY (using 'ctdb setdbsticky'),
+ any record that is migrating so fast that hopcount
+ exceeds this limit is marked as STICKY record for
+ <varname>StickyDuration</varname> seconds. This means that
+ after each migration the sticky record will be kept on the node for
+ <varname>StickyPindown</varname> milliseconds and prevented from
+ being migrated off the node.
+ </para>
+ <para>
+ This will improve performance for certain workloads, such as
+ locking.tdb if many clients are opening/closing the same file
+ concurrently.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>IPAllocAlgorithm</title>
+ <para>Default: 2</para>
+ <para>
+ Selects the algorithm that CTDB should use when doing public
+ IP address allocation. Meaningful values are:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term>0</term>
+ <listitem>
+ <para>
+ Deterministic IP address allocation.
+ </para>
+ <para>
+ This is a simple and fast option. However, it can cause
+ unnecessary address movement during fail-over because
+ each address has a "home" node. Works badly when some
+ nodes do not have any addresses defined. Should be used
+ with care when addresses are defined across multiple
+ networks.
+ </para>
+ <para>
+ You can override the automatic "home" node allocation by
+ creating a file "home_nodes" next to the
+ "public_addresses" file. As an example the following
+ "home_nodes" file assigns the address 192.168.1.1 to
+ node 0 and 192.168.1.2 to node 2:
+ </para>
+ <screen format="linespecific">
+ 192.168.1.1 0
+ 192.168.1.2 2
+ </screen>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>1</term>
+ <listitem>
+ <para>
+ Non-deterministic IP address allocation.
+ </para>
+ <para>
+ This is a relatively fast option that attempts to
+ minimise unnecessary address movements. Addresses do
+ not have a "home" node. Rebalancing is limited but is
+ usually adequate. Works badly when addresses are
+ defined across multiple networks.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>2</term>
+ <listitem>
+ <para>
+ LCP2 IP address allocation.
+ </para>
+ <para>
+ Uses a heuristic to assign addresses defined across
+ multiple networks, usually balancing addresses on each
+ network evenly across nodes. Addresses do not have a
+ "home" node. Minimises unnecessary address movements.
+ The algorithm is complex, so is slower than other
+ choices for a large number of addresses. However, it
+ can calculate an optimal assignment of 900 addresses in
+ under 10 seconds on modern hardware.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ If the specified value is not one of these then the default
+ will be used.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>KeepaliveInterval</title>
+ <para>Default: 5</para>
+ <para>
+ How often in seconds should the nodes send keep-alive packets to
+ each other.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>KeepaliveLimit</title>
+ <para>Default: 5</para>
+ <para>
+ After how many keepalive intervals without any traffic should
+ a node wait until marking the peer as DISCONNECTED.
+ </para>
+ <para>
+ If a node has hung, it can take
+ <varname>KeepaliveInterval</varname> *
+ (<varname>KeepaliveLimit</varname> + 1) seconds before
+ ctdb determines that the node is DISCONNECTED and performs
+ a recovery. This limit should not be set too high, to enable
+ early detection and to prevent application timeouts (e.g. SMB1)
+ from kicking in before the fail over is completed.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>LockProcessesPerDB</title>
+ <para>Default: 200</para>
+ <para>
+ This is the maximum number of lock helper processes ctdb will
+ create for obtaining record locks. When ctdb cannot get a record
+ lock without blocking, it creates a helper process that waits
+ for the lock to be obtained.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>LogLatencyMs</title>
+ <para>Default: 0</para>
+ <para>
+ When set to non-zero, ctdb will log if certain operations
+ take longer than this value, in milliseconds, to complete.
+ These operations include "process a record request from client",
+ "take a record or database lock", "update a persistent database
+ record" and "vacuum a database".
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>MaxQueueDropMsg</title>
+ <para>Default: 1000000</para>
+ <para>
+ This is the maximum number of messages to be queued up for
+ a client before ctdb will treat the client as hung and will
+ terminate the client connection.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>MonitorInterval</title>
+ <para>Default: 15</para>
+ <para>
+ How often should ctdb run the 'monitor' event in seconds to check
+ for a node's health.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>MonitorTimeoutCount</title>
+ <para>Default: 20</para>
+ <para>
+ How many 'monitor' events in a row need to timeout before a node
+ is flagged as UNHEALTHY. This setting is useful if scripts can
+ not be written so that they do not hang for benign reasons.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>NoIPFailback</title>
+ <para>Default: 0</para>
+ <para>
+ When set to 1, ctdb will not perform failback of IP addresses
+ when a node becomes healthy. When a node becomes UNHEALTHY,
+ ctdb WILL perform failover of public IP addresses, but when the
+ node becomes HEALTHY again, ctdb will not fail the addresses back.
+ </para>
+ <para>
+ Use with caution! Normally when a node becomes available to the
+ cluster ctdb will try to reassign public IP addresses onto the
+ new node as a way to distribute the workload evenly across the
+ cluster nodes. Ctdb tries to make sure that all running nodes host
+ approximately the same number of public addresses.
+ </para>
+ <para>
+ When you enable this tunable, ctdb will no longer attempt to
+ rebalance the cluster by failing IP addresses back to the new
+ nodes. An unbalanced cluster will therefore remain unbalanced
+ until there is manual intervention from the administrator. When
+ this parameter is set, you can manually fail public IP addresses
+ over to the new node(s) using the 'ctdb moveip' command.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>NoIPTakeover</title>
+ <para>Default: 0</para>
+ <para>
+ When set to 1, ctdb will not allow IP addresses to be failed
+ over to other nodes. Any IP addresses already hosted on
+ healthy nodes will remain. Any IP addresses hosted on
+ unhealthy nodes will be released by unhealthy nodes and will
+ become un-hosted.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>PullDBPreallocation</title>
+ <para>Default: 10*1024*1024</para>
+ <para>
+ This is the size of a record buffer to pre-allocate for sending
+ reply to PULLDB control. Usually record buffer starts with size
+ of the first record and gets reallocated every time a new record
+ is added to the record buffer. For a large number of records,
+ this can be very inefficient to grow the record buffer one record
+ at a time.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>QueueBufferSize</title>
+ <para>Default: 1024</para>
+ <para>
+ This is the maximum amount of data (in bytes) ctdb will read
+ from a socket at a time.
+ </para>
+ <para>
+ For a busy setup, if ctdb is not able to process the TCP sockets
+ fast enough (large amount of data in Recv-Q for tcp sockets),
+ then this tunable value should be increased. However, large
+ values can keep ctdb busy processing packets and prevent ctdb
+ from handling other events.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecBufferSizeLimit</title>
+ <para>Default: 1000000</para>
+ <para>
+ This is the limit on the size of the record buffer to be sent
+ in various controls. This limit is used by new controls used
+ for recovery and controls used in vacuuming.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecdFailCount</title>
+ <para>Default: 10</para>
+ <para>
+ If the recovery daemon has failed to ping the main daemon for
+ this many consecutive intervals, the main daemon will consider
+ the recovery daemon as hung and will try to restart it to recover.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecdPingTimeout</title>
+ <para>Default: 60</para>
+ <para>
+ If the main daemon has not heard a "ping" from the recovery daemon
+ for this many seconds, the main daemon will log a message that
+ the recovery daemon is potentially hung. This also increments a
+ counter which is checked against <varname>RecdFailCount</varname>
+ for detection of hung recovery daemon.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecLockLatencyMs</title>
+ <para>Default: 1000</para>
+ <para>
+ When using a reclock file for split brain prevention, if set
+ to non-zero this tunable will make the recovery daemon log a
+ message if the fcntl() call to lock/testlock the recovery file
+ takes longer than this number of milliseconds.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecoverInterval</title>
+ <para>Default: 1</para>
+ <para>
+ How frequently in seconds should the recovery daemon perform the
+ consistency checks to determine if it should perform a recovery.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecoverTimeout</title>
+ <para>Default: 120</para>
+ <para>
+ This is the default setting for timeouts for controls when sent
+ from the recovery daemon. We allow longer control timeouts from
+ the recovery daemon than from normal use since the recovery
+ daemon often uses controls that can take a lot longer than normal
+ controls.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecoveryBanPeriod</title>
+ <para>Default: 300</para>
+ <para>
+ The duration in seconds for which a node is banned if the node
+ fails during recovery. After this time has elapsed the node will
+ automatically get unbanned and will attempt to rejoin the cluster.
+ </para>
+ <para>
+ A node usually gets banned due to real problems with the node.
+ Don't set this value too small. Otherwise, a problematic node
+ will try to re-join cluster too soon causing unnecessary recoveries.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecoveryDropAllIPs</title>
+ <para>Default: 120</para>
+ <para>
+ If a node is stuck in recovery, or stopped, or banned, for this
+ many seconds, then ctdb will release all public addresses on
+ that node.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RecoveryGracePeriod</title>
+ <para>Default: 120</para>
+ <para>
+ During recoveries, if a node has not caused recovery failures
+ during the last grace period in seconds, any records of
+ transgressions where the node has caused recovery failures will be
+ forgiven. This resets the ban-counter back to zero for that node.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RepackLimit</title>
+ <para>Default: 10000</para>
+ <para>
+ During vacuuming, if the number of freelist records is more than
+ <varname>RepackLimit</varname>, then the database is repacked
+ to get rid of the freelist records to avoid fragmentation.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>RerecoveryTimeout</title>
+ <para>Default: 10</para>
+ <para>
+ Once a recovery has completed, no additional recoveries are
+ permitted until this timeout in seconds has expired.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>SeqnumInterval</title>
+ <para>Default: 1000</para>
+ <para>
+ Some databases have seqnum tracking enabled, so that samba will
+ be able to detect asynchronously when there have been updates
+ to the database. Every time a database is updated its sequence
+ number is increased.
+ </para>
+ <para>
+ This tunable is used to specify in milliseconds how frequently
+ ctdb will send out updates to remote nodes to inform them that
+ the sequence number is increased.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>StatHistoryInterval</title>
+ <para>Default: 1</para>
+ <para>
+ Granularity of the statistics collected in the statistics
+ history. This is reported by 'ctdb stats' command.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>StickyDuration</title>
+ <para>Default: 600</para>
+ <para>
+ Once a record has been marked STICKY, this is the duration in
+ seconds for which the record will be flagged as a STICKY record.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>StickyPindown</title>
+ <para>Default: 200</para>
+ <para>
+ Once a STICKY record has been migrated onto a node, it will be
+ pinned down on that node for this number of milliseconds. Any
+ request from other nodes to migrate the record off the node will
+ be deferred.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>TakeoverTimeout</title>
+ <para>Default: 9</para>
+ <para>
+ This is the duration in seconds in which ctdb tries to complete IP
+ failover.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>TickleUpdateInterval</title>
+ <para>Default: 20</para>
+ <para>
+ Every <varname>TickleUpdateInterval</varname> seconds, ctdb
+ synchronizes the client connection information across nodes.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>TraverseTimeout</title>
+ <para>Default: 20</para>
+ <para>
+ This is the duration in seconds for which a database traverse
+ is allowed to run. If the traverse does not complete during
+ this interval, ctdb will abort the traverse.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>VacuumFastPathCount</title>
+ <para>Default: 60</para>
+ <para>
+ During a vacuuming run, ctdb usually processes only the records
+ marked for deletion, also called the fast path vacuuming. After
+ finishing <varname>VacuumFastPathCount</varname> number of fast
+ path vacuuming runs, ctdb will trigger a scan of complete database
+ for any empty records that need to be deleted.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>VacuumInterval</title>
+ <para>Default: 10</para>
+ <para>
+ Periodic interval in seconds when vacuuming is triggered for
+ volatile databases.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>VacuumMaxRunTime</title>
+ <para>Default: 120</para>
+ <para>
+ The maximum time in seconds for which the vacuuming process is
+ allowed to run. If vacuuming process takes longer than this
+ value, then the vacuuming process is terminated.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>VerboseMemoryNames</title>
+ <para>Default: 0</para>
+ <para>
+ When set to non-zero, ctdb assigns verbose names for some of
+ the talloc allocated memory objects. These names are visible
+ in the talloc memory report generated by 'ctdb dumpmemory'.
+ </para>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>FILES</title>
+
+ <simplelist>
+ <member><filename>/usr/local/etc/ctdb/ctdb.tunables</filename></member>
+ </simplelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Ronnie Sahlberg,
+ Amitay Isaacs,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ctdb.1.xml b/ctdb/doc/ctdb.1.xml
new file mode 100644
index 0000000..75934ef
--- /dev/null
+++ b/ctdb/doc/ctdb.1.xml
@@ -0,0 +1,1863 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="ctdb.1">
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Ronnie Sahlberg,
+ Amitay Isaacs,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb</refname>
+ <refpurpose>CTDB management utility</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ctdb</command>
+ <arg rep="repeat"><replaceable>OPTION</replaceable></arg>
+ <arg choice="req"><replaceable>COMMAND</replaceable></arg>
+ <arg choice="opt"><replaceable>COMMAND-ARGS</replaceable></arg>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+ <para>
+ ctdb is a utility to view and manage a CTDB cluster.
+ </para>
+
+ <para>
+ The following terms are used when referring to nodes in a
+ cluster:
+ <variablelist>
+ <varlistentry>
+ <term>PNN</term>
+ <listitem>
+ <para>
+ Physical Node Number. The physical node number is an
+ integer that describes the node in the cluster. The
+ first node in a cluster has physical node number 0.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>PNN-LIST</term>
+ <listitem>
+ <para>
+ This is either a single PNN, a comma-separated list of PNNs
+ or "all".
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
+ Commands that reference a database use the following terms:
+ <variablelist>
+ <varlistentry>
+ <term>DB</term>
+ <listitem>
+ <para>
+ This is either a database name, such as
+ <filename>locking.tdb</filename> or a database ID such
+ as "0x42fe72c5".
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DB-LIST</term>
+ <listitem>
+ <para>
+ A space separated list of at least one
+ <parameter>DB</parameter>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+ <varlistentry><term>-n <parameter>PNN</parameter></term>
+ <listitem>
+ <para>
+ The node specified by PNN should be queried for the
+ requested information. Default is to query the daemon
+ running on the local host.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-Y</term>
+ <listitem>
+ <para>
+ Produce output in machine readable form for easier parsing
+ by scripts. This uses a field delimiter of ':'. Not all
+ commands support this option.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-x <parameter>SEPARATOR</parameter></term>
+ <listitem>
+ <para>
+ Use SEPARATOR to delimit fields in machine readable output.
+ This implies -Y.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-X</term>
+ <listitem>
+ <para>
+ Produce output in machine readable form for easier parsing
+ by scripts. This uses a field delimiter of '|'. Not all
+ commands support this option.
+ </para>
+ <para>
+ This is equivalent to "-x|" and avoids some shell quoting
+ issues.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-t <parameter>TIMEOUT</parameter></term>
+ <listitem>
+ <para>
+ Indicates that ctdb should wait up to TIMEOUT seconds for
+ a response to most commands sent to the CTDB daemon. The
+ default is 10 seconds.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-T <parameter>TIMELIMIT</parameter></term>
+ <listitem>
+ <para>
+ Indicates that TIMELIMIT is the maximum run time (in
+ seconds) for the ctdb command. When TIMELIMIT is exceeded
+ the ctdb command will terminate with an error. The default
+ is 120 seconds.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-? --help</term>
+ <listitem>
+ <para>
+ Print some help text to the screen.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>--usage</term>
+ <listitem>
+ <para>
+ Print usage information to the screen.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-d --debug=<parameter>DEBUGLEVEL</parameter></term>
+ <listitem>
+ <para>
+ Change the debug level for the command. Default is NOTICE.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+
+ <refsect1>
+ <title>ADMINISTRATIVE COMMANDS</title>
+ <para>
+ These are commands used to monitor and administer a CTDB cluster.
+ </para>
+
+ <refsect2>
+ <title>pnn</title>
+ <para>
+ This command displays the PNN of the current node.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>status</title>
+ <para>
+ This command shows the current status of all CTDB nodes based
+ on information from the queried node.
+ </para>
+
+ <para>
+ Note: If the queried node is INACTIVE then the status
+ might not be current.
+ </para>
+
+ <refsect3>
+ <title>Node status</title>
+ <para>
+ This includes the number of physical nodes and the status of
+ each node. See <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for information
+ about node states.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>Generation</title>
+ <para>
+ The generation id is a number that indicates the current generation
+ of a cluster instance. Each time a cluster goes through a
+ reconfiguration or a recovery its generation id will be changed.
+ </para>
+ <para>
+ This number does not have any particular meaning other than
+ to keep track of when a cluster has gone through a
+ recovery. It is a random number that represents the current
+ instance of a ctdb cluster and its databases. The CTDB
+ daemon uses this number internally to be able to tell when
+ commands to operate on the cluster and the databases were
+ issued in a different generation of the cluster, to ensure
+ that commands that operate on the databases will not survive
+ across a cluster database recovery. After a recovery, all
+ old outstanding commands will automatically become invalid.
+ </para>
+ <para>
+ Sometimes this number will be shown as "INVALID". This only means that
+ the ctdbd daemon has started but it has not yet merged with the cluster through a recovery.
+ All nodes start with generation "INVALID" and are not assigned a real
+ generation id until they have successfully been merged with a cluster
+ through a recovery.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>Virtual Node Number (VNN) map</title>
+ <para>
+ Consists of the number of virtual nodes and mapping from
+ virtual node numbers to physical node numbers. Only nodes
+ that are participating in the VNN map can become lmaster for
+ database records.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>Recovery mode</title>
+ <para>
+ This is the current recovery mode of the cluster. There are two possible modes:
+ </para>
+ <para>
+ NORMAL - The cluster is fully operational.
+ </para>
+ <para>
+ RECOVERY - The cluster databases have all been frozen, pausing all services while the cluster awaits a recovery process to complete. A recovery process should finish within seconds. If a cluster is stuck in the RECOVERY state this would indicate a cluster malfunction which needs to be investigated.
+ </para>
+ <para>
+ Once the leader detects an inconsistency, for example a node
+ becomes disconnected/connected, the recovery daemon will trigger a
+ cluster recovery process, where all databases are remerged across the
+ cluster. When this process starts, the leader will first
+ "freeze" all databases to prevent applications such as samba from
+ accessing the databases and it will also mark the recovery mode as
+ RECOVERY.
+ </para>
+ <para>
+ When the CTDB daemon starts up, it will start in RECOVERY
+ mode. Once the node has been merged into a cluster and all
+ databases have been recovered, the node mode will change into
+ NORMAL mode and the databases will be "thawed", allowing samba
+ to access the databases again.
+ </para>
+ </refsect3>
+ <refsect3>
+ <title>Leader</title>
+ <para>
+ This is the cluster node that is currently designated as the
+ leader. This node is responsible for monitoring the
+ consistency of the cluster and for performing the actual
+ recovery process when required.
+ </para>
+ <para>
+ Only one node at a time can be the designated leader. Which
+ node is designated the leader is decided by an election
+ process in the recovery daemons running on each node.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb status
+Number of nodes:4
+pnn:0 192.168.2.200 OK (THIS NODE)
+pnn:1 192.168.2.201 OK
+pnn:2 192.168.2.202 OK
+pnn:3 192.168.2.203 OK
+Generation:1362079228
+Size:4
+hash:0 lmaster:0
+hash:1 lmaster:1
+hash:2 lmaster:2
+hash:3 lmaster:3
+Recovery mode:NORMAL (0)
+Leader:0
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>nodestatus <optional><parameter>PNN-LIST</parameter></optional></title>
+ <para>
+ This command is similar to the <command>status</command>
+ command. It displays the "node status" subset of output. The
+ main differences are:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ The exit code is the bitwise-OR of the flags for each
+ specified node, while <command>ctdb status</command> exits
+ with 0 if it was able to retrieve status for all nodes.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <command>ctdb status</command> provides status information
+ for all nodes. <command>ctdb nodestatus</command>
+ defaults to providing status for only the current node.
+ If PNN-LIST is provided then status is given for
+ the indicated node(s).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ A common invocation in scripts is <command>ctdb nodestatus
+ all</command> to check whether all nodes in a cluster are
+ healthy.
+ </para>
+
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb nodestatus
+pnn:0 10.0.0.30 OK (THIS NODE)
+
+# ctdb nodestatus all
+Number of nodes:2
+pnn:0 10.0.0.30 OK (THIS NODE)
+pnn:1 10.0.0.31 OK
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>leader</title>
+ <para>
+ This command shows the pnn of the node which is currently the leader.
+ </para>
+
+ <para>
+ Note: If the queried node is INACTIVE then the status
+ might not be current.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>uptime</title>
+ <para>
+ This command shows the uptime for the ctdb daemon, when the last recovery or ip-failover completed and how long it took. If the "duration" is shown as a negative number, this indicates that there is a recovery/failover in progress and it started that many seconds ago.
+ </para>
+
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb uptime
+Current time of node : Thu Oct 29 10:38:54 2009
+Ctdbd start time : (000 16:54:28) Wed Oct 28 17:44:26 2009
+Time of last recovery/failover: (000 16:53:31) Wed Oct 28 17:45:23 2009
+Duration of last recovery/failover: 2.248552 seconds
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>listnodes</title>
+ <para>
+ This command lists the IP addresses of all the nodes in the cluster.
+ </para>
+
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb listnodes
+192.168.2.200
+192.168.2.201
+192.168.2.202
+192.168.2.203
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>natgw {leader|list|status}</title>
+ <para>
+ This command shows different aspects of NAT gateway status.
+ For an overview of CTDB's NAT gateway functionality please see
+ the <citetitle>NAT GATEWAY</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>leader</term>
+ <listitem>
+ <para>
+ Show the PNN and private IP address of the current NAT
+ gateway leader node.
+ </para>
+ <para>
+ Example output:
+ </para>
+ <screen>
+1 192.168.2.201
+ </screen>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>list</term>
+ <listitem>
+ <para>
+ List the private IP addresses of nodes in the current
+ NAT gateway group, annotating the leader node.
+ </para>
+ <para>
+ Example output:
+ </para>
+ <screen>
+192.168.2.200
+192.168.2.201 LEADER
+192.168.2.202
+192.168.2.203
+ </screen>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>status</term>
+ <listitem>
+ <para>
+ List the nodes in the current NAT gateway group and
+ their status.
+ </para>
+ <para>
+ Example output:
+ </para>
+ <screen>
+pnn:0 192.168.2.200 UNHEALTHY (THIS NODE)
+pnn:1 192.168.2.201 OK
+pnn:2 192.168.2.202 OK
+pnn:3 192.168.2.203 OK
+ </screen>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>ping</title>
+ <para>
+ This command will "ping" specified CTDB nodes in the cluster
+ to verify that they are running.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb ping
+response from 0 time=0.000054 sec (3 clients)
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>ifaces</title>
+ <para>
+ This command will display the list of network interfaces, which could
+ host public addresses, along with their status.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb ifaces
+Interfaces on node 0
+name:eth5 link:up references:2
+name:eth4 link:down references:0
+name:eth3 link:up references:1
+name:eth2 link:up references:1
+
+# ctdb -X ifaces
+|Name|LinkStatus|References|
+|eth5|1|2|
+|eth4|0|0|
+|eth3|1|1|
+|eth2|1|1|
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>ip</title>
+ <para>
+ This command will display the list of public addresses that are provided by the cluster and which physical node is currently serving this ip. By default this command will ONLY show those public addresses that are known to the node itself. To see the full list of all public ips across the cluster you must use "ctdb ip all".
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb ip -v
+Public IPs on node 0
+172.31.91.82 node[1] active[] available[eth2,eth3] configured[eth2,eth3]
+172.31.91.83 node[0] active[eth3] available[eth2,eth3] configured[eth2,eth3]
+172.31.91.84 node[1] active[] available[eth2,eth3] configured[eth2,eth3]
+172.31.91.85 node[0] active[eth2] available[eth2,eth3] configured[eth2,eth3]
+172.31.92.82 node[1] active[] available[eth5] configured[eth4,eth5]
+172.31.92.83 node[0] active[eth5] available[eth5] configured[eth4,eth5]
+172.31.92.84 node[1] active[] available[eth5] configured[eth4,eth5]
+172.31.92.85 node[0] active[eth5] available[eth5] configured[eth4,eth5]
+
+# ctdb -X ip -v
+|Public IP|Node|ActiveInterface|AvailableInterfaces|ConfiguredInterfaces|
+|172.31.91.82|1||eth2,eth3|eth2,eth3|
+|172.31.91.83|0|eth3|eth2,eth3|eth2,eth3|
+|172.31.91.84|1||eth2,eth3|eth2,eth3|
+|172.31.91.85|0|eth2|eth2,eth3|eth2,eth3|
+|172.31.92.82|1||eth5|eth4,eth5|
+|172.31.92.83|0|eth5|eth5|eth4,eth5|
+|172.31.92.84|1||eth5|eth4,eth5|
+|172.31.92.85|0|eth5|eth5|eth4,eth5|
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>ipinfo <parameter>IP</parameter></title>
+ <para>
+ This command will display details about the specified public address.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb ipinfo 172.31.92.85
+Public IP[172.31.92.85] info on node 0
+IP:172.31.92.85
+CurrentNode:0
+NumInterfaces:2
+Interface[1]: Name:eth4 Link:down References:0
+Interface[2]: Name:eth5 Link:up References:2 (active)
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>event run|status|script list|script enable|script disable</title>
+ <para>
+ This command is used to control event daemon and to inspect
+ status of various events.
+ </para>
+
+ <para>
+ The commands below require a component to be specified. In
+ the current version the only valid component is
+ <literal>legacy</literal>.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>run <parameter>TIMEOUT</parameter> <parameter>COMPONENT</parameter> <parameter>EVENT</parameter> <optional><parameter>ARGUMENTS</parameter></optional> </term>
+ <listitem>
+ <para>
+ This command can be used to manually run specified EVENT
+ in COMPONENT with optional ARGUMENTS. The event will be
+ allowed to run a maximum of TIMEOUT seconds. If TIMEOUT
+ is 0, then there is no time limit for running the event.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>status <parameter>COMPONENT</parameter> <parameter>EVENT</parameter></term>
+ <listitem>
+ <para>
+ This command displays the last execution status of the
+ specified EVENT in COMPONENT.
+ </para>
+ <para>
+ The command will terminate with the exit status
+ corresponding to the overall status of event that is
+ displayed.
+ </para>
+ <para>
+ The output is the list of event scripts executed.
+ Each line shows the name, status, duration and start time
+ for each script. Output from each script is shown.
+ </para>
+ <para>
+ Example #1
+ </para>
+ <screen>
+# ctdb event status legacy monitor
+00.ctdb OK 0.014 Sat Dec 17 19:39:11 2016
+01.reclock OK 0.013 Sat Dec 17 19:39:11 2016
+05.system OK 0.029 Sat Dec 17 19:39:11 2016
+06.nfs OK 0.014 Sat Dec 17 19:39:11 2016
+10.interface OK 0.037 Sat Dec 17 19:39:11 2016
+11.natgw OK 0.011 Sat Dec 17 19:39:11 2016
+11.routing OK 0.007 Sat Dec 17 19:39:11 2016
+13.per_ip_routing OK 0.007 Sat Dec 17 19:39:11 2016
+20.multipathd OK 0.007 Sat Dec 17 19:39:11 2016
+31.clamd OK 0.007 Sat Dec 17 19:39:11 2016
+40.vsftpd OK 0.013 Sat Dec 17 19:39:11 2016
+41.httpd OK 0.018 Sat Dec 17 19:39:11 2016
+49.winbind OK 0.023 Sat Dec 17 19:39:11 2016
+50.samba OK 0.100 Sat Dec 17 19:39:12 2016
+60.nfs OK 0.376 Sat Dec 17 19:39:12 2016
+70.iscsi OK 0.009 Sat Dec 17 19:39:12 2016
+91.lvs OK 0.007 Sat Dec 17 19:39:12 2016
+ </screen>
+
+ <para>
+ Example #2
+ </para>
+ <screen>
+# ctdb event status legacy monitor
+00.ctdb OK 0.011 Sat Dec 17 19:40:46 2016
+01.reclock OK 0.010 Sat Dec 17 19:40:46 2016
+05.system OK 0.030 Sat Dec 17 19:40:46 2016
+06.nfs OK 0.014 Sat Dec 17 19:40:46 2016
+10.interface OK 0.041 Sat Dec 17 19:40:46 2016
+11.natgw OK 0.008 Sat Dec 17 19:40:46 2016
+11.routing OK 0.007 Sat Dec 17 19:40:46 2016
+13.per_ip_routing OK 0.007 Sat Dec 17 19:40:46 2016
+20.multipathd OK 0.007 Sat Dec 17 19:40:46 2016
+31.clamd OK 0.007 Sat Dec 17 19:40:46 2016
+40.vsftpd OK 0.013 Sat Dec 17 19:40:46 2016
+41.httpd OK 0.015 Sat Dec 17 19:40:46 2016
+49.winbind OK 0.022 Sat Dec 17 19:40:46 2016
+50.samba ERROR 0.077 Sat Dec 17 19:40:46 2016
+ OUTPUT: ERROR: samba tcp port 445 is not responding
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>script list <parameter>COMPONENT</parameter></term>
+ <listitem>
+ <para>
+ List the available event scripts in COMPONENT. Enabled
+ scripts are flagged with a '*'.
+ </para>
+ <para>
+ Generally, event scripts are provided by CTDB. However,
+ local or 3rd party event scripts may also be available.
+ These are shown in a separate section after those
+ provided by CTDB.
+ </para>
+ <para>
+ Example
+ </para>
+ <screen>
+# ctdb event script list legacy
+* 00.ctdb
+* 01.reclock
+* 05.system
+* 06.nfs
+* 10.interface
+ 11.natgw
+ 11.routing
+ 13.per_ip_routing
+ 20.multipathd
+ 31.clamd
+ 40.vsftpd
+ 41.httpd
+* 49.winbind
+* 50.samba
+* 60.nfs
+ 70.iscsi
+ 91.lvs
+
+* 02.local
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>script enable <parameter>COMPONENT</parameter> <parameter>SCRIPT</parameter></term>
+ <listitem>
+ <para>
+ Enable the specified event SCRIPT in COMPONENT. Only
+ enabled scripts will be executed when running any event.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>script disable <parameter>COMPONENT</parameter> <parameter>SCRIPT</parameter></term>
+ <listitem>
+ <para>
+ Disable the specified event SCRIPT in COMPONENT. This
+ will prevent the script from executing when running any
+ event.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>scriptstatus</title>
+ <para>
+ This is an alias for <command>ctdb event status legacy
+ <optional>EVENT</optional></command>, where EVENT defaults to
+ <command>monitor</command>.
+ </para>
+ <para>
+ This command is deprecated. It's provided for backward
+ compatibility. Use <command>ctdb event status</command>
+ instead.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>listvars</title>
+ <para>
+ List all tuneable variables, except the values of the obsolete tunables
+ like VacuumMinInterval. The obsolete tunables can be retrieved only
+ explicitly with the "ctdb getvar" command.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb listvars
+SeqnumInterval = 1000
+ControlTimeout = 60
+TraverseTimeout = 20
+KeepaliveInterval = 5
+KeepaliveLimit = 5
+RecoverTimeout = 120
+RecoverInterval = 1
+ElectionTimeout = 3
+TakeoverTimeout = 9
+MonitorInterval = 15
+TickleUpdateInterval = 20
+EventScriptTimeout = 30
+MonitorTimeoutCount = 20
+RecoveryGracePeriod = 120
+RecoveryBanPeriod = 300
+DatabaseHashSize = 100001
+DatabaseMaxDead = 5
+RerecoveryTimeout = 10
+EnableBans = 1
+NoIPFailback = 0
+VerboseMemoryNames = 0
+RecdPingTimeout = 60
+RecdFailCount = 10
+LogLatencyMs = 0
+RecLockLatencyMs = 1000
+RecoveryDropAllIPs = 120
+VacuumInterval = 10
+VacuumMaxRunTime = 120
+RepackLimit = 10000
+VacuumFastPathCount = 60
+MaxQueueDropMsg = 1000000
+AllowUnhealthyDBRead = 0
+StatHistoryInterval = 1
+DeferredAttachTO = 120
+AllowClientDBAttach = 1
+RecoverPDBBySeqNum = 1
+DeferredRebalanceOnNodeAdd = 300
+FetchCollapse = 1
+HopcountMakeSticky = 50
+StickyDuration = 600
+StickyPindown = 200
+NoIPTakeover = 0
+DBRecordCountWarn = 100000
+DBRecordSizeWarn = 10000000
+DBSizeWarn = 100000000
+PullDBPreallocation = 10485760
+LockProcessesPerDB = 200
+RecBufferSizeLimit = 1000000
+QueueBufferSize = 1024
+IPAllocAlgorithm = 2
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>getvar <parameter>NAME</parameter></title>
+ <para>
+ Get the runtime value of a tuneable variable.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb getvar MonitorInterval
+MonitorInterval = 15
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>setvar <parameter>NAME</parameter> <parameter>VALUE</parameter></title>
+ <para>
+ Set the runtime value of a tuneable variable.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb setvar MonitorInterval 20
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>lvs {leader|list|status}</title>
+ <para>
+ This command shows different aspects of LVS status. For an
+ overview of CTDB's LVS functionality please see the
+ <citetitle>LVS</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>leader</term>
+ <listitem>
+ <para>
+ Shows the PNN of the current LVS leader node.
+ </para>
+ <para>
+ Example output:
+ </para>
+ <screen>
+2
+ </screen>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>list</term>
+ <listitem>
+ <para>
+ Lists the currently usable LVS nodes.
+ </para>
+ <para>
+ Example output:
+ </para>
+ <screen>
+2 10.0.0.13
+3 10.0.0.14
+ </screen>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>status</term>
+ <listitem>
+ <para>
+ List the nodes in the current LVS group and their status.
+ </para>
+ <para>
+ Example output:
+ </para>
+ <screen>
+pnn:0 10.0.0.11 UNHEALTHY (THIS NODE)
+pnn:1 10.0.0.12 UNHEALTHY
+pnn:2 10.0.0.13 OK
+pnn:3 10.0.0.14 OK
+ </screen>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ </refsect2>
+
+
+ <refsect2>
+ <title>getcapabilities</title>
+
+ <para>
+ This command shows the capabilities of the current node. See
+ the <citetitle>CAPABILITIES</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more details.
+ </para>
+
+ <para>
+ Example output:
+ </para>
+ <screen>
+LEADER: YES
+LMASTER: YES
+ </screen>
+
+ </refsect2>
+
+ <refsect2>
+ <title>statistics</title>
+ <para>
+ Collect statistics from the CTDB daemon about
+ how many calls it has served. Information about
+ various fields in statistics can be found in
+ <citerefentry><refentrytitle>ctdb-statistics</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb statistics
+CTDB version 1
+Current time of statistics : Tue Mar 8 15:18:51 2016
+Statistics collected since : (003 21:31:32) Fri Mar 4 17:47:19 2016
+ num_clients 9
+ frozen 0
+ recovering 0
+ num_recoveries 2
+ client_packets_sent 8170534
+ client_packets_recv 7166132
+ node_packets_sent 16549998
+ node_packets_recv 5244418
+ keepalive_packets_sent 201969
+ keepalive_packets_recv 201969
+ node
+ req_call 26
+ reply_call 0
+ req_dmaster 9
+ reply_dmaster 12
+ reply_error 0
+ req_message 1339231
+ req_control 8177506
+ reply_control 6831284
+ client
+ req_call 15
+ req_message 334809
+ req_control 6831308
+ timeouts
+ call 0
+ control 0
+ traverse 0
+ locks
+ num_calls 8
+ num_current 0
+ num_pending 0
+ num_failed 0
+ total_calls 15
+ pending_calls 0
+ childwrite_calls 0
+ pending_childwrite_calls 0
+ memory_used 394879
+ max_hop_count 1
+ total_ro_delegations 0
+ total_ro_revokes 0
+ hop_count_buckets: 8 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ lock_buckets: 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0
+ locks_latency MIN/AVG/MAX 0.010005/0.010418/0.011010 sec out of 8
+ reclock_ctdbd MIN/AVG/MAX 0.002538/0.002538/0.002538 sec out of 1
+ reclock_recd MIN/AVG/MAX 0.000000/0.000000/0.000000 sec out of 0
+ call_latency MIN/AVG/MAX 0.000044/0.002142/0.011702 sec out of 15
+ childwrite_latency MIN/AVG/MAX 0.000000/0.000000/0.000000 sec out of 0
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>statisticsreset</title>
+ <para>
+ This command is used to clear all statistics counters in a node.
+ </para>
+ <para>
+ Example: ctdb statisticsreset
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>dbstatistics <parameter>DB</parameter></title>
+ <para>
+ Display statistics about the database DB. Information
+ about various fields in dbstatistics can be found in
+ <citerefentry><refentrytitle>ctdb-statistics</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb dbstatistics locking.tdb
+DB Statistics: locking.tdb
+ ro_delegations 0
+ ro_revokes 0
+ locks
+ total 14356
+ failed 0
+ current 0
+ pending 0
+ hop_count_buckets: 28087 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0
+ lock_buckets: 0 14188 38 76 32 19 3 0 0 0 0 0 0 0 0 0
+ locks_latency MIN/AVG/MAX 0.001066/0.012686/4.202292 sec out of 14356
+ vacuum_latency MIN/AVG/MAX 0.000472/0.002207/15.243570 sec out of 224530
+ Num Hot Keys: 1
+ Count:8 Key:ff5bd7cb3ee3822edc1f0000000000000000000000000000
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>getreclock</title>
+ <para>
+ Show details of the recovery lock, if any.
+ </para>
+
+ <para>
+ Example output:
+ </para>
+ <screen>
+ /clusterfs/.ctdb/recovery.lock
+ </screen>
+
+ </refsect2>
+
+ <refsect2>
+ <title>getdebug</title>
+ <para>
+ Get the current debug level for the node. The debug level controls what information is written to the log file.
+ </para>
+ <para>
+ The debug levels are mapped to the corresponding syslog levels.
+ When a debug level is set, only those messages at that level and higher
+ levels will be printed.
+ </para>
+ <para>
+ The debug levels, from highest to lowest, are:
+ </para>
+ <para>
+ ERROR WARNING NOTICE INFO DEBUG
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>setdebug <parameter>DEBUGLEVEL</parameter></title>
+ <para>
+ Set the debug level of a node. This controls what information will be logged.
+ </para>
+ <para>
+ The debuglevel is one of ERROR WARNING NOTICE INFO DEBUG
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>getpid</title>
+ <para>
+ This command will return the process id of the ctdb daemon.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>disable</title>
+ <para>
+ This command is used to administratively disable a node in the cluster.
+ A disabled node will still participate in the cluster and host
+ clustered TDB records but its public ip address has been taken over by
+ a different node and it no longer hosts any services.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>enable</title>
+ <para>
+ Re-enable a node that has been administratively disabled.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>stop</title>
+ <para>
+ This command is used to administratively STOP a node in the cluster.
+ A STOPPED node is connected to the cluster but will not host any
+ public ip addresses, nor does it participate in the VNNMAP.
+ The difference between a DISABLED node and a STOPPED node is that
+ a STOPPED node does not host any parts of the database which means
+ that a recovery is required to stop/continue nodes.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>continue</title>
+ <para>
+ Re-start a node that has been administratively stopped.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>addip <parameter>IPADDR</parameter>/<parameter>mask</parameter> <parameter>IFACE</parameter></title>
+ <para>
+ This command is used to add a new public ip to a node
+ during runtime. It should be followed by a <command>ctdb
+ ipreallocate</command>. This allows public addresses to be
+ added to a cluster without having to restart the ctdb daemons.
+ </para>
+ <para>
+ Note that this only updates the runtime instance of ctdb. Any
+ changes will be lost next time ctdb is restarted and the public
+ addresses file is re-read. If you want this change to be
+ permanent you must also update the public addresses file manually.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>delip <parameter>IPADDR</parameter></title>
+ <para>
+ This command flags IPADDR for deletion from a node at runtime.
+ It should be followed by a <command>ctdb
+ ipreallocate</command>. If IPADDR is currently hosted by the
+ node it is being removed from, this ensures that the IP will
+ first be failed over to another node, if possible, and that it
+ is then actually removed.
+ </para>
+ <para>
+ Note that this only updates the runtime instance of CTDB. Any
+ changes will be lost next time CTDB is restarted and the
+ public addresses file is re-read. If you want this change to
+ be permanent you must also update the public addresses file
+ manually.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>moveip <parameter>IPADDR</parameter> <parameter>PNN</parameter></title>
+ <para>
+ This command can be used to manually fail a public ip address to a
+ specific node.
+ </para>
+ <para>
+ In order to manually override the "automatic" distribution of public
+ ip addresses that ctdb normally provides, this command only works
+ when you have changed the tunables for the daemon to:
+ </para>
+ <para>
+ IPAllocAlgorithm != 0
+ </para>
+ <para>
+ NoIPFailback = 1
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>shutdown</title>
+ <para>
+ This command will shutdown a specific CTDB daemon.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>setlmasterrole on|off</title>
+ <para>
+ This command is used to enable/disable the LMASTER capability for a node at runtime. This capability determines whether or not a node can be used as an LMASTER for records in the database. A node that does not have the LMASTER capability will not show up in the vnnmap.
+ </para>
+
+ <para>
+ Nodes will by default have this capability, but it can be stripped off nodes by the setting in the sysconfig file or by using this command.
+ </para>
+ <para>
+ Once this setting has been enabled/disabled, you need to perform a recovery for it to take effect.
+ </para>
+ <para>
+ See also "ctdb getcapabilities"
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>setleaderrole on|off</title>
+ <para>
+ This command is used to enable/disable the LEADER capability
+ for a node at runtime. This capability determines whether or
+ not a node can be elected leader of the cluster. A node that
+ does not have the LEADER capability can not be elected
+ leader. If the current leader has this capability removed then
+ an election will occur.
+ </para>
+
+ <para>
+ Nodes have this capability enabled by default, but it can be
+ removed via the <command>cluster:leader capability</command>
+ configuration setting or by using this command.
+ </para>
+ <para>
+ See also "ctdb getcapabilities"
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>reloadnodes</title>
+ <para>
+ This command is used when adding new nodes, or removing
+ existing nodes from an existing cluster.
+ </para>
+ <para>
+ Procedure to add nodes:
+ </para>
+ <orderedlist>
+ <listitem>
+ <para>
+ To expand an existing cluster, first ensure with
+ <command>ctdb status</command> that all nodes are up and
+ running and that they are all healthy. Do not try to
+ expand a cluster unless it is completely healthy!
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ On all nodes, edit <filename>/usr/local/etc/ctdb/nodes</filename>
+ and <emphasis>add the new nodes at the end of this
+ file</emphasis>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Verify that all the nodes have identical
+ <filename>/usr/local/etc/ctdb/nodes</filename> files after adding
+ the new nodes.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Run <command>ctdb reloadnodes</command> to force all nodes
+ to reload the nodes file.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Use <command>ctdb status</command> on all nodes and verify
+ that they now show the additional nodes.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Install and configure the new node and bring it online.
+ </para>
+ </listitem>
+ </orderedlist>
+ <para>
+ Procedure to remove nodes:
+ </para>
+ <orderedlist>
+ <listitem>
+ <para>
+ To remove nodes from an existing cluster, first ensure
+ with <command>ctdb status</command> that all nodes, except
+ the node to be deleted, are up and running and that they
+ are all healthy. Do not try to remove nodes from a
+ cluster unless the cluster is completely healthy!
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Shutdown and power off the node to be removed.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ On all other nodes, edit the
+ <filename>/usr/local/etc/ctdb/nodes</filename> file and
+ <emphasis>comment out</emphasis> the nodes to be removed.
+ <emphasis>Do not delete the lines for the deleted
+ nodes</emphasis>, just comment them out by adding a '#' at
+ the beginning of the lines.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Run <command>ctdb reloadnodes</command> to force all nodes
+ to reload the nodes file.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Use <command>ctdb status</command> on all nodes and verify
+ that the deleted nodes are no longer listed.
+ </para>
+ </listitem>
+ </orderedlist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>
+ reloadips
+ <optional><parameter>PNN-LIST</parameter></optional>
+ </title>
+ <para>
+ This command reloads the public addresses configuration file
+ on the specified nodes. When it completes addresses will be
+ reconfigured and reassigned across the cluster as necessary.
+ </para>
+
+ <para>
+ This command is currently unable to make changes to the
+ netmask or interfaces associated with existing addresses.
+ Such changes must be made in 2 steps by deleting addresses in
+ question and re-adding them. Unfortunately this will disrupt
+ connections to the changed addresses.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>getdbmap</title>
+ <para>
+ This command lists all clustered TDB databases that the CTDB
+ daemon has attached to. Some databases are flagged as PERSISTENT,
+ this means that the database stores data persistently and the
+ data will remain across reboots. One example of such a database
+ is secrets.tdb where information about how the cluster was joined
+ to the domain is stored. Some databases are flagged as REPLICATED,
+ this means that the data in that database is replicated across all
+ the nodes. But the data will not remain across reboots. This
+ type of database is used by CTDB to store its internal state.
+ </para>
+ <para>
+ If a PERSISTENT database is not in a healthy state the database
+ is flagged as UNHEALTHY. If there's at least one completely
+ healthy node running in the cluster, it's possible that the
+ content is restored by a recovery run automatically. Otherwise an
+ administrator needs to analyze the problem.
+ </para>
+ <para>
+ See also "ctdb getdbstatus", "ctdb backupdb", "ctdb restoredb",
+ "ctdb dumpdbbackup", "ctdb wipedb", "ctdb setvar AllowUnhealthyDBRead 1"
+ and (if samba or tdb-utils are installed) "tdbtool check".
+ </para>
+ <para>
+ Most databases are not persistent and only store the state
+ information that the currently running samba daemons need. These
+ databases are always wiped when ctdb/samba starts and when a
+ node is rebooted.
+ </para>
+
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb getdbmap
+Number of databases:10
+dbid:0x435d3410 name:notify.tdb path:/usr/local/var/lib/ctdb/notify.tdb.0
+dbid:0x42fe72c5 name:locking.tdb path:/usr/local/var/lib/ctdb/locking.tdb.0
+dbid:0x1421fb78 name:brlock.tdb path:/usr/local/var/lib/ctdb/brlock.tdb.0
+dbid:0x17055d90 name:connections.tdb path:/usr/local/var/lib/ctdb/connections.tdb.0
+dbid:0xc0bdde6a name:sessionid.tdb path:/usr/local/var/lib/ctdb/sessionid.tdb.0
+dbid:0x122224da name:test.tdb path:/usr/local/var/lib/ctdb/test.tdb.0
+dbid:0x2672a57f name:idmap2.tdb path:/usr/local/var/lib/ctdb/persistent/idmap2.tdb.0 PERSISTENT
+dbid:0xb775fff6 name:secrets.tdb path:/usr/local/var/lib/ctdb/persistent/secrets.tdb.0 PERSISTENT
+dbid:0xe98e08b6 name:group_mapping.tdb path:/usr/local/var/lib/ctdb/persistent/group_mapping.tdb.0 PERSISTENT
+dbid:0x7bbbd26c name:passdb.tdb path:/usr/local/var/lib/ctdb/persistent/passdb.tdb.0 PERSISTENT
+
+# ctdb getdbmap # example for unhealthy database
+Number of databases:1
+dbid:0xb775fff6 name:secrets.tdb path:/usr/local/var/lib/ctdb/persistent/secrets.tdb.0 PERSISTENT UNHEALTHY
+
+# ctdb -X getdbmap
+|ID|Name|Path|Persistent|Unhealthy|
+|0x7bbbd26c|passdb.tdb|/usr/local/var/lib/ctdb/persistent/passdb.tdb.0|1|0|
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ backupdb
+ <parameter>DB</parameter>
+ <parameter>FILE</parameter>
+ </title>
+ <para>
+ Copy the contents of database DB to FILE. FILE can later be
+ read back using <command>restoredb</command>. This is mainly
+ useful for backing up persistent databases such as
+ <filename>secrets.tdb</filename> and similar.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ restoredb
+ <parameter>FILE</parameter>
+ <optional><parameter>DB</parameter></optional>
+ </title>
+ <para>
+ This command restores a persistent database that was
+ previously backed up using backupdb. By default the data will
+ be restored back into the same database as it was created
+ from. By specifying DB you can restore the data into a
+ different database.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>setdbreadonly <parameter>DB</parameter></title>
+ <para>
+ This command will enable the read-only record support for a
+ database. This is an experimental feature to improve
+ performance for contended records primarily in locking.tdb and
+ brlock.tdb. When enabling this feature you must set it on all
+ nodes in the cluster.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>setdbsticky <parameter>DB</parameter></title>
+ <para>
+ This command will enable the sticky record support for the
+ specified database. This is an experimental feature to
+ improve performance for contended records primarily in
+ locking.tdb and brlock.tdb. When enabling this feature you
+ must set it on all nodes in the cluster.
+ </para>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>INTERNAL COMMANDS</title>
+
+ <para>
+ Internal commands are used by CTDB's scripts and are not
+ required for managing a CTDB cluster. Their parameters and
+ behaviour are subject to change.
+ </para>
+
+ <refsect2>
+ <title>gettickles <parameter>IPADDR</parameter></title>
+ <para>
+ Show TCP connections that are registered with CTDB to be
+ "tickled" if there is a failover.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>gratarp <parameter>IPADDR</parameter> <parameter>INTERFACE</parameter></title>
+ <para>
+ Send out a gratuitous ARP for the specified IP address through
+ the specified interface. This command is mainly used by the
+ ctdb eventscripts.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ pdelete <parameter>DB</parameter> <parameter>KEY</parameter>
+ </title>
+ <para>
+ Delete KEY from DB.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ pfetch <parameter>DB</parameter> <parameter>KEY</parameter>
+ </title>
+ <para>
+ Print the value associated with KEY in DB.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ pstore
+ <parameter>DB</parameter>
+ <parameter>KEY</parameter>
+ <parameter>FILE</parameter>
+ </title>
+ <para>
+ Store KEY in DB with contents of FILE as the associated value.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ ptrans
+ <parameter>DB</parameter>
+ <optional><parameter>FILE</parameter></optional>
+ </title>
+ <para>
+ Read a list of key-value pairs, one per line from FILE, and
+ store them in DB using a single transaction. An empty value
+ is equivalent to deleting the given key.
+ </para>
+ <para>
+ The key and value should be separated by spaces or tabs. Each
+ key/value should be a printable string enclosed in
+ double-quotes.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>runstate [setup|first_recovery|startup|running]</title>
+ <para>
+ Print the runstate of the specified node. Runstates are used
+ to serialise important state transitions in CTDB, particularly
+ during startup.
+ </para>
+ <para>
+ If one or more optional runstate arguments are specified then
+ the node must be in one of these runstates for the command to
+ succeed.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb runstate
+RUNNING
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>setifacelink <parameter>IFACE</parameter> up|down</title>
+ <para>
+ Set the internal state of network interface IFACE. This is
+ typically used in the <filename>10.interface</filename> script
+ in the "monitor" event.
+ </para>
+ <para>
+ Example: ctdb setifacelink eth0 up
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>tickle</title>
+ <para>
+ Read a list of TCP connections, one per line, from standard
+ input and send a TCP tickle to the source host for each
+ connection. A connection is specified as:
+ </para>
+ <synopsis>
+ <parameter>SRC-IPADDR</parameter>:<parameter>SRC-PORT</parameter> <parameter>DST-IPADDR</parameter>:<parameter>DST-PORT</parameter>
+ </synopsis>
+ <para>
+ A single connection can be specified on the command-line
+ rather than on standard input.
+ </para>
+ <para>
+ A TCP tickle is a TCP ACK packet with an invalid sequence and
+ acknowledge number. When received by the source host, it
+ results in that host sending an immediate correct ACK back to
+ the other end.
+ </para>
+ <para>
+ TCP tickles are useful to "tickle" clients after an IP failover has
+ occurred since this will make the client immediately recognize the
+ TCP connection has been disrupted and that the client will need
+ to reestablish. This greatly speeds up the time it takes for a client
+ to detect and reestablish after an IP failover in the ctdb cluster.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>version</title>
+ <para>
+ Display the CTDB version.
+ </para>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>DEBUGGING COMMANDS</title>
+ <para>
+ These commands are primarily used for CTDB development and testing and
+ should not be used for normal administration.
+ </para>
+
+
+ <refsect2>
+ <title>OPTIONS</title>
+
+ <variablelist>
+ <varlistentry><term>--print-emptyrecords</term>
+ <listitem>
+ <para>
+ This enables printing of empty records when dumping databases
+ with the catdb, cattdb and dumpdbbackup commands. Records with
+ empty data segment are considered deleted by ctdb and cleaned
+ by the vacuuming mechanism, so this switch can come in handy for
+ debugging the vacuuming behaviour.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>--print-datasize</term>
+ <listitem>
+ <para>
+ This lets database dumps (catdb, cattdb, dumpdbbackup) print the
+ size of the record data instead of dumping the data contents.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>--print-lmaster</term>
+ <listitem>
+ <para>
+ This lets catdb print the lmaster for each record.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>--print-hash</term>
+ <listitem>
+ <para>
+ This lets database dumps (catdb, cattdb, dumpdbbackup) print the
+ hash for each record.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>--print-recordflags</term>
+ <listitem>
+ <para>
+ This lets catdb and dumpdbbackup print the
+ record flags for each record. Note that cattdb always
+ prints the flags.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>process-exists <parameter>PID</parameter> <parameter>[SRVID]</parameter></title>
+ <para>
+ This command checks if a specific process exists on the CTDB
+ host. This is mainly used by Samba to check if remote instances
+ of samba are still running or not. When the optional SRVID
+ argument is specified, the command checks if a specific process
+ exists on the CTDB host and has registered for specified SRVID.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>getdbstatus <parameter>DB</parameter></title>
+ <para>
+ This command displays more details about a database.
+ </para>
+ <refsect3>
+ <title>Example</title>
+ <screen>
+# ctdb getdbstatus test.tdb.0
+dbid: 0x122224da
+name: test.tdb
+path: /usr/local/var/lib/ctdb/test.tdb.0
+PERSISTENT: no
+HEALTH: OK
+
+# ctdb getdbstatus registry.tdb # with a corrupted TDB
+dbid: 0xf2a58948
+name: registry.tdb
+path: /usr/local/var/lib/ctdb/persistent/registry.tdb.0
+PERSISTENT: yes
+HEALTH: NO-HEALTHY-NODES - ERROR - Backup of corrupted TDB in '/usr/local/var/lib/ctdb/persistent/registry.tdb.0.corrupted.20091208091949.0Z'
+ </screen>
+ </refsect3>
+ </refsect2>
+
+ <refsect2>
+ <title>catdb <parameter>DB</parameter></title>
+ <para>
+ Print a dump of the clustered TDB database DB.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>cattdb <parameter>DB</parameter></title>
+ <para>
+ Print a dump of the contents of the local TDB database DB.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>dumpdbbackup <parameter>FILE</parameter></title>
+ <para>
+ Print a dump of the contents from database backup FILE,
+ similar to <command>catdb</command>.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>wipedb <parameter>DB</parameter></title>
+ <para>
+ Remove all contents of database DB.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>recover</title>
+ <para>
+ This command will trigger the recovery daemon to do a cluster
+ recovery.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>ipreallocate, sync</title>
+ <para>
+ This command will force the leader to perform a full ip
+ reallocation process and redistribute all ip addresses. This
+ is useful to "reset" the allocations back to their default state
+ if they have been changed using the "moveip" command. While a
+ "recover" will also perform this reallocation, a recovery is
+ much more heavyweight since it will also rebuild all the
+ databases.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>attach <parameter>DBNAME</parameter> [persistent|replicated]</title>
+ <para>
+ Create a new CTDB database called DBNAME and attach to it on
+ all nodes.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>detach <parameter>DB-LIST</parameter></title>
+ <para>
+ Detach specified non-persistent database(s) from the cluster. This
+ command will disconnect specified database(s) on all nodes in
+ the cluster. This command should only be used when none of the
+ specified database(s) are in use.
+ </para>
+ <para>
+ All nodes should be active and tunable AllowClientDBAttach should
+ be disabled on all nodes before detaching databases.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>dumpmemory</title>
+ <para>
+ This is a debugging command. This command will make the ctdb
+ daemon write a full memory allocation map to standard output.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>rddumpmemory</title>
+ <para>
+ This is a debugging command. This command will dump the talloc memory
+ allocation tree for the recovery daemon to standard output.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>ban <parameter>BANTIME</parameter></title>
+ <para>
+ Administratively ban a node for BANTIME seconds. The node
+ will be unbanned after BANTIME seconds have elapsed.
+ </para>
+ <para>
+ A banned node does not participate in the cluster. It does
+ not host any records for the clustered TDB and does not host
+ any public IP addresses.
+ </para>
+ <para>
+ Nodes are automatically banned if they misbehave. For
+ example, a node may be banned if it causes too many cluster
+ recoveries.
+ </para>
+ <para>
+ To administratively exclude a node from a cluster use the
+ <command>stop</command> command.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>unban</title>
+ <para>
+ This command is used to unban a node that has either been
+ administratively banned using the ban command or has been
+ automatically banned.
+ </para>
+ </refsect2>
+
+ </refsect1>
+
+ <!-- UNDOCUMENTED: stats
+ addtickle deltickle
+ tfetch tstore readkey writekey
+ checktcpport getdbseqnum
+ -->
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>onnode</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-statistics</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-tunables</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+
+</refentry>
diff --git a/ctdb/doc/ctdb.7.xml b/ctdb/doc/ctdb.7.xml
new file mode 100644
index 0000000..0f3fbc6
--- /dev/null
+++ b/ctdb/doc/ctdb.7.xml
@@ -0,0 +1,1182 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="ctdb.7">
+
+<refmeta>
+ <refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+</refmeta>
+
+
+<refnamediv>
+ <refname>ctdb</refname>
+ <refpurpose>Clustered TDB</refpurpose>
+</refnamediv>
+
+<refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>
+ CTDB is a clustered database component in clustered Samba that
+ provides a high-availability load-sharing CIFS server cluster.
+ </para>
+
+ <para>
+ The main functions of CTDB are:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ Provide a clustered version of the TDB database with automatic
+ rebuild/recovery of the databases upon node failures.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Monitor nodes in the cluster and services running on each node.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Manage a pool of public IP addresses that are used to provide
+ services to clients. Alternatively, CTDB can be used with
+ LVS.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Combined with a cluster filesystem CTDB provides a full
+ high-availability (HA) environment for services such as clustered
+ Samba, NFS and other services.
+ </para>
+
+ <para>
+ In addition to the CTDB manual pages there is much more
+ information available at
+ <ulink url="https://wiki.samba.org/index.php/CTDB_and_Clustered_Samba"/>.
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>ANATOMY OF A CTDB CLUSTER</title>
+
+ <para>
+ A CTDB cluster is a collection of nodes with 2 or more network
+ interfaces. All nodes provide network (usually file/NAS) services
+ to clients. Data served by file services is stored on shared
+ storage (usually a cluster filesystem) that is accessible by all
+ nodes.
+ </para>
+ <para>
+ CTDB provides an "all active" cluster, where services are load
+ balanced across all nodes.
+ </para>
+</refsect1>
+
+ <refsect1>
+ <title>Cluster leader</title>
+
+ <para>
+ CTDB uses a <emphasis>cluster leader and follower</emphasis>
+ model of cluster management. All nodes in a cluster elect one
+ node to be the leader. The leader node coordinates privileged
+ operations such as database recovery and IP address failover.
+ </para>
+
+ <para>
+ CTDB previously referred to the leader as the <emphasis>recovery
+ master</emphasis> or <emphasis>recmaster</emphasis>. References
+ to these terms may still be found in documentation and code.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Cluster Lock</title>
+
+ <para>
+ The cluster leader uses a cluster lock to assert its privileged role in the
+ cluster. This node takes the cluster lock when it becomes
+ leader and holds the lock until it is no longer leader. The
+ <emphasis>cluster lock</emphasis> helps CTDB to avoid a
+ <emphasis>split brain</emphasis>, where a cluster becomes
+ partitioned and each partition attempts to operate
+ independently. Issues that can result from a split brain
+ include file data corruption, because file locking metadata may
+ not be tracked correctly.
+ </para>
+
+ <para>
+ CTDB previously referred to the cluster lock as the
+ <emphasis>recovery lock</emphasis>. The abbreviation
+ <emphasis>reclock</emphasis> is still used - just "clock" would
+ be confusing.
+ </para>
+
+ <para>
+ <emphasis>CTDB is unable to configure a default cluster
+ lock</emphasis>, because this would depend on factors such as
+ cluster filesystem mountpoints. However, <emphasis>running CTDB
+ without a cluster lock is not recommended</emphasis> as there
+ will be no split brain protection.
+ </para>
+
+ <para>
+ When a cluster lock is configured it is used as the election
+ mechanism. Nodes race to take the cluster lock and the winner
+ is the cluster leader. This avoids problems when a node wins an
+ election but is unable to take the lock - this can occur if a
+ cluster becomes partitioned (for example, due to a communication
+ failure) and a different leader is elected by the nodes in each
+ partition, or if the cluster filesystem has a high failover
+ latency.
+ </para>
+
+ <para>
+ By default, the cluster lock is implemented using a file
+ (specified by <parameter>cluster lock</parameter> in the
+ <literal>[cluster]</literal> section of
+ <citerefentry><refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>) residing in shared
+ storage (usually) on a cluster filesystem. To support a
+ cluster lock the cluster filesystem must support lock
+ coherence. See
+ <citerefentry><refentrytitle>ping_pong</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry> for more details.
+ </para>
+
+ <para>
+ The cluster lock can also be implemented using an arbitrary
+ cluster mutex helper (or call-out). This is indicated by using
+ an exclamation point ('!') as the first character of the
+ <parameter>cluster lock</parameter> parameter. For example, a
+ value of <command>!/usr/local/bin/myhelper cluster</command>
+ would run the given helper with the specified arguments. The
+ helper will continue to run as long as it holds its mutex. See
+ <filename>ctdb/doc/cluster_mutex_helper.txt</filename> in the
+ source tree, and related code, for clues about writing helpers.
+ </para>
+
+ <para>
+ When a file is specified for the <parameter>cluster
+ lock</parameter> parameter (i.e. no leading '!') the file lock
+ is implemented by a default helper
+ (<command>/usr/local/libexec/ctdb/ctdb_mutex_fcntl_helper</command>).
+ This helper has arguments as follows:
+
+ <!-- cmdsynopsis would not require long line but does not work :-( -->
+ <synopsis>
+<command>ctdb_mutex_fcntl_helper</command> <parameter>FILE</parameter> <optional><parameter>RECHECK-INTERVAL</parameter></optional>
+ </synopsis>
+
+ <command>ctdb_mutex_fcntl_helper</command> will take a lock on
+ FILE and then check every RECHECK-INTERVAL seconds to ensure
+ that FILE still exists and that its inode number is unchanged
+ from when the lock was taken. The default value for
+ RECHECK-INTERVAL is 5.
+ </para>
+
+ <para>
+ CTDB does sanity checks to ensure that the cluster lock is held
+ as expected.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Private vs Public addresses</title>
+
+ <para>
+ Each node in a CTDB cluster has multiple IP addresses assigned
+ to it:
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ A single private IP address that is used for communication
+ between nodes.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ One or more public IP addresses that are used to provide
+ NAS or other services.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+
+ <refsect2>
+ <title>Private address</title>
+
+ <para>
+ Each node is configured with a unique, permanently assigned
+ private address. This address is configured by the operating
+ system. This address uniquely identifies a physical node in
+ the cluster and is the address that CTDB daemons will use to
+ communicate with the CTDB daemons on other nodes.
+ </para>
+
+ <para>
+ Private addresses are listed in the file
+ <filename>/usr/local/etc/ctdb/nodes</filename>. This file
+ contains the list of private addresses for all nodes in the
+ cluster, one per line. This file must be the same on all nodes
+ in the cluster.
+ </para>
+
+ <para>
+ Some users like to put this configuration file in their
+ cluster filesystem. A symbolic link should be used in this
+ case.
+ </para>
+
+ <para>
+ Private addresses should not be used by clients to connect to
+ services provided by the cluster.
+ </para>
+ <para>
+ It is strongly recommended that the private addresses are
+ configured on a private network that is separate from client
+ networks. This is because the CTDB protocol is both
+ unauthenticated and unencrypted. If clients share the private
+ network then steps need to be taken to stop injection of
+ packets to relevant ports on the private addresses. It is
+ also likely that CTDB protocol traffic between nodes could
+ leak sensitive information if it can be intercepted.
+ </para>
+
+ <para>
+ Example <filename>/usr/local/etc/ctdb/nodes</filename> for a four node
+ cluster:
+ </para>
+ <screen format="linespecific">
+192.168.1.1
+192.168.1.2
+192.168.1.3
+192.168.1.4
+ </screen>
+ </refsect2>
+
+ <refsect2>
+ <title>Public addresses</title>
+
+ <para>
+ Public addresses are used to provide services to clients.
+ Public addresses are not configured at the operating system
+ level and are not permanently associated with a particular
+ node. Instead, they are managed by CTDB and are assigned to
+ interfaces on physical nodes at runtime.
+ </para>
+ <para>
+ The CTDB cluster will assign/reassign these public addresses
+ across the available healthy nodes in the cluster. When one
+ node fails, its public addresses will be taken over by one or
+ more other nodes in the cluster. This ensures that services
+ provided by all public addresses are always available to
+ clients, as long as there are nodes available capable of
+ hosting this address.
+ </para>
+
+ <para>
+ The public address configuration is stored in
+ <filename>/usr/local/etc/ctdb/public_addresses</filename> on
+ each node. This file contains a list of the public addresses
+ that the node is capable of hosting, one per line. Each entry
+ also contains the netmask and the interface to which the
+ address should be assigned. If this file is missing then no
+ public addresses are configured.
+ </para>
+
+ <para>
+ Some users who have the same public addresses on all nodes
+ like to put this configuration file in their cluster
+ filesystem. A symbolic link should be used in this case.
+ </para>
+
+ <para>
+ Example <filename>/usr/local/etc/ctdb/public_addresses</filename> for a
+ node that can host 4 public addresses, on 2 different
+ interfaces:
+ </para>
+ <screen format="linespecific">
+10.1.1.1/24 eth1
+10.1.1.2/24 eth1
+10.1.2.1/24 eth2
+10.1.2.2/24 eth2
+ </screen>
+
+ <para>
+ In many cases the public addresses file will be the same on
+ all nodes. However, it is possible to use different public
+ address configurations on different nodes.
+ </para>
+
+ <para>
+ Example: 4 nodes partitioned into two subgroups:
+ </para>
+ <screen format="linespecific">
+Node 0:/usr/local/etc/ctdb/public_addresses
+ 10.1.1.1/24 eth1
+ 10.1.1.2/24 eth1
+
+Node 1:/usr/local/etc/ctdb/public_addresses
+ 10.1.1.1/24 eth1
+ 10.1.1.2/24 eth1
+
+Node 2:/usr/local/etc/ctdb/public_addresses
+ 10.1.2.1/24 eth2
+ 10.1.2.2/24 eth2
+
+Node 3:/usr/local/etc/ctdb/public_addresses
+ 10.1.2.1/24 eth2
+ 10.1.2.2/24 eth2
+ </screen>
+ <para>
+ In this example nodes 0 and 1 host two public addresses on the
+ 10.1.1.x network while nodes 2 and 3 host two public addresses
+ for the 10.1.2.x network.
+ </para>
+ <para>
+ Public address 10.1.1.1 can be hosted by either of nodes 0 or
+ 1 and will be available to clients as long as at least one of
+ these two nodes is available.
+ </para>
+ <para>
+ If both nodes 0 and 1 become unavailable then public address
+ 10.1.1.1 also becomes unavailable. 10.1.1.1 can not be failed
+ over to nodes 2 or 3 since these nodes do not have this public
+ address configured.
+ </para>
+ <para>
+ The <command>ctdb ip</command> command can be used to view the
+ current assignment of public addresses to physical nodes.
+ </para>
+ </refsect2>
+ </refsect1>
+
+
+ <refsect1>
+ <title>Node status</title>
+
+ <para>
+ The current status of each node in the cluster can be viewed by the
+ <command>ctdb status</command> command.
+ </para>
+
+ <para>
+ A node can be in one of the following states:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>OK</term>
+ <listitem>
+ <para>
+ This node is healthy and fully functional. It hosts public
+ addresses to provide services.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>DISCONNECTED</term>
+ <listitem>
+ <para>
+ This node is not reachable by other nodes via the private
+ network. It is not currently participating in the cluster.
+ It <emphasis>does not</emphasis> host public addresses to
+ provide services. It might be shut down.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>DISABLED</term>
+ <listitem>
+ <para>
+ This node has been administratively disabled. This node is
+ partially functional and participates in the cluster.
+ However, it <emphasis>does not</emphasis> host public
+ addresses to provide services.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>UNHEALTHY</term>
+ <listitem>
+ <para>
+ A service provided by this node has failed a health check
+ and should be investigated. This node is partially
+ functional and participates in the cluster. However, it
+ <emphasis>does not</emphasis> host public addresses to
+ provide services. Unhealthy nodes should be investigated
+ and may require an administrative action to rectify.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>BANNED</term>
+ <listitem>
+ <para>
+ CTDB is not behaving as designed on this node. For example,
+ it may have failed too many recovery attempts. Such nodes
+ are banned from participating in the cluster for a
+ configurable time period before they attempt to rejoin the
+ cluster. A banned node <emphasis>does not</emphasis> host
+ public addresses to provide services. All banned nodes
+ should be investigated and may require an administrative
+ action to rectify.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>STOPPED</term>
+ <listitem>
+ <para>
+ This node has been administratively excluded from the
+ cluster. A stopped node does not participate in the cluster
+ and <emphasis>does not</emphasis> host public addresses to
+ provide services. This state can be used while performing
+ maintenance on a node.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>PARTIALLYONLINE</term>
+ <listitem>
+ <para>
+ A node that is partially online participates in a cluster
+ like a healthy (OK) node. Some interfaces to serve public
+ addresses are down, but at least one interface is up. See
+ also <command>ctdb ifaces</command>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>CAPABILITIES</title>
+
+ <para>
+ Cluster nodes can have several different capabilities enabled.
+ These are listed below.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>LEADER</term>
+ <listitem>
+ <para>
+ Indicates that a node can become the CTDB cluster leader.
+ The current leader is decided via an
+ election held by all active nodes with this capability.
+ </para>
+ <para>
+ Default is YES.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>LMASTER</term>
+ <listitem>
+ <para>
+ Indicates that a node can be the location master (LMASTER)
+ for database records. The LMASTER always knows which node
+ has the latest copy of a record in a volatile database.
+ </para>
+ <para>
+ Default is YES.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ The LEADER and LMASTER capabilities can be disabled when CTDB
+ is used to create a cluster spanning across WAN links. In this
+ case CTDB acts as a WAN accelerator.
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>LVS</title>
+
+ <para>
+ LVS is a mode where CTDB presents one single IP address for the
+ entire cluster. This is an alternative to using public IP
+ addresses and round-robin DNS to loadbalance clients across the
+ cluster.
+ </para>
+
+ <para>
+ This is similar to using a layer-4 loadbalancing switch but with
+ some restrictions.
+ </para>
+
+ <para>
+ One extra LVS public address is assigned on the public network
+ to each LVS group. Each LVS group is a set of nodes in the
+ cluster that presents the same LVS public address to the
+ outside world. Normally there would only be one LVS group
+ spanning an entire cluster, but in situations where one CTDB
+ cluster spans multiple physical sites it might be useful to have
+ one LVS group for each site. There can be multiple LVS groups
+ in a cluster but each node can only be a member of one LVS group.
+ </para>
+
+ <para>
+ Client access to the cluster is load-balanced across the HEALTHY
+ nodes in an LVS group. If no HEALTHY nodes exist then all
+ nodes in the group are used, regardless of health status. CTDB
+ will, however, never load-balance LVS traffic to nodes that are
+ BANNED, STOPPED, DISABLED or DISCONNECTED. The <command>ctdb
+ lvs</command> command is used to show which nodes are currently
+ load-balanced across.
+ </para>
+
+ <para>
+ In each LVS group, one of the nodes is selected by CTDB to be
+ the LVS leader. This node receives all traffic from clients
+ coming in to the LVS public address and multiplexes it across
+ the internal network to one of the nodes that LVS is using.
+ When responding to the client, that node will send the data back
+ directly to the client, bypassing the LVS leader node. The
+ command <command>ctdb lvs leader</command> will show which node
+ is the current LVS leader.
+ </para>
+
+ <para>
+ The path used for a client I/O is:
+ <orderedlist>
+ <listitem>
+ <para>
+ Client sends request packet to LVS leader.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ LVS leader passes the request on to one node across the
+ internal network.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Selected node processes the request.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Node responds back to client.
+ </para>
+ </listitem>
+ </orderedlist>
+ </para>
+
+ <para>
+ This means that all incoming traffic to the cluster will pass
+ through one physical node, which limits scalability. You cannot
+ send more data to the LVS address than one physical node can
+ multiplex. This means that you should not use LVS if your I/O
+ pattern is write-intensive since you will be limited in the
+ available network bandwidth that node can handle. LVS does work
+ very well for read-intensive workloads where only smallish READ
+ requests are going through the LVS leader bottleneck and the
+ majority of the traffic volume (the data in the read replies)
+ goes straight from the processing node back to the clients. For
+ read-intensive i/o patterns you can achieve very high throughput
+ rates in this mode.
+ </para>
+
+ <para>
+ Note: you can use LVS and public addresses at the same time.
+ </para>
+
+ <para>
+ If you use LVS, you must have a permanent address configured for
+ the public interface on each node. This address must be routable
+ and the cluster nodes must be configured so that all traffic
+ back to client hosts is routed through this interface. This is
+ also required in order to allow samba/winbind on the node to
+ talk to the domain controller. This LVS IP address can not be
+ used to initiate outgoing traffic.
+ </para>
+ <para>
+ Make sure that the domain controller and the clients are
+ reachable from a node <emphasis>before</emphasis> you enable
+ LVS. Also ensure that outgoing traffic to these hosts is routed
+ out through the configured public interface.
+ </para>
+
+ <refsect2>
+ <title>Configuration</title>
+
+ <para>
+ To activate LVS on a CTDB node you must specify the
+ <varname>CTDB_LVS_PUBLIC_IFACE</varname>,
+ <varname>CTDB_LVS_PUBLIC_IP</varname> and
+ <varname>CTDB_LVS_NODES</varname> configuration variables.
+ <varname>CTDB_LVS_NODES</varname> specifies a file containing
+ the private address of all nodes in the current node's LVS
+ group.
+ </para>
+
+ <para>
+ Example:
+ <screen format="linespecific">
+CTDB_LVS_PUBLIC_IFACE=eth1
+CTDB_LVS_PUBLIC_IP=10.1.1.237
+CTDB_LVS_NODES=/usr/local/etc/ctdb/lvs_nodes
+ </screen>
+ </para>
+
+ <para>
+ Example <filename>/usr/local/etc/ctdb/lvs_nodes</filename>:
+ </para>
+ <screen format="linespecific">
+192.168.1.2
+192.168.1.3
+192.168.1.4
+ </screen>
+
+ <para>
+ Normally any node in an LVS group can act as the LVS leader.
+ Nodes that are highly loaded due to other demands may be
+ flagged with the "follower-only" option in the
+ <varname>CTDB_LVS_NODES</varname> file to limit the LVS
+ functionality of those nodes.
+ </para>
+
+ <para>
+ LVS nodes file that excludes 192.168.1.4 from being
+ the LVS leader node:
+ </para>
+ <screen format="linespecific">
+192.168.1.2
+192.168.1.3
+192.168.1.4 follower-only
+ </screen>
+
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>TRACKING AND RESETTING TCP CONNECTIONS</title>
+
+ <para>
+ CTDB tracks TCP connections from clients to public IP addresses,
+ on known ports. When an IP address moves from one node to
+ another, all existing TCP connections to that IP address are
+ reset. The node taking over this IP address will also send
+ gratuitous ARPs (for IPv4, or neighbour advertisement, for
+ IPv6). This allows clients to reconnect quickly, rather than
+ waiting for TCP timeouts, which can be very long.
+ </para>
+
+ <para>
+ It is important that established TCP connections do not survive
+ a release and take of a public IP address on the same node.
+ Such connections can get out of sync with sequence and ACK
+ numbers, potentially causing a disruptive ACK storm.
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>NAT GATEWAY</title>
+
+ <para>
+ NAT gateway (NATGW) is an optional feature that is used to
+ configure fallback routing for nodes. This allows cluster nodes
+ to connect to external services (e.g. DNS, AD, NIS and LDAP)
+ when they do not host any public addresses (e.g. when they are
+ unhealthy).
+ </para>
+ <para>
+ This also applies to node startup because CTDB marks nodes as
+ UNHEALTHY until they have passed a "monitor" event. In this
+ context, NAT gateway helps to avoid a "chicken and egg"
+ situation where a node needs to access an external service to
+ become healthy.
+ </para>
+ <para>
+ Another way of solving this type of problem is to assign an
+ extra static IP address to a public interface on every node.
+ This is simpler but it uses an extra IP address per node, while
+ NAT gateway generally uses only one extra IP address.
+ </para>
+
+ <refsect2>
+ <title>Operation</title>
+
+ <para>
+ One extra NATGW public address is assigned on the public
+ network to each NATGW group. Each NATGW group is a set of
+ nodes in the cluster that shares the same NATGW address to
+ talk to the outside world. Normally there would only be one
+ NATGW group spanning an entire cluster, but in situations
+ where one CTDB cluster spans multiple physical sites it might
+ be useful to have one NATGW group for each site.
+ </para>
+ <para>
+ There can be multiple NATGW groups in a cluster but each node
+ can only be a member of one NATGW group.
+ </para>
+ <para>
+ In each NATGW group, one of the nodes is selected by CTDB to
+ be the NATGW leader and the other nodes are considered to be
+ NATGW followers. NATGW followers establish a fallback default route
+ to the NATGW leader via the private network. When a NATGW
+ follower hosts no public IP addresses then it will use this route
+ for outbound connections. The NATGW leader hosts the NATGW
+ public IP address and routes outgoing connections from
+ follower nodes via this IP address. It also establishes a
+ fallback default route.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Configuration</title>
+
+ <para>
+ NATGW is usually configured similar to the following example configuration:
+ </para>
+ <screen format="linespecific">
+CTDB_NATGW_NODES=/usr/local/etc/ctdb/natgw_nodes
+CTDB_NATGW_PRIVATE_NETWORK=192.168.1.0/24
+CTDB_NATGW_PUBLIC_IP=10.0.0.227/24
+CTDB_NATGW_PUBLIC_IFACE=eth0
+CTDB_NATGW_DEFAULT_GATEWAY=10.0.0.1
+ </screen>
+
+ <para>
+ Normally any node in a NATGW group can act as the NATGW
+ leader. Some configurations may have special nodes that lack
+ connectivity to a public network. In such cases, those nodes
+ can be flagged with the "follower-only" option in the
+ <varname>CTDB_NATGW_NODES</varname> file to limit the NATGW
+ functionality of those nodes.
+ </para>
+
+ <para>
+ See the <citetitle>NAT GATEWAY</citetitle> section in
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry> for more details of
+ NATGW configuration.
+ </para>
+ </refsect2>
+
+
+ <refsect2>
+ <title>Implementation details</title>
+
+ <para>
+ When the NATGW functionality is used, one of the nodes is
+ selected to act as a NAT gateway for all the other nodes in
+ the group when they need to communicate with the external
+ services. The NATGW leader is selected to be a node that is
+ most likely to have usable networks.
+ </para>
+
+ <para>
+ The NATGW leader hosts the NATGW public IP address
+ <varname>CTDB_NATGW_PUBLIC_IP</varname> on the configured public
+ interfaces <varname>CTDB_NATGW_PUBLIC_IFACE</varname> and acts as
+ a router, masquerading outgoing connections from follower nodes
+ via this IP address. If
+ <varname>CTDB_NATGW_DEFAULT_GATEWAY</varname> is set then it
+ also establishes a fallback default route to the configured
+ gateway with a metric of 10. A metric 10 route is used
+ so it can co-exist with other default routes that may be
+ available.
+ </para>
+
+ <para>
+ A NATGW follower establishes its fallback default route to the
+ NATGW leader via the private network
+ <varname>CTDB_NATGW_PRIVATE_NETWORK</varname> with a metric of 10.
+ This route is used for outbound connections when no other
+ default route is available because the node hosts no public
+ addresses. A metric 10 route is used so that it can co-exist
+ with other default routes that may be available when the node
+ is hosting public addresses.
+ </para>
+
+ <para>
+ <varname>CTDB_NATGW_STATIC_ROUTES</varname> can be used to
+ have NATGW create more specific routes instead of just default
+ routes.
+ </para>
+
+ <para>
+ This is implemented in the <filename>11.natgw</filename>
+ eventscript. Please see the eventscript file and the
+ <citetitle>NAT GATEWAY</citetitle> section in
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry> for more details.
+ </para>
+
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>POLICY ROUTING</title>
+
+ <para>
+ Policy routing is an optional CTDB feature to support complex
+ network topologies. Public addresses may be spread across
+ several different networks (or VLANs) and it may not be possible
+ to route packets from these public addresses via the system's
+ default route. Therefore, CTDB has support for policy routing
+ via the <filename>13.per_ip_routing</filename> eventscript.
+ This allows routing to be specified for packets sourced from
+ each public address. The routes are added and removed as CTDB
+ moves public addresses between nodes.
+ </para>
+
+ <refsect2>
+ <title>Configuration variables</title>
+
+ <para>
+ There are 4 configuration variables related to policy routing:
+ <varname>CTDB_PER_IP_ROUTING_CONF</varname>,
+ <varname>CTDB_PER_IP_ROUTING_RULE_PREF</varname>,
+ <varname>CTDB_PER_IP_ROUTING_TABLE_ID_LOW</varname>,
+ <varname>CTDB_PER_IP_ROUTING_TABLE_ID_HIGH</varname>. See the
+ <citetitle>POLICY ROUTING</citetitle> section in
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry> for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Configuration</title>
+
+ <para>
+ The format of each line of
+ <varname>CTDB_PER_IP_ROUTING_CONF</varname> is:
+ </para>
+
+ <screen>
+&lt;public_address&gt; &lt;network&gt; [ &lt;gateway&gt; ]
+ </screen>
+
+ <para>
+ Leading whitespace is ignored and arbitrary whitespace may be
+ used as a separator. Lines that have a "public address" item
+ that doesn't match an actual public address are ignored. This
+ means that comment lines can be added using a leading
+ character such as '#', since this will never match an IP
+ address.
+ </para>
+
+ <para>
+ A line without a gateway indicates a link local route.
+ </para>
+
+ <para>
+ For example, consider the configuration line:
+ </para>
+
+ <screen>
+ 192.168.1.99 192.168.1.0/24
+ </screen>
+
+ <para>
+ If the corresponding public_addresses line is:
+ </para>
+
+ <screen>
+ 192.168.1.99/24 eth2,eth3
+ </screen>
+
+ <para>
+ <varname>CTDB_PER_IP_ROUTING_RULE_PREF</varname> is 100, and
+ CTDB adds the address to eth2 then the following routing
+ information is added:
+ </para>
+
+ <screen>
+ ip rule add from 192.168.1.99 pref 100 table ctdb.192.168.1.99
+ ip route add 192.168.1.0/24 dev eth2 table ctdb.192.168.1.99
+ </screen>
+
+ <para>
+ This causes traffic from 192.168.1.99 to 192.168.1.0/24 to go via
+ eth2.
+ </para>
+
+ <para>
+ The <command>ip rule</command> command will show (something
+ like - depending on other public addresses and other routes on
+ the system):
+ </para>
+
+ <screen>
+ 0: from all lookup local
+ 100: from 192.168.1.99 lookup ctdb.192.168.1.99
+ 32766: from all lookup main
+ 32767: from all lookup default
+ </screen>
+
+ <para>
+ <command>ip route show table ctdb.192.168.1.99</command> will show:
+ </para>
+
+ <screen>
+ 192.168.1.0/24 dev eth2 scope link
+ </screen>
+
+ <para>
+ The usual use for a line containing a gateway is to add a
+ default route corresponding to a particular source address.
+ Consider this line of configuration:
+ </para>
+
+ <screen>
+ 192.168.1.99 0.0.0.0/0 192.168.1.1
+ </screen>
+
+ <para>
+ In the situation described above this will cause an extra
+ routing command to be executed:
+ </para>
+
+ <screen>
+ ip route add 0.0.0.0/0 via 192.168.1.1 dev eth2 table ctdb.192.168.1.99
+ </screen>
+
+ <para>
+ With both configuration lines, <command>ip route show table
+ ctdb.192.168.1.99</command> will show:
+ </para>
+
+ <screen>
+ 192.168.1.0/24 dev eth2 scope link
+ default via 192.168.1.1 dev eth2
+ </screen>
+ </refsect2>
+
+ <refsect2>
+ <title>Sample configuration</title>
+
+ <para>
+ Here is a more complete example configuration.
+ </para>
+
+ <screen>
+/usr/local/etc/ctdb/public_addresses:
+
+ 192.168.1.98/24 eth2,eth3
+ 192.168.1.99/24 eth2,eth3
+
+/usr/local/etc/ctdb/policy_routing:
+
+ 192.168.1.98 192.168.1.0/24
+ 192.168.1.98 192.168.200.0/24 192.168.1.254
+ 192.168.1.98 0.0.0.0/0 192.168.1.1
+ 192.168.1.99 192.168.1.0/24
+ 192.168.1.99 192.168.200.0/24 192.168.1.254
+ 192.168.1.99 0.0.0.0/0 192.168.1.1
+ </screen>
+
+ <para>
+ This routes local packets as expected, the default route is as
+ previously discussed, but packets to 192.168.200.0/24 are
+ routed via the alternate gateway 192.168.1.254.
+ </para>
+
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>NOTIFICATIONS</title>
+
+ <para>
+ When certain state changes occur in CTDB, it can be configured
+ to perform arbitrary actions via notifications. For example,
+ sending SNMP traps or emails when a node becomes unhealthy or
+ similar.
+ </para>
+
+ <para>
+ The notification mechanism runs all executable files ending in
+ ".script" in
+ <filename>/usr/local/etc/ctdb/events/notification/</filename>,
+ ignoring any failures and continuing to run all files.
+ </para>
+
+ <para>
+ CTDB currently generates notifications after CTDB changes to
+ these states:
+ </para>
+
+ <simplelist>
+ <member>init</member>
+ <member>setup</member>
+ <member>startup</member>
+ <member>healthy</member>
+ <member>unhealthy</member>
+ </simplelist>
+
+ </refsect1>
+
+ <refsect1>
+ <title>LOG LEVELS</title>
+
+ <para>
+ Valid log levels, in increasing order of verbosity, are:
+ </para>
+
+ <simplelist>
+ <member>ERROR</member>
+ <member>WARNING</member>
+ <member>NOTICE</member>
+ <member>INFO</member>
+ <member>DEBUG</member>
+ </simplelist>
+ </refsect1>
+
+
+ <refsect1>
+ <title>REMOTE CLUSTER NODES</title>
+ <para>
+It is possible to have a CTDB cluster that spans across a WAN link.
+For example where you have a CTDB cluster in your datacentre but you also
+want to have one additional CTDB node located at a remote branch site.
+This is similar to how a WAN accelerator works but with the difference
+that while a WAN-accelerator often acts as a Proxy or a MitM, in
+the ctdb remote cluster node configuration the Samba instance at the remote site
+IS the genuine server, not a proxy and not a MitM, and thus provides 100%
+correct CIFS semantics to clients.
+ </para>
+
+ <para>
+ See the cluster as one single multihomed samba server where one of
+ the NICs (the remote node) is very far away.
+ </para>
+
+ <para>
+ NOTE: This does require that the cluster filesystem you use can cope
+ with WAN-link latencies. Not all cluster filesystems can handle
+ WAN-link latencies! Whether this will provide very good WAN-accelerator
+ performance or it will perform very poorly depends entirely
+ on how optimized your cluster filesystem is in handling high latency
+ for data and metadata operations.
+ </para>
+
+ <para>
+ To activate a node as being a remote cluster node you need to
+ set the following two parameters in
+ /usr/local/etc/ctdb/ctdb.conf for the remote node:
+ <screen format="linespecific">
+[legacy]
+ lmaster capability = false
+ leader capability = false
+ </screen>
+ </para>
+
+ <para>
+ Verify with the command "ctdb getcapabilities" that the node no longer
+ has the leader or the lmaster capabilities.
+ </para>
+
+ </refsect1>
+
+
+ <refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb_diagnostics</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ltdbtool</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>onnode</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ping_pong</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb.sysconfig</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-statistics</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-tunables</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="https://wiki.samba.org/index.php/CTDB_and_Clustered_Samba"/>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Ronnie Sahlberg,
+ Amitay Isaacs,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ctdb.conf.5.xml b/ctdb/doc/ctdb.conf.5.xml
new file mode 100644
index 0000000..b9bf3a6
--- /dev/null
+++ b/ctdb/doc/ctdb.conf.5.xml
@@ -0,0 +1,652 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<refentry id="ctdb.conf.5">
+
+ <refmeta>
+ <refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb.conf</refname>
+ <refpurpose>CTDB configuration file</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>
+ This file contains CTDB configuration options that affect the
+ operation of CTDB daemons and command-line tools. The default
+ location of this file is
+ <filename>/usr/local/etc/ctdb/ctdb.conf</filename>.
+ </para>
+
+ <para>
+ Note that this is a Samba-style configuration file, so it has a
+ very different syntax to previous CTDB configuration files.
+ </para>
+
+ <para>
+ For event script options please see
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>.
+ </para>
+
+ <para>
+ Configuration options are grouped into several sections below.
+ There are only a few options in each section, allowing them to
+ be ordered (approximately) in decreasing order of importance.
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>
+ LOGGING CONFIGURATION
+ </title>
+
+ <para>
+ Options in this section control CTDB's logging. They are valid
+ within the <emphasis>logging</emphasis> section of the file,
+ indicated by <literal>[logging]</literal>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>log level = <parameter>LOGLEVEL</parameter></term>
+ <listitem>
+ <para>
+ LOGLEVEL is a string that controls the verbosity of
+ ctdbd's logging. See the <citetitle>LOG
+ LEVELS</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more details.
+ </para>
+ <para>
+ Default: <literal>NOTICE</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>location = <parameter>STRING</parameter></term>
+ <listitem>
+ <para>
+ STRING specifies where ctdbd will write its log.
+ </para>
+ <para>
+ Valid values are:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term>file:<parameter>FILENAME</parameter></term>
+ <listitem>
+ <para>
+ FILENAME where ctdbd will write its log. This is usually
+ <filename>/usr/local/var/log/log.ctdb</filename>.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>syslog<optional>:<parameter>METHOD</parameter></optional></term>
+ <listitem>
+ <para>
+ CTDB will log to syslog. By default this will use
+ the syslog(3) API.
+ </para>
+ <para>
+ If METHOD is specified then it specifies an
+ extension that causes logging to be done in a
+ non-blocking fashion. This can be useful under
+ heavy loads that might cause the syslog daemon to
+ dequeue messages too slowly, which would otherwise
+ cause CTDB to block when logging. METHOD must be
+ one of:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term>nonblocking</term>
+ <listitem>
+ <para>
+ CTDB will log to syslog via
+ <filename>/dev/log</filename> in non-blocking
+ mode.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>udp</term>
+ <listitem>
+ <para>
+ CTDB will log to syslog via UDP to
+ localhost:514. The syslog daemon must be
+ configured to listen on (at least)
+ localhost:514. Most implementations will log
+ the messages against hostname "localhost" -
+ this is a limit of the implementation for
+ compatibility with more syslog daemon
+ implementations.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>udp-rfc5424</term>
+ <listitem>
+ <para>
+ As with "udp" but messages are sent in RFC5424
+ format. This method will log the correct
+ hostname but is not as widely implemented in
+ syslog daemons.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ Default:
+ file:<filename>/usr/local/var/log/log.ctdb</filename>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>
+ CLUSTER CONFIGURATION
+ </title>
+
+ <para>
+ Options in this section affect the CTDB cluster setup. They
+ are valid within the <emphasis>cluster</emphasis> section of
+ the file, indicated by <literal>[cluster]</literal>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>cluster lock = <parameter>LOCK</parameter></term>
+ <listitem>
+ <para>
+ LOCK specifies the cluster-wide mutex used to detect and
+ prevent a partitioned cluster (or "split brain").
+ </para>
+ <para>
+ For information about the cluster lock please see the
+ <citetitle>CLUSTER LOCK</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+ <para>
+ Default: NONE. However, use of a cluster lock is
+ <emphasis>strongly recommended</emphasis>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>leader capability = true|false</term>
+ <listitem>
+ <para>
+ Indicates whether a node can become the leader
+ for the cluster. If this is set to
+ <literal>false</literal> then the node will not be able to
+ become the leader for the cluster. This feature
+ is primarily used for making a cluster span across a WAN
+ link and use CTDB as a WAN-accelerator.
+ </para>
+ <para>
+ Please see the <citetitle>REMOTE CLUSTER NODES</citetitle>
+ section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more
+ information.
+ </para>
+ <para>
+ Default: <literal>true</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>leader timeout = <parameter>SECONDS</parameter></term>
+ <listitem>
+ <para>
+ Number of SECONDS without a leader broadcast before a node
+ triggers an election.
+ </para>
+ <para>
+ Default: <literal>5</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>node address = <parameter>IPADDR</parameter></term>
+ <listitem>
+ <para>
+ IPADDR is the private IP address that ctdbd will bind to.
+ </para>
+ <para>
+ This option is only required when automatic address
+ detection can not be used. This can be the case when
+ running multiple ctdbd daemons/nodes on the same physical
+ host (usually for testing), using InfiniBand for the
+ private network or on Linux when sysctl
+ net.ipv4.ip_nonlocal_bind=1.
+ </para>
+ <para>
+ Default: CTDB selects the first address from the nodes
+ list that it can bind to. See also the <citetitle>PRIVATE
+ ADDRESS</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>transport = tcp|ib</term>
+ <listitem>
+ <para>
+ This option specifies which transport to use for ctdbd
+ internode communications on the private network.
+ </para>
+ <para>
+ <literal>ib</literal> means InfiniBand. The InfiniBand
+ support is not regularly tested. If it is known to be
+ broken then it may be disabled so that a value of
+ <literal>ib</literal> is considered invalid.
+ </para>
+ <para>
+ Default: <literal>tcp</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>
+ DATABASE CONFIGURATION
+ </title>
+
+ <para>
+ Options in this section affect the CTDB database setup. They
+ are valid within the <emphasis>database</emphasis> section of
+ the file, indicated by <literal>[database]</literal>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>volatile database directory = <parameter>DIRECTORY</parameter></term>
+ <listitem>
+ <para>
+ DIRECTORY on local storage where CTDB keeps a local copy
+ of volatile TDB databases. This directory is local for
+ each node and should not be stored on the shared cluster
+ filesystem.
+ </para>
+ <para>
+ Mounting a tmpfs (or similar memory filesystem) on this
+ directory can provide a significant performance
+ improvement when there is I/O contention on the local
+ disk.
+ </para>
+ <para>
+ Default: <filename>/usr/local/var/lib/ctdb/volatile</filename>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>persistent database directory = <parameter>DIRECTORY</parameter></term>
+ <listitem>
+ <para>
+ DIRECTORY on local storage where CTDB keeps a local copy
+ of persistent TDB databases. This directory is local for
+ each node and should not be stored on the shared cluster
+ filesystem.
+ </para>
+ <para>
+ Default: <filename>/usr/local/var/lib/ctdb/persistent</filename>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>state database directory = <parameter>DIRECTORY</parameter></term>
+ <listitem>
+ <para>
+ DIRECTORY on local storage where CTDB keeps a local copy
+ of internal state TDB databases. This directory is local
+ for each node and should not be stored on the shared
+ cluster filesystem.
+ </para>
+ <para>
+ Default: <filename>/usr/local/var/lib/ctdb/state</filename>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>tdb mutexes = true|false</term>
+ <listitem>
+ <para>
+ This parameter enables the TDB_MUTEX_LOCKING feature on
+ volatile databases if the robust mutexes are
+ supported. This optimizes the record locking using robust
+ mutexes and is much more efficient than using POSIX locks.
+ </para>
+ <para>
+ If robust mutexes are unreliable on the platform being
+ used then they can be disabled by setting this to
+ <literal>false</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>lock debug script = <parameter>FILENAME</parameter></term>
+ <listitem>
+ <para>
+ FILENAME is a script used by CTDB's database locking code
+ to attempt to provide debugging information when CTDB is
+ unable to lock an entire database or a record.
+ </para>
+ <para>
+ This script should be a bare filename relative to the CTDB
+ configuration directory
+ (<filename>/usr/local/etc/ctdb/</filename>). Any
+ directory prefix is ignored and the path is calculated
+ relative to this directory.
+ </para>
+ <para>
+ CTDB provides a lock debugging script and installs it as
+ <filename>/usr/local/etc/ctdb/debug_locks.sh</filename>.
+ </para>
+ <para>
+ Default: NONE
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>
+ EVENT HANDLING CONFIGURATION
+ </title>
+
+ <para>
+ Options in this section affect CTDB event handling. They are
+ valid within the <emphasis>event</emphasis> section of the file,
+ indicated by <literal>[event]</literal>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>debug script = <parameter>FILENAME</parameter></term>
+ <listitem>
+ <para>
+ FILENAME is a script used by CTDB's event handling code to
+ attempt to provide debugging information when an event
+ times out.
+ </para>
+ <para>
+ This script should be a bare filename relative to the CTDB
+ configuration directory
+ (<filename>/usr/local/etc/ctdb/</filename>). Any
+ directory prefix is ignored and the path is calculated
+ relative to this directory.
+ </para>
+ <para>
+ CTDB provides a script for debugging timed out event
+ scripts and installs it as
+ <filename>/usr/local/etc/ctdb/debug-hung-script.sh</filename>.
+ </para>
+ <para>
+ Default: NONE
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>
+ FAILOVER CONFIGURATION
+ </title>
+
+ <para>
+ Options in this section affect CTDB failover. They are
+ valid within the <emphasis>failover</emphasis> section of the file,
+ indicated by <literal>[failover]</literal>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>disabled = true|false</term>
+ <listitem>
+ <para>
+ If set to <literal>true</literal> then public IP failover
+ is disabled.
+ </para>
+ <para>
+ Default: <literal>false</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>
+ LEGACY CONFIGURATION
+ </title>
+
+ <para>
+ Options in this section affect legacy CTDB setup. They are valid
+ within the <emphasis>legacy</emphasis> section of the file,
+ indicated by <literal>[legacy]</literal>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>start as stopped = true|false</term>
+ <listitem>
+ <para>
+ If set to <literal>true</literal> CTDB starts in the
+ STOPPED state.
+ </para>
+ <para>
+ To allow the node to take part in the cluster it must be
+ manually continued with the <command>ctdb
+ continue</command> command.
+ </para>
+ <para>
+ Please see the <citetitle>NODE STATES</citetitle> section
+ in <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more
+ information about the STOPPED state.
+ </para>
+ <para>
+ Default: <literal>false</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>start as disabled = true|false</term>
+ <listitem>
+ <para>
+ If set to <literal>true</literal> CTDB starts in the
+ DISABLED state.
+ </para>
+ <para>
+ To allow the node to host public IP addresses and
+ services, it must be manually enabled using the
+ <command>ctdb enable</command> command.
+ </para>
+ <para>
+ Please see the <citetitle>NODE STATES</citetitle> section
+ in <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more
+ information about the DISABLED state.
+ </para>
+ <para>
+ Default: <literal>false</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>realtime scheduling = true|false</term>
+ <listitem>
+ <para>
+ Usually CTDB runs with real-time priority. This helps it
+ to perform effectively on a busy system, such as when
+ there are thousands of Samba clients. If you are running
+ CTDB on a platform that does not support real-time
+ priority, you can set this to <literal>false</literal>.
+ </para>
+ <para>
+ Default: <literal>true</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>lmaster capability = true|false</term>
+ <listitem>
+ <para>
+ Indicates whether a node can become a location master for
+ records in a database. If this is set to
+ <literal>false</literal> then the node will not be part of
+ the vnnmap. This feature is primarily used for making a
+ cluster span across a WAN link and use CTDB as a
+ WAN-accelerator.
+ </para>
+ <para>
+ Please see the <citetitle>REMOTE CLUSTER NODES</citetitle>
+ section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more
+ information.
+ </para>
+ <para>
+ Default: <literal>true</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>script log level = <parameter>LOGLEVEL</parameter></term>
+ <listitem>
+ <para>
+ This option sets the debug level of event script output to
+ LOGLEVEL.
+ </para>
+ <para>
+ See the <citetitle>DEBUG LEVELS</citetitle> section in
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for more
+ information.
+ </para>
+ <para>
+ Default: <literal>ERROR</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect1>
+
+ <refsect1>
+ <title>FILES</title>
+
+ <simplelist>
+ <member><filename>/usr/local/etc/ctdb/ctdb.conf</filename></member>
+ </simplelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>onnode</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb.sysconfig</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-tunables</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Amitay Isaacs,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ctdb.sysconfig.5.xml b/ctdb/doc/ctdb.sysconfig.5.xml
new file mode 100644
index 0000000..b4cdaaf
--- /dev/null
+++ b/ctdb/doc/ctdb.sysconfig.5.xml
@@ -0,0 +1,240 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<refentry id="ctdb.sysconfig.5">
+
+ <refmeta>
+ <refentrytitle>ctdb.sysconfig</refentrytitle>
+ <manvolnum>5</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb.sysconfig</refname>
+ <refpurpose>CTDB daemon configuration file</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>
+ This file contains configuration that affects the operation of
+ CTDB. This is a distribution-specific service configuration
+ file such as <filename>/etc/sysconfig/ctdb</filename> (Red Hat)
+ or <filename>/etc/default/ctdb</filename> (Debian) and is a
+ shell script (see
+ <citerefentry><refentrytitle>sh</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>).
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>
+ GLOBAL CONFIGURATION
+ </title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>CTDB_INIT_STYLE=debian|redhat|suse</term>
+ <listitem>
+ <para>
+ This is the init style used by the Linux distribution (or
+ other operating system) being used. This is usually
+ determined dynamically by checking the system. This
+ variable is used by the initscript to determine which init
+ system primitives to use. It is also used by some
+ eventscripts to choose the name of initscripts for certain
+ services, since these can vary between distributions.
+ </para>
+ <para>
+ If CTDB's event scripts are unable to determine an
+ appropriate default then this option can also be placed in
+ a relevant
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry> file.
+ </para>
+ <para>
+ Default: NONE. Guessed, based on features of
+ distribution.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>
+ RESOURCE LIMITS
+ </title>
+
+ <refsect2>
+ <title>
+ Maximum number of open files
+ </title>
+
+ <para>
+ CTDB can use a lot of file descriptors, especially when used
+ with Samba. If there are thousands of smbd processes
+ connected to CTDB then this can mean that thousands of file
+ descriptors are used. For CTDB, it is often necessary to
+ increase the limit on the maximum number of open files.
+ </para>
+
+ <para>
+ The maximum number of open files should be configured using an
+ operating system mechanism.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ systemd
+ </term>
+ <listitem>
+ <para>
+ The
+ <literal>LimitNOFILE=<option>LIMIT</option></literal>
+ option can be used in a unit/service file to increase the
+ maximum number of open files. See
+ <citerefentry><refentrytitle>systemd.exec</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry> for details.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ SYSV init
+ </term>
+ <listitem>
+ <para>
+ Use a command like <command>ulimit -n
+ <option>LIMIT</option></command> to increase the maximum
+ number of open files. This command can be put in the
+ relevant distribution-specific service configuration file.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ <refsect2>
+ <title>
+ Allowing core dumps
+ </title>
+
+ <para>
+ Many distributions do not allow core dump files to be
+ generated by default. To assist with debugging, core files
+ can be enabled. This should be configured using an operating
+ system mechanism.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>
+ systemd
+ </term>
+ <listitem>
+ <para>
+ The <literal>LimitCORE=0|unlimited</literal> option can
+ be used in a unit/service file. <literal>0</literal>
+ disallows core files, <literal>unlimited</literal>
+ allows them. See
+ <citerefentry><refentrytitle>systemd.exec</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry> for details.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ SYSV init
+ </term>
+ <listitem>
+ <para>
+ Use a command like <command>ulimit -c 0|unlimited</command>
+ to disable or enable core files as required. This
+ command can be put in the relevant distribution-specific
+ service configuration file.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ <title>FILES</title>
+
+ <simplelist>
+ <member><filename>/etc/sysconfig/ctdb</filename></member>
+ <member><filename>/etc/default/ctdb</filename></member>
+ <member><filename>/usr/local/etc/ctdb/script.options</filename></member>
+ </simplelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-script.options</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ctdb_diagnostics.1.xml b/ctdb/doc/ctdb_diagnostics.1.xml
new file mode 100644
index 0000000..2f28131
--- /dev/null
+++ b/ctdb/doc/ctdb_diagnostics.1.xml
@@ -0,0 +1,128 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="ctdb_diagnostics.1">
+
+ <refmeta>
+ <refentrytitle>ctdb_diagnostics</refentrytitle>
+ <manvolnum>1</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb_diagnostics</refname>
+ <refpurpose>dump diagnostic information about CTDB/Samba installation</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ctdb_diagnostics</command>
+ <arg>OPTIONS</arg>
+ <arg choice="plain">...</arg>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+ <para>
+ ctdb_diagnostics is used to dump diagnostic information about a
+ clustered Samba installation. This includes configuration
+ files, output of relevant commands and logs. This information
+ can be used to check the correctness of the configuration and to
+ diagnose problems.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>-n &lt;nodes&gt;</term>
+ <listitem>
+ <para>
+ Comma separated list of nodes to operate on
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-c</term>
+ <listitem>
+ <para>
+ Ignore comment lines (starting with '#') in file comparisons
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-w</term>
+ <listitem>
+ <para>
+ Ignore whitespace in file comparisons
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>--no-ads</term>
+ <listitem>
+ <para>
+ Do not use commands that assume an Active Directory Server
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+ <ulink url="https://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by Martijn van Brummelen
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2015</year>
+ <holder>Martijn van Brummelen</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ctdb_mutex_ceph_rados_helper.7.xml b/ctdb/doc/ctdb_mutex_ceph_rados_helper.7.xml
new file mode 100644
index 0000000..f558f87
--- /dev/null
+++ b/ctdb/doc/ctdb_mutex_ceph_rados_helper.7.xml
@@ -0,0 +1,96 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="ctdb_mutex_ceph_rados_helper.7">
+
+ <refmeta>
+ <refentrytitle>Ceph RADOS Mutex</refentrytitle>
+ <manvolnum>7</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdb_mutex_ceph_rados_helper</refname>
+ <refpurpose>Ceph RADOS cluster mutex helper</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+ <para>
+ ctdb_mutex_ceph_rados_helper can be used as a cluster lock provider
+ for CTDB. When configured, split brain avoidance during CTDB recovery
+ will be handled using locks against an object located in a Ceph RADOS
+ pool.
+ To enable this functionality, include the following line in the
+ <literal>[cluster]</literal> section of
+ <citerefentry><refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>:
+ </para>
+ <screen format="linespecific">
+cluster lock = !ctdb_mutex_ceph_rados_helper [Cluster] [User] [Pool] [Object]
+
+Cluster: Ceph cluster name (e.g. ceph)
+User: Ceph cluster user name (e.g. client.admin)
+Pool: Ceph RADOS pool name
+Object: Ceph RADOS object name
+ </screen>
+ <para>
+ The Ceph cluster <parameter>Cluster</parameter> must be up and running,
+ with a configuration, and keyring file for <parameter>User</parameter>
+ located in a librados default search path (e.g. /etc/ceph/).
+ <parameter>Pool</parameter> must already exist.
+ </para>
+ <para>
+ For informational purposes, ctdb_mutex_ceph_rados_helper will also
+ register the cluster lock holder in Ceph Manager's service map.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by David Disseldorp
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2016</year>
+ <holder>David Disseldorp</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ctdbd.1.xml b/ctdb/doc/ctdbd.1.xml
new file mode 100644
index 0000000..c046294
--- /dev/null
+++ b/ctdb/doc/ctdbd.1.xml
@@ -0,0 +1,129 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<refentry id="ctdbd.1">
+
+ <refmeta>
+ <refentrytitle>ctdbd</refentrytitle>
+ <manvolnum>1</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ctdbd</refname>
+ <refpurpose>The CTDB cluster daemon</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ctdbd</command>
+ <arg rep="repeat"><replaceable>OPTION</replaceable></arg>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+ <para>
+ ctdbd is the main CTDB daemon.
+ </para>
+
+ <para>
+ See <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> for an overview of CTDB.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>GENERAL OPTIONS</title>
+
+ <variablelist>
+ <varlistentry>
+ <term>-i, --interactive</term>
+ <listitem>
+ <para>
+ Enable interactive mode. This will make ctdbd run in the
+ foreground and not detach from the terminal. In this mode
+ ctdbd will log to stderr.
+ </para>
+ <para>
+ By default ctdbd will detach itself and run in the
+ background as a daemon, logging to the configured
+ destination.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-?, --help</term>
+ <listitem>
+ <para>
+ Display a summary of options.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>onnode</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb.conf</refentrytitle>
+ <manvolnum>5</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb-tunables</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Ronnie Sahlberg,
+ Amitay Isaacs,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/examples/11.natgw.options b/ctdb/doc/examples/11.natgw.options
new file mode 100644
index 0000000..e2460cb
--- /dev/null
+++ b/ctdb/doc/examples/11.natgw.options
@@ -0,0 +1,25 @@
+# NAT gateway configuration
+#
+# See ctdb.conf for main cluster configuration details
+#
+# Cluster provides file services on following IP addresses
+#
+# 10.1.1.101 - 10.1.1.106
+#
+# When a node is not hosting any IPs, it cannot connect to network
+# infrastructure (e.g. DNS, Active Directory, ...).
+#
+# Using NAT gateway feature of CTDB allows a node not hosting IPs to connect
+# to network infrastructure using the additional CTDB_NATGW_PUBLIC_IP.
+
+# ---------- /etc/ctdb/natgw_nodes ----------
+# 192.168.1.1
+# 192.168.1.2
+# 192.168.1.3
+# ---------- /etc/ctdb/natgw_nodes ----------
+#
+CTDB_NATGW_PUBLIC_IP=10.1.1.121/24
+CTDB_NATGW_PUBLIC_IFACE=eth1
+CTDB_NATGW_DEFAULT_GATEWAY=10.1.1.254
+CTDB_NATGW_PRIVATE_NETWORK=192.168.1.0/24
+CTDB_NATGW_NODES=/etc/ctdb/natgw_nodes
diff --git a/ctdb/doc/examples/20.nfs_ganesha.check b/ctdb/doc/examples/20.nfs_ganesha.check
new file mode 100644
index 0000000..3288f16
--- /dev/null
+++ b/ctdb/doc/examples/20.nfs_ganesha.check
@@ -0,0 +1,8 @@
+# nfs_ganesha
+restart_every=2
+unhealthy_after=6
+service_stop_cmd="$CTDB_NFS_CALLOUT stop nfs"
+service_start_cmd="$CTDB_NFS_CALLOUT start nfs"
+service_check_cmd="$CTDB_NFS_CALLOUT check nfs"
+# Ganesha initscript restarts rpc.statd and stack trace is desirable!
+service_debug_cmd="program_stack_traces status 5"
diff --git a/ctdb/doc/examples/91.lvs.options b/ctdb/doc/examples/91.lvs.options
new file mode 100644
index 0000000..adb3660
--- /dev/null
+++ b/ctdb/doc/examples/91.lvs.options
@@ -0,0 +1,12 @@
+# LVS configuration
+#
+# ---------- /etc/ctdb/lvs_nodes ----------
+# 192.168.1.1
+# 192.168.1.2
+# 192.168.1.3
+# ---------- /etc/ctdb/lvs_nodes ----------
+#
+CTDB_LVS_NODES=/etc/ctdb/lvs_nodes
+
+CTDB_LVS_PUBLIC_IP=10.1.1.101
+CTDB_LVS_PUBLIC_IFACE=eth1
diff --git a/ctdb/doc/examples/README b/ctdb/doc/examples/README
new file mode 100644
index 0000000..a4ea222
--- /dev/null
+++ b/ctdb/doc/examples/README
@@ -0,0 +1,15 @@
+This directory includes...
+
+Sample CTDB configuration files:
+
+ o ctdb.conf - Main configuration file
+ o 11.natgw.options - Options for the 11.natgw event script
+ o 91.lvs.options - Options for the 91.lvs event script
+
+Sample 60.nfs configuration for NFS ganesha - callout script and
+.check file
+
+ o nfs-ganesha-callout
+ o 20.nfs_ganesha.check
+
+See the comment at the top of nfs-ganesha-callout for instructions.
diff --git a/ctdb/doc/examples/config_migrate.sh b/ctdb/doc/examples/config_migrate.sh
new file mode 100755
index 0000000..874e96c
--- /dev/null
+++ b/ctdb/doc/examples/config_migrate.sh
@@ -0,0 +1,741 @@
+#!/bin/sh
+
+# config_migrate.sh - migrate old ctdbd.conf file to new configuration files
+#
+# Input files are old-style CTDB configuration files, including:
+#
+# /etc/ctdb/ctdbd.conf
+# /usr/local/etc/ctdb/ctdbd.conf
+# /etc/sysconfig/ctdb
+# /etc/default/ctdb
+#
+# These files are sourced by this script. They used to be sourced by
+# ctdbd_wrapper, so this should not be too surprising.
+#
+# By default, the output directory is the given configuration
+# directory. An alternate output directory can be specified if this
+# isn't desired.
+#
+# The output directory will contain the following if non-empty:
+#
+# * ctdb.conf (may be empty)
+# * script.options
+# * ctdb.tunables
+# * ctdb.sysconfig - consider installing as /etc/sysconfig/ctdb,
+# /etc/default/ctdb, or similar
+# * commands.sh - consider running the commands in this file
+# * README.warn - warnings about removed/invalid configuration options
+
+# Print a one-line usage summary to stdout and exit with status 1
+usage ()
+{
+ cat <<EOF
+usage: config_migrate.sh [-f] [-d <ctdb-config-dir>] [-o <out-dir>] <file> ...
+EOF
+ exit 1
+}
+
+# Defaults, overridden by -d, -o and -f respectively
+config_dir=""
+out_dir=""
+force=false
+
+# -h and any unrecognised option print the usage message and exit
+while getopts "d:fho:?" opt ; do
+ case "$opt" in
+ d) config_dir="$OPTARG" ;;
+ f) force=true ;;
+ o) out_dir="$OPTARG" ;;
+ \?|h) usage ;;
+ esac
+done
+shift $((OPTIND - 1))
+
+# The remaining arguments are the input files; at least one is required
+if [ $# -lt 1 ] ; then
+ usage
+fi
+
+if [ -z "$config_dir" ] ; then
+ echo "Assuming \"/etc/ctdb\" as ctdb configuration directory"
+ echo "If that's not correct, please specify config dir with -d"
+ echo
+ config_dir="/etc/ctdb"
+else
+ echo "Using \"$config_dir\" as ctdb configuration directory"
+ echo
+fi
+
+# Output goes to the configuration directory unless -o was given
+if [ -z "$out_dir" ] ; then
+ echo "No output directory specified, using \"$config_dir\""
+ echo
+ out_dir="$config_dir"
+fi
+
+############################################################
+
+#
+# Output file handling
+#
+
+# Prepare an output file for writing
+#
+# $1 is the path of the output file.  If it already exists then it is
+# either left alone (returning 1) or, when -f was given, moved aside
+# to <file>.convertsave.  An empty file is then created.
+#
+# Returns non-zero if the file must not be overwritten, or if it can
+# not be moved aside or created, so that callers skip generating it.
+out_file_check_and_create ()
+{
+	_out_file="$1"
+
+	if [ -f "$_out_file" ] ; then
+		if ! $force ; then
+			echo "Not overwriting existing file: ${_out_file}" >&2
+			return 1
+		fi
+		# Do not carry on if the old file could not be saved
+		mv -v "$_out_file" "${_out_file}.convertsave" || return 1
+	fi
+
+	# Propagate failure, e.g. an unwritable output directory
+	touch "$_out_file" || return 1
+
+	return 0
+}
+
+# Remove output file $1 if it does not have a size greater than zero
+#
+# Used after generating a file so that files without any content are
+# not left behind in the output directory.
+out_file_remove_if_empty ()
+{
+ _out_file="$1"
+
+ if [ ! -s "$_out_file" ] ; then
+ rm "$_out_file"
+ fi
+}
+
+############################################################
+
+#
+# Option/tunable/service conversion and validity checking
+#
+# This is basically the data that drives most of the rest of the
+# script
+#
+
+# Convert a ctdbd.conf opt+val into a ctdb.conf section+opt+val
+#
+# $1 is the old option name, $2 is its value.  On a match a single
+# line "section key value" is printed, otherwise nothing is printed.
+#
+# If opt is matched and val is empty then output is printed, allowing
+# this function to be reused to check if opt is valid.
+#
+# Note that for boolean options, the expected value and the new value
+# form part of the data.  The table columns are:
+#
+#   section  key  old-option  [expected-old-value  new-value]
+#
+# A boolean option only produces output when its value equals the
+# expected old value (or is empty).
+get_ctdb_conf_option ()
+{
+	_opt="$1"
+	_val="$2"
+
+	# Emptiness is tested with == "" rather than "!": awk treats a
+	# numeric-looking string such as 0 as false, so "!val" would
+	# mistake a value of 0 for "no value given"
+	awk -v opt="${_opt}" -v val="${_val}" \
+	'$3 == opt {
+		if ($4 == "" || val == "" || val == $4) {
+			if ($5 != "") {
+				print $1, $2, $5
+			} else {
+				print $1, $2, val
+			}
+		}
+	}' <<EOF
+cluster node-address CTDB_NODE_ADDRESS
+cluster cluster-lock CTDB_RECOVERY_LOCK
+cluster transport CTDB_TRANSPORT
+cluster leader-capability CTDB_CAPABILITY_RECMASTER no false
+database lock-debug-script CTDB_DEBUG_LOCKS
+database persistent-database-directory CTDB_DBDIR_PERSISTENT
+database state-database-directory CTDB_DBDIR_STATE
+database volatile-database-directory CTDB_DBDIR
+event debug-script CTDB_DEBUG_HUNG_SCRIPT
+legacy lmaster-capability CTDB_CAPABILITY_LMASTER no false
+legacy realtime-scheduling CTDB_NOSETSCHED yes false
+legacy script-log-level CTDB_SCRIPT_LOG_LEVEL
+legacy start-as-disabled CTDB_START_AS_DISABLED yes true
+legacy start-as-stopped CTDB_START_AS_STOPPED yes true
+logging location CTDB_LOGGING
+logging log-level CTDB_DEBUGLEVEL
+EOF
+
+}
+
+# Check if an option will convert to a ctdb.conf option
+#
+# $1 is the old option name.  Succeeds if get_ctdb_conf_option()
+# prints anything for that name when given an empty value.
+check_ctdb_conf_option ()
+{
+ _opt="$1"
+
+ _out=$(get_ctdb_conf_option "$_opt" "")
+ [ -n "$_out" ]
+}
+
+# Convert a ctdbd.conf tunable option into a ctdb.conf section+opt
+#
+# The difference between this and get_ctdb_conf_option() is that only
+# the tunable part of the option is passed as opt and it is matched
+# case-insensitively.
+#
+# $1 is the tunable name (without CTDB_SET_), $2 is its value.  The
+# table columns are:
+#
+#   section  key  tunable  expected-old-value  new-value
+#
+# The value is reduced to 0 (zero) or 1 (anything else) and output is
+# only produced when that equals the expected old value, or when the
+# value is empty (used to check whether the tunable converts at all).
+get_ctdb_conf_tunable_option ()
+{
+	_opt="$1"
+	_val="$2"
+
+	# Emptiness is tested with == "" rather than "!": awk treats a
+	# numeric-looking string such as 0 as false, so "!\$4" would
+	# always match a row whose expected value is 0 and "!val" would
+	# always match when the tunable is set to 0
+	awk -v opt="${_opt}" -v val="${_val}" \
+	'tolower($3) == tolower(opt) {
+		if ($4 == "" || val == "" || (val == 0 ? 0 : 1) == $4) {
+			if ($5 != "") {
+				print $1, $2, $5
+			} else {
+				print $1, $2, val
+			}
+		}
+	}' <<EOF
+database tdb-mutexes TDBMutexEnabled 0 false
+failover disabled DisableIPFailover 1 true
+EOF
+
+}
+
+# Check if a tunable will convert to a ctdb.conf option
+#
+# $1 is the tunable name.  Succeeds if get_ctdb_conf_tunable_option()
+# prints anything for that name when given an empty value.
+check_ctdb_conf_tunable_option ()
+{
+ _opt="$1"
+
+ _out=$(get_ctdb_conf_tunable_option "$_opt" "")
+ [ -n "$_out" ]
+}
+
+# Check if an option has been removed
+#
+# $1 is the option name.  Succeeds if it is identical to one of the
+# lines listed below (fixed string, whole line, case-sensitive).
+check_removed_option ()
+{
+ _option="$1"
+
+ grep -Fqx "$_option" <<EOF
+CTDB_BASE
+CTDB_PIDFILE
+CTDB_SOCKET
+CTDB_EVENT_SCRIPT_DIR
+CTDB_NOTIFY_SCRIPT
+CTDB_PUBLIC_INTERFACE
+CTDB_MAX_PERSISTENT_CHECK_ERRORS
+CTDB_SHUTDOWN_TIMEOUT
+CTDB_MONITOR_SWAP_USAGE
+EOF
+}
+
+# Check if an option is a valid script option
+#
+# $1 is the option name.  Succeeds if it is identical to one of the
+# lines listed below (fixed string, whole line, case-sensitive).
+#
+# The "# <script>" lines are part of the text searched by grep, not
+# shell comments.  They only group the options by script and would
+# match only an option with that exact text as its name.
+check_valid_script_option ()
+{
+ _option="$1"
+
+ grep -Fqx "$_option" <<EOF
+# 10.interface
+CTDB_PARTIALLY_ONLINE_INTERFACES
+# 11.natgw
+CTDB_NATGW_DEFAULT_GATEWAY
+CTDB_NATGW_NODES
+CTDB_NATGW_PRIVATE_NETWORK
+CTDB_NATGW_PUBLIC_IFACE
+CTDB_NATGW_PUBLIC_IP
+CTDB_NATGW_STATIC_ROUTES
+# 13.per_ip_routing
+CTDB_PER_IP_ROUTING_CONF
+CTDB_PER_IP_ROUTING_RULE_PREF
+CTDB_PER_IP_ROUTING_TABLE_ID_LOW
+CTDB_PER_IP_ROUTING_TABLE_ID_HIGH
+# 90.lvs
+CTDB_LVS_NODES
+CTDB_LVS_PUBLIC_IFACE
+CTDB_LVS_PUBLIC_IP
+# 20.multipathd
+CTDB_MONITOR_MPDEVICES
+# 31.clamd
+CTDB_CLAMD_SOCKET
+# 48.netbios
+CTDB_SERVICE_NMB
+# 49.winbind
+CTDB_SERVICE_WINBIND
+# 50.samba
+CTDB_SAMBA_CHECK_PORTS
+CTDB_SAMBA_SKIP_SHARE_CHECK
+CTDB_SERVICE_SMB
+# 60.nfs
+CTDB_NFS_CALLOUT
+CTDB_NFS_CHECKS_DIR
+CTDB_NFS_SKIP_SHARE_CHECK
+CTDB_RPCINFO_LOCALHOST
+CTDB_RPCINFO_LOCALHOST6
+CTDB_NFS_STATE_FS_TYPE
+CTDB_NFS_STATE_MNT
+# 70.iscsi
+CTDB_START_ISCSI_SCRIPTS
+# 00.ctdb
+CTDB_MAX_CORRUPT_DB_BACKUPS
+# 05.system
+CTDB_MONITOR_FILESYSTEM_USAGE
+CTDB_MONITOR_MEMORY_USAGE
+# debug_hung_scripts.sh
+CTDB_DEBUG_HUNG_SCRIPT_STACKPAT
+EOF
+}
+
+# Check if a tunable is valid
+#
+# $1 is the tunable name (without CTDB_SET_).  Succeeds if it matches
+# one of the lines listed below as a whole line, ignoring case.
+check_valid_tunable ()
+{
+ _tunable="$1"
+
+ grep -Fiqx "$_tunable" <<EOF
+AllowClientDBAttach
+AllowMixedVersions
+AllowUnhealthyDBRead
+ControlTimeout
+DBRecordCountWarn
+DBRecordSizeWarn
+DBSizeWarn
+DatabaseHashSize
+DatabaseMaxDead
+DeferredAttachTO
+DisableIPFailover
+ElectionTimeout
+EnableBans
+EventScriptTimeout
+FetchCollapse
+HopcountMakeSticky
+IPAllocAlgorithm
+KeepaliveInterval
+KeepaliveLimit
+LockProcessesPerDB
+LogLatencyMs
+MaxQueueDropMsg
+MonitorInterval
+MonitorTimeoutCount
+NoIPFailback
+NoIPTakeover
+PullDBPreallocation
+QueueBufferSize
+RecBufferSizeLimit
+RecLockLatencyMs
+RecdFailCount
+RecdPingTimeout
+RecoverInterval
+RecoverTimeout
+RecoveryBanPeriod
+RecoveryDropAllIPs
+RecoveryGracePeriod
+RepackLimit
+RerecoveryTimeout
+SeqnumInterval
+StatHistoryInterval
+StickyDuration
+StickyPindown
+TDBMutexEnabled
+TakeoverTimeout
+TickleUpdateInterval
+TraverseTimeout
+VacuumFastPathCount
+VacuumInterval
+VacuumMaxRunTime
+VerboseMemoryNames
+EOF
+}
+
+# Check if a tunable has been removed
+#
+# $1 is the tunable name (without CTDB_SET_).  Succeeds if it matches
+# one of the lines listed below as a whole line, ignoring case.
+check_removed_tunable ()
+{
+ _tunable="$1"
+
+ grep -Fiqx "$_tunable" <<EOF
+NoIPHostOnAllDisabled
+VacuumLimit
+EOF
+}
+
+# Print a command to enable an event script for the given service
+#
+# $1 is the lower-case service name.  For a known service a comment
+# line, a "ctdb event script enable" command and a blank line are
+# printed.  For any other service nothing is printed and 1 is
+# returned, which check_valid_service() relies on.
+print_event_script_enable_command ()
+{
+ _service="$1"
+
+ _component=""
+ _script=""
+ case "$_service" in
+ samba) _component="legacy" ; _script="50.samba" ;;
+ winbind) _component="legacy" ; _script="49.winbind" ;;
+ apache2|httpd) _component="legacy" ; _script="41.httpd" ;;
+ clamd) _component="legacy" ; _script="31.clamd" ;;
+ iscsi) _component="legacy" ; _script="70.iscsi" ;;
+ nfs) _component="legacy" ; _script="60.nfs" ;;
+ vsftpd) _component="legacy" ; _script="40.vsftpd" ;;
+ esac
+
+ # No script was selected above, so the service is unknown
+ if [ -z "$_script" ] ; then
+ return 1
+ fi
+
+ cat <<EOF
+# Enable the ${_service} service
+ctdb event script enable ${_component} ${_script}
+
+EOF
+}
+
+# Check if the given service is valid
+#
+# $1 is the service name.  Succeeds if
+# print_event_script_enable_command() knows an event script for it;
+# the command it prints is discarded.
+check_valid_service ()
+{
+ _service="$1"
+
+ print_event_script_enable_command "$_service" >/dev/null
+}
+
+############################################################
+
+#
+# Utilities
+#
+
+# List all options starting with "CTDB_" set in given configuration files
+#
+# This takes no arguments: it reads the shell's own variables via
+# "set", so the configuration files must already have been sourced.
+# Any CTDB_* variable that is set in the shell is listed.
+#
+# Output is one "NAME VALUE" line per option, with one leading and
+# one trailing single quote (as added by "set") stripped from VALUE.
+list_options ()
+{
+	set |
+	sed -n 's|^\(CTDB_[^=]*\)=\(.*\)|\1 \2|p' |
+	while read -r _var _val ; do
+		# Strip quotes from value
+		#
+		# printf is used instead of echo because echo may
+		# interpret backslashes or a leading "-n" in the value
+		_val=$(printf '%s\n' "$_val" | sed -e "s|^'||" -e "s|'\$||")
+
+		printf '%s %s\n' "$_var" "$_val"
+	done
+}
+
+# List all tunables set in the given configuration files
+#
+# Filters the output of list_options() down to CTDB_SET_<tunable>
+# options and prints "<tunable> VALUE" lines with the CTDB_SET_
+# prefix removed.
+list_tunables ()
+{
+ list_options |
+ while read -r _opt _val ; do
+ case "$_opt" in
+ CTDB_SET_*) echo "${_opt#CTDB_SET_} ${_val}" ;;
+ esac
+ done
+}
+
+# List all managed services according to the given configuration files
+#
+# Prints one service name per line.  Both old mechanisms are
+# consulted and their results are simply concatenated, without
+# removing duplicates.
+list_managed_services ()
+{
+ #
+ # CTDB_MANAGES_<service>="yes"
+ #
+ list_options |
+ while read -r _opt _val ; do
+ case "$_opt" in
+ CTDB_MANAGES_*) : ;;
+ *) continue ;;
+ esac
+
+ # Only the exact value "yes" marks a service as managed
+ if [ "$_val" != "yes" ] ; then
+ continue
+ fi
+
+ # Trim and downcase
+ echo "${_opt#CTDB_MANAGES_}" | tr '[:upper:]' '[:lower:]'
+ done
+
+ #
+ # CTDB_MANAGED_SERVICES
+ #
+ # Unquoted on purpose: the value is a whitespace-separated list
+ for _service in $CTDB_MANAGED_SERVICES ; do
+ echo "$_service"
+ done
+}
+
+############################################################
+
+#
+# Print warnings for removed and unknown options
+#
+
+
+# Print a warning as a bullet list item
+#
+# $1 is the text of the bullet item, printed after "* " and followed
+# by a blank line.
+#
+# Arguments after the 1st are printed as a subsequent paragraph.
+# They are joined with single spaces onto one line, which is also
+# followed by a blank line.
+warn ()
+{
+ bullet="$1" ; shift
+
+ printf '* %s\n\n' "$bullet"
+
+ if [ $# -gt 0 ] ; then
+ printf ' %s\n\n' "$*"
+ fi
+}
+
+# Warn if the old configuration used CTDB_DBDIR=tmpfs
+#
+# Depends on $ctdb_dbdir_tmpfs_magic, which is run as a command and
+# so must hold "true" or "false".  It is set at the bottom of this
+# script after the input files have been sourced.
+warn_about_CTDB_DBDIR_tmpfs_yes ()
+{
+ if $ctdb_dbdir_tmpfs_magic ; then
+ warn "Option \"CTDB_DBDIR=tmpfs\" is no longer available:" \
+ "Permanently mount a tmpfs filesystem on the volatile" \
+ "database directory"
+ fi
+}
+
+# Warn about each managed service that has no known event script
+#
+# Services accepted by check_valid_service() are skipped without
+# comment.
+warn_about_unknown_managed_services ()
+{
+ list_managed_services |
+ while read -r _s ; do
+ if check_valid_service "$_s" ; then
+ continue
+ fi
+ warn "Unknown service \"${_s}\" marked as managed" \
+ "If this is a 3rd party service, please enable it manually"
+ done
+}
+
+# Warn about each CTDB_* option that is not migrated anywhere
+#
+# Options that convert to ctdb.conf, valid script options and options
+# dealt with by other parts of this script are skipped.  What is
+# left is reported as either removed or unknown.
+warn_about_removed_and_unknown_options ()
+{
+ list_options |
+ while read -r _opt _val ; do
+ if check_ctdb_conf_option "$_opt" ; then
+ continue
+ fi
+
+ if check_valid_script_option "$_opt" ; then
+ continue
+ fi
+
+ case "$_opt" in
+ CTDB_MANAGED_SERVICES|\
+ CTDB_MANAGES_*|\
+ CTDB_SET_*|\
+ CTDB_NODES|\
+ CTDB_PUBLIC_ADDRESSES|\
+ CTDB_MAX_OPEN_FILES|\
+ CTDB_SUPPRESS_COREFILE)
+ # Handled elsewhere
+ continue
+ ;;
+ esac
+
+ if check_removed_option "$_opt" ; then
+ warn "Option \"${_opt}\" is no longer available" \
+ "Please see the WHATSNEW.txt"
+ continue
+ fi
+
+ warn "Option \"${_opt}\" is unknown"
+ done
+}
+
+# Warn about each CTDB_SET_* tunable that is not a valid tunable
+#
+# Valid tunables are skipped.  What is left is reported as either
+# removed or unknown.
+warn_about_removed_and_unknown_tunables ()
+{
+ list_tunables |
+ while read -r _var _val ; do
+ if check_valid_tunable "$_var" ; then
+ continue
+ fi
+
+ if check_removed_tunable "$_var" ; then
+ warn "Tunable \"${_var}\" is no longer available" \
+ "Please see the WHATSNEW.txt"
+ continue
+ fi
+
+ warn "Tunable \"${_var}\" is unknown"
+ done
+}
+
+############################################################
+
+#
+# Top-level file builders
+#
+
+# Generate ctdb.conf from the old-style options
+#
+# $1 is the output file.  Each CTDB_* option or CTDB_SET_* tunable
+# that converts to a ctdb.conf option is inserted directly after its
+# "[section]" header.  A missing section is created at the end of
+# the file first.  The file is left in place even if it ends up
+# empty.
+build_ctdb_conf ()
+{
+	_out_file="$1"
+
+	out_file_check_and_create "$_out_file" || return
+
+	list_options |
+	while read -r _opt _val ; do
+		case "$_opt" in
+		CTDB_SET_*)
+			_opt="${_opt#CTDB_SET_}"
+			_out=$(get_ctdb_conf_tunable_option "$_opt" "$_val")
+			;;
+		*)
+			_out=$(get_ctdb_conf_option "$_opt" "$_val")
+			;;
+		esac
+		if [ -z "$_out" ] ; then
+			continue
+		fi
+
+		# $_out is "section key value".  Section and key never
+		# contain spaces, so peel them off the front and keep
+		# the remainder intact.  Word splitting must not be
+		# used here: a value may itself contain spaces (e.g. a
+		# command with arguments) or glob characters.
+		_section="${_out%% *}"
+		_rest="${_out#* }"
+		_key="${_rest%% *}"
+		_newval=""
+		case "$_rest" in
+		*" "*) _newval="${_rest#* }" ;;
+		esac
+
+		# Replace dashes with spaces in section and key
+		_section=$(echo "$_section" | sed -e 's|-| |g')
+		_key=$(echo "$_key" | sed -e 's|-| |g')
+
+		if ! grep -Fqx "[${_section}]" "$_out_file" ; then
+			# Add blank line if file is not empty
+			if [ -s "$_out_file" ] ; then
+				echo >>"$_out_file"
+			fi
+			# Create section at end of file
+			echo "[${_section}]" >>"$_out_file"
+		fi
+
+		# Must escape leading whitespace or sed eats it
+		sed -i -e "/\\[${_section}\\]/a\
+\\ ${_key} = ${_newval}
+" "$_out_file"
+
+	done
+
+}
+
+# Generate script.options
+#
+# $1 is the output file.  Every option accepted by
+# check_valid_script_option() is written as NAME=VALUE.  The file is
+# removed again if nothing was written.
+build_script_options ()
+{
+ _out_file="$1"
+
+ out_file_check_and_create "$_out_file" || return
+
+ list_options |
+ while read -r _var _val ; do
+ if check_valid_script_option "$_var" ; then
+ echo "${_var}=${_val}"
+ fi
+ done >>"$_out_file"
+
+ out_file_remove_if_empty "$_out_file"
+}
+
+# Generate ctdb.tunables
+#
+# $1 is the output file.  Valid tunables are written as NAME=VALUE,
+# except those that convert to ctdb.conf options.  The file is
+# removed again if nothing was written.
+build_ctdb_tunables ()
+{
+ _out_file="$1"
+
+ out_file_check_and_create "$_out_file" || return
+
+ list_tunables |
+ while read -r _var _val ; do
+ # Tunables that became ctdb.conf options are handled by
+ # build_ctdb_conf()
+ if check_ctdb_conf_tunable_option "$_var" ; then
+ continue
+ fi
+ if ! check_valid_tunable "$_var" ; then
+ continue
+ fi
+ echo "${_var}=${_val}"
+ done >>"$_out_file"
+
+ out_file_remove_if_empty "$_out_file"
+}
+
+# Generate ctdb.sysconfig
+#
+# $1 is the output file.  CTDB_SUPPRESS_COREFILE and
+# CTDB_MAX_OPEN_FILES are translated into "ulimit" commands.  The
+# file is removed again if nothing was written.
+build_ctdb_sysconfig ()
+{
+ _out_file="$1"
+
+ out_file_check_and_create "$_out_file" || return
+
+ # Any value other than "yes" results in unlimited core file size
+ if [ -n "$CTDB_SUPPRESS_COREFILE" ] ; then
+ if [ "$CTDB_SUPPRESS_COREFILE" = "yes" ] ; then
+ echo "ulimit -c 0"
+ else
+ echo "ulimit -c unlimited"
+ fi >>"$_out_file"
+ fi
+
+ if [ -n "$CTDB_MAX_OPEN_FILES" ] ; then
+ echo "ulimit -n ${CTDB_MAX_OPEN_FILES}" >>"$_out_file"
+ fi
+
+ out_file_remove_if_empty "$_out_file"
+}
+
+# Generate commands.sh
+#
+# $1 is the output file.  It receives commands to enable the event
+# scripts of managed services, plus comments and "ln -s" commands
+# for CTDB_NODES and CTDB_PUBLIC_ADDRESSES when they are set.
+# Variables in the here-documents are expanded when the file is
+# generated, so it contains literal paths.  The file is removed
+# again if nothing was written.
+build_commands_sh ()
+{
+ _out_file="$1"
+
+ out_file_check_and_create "$_out_file" || return
+
+ #
+ # Enable script for managed services
+ #
+ list_managed_services |
+ while read -r _service ; do
+ print_event_script_enable_command "$_service"
+ done >>"$_out_file"
+
+ #
+ # CTDB_NODES no longer available
+ #
+ if [ -n "$CTDB_NODES" ] ; then
+ if [ "$CTDB_NODES" = "${config_dir}/nodes" ] ; then
+ cat <<EOF
+# CTDB_NODES=${CTDB_NODES}
+# Looks like the standard location. Nothing to do.
+
+EOF
+ else
+ cat <<EOF
+# CTDB_NODES=${CTDB_NODES}
+# Looks like a non-standard location. Use the default location
+# in the configuration directory or create a symlink.
+ln -s "$CTDB_NODES" "${config_dir}/nodes"
+
+EOF
+ fi >>"$_out_file"
+ fi
+
+ #
+ # CTDB_PUBLIC_ADDRESSES no longer available
+ #
+ if [ -n "$CTDB_PUBLIC_ADDRESSES" ] ; then
+ _pa="public_addresses"
+ if [ "$CTDB_PUBLIC_ADDRESSES" = "${config_dir}/${_pa}" ] ; then
+ cat <<EOF
+# CTDB_PUBLIC_ADDRESSES=${CTDB_PUBLIC_ADDRESSES}
+# Looks like the standard location. Nothing to do.
+
+EOF
+ else
+ cat <<EOF
+# CTDB_PUBLIC_ADDRESSES=${CTDB_PUBLIC_ADDRESSES}
+# Looks like a non-standard location. Use the default location
+# in the configuration directory or create a symlink.
+ln -s "$CTDB_PUBLIC_ADDRESSES" "${config_dir}/${_pa}"
+
+EOF
+ fi >>"$_out_file"
+ fi
+
+ out_file_remove_if_empty "$_out_file"
+}
+
+# Generate README.warn
+#
+# $1 is the output file.  It collects the output of all of the
+# warn_about_*() functions.  The file is removed again if there were
+# no warnings.
+build_README_warn ()
+{
+ _out_file="$1"
+
+ out_file_check_and_create "$_out_file" || return
+
+ {
+ warn_about_CTDB_DBDIR_tmpfs_yes
+ warn_about_unknown_managed_services
+ warn_about_removed_and_unknown_options
+ warn_about_removed_and_unknown_tunables
+ } >>"$_out_file"
+
+ out_file_remove_if_empty "$_out_file"
+}
+
+############################################################
+
+mkdir -p "$out_dir" || exit 1
+
+# Source the input files
+for i ; do
+	# "." searches PATH for a name that does not contain a slash,
+	# so a file in the current directory might not be found (or
+	# the wrong file might be sourced).  Anchor such names to the
+	# current directory.
+	case "$i" in
+	*/*) : ;;
+	*) i="./${i}" ;;
+	esac
+
+	# Unknown non-constant source
+	# shellcheck disable=SC1090
+	. "$i"
+done
+
+# Special case
+#
+# CTDB_DBDIR=tmpfs was a magic value rather than a directory.
+# Remember it for README.warn and unset the variable so that it is
+# not converted into a database directory option.
+ctdb_dbdir_tmpfs_magic=false
+if [ "$CTDB_DBDIR" = "tmpfs" ] ; then
+	ctdb_dbdir_tmpfs_magic=true
+	unset CTDB_DBDIR
+fi
+
+build_ctdb_conf "${out_dir}/ctdb.conf"
+build_script_options "${out_dir}/script.options"
+build_ctdb_tunables "${out_dir}/ctdb.tunables"
+build_ctdb_sysconfig "${out_dir}/ctdb.sysconfig"
+build_commands_sh "${out_dir}/commands.sh"
+build_README_warn "${out_dir}/README.warn"
diff --git a/ctdb/doc/examples/config_migrate.test_input b/ctdb/doc/examples/config_migrate.test_input
new file mode 100644
index 0000000..e823fd4
--- /dev/null
+++ b/ctdb/doc/examples/config_migrate.test_input
@@ -0,0 +1,50 @@
+CTDB_BASE="/etc/ctdb"
+
+CTDB_RECOVERY_LOCK="/some/place/on/shared/storage"
+
+CTDB_NODES="/etc/ctdb/nodes"
+
+CTDB_PUBLIC_ADDRESSES="/clusterfs/.ctdb/public_addresses"
+
+CTDB_SOCKET="/var/run/ctdb.sock"
+
+CTDB_MANAGES_SAMBA="yes"
+CTDB_MANAGES_WINBIND="yes"
+CTDB_MANAGES_NFS="yes"
+CTDB_MANAGED_SERVICES="vsftpd"
+CTDB_MANAGES_FOOBAR="yes"
+
+CTDB_MAX_OPEN_FILES=10000
+
+CTDB_LOGGING="syslog"
+CTDB_DEBUGLEVEL="ERROR"
+
+CTDB_SAMBA_CHECK_PORTS="445"
+CTDB_SAMBA_SKIP_SHARE_CHECK="yes"
+
+CTDB_START_AS_STOPPED="yes"
+CTDB_CAPABILITY_RECMASTER="no"
+CTDB_CAPABILITY_LMASTER="yes"
+
+CTDB_NOSETSCHED="yes"
+
+CTDB_FOO="bar"
+
+CTDB_NATGW_PUBLIC_IP=10.1.1.121/24
+CTDB_NATGW_PUBLIC_IFACE=eth1
+CTDB_NATGW_DEFAULT_GATEWAY=10.1.1.254
+CTDB_NATGW_PRIVATE_NETWORK=192.168.1.0/24
+CTDB_NATGW_NODES=/etc/ctdb/natgw_nodes
+
+CTDB_SET_TDBMutexEnabled=0
+CTDB_SET_IPALLOCALGORITHM=0
+CTDB_SET_noiphostonalldisabled=0
+CTDB_SET_DisableIpFailover=1
+CTDB_SET_foobar=0
+
+CTDB_SUPPRESS_COREFILE="yes"
+CTDB_MAX_OPEN_FILES="1048576"
+
+CTDB_NOTIFY_SCRIPT=/etc/ctdb/notify-custom.sh
+
+CTDB_DBDIR=tmpfs
diff --git a/ctdb/doc/examples/ctdb.conf b/ctdb/doc/examples/ctdb.conf
new file mode 100644
index 0000000..3a8ccc6
--- /dev/null
+++ b/ctdb/doc/examples/ctdb.conf
@@ -0,0 +1,73 @@
+#
+# CTDB configuration for simple cluster
+#
+# This is the sample configuration for a 3-node CTDB cluster providing file
+# services via Samba and NFS.
+#
+# Cluster details:
+#
+# internal network (192.168.1.0/24)
+# -------+----------------------+-----------------------+----------
+# | | |
+# | | |
+# eth0 | 192.168.1.1 eth0 | 192.168.1.2 eth0 | 192.168.1.3
+# +-----+-----+ +-----+-----+ +-----+-----+
+# | | | | | |
+# | Node 1 | | Node 2 | | Node 3 |
+# | | | | | |
+# +-----+-----+ +-----+-----+ +-----+-----+
+# eth1 | 10.1.1.1 eth1 | 10.1.1.2 eth1 | 10.1.1.3
+# | | |
+# | | |
+# -------+----------------------+-----------------------+----------
+# public network (10.1.1.0/24)
+#
+#
+# Storage details:
+#
+# Each node has a shared storage - /shared
+#
+#
+# Service details:
+#
+# Cluster provides file services on following IP addresses
+#
+# 10.1.1.101 - 10.1.1.106
+#
+# Each node also has a fixed IP address on public network. This is used to
+# communicate to network infrastructure (e.g. DNS, Active Directory, ...).
+# Make sure that file services are not available on these fixed IP addresses
+# (e.g. network filtering, using cluster hostname instead of IPs)
+
+[logging]
+ # Enable logging to syslog
+ location = syslog
+
+ # Default log level
+ log level = NOTICE
+
+[cluster]
+ cluster lock = /shared/cluster.lock
+
+#
+# Nodes configuration
+#
+# ---------- /etc/ctdb/nodes ----------
+# 192.168.1.1
+# 192.168.1.2
+# 192.168.1.3
+# ---------- /etc/ctdb/nodes ----------
+#
+#
+# Public addresses configuration
+#
+# ---------- /etc/ctdb/public_addresses ----------
+# 10.1.1.101/24 eth1
+# 10.1.1.102/24 eth1
+# 10.1.1.103/24 eth1
+# 10.1.1.104/24 eth1
+# 10.1.1.105/24 eth1
+# 10.1.1.106/24 eth1
+# ---------- /etc/ctdb/public_addresses ----------
+#
+# Event script configuration - see *.options examples
diff --git a/ctdb/doc/examples/ctdb.spec.in b/ctdb/doc/examples/ctdb.spec.in
new file mode 100644
index 0000000..addb7e1
--- /dev/null
+++ b/ctdb/doc/examples/ctdb.spec.in
@@ -0,0 +1,291 @@
+%define with_systemd %{?_with_systemd: 1} %{?!_with_systemd: 0}
+%define initdir %{_sysconfdir}/init.d
+Name: ctdb
+Summary: Clustered TDB
+Vendor: Samba Team
+Packager: Samba Team <samba@samba.org>
+Version: @VERSION@
+Release: @RELEASE@
+Epoch: 0
+License: GNU GPL version 3
+Group: System Environment/Daemons
+URL: http://ctdb.samba.org/
+
+Source: ctdb-%{version}.tar.gz
+
+# Packages
+Requires: coreutils, sed, gawk, iptables, iproute, procps, ethtool, sudo
+# Commands - package name might vary
+Requires: /usr/bin/killall, /bin/kill, /bin/ss
+
+Provides: ctdb = %{version}
+
+Prefix: /usr
+BuildRoot: %{_tmppath}/%{name}-%{version}-root
+
+# Allow build with system libraries
+# To enable, run rpmbuild with,
+# "--with system_talloc"
+# "--with system_tdb"
+# "--with system_tevent"
+#%define with_included_talloc %{?_with_system_talloc: 0} %{?!_with_system_talloc: 1}
+#%define with_included_tdb %{?_with_system_tdb: 0} %{?!_with_system_tdb: 1}
+#%define with_included_tevent %{?_with_system_tevent: 0} %{?!_with_system_tevent: 1}
+
+%define with_included_talloc 0
+%define with_included_tevent 0
+%define with_included_tdb 0
+
+# Required minimum library versions when building with system libraries
+%define libtalloc_version 2.0.8
+%define libtdb_version 1.3.11
+%define libtevent_version 0.9.16
+
+%if ! %with_included_talloc
+BuildRequires: libtalloc-devel >= %{libtalloc_version}
+Requires: libtalloc >= %{libtalloc_version}
+%endif
+%if ! %with_included_tdb
+BuildRequires: libtdb-devel >= %{libtdb_version}
+Requires: libtdb >= %{libtdb_version}
+%endif
+%if ! %with_included_tevent
+BuildRequires: libtevent-devel >= %{libtevent_version}
+Requires: libtevent >= %{libtevent_version}
+%endif
+
+# To build the ctdb-pcp-pmda package, run rpmbuild with "--with pmda"
+%define with_pcp_pmda %{?_with_pmda: 1} %{?!_with_pmda: 0}
+%if %with_pcp_pmda
+BuildRequires: pcp-libs-devel
+%endif
+
+%if %{with_systemd}
+BuildRequires: systemd-units
+%endif
+
+%description
+ctdb is the clustered database used by samba
+
+#######################################################################
+
+
+
+%prep
+%setup -q
+# setup the init script and sysconfig file
+%setup -T -D -n ctdb-%{version} -q
+
+%build
+
+## check for ccache
+if ccache -h >/dev/null 2>&1 ; then
+ CC="ccache gcc"
+else
+ CC="gcc"
+fi
+
+export CC
+
+CFLAGS="$RPM_OPT_FLAGS $EXTRA -D_GNU_SOURCE" \
+./configure \
+ --builtin-libraries=replace,popt \
+ --bundled-libraries=!talloc,!tevent,!tdb \
+ --minimum-library-version=talloc:%libtalloc_version,tdb:%libtdb_version,tevent:%libtevent_version \
+%if %with_pcp_pmda
+ --enable-pmda \
+%endif
+ --prefix=%{_prefix} \
+ --includedir=%{_includedir}/ctdb \
+ --libdir=%{_libdir} \
+ --libexecdir=%{_libexecdir} \
+ --sysconfdir=%{_sysconfdir} \
+ --mandir=%{_mandir} \
+ --localstatedir=%{_localstatedir}
+
+make -j
+
+%install
+# Clean up in case there is trash left from a previous build
+rm -rf $RPM_BUILD_ROOT
+
+# Create the target build directory hierarchy
+mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sudoers.d
+
+DESTDIR=$RPM_BUILD_ROOT make -j install
+
+install -m644 config/ctdb.conf $RPM_BUILD_ROOT%{_sysconfdir}/ctdb
+install -m644 config/ctdb.tunables $RPM_BUILD_ROOT%{_sysconfdir}/ctdb
+install -m644 config/script.options $RPM_BUILD_ROOT%{_sysconfdir}/ctdb
+
+mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig
+install -m644 config/ctdb.sysconfig $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ctdb
+
+%if %{with_systemd}
+mkdir -p $RPM_BUILD_ROOT%{_unitdir}
+# A unit file is configuration, not a program: install it without
+# execute permission
+install -m 644 config/ctdb.service $RPM_BUILD_ROOT%{_unitdir}
+%else
+mkdir -p $RPM_BUILD_ROOT%{initdir}
+install -m755 config/ctdb.init $RPM_BUILD_ROOT%{initdir}/ctdb
+%endif
+
+# This is a hack. All documents should be installed in /usr/share/doc.
+cp config/events/README README.eventscripts
+rm -f $RPM_BUILD_ROOT%{_sysconfdir}/ctdb/nfs-checks.d/README
+cp config/nfs-checks.d/README README.nfs-checks.d
+cp config/notification.README README.notification
+
+# Remove "*.old" files
+find $RPM_BUILD_ROOT -name "*.old" -exec rm -f {} \;
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%post
+
+# If mandatory 00.ctdb event script is not enabled then enable it and
+# some other scripts. The assumption here is that this is a
+# first-time install or an upgrade to a version that requires event
+# scripts to be enabled via symlinks.
+required_script="00.ctdb"
+required_path="%{_sysconfdir}/ctdb/events/legacy/${required_script}.script"
+if [ ! -L "$required_path" ] && [ ! -e "$required_path" ] ; then
+ default_scripts="${required_script}
+ 01.reclock
+ 05.system
+ 10.interface
+ "
+ for t in $default_scripts ; do
+ tgt="%{_datadir}/ctdb/events/legacy/${t}.script"
+ name="%{_sysconfdir}/ctdb/events/legacy/${t}.script"
+ # Directory is created via install and files
+ ln -s "$tgt" "$name"
+ done
+fi
+
+%preun
+
+# Uninstall, not upgrade. Clean up by removing any remaining links.
+if [ "$1" = "0" ] ; then
+ for i in "%{_sysconfdir}/ctdb/events/legacy/"*.script ; do
+ if [ -L "$i" ] ; then
+ rm -f "$i"
+ fi
+ done
+fi
+
+%files
+%defattr(-,root,root)
+
+%config(noreplace) %{_sysconfdir}/ctdb/ctdb.conf
+%config(noreplace) %{_sysconfdir}/ctdb/ctdb.tunables
+%config(noreplace) %{_sysconfdir}/ctdb/script.options
+%{_sysconfdir}/ctdb/notify.sh
+%config(noreplace) %{_sysconfdir}/ctdb/debug-hung-script.sh
+%config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh
+%config(noreplace) %{_sysconfdir}/ctdb/debug_locks.sh
+
+%config(noreplace, missingok) %{_sysconfdir}/sysconfig/ctdb
+
+%if %{with_systemd}
+%{_unitdir}/ctdb.service
+%else
+%attr(755,root,root) %{initdir}/ctdb
+%endif
+
+%doc README COPYING
+%doc README.eventscripts README.notification
+%doc doc/recovery-process.txt
+%doc doc/cluster_mutex_helper.txt
+%doc doc/*.html
+%doc doc/examples
+%{_sysconfdir}/sudoers.d/ctdb
+%dir %{_sysconfdir}/ctdb
+%{_sysconfdir}/ctdb/functions
+%dir %{_sysconfdir}/ctdb/events
+%{_sysconfdir}/ctdb/events/*
+%dir %{_sysconfdir}/ctdb/nfs-checks.d
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-checks.d/00.portmapper.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-checks.d/10.status.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-checks.d/20.nfs.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-checks.d/30.nlockmgr.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-checks.d/40.mountd.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-checks.d/50.rquotad.check
+%{_sysconfdir}/ctdb/statd-callout
+%{_sysconfdir}/ctdb/nfs-linux-kernel-callout
+%{_sbindir}/ctdbd
+%{_bindir}/ctdb
+%{_bindir}/ping_pong
+%{_bindir}/ltdbtool
+%{_bindir}/ctdb_diagnostics
+%{_bindir}/onnode
+%dir %{_libexecdir}/ctdb
+%{_libexecdir}/ctdb/*
+%dir %{_libdir}/ctdb
+%{_libexecdir}/tdb_mutex_check
+%{_libdir}/ctdb/lib*
+%dir %{_datadir}/ctdb/events
+%{_datadir}/ctdb/events/*
+%{_mandir}/man1/ctdb.1.gz
+%{_mandir}/man1/ctdb_diagnostics.1.gz
+%{_mandir}/man1/ctdbd.1.gz
+%{_mandir}/man1/onnode.1.gz
+%{_mandir}/man1/ltdbtool.1.gz
+%{_mandir}/man1/ping_pong.1.gz
+%{_mandir}/man5/ctdb.conf.5.gz
+%{_mandir}/man5/ctdb-script.options.5.gz
+%{_mandir}/man5/ctdb.sysconfig.5.gz
+%{_mandir}/man7/ctdb.7.gz
+%{_mandir}/man7/ctdb-statistics.7.gz
+%{_mandir}/man7/ctdb-tunables.7.gz
+%attr(0700,root,root) %dir %{_localstatedir}/lib/ctdb
+%attr(0700,root,root) %dir %{_localstatedir}/lib/ctdb/*
+%attr(0700,root,root) %dir %{_localstatedir}/run/ctdb
+
+
+%package devel
+Summary: CTDB development libraries
+Group: Development/Libraries
+
+%description devel
+development libraries for ctdb
+
+%files devel
+%defattr(-,root,root)
+
+%package tests
+Summary: CTDB test suite
+Group: Development/Tools
+Requires: ctdb = %{version}
+Requires: nc, tcpdump
+
+%description tests
+test suite for ctdb
+
+%files tests
+%defattr(-,root,root)
+%dir %{_datadir}/%{name}/tests
+%{_datadir}/%{name}/tests/*
+%dir %{_libexecdir}/%{name}/tests
+%{_libexecdir}/%{name}/tests/*
+%{_bindir}/ctdb_run_tests
+%{_bindir}/ctdb_run_cluster_tests
+%{_bindir}/ctdb_local_daemons
+%doc tests/README
+
+%if %with_pcp_pmda
+
+%package pcp-pmda
+Summary: CTDB PCP pmda support
+Group: Development/Tools
+Requires: ctdb = %{version}
+Requires: pcp-libs
+
+%description pcp-pmda
+Performance Co-Pilot (PCP) support for CTDB
+
+%files pcp-pmda
+%dir %{_localstatedir}/lib/pcp/pmdas/ctdb
+%{_localstatedir}/lib/pcp/pmdas/ctdb/*
+
+%endif
diff --git a/ctdb/doc/examples/nfs-ganesha-callout b/ctdb/doc/examples/nfs-ganesha-callout
new file mode 100755
index 0000000..7c7b074
--- /dev/null
+++ b/ctdb/doc/examples/nfs-ganesha-callout
@@ -0,0 +1,352 @@
+#!/bin/sh
+
+# This is an example CTDB NFS callout script for Ganesha. It is based
+# on the last version of 60.ganesha shipped with CTDB. As such, it
+# does not try to monitor RPC services that were not monitored by
+# 60.ganesha - this might be a useful improvement. It has also not
+# been properly tested.
+
+# You should check your version of NFS Ganesha to see if it ships with
+# a newer callout.
+
+# To use this:
+#
+# * Set CTDB_NFS_CALLOUT in your CTDB configuration to point to (a
+# copy of) this script, making sure it is executable.
+#
+# * Create a new directory alongside the nfs-checks.d directory, for
+# example nfs-checks-ganesha.d. Install 20.nfs-ganesha.check in
+# this directory. Symlink to any other check files from
+# nfs-checks.d that should still be used, such as
+# 00.portmapper.check. Set CTDB_NFS_CHECKS_DIR to point to this new
+# directory of check files.
+#
+# * It is recommended, but not required, to install the grace_period
+# script (usually shipped in a utils package for NFS-Ganesha) to
+# /usr/bin/grace_period
+
+# I (Martin Schwenke) hereby relicense all of my contributions to this
+# callout (and, previously, to 60.ganesha) to a license compatible
+# with NFS Ganesha (right now this is LGPLv3, but I'm flexible).
+# There may be other contributions to be considered for relicensing,
+# particularly those in commit 28cbe527d47822f870e8252495ab2a1c8fddd12f.
+
+######################################################################
+
+# Exit on 1st error
+set -e
+
+# Filesystem type and mount point for the (typically clustered)
+# volume that will contain the NFS-Ganesha state.
+state_fs="${CTDB_NFS_STATE_FS_TYPE:-gpfs}"
+state_dir="${CTDB_NFS_STATE_MNT}" # No sane default.
+
+# To change the following, edit the default values below. Do not set
+# these - they aren't configuration variables, just hooks for testing.
+nfs_exports_file="${CTDB_NFS_EXPORTS_FILE:-/etc/ganesha/ganesha.conf}"
+nfs_service="${CTDB_NFS_SERVICE:-nfs-ganesha}"
+ganesha_rec_subdir=${CTDB_GANESHA_REC_SUBDIR:-.ganesha}
+procfs=${PROCFS_PATH:-/proc}
+
+# Derive the state locations used by the functions below from the
+# filesystem type. Any other value of state_fs sets nothing here.
+case "$state_fs" in
+gpfs)
+ # Path that create_ganesha_recdirs() turns into a symlink to the
+ # recovery subdirectory on the cluster filesystem.
+ GANRECDIR="/var/lib/nfs/ganesha"
+ ;;
+glusterfs)
+ # The paths below are all built from state_dir, so it is mandatory.
+ if [ -z "${state_dir}" ]; then
+ echo "CTDB_NFS_STATE_MNT not defined for GlusterFS"
+ exit 1
+ fi
+ host=$(hostname)
+ # This node's state directory, the directory holding one link per
+ # node, and this node's own link within that directory.
+ NODESTATEDIR="$state_dir/nfs-ganesha/$host"
+ GANSTATEDIR="$state_dir/nfs-ganesha/.noderefs"
+ NODESTATELN="$GANSTATEDIR/$host"
+ ;;
+esac
+
+
+##################################################
+
+# Print a synopsis of the supported actions and exit with status 1.
+usage ()
+{
+    _self=$(basename "$0")
+    cat <<EOF
+usage: $_self { shutdown | startup }
+ $_self { stop | start | check } nfs
+ $_self { releaseip | takeip }
+ $_self { monitor-list-shares }
+EOF
+    exit 1
+}
+
+
+##################################################
+# Basic service stop and start
+
+# Stop the NFS service via the "service" command.
+# Usage: basic_stop nfs
+# Any other argument prints the usage message and exits.
+basic_stop ()
+{
+    if [ "$1" = "nfs" ]; then
+        service "$nfs_service" stop
+    else
+        usage
+    fi
+}
+
+# Start the NFS service via the "service" command.
+# Usage: basic_start nfs
+# Any other argument prints the usage message and exits.
+basic_start ()
+{
+    if [ "$1" = "nfs" ]; then
+        service "$nfs_service" start
+    else
+        usage
+    fi
+}
+
+##################################################
+# "stop" and "start" options for restarting
+
+# "stop" action used when restarting a service.
+# Usage: service_stop { nfs | nlockmgr }
+service_stop ()
+{
+    if [ "$1" = "nfs" ]; then
+        basic_stop "nfs"
+    elif [ "$1" = "nlockmgr" ]; then
+        # Do nothing - used by statd-callout
+        :
+    else
+        usage
+    fi
+}
+
+# "start" action used when restarting a service.
+# Usage: service_start { nfs | nlockmgr }
+service_start ()
+{
+    if [ "$1" = "nfs" ]; then
+        basic_start "nfs"
+    elif [ "$1" = "nlockmgr" ]; then
+        # Do nothing - used by statd-callout
+        :
+    else
+        usage
+    fi
+}
+
+##################################################
+# Nitty gritty - monitoring and IP handling
+
+# Ensure that <LINK> is a symlink pointing at <TARGET>, replacing
+# whatever is currently at <LINK> if it is something else.
+# Usage: check_ln <TARGET> <LINK>
+check_ln ()
+{
+    if [ -L "${2}" ] ; then
+        # Already a symlink: drop it only if it points elsewhere
+        _current=$(readlink "${2}")
+        if [ "$_current" != "${1}" ] ; then
+            rm -v "${2}"
+        fi
+    else
+        # Not a symlink: remove whatever is there (if anything)
+        rm -vrf "${2}"
+    fi
+
+    # Deliberately a separate step: this (re-)creates the link both
+    # when it never existed and when it was removed above.
+    [ -e "${2}" ] || ln -sfv "${1}" "${2}"
+}
+
+# Print the state of the shared filesystem on stdout; 'active' means
+# it is accessible.
+#
+# Callers capture stdout with $(...), so diagnostics are written to
+# stderr - otherwise they would be swallowed into the captured state
+# and the script would exit without any visible message.
+#
+# Exits with status 1 if $state_fs is not a supported filesystem type.
+get_cluster_fs_state ()
+{
+    case $state_fs in
+    gpfs)
+        # Print field 3 of line 4 of the mmgetstate output
+        /usr/lpp/mmfs/bin/mmgetstate | awk 'NR == 4 { print $3 }'
+        ;;
+    glusterfs)
+        # Since we're past create_ganesha_recdirs(), we're active.
+        echo "active"
+        ;;
+    *)
+        echo "File system $state_fs not supported" >&2
+        exit 1
+        ;;
+    esac
+}
+
+# Create the NFS-Ganesha state/recovery directories on the cluster
+# filesystem and the symlinks that refer to them.
+#
+# Exits with status 1 if the output of "mount" contains no line
+# matching $state_fs.
+create_ganesha_recdirs ()
+{
+    # grep exits non-zero when nothing matches, so this is the check
+    # that catches a filesystem that is not mounted.
+    if ! _mounts=$(mount | grep "$state_fs"); then
+        echo "Failed to find mounts of type $state_fs"
+        exit 1
+    fi
+    # Safety net: a successful grep should always have produced output.
+    if [ -z "$_mounts" ]; then
+        echo "startup $state_fs not ready"
+        exit 0
+    fi
+
+    case $state_fs in
+    gpfs)
+        # Use field 3 of the first matching mount line, after sorting
+        _mntpt=$(echo "$_mounts" | sort | awk 'NR == 1 {print $3}')
+        _link_dst="${_mntpt}/${ganesha_rec_subdir}"
+        mkdir -vp "$_link_dst"
+        check_ln "$_link_dst" "$GANRECDIR"
+        ;;
+    glusterfs)
+        # Keep the original /var/lib/nfs once, then replace it with a
+        # link to this node's state directory.
+        [ -d /var/lib/nfs.backup ] || \
+            mv /var/lib/nfs /var/lib/nfs.backup
+        check_ln "$NODESTATEDIR" /var/lib/nfs
+
+        mkdir -p "${NODESTATEDIR}/ganesha/v4recov"
+        mkdir -p "${NODESTATEDIR}/ganesha/v4old"
+        mkdir -p "${NODESTATEDIR}/statd/sm"
+        mkdir -p "${NODESTATEDIR}/statd/sm.bak"
+        touch "${NODESTATEDIR}/state"
+        touch "${NODESTATEDIR}/statd/state"
+
+        mkdir -p "$GANSTATEDIR"
+        check_ln "$NODESTATEDIR" "$NODESTATELN"
+        for _dir in "${GANSTATEDIR}/"* ; do
+            # Skip anything that is not a directory.  This also covers
+            # the no-directories case, where the glob pattern is left
+            # unexpanded.  Skipping rather than stopping means a single
+            # stale entry does not hide the nodes that sort after it.
+            if [ ! -d "$_dir" ] ; then
+                continue
+            fi
+
+            _node="${_dir##*/}" # basename
+            # Link every other node's ganesha and statd state into
+            # this node's state directory.
+            if [ "${_node}" != "${host}" ]; then
+                check_ln "${GANSTATEDIR}/${_node}/ganesha" \
+                    "${NODESTATEDIR}/ganesha/${_node}"
+                check_ln "${GANSTATEDIR}/${_node}/statd" \
+                    "${NODESTATEDIR}/statd/${_node}"
+            fi
+        done
+        ;;
+    esac
+}
+
+# "check" action: report whether NFS Ganesha appears to be running.
+# Returns 0 if the cluster filesystem is not active or if the PID
+# file refers to a ganesha.nfsd process; returns 1 otherwise.
+service_check ()
+{
+ create_ganesha_recdirs
+
+ # Always succeed if cluster filesystem is not active
+ _cluster_fs_state=$(get_cluster_fs_state)
+ if [ "$_cluster_fs_state" != "active" ] ; then
+ return 0
+ fi
+
+ # Check that NFS Ganesha is running, according to PID file
+ _pidfile="/var/run/ganesha.pid"
+ _ganesha="/usr/bin/ganesha.nfsd"
+ # The brace group keeps "read" in the current shell and lets one
+ # redirection silence both commands, including the error from a
+ # missing PID file or a missing cmdline file under $procfs.
+ if ! { read -r _pid < "$_pidfile" && \
+ grep "$_ganesha" "${procfs}/${_pid}/cmdline" ; } >/dev/null 2>&1
+ then
+
+ echo "ERROR: NFS Ganesha not running according to PID file"
+ return 1
+ fi
+
+ return 0
+}
+
+#-------------------------------------------------
+
+nfs_releaseip ()
+{
+ if [ -x "/usr/bin/grace_period" ]; then
+ /usr/bin/grace_period "2:${2}"
+ else
+ dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+ /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace \
+ string:"2:${2}"
+ fi
+}
+
+# "takeip" action: send grace request "5:<arg 2>" to NFS Ganesha.
+# On GlusterFS, first make sure a link named after argument 2 in
+# $GANSTATEDIR points at this node's state directory.
+# NOTE(review): "5" is presumably Ganesha's take-IP event code and
+# argument 2 the IP address - confirm against NFS-Ganesha and caller.
+nfs_takeip ()
+{
+    if [ "$state_fs" = "glusterfs" ]; then
+        check_ln "$NODESTATEDIR" "${GANSTATEDIR}/${2}"
+    fi
+
+    _grace_req="5:${2}"
+    if [ ! -x "/usr/bin/grace_period" ]; then
+        dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+            /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace \
+            string:"$_grace_req"
+    else
+        /usr/bin/grace_period "$_grace_req"
+    fi
+}
+
+##################################################
+# service init startup and final shutdown
+
+# "shutdown" action: stop the NFS service.
+nfs_shutdown ()
+{
+    basic_stop nfs
+}
+
+# "startup" action: restart the NFS service with the state
+# directories in place.
+nfs_startup ()
+{
+    # A failing stop must not abort the startup under "set -e"
+    basic_stop "nfs" || true
+
+    create_ganesha_recdirs
+
+    basic_start "nfs"
+
+    # Enable tcp_tw_recycle, but only where that file exists
+    _tw_recycle="${procfs}/sys/net/ipv4/tcp_tw_recycle"
+    [ ! -f "$_tw_recycle" ] || echo 1 >"$_tw_recycle"
+}
+
+##################################################
+# list share directories
+
+# "monitor-list-shares" action: print the unique, sorted values found
+# between the first pair of double quotes on each line of the exports
+# file that contains "Path".
+nfs_monitor_list_shares ()
+{
+    grep Path "$nfs_exports_file" | cut -d '"' -f 2 | sort -u
+}
+
+##################################################
+
+# "register" action: print one action name per line.
+nfs_register ()
+{
+    printf '%s\n' \
+        shutdown \
+        startup \
+        stop \
+        start \
+        check \
+        releaseip \
+        takeip \
+        monitor-list-shares
+}
+
+##################################################
+
+action="$1"
+shift
+
+case "$action" in
+shutdown) nfs_shutdown ;;
+startup) nfs_startup ;;
+stop) service_stop "$1" ;;
+start) service_start "$1" ;;
+check) service_check "$1" ;;
+releaseip) nfs_releaseip "$@" ;;
+takeip) nfs_takeip "$@" ;;
+monitor-list-shares) nfs_monitor_list_shares ;;
+register) nfs_register ;;
+monitor-pre|monitor-post|releaseip-pre|takeip-pre)
+ # Not required/implemented
+ :
+ ;;
+*)
+ usage
+esac
diff --git a/ctdb/doc/ltdbtool.1.xml b/ctdb/doc/ltdbtool.1.xml
new file mode 100644
index 0000000..875131a
--- /dev/null
+++ b/ctdb/doc/ltdbtool.1.xml
@@ -0,0 +1,300 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="ltdbtool.1">
+
+ <refmeta>
+ <refentrytitle>ltdbtool</refentrytitle>
+ <manvolnum>1</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ltdbtool</refname>
+ <refpurpose>manipulate CTDB's local TDB files</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ltdbtool</command>
+ <arg rep="repeat"><replaceable>OPTION</replaceable></arg>
+ <arg choice="req"><replaceable>COMMAND</replaceable></arg>
+ <arg><replaceable>COMMAND-ARGS</replaceable></arg>
+ </cmdsynopsis>
+</refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>
+ ltdbtool is a utility to manipulate CTDB's local TDB databases
+ (LTDBs) without connecting to a CTDB daemon.
+ </para>
+
+ <para>
+ It can be used to:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ dump the contents of an LTDB, optionally printing the CTDB
+ record header information,
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ convert between an LTDB and a non-clustered tdb
+ by adding or removing CTDB headers and
+ </para>
+ </listitem>
+ <listitem>
+ <para>convert between 64 and 32 bit LTDBs where the CTDB record
+ headers differ by 4 bytes of padding.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </refsect1>
+
+ <refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+ <varlistentry><term>-e</term>
+ <listitem>
+ <para>
+ Dump empty records. These are normally excluded.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-p</term>
+ <listitem>
+ <para>
+ Dump with header information, similar to "ctdb catdb".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ -s
+ <group choice="req">
+ <arg choice="plain">0</arg>
+ <arg choice="plain">32</arg>
+ <arg choice="plain">64</arg>
+ </group>
+ </term>
+ <listitem>
+ <para>
+ Specify how to determine the CTDB record header size
+ for the input database:
+ <variablelist spacing="normal">
+ <varlistentry><term>0</term>
+ <listitem>
+ <para>no CTDB header</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry><term>32</term>
+ <listitem>
+ <para>CTDB header size of a 32 bit system (20 bytes)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry><term>64</term>
+ <listitem>
+ <para>CTDB header size of a 64 bit system (24 bytes)</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ The default is 32 or 64 depending on the system architecture.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ -o
+ <group choice="req">
+ <arg choice="plain">0</arg>
+ <arg choice="plain">32</arg>
+ <arg choice="plain">64</arg>
+ </group>
+ </term>
+ <listitem>
+ <para>
+ Specify how to determine the CTDB record header size
+ for the output database, see -s.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-S <parameter>SIZE</parameter></term>
+ <listitem>
+ <para>
+ Explicitly specify the CTDB record header SIZE of the
+ input database in bytes.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-O <parameter>SIZE</parameter></term>
+ <listitem>
+ <para>
+ Explicitly specify the CTDB record header SIZE for the
+ output database in bytes.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-h</term>
+ <listitem>
+ <para>
+ Print help text.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>COMMANDS</title>
+
+ <variablelist>
+ <varlistentry><term>help</term>
+ <listitem>
+ <para>
+ Print help text.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>dump <parameter>IDB</parameter></term>
+ <listitem>
+ <para>
+ Dump the contents of an LTDB input file IDB to standard
+ output in a human-readable format.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ convert <parameter>IDB</parameter> <parameter>ODB</parameter>
+ </term>
+ <listitem>
+ <para>
+ Copy an LTDB input file IDB to output file ODB, optionally
+ adding or removing CTDB headers.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>EXAMPLES</title>
+
+ <para>
+ Print a local tdb in "tdbdump" style:
+ </para>
+ <screen format="linespecific">
+ ltdbtool dump idmap2.tdb.0
+ </screen>
+
+ <para>
+ Print a local tdb with header information similar to "ctdb catdb":
+ </para>
+ <screen format="linespecific">
+ ltdbtool dump -p idmap2.tdb.0
+ </screen>
+
+ <para>
+ Strip the CTDB headers from records:
+ </para>
+ <screen format="linespecific">
+ ltdbtool convert -o0 idmap2.tdb.0 idmap.tdb
+ </screen>
+
+ <para>
+ Strip 64 bit CTDB headers from records, running on i386:
+ </para>
+ <screen format="linespecific">
+ ltdbtool convert -s64 -o0 idmap2.tdb.0 idmap.tdb
+ </screen>
+
+ <para>
+ Strip the CTDB headers from records by piping through tdbrestore:
+ </para>
+ <screen format="linespecific">
+ ltdbtool dump idmap2.tdb.0 | tdbrestore idmap.tdb
+ </screen>
+
+ <para>
+ Convert a local tdb from a 64 bit system for usage on a 32 bit system:
+ </para>
+ <screen format="linespecific">
+ ltdbtool convert -s64 -o32 idmap2.tdb.0 idmap2.tdb.1
+ </screen>
+
+ <para>
+ Add a default header:
+ </para>
+ <screen format="linespecific">
+ ltdbtool convert -s0 idmap.tdb idmap2.tdb.0
+ </screen>
+ </refsect1>
+
+ <refsect1><title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>tdbdump</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>tdbrestore</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry>,
+
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by Gregor Beck
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2011</year>
+ <holder>Gregor Beck</holder>
+ <holder>Michael Adam</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/onnode.1.xml b/ctdb/doc/onnode.1.xml
new file mode 100644
index 0000000..8e58ee7
--- /dev/null
+++ b/ctdb/doc/onnode.1.xml
@@ -0,0 +1,315 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="onnode.1">
+
+ <refmeta>
+ <refentrytitle>onnode</refentrytitle>
+ <manvolnum>1</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>onnode</refname>
+ <refpurpose>run commands on CTDB cluster nodes</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>onnode</command>
+ <arg rep="repeat"><replaceable>OPTION</replaceable></arg>
+ <arg choice="req"><replaceable>NODES</replaceable></arg>
+ <arg choice="req"><replaceable>COMMAND</replaceable></arg>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+ <para>
+ onnode is a utility to run commands on a specific node of a CTDB
+ cluster, or on all nodes.
+ </para>
+ <para>
+ <replaceable>NODES</replaceable> specifies which node(s) to run
+ a command on. See section <citetitle>NODES
+ SPECIFICATION</citetitle> for details.
+ </para>
+ <para>
+ <replaceable>COMMAND</replaceable> can be any shell command. The
+ onnode utility uses ssh or rsh to connect to the remote nodes
+ and run the command.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+ <varlistentry><term>-c</term>
+ <listitem>
+ <para>
+ Execute COMMAND in the current working directory on the
+ specified nodes.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-f <parameter>FILENAME</parameter></term>
+ <listitem>
+ <para>
+ Specify an alternative nodes FILENAME to use instead of
+ the default. See the discussion of
+ <filename>/usr/local/etc/ctdb/nodes</filename> in the
+ FILES section for more details.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-i</term>
+ <listitem>
+ <para>
+ Keep standard input open, allowing data to be piped to
+ onnode. Normally onnode closes stdin to avoid surprises
+ when scripting. Note that this option is ignored when
+ using <option>-p</option> or if <envar>ONNODE_SSH</envar>
+ is set to anything other than "ssh".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-n</term>
+ <listitem>
+ <para>
+ Allow nodes to be specified by name rather than node
+ numbers. These nodes don't need to be listed in the nodes
+ file. You can avoid the nodes file entirely by combining
+ this with <code>-f /dev/null</code>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-p</term>
+ <listitem>
+ <para>
+ Run COMMAND in parallel on the specified nodes. The
+ default is to run COMMAND sequentially on each node.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-P</term>
+ <listitem>
+ <para>
+ Push files to nodes. Names of files to push are specified
+ rather than the usual command. Quoting is fragile/broken
+ - filenames with whitespace in them are not supported.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-q</term>
+ <listitem>
+ <para>
+ Do not print node addresses. Normally, onnode prints
+ informational node addresses if more than one node is
+ specified. This overrides -v.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-v</term>
+ <listitem>
+ <para>
+ Print node addresses even if only one node is specified.
+ Normally, onnode prints informational node addresses when
+ more than one node is specified.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>-h, --help</term>
+ <listitem>
+ <para>
+ Show a short usage guide.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>NODES SPECIFICATION</title>
+
+ <para>
+ Nodes can be specified via numeric node numbers (from 0 to N-1)
+ or mnemonics. Multiple nodes are specified using lists of
+ nodes, separated by commas, and ranges of numeric node numbers,
+ separated by dashes. If nodes are specified multiple times then
+ the command will be executed multiple times on those nodes. The
+ order of nodes is significant.
+ </para>
+
+ <para>
+ The following mnemonics are available:
+ </para>
+
+ <variablelist>
+ <varlistentry><term>all</term>
+ <listitem>
+ <para>
+ All nodes.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry><term>any</term>
+ <listitem>
+ <para>
+ A node where ctdbd is running. This is semi-random but
+ there is a bias towards choosing a low numbered node.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry><term>ok | healthy</term>
+ <listitem>
+ <para>
+ All nodes that are not disconnected, banned, disabled or
+ unhealthy.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry><term>con | connected</term>
+ <listitem>
+ <para>
+ All nodes that are not disconnected.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>EXAMPLES</title>
+
+ <para>
+ The following command would show the process ID of ctdbd on all nodes
+ </para>
+ <screen format="linespecific">
+ onnode all ctdb getpid
+ </screen>
+
+ <para>
+ The following command would show the last 5 lines of log on each
+ node, preceded by the node's hostname
+ </para>
+ <screen format="linespecific">
+ onnode all "hostname; tail -5 /usr/local/var/log/log.ctdb"
+ </screen>
+
+ <para>
+ The following command would restart the ctdb service on all
+ nodes, in parallel.
+ </para>
+ <screen format="linespecific">
+ onnode -p all service ctdb restart
+ </screen>
+
+ <para>
+ The following command would run ./foo in the current working
+ directory, in parallel, on nodes 0, 2, 3 and 4.
+ </para>
+ <screen format="linespecific">
+ onnode -c -p 0,2-4 ./foo
+ </screen>
+ </refsect1>
+
+ <refsect1>
+ <title>FILES</title>
+
+ <variablelist>
+ <varlistentry><term><filename>/usr/local/etc/ctdb/nodes</filename></term>
+ <listitem>
+ <para>
+ Default file containing a list of each node's IP address
+ or hostname.
+ </para>
+ <para>
+ As above, a file specified via the <option>-f</option>
+ is given precedence. If a
+ relative path is specified and no corresponding file
+ exists relative to the current directory then the file is
+ also searched for in the CTDB configuration directory.
+ </para>
+ <para>
+ Otherwise the default is
+ <filename>/usr/local/etc/ctdb/nodes</filename>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><filename>/usr/local/etc/ctdb/onnode.conf</filename></term>
+ <listitem>
+ <para>
+ If this file exists it is sourced by onnode. The main
+ purpose is to allow the administrator to set
+ <envar>ONNODE_SSH</envar> to something other than "ssh".
+ In this case the -i option is ignored.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="http://ctdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by
+ Andrew Tridgell,
+ Martin Schwenke
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Andrew Tridgell</holder>
+ <holder>Ronnie Sahlberg</holder>
+ </copyright>
+ <copyright>
+ <year>2008</year>
+ <holder>Martin Schwenke</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/ping_pong.1.xml b/ctdb/doc/ping_pong.1.xml
new file mode 100644
index 0000000..cbb30b5
--- /dev/null
+++ b/ctdb/doc/ping_pong.1.xml
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry
+ PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<refentry id="ping_pong.1">
+
+ <refmeta>
+ <refentrytitle>ping_pong</refentrytitle>
+ <manvolnum>1</manvolnum>
+ <refmiscinfo class="source">ctdb</refmiscinfo>
+ <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ping_pong</refname>
+ <refpurpose>measures the ping-pong byte range lock latency</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ping_pong</command>
+ <group choice="req">
+ <arg choice="plain">-r</arg>
+ <arg choice="plain">-w</arg>
+ <arg choice="plain">-rw</arg>
+ </group>
+ <arg>-m</arg>
+ <arg>-c</arg>
+ <arg choice="req"><replaceable>FILENAME</replaceable></arg>
+ <arg choice="req"><replaceable>NUM-LOCKS</replaceable></arg>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+ <para>
+ ping_pong measures the byte range lock latency. It is especially
+ useful on a cluster of nodes sharing a common lock manager as it
+ will give some indication of the lock manager's performance
+ under stress.
+ </para>
+
+ <para>
+ FILENAME is a file on shared storage to use for byte range
+ locking tests.
+ </para>
+
+ <para>
+ NUM-LOCKS is the number of byte range locks, so needs to be
+ (strictly) greater than the number of nodes in the cluster.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+ <varlistentry>
+ <term>-r</term>
+ <listitem>
+ <para>
+ test read performance
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-w</term>
+ <listitem>
+ <para>
+ test write performance
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-m</term>
+ <listitem>
+ <para>
+ use mmap
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-c</term>
+ <listitem>
+ <para>
+ validate the locks
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>EXAMPLES</title>
+ <para>
+ Testing lock coherence
+ </para>
+ <screen format="linespecific">
+ ping_pong test.dat N
+ </screen>
+
+ <para>
+ Testing lock coherence with lock validation
+ </para>
+ <screen format="linespecific">
+ ping_pong -c test.dat N
+ </screen>
+
+ <para>
+ Testing IO coherence
+ </para>
+ <screen format="linespecific">
+ ping_pong -rw test.dat N
+ </screen>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+ <citerefentry><refentrytitle>ctdb</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry>,
+
+ <ulink url="https://wiki.samba.org/index.php/Ping_pong"/>
+
+ </para>
+ </refsect1>
+
+ <refentryinfo>
+ <author>
+ <contrib>
+ This documentation was written by Mathieu Parent
+ </contrib>
+ </author>
+
+ <copyright>
+ <year>2002</year>
+ <holder>Andrew Tridgell</holder>
+ </copyright>
+ <legalnotice>
+ <para>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 3 of
+ the License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, see
+ <ulink url="http://www.gnu.org/licenses"/>.
+ </para>
+ </legalnotice>
+ </refentryinfo>
+
+</refentry>
diff --git a/ctdb/doc/readonlyrecords.txt b/ctdb/doc/readonlyrecords.txt
new file mode 100644
index 0000000..e7be1c3
--- /dev/null
+++ b/ctdb/doc/readonlyrecords.txt
@@ -0,0 +1,343 @@
+Read-Only locks in CTDB
+=======================
+
+Problem
+=======
+CTDB currently only supports exclusive Read-Write locks for clients(samba) accessing the
+TDB databases.
+This mostly works well but when very many clients are accessing the same file,
+at the same time, this causes the exclusive lock as well as the record itself to
+rapidly bounce between nodes and acts as a scalability limitation.
+
+This primarily affects locking.tdb and brlock.tdb, two databases where record access is
+read-mostly and where writes are semi-rare.
+
+For the common case, if CTDB provided shared non-exclusive Read-Only lock semantics
+this would greatly improve scaling for these workloads.
+
+
+Desired properties
+==================
+We can not make backward incompatible changes to the ctdb_ltdb header for the records.
+
+A Read-Only lock enabled ctdb daemon must be able to interoperate with a non-Read-Only
+lock enabled daemon.
+
+Getting a Read-Only lock should not be slower than getting a Read-Write lock.
+
+When revoking Read-Only locks for a record, this should involve only those nodes that
+currently hold a Read-Only lock and should avoid broadcasting opportunistic revocations.
+(must track which nodes are delegated to)
+
+When a Read-Write lock is requested, if there are Read-Only locks delegated to other
+nodes, the DMASTER will defer the record migration until all read-only locks are first
+revoked (synchronous revoke).
+
+Due to the cost that revoking Read-Only locks has on getting a Read-Write lock, the
+implementation should try to avoid creating Read-Only locks unless it has indication
+that there is contention. This may mean that even if client requests a Read-Only lock
+we might still provide a full Read-Write lock in order to avoid the cost of revoking
+the locks in some cases.
+
+Read-Only locks require additional state to be stored in a separate database, containing
+information about which nodes have been delegated Read-Only locks.
+This database should be kept at minimal size.
+
+Read-Only locks should not significantly complicate the normal record
+create/migration/deletion cycle for normal records.
+
+Read-Only locks should not complicate the recovery process.
+
+Read-Only locks should not complicate the vacuuming process.
+
+We should avoid forking new child processes as far as possible from the main daemon.
+
+Client-side implementation, samba, libctdb, others, should have minimal impact when
+Read-Only locks are implemented.
+Client-side implementation must be possible with only minor conditionals added to the
+existing lock-check-fetch-unlock loop that clients use today for Read-Write locks. So
+that clients only need one single loop that can handle both Read-Write locking as well
+as Read-Only locking. Clients should not need two nearly identical loops.
+
+
+Implementation
+==============
+
+Four new flags are allocated in the ctdb_ltdb record header.
+HAVE_DELEGATIONS, HAVE_READONLY_LOCK, REVOKING_READONLY and REVOKE_COMPLETE
+
+HAVE_DELEGATIONS is a flag that can only be set on the node that is currently the
+DMASTER for the record. When set, this flag indicates that there are Read-Only locks
+delegated to other nodes in the cluster for this record.
+
+HAVE_READONLY_LOCK is a flag that is only set on nodes that are NOT the DMASTER for the
+record. If set this flag indicates that this record contains an up-to-date Read-Only
+version of this record. A client that only needs to read, but not to write, the record
+can safely use the content of this record as is regardless of the value of the DMASTER
+field of the record.
+
+REVOKING_READONLY is a flag that is used while a set of read only delegations are being
+revoked.
+This flag is only set when HAVE_DELEGATIONS is also set, and is cleared at the same time
+as HAVE_DELEGATIONS is cleared.
+Normal operation is that first the HAVE_DELEGATIONS flag is set when the first
+delegation is generated. When the delegations are about to be revoked, the
+REVOKING_READONLY flag is set too.
+Once all delegations are revoked, both flags are cleared at the same time.
+While REVOKING_READONLY is set, any requests for the record, either normal request or
+request for readonly will be deferred.
+Deferred requests are linked on a list for deferred requests until the time that the
+revocation is completed.
+This flag is set by the main ctdb daemon when it starts revoking this record.
+
+REVOKE_COMPLETE
+The actual revoke of records is done by a child process, spawned from the main ctdb
+daemon when it starts the process to revoke the records.
+Once the child process has finished revoking all delegations it will set the flag
+REVOKE_COMPLETE for this record to signal to the main daemon that the record has been
+successfully revoked.
+At this stage the child process will also trigger an event in the main daemon that
+revoke is complete and that the main daemon should start re-processing all deferred
+requests.
+
+
+
+Once the revoke process is completed there will be at least one deferred request to
+access this record. That is the initial call for an exclusive fetch_lock() that
+triggered the revoke process to be started.
+In addition to this deferred request there may also be additional requests that have
+also become deferred while the revoke was in process. These can be either exclusive
+fetch_locks() or they can be readonly lock requests.
+Once the revoke is completed the main daemon will reprocess all exclusive fetch_lock()
+requests immediately and respond to these clients.
+Any readonly lock requests will be deferred for an additional period of
+time before they are re-processed.
+This is to allow the client that needs a fetch_lock() to update the record to get some
+time to access and work on the record without having to compete with the possibly
+very many readonly requests.
+
+
+
+
+
+The ctdb_db structure is expanded so that it contains one extra TDB database for each
+normal, non-persistent database.
+This new database is used for tracking delegations for the records.
+A record in the normal database that has "HAVE_DELEGATIONS" set will always have a
+corresponding record at the same key. This record contains the set of all nodes that
+the record is delegated to.
+This tracking database is lockless, using TDB_NOLOCK, and is only ever accessed by
+the main ctdbd daemon.
+The lockless nature and the fact that no other process ever accesses this TDB means we
+are guaranteed non-blocking access to records in the tracking database.
+
+The ctdb_call PDU is allocated with a new flag WANT_READONLY and possibly also a new
+callid: CTDB_FETCH_WITH_HEADER_FUNC.
+This new function returns not only the record, as CTDB_FETCH_FUNC does, but also
+returns the full ctdb_ltdb record HEADER prepended to the record.
+This function is optional, clients that do not care what the header is can continue
+using just CTDB_FETCH_FUNC
+
+
+The WANT_READONLY flag is used to request a read-only record from the DMASTER/LMASTER.
+If the record does not yet exist, this is returned as an error to the client and the
+client will retry the request loop.
+
+A new control is added to make remote nodes remove the HAVE_READONLY_LOCK from a record
+and to invalidate any deferred readonly copies from the databases.
+
+
+
+Client implementation
+=====================
+Clients today use a loop for record fetch lock that looks like this
+ try_again:
+ lock record in tdb
+
+ if record does not exist in tdb,
+ unlock record
+ ask ctdb to migrate record onto the node
+ goto try_again
+
+ if record dmaster != this node pnn
+ unlock record
+ ask ctdb to migrate record onto the node
+ goto try_again
+
+ finished:
+
+where we basically spin, until the record is migrated onto the node and we have managed
+to pin it down.
+
+This will change to something like
+
+ try_again:
+ lock record in tdb
+
+ if record does not exist in tdb,
+ unlock record
+ ask ctdb to migrate record onto the node
+ goto try_again
+
+ if record dmaster == current node pnn
+ goto finished
+
+ if read-only lock
+ if HAVE_READONLY_LOCK or HAVE_DELEGATIONS is set
+ goto finished
+ else
+ unlock record
+ ask ctdb for read-only copy (WANT_READONLY[|WITH_HEADER])
+ if failed to get read-only copy (*A)
+ ask ctdb to migrate the record onto the node
+ goto try_again
+ lock record in tdb
+ goto finished
+
+ unlock record
+ ask ctdb to migrate record onto the node
+ goto try_again
+
+ finished:
+
+If the record does not yet exist in the local TDB, we always perform a full fetch for a
+Read-Write lock even if only a Read-Only lock was requested.
+This means that for first access we always grab a Read-Write lock and thus upgrade any
+requests for Read-Only locks into a Read-Write request.
+This creates the record, migrates it onto the node and makes the local node become
+the DMASTER for the record.
+
+Future reference to this same record by the local samba daemons will still access/lock
+the record locally without triggering a Read-Only delegation to be created since the
+record is already hosted on the local node as DMASTER.
+
+Only if the record is contended, i.e. it has been created and migrated onto the node but
+we are no longer the DMASTER for this record, only for this case will we create a
+Read-Only delegation.
+This heuristic provides a mechanism where we will not create Read-Only delegations until
+we have some indication that the record may be contended.
+
+This avoids creating and revoking Read-Only delegations when only a single client is
+repeatedly accessing the same set of records.
+This also aims to limit the size of the tracking tdb.
+
+
+Server implementation
+=====================
+When receiving a ctdb_call with the WANT_READONLY flag:
+
+If this is the LMASTER for the record and the record does not yet exist, LMASTER will
+return an error back to the client (*A above) and the client will try to recover.
+In particular, LMASTER will not create a new record for this case.
+
+If this is the LMASTER for the record and the record exists, the PDU will be forwarded to
+the DMASTER for the record.
+
+If this node is not the DMASTER for this record, we forward the PDU back to the
+LMASTER. Just as we always do today.
+
+If this is the DMASTER for the record, we need to create a Read-Only delegation.
+This is done by
+ lock record
+ increase the RSN by one for this record
+ set the HAVE_DELEGATIONS flag for the record
+ write the updated record to the TDB
+ create/update the tracking TDB and add this new node to the set of delegations
+ send a modified copy of the record back to the requesting client.
+ modifications are that RSN is decremented by one, so delegated records are "older" than on the DMASTER,
+ it has HAVE_DELEGATIONS flag stripped off, and has HAVE_READONLY_LOCK added.
+ unlock record
+
+Important to note is that this does not trigger a record migration.
+
+
+When receiving a ctdb_call without the WANT_READONLY flag:
+
+If this is the DMASTER for the record, this might trigger a migration. If there exist
+delegations we must first revoke these before allowing the Read-Write request to
+proceed. So,
+if the record has HAVE_DELEGATIONS set, we create a child process and defer processing
+of this PDU until the child process has completed.
+
+From the child process we will call out to all nodes that have delegations for this
+record and tell them to invalidate this record by clearing the HAVE_READONLY_LOCK from
+the record.
+Once all delegated nodes respond back, the child process signals back to the main daemon
+that the revoke has completed. (child process may not access the tracking tdb since it is
+lockless)
+
+Main process is triggered to re-process the PDU once the child process has finished.
+Main daemon deletes the corresponding record in the tracking database, clears the
+HAVE_DELEGATIONS flag for the record and then proceeds to perform the migration as usual.
+
+When receiving a ctdb_call without the flag we want all delegations to be revoked,
+so we must take care that the delegations are revoked unconditionally before we even
+check if we are already the DMASTER (in which case the ctdb_call would normally just
+be no-op (*B below))
+
+
+
+Recovery process changes
+========================
+A recovery implicitly clears/revokes any read only records and delegations from all
+databases.
+
+During delegations of Read-Only locks, this is done in such way that delegated records
+will have a RSN smaller than the DMASTER. This guarantees that read-only copies always
+have a RSN that is smaller than the DMASTER.
+
+During recoveries we do not need to take any special action other than always picking
+the copy of the record that has the highest RSN, which is what we already do today.
+
+During the recovery process, we strip all flags off all records while writing the new
+content of the database during the PUSH_DB control.
+
+During processing of the PUSH_DB control and once the new database has been written we
+then also wipe the tracking database.
+
+This makes changes to the recovery process minimal and nonintrusive.
+
+
+
+Vacuuming process
+=================
+Vacuuming needs only minimal changes.
+
+
+When vacuuming runs, it will do a fetch_lock to migrate any remote records back onto the
+LMASTER before the record can be purged. This will automatically force all delegations
+for that record to be revoked before the migration is copied back onto the LMASTER.
+This handles the case where LMASTER is not the DMASTER for the record that will be
+purged.
+The migration in this case does force any delegations to be revoked before the
+vacuuming takes place.
+
+Missing is the case when delegations exist and the LMASTER is also the DMASTER.
+For this case we need to change the vacuuming to unconditionally always try to do a
+fetch_lock when HAVE_DELEGATIONS is set, even if the record is already stored locally.
+(*B)
+This fetch lock will not cause any migrations by the ctdb daemon, but since it does
+not have the WANT_READONLY this will still force the delegations to be revoked but no
+migration will trigger.
+
+
+Traversal process
+=================
+Traversal process is changed to ignore any records with the HAVE_READONLY_LOCK
+
+
+Forward/Backward Compatibility
+==============================
+Non-readonly locking daemons must be able to interoperate with readonly locking enabled daemons.
+
+Non-readonly enabled daemons fetching records from Readonly enabled daemons:
+Non-readonly enabled daemons do not know, and never set the WANT_READONLY flag so these daemons will always request a full migration for a full fetch-lock for all records. Thus a request from a non-readonly enabled daemon will always cause any existing delegations to be immediately revoked. Access will work but performance may be harmed since there will be a lot of revoking of delegations.
+
+Readonly enabled daemons fetching records with WANT_READONLY from non-readonly enabled daemons:
+Non-readonly enabled daemons ignore the WANT_READONLY flag and never return delegations. They always return a full record migration.
+Full record migration is allowed by the protocol, even if the originator only requests the 'hint' WANT_READONLY,
+so this access also interoperates between daemons with different capabilities.
+
+
+
+
diff --git a/ctdb/event/event.h b/ctdb/event/event.h
new file mode 100644
index 0000000..13617c0
--- /dev/null
+++ b/ctdb/event/event.h
@@ -0,0 +1,55 @@
+/*
+ CTDB event daemon client
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_EVENT_H__
+#define __CTDB_EVENT_H__
+
+#include "event/event_protocol.h"
+
+struct ctdb_event_context;
+
+int ctdb_event_init(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_event_context **result);
+
+struct tevent_req *ctdb_event_run_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_event_context *eclient,
+ struct ctdb_event_request_run *run);
+bool ctdb_event_run_recv(struct tevent_req *req, int *perr, int *result);
+
+struct tevent_req *ctdb_event_status_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_event_context *eclient,
+ struct ctdb_event_request_status *status);
+bool ctdb_event_status_recv(struct tevent_req *req,
+ int *perr,
+ int *result,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_event_reply_status **status);
+
+struct tevent_req *ctdb_event_script_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_event_context *eclient,
+ struct ctdb_event_request_script *script);
+bool ctdb_event_script_recv(struct tevent_req *req, int *perr, int *result);
+
+#endif /* __CTDB_EVENT_H__ */
diff --git a/ctdb/event/event_client.c b/ctdb/event/event_client.c
new file mode 100644
index 0000000..d0bee0d
--- /dev/null
+++ b/ctdb/event/event_client.c
@@ -0,0 +1,351 @@
+/*
+ CTDB event daemon client
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "common/logging.h"
+#include "common/path.h"
+#include "common/sock_client.h"
+
+#include "event/event_protocol_api.h"
+#include "event/event.h"
+
+/*
+ * Client-side handle for talking to the event daemon.
+ * Created and filled in by ctdb_event_init().
+ */
+struct ctdb_event_context {
+ char *socket; /* socket path returned by path_socket(..., "eventd") */
+ struct sock_client_context *sockc; /* connection set up by sock_client_setup() */
+};
+
+/*
+ * A request together with the protocol header it is sent under.
+ * This is the request_data object passed to event_request_push().
+ */
+struct event_request {
+ struct ctdb_event_header header; /* reqid is filled in by event_request_push() */
+ struct ctdb_event_request *request; /* not owned: pointer supplied by the caller */
+};
+
+/*
+ * A parsed reply together with its protocol header.
+ * Allocated and populated by event_reply_pull().
+ */
+struct event_reply {
+ struct ctdb_event_header header;
+ struct ctdb_event_reply *reply; /* allocated as a talloc child of this struct */
+};
+
+/*
+ * sock_client "request_push" hook: marshal an event request into a buffer.
+ *
+ * request_data  - the struct event_request to send
+ * reqid         - request id, stored in the request header before marshalling
+ * mem_ctx       - talloc context the output buffer is allocated on
+ * buf, buflen   - out: the marshalled packet and its length, as reported by
+ *                 ctdb_event_request_push()
+ * private_data  - unused
+ *
+ * Returns 0 on success, ENOMEM if the buffer cannot be allocated, or the
+ * error returned by ctdb_event_request_push().  On failure *buf is NULL,
+ * so the caller is never left holding a pointer to freed memory.
+ */
+static int event_request_push(void *request_data,
+			      uint32_t reqid,
+			      TALLOC_CTX *mem_ctx,
+			      uint8_t **buf,
+			      size_t *buflen,
+			      void *private_data)
+{
+	struct event_request *r = (struct event_request *)request_data;
+	int ret;
+
+	r->header.reqid = reqid;
+
+	*buflen = ctdb_event_request_len(&r->header, r->request);
+	*buf = talloc_size(mem_ctx, *buflen);
+	if (*buf == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_event_request_push(&r->header, r->request, *buf, buflen);
+	if (ret != 0) {
+		/* Free the buffer and reset the out-parameter in one step. */
+		TALLOC_FREE(*buf);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * sock_client "reply_pull" hook: unmarshal a reply packet.
+ *
+ * A struct event_reply is allocated on mem_ctx and the parsed reply is
+ * allocated as its talloc child.  On success the wrapper is handed back
+ * through *reply_data and 0 is returned.  Returns ENOMEM if the wrapper
+ * cannot be allocated, or the error from ctdb_event_reply_pull(); in both
+ * cases *reply_data is left untouched.  private_data is unused.
+ */
+static int event_reply_pull(uint8_t *buf,
+			    size_t buflen,
+			    TALLOC_CTX *mem_ctx,
+			    void **reply_data,
+			    void *private_data)
+{
+	struct event_reply *parsed;
+	int err;
+
+	parsed = talloc_zero(mem_ctx, struct event_reply);
+	if (parsed == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_event_reply_pull(buf,
+				    buflen,
+				    &parsed->header,
+				    parsed,
+				    &parsed->reply);
+	if (err == 0) {
+		*reply_data = parsed;
+		return 0;
+	}
+
+	/* Parsing failed: drop the half-built wrapper. */
+	talloc_free(parsed);
+	return err;
+}
+
+/*
+ * sock_client "reply_reqid" hook: read the request id out of a raw reply
+ * packet.
+ *
+ * Returns 0 and stores the id in *reqid on success; otherwise returns the
+ * error from ctdb_event_header_extract() and leaves *reqid untouched.
+ * private_data is unused.
+ */
+static int event_reply_reqid(uint8_t *buf,
+			     size_t buflen,
+			     uint32_t *reqid,
+			     void *private_data)
+{
+	struct ctdb_event_header hdr;
+	int err;
+
+	err = ctdb_event_header_extract(buf, buflen, &hdr);
+	if (err == 0) {
+		*reqid = hdr.reqid;
+	}
+
+	return err;
+}
+
+/*
+ * Marshalling hooks for the event daemon protocol; passed to
+ * sock_client_setup() by ctdb_event_init().
+ */
+struct sock_client_proto_funcs event_proto_funcs = {
+ .request_push = event_request_push,
+ .reply_pull = event_reply_pull,
+ .reply_reqid = event_reply_reqid,
+};
+
+/*
+ * Create a client context for the event daemon.
+ *
+ * The context is allocated on mem_ctx, the "eventd" socket path is looked
+ * up with path_socket(), and a sock_client is set up on that path using
+ * event_proto_funcs.
+ *
+ * Returns 0 and stores the new context in *result on success.  Returns
+ * ENOMEM if an allocation or the path lookup fails, or the error from
+ * sock_client_setup(); on failure nothing is left allocated and *result
+ * is untouched.
+ */
+int ctdb_event_init(TALLOC_CTX *mem_ctx,
+		    struct tevent_context *ev,
+		    struct ctdb_event_context **result)
+{
+	struct ctdb_event_context *ctx;
+	int err;
+
+	ctx = talloc_zero(mem_ctx, struct ctdb_event_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->socket = path_socket(ctx, "eventd");
+	if (ctx->socket == NULL) {
+		err = ENOMEM;
+		goto fail;
+	}
+
+	err = sock_client_setup(ctx,
+				ev,
+				ctx->socket,
+				&event_proto_funcs,
+				ctx,
+				&ctx->sockc);
+	if (err != 0) {
+		goto fail;
+	}
+
+	*result = ctx;
+	return 0;
+
+fail:
+	/* Everything allocated so far hangs off ctx. */
+	talloc_free(ctx);
+	return err;
+}
+
+/*
+ * Handle request and reply
+ */
+
+/* Per-request state for ctdb_event_msg_send() / ctdb_event_msg_recv(). */
+struct ctdb_event_msg_state {
+ struct event_request e_request; /* outgoing request plus its header */
+ struct event_reply *e_reply; /* filled in by ctdb_event_msg_done() */
+};
+
+static void ctdb_event_msg_done(struct tevent_req *subreq);
+
+/*
+ * Start an asynchronous request/reply exchange with the event daemon.
+ *
+ * Returns NULL if the tevent request cannot be created.  Otherwise returns
+ * a request that completes in ctdb_event_msg_done(); collect the outcome
+ * with ctdb_event_msg_recv().
+ */
+static struct tevent_req *ctdb_event_msg_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_event_context *eclient,
+ struct ctdb_event_request *request)
+{
+ struct tevent_req *req, *subreq;
+ struct ctdb_event_msg_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct ctdb_event_msg_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ /*
+ * Only the pointer is stored, not a copy of the request.
+ * NOTE(review): the public *_send() wrappers in this file pass the
+ * address of a stack variable here, so this is only safe if
+ * sock_client_msg_send() finishes marshalling the request before it
+ * returns - confirm against sock_client.
+ */
+ state->e_request.request = request;
+
+ /* The sub-request is allocated on mem_ctx, not on state. */
+ subreq = sock_client_msg_send(mem_ctx,
+ ev,
+ eclient->sockc,
+ tevent_timeval_zero(), /* presumably "no timeout" - TODO confirm */
+ &state->e_request);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, ctdb_event_msg_done, req);
+
+ return req;
+}
+
+/*
+ * Completion callback for the sock_client exchange started by
+ * ctdb_event_msg_send().  Stores the reply (allocated on state) and
+ * finishes the parent request, either successfully or with the error
+ * reported by sock_client_msg_recv().
+ */
+static void ctdb_event_msg_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ctdb_event_msg_state *state = tevent_req_data(
+		req, struct ctdb_event_msg_state);
+	int err = 0;
+	bool received;
+
+	received = sock_client_msg_recv(subreq, &err, state, &state->e_reply);
+	TALLOC_FREE(subreq);
+
+	if (received) {
+		tevent_req_done(req);
+	} else {
+		tevent_req_error(req, err);
+	}
+}
+
+/*
+ * Collect the result of ctdb_event_msg_send().
+ *
+ * On success returns true and moves the parsed reply onto mem_ctx,
+ * returning it through *reply.  On failure returns false, stores the
+ * error in *perr when perr is not NULL, and leaves *reply untouched.
+ */
+static bool ctdb_event_msg_recv(struct tevent_req *req,
+				int *perr,
+				TALLOC_CTX *mem_ctx,
+				struct ctdb_event_reply **reply)
+{
+	struct ctdb_event_msg_state *state = tevent_req_data(
+		req, struct ctdb_event_msg_state);
+	int err = 0;
+
+	if (!tevent_req_is_unix_error(req, &err)) {
+		*reply = talloc_steal(mem_ctx, state->e_reply->reply);
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * API functions
+ */
+
+/*
+ * Ask the event daemon to run an event (CTDB_EVENT_CMD_RUN).
+ *
+ * Wraps run in a request and hands it to ctdb_event_msg_send(); returns
+ * whatever that returns.  Collect the result with ctdb_event_run_recv().
+ */
+struct tevent_req *ctdb_event_run_send(TALLOC_CTX *mem_ctx,
+				       struct tevent_context *ev,
+				       struct ctdb_event_context *eclient,
+				       struct ctdb_event_request_run *run)
+{
+	struct ctdb_event_request request = {
+		.cmd = CTDB_EVENT_CMD_RUN,
+		.data = { .run = run },
+	};
+
+	return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the result of ctdb_event_run_send().
+ *
+ * Returns false (with the error in *perr, if perr is not NULL) when the
+ * exchange itself failed; *result is then untouched.  Otherwise returns
+ * true and sets *result to the daemon's result code, or to EPROTO if the
+ * reply is not a RUN reply.
+ */
+bool ctdb_event_run_recv(struct tevent_req *req, int *perr, int *result)
+{
+	struct ctdb_event_reply *reply = NULL;
+
+	/* The reply is parked on req until it is freed below. */
+	if (!ctdb_event_msg_recv(req, perr, req, &reply)) {
+		return false;
+	}
+
+	if (reply->cmd == CTDB_EVENT_CMD_RUN) {
+		*result = reply->result;
+	} else {
+		*result = EPROTO;
+	}
+
+	talloc_free(reply);
+	return true;
+}
+
+/*
+ * Ask the event daemon for the status of an event
+ * (CTDB_EVENT_CMD_STATUS).
+ *
+ * Wraps status in a request and hands it to ctdb_event_msg_send(); returns
+ * whatever that returns.  Collect the result with
+ * ctdb_event_status_recv().
+ */
+struct tevent_req *ctdb_event_status_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_event_context *eclient,
+					struct ctdb_event_request_status *status)
+{
+	struct ctdb_event_request request = {
+		.cmd = CTDB_EVENT_CMD_STATUS,
+		.data = { .status = status },
+	};
+
+	return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the result of ctdb_event_status_send().
+ *
+ * Returns false (with the error in *perr, if perr is not NULL) when the
+ * exchange itself failed; *result and *status are then untouched.
+ *
+ * Otherwise returns true and:
+ *  - if the reply is not a STATUS reply, sets *result to EPROTO and
+ *    *status to NULL;
+ *  - else sets *result to the daemon's result code and, when that code is
+ *    0, moves the status payload onto mem_ctx and returns it in *status
+ *    (NULL for a non-zero result).
+ */
+bool ctdb_event_status_recv(struct tevent_req *req,
+			    int *perr,
+			    int *result,
+			    TALLOC_CTX *mem_ctx,
+			    struct ctdb_event_reply_status **status)
+{
+	struct ctdb_event_reply *reply = NULL;
+	bool ok;
+
+	/* The reply is parked on req until it is freed below. */
+	ok = ctdb_event_msg_recv(req, perr, req, &reply);
+	if (!ok) {
+		return false;
+	}
+
+	if (reply->cmd != CTDB_EVENT_CMD_STATUS) {
+		/*
+		 * reply->data holds a different union member for other
+		 * commands, so data.status must not be read or handed out.
+		 */
+		*result = EPROTO;
+		*status = NULL;
+	} else {
+		*result = reply->result;
+		if (reply->result == 0) {
+			*status = talloc_steal(mem_ctx, reply->data.status);
+		} else {
+			*status = NULL;
+		}
+	}
+
+	talloc_free(reply);
+	return true;
+}
+
+/*
+ * Ask the event daemon to act on an event script
+ * (CTDB_EVENT_CMD_SCRIPT).
+ *
+ * Wraps script in a request and hands it to ctdb_event_msg_send(); returns
+ * whatever that returns.  Collect the result with
+ * ctdb_event_script_recv().
+ */
+struct tevent_req *ctdb_event_script_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_event_context *eclient,
+					struct ctdb_event_request_script *script)
+{
+	struct ctdb_event_request request = {
+		.cmd = CTDB_EVENT_CMD_SCRIPT,
+		.data = { .script = script },
+	};
+
+	return ctdb_event_msg_send(mem_ctx, ev, eclient, &request);
+}
+
+/*
+ * Collect the result of ctdb_event_script_send().
+ *
+ * Returns false (with the error in *perr, if perr is not NULL) when the
+ * exchange itself failed; *result is then untouched.  Otherwise returns
+ * true and sets *result to the daemon's result code, or to EPROTO if the
+ * reply is not a SCRIPT reply.
+ */
+bool ctdb_event_script_recv(struct tevent_req *req, int *perr, int *result)
+{
+	struct ctdb_event_reply *reply = NULL;
+
+	/* The reply is parked on req until it is freed below. */
+	if (!ctdb_event_msg_recv(req, perr, req, &reply)) {
+		return false;
+	}
+
+	if (reply->cmd == CTDB_EVENT_CMD_SCRIPT) {
+		*result = reply->result;
+	} else {
+		*result = EPROTO;
+	}
+
+	talloc_free(reply);
+	return true;
+}
diff --git a/ctdb/event/event_cmd.c b/ctdb/event/event_cmd.c
new file mode 100644
index 0000000..db39e4d
--- /dev/null
+++ b/ctdb/event/event_cmd.c
@@ -0,0 +1,358 @@
+/*
+ CTDB event daemon - command handling
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "common/logging.h"
+
+#include "event/event_private.h"
+
+/*
+ * State shared by the per-command handlers below.  Only the RUN handler
+ * fills in and later reads these fields; the STATUS and SCRIPT handlers
+ * complete immediately and work on their arguments directly.
+ */
+struct event_cmd_state {
+ struct event_context *eventd;
+ struct ctdb_event_request *request;
+ struct ctdb_event_reply *reply;
+};
+
+/*
+ * CTDB_EVENT_CMD_RUN
+ */
+
+static void event_cmd_run_done(struct tevent_req *subreq);
+
+/*
+ * CTDB_EVENT_CMD_RUN handler: start running an event for a component.
+ *
+ * Returns NULL if the tevent request cannot be created.  If the run
+ * context for the component cannot be obtained, the error is stored in
+ * reply->result and the request completes immediately.  Otherwise the
+ * event is started with run_event_send() and completion is handled in
+ * event_cmd_run_done().
+ */
+static struct tevent_req *event_cmd_run_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct event_context *eventd,
+					struct ctdb_event_request *request,
+					struct ctdb_event_reply *reply)
+{
+	struct tevent_req *req, *subreq;
+	struct event_cmd_state *state;
+	struct run_event_context *run_ctx;
+	struct ctdb_event_request_run *run_req;
+	struct timeval deadline;
+	bool run_all;
+	int err;
+
+	req = tevent_req_create(mem_ctx, &state, struct event_cmd_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->eventd = eventd;
+	state->request = request;
+	state->reply = reply;
+
+	run_req = request->data.run;
+
+	err = eventd_run_ctx(eventd, run_req->component, &run_ctx);
+	if (err != 0) {
+		/* Reported through the reply, not as a request error. */
+		state->reply->result = err;
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	/* CTDB_EVENT_RUN_ALL asks to keep going after a failure. */
+	run_all = (run_req->flags & CTDB_EVENT_RUN_ALL) != 0;
+	deadline = tevent_timeval_current_ofs(run_req->timeout, 0);
+
+	subreq = run_event_send(state,
+				ev,
+				run_ctx,
+				run_req->event,
+				run_req->args,
+				deadline,
+				run_all);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, event_cmd_run_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the run started by event_cmd_run_send().
+ *
+ * The request always finishes with tevent_req_done(); problems are
+ * reported through state->reply->result:
+ *   error from run_event_recv()      -> that error
+ *   no script list returned          -> EIO
+ *   summary == -ECANCELED            -> ECANCELED (result is not recorded)
+ *   eventd_set_event_result() fails  -> that error
+ *   summary == -ETIMEDOUT            -> ETIMEDOUT
+ *   any other non-zero summary       -> ENOEXEC
+ * On full success reply->result is not written here.
+ */
+static void event_cmd_run_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct event_cmd_state *state = tevent_req_data(
+ req, struct event_cmd_state);
+ struct run_event_script_list *script_list = NULL;
+ struct ctdb_event_request_run *rdata;
+ int ret;
+ bool ok;
+
+ /* script_list is allocated on state. */
+ ok = run_event_recv(subreq, &ret, state, &script_list);
+ TALLOC_FREE(subreq);
+ if (!ok) {
+ state->reply->result = ret;
+ goto done;
+ }
+
+ if (script_list == NULL) {
+ state->reply->result = EIO;
+ goto done;
+ }
+
+ /* A cancelled run bails out before the result is recorded below. */
+ if (script_list->summary == -ECANCELED) {
+ state->reply->result = ECANCELED;
+ goto done;
+ }
+
+ /* Record the outcome so a later STATUS command can report it. */
+ rdata = state->request->data.run;
+ ret = eventd_set_event_result(state->eventd,
+ rdata->component,
+ rdata->event,
+ script_list);
+ if (ret != 0) {
+ state->reply->result = ret;
+ goto done;
+ }
+
+ if (script_list->summary == -ETIMEDOUT) {
+ state->reply->result = ETIMEDOUT;
+ } else if (script_list->summary != 0) {
+ state->reply->result = ENOEXEC;
+ }
+
+done:
+ tevent_req_done(req);
+}
+
+/*
+ * CTDB_EVENT_CMD_STATUS
+ */
+
+/*
+ * CTDB_EVENT_CMD_STATUS handler: report the stored result of an event.
+ *
+ * Returns NULL if the tevent request cannot be created.  Otherwise the
+ * request completes immediately (it is posted before returning) and the
+ * outcome is reported through reply:
+ *   reply->result       - 0 on success, ENOMEM on allocation failure, or
+ *                         the error from eventd_get_event_result()
+ *   reply->data.status  - allocated on reply; on success it carries a
+ *                         copy of the script list and the run summary
+ */
+static struct tevent_req *event_cmd_status_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct event_context *eventd,
+					struct ctdb_event_request *request,
+					struct ctdb_event_reply *reply)
+{
+	struct tevent_req *req;
+	struct event_cmd_state *state;
+	struct ctdb_event_request_status *rdata;
+	struct run_event_script_list *script_list;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state, struct event_cmd_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	reply->data.status = talloc_zero(reply,
+					 struct ctdb_event_reply_status);
+	if (reply->data.status == NULL) {
+		/*
+		 * Plain NULL check: the failure goes out via reply->result
+		 * and the request is finished exactly once at "done".
+		 */
+		reply->result = ENOMEM;
+		goto done;
+	}
+
+	/* A STATUS request carries its arguments in data.status. */
+	rdata = request->data.status;
+
+	ret = eventd_get_event_result(eventd,
+				      rdata->component,
+				      rdata->event,
+				      &script_list);
+	if (ret != 0) {
+		reply->result = ret;
+		goto done;
+	}
+
+	reply->data.status->script_list = eventd_script_list(reply,
+							     script_list);
+	if (reply->data.status->script_list == NULL) {
+		reply->result = ENOMEM;
+		goto done;
+	}
+	reply->data.status->summary = script_list->summary;
+
+	reply->result = 0;
+
+done:
+	tevent_req_done(req);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * CTDB_EVENT_CMD_SCRIPT
+ */
+
+/*
+ * CTDB_EVENT_CMD_SCRIPT handler: enable or disable an event script.
+ *
+ * Returns NULL if the tevent request cannot be created.  Otherwise the
+ * request completes immediately (it is posted before returning) and
+ * reply->result holds 0 on success, EPROTO for an unknown action, or the
+ * error from eventd_run_ctx() / run_event_script_enable() /
+ * run_event_script_disable().
+ */
+static struct tevent_req *event_cmd_script_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct event_context *eventd,
+					struct ctdb_event_request *request,
+					struct ctdb_event_reply *reply)
+{
+	struct tevent_req *req;
+	struct event_cmd_state *state;
+	struct run_event_context *run_ctx;
+	struct ctdb_event_request_script *script_req;
+	int err;
+
+	req = tevent_req_create(mem_ctx, &state, struct event_cmd_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	script_req = request->data.script;
+
+	err = eventd_run_ctx(eventd, script_req->component, &run_ctx);
+	if (err != 0) {
+		reply->result = err;
+		goto done;
+	}
+
+	switch (script_req->action) {
+	case CTDB_EVENT_SCRIPT_DISABLE:
+		err = run_event_script_disable(run_ctx, script_req->script);
+		break;
+
+	case CTDB_EVENT_SCRIPT_ENABLE:
+		err = run_event_script_enable(run_ctx, script_req->script);
+		break;
+
+	default:
+		D_ERR("Invalid action specified\n");
+		reply->result = EPROTO;
+		goto done;
+	}
+
+	/* 0 on success, otherwise the enable/disable error. */
+	reply->result = err;
+
+done:
+	tevent_req_done(req);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * Common receive function for the per-command handlers.
+ * Returns false and stores the error in *perr if the request failed,
+ * true otherwise.
+ */
+static bool event_cmd_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+
+/* State for event_cmd_dispatch_send() / event_cmd_dispatch_recv(). */
+struct event_cmd_dispatch_state {
+ struct ctdb_event_reply *reply; /* zero-initialised; filled in by the command handler */
+};
+
+static void event_cmd_dispatch_done(struct tevent_req *subreq);
+
+/*
+ * Dispatch a request to the handler for its command.
+ *
+ * A zero-initialised reply is allocated on the request state, tagged with
+ * the request's command and passed to the handler to fill in.  An unknown
+ * command is not a request error: the request completes immediately with
+ * reply->result set to EPROTO.
+ *
+ * Returns NULL if the tevent request cannot be created; collect the reply
+ * with event_cmd_dispatch_recv().
+ */
+struct tevent_req *event_cmd_dispatch_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct event_context *eventd,
+ struct ctdb_event_request *request)
+{
+ struct tevent_req *req, *subreq;
+ struct event_cmd_dispatch_state *state;
+
+ req = tevent_req_create(mem_ctx,
+ &state,
+ struct event_cmd_dispatch_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ /* talloc_zero: handlers that do not set reply->result leave it at 0. */
+ state->reply = talloc_zero(state, struct ctdb_event_reply);
+ if (tevent_req_nomem(state->reply, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ state->reply->cmd = request->cmd;
+
+ /* Each handler's sub-request is allocated on state. */
+ switch (request->cmd) {
+ case CTDB_EVENT_CMD_RUN:
+ subreq = event_cmd_run_send(state,
+ ev,
+ eventd,
+ request,
+ state->reply);
+ break;
+
+ case CTDB_EVENT_CMD_STATUS:
+ subreq = event_cmd_status_send(state,
+ ev,
+ eventd,
+ request,
+ state->reply);
+ break;
+
+ case CTDB_EVENT_CMD_SCRIPT:
+ subreq = event_cmd_script_send(state,
+ ev,
+ eventd,
+ request,
+ state->reply);
+ break;
+
+ default:
+ state->reply->result = EPROTO;
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ /* The handlers return NULL only when tevent_req_create() fails. */
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, event_cmd_dispatch_done, req);
+
+ return req;
+}
+
+/*
+ * Completion callback for the command handler started by
+ * event_cmd_dispatch_send(): passes the handler's success or error on to
+ * the dispatch request.
+ */
+static void event_cmd_dispatch_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool succeeded;
+
+	succeeded = event_cmd_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (succeeded) {
+		tevent_req_done(req);
+	} else {
+		tevent_req_error(req, err);
+	}
+}
+
+/*
+ * Collect the result of event_cmd_dispatch_send().
+ *
+ * Returns false and stores the error in *perr if the request failed;
+ * *reply is then untouched.  Otherwise returns true and moves the reply
+ * onto mem_ctx, returning it through *reply.
+ */
+bool event_cmd_dispatch_recv(struct tevent_req *req,
+			     int *perr,
+			     TALLOC_CTX *mem_ctx,
+			     struct ctdb_event_reply **reply)
+{
+	struct event_cmd_dispatch_state *state = tevent_req_data(
+		req, struct event_cmd_dispatch_state);
+
+	if (!tevent_req_is_unix_error(req, perr)) {
+		*reply = talloc_steal(mem_ctx, state->reply);
+		return true;
+	}
+
+	return false;
+}
diff --git a/ctdb/event/event_conf.c b/ctdb/event/event_conf.c
new file mode 100644
index 0000000..61ecf8e
--- /dev/null
+++ b/ctdb/event/event_conf.c
@@ -0,0 +1,85 @@
+/*
+ CTDB event daemon
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+
+#include "lib/util/debug.h"
+
+#include "common/conf.h"
+#include "common/path.h"
+
+#include "event/event_conf.h"
+
+/*
+ * Validation callback for the "debug script" option.
+ *
+ * The option is accepted when it is unset (NULL), or when the file
+ * <etcdir>/<basename of new_script> exists, is a regular file and has
+ * the owner-execute bit set.  key, old_script and mode are unused.
+ *
+ * Returns true if the value is acceptable, false (after logging the
+ * reason) otherwise.
+ */
+static bool event_conf_validate_debug_script(const char *key,
+					     const char *old_script,
+					     const char *new_script,
+					     enum conf_update_mode mode)
+{
+	char script[PATH_MAX];
+	char script_path[PATH_MAX];
+	struct stat st;
+	size_t len;
+	int ret;
+
+	/*
+	 * The option defaults to NULL, so an unset value means "no debug
+	 * script".  There is nothing to check in that case and NULL must
+	 * not be handed to strlcpy().
+	 */
+	if (new_script == NULL) {
+		return true;
+	}
+
+	/* Work on a private copy since it is handed to basename() below */
+	len = strlcpy(script, new_script, sizeof(script));
+	if (len >= sizeof(script)) {
+		D_ERR("debug script name too long\n");
+		return false;
+	}
+
+	/* Only the final path component is used, relative to etcdir */
+	ret = snprintf(script_path,
+		       sizeof(script_path),
+		       "%s/%s",
+		       path_etcdir(),
+		       basename(script));
+	if (ret < 0 || (size_t)ret >= sizeof(script_path)) {
+		D_ERR("debug script path too long\n");
+		return false;
+	}
+
+	ret = stat(script_path, &st);
+	if (ret == -1) {
+		D_ERR("debug script %s does not exist\n", script_path);
+		return false;
+	}
+
+	if (! S_ISREG(st.st_mode)) {
+		D_ERR("debug script %s is not a file\n", script_path);
+		return false;
+	}
+	if (! (st.st_mode & S_IXUSR)) {
+		D_ERR("debug script %s is not executable\n", script_path);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Register the event daemon configuration with conf: the "event"
+ * section and its string option "debug script", which has no default
+ * (NULL) and is checked by event_conf_validate_debug_script().
+ */
+void event_conf_init(struct conf_context *conf)
+{
+	/* Section first, no section-level validation callback */
+	conf_define_section(conf, EVENT_CONF_SECTION, NULL);
+
+	conf_define_string(conf, EVENT_CONF_SECTION, EVENT_CONF_DEBUG_SCRIPT,
+			   NULL, event_conf_validate_debug_script);
+}
diff --git a/ctdb/event/event_conf.h b/ctdb/event/event_conf.h
new file mode 100644
index 0000000..964a18a
--- /dev/null
+++ b/ctdb/event/event_conf.h
@@ -0,0 +1,31 @@
+/*
+ CTDB event daemon
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_EVENT_CONF_H__
+#define __CTDB_EVENT_CONF_H__
+
+#include "common/conf.h"
+
+/* Name of the configuration section holding event daemon options */
+#define EVENT_CONF_SECTION "event"
+
+/* Key, inside EVENT_CONF_SECTION, of the debug script option */
+#define EVENT_CONF_DEBUG_SCRIPT "debug script"
+
+/* Define the event daemon section and options in the given context */
+void event_conf_init(struct conf_context *conf);
+
+#endif /* __CTDB_EVENT_CONF_H__ */
diff --git a/ctdb/event/event_config.c b/ctdb/event/event_config.c
new file mode 100644
index 0000000..d282622
--- /dev/null
+++ b/ctdb/event/event_config.c
@@ -0,0 +1,122 @@
+/*
+ CTDB event daemon - config handling
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "common/conf.h"
+#include "common/logging_conf.h"
+#include "common/path.h"
+
+#include "event/event_private.h"
+#include "event/event_conf.h"
+
+/* Event daemon configuration state, built by event_config_init() */
+struct event_config {
+ /* Configuration file path, obtained from path_config() */
+ char *config_file;
+ /* Configuration context the options below are registered with */
+ struct conf_context *conf;
+
+ /*
+  * Option values; each pointer is registered with
+  * conf_assign_string_pointer() in event_config_init()
+  */
+ const char *logging_location;
+ const char *logging_loglevel;
+ const char *debug_script;
+};
+
+/*
+ * Create the event daemon configuration.
+ *
+ * Registers the logging and event options, binds their values to the
+ * fields of struct event_config and loads the configuration file.  A
+ * missing configuration file (ENOENT from conf_load) is not an error.
+ *
+ * Returns 0 and stores the new object in *result on success, or an
+ * errno-style code on failure (nothing is stored in *result then).
+ */
+int event_config_init(TALLOC_CTX *mem_ctx, struct event_config **result)
+{
+	struct event_config *cfg;
+	int ret;
+
+	cfg = talloc_zero(mem_ctx, struct event_config);
+	if (cfg == NULL) {
+		return ENOMEM;
+	}
+
+	cfg->config_file = path_config(cfg);
+	if (cfg->config_file == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	ret = conf_init(cfg, &cfg->conf);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	/* Logging options */
+	logging_conf_init(cfg->conf, NULL);
+	conf_assign_string_pointer(cfg->conf,
+				   LOGGING_CONF_SECTION,
+				   LOGGING_CONF_LOCATION,
+				   &cfg->logging_location);
+	conf_assign_string_pointer(cfg->conf,
+				   LOGGING_CONF_SECTION,
+				   LOGGING_CONF_LOG_LEVEL,
+				   &cfg->logging_loglevel);
+
+	/* Event daemon options */
+	event_conf_init(cfg->conf);
+	conf_assign_string_pointer(cfg->conf,
+				   EVENT_CONF_SECTION,
+				   EVENT_CONF_DEBUG_SCRIPT,
+				   &cfg->debug_script);
+
+	if (!conf_valid(cfg->conf)) {
+		ret = EINVAL;
+		goto fail;
+	}
+
+	ret = conf_load(cfg->conf, cfg->config_file, true);
+	if (ret != 0 && ret != ENOENT) {
+		goto fail;
+	}
+
+	*result = cfg;
+	return 0;
+
+fail:
+	talloc_free(cfg);
+	return ret;
+}
+
+/* Return the configured logging location string */
+const char *event_config_log_location(struct event_config *config)
+{
+	const char *location = config->logging_location;
+
+	return location;
+}
+
+/* Return the configured log level string */
+const char *event_config_log_level(struct event_config *config)
+{
+	const char *level = config->logging_loglevel;
+
+	return level;
+}
+
+/* Return the configured debug script value, NULL if none is set */
+const char *event_config_debug_script(struct event_config *config)
+{
+	const char *script = config->debug_script;
+
+	return script;
+}
+
+/*
+ * Reload the configuration.  ENOENT from conf_reload() is treated as
+ * success; any other non-zero code is returned to the caller.
+ */
+int event_config_reload(struct event_config *config)
+{
+	int ret = conf_reload(config->conf);
+
+	if (ret == 0 || ret == ENOENT) {
+		return 0;
+	}
+
+	return ret;
+}
diff --git a/ctdb/event/event_context.c b/ctdb/event/event_context.c
new file mode 100644
index 0000000..79bcd83
--- /dev/null
+++ b/ctdb/event/event_context.c
@@ -0,0 +1,472 @@
+/*
+ CTDB event daemon - daemon state
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/dir.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/dlinklist.h"
+
+#include "common/logging.h"
+#include "common/run_event.h"
+#include "common/path.h"
+
+#include "event/event_private.h"
+
+/* One named event of a component together with its last stored result */
+struct event_event {
+ /* Links for the per-component doubly linked list (DLIST_*) */
+ struct event_event *prev, *next;
+
+ /* Event name, a talloc copy owned by this structure */
+ const char *name;
+ /* Script list stored by eventd_event_set(); NULL if none stored */
+ struct run_event_script_list *script_list;
+};
+
+/* A component known to the daemon, created by eventd_component_add() */
+struct event_component {
+ /* Links for the daemon-wide doubly linked list (DLIST_*) */
+ struct event_component *prev, *next;
+
+ /* component state */
+ /* Component name, a talloc copy owned by this structure */
+ const char *name;
+ /* "<script_dir>/<name>", as built in eventd_component_add() */
+ const char *path;
+ /* Context set up by run_event_init() for this component's path */
+ struct run_event_context *run_ctx;
+
+ /* events list */
+ struct event_event *event;
+};
+
+/* List entry recording one connected client */
+struct event_client {
+ /* Links for the daemon-wide doubly linked list (DLIST_*) */
+ struct event_client *prev, *next;
+
+ /* Client handle; entries are matched by pointer comparison */
+ struct sock_client_context *client;
+};
+
+/* Event daemon state, created by event_context_init() */
+struct event_context {
+ /* Event loop and configuration supplied by the caller */
+ struct tevent_context *ev;
+ struct event_config *config;
+ /* Set up by run_proc_init(); passed on to run_event_init() */
+ struct run_proc_context *run_proc_ctx;
+
+ /* "events" directory below etcdir (path_etcdir_append) */
+ const char *script_dir;
+ /* Debug script path below etcdir; NULL when none is configured */
+ const char *debug_script;
+
+ /* component list */
+ struct event_component *component;
+
+ /* client list */
+ struct event_client *client;
+};
+
+/*
+ * event_event functions
+ */
+
+/*
+ * Look up an event of comp by name.  Returns NULL when event_name is
+ * NULL or no event with that name exists.
+ */
+static struct event_event *eventd_event_find(struct event_component *comp,
+					      const char *event_name)
+{
+	struct event_event *cur;
+
+	if (event_name == NULL) {
+		return NULL;
+	}
+
+	/* Walk the list until a name matches or the end is reached */
+	cur = comp->event;
+	while (cur != NULL && strcmp(cur->name, event_name) != 0) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/*
+ * Get the event called event_name of comp, creating and appending it to
+ * the component's event list if it does not exist yet.
+ *
+ * Returns EINVAL for a NULL name, ENOMEM on allocation failure and 0 on
+ * success.  On success the event is stored in *result if result is not
+ * NULL.
+ */
+static int eventd_event_add(struct event_component *comp,
+			    const char *event_name,
+			    struct event_event **result)
+{
+	struct event_event *entry;
+
+	if (event_name == NULL) {
+		return EINVAL;
+	}
+
+	entry = eventd_event_find(comp, event_name);
+	if (entry == NULL) {
+		/* Not known yet: create a new entry owned by comp */
+		entry = talloc_zero(comp, struct event_event);
+		if (entry == NULL) {
+			return ENOMEM;
+		}
+
+		entry->name = talloc_strdup(entry, event_name);
+		if (entry->name == NULL) {
+			talloc_free(entry);
+			return ENOMEM;
+		}
+
+		DLIST_ADD_END(comp->event, entry);
+	}
+
+	if (result != NULL) {
+		*result = entry;
+	}
+
+	return 0;
+}
+
+/*
+ * Store script_list as the result of event event_name of comp, creating
+ * the event if needed.  Any previously stored list is freed.  A non-NULL
+ * script_list becomes a talloc child of the event; a NULL script_list
+ * just clears the stored result.
+ *
+ * Returns 0 on success or the error from eventd_event_add().
+ */
+static int eventd_event_set(struct event_component *comp,
+			    const char *event_name,
+			    struct run_event_script_list *script_list)
+{
+	struct event_event *entry = NULL;
+	int ret;
+
+	ret = eventd_event_add(comp, event_name, &entry);
+	if (ret != 0) {
+		return ret;
+	}
+
+	TALLOC_FREE(entry->script_list);
+	if (script_list == NULL) {
+		return 0;
+	}
+
+	entry->script_list = talloc_steal(entry, script_list);
+	return 0;
+}
+
+/*
+ * Fetch the script list stored for event event_name of comp.  Returns
+ * EINVAL (leaving *result untouched) if the event is unknown, otherwise
+ * 0 with *result set to the stored list, which may be NULL.
+ */
+static int eventd_event_get(struct event_component *comp,
+			    const char *event_name,
+			    struct run_event_script_list **result)
+{
+	struct event_event *entry = eventd_event_find(comp, event_name);
+
+	if (entry != NULL) {
+		*result = entry->script_list;
+		return 0;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * event_component functions
+ */
+
+/*
+ * Look up a component by name.  Returns NULL when comp_name is NULL or
+ * no component with that name has been added.
+ */
+static struct event_component *eventd_component_find(
+					struct event_context *eventd,
+					const char *comp_name)
+{
+	struct event_component *cur;
+
+	if (comp_name == NULL) {
+		return NULL;
+	}
+
+	/* Walk the list until a name matches or the end is reached */
+	cur = eventd->component;
+	while (cur != NULL && strcmp(cur->name, comp_name) != 0) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/*
+ * Get the component called comp_name, creating it if it is not known
+ * yet.  A new component gets the path "<script_dir>/<comp_name>" and a
+ * run_event context initialised for that path and the daemon's debug
+ * script, and is appended to the daemon's component list.
+ *
+ * Returns EINVAL for a NULL name, ENOMEM on allocation failure, the
+ * error from run_event_init() if that fails, and 0 on success.  On
+ * success the component is stored in *result if result is not NULL.
+ */
+static int eventd_component_add(struct event_context *eventd,
+				const char *comp_name,
+				struct event_component **result)
+{
+	struct event_component *entry;
+	int ret;
+
+	if (comp_name == NULL) {
+		return EINVAL;
+	}
+
+	entry = eventd_component_find(eventd, comp_name);
+	if (entry == NULL) {
+		entry = talloc_zero(eventd, struct event_component);
+		if (entry == NULL) {
+			return ENOMEM;
+		}
+
+		entry->name = talloc_strdup(entry, comp_name);
+		if (entry->name == NULL) {
+			ret = ENOMEM;
+			goto fail;
+		}
+
+		entry->path = talloc_asprintf(entry,
+					      "%s/%s",
+					      eventd->script_dir,
+					      comp_name);
+		if (entry->path == NULL) {
+			ret = ENOMEM;
+			goto fail;
+		}
+
+		ret = run_event_init(eventd,
+				     eventd->run_proc_ctx,
+				     entry->path,
+				     eventd->debug_script,
+				     &entry->run_ctx);
+		if (ret != 0) {
+			goto fail;
+		}
+
+		DLIST_ADD_END(eventd->component, entry);
+	}
+
+	if (result != NULL) {
+		*result = entry;
+	}
+	return 0;
+
+fail:
+	/* Only reached for a freshly allocated, not yet listed entry */
+	talloc_free(entry);
+	return ret;
+}
+
+/*
+ * event_client functions
+ */
+
+/*
+ * Find the list entry recorded for client (compared by pointer).
+ * Returns NULL if the client is not in the list.
+ */
+static struct event_client *eventd_client_find(
+					struct event_context *eventd,
+					struct sock_client_context *client)
+{
+	struct event_client *cur = eventd->client;
+
+	while (cur != NULL && cur->client != client) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/*
+ * Record client in the daemon's client list.  Returns 0 on success or
+ * ENOMEM if the list entry cannot be allocated.
+ */
+int eventd_client_add(struct event_context *eventd,
+		      struct sock_client_context *client)
+{
+	struct event_client *entry;
+
+	entry = talloc_zero(eventd, struct event_client);
+	if (entry == NULL) {
+		return ENOMEM;
+	}
+	entry->client = client;
+
+	DLIST_ADD_END(eventd->client, entry);
+	return 0;
+}
+
+/*
+ * Remove client from the daemon's client list and free its entry.
+ * Does nothing if the client is not in the list.
+ */
+void eventd_client_del(struct event_context *eventd,
+		       struct sock_client_context *client)
+{
+	struct event_client *entry = eventd_client_find(eventd, client);
+
+	if (entry != NULL) {
+		DLIST_REMOVE(eventd->client, entry);
+		talloc_free(entry);
+	}
+}
+
+/* Report whether client is currently in the daemon's client list */
+bool eventd_client_exists(struct event_context *eventd,
+			  struct sock_client_context *client)
+{
+	return eventd_client_find(eventd, client) != NULL;
+}
+
+/* public functions */
+
+/*
+ * Create the event daemon state.
+ *
+ * Sets up the run_proc context, the "events" script directory below
+ * etcdir and, if one is configured, the debug script path below etcdir.
+ * Failure to build the debug script path is only logged as a warning.
+ *
+ * Returns 0 and stores the new context in *result on success, or an
+ * errno-style code on failure (nothing is stored in *result then).
+ */
+int event_context_init(TALLOC_CTX *mem_ctx,
+		       struct tevent_context *ev,
+		       struct event_config *config,
+		       struct event_context **result)
+{
+	struct event_context *ctx;
+	const char *script;
+	int ret;
+
+	ctx = talloc_zero(mem_ctx, struct event_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->ev = ev;
+	ctx->config = config;
+
+	ret = run_proc_init(ctx, ev, &ctx->run_proc_ctx);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	ctx->script_dir = path_etcdir_append(ctx, "events");
+	if (ctx->script_dir == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	/*
+	 * FIXME: check that script_dir exists (directory_exist()) and
+	 * fail with EINVAL if it does not.
+	 */
+
+	script = event_config_debug_script(config);
+	if (script != NULL) {
+		ctx->debug_script = path_etcdir_append(ctx, script);
+		if (ctx->debug_script == NULL) {
+			D_WARNING("Failed to set debug script to %s\n",
+				  script);
+		}
+	}
+
+	*result = ctx;
+	return 0;
+
+fail:
+	talloc_free(ctx);
+	return ret;
+}
+
+/* Return the configuration object the daemon state was created with */
+struct event_config *eventd_config(struct event_context *eventd)
+{
+	struct event_config *config = eventd->config;
+
+	return config;
+}
+
+/*
+ * Return the run_event context of component comp_name through *result,
+ * creating the component if it does not exist yet.  Returns 0 on
+ * success or the error from eventd_component_add().
+ */
+int eventd_run_ctx(struct event_context *eventd,
+		   const char *comp_name,
+		   struct run_event_context **result)
+{
+	struct event_component *entry = NULL;
+	int ret;
+
+	ret = eventd_component_add(eventd, comp_name, &entry);
+	if (ret == 0) {
+		*result = entry->run_ctx;
+	}
+
+	return ret;
+}
+
+/*
+ * Store script_list as the result of event event_name of an existing
+ * component.  Returns ENOENT if the component is unknown, otherwise the
+ * result of eventd_event_set().
+ */
+int eventd_set_event_result(struct event_context *eventd,
+			    const char *comp_name,
+			    const char *event_name,
+			    struct run_event_script_list *script_list)
+{
+	struct event_component *entry;
+
+	entry = eventd_component_find(eventd, comp_name);
+	if (entry != NULL) {
+		return eventd_event_set(entry, event_name, script_list);
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Fetch the stored result of event event_name of component comp_name.
+ * The component is created if it does not exist yet.  Returns the
+ * error from eventd_component_add() if that fails, otherwise the result
+ * of eventd_event_get().
+ */
+int eventd_get_event_result(struct event_context *eventd,
+			    const char *comp_name,
+			    const char *event_name,
+			    struct run_event_script_list **result)
+{
+	struct event_component *entry = NULL;
+	int ret = eventd_component_add(eventd, comp_name, &entry);
+
+	if (ret == 0) {
+		ret = eventd_event_get(entry, event_name, result);
+	}
+
+	return ret;
+}
+
+/*
+ * Convert a run_event script list into a newly allocated protocol
+ * script list owned by mem_ctx.
+ *
+ * A NULL input, or one with no scripts, yields an empty (zeroed) list.
+ * Script names and outputs are duplicated; a NULL output stays NULL.
+ * Returns NULL on allocation failure.
+ */
+struct ctdb_event_script_list *eventd_script_list(
+				TALLOC_CTX *mem_ctx,
+				struct run_event_script_list *script_list)
+{
+	struct ctdb_event_script_list *out;
+	int count = 0;
+	int idx;
+
+	out = talloc_zero(mem_ctx, struct ctdb_event_script_list);
+	if (out == NULL) {
+		return NULL;
+	}
+
+	if (script_list != NULL) {
+		count = script_list->num_scripts;
+	}
+	if (count <= 0) {
+		return out;
+	}
+
+	out->script = talloc_array(out, struct ctdb_event_script, count);
+	if (out->script == NULL) {
+		goto fail;
+	}
+
+	for (idx = 0; idx < count; idx++) {
+		struct run_event_script *src = &script_list->script[idx];
+		struct ctdb_event_script *dst = &out->script[idx];
+
+		dst->name = talloc_strdup(out, src->name);
+		if (dst->name == NULL) {
+			goto fail;
+		}
+
+		dst->begin = src->begin;
+		dst->end = src->end;
+		dst->result = src->summary;
+
+		if (src->output != NULL) {
+			dst->output = talloc_strdup(out, src->output);
+			if (dst->output == NULL) {
+				goto fail;
+			}
+		} else {
+			dst->output = NULL;
+		}
+	}
+
+	/* Publish the count only once every entry is filled in */
+	out->num_scripts = count;
+	return out;
+
+fail:
+	talloc_free(out);
+	return NULL;
+}
diff --git a/ctdb/event/event_daemon.c b/ctdb/event/event_daemon.c
new file mode 100644
index 0000000..d96ff6f
--- /dev/null
+++ b/ctdb/event/event_daemon.c
@@ -0,0 +1,382 @@
+/*
+ CTDB event daemon
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "common/logging.h"
+#include "common/path.h"
+#include "common/sock_daemon.h"
+
+#include "event/event_private.h"
+
+/* Top-level state of the event daemon process, allocated in main() */
+struct event_daemon_state {
+ /* Child talloc context holding the tevent context and sock daemon */
+ TALLOC_CTX *mem_ctx;
+ /* Unix socket path from path_socket(..., "eventd") */
+ char *socket;
+ /* PID file path from path_pidfile(..., "eventd") */
+ char *pidfile;
+ struct tevent_context *ev;
+ struct event_config *config;
+ struct sock_daemon_context *sockd;
+ /* Created in the startup callback, freed in the shutdown callback */
+ struct event_context *eventd;
+};
+
+/*
+ * sock_daemon startup callback: create the event context for the
+ * daemon.  Returns 0 on success or the error from event_context_init().
+ */
+static int event_daemon_startup(void *private_data)
+{
+	struct event_daemon_state *e_state;
+	int ret;
+
+	e_state = talloc_get_type_abort(private_data,
+					struct event_daemon_state);
+
+	ret = event_context_init(e_state,
+				 e_state->ev,
+				 e_state->config,
+				 &e_state->eventd);
+	if (ret == 0) {
+		return 0;
+	}
+
+	D_ERR("Failed to initialize event context\n");
+	return ret;
+}
+
+/*
+ * sock_daemon reconfigure callback: reload the configuration.  A failed
+ * reload is only logged as a warning; the callback always returns 0.
+ */
+static int event_daemon_reconfigure(void *private_data)
+{
+	struct event_daemon_state *e_state;
+
+	e_state = talloc_get_type_abort(private_data,
+					struct event_daemon_state);
+
+	if (event_config_reload(e_state->config) != 0) {
+		D_WARNING("Configuration reload failed\n");
+	}
+
+	return 0;
+}
+
+/*
+ * sock_daemon reopen-logs callback: returns 0 if logging_reopen_logs()
+ * succeeds and 1 otherwise.  private_data is unused.
+ */
+static int event_daemon_reopen_logs(void *private_data)
+{
+	if (logging_reopen_logs()) {
+		return 0;
+	}
+
+	return 1;
+}
+
+/* sock_daemon shutdown callback: release the event context */
+static void event_daemon_shutdown(void *private_data)
+{
+	struct event_daemon_state *e_state;
+
+	e_state = talloc_get_type_abort(private_data,
+					struct event_daemon_state);
+
+	TALLOC_FREE(e_state->eventd);
+}
+
+/*
+ * Socket connect callback: record the new client in the event context.
+ * Returns false if the client cannot be registered.  pid is unused.
+ */
+static bool event_client_connect(struct sock_client_context *client,
+				 pid_t pid,
+				 void *private_data)
+{
+	struct event_daemon_state *e_state;
+	int ret;
+
+	e_state = talloc_get_type_abort(private_data,
+					struct event_daemon_state);
+
+	ret = eventd_client_add(e_state->eventd, client);
+	if (ret == 0) {
+		return true;
+	}
+
+	D_ERR("Failed to register client, ret=%d\n", ret);
+	return false;
+}
+
+/* Socket disconnect callback: forget the client in the event context */
+static void event_client_disconnect(struct sock_client_context *client,
+				    void *private_data)
+{
+	struct event_daemon_state *e_state;
+
+	e_state = talloc_get_type_abort(private_data,
+					struct event_daemon_state);
+
+	eventd_client_del(e_state->eventd, client);
+}
+
+/* State of one client request handled by event_client_send() */
+struct event_client_state {
+ struct tevent_context *ev;
+ struct event_context *eventd;
+ /* Client that sent the request; the reply is written back to it */
+ struct sock_client_context *client;
+ /* Reply packet and its length, filled in by event_pkt_recv() */
+ uint8_t *buf;
+ size_t buflen;
+};
+
+static void event_client_request_done(struct tevent_req *subreq);
+static void event_client_reply_done(struct tevent_req *subreq);
+
+/*
+ * Socket read callback: start processing a request packet received
+ * from client.  The packet is handed to event_pkt_send(); the reply is
+ * written back to the client once that sub-request completes.
+ */
+static struct tevent_req *event_client_send(TALLOC_CTX *mem_ctx,
+					    struct tevent_context *ev,
+					    struct sock_client_context *client,
+					    uint8_t *buf,
+					    size_t buflen,
+					    void *private_data)
+{
+	struct event_daemon_state *e_state;
+	struct event_client_state *state;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+
+	e_state = talloc_get_type_abort(private_data,
+					struct event_daemon_state);
+
+	req = tevent_req_create(mem_ctx, &state, struct event_client_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->client = client;
+	state->eventd = e_state->eventd;
+	state->ev = ev;
+
+	subreq = event_pkt_send(state, ev, e_state->eventd, buf, buflen);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, event_client_request_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for event_pkt_send(): fetch the reply packet and
+ * start writing it back to the client.
+ *
+ * If the client has disconnected in the meantime the reply is dropped
+ * and the request completes successfully.  If the write cannot be
+ * started the request fails with an out-of-memory error.
+ */
+static void event_client_request_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct event_client_state *state = tevent_req_data(
+		req, struct event_client_state);
+	int ret = 0;
+	bool ok;
+
+	ok = event_pkt_recv(subreq, &ret, state, &state->buf, &state->buflen);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ok = eventd_client_exists(state->eventd, state->client);
+	if (!ok) {
+		/* Client has already disconnected */
+		TALLOC_FREE(state->buf);
+		tevent_req_done(req);
+		return;
+	}
+
+	subreq = sock_socket_write_send(state,
+					state->ev,
+					state->client,
+					state->buf,
+					state->buflen);
+	if (subreq == NULL) {
+		/*
+		 * Release the reply buffer BEFORE completing the request.
+		 * Completing it runs the caller's callback, which may free
+		 * req and with it state, so state must not be touched
+		 * afterwards.
+		 */
+		TALLOC_FREE(state->buf);
+		tevent_req_oom(req);
+		return;
+	}
+	tevent_req_set_callback(subreq, event_client_reply_done, req);
+}
+
+/*
+ * Completion callback for the reply write: free the reply buffer and
+ * finish the client request with the outcome of the write.
+ */
+static void event_client_reply_done(struct tevent_req *subreq)
+{
+	struct event_client_state *state;
+	struct tevent_req *req;
+	bool written;
+	int err = 0;
+
+	req = tevent_req_callback_data(subreq, struct tevent_req);
+	state = tevent_req_data(req, struct event_client_state);
+
+	/* The buffer is no longer needed whatever the write result is */
+	talloc_free(state->buf);
+
+	written = sock_socket_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (written) {
+		tevent_req_done(req);
+		return;
+	}
+
+	D_ERR("Sending reply failed\n");
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of event_client_send().  Returns false, with the
+ * error stored via perr, if the request failed; true otherwise.
+ */
+static bool event_client_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/* Command line option values; -1 means the option was not given */
+static struct {
+ /* Passed as the last argument of sock_daemon_run() in main() */
+ int pid;
+ /* Handed to sock_daemon_set_startup_fd() when not -1 */
+ int startup_fd;
+} options = {
+ .pid = -1,
+ .startup_fd = -1,
+};
+
+/* popt option table; parsed values are stored directly in options */
+struct poptOption cmdline_options[] = {
+ POPT_AUTOHELP
+ { "pid", 'P', POPT_ARG_INT, &options.pid, 0,
+ "pid to wait for", "PID" },
+ { "startup-fd", 'S', POPT_ARG_INT, &options.startup_fd, 0,
+ "file descriptor to notify of successful start", "FD" },
+ POPT_TABLEEND
+};
+
+/*
+ * Event daemon entry point.
+ *
+ * Parses the command line, loads the configuration, sets up the sock
+ * daemon with its unix socket and runs it until it terminates.  When
+ * CTDB_INTERACTIVE is set in the environment the built-in logging
+ * defaults ("file:", "NOTICE") are used instead of the configured
+ * values and a talloc report is written to stderr on exit.
+ *
+ * The process exit status is 0 on clean termination (including EINTR
+ * from sock_daemon_run()) and non-zero on any failure.
+ */
+int main(int argc, const char **argv)
+{
+	poptContext pc;
+	struct event_daemon_state *e_state;
+	struct sock_daemon_funcs daemon_funcs;
+	struct sock_socket_funcs socket_funcs;
+	const char *log_location = "file:";
+	const char *log_level = "NOTICE";
+	const char *t;
+	int interactive = 0;
+	int opt, ret;
+	bool ok;
+
+	pc = poptGetContext(argv[0],
+			    argc,
+			    argv,
+			    cmdline_options,
+			    0);
+	/* All options store their value directly; any return is an error */
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		D_ERR("Invalid options %s: %s\n",
+		      poptBadOption(pc, 0),
+		      poptStrerror(opt));
+		exit(1);
+	}
+
+	t = getenv("CTDB_INTERACTIVE");
+	if (t != NULL) {
+		interactive = 1;
+	}
+
+	e_state = talloc_zero(NULL, struct event_daemon_state);
+	if (e_state == NULL) {
+		D_ERR("Memory allocation error\n");
+		ret = 1;
+		goto fail;
+	}
+
+	e_state->mem_ctx = talloc_new(e_state);
+	if (e_state->mem_ctx == NULL) {
+		D_ERR("Memory allocation error\n");
+		ret = 1;
+		goto fail;
+	}
+
+	e_state->socket = path_socket(e_state, "eventd");
+	if (e_state->socket == NULL) {
+		D_ERR("Memory allocation error\n");
+		ret = 1;
+		goto fail;
+	}
+
+	e_state->pidfile = path_pidfile(e_state, "eventd");
+	if (e_state->pidfile == NULL) {
+		D_ERR("Memory allocation error\n");
+		ret = 1;
+		goto fail;
+	}
+
+	ret = event_config_init(e_state, &e_state->config);
+	if (ret != 0) {
+		D_ERR("Failed to initialize event config\n");
+		goto fail;
+	}
+
+	e_state->ev = tevent_context_init(e_state->mem_ctx);
+	if (e_state->ev == NULL) {
+		D_ERR("Failed to initialize tevent\n");
+		ret = 1;
+		goto fail;
+	}
+
+	daemon_funcs = (struct sock_daemon_funcs) {
+		.startup = event_daemon_startup,
+		.reconfigure = event_daemon_reconfigure,
+		.reopen_logs = event_daemon_reopen_logs,
+		.shutdown = event_daemon_shutdown,
+	};
+
+	if (interactive == 0) {
+		log_location = event_config_log_location(e_state->config);
+		log_level = event_config_log_level(e_state->config);
+	}
+
+	ret = sock_daemon_setup(e_state->mem_ctx,
+				"ctdb-eventd",
+				log_location,
+				log_level,
+				&daemon_funcs,
+				e_state,
+				&e_state->sockd);
+	if (ret != 0) {
+		D_ERR("Failed to setup sock daemon\n");
+		goto fail;
+	}
+
+	socket_funcs = (struct sock_socket_funcs) {
+		.connect = event_client_connect,
+		.disconnect = event_client_disconnect,
+		.read_send = event_client_send,
+		.read_recv = event_client_recv,
+	};
+
+	ret = sock_daemon_add_unix(e_state->sockd,
+				   e_state->socket,
+				   &socket_funcs,
+				   e_state);
+	if (ret != 0) {
+		D_ERR("Failed to setup socket %s\n", e_state->socket);
+		goto fail;
+	}
+
+	if (options.startup_fd != -1) {
+		ok = sock_daemon_set_startup_fd(e_state->sockd,
+						options.startup_fd);
+		if (!ok) {
+			/*
+			 * ret is still 0 from the successful call above;
+			 * make sure the failure is reflected in the exit
+			 * status.
+			 */
+			D_ERR("Failed to set startup fd\n");
+			ret = 1;
+			goto fail;
+		}
+	}
+
+	ret = sock_daemon_run(e_state->ev,
+			      e_state->sockd,
+			      e_state->pidfile,
+			      false,
+			      false,
+			      options.pid);
+	if (ret == EINTR) {
+		ret = 0;
+	}
+
+	if (t != NULL) {
+		talloc_report_full(e_state->mem_ctx, stderr);
+	}
+
+fail:
+	talloc_free(e_state);
+	(void)poptFreeContext(pc);
+	exit(ret);
+}
diff --git a/ctdb/event/event_private.h b/ctdb/event/event_private.h
new file mode 100644
index 0000000..0cc8d80
--- /dev/null
+++ b/ctdb/event/event_private.h
@@ -0,0 +1,103 @@
+/*
+ CTDB event daemon
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_EVENT_PRIVATE_H__
+#define __CTDB_EVENT_PRIVATE_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/run_event.h"
+#include "common/sock_daemon.h"
+
+#include "event/event_protocol.h"
+
+/* Opaque types; defined in event_config.c and event_context.c */
+struct event_config;
+struct event_context;
+
+/* From event/event_cmd.c */
+
+/*
+ * Async dispatch of a decoded request to the handler for its command.
+ * On success the recv function hands the reply, re-parented onto
+ * mem_ctx, back through reply.
+ */
+struct tevent_req *event_cmd_dispatch_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct event_context *eventd,
+ struct ctdb_event_request *request);
+bool event_cmd_dispatch_recv(struct tevent_req *req,
+ int *perr,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_event_reply **reply);
+
+/* From event/event_config.c */
+
+/* Create the configuration object and load the configuration file */
+int event_config_init(TALLOC_CTX *mem_ctx, struct event_config **result);
+
+/* Accessors for the current option values */
+const char *event_config_log_location(struct event_config *config);
+const char *event_config_log_level(struct event_config *config);
+const char *event_config_debug_script(struct event_config *config);
+
+/* Reload the configuration; returns 0 or an errno-style code */
+int event_config_reload(struct event_config *config);
+
+/* From event/event_context.c */
+
+/* Track connected clients (add, remove, membership test) */
+int eventd_client_add(struct event_context *eventd,
+ struct sock_client_context *client);
+void eventd_client_del(struct event_context *eventd,
+ struct sock_client_context *client);
+bool eventd_client_exists(struct event_context *eventd,
+ struct sock_client_context *client);
+
+/* Create the daemon state; returns 0 or an errno-style code */
+int event_context_init(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct event_config *config,
+ struct event_context **result);
+
+struct event_config *eventd_config(struct event_context *eventd);
+/* Get the run_event context of a component, creating it if needed */
+int eventd_run_ctx(struct event_context *eventd,
+ const char *comp_name,
+ struct run_event_context **result);
+
+/* Store / fetch the script list recorded for a component's event */
+int eventd_set_event_result(struct event_context *eventd,
+ const char *comp_name,
+ const char *event_name,
+ struct run_event_script_list *script_list);
+int eventd_get_event_result(struct event_context *eventd,
+ const char *comp_name,
+ const char *event_name,
+ struct run_event_script_list **result);
+
+/* Convert a run_event script list into a protocol script list */
+struct ctdb_event_script_list *eventd_script_list(
+ TALLOC_CTX *mem_ctx,
+ struct run_event_script_list *script_list);
+
+
+/* From event/event_request.c */
+
+/*
+ * Async processing of one request packet.  As used by the daemon, the
+ * recv function yields the reply packet and its length.
+ */
+struct tevent_req *event_pkt_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct event_context *eventd,
+ uint8_t *buf,
+ size_t buflen);
+
+bool event_pkt_recv(struct tevent_req *req,
+ int *perr,
+ TALLOC_CTX *mem_ctx,
+ uint8_t **buf,
+ size_t *buflen);
+
+#endif /* __CTDB_EVENT_PRIVATE_H__ */
diff --git a/ctdb/event/event_protocol.c b/ctdb/event/event_protocol.c
new file mode 100644
index 0000000..baa9e1e
--- /dev/null
+++ b/ctdb/event/event_protocol.c
@@ -0,0 +1,1123 @@
+/*
+ CTDB event daemon protocol
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "protocol/protocol_basic.h"
+
+#include "event_protocol.h"
+#include "event_protocol_api.h"
+
+/*
+ * Number of bytes a script action occupies on the wire.  The enum is
+ * carried as a uint32_t.
+ */
+static size_t ctdb_event_script_action_len(enum ctdb_event_script_action in)
+{
+	uint32_t wire = (uint32_t)in;
+
+	return ctdb_uint32_len(&wire);
+}
+
+/*
+ * Serialise a script action into buf as a uint32_t.  *npush receives the
+ * byte count reported by ctdb_uint32_push().
+ */
+static void ctdb_event_script_action_push(enum ctdb_event_script_action in,
+					  uint8_t *buf,
+					  size_t *npush)
+{
+	uint32_t wire = (uint32_t)in;
+
+	ctdb_uint32_push(&wire, buf, npush);
+}
+
+/*
+ * Decode a script action from buf.
+ *
+ * Returns 0 and fills *out and *npull on success, the error from
+ * ctdb_uint32_pull() if the integer cannot be read, or EINVAL if the
+ * decoded number is not a known action.  *out and *npull are left
+ * untouched on failure.
+ */
+static int ctdb_event_script_action_pull(uint8_t *buf,
+					 size_t buflen,
+					 enum ctdb_event_script_action *out,
+					 size_t *npull)
+{
+	uint32_t raw;
+	size_t consumed;
+	int ret;
+
+	ret = ctdb_uint32_pull(buf, buflen, &raw, &consumed);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Only the two defined wire values are accepted. */
+	if (raw == 0) {
+		*out = CTDB_EVENT_SCRIPT_DISABLE;
+	} else if (raw == 1) {
+		*out = CTDB_EVENT_SCRIPT_ENABLE;
+	} else {
+		return EINVAL;
+	}
+
+	*npull = consumed;
+
+	return 0;
+}
+
+/*
+ * Number of bytes an event command occupies on the wire.  The enum is
+ * carried as a uint32_t.
+ */
+static size_t ctdb_event_command_len(enum ctdb_event_command in)
+{
+	uint32_t wire = (uint32_t)in;
+
+	return ctdb_uint32_len(&wire);
+}
+
+/*
+ * Serialise an event command into buf as a uint32_t.  *npush receives the
+ * byte count reported by ctdb_uint32_push().
+ */
+static void ctdb_event_command_push(enum ctdb_event_command in,
+				    uint8_t *buf,
+				    size_t *npush)
+{
+	uint32_t wire = (uint32_t)in;
+
+	ctdb_uint32_push(&wire, buf, npush);
+}
+
+/*
+ * Decode an event command from buf.
+ *
+ * Returns 0 and fills *out and *npull on success, the error from
+ * ctdb_uint32_pull() if the integer cannot be read, or EINVAL if the
+ * decoded number is not RUN (1), STATUS (2) or SCRIPT (3).  *out and
+ * *npull are left untouched on failure.
+ */
+static int ctdb_event_command_pull(uint8_t *buf,
+				   size_t buflen,
+				   enum ctdb_event_command *out,
+				   size_t *npull)
+{
+	uint32_t raw;
+	size_t consumed;
+	int ret;
+
+	ret = ctdb_uint32_pull(buf, buflen, &raw, &consumed);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (raw == 1) {
+		*out = CTDB_EVENT_CMD_RUN;
+	} else if (raw == 2) {
+		*out = CTDB_EVENT_CMD_STATUS;
+	} else if (raw == 3) {
+		*out = CTDB_EVENT_CMD_SCRIPT;
+	} else {
+		return EINVAL;
+	}
+
+	*npull = consumed;
+
+	return 0;
+}
+
+/*
+ * Serialised size of one script record: name, begin time, end time,
+ * result and output, in that order.
+ */
+static size_t ctdb_event_script_len(struct ctdb_event_script *in)
+{
+	size_t total = 0;
+
+	total += ctdb_stringn_len(&in->name);
+	total += ctdb_timeval_len(&in->begin);
+	total += ctdb_timeval_len(&in->end);
+	total += ctdb_int32_len(&in->result);
+	total += ctdb_stringn_len(&in->output);
+
+	return total;
+}
+
+/*
+ * Serialise one script record into buf, field by field, in the same order
+ * used by ctdb_event_script_len().  *npush receives the total number of
+ * bytes written.
+ */
+static void ctdb_event_script_push(struct ctdb_event_script *in,
+				   uint8_t *buf,
+				   size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_stringn_push(&in->name, buf + pos, &n);
+	pos += n;
+
+	ctdb_timeval_push(&in->begin, buf + pos, &n);
+	pos += n;
+
+	ctdb_timeval_push(&in->end, buf + pos, &n);
+	pos += n;
+
+	ctdb_int32_push(&in->result, buf + pos, &n);
+	pos += n;
+
+	ctdb_stringn_push(&in->output, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Decode one script record from buf into the caller-provided *value.
+ *
+ * The two strings are pulled with mem_ctx as the talloc context passed to
+ * ctdb_stringn_pull().  On success returns 0 and sets *npull to the number
+ * of bytes consumed.  On failure returns the error of the first field that
+ * could not be decoded; fields decoded before that point stay filled in and
+ * *npull is not written.
+ */
+static int ctdb_event_script_pull_elems(uint8_t *buf,
+					size_t buflen,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_event_script *value,
+					size_t *npull)
+{
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, mem_ctx,
+			       &value->name, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_timeval_pull(buf + pos, buflen - pos, &value->begin, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_timeval_pull(buf + pos, buflen - pos, &value->end, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_int32_pull(buf + pos, buflen - pos, &value->result, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, mem_ctx,
+			       &value->output, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	*npull = pos;
+
+	return 0;
+}
+
+#ifdef EVENT_PROTOCOL_TEST
+/*
+ * Allocate a script record on mem_ctx and decode it from buf.
+ *
+ * Only built when EVENT_PROTOCOL_TEST is defined.  The record's strings
+ * are allocated as talloc children of the record itself.  Returns 0 and
+ * sets *out on success, ENOMEM if the record cannot be allocated, or the
+ * decode error; on a decode error the record is freed.
+ */
+static int ctdb_event_script_pull(uint8_t *buf,
+				  size_t buflen,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_event_script **out,
+				  size_t *npull)
+{
+	struct ctdb_event_script *script;
+	int rc;
+
+	script = talloc(mem_ctx, struct ctdb_event_script);
+	if (script == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_event_script_pull_elems(buf, buflen, script, script, npull);
+	if (rc == 0) {
+		*out = script;
+		return 0;
+	}
+
+	talloc_free(script);
+	return rc;
+}
+#endif
+
+/*
+ * Serialised size of a script list: the script count followed by every
+ * script record.
+ */
+static size_t ctdb_event_script_list_len(struct ctdb_event_script_list *in)
+{
+	size_t total = ctdb_int32_len(&in->num_scripts);
+	int idx = 0;
+
+	while (idx < in->num_scripts) {
+		total += ctdb_event_script_len(&in->script[idx]);
+		idx++;
+	}
+
+	return total;
+}
+
+/*
+ * Serialise a script list into buf: the script count first, then each
+ * script record in array order.  *npush receives the total bytes written.
+ */
+static void ctdb_event_script_list_push(struct ctdb_event_script_list *in,
+					uint8_t *buf,
+					size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+	int idx = 0;
+
+	ctdb_int32_push(&in->num_scripts, buf + pos, &n);
+	pos += n;
+
+	while (idx < in->num_scripts) {
+		ctdb_event_script_push(&in->script[idx], buf + pos, &n);
+		pos += n;
+		idx++;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Decode a script list from buf into a newly allocated list on mem_ctx.
+ *
+ * Returns 0 and sets *out and *npull on success.  Returns EINVAL for a
+ * negative script count, ENOMEM on allocation failure, or the error from
+ * decoding the count or a script record.  On any failure after the list
+ * has been allocated, the list (and everything hanging off it) is freed.
+ * An empty list is returned with script left NULL by talloc_zero().
+ */
+static int ctdb_event_script_list_pull(uint8_t *buf,
+				       size_t buflen,
+				       TALLOC_CTX *mem_ctx,
+				       struct ctdb_event_script_list **out,
+				       size_t *npull)
+{
+	struct ctdb_event_script_list *list = NULL;
+	size_t pos = 0;
+	size_t n;
+	int count;
+	int idx;
+	int rc;
+
+	rc = ctdb_int32_pull(buf + pos, buflen - pos, &count, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	if (count < 0) {
+		return EINVAL;
+	}
+
+	list = talloc_zero(mem_ctx, struct ctdb_event_script_list);
+	if (list == NULL) {
+		return ENOMEM;
+	}
+	list->num_scripts = count;
+
+	if (count > 0) {
+		list->script = talloc_array(list,
+					    struct ctdb_event_script,
+					    count);
+		if (list->script == NULL) {
+			rc = ENOMEM;
+			goto fail;
+		}
+
+		for (idx = 0; idx < count; idx++) {
+			/* Strings are parented to the list itself. */
+			rc = ctdb_event_script_pull_elems(buf + pos,
+							  buflen - pos,
+							  list,
+							  &list->script[idx],
+							  &n);
+			if (rc != 0) {
+				goto fail;
+			}
+			pos += n;
+		}
+	}
+
+	*out = list;
+	*npull = pos;
+
+	return 0;
+
+fail:
+	talloc_free(list);
+	return rc;
+}
+
+/*
+ * Serialised size of a RUN request: component, event, args, timeout and
+ * flags, in that order.
+ */
+static size_t ctdb_event_request_run_len(struct ctdb_event_request_run *in)
+{
+	size_t total = 0;
+
+	total += ctdb_stringn_len(&in->component);
+	total += ctdb_stringn_len(&in->event);
+	total += ctdb_stringn_len(&in->args);
+	total += ctdb_uint32_len(&in->timeout);
+	total += ctdb_uint32_len(&in->flags);
+
+	return total;
+}
+
+/*
+ * Serialise a RUN request into buf, field by field.  *npush receives the
+ * total number of bytes written.
+ */
+static void ctdb_event_request_run_push(struct ctdb_event_request_run *in,
+					uint8_t *buf,
+					size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_stringn_push(&in->component, buf + pos, &n);
+	pos += n;
+
+	ctdb_stringn_push(&in->event, buf + pos, &n);
+	pos += n;
+
+	ctdb_stringn_push(&in->args, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint32_push(&in->timeout, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint32_push(&in->flags, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Decode a RUN request from buf into a newly allocated structure on
+ * mem_ctx; its strings are allocated as talloc children of the structure.
+ *
+ * Returns 0 and sets *out and *npull on success, ENOMEM if the structure
+ * cannot be allocated, or the error of the first field that fails to
+ * decode (in which case the structure is freed).
+ */
+static int ctdb_event_request_run_pull(uint8_t *buf,
+				       size_t buflen,
+				       TALLOC_CTX *mem_ctx,
+				       struct ctdb_event_request_run **out,
+				       size_t *npull)
+{
+	struct ctdb_event_request_run *run;
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	run = talloc(mem_ctx, struct ctdb_event_request_run);
+	if (run == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, run,
+			       &run->component, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, run,
+			       &run->event, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, run,
+			       &run->args, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_uint32_pull(buf + pos, buflen - pos, &run->timeout, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_uint32_pull(buf + pos, buflen - pos, &run->flags, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	*out = run;
+	*npull = pos;
+
+	return 0;
+
+fail:
+	talloc_free(run);
+	return rc;
+}
+
+/*
+ * Serialised size of a STATUS request: component followed by event.
+ */
+static size_t ctdb_event_request_status_len(
+				struct ctdb_event_request_status *in)
+{
+	size_t component_len = ctdb_stringn_len(&in->component);
+	size_t event_len = ctdb_stringn_len(&in->event);
+
+	return component_len + event_len;
+}
+
+/*
+ * Serialise a STATUS request into buf: component, then event.  *npush
+ * receives the total number of bytes written.
+ */
+static void ctdb_event_request_status_push(
+				struct ctdb_event_request_status *in,
+				uint8_t *buf,
+				size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_stringn_push(&in->component, buf + pos, &n);
+	pos += n;
+
+	ctdb_stringn_push(&in->event, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Decode a STATUS request from buf into a newly allocated structure on
+ * mem_ctx; its strings are allocated as talloc children of the structure.
+ *
+ * Returns 0 and sets *out and *npull on success, ENOMEM if the structure
+ * cannot be allocated, or the decode error (the structure is then freed).
+ */
+static int ctdb_event_request_status_pull(
+				uint8_t *buf,
+				size_t buflen,
+				TALLOC_CTX *mem_ctx,
+				struct ctdb_event_request_status **out,
+				size_t *npull)
+{
+	struct ctdb_event_request_status *status;
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	status = talloc(mem_ctx, struct ctdb_event_request_status);
+	if (status == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, status,
+			       &status->component, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, status,
+			       &status->event, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	*out = status;
+	*npull = pos;
+
+	return 0;
+
+fail:
+	talloc_free(status);
+	return rc;
+}
+
+/*
+ * Serialised size of a SCRIPT request: component, script name and action.
+ */
+static size_t ctdb_event_request_script_len(
+				struct ctdb_event_request_script *in)
+{
+	size_t total = 0;
+
+	total += ctdb_stringn_len(&in->component);
+	total += ctdb_stringn_len(&in->script);
+	total += ctdb_event_script_action_len(in->action);
+
+	return total;
+}
+
+/*
+ * Serialise a SCRIPT request into buf: component, script name, then the
+ * action.  *npush receives the total number of bytes written.
+ */
+static void ctdb_event_request_script_push(
+				struct ctdb_event_request_script *in,
+				uint8_t *buf,
+				size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_stringn_push(&in->component, buf + pos, &n);
+	pos += n;
+
+	ctdb_stringn_push(&in->script, buf + pos, &n);
+	pos += n;
+
+	ctdb_event_script_action_push(in->action, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Decode a SCRIPT request from buf into a newly allocated structure on
+ * mem_ctx; its strings are allocated as talloc children of the structure.
+ *
+ * Returns 0 and sets *out and *npull on success, ENOMEM if the structure
+ * cannot be allocated, or the decode error (the structure is then freed).
+ */
+static int ctdb_event_request_script_pull(
+				uint8_t *buf,
+				size_t buflen,
+				TALLOC_CTX *mem_ctx,
+				struct ctdb_event_request_script **out,
+				size_t *npull)
+{
+	struct ctdb_event_request_script *script;
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	script = talloc(mem_ctx, struct ctdb_event_request_script);
+	if (script == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, script,
+			       &script->component, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_stringn_pull(buf + pos, buflen - pos, script,
+			       &script->script, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_event_script_action_pull(buf + pos, buflen - pos,
+					   &script->action, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	*out = script;
+	*npull = pos;
+
+	return 0;
+
+fail:
+	talloc_free(script);
+	return rc;
+}
+
+/*
+ * Serialised size of a STATUS reply: the summary followed by the script
+ * list.  in->script_list is dereferenced and must not be NULL.
+ */
+static size_t ctdb_event_reply_status_len(
+				struct ctdb_event_reply_status *in)
+{
+	size_t summary_len = ctdb_int32_len(&in->summary);
+	size_t list_len = ctdb_event_script_list_len(in->script_list);
+
+	return summary_len + list_len;
+}
+
+/*
+ * Serialise a STATUS reply into buf: summary, then the script list.
+ * *npush receives the total number of bytes written.
+ */
+static void ctdb_event_reply_status_push(
+				struct ctdb_event_reply_status *in,
+				uint8_t *buf,
+				size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_int32_push(&in->summary, buf + pos, &n);
+	pos += n;
+
+	ctdb_event_script_list_push(in->script_list, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Decode a STATUS reply from buf into a newly allocated structure on
+ * mem_ctx; the script list is allocated as a talloc child of it.
+ *
+ * Returns 0 and sets *out and *npull on success, ENOMEM if the structure
+ * cannot be allocated, or the decode error (the structure is then freed).
+ */
+static int ctdb_event_reply_status_pull(
+				uint8_t *buf,
+				size_t buflen,
+				TALLOC_CTX *mem_ctx,
+				struct ctdb_event_reply_status **out,
+				size_t *npull)
+{
+	struct ctdb_event_reply_status *status;
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	status = talloc(mem_ctx, struct ctdb_event_reply_status);
+	if (status == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_int32_pull(buf + pos, buflen - pos, &status->summary, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	rc = ctdb_event_script_list_pull(buf + pos, buflen - pos, status,
+					 &status->script_list, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	*out = status;
+	*npull = pos;
+
+	return 0;
+
+fail:
+	talloc_free(status);
+	return rc;
+}
+
+/*
+ * Serialised size of the packet header: length, version and reqid.
+ */
+static size_t ctdb_event_header_len(struct ctdb_event_header *in)
+{
+	size_t total = 0;
+
+	total += ctdb_uint32_len(&in->length);
+	total += ctdb_uint32_len(&in->version);
+	total += ctdb_uint32_len(&in->reqid);
+
+	return total;
+}
+
+/*
+ * Serialise the packet header into buf: length, version, reqid.  *npush
+ * receives the total number of bytes written.
+ */
+static void ctdb_event_header_push(struct ctdb_event_header *in,
+				   uint8_t *buf,
+				   size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_uint32_push(&in->length, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint32_push(&in->version, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint32_push(&in->reqid, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Decode the packet header from buf into the caller-provided *value.
+ *
+ * Returns 0 and sets *npull to the bytes consumed on success, or the
+ * error of the first field that fails to decode.  No validation of the
+ * decoded length or version is done here.
+ */
+static int ctdb_event_header_pull(uint8_t *buf,
+				  size_t buflen,
+				  struct ctdb_event_header *value,
+				  size_t *npull)
+{
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	rc = ctdb_uint32_pull(buf + pos, buflen - pos, &value->length, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_uint32_pull(buf + pos, buflen - pos, &value->version, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_uint32_pull(buf + pos, buflen - pos, &value->reqid, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	*npull = pos;
+
+	return 0;
+}
+
+/*
+ * Public wrapper that decodes just the packet header from buf into
+ * *value.  The number of bytes consumed is discarded.  Returns 0 on
+ * success or the error from ctdb_event_header_pull().
+ */
+int ctdb_event_header_extract(uint8_t *buf,
+			      size_t buflen,
+			      struct ctdb_event_header *value)
+{
+	size_t unused;
+	int rc;
+
+	rc = ctdb_event_header_pull(buf, buflen, value, &unused);
+
+	return rc;
+}
+
+/*
+ * Serialised size of a request body: the command followed by the payload
+ * selected by that command.  An unrecognised command contributes no
+ * payload.
+ */
+static size_t ctdb_event_request_data_len(struct ctdb_event_request *in)
+{
+	size_t cmd_len = ctdb_event_command_len(in->cmd);
+	size_t payload_len = 0;
+
+	switch (in->cmd) {
+	case CTDB_EVENT_CMD_RUN:
+		payload_len = ctdb_event_request_run_len(in->data.run);
+		break;
+
+	case CTDB_EVENT_CMD_STATUS:
+		payload_len = ctdb_event_request_status_len(in->data.status);
+		break;
+
+	case CTDB_EVENT_CMD_SCRIPT:
+		payload_len = ctdb_event_request_script_len(in->data.script);
+		break;
+
+	default:
+		break;
+	}
+
+	return cmd_len + payload_len;
+}
+
+/*
+ * Serialise a request body into buf: the command, then the payload
+ * selected by that command.  An unrecognised command writes no payload.
+ * *npush receives the total number of bytes written.
+ */
+static void ctdb_event_request_data_push(struct ctdb_event_request *in,
+					 uint8_t *buf,
+					 size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_event_command_push(in->cmd, buf + pos, &n);
+	pos += n;
+
+	/* Stays 0 when the command has no payload to write. */
+	n = 0;
+
+	switch (in->cmd) {
+	case CTDB_EVENT_CMD_RUN:
+		ctdb_event_request_run_push(in->data.run, buf + pos, &n);
+		break;
+
+	case CTDB_EVENT_CMD_STATUS:
+		ctdb_event_request_status_push(in->data.status,
+					       buf + pos,
+					       &n);
+		break;
+
+	case CTDB_EVENT_CMD_SCRIPT:
+		ctdb_event_request_script_push(in->data.script,
+					       buf + pos,
+					       &n);
+		break;
+
+	default:
+		break;
+	}
+
+	*npush = pos + n;
+}
+
+/*
+ * Decode a request body from buf into a newly allocated request on
+ * mem_ctx; the command-specific payload is allocated as a talloc child of
+ * the request.
+ *
+ * Returns 0 and sets *out and *npull on success, ENOMEM if the request
+ * cannot be allocated, or the error from decoding the command or its
+ * payload (the request is then freed).
+ */
+static int ctdb_event_request_data_pull(uint8_t *buf,
+					size_t buflen,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_event_request **out,
+					size_t *npull)
+{
+	struct ctdb_event_request *request;
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	request = talloc(mem_ctx, struct ctdb_event_request);
+	if (request == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_event_command_pull(buf + pos, buflen - pos,
+				     &request->cmd, &n);
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	/* rc is 0 here, so a command without payload falls through OK. */
+	n = 0;
+
+	switch (request->cmd) {
+	case CTDB_EVENT_CMD_RUN:
+		rc = ctdb_event_request_run_pull(buf + pos,
+						 buflen - pos,
+						 request,
+						 &request->data.run,
+						 &n);
+		break;
+
+	case CTDB_EVENT_CMD_STATUS:
+		rc = ctdb_event_request_status_pull(buf + pos,
+						    buflen - pos,
+						    request,
+						    &request->data.status,
+						    &n);
+		break;
+
+	case CTDB_EVENT_CMD_SCRIPT:
+		rc = ctdb_event_request_script_pull(buf + pos,
+						    buflen - pos,
+						    request,
+						    &request->data.script,
+						    &n);
+		break;
+
+	default:
+		break;
+	}
+
+	if (rc != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	*out = request;
+	*npull = pos;
+
+	return 0;
+
+fail:
+	talloc_free(request);
+	return rc;
+}
+
+/*
+ * Serialised size of a reply body: command and result, plus the status
+ * payload when the result is 0 and the command is STATUS.  No other
+ * combination carries a payload.
+ */
+static size_t ctdb_event_reply_data_len(struct ctdb_event_reply *in)
+{
+	size_t total;
+
+	total = ctdb_event_command_len(in->cmd);
+	total += ctdb_int32_len(&in->result);
+
+	if (in->result == 0 && in->cmd == CTDB_EVENT_CMD_STATUS) {
+		total += ctdb_event_reply_status_len(in->data.status);
+	}
+
+	return total;
+}
+
+/*
+ * Serialise a reply body into buf: command, result, and the status
+ * payload when the result is 0 and the command is STATUS.  *npush
+ * receives the total number of bytes written.
+ */
+static void ctdb_event_reply_data_push(struct ctdb_event_reply *in,
+				       uint8_t *buf,
+				       size_t *npush)
+{
+	size_t pos = 0;
+	size_t n;
+
+	ctdb_event_command_push(in->cmd, buf + pos, &n);
+	pos += n;
+
+	ctdb_int32_push(&in->result, buf + pos, &n);
+	pos += n;
+
+	/* Failed replies and payload-less commands stop after the result. */
+	if (in->result == 0 && in->cmd == CTDB_EVENT_CMD_STATUS) {
+		ctdb_event_reply_status_push(in->data.status, buf + pos, &n);
+		pos += n;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Decode a reply body from buf into a newly allocated reply on mem_ctx.
+ *
+ * The body is the command, the result and, only when the result is 0 and
+ * the command is STATUS, a status payload allocated as a talloc child of
+ * the reply.
+ *
+ * Returns 0 and sets *out and *npull on success, ENOMEM if the reply
+ * cannot be allocated, or the error from decoding a field (the reply is
+ * then freed).
+ */
+static int ctdb_event_reply_data_pull(uint8_t *buf,
+				      size_t buflen,
+				      TALLOC_CTX *mem_ctx,
+				      struct ctdb_event_reply **out,
+				      size_t *npull)
+{
+	struct ctdb_event_reply *value;
+	size_t offset = 0, np;
+	int ret;
+
+	/*
+	 * Zero the reply so that the data union is NULL, rather than
+	 * uninitialised heap contents, whenever no payload is decoded
+	 * (non-zero result, or a command without reply data).
+	 */
+	value = talloc_zero(mem_ctx, struct ctdb_event_reply);
+	if (value == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_event_command_pull(buf+offset,
+				      buflen-offset,
+				      &value->cmd,
+				      &np);
+	if (ret != 0) {
+		goto fail;
+	}
+	offset += np;
+
+	ret = ctdb_int32_pull(buf+offset, buflen-offset, &value->result, &np);
+	if (ret != 0) {
+		goto fail;
+	}
+	offset += np;
+
+	/* A failed reply carries nothing beyond the result. */
+	if (value->result != 0) {
+		goto done;
+	}
+
+	switch (value->cmd) {
+	case CTDB_EVENT_CMD_STATUS:
+		ret = ctdb_event_reply_status_pull(buf+offset,
+						   buflen-offset,
+						   value,
+						   &value->data.status,
+						   &np);
+		break;
+
+	default:
+		/* ret is still 0 from the result pull above. */
+		np = 0;
+		break;
+	}
+
+	if (ret != 0) {
+		goto fail;
+	}
+	offset += np;
+
+done:
+	*out = value;
+	*npull = offset;
+
+	return 0;
+
+fail:
+	talloc_free(value);
+	return ret;
+}
+
+/*
+ * Total serialised size of a request packet: header plus request body.
+ */
+size_t ctdb_event_request_len(struct ctdb_event_header *h,
+			      struct ctdb_event_request *in)
+{
+	size_t header_len = ctdb_event_header_len(h);
+	size_t body_len = ctdb_event_request_data_len(in);
+
+	return header_len + body_len;
+}
+
+/*
+ * Serialise a complete request packet (header + body) into buf.
+ *
+ * *buflen is the size of buf on entry.  If it is smaller than the packet,
+ * *buflen is updated to the required size and EMSGSIZE is returned
+ * without writing anything.  Otherwise h->length is set to *buflen, the
+ * packet is written and 0 is returned.
+ */
+int ctdb_event_request_push(struct ctdb_event_header *h,
+			    struct ctdb_event_request *in,
+			    uint8_t *buf,
+			    size_t *buflen)
+{
+	size_t needed = ctdb_event_request_len(h, in);
+	size_t pos = 0;
+	size_t n;
+
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	/* The header advertises the caller's buffer length. */
+	h->length = *buflen;
+
+	ctdb_event_header_push(h, buf + pos, &n);
+	pos += n;
+
+	ctdb_event_request_data_push(in, buf + pos, &n);
+	pos += n;
+
+	return (pos > *buflen) ? EMSGSIZE : 0;
+}
+
+/*
+ * Decode a complete request packet from buf: the header into *h and the
+ * body into a newly allocated request on mem_ctx, returned through *out.
+ *
+ * Returns 0 on success, the error from decoding the header or the body,
+ * or EMSGSIZE if more bytes were consumed than buflen.
+ */
+int ctdb_event_request_pull(uint8_t *buf,
+			    size_t buflen,
+			    struct ctdb_event_header *h,
+			    TALLOC_CTX *mem_ctx,
+			    struct ctdb_event_request **out)
+{
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	rc = ctdb_event_header_pull(buf + pos, buflen - pos, h, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_event_request_data_pull(buf + pos, buflen - pos,
+					  mem_ctx, out, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	return (pos > buflen) ? EMSGSIZE : 0;
+}
+
+/*
+ * Total serialised size of a reply packet: header plus reply body.
+ */
+size_t ctdb_event_reply_len(struct ctdb_event_header *h,
+			    struct ctdb_event_reply *in)
+{
+	size_t header_len = ctdb_event_header_len(h);
+	size_t body_len = ctdb_event_reply_data_len(in);
+
+	return header_len + body_len;
+}
+
+/*
+ * Serialise a complete reply packet (header + body) into buf.
+ *
+ * *buflen is the size of buf on entry.  If it is smaller than the packet,
+ * *buflen is updated to the required size and EMSGSIZE is returned
+ * without writing anything.  Otherwise h->length is set to *buflen, the
+ * packet is written and 0 is returned.
+ */
+int ctdb_event_reply_push(struct ctdb_event_header *h,
+			  struct ctdb_event_reply *in,
+			  uint8_t *buf,
+			  size_t *buflen)
+{
+	size_t needed = ctdb_event_reply_len(h, in);
+	size_t pos = 0;
+	size_t n;
+
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	/* The header advertises the caller's buffer length. */
+	h->length = *buflen;
+
+	ctdb_event_header_push(h, buf + pos, &n);
+	pos += n;
+
+	ctdb_event_reply_data_push(in, buf + pos, &n);
+	pos += n;
+
+	return (pos > *buflen) ? EMSGSIZE : 0;
+}
+
+/*
+ * Decode a complete reply packet from buf: the header into *h and the
+ * body into a newly allocated reply on mem_ctx, returned through *out.
+ *
+ * Returns 0 on success, the error from decoding the header or the body,
+ * or EMSGSIZE if more bytes were consumed than buflen.
+ */
+int ctdb_event_reply_pull(uint8_t *buf,
+			  size_t buflen,
+			  struct ctdb_event_header *h,
+			  TALLOC_CTX *mem_ctx,
+			  struct ctdb_event_reply **out)
+{
+	size_t pos = 0;
+	size_t n;
+	int rc;
+
+	rc = ctdb_event_header_pull(buf + pos, buflen - pos, h, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	rc = ctdb_event_reply_data_pull(buf + pos, buflen - pos,
+					mem_ctx, out, &n);
+	if (rc != 0) {
+		return rc;
+	}
+	pos += n;
+
+	return (pos > buflen) ? EMSGSIZE : 0;
+}
diff --git a/ctdb/event/event_protocol.h b/ctdb/event/event_protocol.h
new file mode 100644
index 0000000..e7680fc
--- /dev/null
+++ b/ctdb/event/event_protocol.h
@@ -0,0 +1,100 @@
+/*
+ CTDB event daemon protocol
+ Based on eventd code
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_EVENT_PROTOCOL_H__
+#define __CTDB_EVENT_PROTOCOL_H__
+
+#define CTDB_EVENT_PROTOCOL_VERSION 1
+
+/*
+ * Action carried by a SCRIPT request.  The numeric values are the wire
+ * encoding: the protocol code transmits them as a uint32_t and rejects
+ * anything other than 0 or 1 with EINVAL when decoding.
+ */
+enum ctdb_event_script_action {
+ CTDB_EVENT_SCRIPT_DISABLE = 0,
+ CTDB_EVENT_SCRIPT_ENABLE = 1,
+};
+
+/*
+ * Command identifier found in every request and reply body.  The numeric
+ * values are the wire encoding (sent as a uint32_t); only 1, 2 and 3 are
+ * accepted when decoding.  CTDB_EVENT_CMD_MAX is not a command: it is
+ * used as an exclusive upper bound when iterating over the commands.
+ */
+enum ctdb_event_command {
+ CTDB_EVENT_CMD_RUN = 1,
+ CTDB_EVENT_CMD_STATUS = 2,
+ CTDB_EVENT_CMD_SCRIPT = 3,
+ CTDB_EVENT_CMD_MAX = 4,
+};
+
+struct ctdb_event_script {
+ const char *name;
+ struct timeval begin;
+ struct timeval end;
+ int result;
+ const char *output;
+};
+
+struct ctdb_event_script_list {
+ int num_scripts;
+ struct ctdb_event_script *script;
+};
+
+#define CTDB_EVENT_RUN_ALL 1
+
+struct ctdb_event_request_run {
+ const char *component;
+ const char *event;
+ const char *args;
+ uint32_t timeout;
+ uint32_t flags;
+};
+
+struct ctdb_event_request_status {
+ const char *component;
+ const char *event;
+};
+
+struct ctdb_event_request_script {
+ const char *component;
+ const char *script;
+ enum ctdb_event_script_action action;
+};
+
+struct ctdb_event_reply_status {
+ int32_t summary;
+ struct ctdb_event_script_list *script_list;
+};
+
+/*
+ * Fixed header that precedes every request and reply body.  All three
+ * fields are transmitted as uint32_t, in declaration order.
+ */
+struct ctdb_event_header {
+ /* Overwritten by the request/reply push functions with the size of
+  * the buffer they are given. */
+ uint32_t length;
+ /* NOTE(review): presumably CTDB_EVENT_PROTOCOL_VERSION - confirm
+  * where this is set and checked. */
+ uint32_t version;
+ /* NOTE(review): presumably used to match a reply to its request -
+  * confirm against the callers. */
+ uint32_t reqid;
+};
+
+struct ctdb_event_request {
+ enum ctdb_event_command cmd;
+ union {
+ struct ctdb_event_request_run *run;
+ struct ctdb_event_request_status *status;
+ struct ctdb_event_request_script *script;
+ } data;
+};
+
+/*
+ * Body of a reply packet.  The protocol code only encodes and decodes a
+ * payload when result is 0 and cmd is CTDB_EVENT_CMD_STATUS; in every
+ * other case the data union is not transmitted.
+ */
+struct ctdb_event_reply {
+ enum ctdb_event_command cmd;
+ int32_t result;
+ union {
+ struct ctdb_event_reply_status *status;
+ } data;
+};
+
+#endif /* __CTDB_EVENT_PROTOCOL_H__ */
diff --git a/ctdb/event/event_protocol_api.h b/ctdb/event/event_protocol_api.h
new file mode 100644
index 0000000..e2ab439
--- /dev/null
+++ b/ctdb/event/event_protocol_api.h
@@ -0,0 +1,61 @@
+/*
+ CTDB event daemon protocol
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_EVENT_PROTOCOL_API_H__
+#define __CTDB_EVENT_PROTOCOL_API_H__
+
+#include <talloc.h>
+
+#include "event/event_protocol.h"
+
+/* From event/event_protocol.c */
+
+int ctdb_event_header_extract(uint8_t *buf,
+ size_t buflen,
+ struct ctdb_event_header *h);
+
+size_t ctdb_event_request_len(struct ctdb_event_header *h,
+ struct ctdb_event_request *in);
+int ctdb_event_request_push(struct ctdb_event_header *h,
+ struct ctdb_event_request *in,
+ uint8_t *buf,
+ size_t *buflen);
+int ctdb_event_request_pull(uint8_t *buf,
+ size_t buflen,
+ struct ctdb_event_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_event_request **out);
+
+size_t ctdb_event_reply_len(struct ctdb_event_header *h,
+ struct ctdb_event_reply *in);
+int ctdb_event_reply_push(struct ctdb_event_header *h,
+ struct ctdb_event_reply *in,
+ uint8_t *buf,
+ size_t *buflen);
+int ctdb_event_reply_pull(uint8_t *buf,
+ size_t buflen,
+ struct ctdb_event_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_event_reply **out);
+
+/* From event/event_protocol_util.c */
+
+const char *ctdb_event_command_to_string(enum ctdb_event_command cmd);
+
+#endif /* __CTDB_EVENT_PROTOCOL_API_H__ */
diff --git a/ctdb/event/event_protocol_test.c b/ctdb/event/event_protocol_test.c
new file mode 100644
index 0000000..8f34fa5
--- /dev/null
+++ b/ctdb/event/event_protocol_test.c
@@ -0,0 +1,412 @@
+/*
+ CTDB event daemon - protocol test
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+#include <assert.h>
+
+#define EVENT_PROTOCOL_TEST
+#include "event/event_protocol.c"
+
+#include "tests/src/protocol_common_basic.h"
+
+/*
+ * Functions to fill and verify event protocol structures
+ */
+
+static void fill_ctdb_event_script(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_script *p)
+{
+ fill_ctdb_stringn(mem_ctx, &p->name);
+ fill_ctdb_timeval(&p->begin);
+ fill_ctdb_timeval(&p->end);
+ p->result = rand32i();
+ fill_ctdb_stringn(mem_ctx, &p->output);
+}
+
+static void verify_ctdb_event_script(struct ctdb_event_script *p1,
+ struct ctdb_event_script *p2)
+{
+ verify_ctdb_stringn(&p1->name, &p2->name);
+ verify_ctdb_timeval(&p1->begin, &p2->begin);
+ verify_ctdb_timeval(&p1->end, &p2->end);
+ assert(p1->result == p2->result);
+ verify_ctdb_stringn(&p1->output, &p2->output);
+}
+
+static void fill_ctdb_event_script_list(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_script_list *p)
+{
+ int i;
+
+ p->num_scripts = rand_int(32);
+ if (p->num_scripts > 0) {
+ p->script = talloc_array(mem_ctx,
+ struct ctdb_event_script,
+ p->num_scripts);
+ assert(p->script != NULL);
+
+ for (i=0; i<p->num_scripts; i++) {
+ fill_ctdb_event_script(mem_ctx, &p->script[i]);
+ }
+ } else {
+ p->script = NULL;
+ }
+}
+
+static void verify_ctdb_event_script_list(struct ctdb_event_script_list *p1,
+ struct ctdb_event_script_list *p2)
+{
+ int i;
+
+ assert(p1->num_scripts == p2->num_scripts);
+ for (i=0; i<p1->num_scripts; i++) {
+ verify_ctdb_event_script(&p1->script[i], &p2->script[i]);
+ }
+}
+
+static void fill_ctdb_event_request_run(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_request_run *p)
+{
+ fill_ctdb_stringn(mem_ctx, &p->component);
+ fill_ctdb_stringn(mem_ctx, &p->event);
+ fill_ctdb_stringn(mem_ctx, &p->args);
+ p->timeout = rand32();
+ p->flags = rand32();
+}
+
+static void verify_ctdb_event_request_run(struct ctdb_event_request_run *p1,
+ struct ctdb_event_request_run *p2)
+{
+ verify_ctdb_stringn(&p1->component, &p2->component);
+ verify_ctdb_stringn(&p1->event, &p2->event);
+ verify_ctdb_stringn(&p1->args, &p2->args);
+ assert(p1->timeout == p2->timeout);
+ assert(p1->flags == p2->flags);
+}
+
+static void fill_ctdb_event_request_status(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_request_status *p)
+{
+ fill_ctdb_stringn(mem_ctx, &p->component);
+ fill_ctdb_stringn(mem_ctx, &p->event);
+}
+
+static void verify_ctdb_event_request_status(
+ struct ctdb_event_request_status *p1,
+ struct ctdb_event_request_status *p2)
+{
+ verify_ctdb_stringn(&p1->component, &p2->component);
+ verify_ctdb_stringn(&p1->event, &p2->event);
+}
+
+/*
+ * Populate a SCRIPT request with random strings (allocated on mem_ctx)
+ * and a randomly chosen enable/disable action.
+ */
+static void fill_ctdb_event_request_script(TALLOC_CTX *mem_ctx,
+					   struct ctdb_event_request_script *p)
+{
+	fill_ctdb_stringn(mem_ctx, &p->component);
+	fill_ctdb_stringn(mem_ctx, &p->script);
+
+	/*
+	 * Draw from two values so that both actions get exercised.
+	 * NOTE(review): rand_int(max) is presumed to return a value below
+	 * max, in which case rand_int(1) could only ever select DISABLE -
+	 * confirm against the rand_int() implementation.
+	 */
+	if (rand_int(2) == 0) {
+		p->action = CTDB_EVENT_SCRIPT_DISABLE;
+	} else {
+		p->action = CTDB_EVENT_SCRIPT_ENABLE;
+	}
+}
+
+static void fill_ctdb_event_reply_status(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_reply_status *p)
+{
+ p->summary = rand32i();
+ p->script_list = talloc(mem_ctx, struct ctdb_event_script_list);
+ assert(p->script_list != NULL);
+
+ fill_ctdb_event_script_list(mem_ctx, p->script_list);
+}
+
+static void verify_ctdb_event_reply_status(struct ctdb_event_reply_status *p1,
+ struct ctdb_event_reply_status *p2)
+{
+ assert(p1->summary == p2->summary);
+ verify_ctdb_event_script_list(p1->script_list, p2->script_list);
+}
+
+static void verify_ctdb_event_request_script(
+ struct ctdb_event_request_script *p1,
+ struct ctdb_event_request_script *p2)
+{
+ verify_ctdb_stringn(&p1->component, &p2->component);
+ verify_ctdb_stringn(&p1->script, &p2->script);
+ assert(p1->action == p2->action);
+}
+
+static void fill_ctdb_event_request_data(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_request *p,
+ uint32_t cmd)
+{
+ p->cmd = cmd;
+
+ switch (cmd) {
+ case CTDB_EVENT_CMD_RUN:
+ p->data.run = talloc(mem_ctx, struct ctdb_event_request_run);
+ assert(p->data.run != NULL);
+
+ fill_ctdb_event_request_run(mem_ctx, p->data.run);
+ break;
+
+ case CTDB_EVENT_CMD_STATUS:
+ p->data.status = talloc(mem_ctx,
+ struct ctdb_event_request_status);
+ assert(p->data.status != NULL);
+
+ fill_ctdb_event_request_status(mem_ctx, p->data.status);
+ break;
+
+ case CTDB_EVENT_CMD_SCRIPT:
+ p->data.script = talloc(mem_ctx,
+ struct ctdb_event_request_script);
+ assert(p->data.script != NULL);
+
+ fill_ctdb_event_request_script(mem_ctx, p->data.script);
+ break;
+
+ default:
+ assert(cmd > 0 && cmd < CTDB_EVENT_CMD_MAX);
+ }
+}
+
+static void verify_ctdb_event_request_data(struct ctdb_event_request *p1,
+ struct ctdb_event_request *p2)
+{
+ assert(p1->cmd == p2->cmd);
+
+ switch (p1->cmd) {
+ case CTDB_EVENT_CMD_RUN:
+ verify_ctdb_event_request_run(p1->data.run, p2->data.run);
+ break;
+
+ case CTDB_EVENT_CMD_STATUS:
+ verify_ctdb_event_request_status(p1->data.status,
+ p2->data.status);
+ break;
+
+ case CTDB_EVENT_CMD_SCRIPT:
+ verify_ctdb_event_request_script(p1->data.script,
+ p2->data.script);
+ break;
+
+ default:
+ assert(p1->cmd > 0 && p1->cmd < CTDB_EVENT_CMD_MAX);
+ }
+}
+
+/*
+ * Populate a reply body for the given command with random contents.
+ *
+ * A payload is only generated when the result is 0 and the command is
+ * STATUS, mirroring what the protocol code transmits.
+ */
+static void fill_ctdb_event_reply_data(TALLOC_CTX *mem_ctx,
+				       struct ctdb_event_reply *p,
+				       uint32_t cmd)
+{
+	p->cmd = cmd;
+
+	/*
+	 * Pick a successful result about half the time so that the
+	 * payload path is actually exercised.
+	 * NOTE(review): rand32i() is presumed to return an arbitrary
+	 * 32-bit value, which would almost never be 0 on its own - confirm
+	 * against the test helpers.
+	 */
+	if (rand_int(2) == 0) {
+		p->result = 0;
+	} else {
+		p->result = rand32i();
+	}
+
+	if (p->result != 0) {
+		return;
+	}
+
+	switch (cmd) {
+	case CTDB_EVENT_CMD_STATUS:
+		p->data.status = talloc(mem_ctx,
+					struct ctdb_event_reply_status);
+		assert(p->data.status != NULL);
+
+		fill_ctdb_event_reply_status(mem_ctx, p->data.status);
+		break;
+
+	default:
+		/* Other valid commands have no reply payload. */
+		assert(cmd > 0 && cmd < CTDB_EVENT_CMD_MAX);
+	}
+}
+
+static void verify_ctdb_event_reply_data(struct ctdb_event_reply *p1,
+ struct ctdb_event_reply *p2)
+{
+ assert(p1->cmd == p2->cmd);
+ assert(p1->result == p2->result);
+
+ if (p1->result != 0) {
+ return;
+ }
+
+ switch (p1->cmd) {
+ case CTDB_EVENT_CMD_STATUS:
+ verify_ctdb_event_reply_status(p1->data.status,
+ p2->data.status);
+ break;
+
+ default:
+ assert(p1->cmd > 0 && p1->cmd < CTDB_EVENT_CMD_MAX);
+ }
+}
+
+static void fill_ctdb_event_header(struct ctdb_event_header *p)
+{
+ p->length = 0; /* updated by push functions */
+ p->version = 0; /* updated by push functions */
+ p->reqid = rand32();
+}
+
+static void verify_ctdb_event_header(struct ctdb_event_header *p1,
+ struct ctdb_event_header *p2)
+{
+ assert(p1->length == p2->length);
+ assert(p1->version == p2->version);
+ assert(p1->reqid == p2->reqid);
+}
+
+static void fill_ctdb_event_request(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_request *p,
+ uint32_t cmd)
+{
+ fill_ctdb_event_request_data(mem_ctx, p, cmd);
+}
+
+static void verify_ctdb_event_request(struct ctdb_event_request *p1,
+ struct ctdb_event_request *p2)
+{
+ verify_ctdb_event_request_data(p1, p2);
+}
+
+static void fill_ctdb_event_reply(TALLOC_CTX *mem_ctx,
+ struct ctdb_event_reply *p,
+ uint32_t cmd)
+{
+ fill_ctdb_event_reply_data(mem_ctx, p, cmd);
+}
+
+static void verify_ctdb_event_reply(struct ctdb_event_reply *p1,
+ struct ctdb_event_reply *p2)
+{
+ verify_ctdb_event_reply_data(p1, p2);
+}
+
+#define EVENT_PROTOCOL1_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(uint32_t cmd) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ TYPE c1, *c2; \
+ uint8_t *buf; \
+ size_t buflen, np; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s %u\n", #NAME, cmd); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ FILL_FUNC(NAME)(mem_ctx, &c1, cmd); \
+ buflen = LEN_FUNC(NAME)(&c1); \
+ buf = talloc_size(mem_ctx, buflen); \
+ assert(buf != NULL); \
+ np = 0; \
+ PUSH_FUNC(NAME)(&c1, buf, &np); \
+ assert(np == buflen); \
+ np = 0; \
+ ret = PULL_FUNC(NAME)(buf, buflen, mem_ctx, &c2, &np); \
+ assert(ret == 0); \
+ assert(np == buflen); \
+ VERIFY_FUNC(NAME)(&c1, c2); \
+ talloc_free(mem_ctx); \
+}
+
+#define EVENT_PROTOCOL2_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(uint32_t cmd) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ struct ctdb_event_header h1, h2; \
+ TYPE c1, *c2; \
+ uint8_t *buf; \
+ size_t buflen, len; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s %u\n", #NAME, cmd); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_event_header(&h1); \
+ FILL_FUNC(NAME)(mem_ctx, &c1, cmd); \
+ buflen = LEN_FUNC(NAME)(&h1, &c1); \
+ buf = talloc_size(mem_ctx, buflen); \
+ assert(buf != NULL); \
+ len = 0; \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, buf, &len); \
+ assert(ret == EMSGSIZE); \
+ assert(len == buflen); \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, buf, &buflen); \
+ assert(ret == 0); \
+ ret = PULL_FUNC(NAME)(buf, buflen, &h2, mem_ctx, &c2); \
+ assert(ret == 0); \
+ verify_ctdb_event_header(&h1, &h2); \
+ VERIFY_FUNC(NAME)(&c1, c2); \
+ talloc_free(mem_ctx); \
+}
+
+PROTOCOL_TYPE3_TEST(struct ctdb_event_script, ctdb_event_script);
+PROTOCOL_TYPE3_TEST(struct ctdb_event_script_list, ctdb_event_script_list);
+
+PROTOCOL_TYPE3_TEST(struct ctdb_event_request_run, ctdb_event_request_run);
+PROTOCOL_TYPE3_TEST(struct ctdb_event_request_status,
+ ctdb_event_request_status);
+PROTOCOL_TYPE3_TEST(struct ctdb_event_request_script,
+ ctdb_event_request_script);
+
+PROTOCOL_TYPE3_TEST(struct ctdb_event_reply_status, ctdb_event_reply_status);
+
+EVENT_PROTOCOL1_TEST(struct ctdb_event_request, ctdb_event_request_data);
+EVENT_PROTOCOL1_TEST(struct ctdb_event_reply, ctdb_event_reply_data);
+
+EVENT_PROTOCOL2_TEST(struct ctdb_event_request, ctdb_event_request);
+EVENT_PROTOCOL2_TEST(struct ctdb_event_reply, ctdb_event_reply);
+
+static void event_protocol_test(void)
+{
+ uint32_t cmd;
+
+ TEST_FUNC(ctdb_event_script)();
+ TEST_FUNC(ctdb_event_script_list)();
+
+ TEST_FUNC(ctdb_event_request_run)();
+ TEST_FUNC(ctdb_event_request_status)();
+ TEST_FUNC(ctdb_event_request_script)();
+
+ TEST_FUNC(ctdb_event_reply_status)();
+
+ for (cmd=1; cmd<CTDB_EVENT_CMD_MAX; cmd++) {
+ TEST_FUNC(ctdb_event_request_data)(cmd);
+ }
+ for (cmd=1; cmd<CTDB_EVENT_CMD_MAX; cmd++) {
+ TEST_FUNC(ctdb_event_reply_data)(cmd);
+ }
+
+ for (cmd=1; cmd<CTDB_EVENT_CMD_MAX; cmd++) {
+ TEST_FUNC(ctdb_event_request)(cmd);
+ }
+ for (cmd=1; cmd<CTDB_EVENT_CMD_MAX; cmd++) {
+ TEST_FUNC(ctdb_event_reply)(cmd);
+ }
+}
+
+int main(int argc, const char **argv)
+{
+ protocol_test_iterate(argc, argv, event_protocol_test);
+ return 0;
+}
diff --git a/ctdb/event/event_protocol_util.c b/ctdb/event/event_protocol_util.c
new file mode 100644
index 0000000..cc59615
--- /dev/null
+++ b/ctdb/event/event_protocol_util.c
@@ -0,0 +1,46 @@
+/*
+ CTDB event daemon - protocol utilities
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "event/event_protocol.h"
+#include "event/event_protocol_api.h"
+
+/*
+ * Printable label for each event command.
+ *
+ * The entry with a NULL label terminates the table; lookups stop there.
+ */
+static struct {
+ enum ctdb_event_command command;
+ const char *label;
+} event_command_map[] = {
+ { CTDB_EVENT_CMD_RUN, "RUN" },
+ { CTDB_EVENT_CMD_STATUS, "STATUS" },
+ { CTDB_EVENT_CMD_SCRIPT, "SCRIPT" },
+ { CTDB_EVENT_CMD_MAX, NULL },
+};
+
+/*
+ * Translate an event command into its printable label.
+ *
+ * Returns a pointer to a static string: the label from event_command_map
+ * for a known command, or "UNKNOWN" if the command is not in the table.
+ */
+const char *ctdb_event_command_to_string(enum ctdb_event_command command)
+{
+	size_t idx = 0;
+
+	/* The table is terminated by the first entry without a label */
+	while (event_command_map[idx].label != NULL) {
+		if (command == event_command_map[idx].command) {
+			return event_command_map[idx].label;
+		}
+		idx++;
+	}
+
+	return "UNKNOWN";
+}
diff --git a/ctdb/event/event_request.c b/ctdb/event/event_request.c
new file mode 100644
index 0000000..303e735
--- /dev/null
+++ b/ctdb/event/event_request.c
@@ -0,0 +1,217 @@
+/*
+ CTDB event daemon - handle requests
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "common/logging.h"
+
+#include "event/event_private.h"
+#include "event/event_protocol_api.h"
+
+/* State carried by a single event_request_send()/recv() computation */
+struct event_request_state {
+ /* Request being processed; the caller's pointer is stored, not copied */
+ struct ctdb_event_request *request;
+ /* Reply from the command dispatcher, or an EIO reply built on failure */
+ struct ctdb_event_reply *reply;
+};
+
+static void event_request_done(struct tevent_req *subreq);
+
+/*
+ * Start processing a parsed request.
+ *
+ * The request is handed to event_cmd_dispatch_send() and completion is
+ * reported via event_request_done(). The header argument is accepted but
+ * not used by this function.
+ *
+ * Returns NULL if the tevent request itself cannot be created.
+ */
+static struct tevent_req *event_request_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct event_context *eventd,
+ struct ctdb_event_header *header,
+ struct ctdb_event_request *request)
+{
+ struct tevent_req *req, *subreq;
+ struct event_request_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct event_request_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ /* Remembered so the completion handler can name the failed command */
+ state->request = request;
+
+ subreq = event_cmd_dispatch_send(state, ev, eventd, request);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, event_request_done, req);
+
+ return req;
+}
+
+/*
+ * Completion callback for the command dispatch sub-request.
+ *
+ * If the dispatch failed, the failure is logged and a reply is built that
+ * carries the request's command and result EIO. The request is then
+ * marked done either way; only an allocation failure for that
+ * replacement reply fails it.
+ */
+static void event_request_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct event_request_state *state = tevent_req_data(
+ req, struct event_request_state);
+ int ret;
+ bool ok;
+
+ ok = event_cmd_dispatch_recv(subreq, &ret, state, &state->reply);
+ TALLOC_FREE(subreq);
+ if (!ok) {
+ D_ERR("Command %s failed, ret=%d\n",
+ ctdb_event_command_to_string(state->request->cmd), ret);
+
+ /* Turn the failure into an error reply for the client */
+ state->reply = talloc_zero(state, struct ctdb_event_reply);
+ if (tevent_req_nomem(state->reply, req)) {
+ return;
+ }
+
+ state->reply->cmd = state->request->cmd;
+ state->reply->result = EIO;
+ }
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of event_request_send().
+ *
+ * Returns false with the error stored via perr if the request failed.
+ * Otherwise the reply is moved onto mem_ctx, stored in *reply and true is
+ * returned.
+ */
+static bool event_request_recv(struct tevent_req *req,
+ int *perr,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_event_reply **reply)
+{
+ struct event_request_state *state = tevent_req_data(
+ req, struct event_request_state);
+
+ if (tevent_req_is_unix_error(req, perr)) {
+ return false;
+ }
+
+ *reply = talloc_steal(mem_ctx, state->reply);
+
+ return true;
+}
+
+/* State carried by a single event_pkt_send()/recv() computation */
+struct event_pkt_state {
+ /* Header pulled from the request packet; its reqid is echoed in the reply */
+ struct ctdb_event_header header;
+ /* Request pulled from the packet; freed once the reply is available */
+ struct ctdb_event_request *request;
+ /* Marshalled reply packet and its length */
+ uint8_t *buf;
+ size_t buflen;
+};
+
+static void event_pkt_done(struct tevent_req *subreq);
+
+/*
+ * Start handling one raw request packet.
+ *
+ * The packet in buf/buflen is unmarshalled into a header and a request,
+ * which is then processed by event_request_send(). A packet that cannot
+ * be unmarshalled fails the request immediately with EPROTO.
+ *
+ * Returns NULL if the tevent request itself cannot be created.
+ */
+struct tevent_req *event_pkt_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct event_context *eventd,
+ uint8_t *buf,
+ size_t buflen)
+{
+ struct tevent_req *req, *subreq;
+ struct event_pkt_state *state;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state, struct event_pkt_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ ret = ctdb_event_request_pull(buf,
+ buflen,
+ &state->header,
+ state,
+ &state->request);
+ if (ret != 0) {
+ /* Ignore invalid packets */
+ D_ERR("Invalid packet received, buflen=%zu\n", buflen);
+ tevent_req_error(req, EPROTO);
+ return tevent_req_post(req, ev);
+ }
+
+ subreq = event_request_send(state,
+ ev,
+ eventd,
+ &state->header,
+ state->request);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, event_pkt_done, req);
+
+ return req;
+}
+
+/*
+ * Completion callback for the request sub-request.
+ *
+ * Marshals the reply into a newly allocated buffer in the packet state.
+ * The reply header carries only the reqid of the request header. The
+ * parsed request and the reply structure are both freed here; only the
+ * marshalled buffer survives for event_pkt_recv().
+ */
+static void event_pkt_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct event_pkt_state *state = tevent_req_data(
+ req, struct event_pkt_state);
+ struct ctdb_event_header header;
+ struct ctdb_event_reply *reply;
+ int ret;
+ bool ok;
+
+ ok = event_request_recv(subreq, &ret, state, &reply);
+ TALLOC_FREE(subreq);
+ TALLOC_FREE(state->request);
+ if (!ok) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* Fields not named here are zero-initialised */
+ header = (struct ctdb_event_header) {
+ .reqid = state->header.reqid,
+ };
+
+ state->buflen = ctdb_event_reply_len(&header, reply);
+ state->buf = talloc_zero_size(state, state->buflen);
+ if (tevent_req_nomem(state->buf, req)) {
+ talloc_free(reply);
+ return;
+ }
+
+ ret = ctdb_event_reply_push(&header,
+ reply,
+ state->buf,
+ &state->buflen);
+ talloc_free(reply);
+ if (ret != 0) {
+ talloc_free(state->buf);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of event_pkt_send().
+ *
+ * Returns false with the error stored via perr if the request failed.
+ * Otherwise the marshalled reply buffer is moved onto mem_ctx and
+ * returned in *buf, with its length in *buflen.
+ */
+bool event_pkt_recv(struct tevent_req *req,
+ int *perr,
+ TALLOC_CTX *mem_ctx,
+ uint8_t **buf,
+ size_t *buflen)
+{
+ struct event_pkt_state *state = tevent_req_data(
+ req, struct event_pkt_state);
+
+ if (tevent_req_is_unix_error(req, perr)) {
+ return false;
+ }
+
+ *buf = talloc_steal(mem_ctx, state->buf);
+ *buflen = state->buflen;
+
+ return true;
+}
diff --git a/ctdb/event/event_tool.c b/ctdb/event/event_tool.c
new file mode 100644
index 0000000..d6b7156
--- /dev/null
+++ b/ctdb/event/event_tool.c
@@ -0,0 +1,846 @@
+/*
+ CTDB event daemon utility code
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/time.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+
+#include "common/cmdline.h"
+#include "common/logging.h"
+#include "common/path.h"
+#include "common/event_script.h"
+
+#include "event/event_protocol_api.h"
+#include "event/event.h"
+#include "event/event_tool.h"
+
+/* Context shared by all ctdb-event tool commands */
+struct event_tool_context {
+ /* Command line parser, set up by event_tool_init() */
+ struct cmdline_context *cmdline;
+ /* Event loop, created by event_tool_run() */
+ struct tevent_context *ev;
+ /* Event daemon client, initialised by the commands that need it */
+ struct ctdb_event_context *eclient;
+};
+
+/*
+ * Join argv[from] .. argv[argc-1] into one string, separated by single
+ * spaces.
+ *
+ * On success 0 is returned and *result points to a string allocated on
+ * mem_ctx, or is NULL if there are no arguments at or after "from".
+ * Returns ENOMEM on allocation failure, leaving *result untouched.
+ */
+static int compact_args(TALLOC_CTX *mem_ctx,
+			const char **argv,
+			int argc,
+			int from,
+			const char **result)
+{
+	char *joined;
+	int idx;
+
+	if (from >= argc) {
+		/* Nothing to join */
+		*result = NULL;
+		return 0;
+	}
+
+	joined = talloc_strdup(mem_ctx, argv[from]);
+	if (joined == NULL) {
+		return ENOMEM;
+	}
+
+	idx = from + 1;
+	while (idx < argc) {
+		joined = talloc_asprintf_append(joined, " %s", argv[idx]);
+		if (joined == NULL) {
+			return ENOMEM;
+		}
+		idx++;
+	}
+
+	*result = joined;
+	return 0;
+}
+
+/*
+ * "run" command: ask the event daemon to run an event.
+ *
+ * Arguments: argv[0] is the timeout, argv[1] the component, argv[2] the
+ * event; any further arguments are joined with spaces and passed as the
+ * event's arguments.
+ *
+ * Returns 1 on usage, allocation or communication errors, the error from
+ * ctdb_event_init() if that fails, and otherwise the absolute value of
+ * the result reported for the event (0 on success).
+ */
+static int event_command_run(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ struct event_tool_context *ctx = talloc_get_type_abort(
+ private_data, struct event_tool_context);
+ struct tevent_req *req;
+ struct ctdb_event_request_run request_run;
+ const char *arg_str = NULL;
+ const char *t;
+ int timeout, ret = 0, result = 0;
+ bool ok;
+
+ if (argc < 3) {
+ cmdline_usage(ctx->cmdline, "run");
+ return 1;
+ }
+
+ ret = ctdb_event_init(ctx, ctx->ev, &ctx->eclient);
+ if (ret != 0) {
+ D_ERR("Failed to initialize event client, ret=%d\n", ret);
+ return ret;
+ }
+
+ /* Negative timeouts are clamped to 0 */
+ timeout = atoi(argv[0]);
+ if (timeout < 0) {
+ timeout = 0;
+ }
+
+ ret = compact_args(mem_ctx, argv, argc, 3, &arg_str);
+ if (ret != 0) {
+ D_ERR("Memory allocation error\n");
+ return 1;
+ }
+
+ request_run.component = argv[1];
+ request_run.event = argv[2];
+ request_run.args = arg_str;
+ request_run.timeout = timeout;
+ request_run.flags = 0;
+
+ /* CTDB_EVENT_RUN_ALL is only honoured when CTDB_TEST_MODE is also set */
+ t = getenv("CTDB_TEST_MODE");
+ if (t != NULL) {
+ t = getenv("CTDB_EVENT_RUN_ALL");
+ if (t != NULL) {
+ request_run.flags = CTDB_EVENT_RUN_ALL;
+ }
+ }
+
+ req = ctdb_event_run_send(mem_ctx,
+ ctx->ev,
+ ctx->eclient,
+ &request_run);
+ if (req == NULL) {
+ D_ERR("Memory allocation error\n");
+ return 1;
+ }
+
+ tevent_req_poll(req, ctx->ev);
+
+ ok = ctdb_event_run_recv(req, &ret, &result);
+ if (!ok) {
+ D_ERR("Failed to run event %s in %s, ret=%d\n",
+ argv[2],
+ argv[1],
+ ret);
+ return 1;
+ }
+
+ D_NOTICE("Command run finished with result=%d\n", result);
+
+ if (result == ENOENT) {
+ printf("Event dir for %s does not exist\n", argv[1]);
+ } else if (result == ETIMEDOUT) {
+ printf("Event %s in %s timed out\n", argv[2], argv[1]);
+ } else if (result == ECANCELED) {
+ printf("Event %s in %s got cancelled\n", argv[2], argv[1]);
+ } else if (result == ENOEXEC) {
+ printf("Event %s in %s failed\n", argv[2], argv[1]);
+ } else if (result != 0) {
+ printf("Failed to run event %s in %s, result=%d\n",
+ argv[2],
+ argv[1],
+ result);
+ }
+
+ /* Exit codes must not be negative */
+ ret = (result < 0) ? -result : result;
+ return ret;
+}
+
+/* Return the time elapsed from *tv to *tv2, in seconds */
+static double timeval_delta(struct timeval *tv2, struct timeval *tv)
+{
+	double seconds = tv2->tv_sec - tv->tv_sec;
+	double fraction = (tv2->tv_usec - tv->tv_usec) * 1.0e-6;
+
+	return seconds + fraction;
+}
+
+/*
+ * Print the status line for one script, followed by its output.
+ *
+ * The status column is derived from script->result:
+ *   -ETIMEDOUT      TIMEDOUT, with the start time
+ *   -ENOEXEC        DISABLED
+ *   other negative  CANNOT RUN, with the strerror() text
+ *   0               OK, with duration and start time
+ *   positive        ERROR, with duration and start time
+ *
+ * Output is printed whenever the script produced any. It is also printed
+ * (possibly empty) for every result other than 0 and -ENOEXEC.
+ */
+static void print_status_one(struct ctdb_event_script *script)
+{
+	/* The string returned by ctime() already ends with a newline */
+	if (script->result == -ETIMEDOUT) {
+		printf("%-20s %-10s %s",
+		       script->name,
+		       "TIMEDOUT",
+		       ctime(&script->begin.tv_sec));
+	} else if (script->result == -ENOEXEC) {
+		printf("%-20s %-10s\n", script->name, "DISABLED");
+	} else if (script->result < 0) {
+		printf("%-20s %-10s (%s)\n",
+		       script->name,
+		       "CANNOT RUN",
+		       strerror(-script->result));
+	} else if (script->result == 0) {
+		printf("%-20s %-10s %.3lf %s",
+		       script->name,
+		       "OK",
+		       timeval_delta(&script->end, &script->begin),
+		       ctime(&script->begin.tv_sec));
+	} else {
+		printf("%-20s %-10s %.3lf %s",
+		       script->name,
+		       "ERROR",
+		       timeval_delta(&script->end, &script->begin),
+		       ctime(&script->begin.tv_sec));
+	}
+
+	if ((script->result != 0 && script->result != -ENOEXEC) ||
+	    script->output != NULL) {
+		/* Empty output is informative so always print it on failure */
+		const char *t = script->output == NULL ? "" : script->output;
+		size_t len = strlen(t);
+		char output[len+1];
+		char *t1, *t2;
+
+		strlcpy(output, t, sizeof(output));
+
+		/*
+		 * Strip trailing newlines, they are clutter and
+		 * interfere with multi-line detection.
+		 *
+		 * This is done by length rather than by walking a
+		 * pointer backwards, so that empty output never forms a
+		 * pointer before the start of the array.
+		 */
+		while (len > 0 && output[len-1] == '\n') {
+			len--;
+			output[len] = '\0';
+		}
+
+		/* If the output is a single line then print it inline */
+		t2 = strchr(output, '\n');
+		if (t2 == NULL) {
+			printf(" OUTPUT: %s\n", output);
+			return;
+		}
+
+		/*
+		 * More than 1 line. Print a header and then each
+		 * line, with suitable indent. There are more general
+		 * ways to do this, but let's maintain intermediate
+		 * blank lines (e.g. strv_split() loses blank lines).
+		 */
+		printf(" OUTPUT:\n");
+		t1 = output;
+		do {
+			/*
+			 * Points to newline character. t2 initially
+			 * set non-NULL outside loop because this loop
+			 * only covers multi-line output.
+			 */
+			*t2 = '\0';
+
+			printf(" %s\n", t1);
+			t1 = t2 + 1;
+
+			/* len is now the stripped length: past it, done */
+			if (t1 >= output + len) {
+				break;
+			}
+
+			/* strchrnul() would be awesome, but isn't portable */
+			t2 = strchr(t1, '\n');
+			if (t2 == NULL) {
+				t2 = output + len;
+			}
+		} while (true);
+	}
+}
+
+/*
+ * Print the outcome of a "status" request.
+ *
+ * A non-zero result is reported as a one-line message and nothing else
+ * is printed. For result 0 each script in status->script_list is printed
+ * with print_status_one().
+ */
+static void print_status(const char *component,
+			 const char *event,
+			 int result,
+			 struct ctdb_event_reply_status *status)
+{
+	int idx;
+
+	switch (result) {
+	case 0:
+		break;
+	case ENOENT:
+		printf("Event dir for %s does not exist\n", component);
+		return;
+	case EINVAL:
+		printf("Event %s has never run in %s\n",
+		       event,
+		       component);
+		return;
+	default:
+		printf("Unknown error (%d) for event %s in %s\n",
+		       result,
+		       event,
+		       component);
+		return;
+	}
+
+	for (idx = 0; idx < status->script_list->num_scripts; idx++) {
+		print_status_one(&status->script_list->script[idx]);
+	}
+}
+
+/*
+ * "status" command: fetch and print the status of an event.
+ *
+ * Arguments: argv[0] is the component, argv[1] the event.
+ *
+ * Returns 1 on usage, allocation or communication errors and the error
+ * from ctdb_event_init() if that fails. Otherwise it returns the result
+ * of the request if no status was returned, or else the absolute value
+ * of the status summary.
+ */
+static int event_command_status(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ struct event_tool_context *ctx = talloc_get_type_abort(
+ private_data, struct event_tool_context);
+ struct tevent_req *req;
+ struct ctdb_event_request_status request_status;
+ struct ctdb_event_reply_status *reply_status;
+ int ret = 0, result = 0;
+ bool ok;
+
+ if (argc != 2) {
+ cmdline_usage(ctx->cmdline, "status");
+ return 1;
+ }
+
+ ret = ctdb_event_init(ctx, ctx->ev, &ctx->eclient);
+ if (ret != 0) {
+ D_ERR("Failed to initialize event client, ret=%d\n", ret);
+ return ret;
+ }
+
+ request_status.component = argv[0];
+ request_status.event = argv[1];
+
+ req = ctdb_event_status_send(mem_ctx,
+ ctx->ev,
+ ctx->eclient,
+ &request_status);
+ if (req == NULL) {
+ D_ERR("Memory allocation error\n");
+ return 1;
+ }
+
+ tevent_req_poll(req, ctx->ev);
+
+ ok = ctdb_event_status_recv(req,
+ &ret,
+ &result,
+ mem_ctx,
+ &reply_status);
+ if (!ok) {
+ D_ERR("Failed to get status for event %s in %s, ret=%d\n",
+ argv[1],
+ argv[0],
+ ret);
+ return 1;
+ }
+
+ D_NOTICE("Command status finished with result=%d\n", result);
+
+ print_status(argv[0], argv[1], result, reply_status);
+
+ if (reply_status == NULL) {
+ ret = result;
+ } else {
+ /* Exit codes must not be negative */
+ ret = reply_status->summary;
+ ret = (ret < 0) ? -ret : ret;
+ }
+ return ret;
+}
+
+/* Flag characters printed in front of each script name by "script list" */
+#define EVENT_SCRIPT_DISABLED ' '
+#define EVENT_SCRIPT_ENABLED '*'
+
+/*
+ * "script list" command: list the event scripts of a component.
+ *
+ * Arguments: argv[0] is the component.
+ *
+ * Scripts found in the component's data directory are listed first, each
+ * flagged as enabled if the etc directory has an entry of the same name
+ * that is a symlink to it. Remaining etc directory entries are listed
+ * afterwards and are flagged according to their "enabled" field.
+ *
+ * Returns 0 on success, 1 on usage error, otherwise an errno value.
+ */
+static int event_command_script_list(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ struct event_tool_context *ctx = talloc_get_type_abort(
+ private_data, struct event_tool_context);
+ char *subdir = NULL;
+ char *data_dir = NULL;
+ char *etc_dir = NULL;
+ char *t = NULL;
+ struct event_script_list *data_list = NULL;
+ struct event_script_list *etc_list = NULL;
+ unsigned int i, j, matched;
+ int ret = 0;
+
+ if (argc != 1) {
+ cmdline_usage(ctx->cmdline, "script list");
+ return 1;
+ }
+
+ subdir = talloc_asprintf(mem_ctx, "events/%s", argv[0]);
+ if (subdir == NULL) {
+ return ENOMEM;
+ }
+
+ data_dir = path_datadir_append(mem_ctx, subdir);
+ if (data_dir == NULL) {
+ return ENOMEM;
+ }
+
+ /* Buffer for realpath(); on success data_dir points into it */
+ t = talloc_size(mem_ctx, PATH_MAX);
+ if (t == NULL) {
+ return ENOMEM;
+ }
+
+ data_dir = realpath(data_dir, t);
+ if (data_dir == NULL) {
+ if (errno != ENOENT) {
+ return errno;
+ }
+ D_ERR("Command script list finished with result=%d\n", ENOENT);
+ return ENOENT;
+ }
+
+ etc_dir = path_etcdir_append(mem_ctx, subdir);
+ if (etc_dir == NULL) {
+ return ENOMEM;
+ }
+
+ /*
+ * Ignore error on ENOENT for cut down (e.g. fixed/embedded)
+ * installs that don't use symlinks but just populate etc_dir
+ * directly
+ */
+ ret = event_script_get_list(mem_ctx, data_dir, &data_list);
+ if (ret != 0 && ret != ENOENT) {
+ D_ERR("Command script list finished with result=%d\n", ret);
+ goto done;
+ }
+
+ ret = event_script_get_list(mem_ctx, etc_dir, &etc_list);
+ if (ret != 0) {
+ D_ERR("Command script list finished with result=%d\n", ret);
+ goto done;
+ }
+
+ D_NOTICE("Command script list finished with result=%d\n", ret);
+
+ if (data_list == NULL) {
+ goto list_enabled_only;
+ }
+
+ /*
+ * First list scripts provided by CTDB. Flag those that are
+ * enabled via a symlink and arrange for them to be excluded
+ * from the subsequent list of local scripts.
+ *
+ * Both lists are sorted, so walk the list of enabled scripts
+ * only once in this pass.
+ */
+ j = 0;
+ matched = 0;
+ for (i = 0; i < data_list->num_scripts; i++) {
+ struct event_script *d = data_list->script[i];
+ char flag = EVENT_SCRIPT_DISABLED;
+ char buf[PATH_MAX];
+ ssize_t len;
+
+ /* Check to see if this script is enabled */
+ while (j < etc_list->num_scripts) {
+ struct event_script *e = etc_list->script[j];
+
+ ret = strcmp(e->name, d->name);
+
+ if (ret > 0) {
+ /*
+ * Enabled name is greater, so needs
+ * to be considered later: done
+ */
+ break;
+ }
+
+ if (ret < 0) {
+ /* Enabled name is less: next */
+ j++;
+ continue;
+ }
+
+ len = readlink(e->path, buf, sizeof(buf));
+ if (len == -1 || (size_t)len >= sizeof(buf)) {
+ /*
+ * Not a link? Disappeared? Invalid
+ * link target? Something else?
+ *
+ * Doesn't match provided script: next, done
+ */
+ j++;
+ break;
+ }
+
+ /* readlink() does not NUL-terminate */
+ buf[len] = '\0';
+
+ ret = strcmp(buf, d->path);
+ if (ret != 0) {
+ /* Enabled link doesn't match: next, done */
+ j++;
+ break;
+ }
+
+ /*
+ * Enabled script's symlink matches our
+ * script: flag our script as enabled
+ *
+ * Also clear the enabled script so it can be
+ * trivially skipped in the next pass
+ */
+ flag = EVENT_SCRIPT_ENABLED;
+ TALLOC_FREE(etc_list->script[j]);
+ j++;
+ matched++;
+ break;
+ }
+
+ printf("%c %s\n", flag, d->name);
+ }
+
+ /* Print blank line if both provided and local lists are being printed */
+ if (data_list->num_scripts > 0 && matched != etc_list->num_scripts) {
+ printf("\n");
+ }
+
+list_enabled_only:
+
+ /* Now print details of local scripts, after a blank line */
+ for (j = 0; j < etc_list->num_scripts; j++) {
+ struct event_script *e = etc_list->script[j];
+ char flag = EVENT_SCRIPT_DISABLED;
+
+ if (e == NULL) {
+ /* Matched in previous pass: next */
+ continue;
+ }
+
+ /* Script is local: if executable then flag as enabled */
+ if (e->enabled) {
+ flag = EVENT_SCRIPT_ENABLED;
+ }
+
+ printf("%c %s\n", flag, e->name);
+ }
+
+ /* ret still holds the last strcmp() result at this point */
+ ret = 0;
+
+done:
+ talloc_free(subdir);
+ talloc_free(data_dir);
+ talloc_free(etc_dir);
+ talloc_free(data_list);
+ talloc_free(etc_list);
+
+ return ret;
+}
+
+/*
+ * Enable or disable a script that lives in a component's etc directory
+ * by calling event_script_chmod() on it.
+ *
+ * Prints a message if the script is invalid (EINVAL) or does not exist
+ * (ENOENT). Returns the result of event_script_chmod(), or ENOMEM if a
+ * path cannot be allocated. The ctx argument is not used.
+ */
+static int event_command_script(TALLOC_CTX *mem_ctx,
+				struct event_tool_context *ctx,
+				const char *component,
+				const char *script,
+				bool enable)
+{
+	char *subdir = NULL;
+	char *etc_dir = NULL;
+	int result;
+
+	subdir = talloc_asprintf(mem_ctx, "events/%s", component);
+	if (subdir == NULL) {
+		return ENOMEM;
+	}
+
+	etc_dir = path_etcdir_append(mem_ctx, subdir);
+	if (etc_dir == NULL) {
+		return ENOMEM;
+	}
+
+	result = event_script_chmod(etc_dir, script, enable);
+
+	talloc_free(subdir);
+	talloc_free(etc_dir);
+
+	D_NOTICE("Command script finished with result=%d\n", result);
+
+	switch (result) {
+	case EINVAL:
+		printf("Script %s is invalid in %s\n", script, component);
+		break;
+	case ENOENT:
+		printf("Script %s does not exist in %s\n", script, component);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+/*
+ * "script enable" command: enable an event script.
+ *
+ * Arguments: argv[0] is the component, argv[1] the script name (without
+ * the ".script" suffix).
+ *
+ * What happens depends on the entry in the etc directory:
+ *   - symlink: nothing to do
+ *   - regular file: handled by event_command_script()
+ *   - missing: a symlink to the script in the data directory is created
+ *   - anything else: EINVAL
+ *
+ * Returns 0 on success, 1 on usage error, otherwise an errno value.
+ */
+static int event_command_script_enable(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ struct event_tool_context *ctx = talloc_get_type_abort(
+ private_data, struct event_tool_context);
+ struct stat statbuf;
+ char *script, *etc_script;
+ int ret;
+
+ if (argc != 2) {
+ cmdline_usage(ctx->cmdline, "script enable");
+ return 1;
+ }
+
+ script = talloc_asprintf(mem_ctx, "events/%s/%s.script", argv[0], argv[1]);
+ if (script == NULL) {
+ return ENOMEM;
+ }
+
+ etc_script = path_etcdir_append(mem_ctx, script);
+ if (etc_script == NULL) {
+ return ENOMEM;
+ }
+
+ /* lstat() so that a symlink is seen as such, not followed */
+ ret = lstat(etc_script, &statbuf);
+ if (ret == 0) {
+ if (S_ISLNK(statbuf.st_mode)) {
+ /* Link already exists */
+ return 0;
+ } else if (S_ISREG(statbuf.st_mode)) {
+ return event_command_script(mem_ctx,
+ ctx,
+ argv[0],
+ argv[1],
+ true);
+ }
+
+ printf("Script %s is not a file or a link\n", etc_script);
+ return EINVAL;
+ } else {
+ if (errno == ENOENT) {
+ char *t;
+ char *data_script;
+
+ data_script = path_datadir_append(mem_ctx, script);
+ if (data_script == NULL) {
+ return ENOMEM;
+ }
+
+ /* Buffer for realpath() */
+ t = talloc_size(mem_ctx, PATH_MAX);
+ if (t == NULL) {
+ return ENOMEM;
+ }
+
+ data_script = realpath(data_script, t);
+ if (data_script == NULL) {
+ if (errno != ENOENT) {
+ return errno;
+ }
+ printf("Script %s does not exist in %s\n",
+ argv[1],
+ argv[0]);
+ return ENOENT;
+ }
+
+ ret = stat(data_script, &statbuf);
+ if (ret != 0) {
+ printf("Script %s does not exist in %s\n",
+ argv[1], argv[0]);
+ return ENOENT;
+ }
+
+ ret = symlink(data_script, etc_script);
+ if (ret != 0) {
+ printf("Failed to create symlink %s\n",
+ etc_script);
+ return EIO;
+ }
+
+ return 0;
+ }
+
+ /* lstat() failed for a reason other than ENOENT */
+ printf("Script %s does not exist\n", etc_script);
+ return EINVAL;
+ }
+}
+
+/*
+ * "script disable" command: disable an event script.
+ *
+ * Arguments: argv[0] is the component, argv[1] the script name (without
+ * the ".script" suffix).
+ *
+ * What happens depends on the entry in the etc directory:
+ *   - symlink: the link is removed
+ *   - regular file: handled by event_command_script()
+ *   - anything else: EINVAL
+ *   - lstat() fails for any reason: nothing is done, success is returned
+ *
+ * Returns 0 on success, 1 on usage error, otherwise an errno value.
+ */
+static int event_command_script_disable(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ struct event_tool_context *ctx = talloc_get_type_abort(
+ private_data, struct event_tool_context);
+ struct stat statbuf;
+ char *script, *etc_script;
+ int ret;
+
+
+ if (argc != 2) {
+ cmdline_usage(ctx->cmdline, "script disable");
+ return 1;
+ }
+
+ script = talloc_asprintf(mem_ctx, "events/%s/%s.script", argv[0], argv[1]);
+ if (script == NULL) {
+ return ENOMEM;
+ }
+
+ etc_script = path_etcdir_append(mem_ctx, script);
+ if (etc_script == NULL) {
+ return ENOMEM;
+ }
+
+ /* lstat() so that a symlink is seen as such, not followed */
+ ret = lstat(etc_script, &statbuf);
+ if (ret == 0) {
+ if (S_ISLNK(statbuf.st_mode)) {
+ /* Link exists */
+ ret = unlink(etc_script);
+ if (ret != 0) {
+ printf("Failed to remove symlink %s\n",
+ etc_script);
+ return EIO;
+ }
+
+ return 0;
+ } else if (S_ISREG(statbuf.st_mode)) {
+ return event_command_script(mem_ctx,
+ ctx,
+ argv[0],
+ argv[1],
+ false);
+ }
+
+ printf("Script %s is not a file or a link\n", etc_script);
+ return EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Commands understood by the tool. Each entry gives the command name,
+ * its handler, a help message and a description of its arguments.
+ */
+struct cmdline_command event_commands[] = {
+ { "run", event_command_run,
+ "Run an event", "<timeout> <component> <event> <args>" },
+ { "status", event_command_status,
+ "Get status of an event", "<component> <event>" },
+ { "script list", event_command_script_list,
+ "List event scripts", "<component>" },
+ { "script enable", event_command_script_enable,
+ "Enable an event script", "<component> <script>" },
+ { "script disable", event_command_script_disable,
+ "Disable an event script", "<component> <script>" },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Create a tool context and parse the command line.
+ *
+ * The context is allocated on mem_ctx. The command line parser is set up
+ * with the given options and the event_commands table, then argc/argv
+ * are parsed. Usage is printed if parsing fails.
+ *
+ * Returns 0 and stores the context in *result on success. On failure the
+ * context is freed and ENOMEM or the error from the cmdline code is
+ * returned.
+ */
+int event_tool_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ int argc,
+ const char **argv,
+ bool parse_options,
+ struct event_tool_context **result)
+{
+ struct event_tool_context *ctx;
+ int ret;
+
+ ctx = talloc_zero(mem_ctx, struct event_tool_context);
+ if (ctx == NULL) {
+ D_ERR("Memory allocation error\n");
+ return ENOMEM;
+ }
+
+ ret = cmdline_init(mem_ctx,
+ prog,
+ options,
+ NULL,
+ event_commands,
+ &ctx->cmdline);
+ if (ret != 0) {
+ D_ERR("Failed to initialize cmdline, ret=%d\n", ret);
+ talloc_free(ctx);
+ return ret;
+ }
+
+ ret = cmdline_parse(ctx->cmdline, argc, argv, parse_options);
+ if (ret != 0) {
+ cmdline_usage(ctx->cmdline, NULL);
+ talloc_free(ctx);
+ return ret;
+ }
+
+ *result = ctx;
+ return 0;
+}
+
+/*
+ * Create the event loop for the tool and run the parsed command.
+ *
+ * Returns ENOMEM if the event loop cannot be created, otherwise whatever
+ * cmdline_run() returns. The result pointer is passed through to
+ * cmdline_run().
+ */
+int event_tool_run(struct event_tool_context *ctx, int *result)
+{
+	ctx->ev = tevent_context_init(ctx);
+	if (ctx->ev != NULL) {
+		return cmdline_run(ctx->cmdline, ctx, result);
+	}
+
+	D_ERR("Failed to initialize tevent\n");
+	return ENOMEM;
+}
+
+#ifdef CTDB_EVENT_TOOL
+
+/* Values of the command line options; the debug level defaults to "ERROR" */
+static struct {
+ const char *debug;
+} event_data = {
+ .debug = "ERROR",
+};
+
+/* Options accepted by the stand-alone tool: -d/--debug <level> */
+struct poptOption event_options[] = {
+ { "debug", 'd', POPT_ARG_STRING, &event_data.debug, 0,
+ "debug level", "ERROR|WARNING|NOTICE|INFO|DEBUG" },
+ POPT_TABLEEND
+};
+
+/*
+ * Entry point of the stand-alone "ctdb-event" tool.
+ *
+ * Parses the command line, sets up logging to stderr at the requested
+ * debug level (DEBUG_ERR if the level cannot be parsed) and runs the
+ * command. Exits with 1 if initialisation or running the command fails,
+ * otherwise with the command's result.
+ */
+int main(int argc, const char **argv)
+{
+ TALLOC_CTX *mem_ctx;
+ struct event_tool_context *ctx;
+ int ret, result = 0;
+ int level;
+ bool ok;
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = event_tool_init(mem_ctx,
+ "ctdb-event",
+ event_options,
+ argc,
+ argv,
+ true,
+ &ctx);
+ if (ret != 0) {
+ talloc_free(mem_ctx);
+ exit(1);
+ }
+
+ setup_logging("ctdb-event", DEBUG_STDERR);
+ ok = debug_level_parse(event_data.debug, &level);
+ if (!ok) {
+ level = DEBUG_ERR;
+ }
+ debuglevel_set(level);
+
+ ret = event_tool_run(ctx, &result);
+ if (ret != 0) {
+ /* NOTE: mem_ctx is not freed on this path before exiting */
+ exit(1);
+ }
+
+ talloc_free(mem_ctx);
+ exit(result);
+}
+
+#endif /* CTDB_EVENT_TOOL */
diff --git a/ctdb/event/event_tool.h b/ctdb/event/event_tool.h
new file mode 100644
index 0000000..3abe716
--- /dev/null
+++ b/ctdb/event/event_tool.h
@@ -0,0 +1,38 @@
+/*
+ CTDB event daemon utility code
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_EVENT_TOOL_H__
+#define __CTDB_EVENT_TOOL_H__
+
+#include <popt.h>
+#include <talloc.h>
+
+/* Opaque tool context, created by event_tool_init() */
+struct event_tool_context;
+
+/*
+ * Create a tool context on mem_ctx and parse the command line.
+ *
+ * Returns 0 and stores the new context in *result on success, otherwise
+ * a non-zero error code.
+ */
+int event_tool_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ int argc,
+ const char **argv,
+ bool parse_options,
+ struct event_tool_context **result);
+
+/*
+ * Run the command selected on the command line.
+ *
+ * Returns 0 if the command was run, non-zero otherwise. The result
+ * pointer is handed to the command line code along with the command.
+ */
+int event_tool_run(struct event_tool_context *ctx, int *result);
+
+#endif /* __CTDB_EVENT_TOOL_H__ */
diff --git a/ctdb/failover/failover_conf.c b/ctdb/failover/failover_conf.c
new file mode 100644
index 0000000..0f199cb
--- /dev/null
+++ b/ctdb/failover/failover_conf.c
@@ -0,0 +1,53 @@
+/*
+ CTDB failover config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "lib/util/debug.h"
+
+#include "common/conf.h"
+
+#include "failover/failover_conf.h"
+
+/*
+ * Validation callback for a boolean failover option.
+ *
+ * The new value is always accepted (true is returned). If the value
+ * differs from the old one during a configuration reload or an API
+ * update, a warning is logged saying that the update is being ignored.
+ */
+static bool check_static_boolean_change(const char *key,
+					bool old_value,
+					bool new_value,
+					enum conf_update_mode mode)
+{
+	/*
+	 * Each mode has to be compared with "mode" explicitly: a bare
+	 * enumerator in a logical OR is just a constant and never
+	 * tests the mode.
+	 */
+	if (mode == CONF_MODE_RELOAD || mode == CONF_MODE_API) {
+		if (old_value != new_value) {
+			D_WARNING("Ignoring update of [%s] -> %s\n",
+				  FAILOVER_CONF_SECTION,
+				  key);
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Register the failover section and its options with the configuration
+ * context: one boolean option, FAILOVER_CONF_DISABLED, which defaults to
+ * false and is validated by check_static_boolean_change().
+ */
+void failover_conf_init(struct conf_context *conf)
+{
+ conf_define_section(conf, FAILOVER_CONF_SECTION, NULL);
+
+ conf_define_boolean(conf,
+ FAILOVER_CONF_SECTION,
+ FAILOVER_CONF_DISABLED,
+ false,
+ check_static_boolean_change);
+}
diff --git a/ctdb/failover/failover_conf.h b/ctdb/failover/failover_conf.h
new file mode 100644
index 0000000..d154daa
--- /dev/null
+++ b/ctdb/failover/failover_conf.h
@@ -0,0 +1,31 @@
+/*
+ CTDB failover config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_FAILOVER_CONF_H__
+#define __CTDB_FAILOVER_CONF_H__
+
+#include "common/conf.h"
+
+/* Name of the configuration section holding the failover options */
+#define FAILOVER_CONF_SECTION "failover"
+
+/* Key of the boolean option in the failover section */
+#define FAILOVER_CONF_DISABLED "disabled"
+
+/* Register the failover section and its options with conf */
+void failover_conf_init(struct conf_context *conf);
+
+#endif /* __CTDB_FAILOVER_CONF_H__ */
diff --git a/ctdb/ib/README.txt b/ctdb/ib/README.txt
new file mode 100644
index 0000000..e238a1d
--- /dev/null
+++ b/ctdb/ib/README.txt
@@ -0,0 +1,10 @@
+Compilation
+===========
+
+For the configure script, please set the OFED include & library path by e.g.:
+
+export CFLAGS="-I/usr/local/ofed/include -L/usr/local/ofed/lib"
+
+Then run:
+
+./configure --enable-infiniband
diff --git a/ctdb/ib/ibw_ctdb.c b/ctdb/ib/ibw_ctdb.c
new file mode 100644
index 0000000..38314c3
--- /dev/null
+++ b/ctdb/ib/ibw_ctdb.c
@@ -0,0 +1,185 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Join infiniband wrapper and ctdb.
+ *
+ * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
+ *
+ * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <assert.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/time.h"
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "ibwrapper.h"
+#include "ibw_ctdb.h"
+
+/*
+ * Turn a textual address into an IPv4 address.
+ *
+ * The string is first tried as an IPv4 literal; if that fails it is
+ * looked up with gethostbyname().  On failure an error is recorded on
+ * ctdb and -1 is returned; on success *addr is filled in and 0 returned.
+ */
+int ctdb_ibw_get_address(struct ctdb_context *ctdb,
+	const char *address, struct in_addr *addr)
+{
+	struct hostent *host;
+
+	/* literal address: inet_pton() already stored the result */
+	if (inet_pton(AF_INET, address, addr) > 0) {
+		return 0;
+	}
+
+	host = gethostbyname(address);
+	if (host == NULL || host->h_length > sizeof(*addr)) {
+		ctdb_set_error(ctdb, "invalid network address '%s'\n",
+			       address);
+		return -1;
+	}
+
+	memcpy(addr, host->h_addr, host->h_length);
+	return 0;
+}
+
+/*
+ * Start an outgoing ibw connection to a node.
+ *
+ * If ibw_connect() reports a failure, a one-second timer is armed that
+ * calls ctdb_ibw_node_connect_event() to try again.  The function
+ * returns 0 in either case.
+ */
+int ctdb_ibw_node_connect(struct ctdb_node *node)
+{
+	struct ctdb_ibw_node *ibw_node;
+
+	ibw_node = talloc_get_type(node->transport_data, struct ctdb_ibw_node);
+	assert(ibw_node!=NULL);
+	assert(ibw_node->conn!=NULL);
+
+	if (ibw_connect(ibw_node->conn, &node->address.ip, node) != 0) {
+		DEBUG(DEBUG_ERR, ("ctdb_ibw_node_connect/ibw_connect failed - retrying...\n"));
+		/* schedule the next attempt one second from now */
+		tevent_add_timer(node->ctdb->ev, node,
+				 timeval_current_ofs(1, 0),
+				 ctdb_ibw_node_connect_event, node);
+	}
+
+	/* in the good case processing continues at IBWC_CONNECTED in ibw_ctdb.c */
+	return 0;
+}
+
+/*
+ * Timer callback that retries an outgoing connection.
+ *
+ * private_data is interpreted as the struct ctdb_node to connect to;
+ * ev, te and t are not used.
+ */
+void ctdb_ibw_node_connect_event(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node);
+
+ ctdb_ibw_node_connect(node);
+}
+
+/*
+ * State-change callback for the ibw layer.
+ *
+ * Either argument may be NULL; for each non-NULL argument the current
+ * state is examined and acted upon.  Returns -1 only when ibw_accept()
+ * fails for an incoming connect request, 0 otherwise.  Unknown states
+ * trigger an assertion.
+ */
+int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
+{
+ if (ctx!=NULL) {
+ /* ctx->state changed */
+ switch(ctx->state) {
+ case IBWS_INIT: /* ctx start - after ibw_init */
+ break;
+ case IBWS_READY: /* after ibw_bind & ibw_listen */
+ break;
+ case IBWS_CONNECT_REQUEST: /* after [IBWS_READY + incoming request] */
+ /* => [(ibw_accept)IBWS_READY | (ibw_disconnect)STOPPED | ERROR] */
+ /* every incoming request is accepted, with NULL as third argument */
+ if (ibw_accept(ctx, conn, NULL)) {
+ DEBUG(DEBUG_ERR, ("connstate_handler/ibw_accept failed\n"));
+ return -1;
+ } /* else continue in IBWC_CONNECTED */
+ break;
+ case IBWS_STOPPED: /* normal stop <= ibw_disconnect+(IBWS_READY | IBWS_CONNECT_REQUEST) */
+ /* TODO: have a CTDB upcall for which CTDB should wait in a (final) loop */
+ break;
+ case IBWS_ERROR: /* abnormal state; ibw_stop must be called after this */
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ if (conn!=NULL) {
+ /* conn->state changed */
+ switch(conn->state) {
+ case IBWC_INIT: /* conn start - internal state */
+ break;
+ case IBWC_CONNECTED: { /* after ibw_accept or ibw_connect */
+ /* conn_userdata is a ctdb_node only for connections we initiated */
+ struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
+ if (node!=NULL) { /* after ibw_connect */
+ struct ctdb_ibw_node *cn = talloc_get_type(
+ node->transport_data,
+ struct ctdb_ibw_node);
+
+ /* tell ctdb the node is up, then send whatever was queued meanwhile */
+ node->ctdb->upcalls->node_connected(node);
+ ctdb_flush_cn_queue(cn);
+ } else { /* after ibw_accept */
+ /* NOP in CTDB case */
+ }
+ } break;
+ case IBWC_DISCONNECTED: { /* after ibw_disconnect */
+ struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
+ if (node!=NULL)
+ node->ctdb->upcalls->node_dead(node);
+ /*
+ * NOTE(review): conn is freed here but the node's cn->conn is
+ * not reset in this branch - confirm whether it can still
+ * refer to this conn afterwards.
+ */
+ talloc_free(conn);
+ /* normal + intended disconnect => not reconnecting in this layer */
+ } break;
+ case IBWC_ERROR: {
+ struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
+ /* when node is NULL nothing is done for the failed conn in this branch */
+ if (node!=NULL) {
+ struct ctdb_ibw_node *cn = talloc_get_type(
+ node->transport_data,
+ struct ctdb_ibw_node);
+ struct ibw_ctx *ictx = cn->conn->ctx;
+
+ /* replace the failed conn with a fresh one and retry in one second */
+ DEBUG(DEBUG_DEBUG, ("IBWC_ERROR, reconnecting...\n"));
+ talloc_free(cn->conn); /* internal queue content is destroyed */
+ cn->conn = (void *)ibw_conn_new(ictx, node);
+ tevent_add_timer(node->ctdb->ev, node,
+ timeval_current_ofs(1, 0),
+ ctdb_ibw_node_connect_event, node);
+ }
+ } break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Receive callback for the ibw layer: pass one received packet to ctdb.
+ *
+ * conn - connection the data arrived on; must be in IBWC_CONNECTED state
+ * buf  - received data
+ * n    - number of bytes in buf
+ *
+ * The data is copied into a talloc chunk parented to conn and handed to
+ * the recv_pkt upcall.  Returns 0 on success; an allocation failure is
+ * handled by CTDB_NO_MEMORY().
+ */
+int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n)
+{
+	struct ctdb_context *ctdb;
+	void *buf2; /* future TODO: a solution for removal of this */
+
+	/* check conn before it is dereferenced to look up the ctdb context */
+	assert(conn!=NULL);
+	assert(buf!=NULL);
+	assert(conn->state==IBWC_CONNECTED);
+
+	ctdb = talloc_get_type(conn->ctx->ctx_userdata, struct ctdb_context);
+	assert(ctdb!=NULL);
+
+	/* so far "buf" is an ib-registered memory area
+	 * and being reused for next receive
+	 * noticed that HL requires talloc-ed memory to be stolen */
+	buf2 = talloc_memdup(conn, buf, n);
+	CTDB_NO_MEMORY(ctdb, buf2);
+
+	ctdb->upcalls->recv_pkt(ctdb, (uint8_t *)buf2, (uint32_t)n);
+
+	return 0;
+}
diff --git a/ctdb/ib/ibw_ctdb.h b/ctdb/ib/ibw_ctdb.h
new file mode 100644
index 0000000..57f659b
--- /dev/null
+++ b/ctdb/ib/ibw_ctdb.h
@@ -0,0 +1,51 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Join infiniband wrapper and ctdb.
+ *
+ * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
+ *
+ * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* One packet waiting in a node's outgoing queue (doubly linked list element) */
+struct ctdb_ibw_msg {
+ uint8_t *data; /* packet payload */
+ uint32_t length; /* number of bytes in data */
+ struct ctdb_ibw_msg *prev; /* list linkage */
+ struct ctdb_ibw_msg *next; /* list linkage */
+};
+
+/* Per-node ibw transport state: the connection plus a queue of unsent packets */
+struct ctdb_ibw_node {
+ struct ibw_conn *conn; /* ibw connection to the node */
+
+ struct ctdb_ibw_msg *queue; /* head of the pending packet list */
+ struct ctdb_ibw_msg *queue_last; /* most recently appended packet */
+ int qcnt; /* number of packets currently in queue */
+};
+
+/* Resolve an IPv4 literal or host name into *addr; 0 on success, -1 on failure */
+int ctdb_ibw_get_address(struct ctdb_context *ctdb,
+ const char *address, struct in_addr *addr);
+
+/* Callbacks for the ibw layer: connection/context state changes and received data */
+int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn);
+int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n);
+
+/* Start connecting to a node, and the timer callback used to retry it */
+int ctdb_ibw_node_connect(struct ctdb_node *node);
+void ctdb_ibw_node_connect_event(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data);
+
+/* Send the packets queued for a node */
+int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn);
+
+/* Set up the ibw transport for a ctdb context */
+int ctdb_ibw_init(struct ctdb_context *ctdb);
diff --git a/ctdb/ib/ibw_ctdb_init.c b/ctdb/ib/ibw_ctdb_init.c
new file mode 100644
index 0000000..f9d00c6
--- /dev/null
+++ b/ctdb/ib/ibw_ctdb_init.c
@@ -0,0 +1,255 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Join infiniband wrapper and ctdb.
+ *
+ * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
+ *
+ * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <assert.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "ibwrapper.h"
+#include "ibw_ctdb.h"
+
+/*
+ * Bind the ibw context to our own address and start listening on it.
+ *
+ * Returns 0 on success, -1 if either ibw_bind() or ibw_listen() fails.
+ */
+static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog)
+{
+	struct ibw_ctx *ibw;
+	int failed;
+
+	ibw = talloc_get_type(ctdb->transport_data, struct ibw_ctx);
+	assert(ibw!=NULL);
+
+	failed = ibw_bind(ibw, &ctdb->address->ip);
+	if (failed) {
+		DEBUG(DEBUG_CRIT, ("ctdb_ibw_listen: ibw_bind failed\n"));
+		return -1;
+	}
+
+	failed = ibw_listen(ibw, backlog);
+	if (failed) {
+		DEBUG(DEBUG_CRIT, ("ctdb_ibw_listen: ibw_listen failed\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Set up the ibw part of a ctdb node.
+ *
+ * Allocates the per-node transport state on the node, creates a new ibw
+ * connection object for it and stores the state in node->transport_data.
+ * Returns 0 if a connection object was obtained, -1 otherwise.
+ */
+static int ctdb_ibw_add_node(struct ctdb_node *node)
+{
+	struct ibw_ctx *ictx;
+	struct ctdb_ibw_node *ibw_node;
+
+	ictx = talloc_get_type(node->ctdb->transport_data, struct ibw_ctx);
+
+	ibw_node = talloc_zero(node, struct ctdb_ibw_node);
+	assert(ibw_node!=NULL);
+
+	ibw_node->conn = ibw_conn_new(ictx, node);
+	node->transport_data = (void *)ibw_node;
+
+	if (ibw_node->conn == NULL) {
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Bring up the infiniband transport: initialise ibw, add every known
+ * node and start listening on our own address (backlog 10).
+ *
+ * Returns the ctdb_ibw_init() result if that fails, -1 if adding a node
+ * or listening fails, 0 otherwise.
+ */
+static int ctdb_ibw_initialise(struct ctdb_context *ctdb)
+{
+	int idx;
+	int rc = ctdb_ibw_init(ctdb);
+
+	if (rc != 0) {
+		return rc;
+	}
+
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		if (ctdb_ibw_add_node(ctdb->nodes[idx]) == 0) {
+			continue;
+		}
+		DEBUG(DEBUG_CRIT, ("methods->add_node failed at %d\n", idx));
+		return -1;
+	}
+
+	/* listen on our own address; TODO: backlog as param */
+	return ctdb_ibw_listen(ctdb, 10) ? -1 : 0;
+}
+
+
+/*
+ * Start infiniband: initiate a connection to every node whose address
+ * differs from our own.  Everything proceeds asynchronously, so this
+ * always returns 0.
+ */
+static int ctdb_ibw_start(struct ctdb_context *ctdb)
+{
+	int idx;
+
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		struct ctdb_node *peer = ctdb->nodes[idx];
+
+		/* do not connect to ourselves */
+		if (ctdb_same_address(ctdb->address, &peer->address)) {
+			continue;
+		}
+		ctdb_ibw_node_connect(peer);
+	}
+
+	return 0;
+}
+
+/*
+ * Send one packet over an ibw connection.
+ *
+ * A send buffer of the requested length is obtained from the ibw layer,
+ * the packet is copied into it and the buffer is passed to ibw_send().
+ * Returns -1 if no send buffer could be obtained, otherwise the
+ * ibw_send() result.
+ */
+static int ctdb_ibw_send_pkt(struct ibw_conn *conn, uint8_t *data, uint32_t length)
+{
+ void *buf, *key;
+
+ if (ibw_alloc_send_buf(conn, &buf, &key, length)) {
+ DEBUG(DEBUG_ERR, ("queue_pkt/ibw_alloc_send_buf failed\n"));
+ return -1;
+ }
+
+ memcpy(buf, data, length);
+ return ibw_send(conn, buf, key, length);
+}
+
+/*
+ * Send all packets queued for a node, oldest first.
+ *
+ * Each packet is removed from the queue and freed only after it has been
+ * sent successfully.  On the first send failure the function returns -1
+ * and leaves the remaining packets (including the failed one) queued.
+ * Returns 0 once the queue is empty.
+ */
+int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn)
+{
+ struct ctdb_ibw_msg *p;
+ int rc = 0;
+
+ while(cn->queue) {
+ p = cn->queue;
+ rc = ctdb_ibw_send_pkt(cn->conn, p->data, p->length);
+ if (rc)
+ return -1; /* will be retried later when conn is up */
+
+ DLIST_REMOVE(cn->queue, p);
+ cn->qcnt--;
+ talloc_free(p); /* it will talloc_free p->data as well */
+ }
+ /* the counter must agree with the now empty list */
+ assert(cn->qcnt==0);
+ /* cn->queue_last = NULL is not needed - see DLIST_ADD_AFTER */
+
+ return rc;
+}
+
+/*
+ * Send a packet to a node, or queue it while the connection is not up.
+ *
+ * node   - destination node; its transport_data must be a ctdb_ibw_node
+ * data   - packet to send; it is copied when it has to be queued
+ * length - packet size in bytes, at least sizeof(uint32_t)
+ *
+ * Returns the ctdb_ibw_send_pkt() result when the connection is in
+ * IBWC_CONNECTED state, 0 when the packet was queued, and -1 when the
+ * node has no connection object or memory could not be allocated.
+ */
+static int ctdb_ibw_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length)
+{
+	struct ctdb_ibw_node *cn = talloc_get_type(node->transport_data,
+						   struct ctdb_ibw_node);
+	struct ctdb_ibw_msg *p;
+
+	assert(length>=sizeof(uint32_t));
+	assert(cn!=NULL);
+
+	if (cn->conn==NULL) {
+		DEBUG(DEBUG_ERR, ("ctdb_ibw_queue_pkt: conn is NULL\n"));
+		return -1;
+	}
+
+	if (cn->conn->state==IBWC_CONNECTED) {
+		return ctdb_ibw_send_pkt(cn->conn, data, length);
+	}
+
+	/* not connected yet: keep a private copy until the queue is flushed */
+	p = talloc_zero(cn, struct ctdb_ibw_msg);
+	CTDB_NO_MEMORY(node->ctdb, p);
+
+	p->data = talloc_memdup(p, data, length);
+	if (p->data == NULL) {
+		/*
+		 * Do not leave the half-built entry hanging off cn.
+		 * TALLOC_FREE() also resets p to NULL, so the usual
+		 * out-of-memory handling below reports the failure
+		 * and leaves the function.
+		 */
+		TALLOC_FREE(p);
+		CTDB_NO_MEMORY(node->ctdb, p);
+	}
+
+	p->length = length;
+
+	DLIST_ADD_AFTER(cn->queue, p, cn->queue_last);
+	cn->queue_last = p;
+	cn->qcnt++;
+
+	return 0;
+}
+
+/*
+ * Restart method of the ibw transport.
+ *
+ * Not implemented: it only logs a warning and leaves node untouched.
+ */
+static void ctdb_ibw_restart(struct ctdb_node *node)
+{
+ /* TODO: implement this method for IB */
+ DEBUG(DEBUG_ALERT,("WARNING: method restart is not yet implemented for IB\n"));
+}
+
+/*
+ * transport packet allocator - allows transport to control memory for packets
+ *
+ * Currently a plain talloc allocation of size bytes on mem_ctx; returns
+ * whatever talloc_size() returns.
+ */
+static void *ctdb_ibw_allocate_pkt(TALLOC_CTX *mem_ctx, size_t size)
+{
+ /* TODO: use ibw_alloc_send_buf instead... */
+ return talloc_size(mem_ctx, size);
+}
+
+/* Compiled out unless __NOTDEF__ is defined; the matching .stop entry in the methods table is commented out too */
+#ifdef __NOTDEF__
+
+/*
+ * Stop the ibw context stored in cctx->private_data and return the
+ * ibw_stop() result.
+ */
+static int ctdb_ibw_stop(struct ctdb_context *cctx)
+{
+ struct ibw_ctx *ictx = talloc_get_type(cctx->private_data, struct ibw_ctx);
+
+ assert(ictx!=NULL);
+ return ibw_stop(ictx);
+}
+
+#endif /* __NOTDEF__ */
+
+/* Method table of the ibw transport; installed in ctdb->methods by ctdb_ibw_init() */
+static const struct ctdb_methods ctdb_ibw_methods = {
+ .initialise= ctdb_ibw_initialise,
+ .start = ctdb_ibw_start,
+ .queue_pkt = ctdb_ibw_queue_pkt,
+ .add_node = ctdb_ibw_add_node,
+ .allocate_pkt = ctdb_ibw_allocate_pkt,
+ .restart = ctdb_ibw_restart,
+
+/* stop method is disabled together with ctdb_ibw_stop() */
+// .stop = ctdb_ibw_stop
+};
+
+/*
+ * initialise ibw portion of ctdb
+ *
+ * Creates the ibw context with ctdb as user data, the connstate and
+ * receive handlers of this transport and ctdb's event context, then
+ * installs the ibw method table and stores the context in
+ * ctdb->transport_data.  Returns 0 on success, -1 if ibw_init() fails.
+ */
+int ctdb_ibw_init(struct ctdb_context *ctdb)
+{
+ struct ibw_ctx *ictx;
+
+ DEBUG(DEBUG_DEBUG, ("ctdb_ibw_init invoked...\n"));
+ /* no init attributes are passed yet (NULL list, count 0) */
+ ictx = ibw_init(
+ NULL, //struct ibw_initattr *attr, /* TODO */
+ 0, //int nattr, /* TODO */
+ ctdb,
+ ctdb_ibw_connstate_handler,
+ ctdb_ibw_receive_handler,
+ ctdb->ev);
+
+ if (ictx==NULL) {
+ DEBUG(DEBUG_CRIT, ("ctdb_ibw_init: ibw_init failed\n"));
+ return -1;
+ }
+
+ ctdb->methods = &ctdb_ibw_methods;
+ ctdb->transport_data = ictx;
+
+ DEBUG(DEBUG_DEBUG, ("ctdb_ibw_init succeeded.\n"));
+ return 0;
+}
diff --git a/ctdb/ib/ibwrapper.c b/ctdb/ib/ibwrapper.c
new file mode 100644
index 0000000..cf4efa5
--- /dev/null
+++ b/ctdb/ib/ibwrapper.c
@@ -0,0 +1,1361 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Wrap Infiniband calls.
+ *
+ * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
+ *
+ * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <assert.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+
+#include <rdma/rdma_cma_abi.h>
+#include <rdma/rdma_cma.h>
+
+#include "ibwrapper.h"
+#include "ibwrapper_internal.h"
+
+/* Size of the static buffer that holds the most recent error message */
+#define IBW_LASTERR_BUFSIZE 512
+static char ibw_lasterr[IBW_LASTERR_BUFSIZE];
+
+/*
+ * Work request and receive buffer limits.
+ * NOTE(review): presumably the defaults for the corresponding ibw_opts
+ * fields - confirm where they are applied.
+ */
+#define IBW_MAX_SEND_WR 256
+#define IBW_MAX_RECV_WR 1024
+#define IBW_RECV_BUFSIZE 256
+#define IBW_RECV_THRESHOLD (1 * 1024 * 1024)
+
+static void ibw_event_handler_verbs(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags, void *private_data);
+static int ibw_fill_cq(struct ibw_conn *conn);
+static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc);
+static int ibw_wc_send(struct ibw_conn *conn, struct ibv_wc *wc);
+static int ibw_send_packet(struct ibw_conn *conn, void *buf, struct ibw_wr *p, uint32_t len);
+
+/*
+ * Allocate n bytes aligned to the context's page size and register them
+ * as a memory region on the connection's protection domain.
+ *
+ * On success the buffer is returned and *ppmr holds the memory region.
+ * On failure NULL is returned and ibw_lasterr describes the problem.
+ */
+static void *ibw_alloc_mr(struct ibw_ctx_priv *pctx, struct ibw_conn_priv *pconn,
+	uint32_t n, struct ibv_mr **ppmr)
+{
+	void *region;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_alloc_mr(cmid=%p, n=%u)\n", pconn->cm_id, n));
+
+	region = memalign(pctx->pagesize, n);
+	if (region == NULL) {
+		sprintf(ibw_lasterr, "couldn't allocate memory\n");
+		return NULL;
+	}
+
+	*ppmr = ibv_reg_mr(pconn->pd, region, n, IBV_ACCESS_LOCAL_WRITE);
+	if (*ppmr != NULL) {
+		return region;
+	}
+
+	/* registration failed: give the buffer back */
+	sprintf(ibw_lasterr, "couldn't allocate mr\n");
+	free(region);
+	return NULL;
+}
+
+/*
+ * Deregister a memory region and free its buffer.
+ *
+ * Both *ppmr and *ppbuf are released only if set and are reset to NULL
+ * afterwards, so calling this again on the same pair is harmless.
+ */
+static void ibw_free_mr(char **ppbuf, struct ibv_mr **ppmr)
+{
+ DEBUG(DEBUG_DEBUG, ("ibw_free_mr(%p %p)\n", *ppbuf, *ppmr));
+ /* deregister first, then release the memory behind the region */
+ if (*ppmr!=NULL) {
+ ibv_dereg_mr(*ppmr);
+ *ppmr = NULL;
+ }
+ if (*ppbuf) {
+ free(*ppbuf);
+ *ppbuf = NULL;
+ }
+}
+
+/*
+ * Allocate and register the send and receive buffers of a connection and
+ * build its list of available send work requests.
+ *
+ * Both buffers hold recv_bufsize bytes per work request (max_send_wr
+ * resp. max_recv_wr of them).  Every send work request gets its own
+ * slice of the send buffer, is recorded in wr_index under its wr_id and
+ * is put on wr_list_avail.
+ *
+ * Returns 0 on success, -1 on any allocation failure (ibw_lasterr is
+ * set).  Whatever was allocated stays attached to pconn.
+ */
+static int ibw_init_memory(struct ibw_conn *conn)
+{
+	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	struct ibw_opts *opts = &pctx->opts;
+	int i;
+	struct ibw_wr *p;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_init_memory(cmid: %p)\n", pconn->cm_id));
+	pconn->buf_send = ibw_alloc_mr(pctx, pconn,
+		opts->max_send_wr * opts->recv_bufsize, &pconn->mr_send);
+	if (!pconn->buf_send) {
+		sprintf(ibw_lasterr, "couldn't allocate work send buf\n");
+		return -1;
+	}
+
+	pconn->buf_recv = ibw_alloc_mr(pctx, pconn,
+		opts->max_recv_wr * opts->recv_bufsize, &pconn->mr_recv);
+	if (!pconn->buf_recv) {
+		sprintf(ibw_lasterr, "couldn't allocate work recv buf\n");
+		return -1;
+	}
+
+	/* zeroed so that slots not filled below are NULL rather than garbage */
+	pconn->wr_index = talloc_zero_size(pconn, opts->max_send_wr * sizeof(struct ibw_wr *));
+	if (pconn->wr_index == NULL) {
+		sprintf(ibw_lasterr, "couldn't allocate wr_index\n");
+		return -1;
+	}
+
+	for(i=0; i<opts->max_send_wr; i++) {
+		p = talloc_zero(pconn, struct ibw_wr);
+		if (p == NULL) {
+			/* report the failure instead of dereferencing NULL */
+			sprintf(ibw_lasterr, "couldn't allocate work request %d\n", i);
+			return -1;
+		}
+		pconn->wr_index[i] = p;
+		p->buf = pconn->buf_send + (i * opts->recv_bufsize);
+		p->wr_id = i;
+
+		DLIST_ADD(pconn->wr_list_avail, p);
+	}
+
+	return 0;
+}
+
+/*
+ * talloc destructor of the private ibw context data.
+ *
+ * Tears down, in this order: the tevent fd watching the cm channel, the
+ * context's own rdma_cm_id, and finally the cm event channel.  Always
+ * returns 0.
+ */
+static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx)
+{
+	DEBUG(DEBUG_DEBUG, ("ibw_ctx_priv_destruct(%p)\n", pctx));
+
+	/*
+	 * tevent_fd must be removed before the fd is closed
+	 */
+	TALLOC_FREE(pctx->cm_channel_event);
+
+	/*
+	 * destroy cm: librdmacm requires every rdma_cm_id associated with
+	 * an event channel to be destroyed before the channel itself, so
+	 * the id goes first.
+	 */
+	if (pctx->cm_id) {
+		rdma_destroy_id(pctx->cm_id);
+		pctx->cm_id = NULL;
+	}
+	if (pctx->cm_channel) {
+		rdma_destroy_event_channel(pctx->cm_channel);
+		pctx->cm_channel = NULL;
+	}
+
+	return 0;
+}
+
+/* talloc destructor of an ibw context: only logs the event and returns 0 */
+static int ibw_ctx_destruct(struct ibw_ctx *ctx)
+{
+ DEBUG(DEBUG_DEBUG, ("ibw_ctx_destruct(%p)\n", ctx));
+ return 0;
+}
+
+/*
+ * talloc destructor of the private connection data.
+ *
+ * Releases the verbs resources of the connection in this order: tevent
+ * fd of the completion channel, queue pair, completion queue, completion
+ * channel, the send and receive memory regions, protection domain and
+ * finally the rdma_cm_id.  Every handle is reset to NULL once released.
+ * Always returns 0.
+ */
+static int ibw_conn_priv_destruct(struct ibw_conn_priv *pconn)
+{
+ DEBUG(DEBUG_DEBUG, ("ibw_conn_priv_destruct(%p, cmid: %p)\n",
+ pconn, pconn->cm_id));
+
+ /* pconn->wr_index is freed by talloc */
+ /* pconn->wr_index[i] are freed by talloc */
+
+ /*
+ * tevent_fd must be removed before the fd is closed
+ */
+ TALLOC_FREE(pconn->verbs_channel_event);
+
+ /* destroy verbs */
+ if (pconn->cm_id!=NULL && pconn->cm_id->qp!=NULL) {
+ rdma_destroy_qp(pconn->cm_id);
+ pconn->cm_id->qp = NULL;
+ }
+
+ if (pconn->cq!=NULL) {
+ ibv_destroy_cq(pconn->cq);
+ pconn->cq = NULL;
+ }
+
+ if (pconn->verbs_channel!=NULL) {
+ ibv_destroy_comp_channel(pconn->verbs_channel);
+ pconn->verbs_channel = NULL;
+ }
+
+ /* free memory regions */
+ ibw_free_mr(&pconn->buf_send, &pconn->mr_send);
+ ibw_free_mr(&pconn->buf_recv, &pconn->mr_recv);
+
+ /* the protection domain goes after the regions registered on it */
+ if (pconn->pd) {
+ ibv_dealloc_pd(pconn->pd);
+ pconn->pd = NULL;
+ DEBUG(DEBUG_DEBUG, ("pconn=%p pd deallocated\n", pconn));
+ }
+
+ if (pconn->cm_id) {
+ rdma_destroy_id(pconn->cm_id);
+ pconn->cm_id = NULL;
+ DEBUG(DEBUG_DEBUG, ("pconn=%p cm_id destroyed\n", pconn));
+ }
+
+ return 0;
+}
+
+/*
+ * Destructor-style helper for a work request: releases the large buffer
+ * and its memory region if one is attached.  Always returns 0.
+ */
+static int ibw_wr_destruct(struct ibw_wr *wr)
+{
+ if (wr->buf_large!=NULL)
+ ibw_free_mr(&wr->buf_large, &wr->mr_large);
+ return 0;
+}
+
+/*
+ * talloc destructor of a connection: unlinks it from the connection list
+ * of its context.  Always returns 0.
+ */
+static int ibw_conn_destruct(struct ibw_conn *conn)
+{
+ DEBUG(DEBUG_DEBUG, ("ibw_conn_destruct(%p)\n", conn));
+
+ /* important here: ctx is a talloc _parent_ */
+ DLIST_REMOVE(conn->ctx->conn_list, conn);
+ return 0;
+}
+
+/*
+ * Create a new, zeroed connection object belonging to ctx.
+ *
+ * The connection is allocated on mem_ctx, its private data on the
+ * connection itself, and both get their destructors installed.  The
+ * connection is added to ctx->conn_list.  Allocation failures are caught
+ * by assertions, so a returned pointer is never NULL while those are
+ * active.
+ */
+struct ibw_conn *ibw_conn_new(struct ibw_ctx *ctx, TALLOC_CTX *mem_ctx)
+{
+ struct ibw_conn *conn;
+ struct ibw_conn_priv *pconn;
+
+ assert(ctx!=NULL);
+
+ conn = talloc_zero(mem_ctx, struct ibw_conn);
+ assert(conn!=NULL);
+ talloc_set_destructor(conn, ibw_conn_destruct);
+
+ /* private part is a talloc child of conn and is freed along with it */
+ pconn = talloc_zero(conn, struct ibw_conn_priv);
+ assert(pconn!=NULL);
+ talloc_set_destructor(pconn, ibw_conn_priv_destruct);
+
+ conn->ctx = ctx;
+ conn->internal = (void *)pconn;
+
+ DLIST_ADD(ctx->conn_list, conn);
+
+ return conn;
+}
+
+/*
+ * Create the verbs resources of a connection: completion channel (with
+ * its tevent fd handler), protection domain, registered buffers,
+ * completion queue and queue pair; then post the initial receive
+ * requests via ibw_fill_cq().
+ *
+ * Returns 0 on success and a non-zero value on failure, in which case
+ * ibw_lasterr describes the step that failed.  Resources created before
+ * the failure stay attached to pconn.
+ */
+static int ibw_setup_cq_qp(struct ibw_conn *conn)
+{
+	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	struct ibv_qp_init_attr init_attr;
+	struct ibv_qp_attr attr;
+	int rc;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp(cmid: %p)\n", pconn->cm_id));
+
+	/* init verbs */
+	pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs);
+	if (!pconn->verbs_channel) {
+		sprintf(ibw_lasterr, "ibv_create_comp_channel failed %d\n", errno);
+		return -1;
+	}
+	DEBUG(DEBUG_DEBUG, ("created channel %p\n", pconn->verbs_channel));
+
+	pconn->verbs_channel_event = tevent_add_fd(pctx->ectx, NULL, /* not pconn or conn */
+		pconn->verbs_channel->fd, TEVENT_FD_READ, ibw_event_handler_verbs, conn);
+	if (pconn->verbs_channel_event == NULL) {
+		/* without the fd handler no completion would ever be processed */
+		sprintf(ibw_lasterr, "tevent_add_fd failed for verbs channel\n");
+		return -1;
+	}
+
+	pconn->pd = ibv_alloc_pd(pconn->cm_id->verbs);
+	if (!pconn->pd) {
+		sprintf(ibw_lasterr, "ibv_alloc_pd failed %d\n", errno);
+		return -1;
+	}
+	DEBUG(DEBUG_DEBUG, ("created pd %p\n", pconn->pd));
+
+	/* init mr */
+	if (ibw_init_memory(conn))
+		return -1;
+
+	/* init cq: one queue sized for both receive and send completions */
+	pconn->cq = ibv_create_cq(pconn->cm_id->verbs,
+		pctx->opts.max_recv_wr + pctx->opts.max_send_wr,
+		conn, pconn->verbs_channel, 0);
+	if (pconn->cq==NULL) {
+		sprintf(ibw_lasterr, "ibv_create_cq failed\n");
+		return -1;
+	}
+
+	rc = ibv_req_notify_cq(pconn->cq, 0);
+	if (rc) {
+		sprintf(ibw_lasterr, "ibv_req_notify_cq failed with %d\n", rc);
+		return rc;
+	}
+
+	/* init qp */
+	memset(&init_attr, 0, sizeof(init_attr));
+	init_attr.cap.max_send_wr = pctx->opts.max_send_wr;
+	init_attr.cap.max_recv_wr = pctx->opts.max_recv_wr;
+	init_attr.cap.max_recv_sge = 1;
+	init_attr.cap.max_send_sge = 1;
+	init_attr.qp_type = IBV_QPT_RC;
+	init_attr.send_cq = pconn->cq;
+	init_attr.recv_cq = pconn->cq;
+
+	rc = rdma_create_qp(pconn->cm_id, pconn->pd, &init_attr);
+	if (rc) {
+		sprintf(ibw_lasterr, "rdma_create_qp failed with %d\n", rc);
+		return rc;
+	}
+	/* else result is in pconn->cm_id->qp */
+
+	rc = ibv_query_qp(pconn->cm_id->qp, &attr, IBV_QP_PATH_MTU, &init_attr);
+	if (rc) {
+		sprintf(ibw_lasterr, "ibv_query_qp failed with %d\n", rc);
+		return rc;
+	}
+
+	return ibw_fill_cq(conn);
+}
+
+/*
+ * Post a single receive request for the next slot of the receive buffer.
+ *
+ * The slot is selected by pconn->recv_index, which is also used as wr_id
+ * and then advanced modulo max_recv_wr.  Returns 0 on success, -2 if
+ * ibv_post_recv() fails (ibw_lasterr is set and logged).
+ */
+static int ibw_refill_cq_recv(struct ibw_conn *conn)
+{
+ struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+ struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+ int rc;
+ struct ibv_sge list = {
+ .addr = (uintptr_t) NULL, /* filled below */
+ .length = pctx->opts.recv_bufsize,
+ .lkey = pconn->mr_recv->lkey /* always the same */
+ };
+ struct ibv_recv_wr wr = {
+ .wr_id = 0, /* filled below */
+ .sg_list = &list,
+ .num_sge = 1,
+ };
+ struct ibv_recv_wr *bad_wr;
+
+ DEBUG(DEBUG_DEBUG, ("ibw_refill_cq_recv(cmid: %p)\n", pconn->cm_id));
+
+ /* slot recv_index of the receive buffer; the index doubles as wr_id */
+ list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
+ wr.wr_id = pconn->recv_index;
+ pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;
+
+ rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
+ if (rc) {
+ sprintf(ibw_lasterr, "refill/ibv_post_recv failed with %d\n", rc);
+ DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+ return -2;
+ }
+
+ return 0;
+}
+
+/*
+ * Post max_recv_wr receive requests, one per slot of the receive buffer.
+ *
+ * Slots are taken in recv_index order; the index serves as wr_id and
+ * wraps around at max_recv_wr.  Returns 0 on success, -2 as soon as one
+ * ibv_post_recv() call fails (ibw_lasterr is set and logged).
+ */
+static int ibw_fill_cq(struct ibw_conn *conn)
+{
+	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	int remaining, rc;
+	struct ibv_sge sge = {
+		.addr = (uintptr_t) NULL, /* set per slot */
+		.length = pctx->opts.recv_bufsize,
+		.lkey = pconn->mr_recv->lkey /* same region for all slots */
+	};
+	struct ibv_recv_wr recv_wr = {
+		.wr_id = 0, /* set per slot */
+		.sg_list = &sge,
+		.num_sge = 1,
+	};
+	struct ibv_recv_wr *bad_wr;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_fill_cq(cmid: %p)\n", pconn->cm_id));
+
+	remaining = pctx->opts.max_recv_wr;
+	while (remaining != 0) {
+		sge.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
+		recv_wr.wr_id = pconn->recv_index;
+		pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;
+
+		rc = ibv_post_recv(pconn->cm_id->qp, &recv_wr, &bad_wr);
+		if (rc != 0) {
+			sprintf(ibw_lasterr, "fill/ibv_post_recv failed with %d\n", rc);
+			DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+			return -2;
+		}
+
+		remaining--;
+	}
+
+	return 0;
+}
+
+/*
+ * Finish an outgoing connection attempt: set up the verbs resources and
+ * issue rdma_connect().
+ *
+ * Returns -1 if the resource setup fails, otherwise the rdma_connect()
+ * result (ibw_lasterr is set when that is non-zero).
+ */
+static int ibw_manage_connect(struct ibw_conn *conn)
+{
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	struct rdma_conn_param param;
+	int err;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_manage_connect(cmid: %p)\n", pconn->cm_id));
+
+	if (ibw_setup_cq_qp(conn) != 0) {
+		return -1;
+	}
+
+	/* cm connect */
+	memset(&param, 0, sizeof(param));
+	param.responder_resources = 1;
+	param.initiator_depth = 1;
+	param.retry_count = 10;
+
+	err = rdma_connect(pconn->cm_id, &param);
+	if (err != 0) {
+		sprintf(ibw_lasterr, "rdma_connect error %d\n", err);
+	}
+
+	return err;
+}
+
+/*
+ * tevent fd handler for the rdma connection manager event channel.
+ *
+ * private_data is the ibw context.  One cm event is fetched and
+ * dispatched:
+ *  - ADDR_RESOLVED / ROUTE_RESOLVED drive an outgoing connection forward
+ *  - CONNECT_REQUEST creates a connection object for an incoming request
+ *    and lets the upper layer accept it; a request that is not accepted
+ *    is rejected and its connection freed
+ *  - ESTABLISHED moves the connection to IBWC_CONNECTED
+ *  - REJECTED and DISCONNECTED ack the event first and then report
+ *    IBWC_ERROR resp. IBWC_DISCONNECTED to the upper layer
+ *  - everything else is treated as an error
+ *
+ * On the error path the upper layer is notified through connstate_func
+ * (IBWC_ERROR for a child connection, IBWS_ERROR for the context) and
+ * the event, if still held, is acked.
+ */
+static void ibw_event_handler_cm(struct tevent_context *ev,
+	struct tevent_fd *fde, uint16_t flags, void *private_data)
+{
+	int rc;
+	struct ibw_ctx *ctx = talloc_get_type(private_data, struct ibw_ctx);
+	struct ibw_ctx_priv *pctx = NULL;
+	struct ibw_conn *conn = NULL;
+	struct ibw_conn_priv *pconn = NULL;
+	struct rdma_cm_id *cma_id = NULL;
+	struct rdma_cm_event *event = NULL;
+
+	/* validate ctx before it is dereferenced */
+	assert(ctx!=NULL);
+	pctx = talloc_get_type(ctx->internal, struct ibw_ctx_priv);
+
+	rc = rdma_get_cm_event(pctx->cm_channel, &event);
+	if (rc) {
+		ctx->state = IBWS_ERROR;
+		event = NULL;
+		sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc);
+		goto error;
+	}
+	cma_id = event->id;
+
+	DEBUG(DEBUG_DEBUG, ("cma_event type %d cma_id %p (%s)\n", event->event, cma_id,
+		(cma_id == pctx->cm_id) ? "parent" : "child"));
+
+	switch (event->event) {
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ADDR_RESOLVED\n"));
+		/* continuing from ibw_connect ... */
+		rc = rdma_resolve_route(cma_id, 2000);
+		if (rc) {
+			sprintf(ibw_lasterr, "rdma_resolve_route error %d\n", rc);
+			goto error;
+		}
+		/* continued at RDMA_CM_EVENT_ROUTE_RESOLVED */
+		break;
+
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ROUTE_RESOLVED\n"));
+		/* after RDMA_CM_EVENT_ADDR_RESOLVED: */
+		assert(cma_id->context!=NULL);
+		conn = talloc_get_type(cma_id->context, struct ibw_conn);
+
+		rc = ibw_manage_connect(conn);
+		if (rc)
+			goto error;
+
+		break;
+
+	case RDMA_CM_EVENT_CONNECT_REQUEST:
+		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_CONNECT_REQUEST\n"));
+		ctx->state = IBWS_CONNECT_REQUEST;
+		conn = ibw_conn_new(ctx, ctx);
+		pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+		pconn->cm_id = cma_id; /* !!! event will be freed but id not */
+		cma_id->context = (void *)conn;
+		DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p\n", pconn->cm_id));
+
+		if (ibw_setup_cq_qp(conn))
+			goto error;
+
+		conn->state = IBWC_INIT;
+		pctx->connstate_func(ctx, conn);
+
+		/* continued at ibw_accept when invoked by the func above */
+		if (!pconn->is_accepted) {
+			rc = rdma_reject(cma_id, NULL, 0);
+			if (rc)
+				DEBUG(DEBUG_ERR, ("rdma_reject failed with rc=%d\n", rc));
+			/*
+			 * Log before freeing: pconn is a talloc child of
+			 * conn and is gone once conn has been freed.
+			 */
+			DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p wasn't accepted\n", pconn->cm_id));
+			talloc_free(conn);
+		}
+
+		/* TODO: clarify whether if it's needed by upper layer: */
+		ctx->state = IBWS_READY;
+		pctx->connstate_func(ctx, NULL);
+
+		/* NOTE: more requests can arrive until RDMA_CM_EVENT_ESTABLISHED ! */
+		break;
+
+	case RDMA_CM_EVENT_ESTABLISHED:
+		/* expected after ibw_accept and ibw_connect[not directly] */
+		DEBUG(DEBUG_INFO, ("ESTABLISHED (conn: %p)\n", cma_id->context));
+		conn = talloc_get_type(cma_id->context, struct ibw_conn);
+		assert(conn!=NULL); /* important assumption */
+
+		DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp succeeded (cmid=%p)\n", cma_id));
+
+		/* client conn is up */
+		conn->state = IBWC_CONNECTED;
+
+		/* both ctx and conn have changed */
+		pctx->connstate_func(ctx, conn);
+		break;
+
+	case RDMA_CM_EVENT_ADDR_ERROR:
+		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ADDR_ERROR, error %d\n", event->status);
+		goto error;
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ROUTE_ERROR, error %d\n", event->status);
+		goto error;
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+		sprintf(ibw_lasterr, "RDMA_CM_EVENT_CONNECT_ERROR, error %d\n", event->status);
+		goto error;
+	case RDMA_CM_EVENT_UNREACHABLE:
+		sprintf(ibw_lasterr, "RDMA_CM_EVENT_UNREACHABLE, error %d\n", event->status);
+		goto error;
+	case RDMA_CM_EVENT_REJECTED:
+		sprintf(ibw_lasterr, "RDMA_CM_EVENT_REJECTED, error %d\n", event->status);
+		DEBUG(DEBUG_INFO, ("cm event handler: %s", ibw_lasterr));
+		conn = talloc_get_type(cma_id->context, struct ibw_conn);
+		if (conn) {
+			/* must be done BEFORE connstate */
+			if ((rc=rdma_ack_cm_event(event)))
+				DEBUG(DEBUG_ERR, ("reject/rdma_ack_cm_event failed with %d\n", rc));
+			event = NULL; /* not to touch cma_id or conn */
+			conn->state = IBWC_ERROR;
+			/* it should free the conn */
+			pctx->connstate_func(NULL, conn);
+		}
+		break; /* this is not strictly an error */
+
+	case RDMA_CM_EVENT_DISCONNECTED:
+		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_DISCONNECTED\n"));
+		if ((rc=rdma_ack_cm_event(event)))
+			DEBUG(DEBUG_ERR, ("disc/rdma_ack_cm_event failed with %d\n", rc));
+		event = NULL; /* don't ack more */
+
+		if (cma_id!=pctx->cm_id) {
+			DEBUG(DEBUG_ERR, ("client DISCONNECT event cm_id=%p\n", cma_id));
+			conn = talloc_get_type(cma_id->context, struct ibw_conn);
+			conn->state = IBWC_DISCONNECTED;
+			pctx->connstate_func(NULL, conn);
+		}
+		break;
+
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		sprintf(ibw_lasterr, "cma detected device removal!\n");
+		goto error;
+
+	default:
+		sprintf(ibw_lasterr, "unknown event %d\n", event->event);
+		goto error;
+	}
+
+	/* event is NULL here when a case above has already acked it */
+	if (event!=NULL && (rc=rdma_ack_cm_event(event))) {
+		sprintf(ibw_lasterr, "rdma_ack_cm_event failed with %d\n", rc);
+		goto error;
+	}
+
+	return;
+error:
+	DEBUG(DEBUG_ERR, ("cm event handler: %s", ibw_lasterr));
+
+	if (event!=NULL) {
+		if (cma_id!=NULL && cma_id!=pctx->cm_id) {
+			/* failure concerns a child connection */
+			conn = talloc_get_type(cma_id->context, struct ibw_conn);
+			if (conn) {
+				conn->state = IBWC_ERROR;
+				pctx->connstate_func(NULL, conn);
+			}
+		} else {
+			/* failure concerns the context itself */
+			ctx->state = IBWS_ERROR;
+			pctx->connstate_func(ctx, NULL);
+		}
+
+		if ((rc=rdma_ack_cm_event(event))!=0) {
+			DEBUG(DEBUG_ERR, ("rdma_ack_cm_event failed with %d\n", rc));
+		}
+	}
+
+	return;
+}
+
+/*
+ * tevent fd handler for the completion channel of a connection.
+ *
+ * private_data is the ibw connection.  One completion event is fetched,
+ * notification is re-armed and the completion queue is drained: send
+ * completions go to ibw_wc_send(), receive completions to ibw_wc_recv(),
+ * rdma read/write completions are only logged.  The event is acked
+ * before returning.
+ *
+ * On any failure the message in ibw_lasterr is logged and, unless the
+ * connection is already in IBWC_ERROR state, that state is set and
+ * reported through connstate_func.
+ */
+static void ibw_event_handler_verbs(struct tevent_context *ev,
+	struct tevent_fd *fde, uint16_t flags, void *private_data)
+{
+	struct ibw_conn *conn = talloc_get_type(private_data, struct ibw_conn);
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+
+	struct ibv_wc wc;
+	int rc;
+	struct ibv_cq *ev_cq = NULL; /* stays NULL until an event was really fetched */
+	void *ev_ctx;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_event_handler_verbs(%u)\n", (uint32_t)flags));
+
+	/* TODO: check whether if it's good to have more channels here... */
+	rc = ibv_get_cq_event(pconn->verbs_channel, &ev_cq, &ev_ctx);
+	if (rc) {
+		sprintf(ibw_lasterr, "Failed to get cq_event with %d\n", rc);
+		ev_cq = NULL; /* nothing was delivered, so nothing to ack */
+		goto error;
+	}
+	if (ev_cq != pconn->cq) {
+		sprintf(ibw_lasterr, "ev_cq(%p) != pconn->cq(%p)\n", ev_cq, pconn->cq);
+		goto error;
+	}
+	rc = ibv_req_notify_cq(pconn->cq, 0);
+	if (rc) {
+		sprintf(ibw_lasterr, "Couldn't request CQ notification (%d)\n", rc);
+		goto error;
+	}
+
+	while((rc=ibv_poll_cq(pconn->cq, 1, &wc))==1) {
+		if (wc.status) {
+			sprintf(ibw_lasterr, "cq completion failed status=%d, opcode=%d, rc=%d\n",
+				wc.status, wc.opcode, rc);
+			goto error;
+		}
+
+		switch(wc.opcode) {
+		case IBV_WC_SEND:
+			DEBUG(DEBUG_DEBUG, ("send completion\n"));
+			if (ibw_wc_send(conn, &wc))
+				goto error;
+			break;
+
+		case IBV_WC_RDMA_WRITE:
+			DEBUG(DEBUG_DEBUG, ("rdma write completion\n"));
+			break;
+
+		case IBV_WC_RDMA_READ:
+			DEBUG(DEBUG_DEBUG, ("rdma read completion\n"));
+			break;
+
+		case IBV_WC_RECV:
+			DEBUG(DEBUG_DEBUG, ("recv completion\n"));
+			if (ibw_wc_recv(conn, &wc))
+				goto error;
+			break;
+
+		default:
+			sprintf(ibw_lasterr, "unknown completion %d\n", wc.opcode);
+			goto error;
+		}
+	}
+	/* the loop ends with 0 when the queue is empty; anything else is a poll error */
+	if (rc!=0) {
+		sprintf(ibw_lasterr, "ibv_poll_cq error %d\n", rc);
+		goto error;
+	}
+
+	ibv_ack_cq_events(pconn->cq, 1);
+
+	return;
+error:
+	/*
+	 * Ack only an event that was actually received, and ack it on the
+	 * queue that delivered it.
+	 */
+	if (ev_cq != NULL) {
+		ibv_ack_cq_events(ev_cq, 1);
+	}
+
+	DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+
+	if (conn->state!=IBWC_ERROR) {
+		conn->state = IBWC_ERROR;
+		pctx->connstate_func(NULL, conn);
+	}
+}
+
+/*
+ * Send the next pending fragment of the work request at the head of
+ * pconn->queue.
+ *
+ * Returns 0 when the queue is empty, otherwise the result of
+ * ibw_send_packet() for the fragment that was handed over.
+ */
+static int ibw_process_queue(struct ibw_conn *conn)
+{
+ struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+ struct ibw_ctx_priv *pctx;
+ struct ibw_wr *p;
+ int rc;
+ uint32_t msg_size;
+
+ if (pconn->queue==NULL)
+ return 0; /* NOP */
+
+ p = pconn->queue;
+
+ /* we must have at least 1 fragment to send */
+ assert(p->queued_ref_cnt>0);
+ p->queued_ref_cnt--;
+
+ pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+ /* every fragment but the last one is a full recv_bufsize chunk;
+  * the last one has the length remembered in queued_rlen */
+ msg_size = (p->queued_ref_cnt) ? pctx->opts.recv_bufsize : p->queued_rlen;
+
+ assert(p->queued_msg!=NULL);
+ assert(msg_size!=0);
+
+ DEBUG(DEBUG_DEBUG, ("ibw_process_queue refcnt=%d msgsize=%u\n",
+ p->queued_ref_cnt, msg_size));
+
+ rc = ibw_send_packet(conn, p->queued_msg, p, msg_size);
+
+ /* NOTE(review): the bookkeeping below is applied even when rc != 0 */
+ /* was this the last fragment? */
+ if (p->queued_ref_cnt) {
+ /* more to come: step to the start of the next fragment */
+ p->queued_msg += pctx->opts.recv_bufsize;
+ } else {
+ DLIST_REMOVE2(pconn->queue, p, qprev, qnext);
+ p->queued_msg = NULL;
+ }
+
+ return rc;
+}
+
+/*
+ * Handle one send completion.
+ *
+ * Send work requests are posted with wr_id = ibw_wr.wr_id + max_recv_wr
+ * (see ibw_send_packet), so the offset is removed here to find the
+ * ibw_wr the completion belongs to.  The wr is returned to its "avail"
+ * list once no further fragment completions are outstanding, and then the
+ * next queued fragment (if any) is sent via ibw_process_queue().
+ *
+ * Returns -1 if an "extra" wr cannot be found, otherwise the result of
+ * ibw_process_queue().
+ */
+static int ibw_wc_send(struct ibw_conn *conn, struct ibv_wc *wc)
+{
+ struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+ struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+ struct ibw_wr *p;
+ int send_index;
+
+ DEBUG(DEBUG_DEBUG, ("ibw_wc_send(cmid: %p, wr_id: %u, bl: %u)\n",
+ pconn->cm_id, (uint32_t)wc->wr_id, (uint32_t)wc->byte_len));
+
+ assert(pconn->cm_id->qp->qp_num==wc->qp_num);
+ assert(wc->wr_id >= pctx->opts.max_recv_wr);
+ send_index = wc->wr_id - pctx->opts.max_recv_wr;
+ /* one posted send less; this makes room for ibw_process_queue() below */
+ pconn->wr_sent--;
+
+ if (send_index < pctx->opts.max_send_wr) {
+ /* pre-allocated wr: direct lookup through wr_index */
+ DEBUG(DEBUG_DEBUG, ("ibw_wc_send#1 %u\n", (int)wc->wr_id));
+ p = pconn->wr_index[send_index];
+ if (p->buf_large!=NULL) {
+ if (p->ref_cnt) {
+ /* awaiting more of it... */
+ p->ref_cnt--;
+ } else {
+ /* last fragment completed: release the large buffer and the wr */
+ ibw_free_mr(&p->buf_large, &p->mr_large);
+ DLIST_REMOVE(pconn->wr_list_used, p);
+ DLIST_ADD(pconn->wr_list_avail, p);
+ }
+ } else { /* nasty - but necessary */
+ DLIST_REMOVE(pconn->wr_list_used, p);
+ DLIST_ADD(pconn->wr_list_avail, p);
+ }
+ } else { /* "extra" request - not optimized */
+ DEBUG(DEBUG_DEBUG, ("ibw_wc_send#2 %u\n", (int)wc->wr_id));
+ /* no index for extra wrs: linear search in extra_sent by wr_id */
+ for(p=pconn->extra_sent; p!=NULL; p=p->next)
+ if ((p->wr_id + pctx->opts.max_recv_wr)==(int)wc->wr_id)
+ break;
+ if (p==NULL) {
+ sprintf(ibw_lasterr, "failed to find wr_id %d\n", (int)wc->wr_id);
+ return -1;
+ }
+ if (p->ref_cnt) {
+ p->ref_cnt--;
+ } else {
+ ibw_free_mr(&p->buf_large, &p->mr_large);
+ DLIST_REMOVE(pconn->extra_sent, p);
+ DLIST_ADD(pconn->extra_avail, p);
+ }
+ }
+
+ return ibw_process_queue(conn);
+}
+
+/*
+ * Append add_len bytes from *pp to the partial-message buffer.
+ *
+ * The buffer is grown on demand.  On success *pp is advanced past the
+ * consumed bytes, part->len grows and part->to_read shrinks by add_len.
+ *
+ * info is only a call-site tag that ends up in the error message.
+ *
+ * Returns 0 on success, -1 on allocation failure (message in ibw_lasterr).
+ * On failure the previously collected data in part stays intact.
+ */
+static int ibw_append_to_part(struct ibw_conn_priv *pconn,
+	struct ibw_part *part, char **pp, uint32_t add_len, int info)
+{
+	DEBUG(DEBUG_DEBUG, ("ibw_append_to_part: cmid=%p, (bs=%u, len=%u, tr=%u), al=%u, i=%u\n",
+		pconn->cm_id, part->bufsize, part->len, part->to_read, add_len, info));
+
+	/* allocate more if necessary - it's an "evergrowing" buffer... */
+	if (part->len + add_len > part->bufsize) {
+		uint32_t new_size = part->len + add_len;
+		char *new_buf;
+
+		if (part->buf==NULL) {
+			assert(part->len==0);
+			new_buf = talloc_size(pconn, new_size);
+			if (new_buf==NULL) {
+				sprintf(ibw_lasterr, "recv talloc_size error (%u) #%d\n",
+					add_len, info);
+				return -1;
+			}
+		} else {
+			/*
+			 * Keep the result in a temporary: when the realloc
+			 * fails the old buffer is still valid and must not
+			 * be lost by overwriting part->buf with NULL.
+			 */
+			new_buf = talloc_realloc_size(pconn, part->buf, new_size);
+			if (new_buf==NULL) {
+				sprintf(ibw_lasterr, "recv realloc error (%u + %u) #%d\n",
+					part->len, add_len, info);
+				return -1;
+			}
+		}
+		part->buf = new_buf;
+		part->bufsize = new_size;
+	}
+
+	/* consume pp */
+	memcpy(part->buf + part->len, *pp, add_len);
+	*pp += add_len;
+	part->len += add_len;
+	part->to_read -= add_len;
+
+	return 0;
+}
+
+/*
+ * Shrink the partial-message buffer back to threshold bytes when it has
+ * grown beyond that size.  The old contents are discarded.
+ *
+ * Returns 0 on success (including "nothing to do"), -1 if the replacement
+ * buffer cannot be allocated; in that case part->buf is NULL and
+ * part->bufsize is 0.
+ */
+static int ibw_wc_mem_threshold(struct ibw_conn_priv *pconn,
+	struct ibw_part *part, uint32_t threshold)
+{
+	DEBUG(DEBUG_DEBUG, ("ibw_wc_mem_threshold: cmid=%p, (bs=%u, len=%u, tr=%u), thr=%u\n",
+		pconn->cm_id, part->bufsize, part->len, part->to_read, threshold));
+
+	if (part->bufsize > threshold) {
+		DEBUG(DEBUG_DEBUG, ("ibw_wc_mem_threshold: cmid=%p, %u > %u\n",
+			pconn->cm_id, part->bufsize, threshold));
+		talloc_free(part->buf);
+		/*
+		 * The old buffer is gone: reset the size as well, so that
+		 * buf and bufsize stay consistent if the allocation fails.
+		 */
+		part->bufsize = 0;
+		part->buf = talloc_size(pconn, threshold);
+		if (part->buf==NULL) {
+			sprintf(ibw_lasterr, "talloc_size failed\n");
+			return -1;
+		}
+		part->bufsize = threshold;
+	}
+	return 0;
+}
+
+/*
+ * Handle one receive completion.
+ *
+ * The receive buffer slot is selected by wc->wr_id and holds
+ * wc->byte_len bytes.  The stream consists of messages that each start
+ * with a uint32_t total length (header included).  Complete messages are
+ * passed to the receive callback straight from the receive buffer;
+ * messages (or length headers) that are split across completions are
+ * collected in pconn->part first.  Finally the receive slot is re-posted
+ * through ibw_refill_cq_recv().
+ *
+ * Returns 0 on success, -1 on error (logged here).
+ */
+static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc)
+{
+	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	struct ibw_part *part = &pconn->part;
+	char *p;
+	uint32_t remain = wc->byte_len;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_wc_recv: cmid=%p, wr_id: %u, bl: %u\n",
+		pconn->cm_id, (uint32_t)wc->wr_id, remain));
+
+	assert(pconn->cm_id->qp->qp_num==wc->qp_num);
+	assert((int)wc->wr_id < pctx->opts.max_recv_wr);
+	assert(wc->byte_len <= pctx->opts.recv_bufsize);
+
+	p = pconn->buf_recv + ((int)wc->wr_id * pctx->opts.recv_bufsize);
+
+	while(remain) {
+		/* here always true: (part->len!=0 && part->to_read!=0) ||
+			(part->len==0 && part->to_read==0) */
+		if (part->len) { /* is there a partial msg to be continued? */
+			uint32_t read_len = (part->to_read<=remain) ? part->to_read : remain;
+			if (ibw_append_to_part(pconn, part, &p, read_len, 421))
+				goto error;
+			remain -= read_len;
+
+			if (part->len<=sizeof(uint32_t) && part->to_read==0) {
+				/* the split length header is complete only now */
+				assert(part->len==sizeof(uint32_t));
+				/* set it again now... */
+				memcpy(&part->to_read, part->buf, sizeof(uint32_t)); /* TODO: ntohl */
+				if (part->to_read<sizeof(uint32_t)) {
+					sprintf(ibw_lasterr, "got msglen=%u #2\n", part->to_read);
+					goto error;
+				}
+				part->to_read -= sizeof(uint32_t); /* it's already read */
+			}
+
+			if (part->to_read==0) {
+				if (pctx->receive_func(conn, part->buf, part->len) != 0) {
+					goto error;
+				}
+				part->len = 0; /* tells not having partial data (any more) */
+				if (ibw_wc_mem_threshold(pconn, part, pctx->opts.recv_threshold))
+					goto error;
+			}
+		} else {
+			if (remain>=sizeof(uint32_t)) {
+				uint32_t msglen;
+
+				/*
+				 * Messages are packed back to back, so p may
+				 * sit at any byte offset: copy the header
+				 * instead of dereferencing a possibly
+				 * unaligned uint32_t pointer.
+				 */
+				memcpy(&msglen, p, sizeof(msglen)); /* TODO: ntohl */
+				if (msglen<sizeof(uint32_t)) {
+					sprintf(ibw_lasterr, "got msglen=%u\n", msglen);
+					goto error;
+				}
+
+				/* mostly awaited case: */
+				if (msglen<=remain) {
+					if (pctx->receive_func(conn, p, msglen) != 0) {
+						goto error;
+					}
+					p += msglen;
+					remain -= msglen;
+				} else {
+					part->to_read = msglen;
+					/* part->len is already 0 */
+					if (ibw_append_to_part(pconn, part, &p, remain, 422))
+						goto error;
+					remain = 0; /* to be continued ... */
+					/* part->to_read > 0 here */
+				}
+			} else { /* edge case: */
+				/* not even the length header is complete yet */
+				part->to_read = sizeof(uint32_t);
+				/* part->len is already 0 */
+				if (ibw_append_to_part(pconn, part, &p, remain, 423))
+					goto error;
+				remain = 0;
+				/* part->to_read > 0 here */
+			}
+		}
+	} /* <remain> is always decreased at least by 1 */
+
+	if (ibw_refill_cq_recv(conn))
+		goto error;
+
+	return 0;
+
+error:
+	DEBUG(DEBUG_ERR, ("ibw_wc_recv error: %s", ibw_lasterr));
+	return -1;
+}
+
+/*
+ * Fill *opts with the compiled-in defaults and then apply the nattr
+ * (name, value) overrides from attr.
+ *
+ * Recognised names: max_send_wr, max_recv_wr, recv_bufsize and
+ * recv_threshold; values are converted with atoi().
+ *
+ * Returns 0 on success, -1 on an unknown name (message in ibw_lasterr).
+ */
+static int ibw_process_init_attrs(struct ibw_initattr *attr, int nattr, struct ibw_opts *opts)
+{
+	int i;
+	const char *name, *value;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_process_init_attrs: nattr: %d\n", nattr));
+
+	opts->max_send_wr = IBW_MAX_SEND_WR;
+	opts->max_recv_wr = IBW_MAX_RECV_WR;
+	opts->recv_bufsize = IBW_RECV_BUFSIZE;
+	opts->recv_threshold = IBW_RECV_THRESHOLD;
+
+	for(i=0; i<nattr; i++) {
+		name = attr[i].name;
+		value = attr[i].value;
+
+		assert(name!=NULL && value!=NULL);
+		if (strcmp(name, "max_send_wr")==0)
+			opts->max_send_wr = atoi(value);
+		else if (strcmp(name, "max_recv_wr")==0)
+			opts->max_recv_wr = atoi(value);
+		else if (strcmp(name, "recv_bufsize")==0)
+			opts->recv_bufsize = atoi(value);
+		else if (strcmp(name, "recv_threshold")==0)
+			opts->recv_threshold = atoi(value);
+		else {
+			/*
+			 * name comes from the caller and has no length
+			 * limit: bound the write to the error buffer.
+			 */
+			snprintf(ibw_lasterr, IBW_LASTERR_BUFSIZE,
+				 "ibw_init: unknown name %s\n", name);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Create and initialise an ibwrapper context.
+ *
+ * attr/nattr:    option overrides, see ibw_process_init_attrs()
+ * ctx_userdata:  opaque pointer stored in the returned context
+ * ibw_connstate: callback invoked on context/connection state changes
+ * ibw_receive:   callback invoked for every complete incoming message
+ * ectx:          tevent context the rdma_cm event channel fd is added to
+ *
+ * Returns the new context (a talloc object with no parent), or NULL on
+ * failure; in the latter case everything allocated so far is freed.
+ */
+struct ibw_ctx *ibw_init(struct ibw_initattr *attr, int nattr,
+	void *ctx_userdata,
+	ibw_connstate_fn_t ibw_connstate,
+	ibw_receive_fn_t ibw_receive,
+	struct tevent_context *ectx)
+{
+	struct ibw_ctx *ctx;
+	struct ibw_ctx_priv *pctx;
+	int rc;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_init(ctx_userdata: %p, ectx: %p)\n", ctx_userdata, ectx));
+
+	/* initialize basic data structures */
+	memset(ibw_lasterr, 0, IBW_LASTERR_BUFSIZE);
+
+	ctx = talloc_zero(NULL, struct ibw_ctx);
+	if (ctx == NULL) {
+		sprintf(ibw_lasterr, "ibw_init: talloc_zero(ibw_ctx) failed\n");
+		DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+		return NULL;
+	}
+
+	pctx = talloc_zero(ctx, struct ibw_ctx_priv);
+	if (pctx == NULL) {
+		sprintf(ibw_lasterr, "ibw_init: talloc_zero(ibw_ctx_priv) failed\n");
+		DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+		/* no destructor is installed yet, a plain free is enough */
+		talloc_free(ctx);
+		return NULL;
+	}
+
+	/*
+	 * Install the destructors only after both objects exist:
+	 * talloc_set_destructor() must not be handed a NULL pointer.
+	 */
+	talloc_set_destructor(ctx, ibw_ctx_destruct);
+	talloc_set_destructor(pctx, ibw_ctx_priv_destruct);
+
+	ctx->ctx_userdata = ctx_userdata;
+	ctx->internal = (void *)pctx;
+
+	pctx->connstate_func = ibw_connstate;
+	pctx->receive_func = ibw_receive;
+
+	pctx->ectx = ectx;
+
+	/* process attributes */
+	if (ibw_process_init_attrs(attr, nattr, &pctx->opts))
+		goto cleanup;
+
+	/* init cm */
+	pctx->cm_channel = rdma_create_event_channel();
+	if (!pctx->cm_channel) {
+		sprintf(ibw_lasterr, "rdma_create_event_channel error %d\n", errno);
+		goto cleanup;
+	}
+
+	pctx->cm_channel_event = tevent_add_fd(pctx->ectx, pctx,
+		pctx->cm_channel->fd, TEVENT_FD_READ, ibw_event_handler_cm, ctx);
+	if (pctx->cm_channel_event == NULL) {
+		/* without the fd event no cm event would ever be delivered */
+		sprintf(ibw_lasterr, "ibw_init: tevent_add_fd failed\n");
+		goto cleanup;
+	}
+
+#if RDMA_USER_CM_MAX_ABI_VERSION >= 2
+	rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx, RDMA_PS_TCP);
+#else
+	rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx);
+#endif
+	if (rc) {
+		rc = errno;
+		sprintf(ibw_lasterr, "rdma_create_id error %d\n", rc);
+		goto cleanup;
+	}
+	DEBUG(DEBUG_DEBUG, ("created cm_id %p\n", pctx->cm_id));
+
+	pctx->pagesize = sysconf(_SC_PAGESIZE);
+
+	return ctx;
+	/* don't put code here */
+cleanup:
+	DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+
+	/* frees pctx as well (talloc child) and runs both destructors */
+	talloc_free(ctx);
+
+	return NULL;
+}
+
+/*
+ * Stop the context: call ibw_disconnect() for every connection that is
+ * either connected or in error state, then mark the context
+ * IBWS_STOPPED and report that through the connstate callback.
+ *
+ * Returns 0 on success, -1 as soon as one disconnect fails (the context
+ * state is left untouched in that case).
+ */
+int ibw_stop(struct ibw_ctx *ctx)
+{
+	struct ibw_ctx_priv *priv = (struct ibw_ctx_priv *)ctx->internal;
+	struct ibw_conn *cur;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_stop\n"));
+
+	cur = ctx->conn_list;
+	while (cur != NULL) {
+		int needs_disconnect =
+			(cur->state == IBWC_ERROR) || (cur->state == IBWC_CONNECTED);
+
+		if (needs_disconnect && ibw_disconnect(cur) != 0) {
+			return -1;
+		}
+		cur = cur->next;
+	}
+
+	ctx->state = IBWS_STOPPED;
+	priv->connstate_func(ctx, NULL);
+
+	return 0;
+}
+
+/*
+ * Bind the context's rdma_cm id to my_addr (socket bind() analogue).
+ *
+ * Returns 0 on success, otherwise the non-zero result of
+ * rdma_bind_addr() (message in ibw_lasterr).
+ */
+int ibw_bind(struct ibw_ctx *ctx, struct sockaddr_in *my_addr)
+{
+	struct ibw_ctx_priv *priv = (struct ibw_ctx_priv *)ctx->internal;
+	int ret;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_bind: addr=%s, port=%u\n",
+		inet_ntoa(my_addr->sin_addr), ntohs(my_addr->sin_port)));
+
+	ret = rdma_bind_addr(priv->cm_id, (struct sockaddr *) my_addr);
+	if (ret == 0) {
+		DEBUG(DEBUG_DEBUG, ("rdma_bind_addr successful\n"));
+		return 0;
+	}
+
+	sprintf(ibw_lasterr, "rdma_bind_addr error %d\n", ret);
+	DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+	return ret;
+}
+
+/*
+ * Start listening on the context's rdma_cm id (socket listen()
+ * analogue).
+ *
+ * Returns 0 on success, otherwise the non-zero result of rdma_listen()
+ * (message in ibw_lasterr).
+ */
+int ibw_listen(struct ibw_ctx *ctx, int backlog)
+{
+	struct ibw_ctx_priv *priv = talloc_get_type(ctx->internal, struct ibw_ctx_priv);
+	int ret;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_listen\n"));
+
+	ret = rdma_listen(priv->cm_id, backlog);
+	if (ret == 0) {
+		return 0;
+	}
+
+	sprintf(ibw_lasterr, "rdma_listen failed: %d\n", ret);
+	DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+	return ret;
+}
+
+/*
+ * Accept the pending connection request represented by conn and attach
+ * conn_userdata to it.  The connection setup is continued when
+ * RDMA_CM_EVENT_ESTABLISHED arrives.
+ *
+ * ctx is not used by this function.
+ *
+ * Returns 0 on success, -1 if rdma_accept() fails (message in
+ * ibw_lasterr).
+ */
+int ibw_accept(struct ibw_ctx *ctx, struct ibw_conn *conn, void *conn_userdata)
+{
+	struct ibw_conn_priv *priv = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	struct rdma_conn_param param;
+	int ret;
+
+	DEBUG(DEBUG_DEBUG, ("ibw_accept: cmid=%p\n", priv->cm_id));
+	conn->conn_userdata = conn_userdata;
+
+	memset(&param, 0, sizeof(param));
+	param.responder_resources = 1;
+	param.initiator_depth = 1;
+
+	ret = rdma_accept(priv->cm_id, &param);
+	if (ret == 0) {
+		priv->is_accepted = 1;
+		/* continued at RDMA_CM_EVENT_ESTABLISHED */
+		return 0;
+	}
+
+	sprintf(ibw_lasterr, "rdma_accept failed %d\n", ret);
+	DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+	return -1;
+}
+
+/*
+ * Start connecting conn to serv_addr and attach conn_userdata to it.
+ *
+ * Any previous (possibly half-finished) private state of conn is torn
+ * down first, then a new rdma_cm id is created and address resolution
+ * is started with a 2000 ms timeout.  The setup is continued when
+ * RDMA_CM_EVENT_ADDR_RESOLVED arrives.
+ *
+ * Returns 0 on success, -1 on failure.  NOTE: when rdma_create_id() or
+ * rdma_resolve_addr() fails, conn itself is freed.
+ */
+int ibw_connect(struct ibw_conn *conn, struct sockaddr_in *serv_addr, void *conn_userdata)
+{
+	struct ibw_ctx_priv *pctx;
+	struct ibw_conn_priv *pconn;
+	int rc;
+
+	/* check before the first dereference of conn */
+	assert(conn!=NULL);
+
+	pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+
+	conn->conn_userdata = conn_userdata;
+	DEBUG(DEBUG_DEBUG, ("ibw_connect: addr=%s, port=%u\n", inet_ntoa(serv_addr->sin_addr),
+		ntohs(serv_addr->sin_port)));
+
+	/* clean previous - probably half - initialization */
+	if (ibw_conn_priv_destruct(pconn)) {
+		DEBUG(DEBUG_ERR, ("ibw_connect/ibw_pconn_destruct failed for cm_id=%p\n", pconn->cm_id));
+		return -1;
+	}
+
+	/* init cm */
+#if RDMA_USER_CM_MAX_ABI_VERSION >= 2
+	rc = rdma_create_id(pctx->cm_channel, &pconn->cm_id, conn, RDMA_PS_TCP);
+#else
+	rc = rdma_create_id(pctx->cm_channel, &pconn->cm_id, conn);
+#endif
+	if (rc) {
+		rc = errno;
+		sprintf(ibw_lasterr, "ibw_connect/rdma_create_id error %d\n", rc);
+		/* log it like every other failure path does */
+		DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+		talloc_free(conn);
+		return -1;
+	}
+	DEBUG(DEBUG_DEBUG, ("ibw_connect: rdma_create_id succeeded, cm_id=%p\n", pconn->cm_id));
+
+	rc = rdma_resolve_addr(pconn->cm_id, NULL, (struct sockaddr *) serv_addr, 2000);
+	if (rc) {
+		sprintf(ibw_lasterr, "rdma_resolve_addr error %d\n", rc);
+		DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+		talloc_free(conn);
+		return -1;
+	}
+
+	/* continued at RDMA_CM_EVENT_ADDR_RESOLVED */
+
+	return 0;
+}
+
+/*
+ * Disconnect conn according to its current state:
+ *  - IBWC_ERROR:     the private connection state is torn down at once
+ *  - IBWC_CONNECTED: a disconnect request is sent with rdma_disconnect()
+ *  - anything else:  only a debug message is written
+ *
+ * Returns 0 on success, otherwise the non-zero result of
+ * rdma_disconnect() (message in ibw_lasterr).
+ */
+int ibw_disconnect(struct ibw_conn *conn)
+{
+	int rc;
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+
+	/* check before pconn is dereferenced for the debug message */
+	assert(pconn!=NULL);
+
+	DEBUG(DEBUG_DEBUG, ("ibw_disconnect: cmid=%p\n", pconn->cm_id));
+
+	switch(conn->state) {
+	case IBWC_ERROR:
+		ibw_conn_priv_destruct(pconn); /* do this here right now */
+		break;
+	case IBWC_CONNECTED:
+		rc = rdma_disconnect(pconn->cm_id);
+		if (rc) {
+			sprintf(ibw_lasterr, "ibw_disconnect failed with %d\n", rc);
+			DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+			return rc;
+		}
+		break;
+	default:
+		DEBUG(DEBUG_DEBUG, ("invalid state for disconnect: %d\n", conn->state));
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Reserve a send buffer of len bytes on conn.
+ *
+ * On success *buf points to the memory to be filled by the caller and
+ * *key identifies the work request; both have to be passed to ibw_send()
+ * or ibw_cancel_send_buf().
+ *
+ * A pre-allocated work request is used while one is available (with an
+ * extra registered buffer when len exceeds recv_bufsize); otherwise an
+ * "extra" work request with its own registered buffer is used.
+ *
+ * Returns 0 on success, -1 on failure (logged, message in ibw_lasterr).
+ * On failure no work request stays reserved.
+ */
+int ibw_alloc_send_buf(struct ibw_conn *conn, void **buf, void **key, uint32_t len)
+{
+	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	struct ibw_wr *p = pconn->wr_list_avail;
+
+	if (p!=NULL) {
+		DEBUG(DEBUG_DEBUG, ("ibw_alloc_send_buf#1: cmid=%p, len=%d\n", pconn->cm_id, len));
+
+		DLIST_REMOVE(pconn->wr_list_avail, p);
+		DLIST_ADD(pconn->wr_list_used, p);
+
+		if (len <= pctx->opts.recv_bufsize) {
+			*buf = (void *)p->buf;
+		} else {
+			p->buf_large = ibw_alloc_mr(pctx, pconn, len, &p->mr_large);
+			if (p->buf_large==NULL) {
+				sprintf(ibw_lasterr, "ibw_alloc_mr#1 failed\n");
+				/*
+				 * Hand the slot back: the caller gets no key
+				 * and could never release it otherwise.
+				 */
+				DLIST_REMOVE(pconn->wr_list_used, p);
+				DLIST_ADD(pconn->wr_list_avail, p);
+				goto error;
+			}
+			*buf = (void *)p->buf_large;
+		}
+		/* p->wr_id is already filled in ibw_init_memory */
+	} else {
+		DEBUG(DEBUG_DEBUG, ("ibw_alloc_send_buf#2: cmid=%p, len=%d\n", pconn->cm_id, len));
+		/* not optimized */
+		p = pconn->extra_avail;
+		if (!p) {
+			p = talloc_zero(pconn, struct ibw_wr);
+			if (p==NULL) {
+				sprintf(ibw_lasterr, "talloc_zero failed (emax: %u)\n", pconn->extra_max);
+				goto error;
+			}
+			/* only a successfully allocated object may get a destructor */
+			talloc_set_destructor(p, ibw_wr_destruct);
+			pconn->extra_avail = p;
+
+			/* extra wr ids continue right after the pre-allocated ones */
+			p->wr_id = pctx->opts.max_send_wr + pconn->extra_max;
+			pconn->extra_max++;
+			switch(pconn->extra_max) {
+				case 1: DEBUG(DEBUG_INFO, ("warning: queue performed\n")); break;
+				case 10: DEBUG(DEBUG_INFO, ("warning: queue reached 10\n")); break;
+				case 100: DEBUG(DEBUG_INFO, ("warning: queue reached 100\n")); break;
+				case 1000: DEBUG(DEBUG_INFO, ("warning: queue reached 1000\n")); break;
+				default: break;
+			}
+		}
+
+		/* on failure p simply stays in extra_avail for the next attempt */
+		p->buf_large = ibw_alloc_mr(pctx, pconn, len, &p->mr_large);
+		if (p->buf_large==NULL) {
+			sprintf(ibw_lasterr, "ibw_alloc_mr#2 failed\n");
+			goto error;
+		}
+		*buf = (void *)p->buf_large;
+
+		DLIST_REMOVE(pconn->extra_avail, p);
+		/* we don't have prepared index for this, so that
+		 * we will have to find this by wr_id later on */
+		DLIST_ADD(pconn->extra_sent, p);
+	}
+
+	*key = (void *)p;
+
+	return 0;
+error:
+	DEBUG(DEBUG_ERR, ("ibw_alloc_send_buf error: %s", ibw_lasterr));
+	return -1;
+}
+
+
+/*
+ * Post one packet (a whole small message or one fragment of a large one)
+ * of work request p, or queue it when the maximum number of posted sends
+ * has been reached.
+ *
+ * Queued packets are sent later from ibw_wc_send() through
+ * ibw_process_queue().
+ *
+ * Returns 0 when the packet was posted or queued, -1 if ibv_post_send()
+ * fails (logged, message in ibw_lasterr).
+ */
+static int ibw_send_packet(struct ibw_conn *conn, void *buf, struct ibw_wr *p, uint32_t len)
+{
+	struct ibw_ctx_priv *ctx_priv = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	struct ibw_conn_priv *conn_priv = talloc_get_type(conn->internal, struct ibw_conn_priv);
+
+	if (conn_priv->wr_sent >= ctx_priv->opts.max_send_wr) {
+		/* no room right now: put the request into our own queue */
+		DEBUG(DEBUG_DEBUG, ("ibw_send#queued(cmid: %p, len: %u)\n", conn_priv->cm_id, len));
+
+		/* TODO: clarify how to continue when state==IBWC_STOPPED */
+
+		/* to be sent by ibw_wc_send */
+		/* regardless "normal" or [a part of] "large" packet */
+		if (p->queued_ref_cnt == 0) {
+			/* first queued packet of this wr: link it and remember the start */
+			DLIST_ADD_END2(conn_priv->queue, p, struct ibw_wr *,
+				qprev, qnext); /* TODO: optimize */
+			p->queued_msg = buf;
+		}
+		p->queued_ref_cnt++;
+		p->queued_rlen = len; /* last wins; see ibw_wc_send */
+
+		return 0;
+	} else {
+		/* we can send it right now */
+		struct ibv_send_wr *failed_wr;
+		struct ibv_sge sge = {
+			.addr = (uintptr_t)buf,
+			.length = len,
+			.lkey = conn_priv->mr_send->lkey
+		};
+		struct ibv_send_wr request = {
+			.wr_id = p->wr_id + ctx_priv->opts.max_recv_wr,
+			.sg_list = &sge,
+			.num_sge = 1,
+			.opcode = IBV_WR_SEND,
+			.send_flags = IBV_SEND_SIGNALED,
+		};
+		int ret;
+
+		if (p->buf_large != NULL) {
+			DEBUG(DEBUG_DEBUG, ("ibw_send#large(cmid: %p, wrid: %u, n: %d)\n",
+				conn_priv->cm_id, (uint32_t)request.wr_id, len));
+			/* large messages live in their own memory region */
+			sge.lkey = p->mr_large->lkey;
+		} else {
+			DEBUG(DEBUG_DEBUG, ("ibw_send#normal(cmid: %p, wrid: %u, n: %d)\n",
+				conn_priv->cm_id, (uint32_t)request.wr_id, len));
+		}
+
+		ret = ibv_post_send(conn_priv->cm_id->qp, &request, &failed_wr);
+		if (ret != 0) {
+			sprintf(ibw_lasterr, "ibv_post_send error %d (%d)\n",
+				ret, conn_priv->wr_sent);
+			DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
+			return -1;
+		}
+
+		conn_priv->wr_sent++;
+
+		return 0;
+	}
+}
+
+/*
+ * Send the message in buf (len bytes) using the work request key, both
+ * obtained from ibw_alloc_send_buf().
+ *
+ * The first uint32_t of the message must contain len.  Messages larger
+ * than recv_bufsize are split into recv_bufsize sized fragments that are
+ * handed to ibw_send_packet() one by one.
+ *
+ * Returns the result of ibw_send_packet(): 0 on success, non-zero on
+ * failure.
+ */
+int ibw_send(struct ibw_conn *conn, void *buf, void *key, uint32_t len)
+{
+ struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+ struct ibw_wr *p = talloc_get_type(key, struct ibw_wr);
+ int rc;
+
+ assert(len>=sizeof(uint32_t));
+ assert((*((uint32_t *)buf)==len)); /* TODO: htonl */
+
+ if (len > pctx->opts.recv_bufsize) {
+ struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+ int rlen = len;
+ char *packet = (char *)buf;
+ uint32_t recv_bufsize = pctx->opts.recv_bufsize;
+
+ DEBUG(DEBUG_DEBUG, ("ibw_send#frag(cmid: %p, buf: %p, len: %u)\n",
+ pconn->cm_id, buf, len));
+
+ /* single threaded => no race here: */
+ assert(p->ref_cnt==0);
+ /* all full-sized fragments; the rest (1..recv_bufsize bytes) follows below */
+ while(rlen > recv_bufsize) {
+ rc = ibw_send_packet(conn, packet, p, recv_bufsize);
+ /* NOTE(review): this early return skips the final ref_cnt-- below */
+ if (rc)
+ return rc;
+ packet += recv_bufsize;
+ rlen -= recv_bufsize;
+ p->ref_cnt++; /* not good to have it in ibw_send_packet */
+ }
+ if (rlen) {
+ rc = ibw_send_packet(conn, packet, p, rlen);
+ p->ref_cnt++; /* not good to have it in ibw_send_packet */
+ }
+ /* ref_cnt ends up as (number of fragments - 1): ibw_wc_send decrements
+  * it per completion and releases the wr on the one that finds it 0 */
+ p->ref_cnt--; /* for the same handling */
+ } else {
+ assert(p->ref_cnt==0);
+ assert(p->queued_ref_cnt==0);
+
+ rc = ibw_send_packet(conn, buf, p, len);
+ }
+ return rc;
+}
+
+/*
+ * Give back a send buffer obtained from ibw_alloc_send_buf() that will
+ * not be passed to ibw_send().
+ *
+ * A large buffer attached to the work request is released and the work
+ * request is moved back to the matching "avail" list.
+ *
+ * Always returns 0.
+ */
+int ibw_cancel_send_buf(struct ibw_conn *conn, void *buf, void *key)
+{
+	struct ibw_ctx_priv *pctx;
+	struct ibw_conn_priv *pconn;
+	struct ibw_wr *p;
+
+	/* validate the arguments before conn is dereferenced */
+	assert(conn!=NULL);
+	assert(buf!=NULL);
+
+	pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
+	pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
+	p = talloc_get_type(key, struct ibw_wr);
+	assert(p!=NULL);
+
+	if (p->buf_large!=NULL)
+		ibw_free_mr(&p->buf_large, &p->mr_large);
+
+	/* parallel case */
+	if (p->wr_id < pctx->opts.max_send_wr) {
+		DEBUG(DEBUG_DEBUG, ("ibw_cancel_send_buf#1 %u\n", (int)p->wr_id));
+		DLIST_REMOVE(pconn->wr_list_used, p);
+		DLIST_ADD(pconn->wr_list_avail, p);
+	} else { /* "extra" packet */
+		DEBUG(DEBUG_DEBUG, ("ibw_cancel_send_buf#2 %u\n", (int)p->wr_id));
+		DLIST_REMOVE(pconn->extra_sent, p);
+		DLIST_ADD(pconn->extra_avail, p);
+	}
+
+	return 0;
+}
+
+/*
+ * Return the text of the most recent error recorded by this module.
+ * The result points at the module's error buffer and must not be freed.
+ */
+const char *ibw_getLastError(void)
+{
+	const char *last_error = ibw_lasterr;
+
+	return last_error;
+}
diff --git a/ctdb/ib/ibwrapper.h b/ctdb/ib/ibwrapper.h
new file mode 100644
index 0000000..d5cdc60
--- /dev/null
+++ b/ctdb/ib/ibwrapper.h
@@ -0,0 +1,218 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Wrap Infiniband calls.
+ *
+ * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
+ *
+ * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Server (context) communication state.
+ * Stored in ibw_ctx.state; changes are reported through
+ * ibw_connstate_fn_t with <conn> == NULL.
+ */
+enum ibw_state_ctx {
+ IBWS_INIT = 0, /* ctx start - after ibw_init */
+ IBWS_READY, /* after ibw_bind & ibw_listen */
+ IBWS_CONNECT_REQUEST, /* after [IBWS_READY + incoming request] */
+ /* => [(ibw_accept)IBWS_READY | (ibw_disconnect)STOPPED | ERROR] */
+ IBWS_STOPPED, /* normal stop, set by ibw_stop <= ibw_disconnect+(IBWS_READY | IBWS_CONNECT_REQUEST) */
+ IBWS_ERROR /* abnormal state; ibw_stop must be called after this */
+};
+
+/* Server (node) context - created by ibw_init */
+struct ibw_ctx {
+ void *ctx_userdata; /* see ibw_init */
+
+ enum ibw_state_ctx state; /* current server state */
+ void *internal; /* private part (struct ibw_ctx_priv) - don't touch */
+
+ struct ibw_conn *conn_list; /* 1st elem of double linked list */
+};
+
+/*
+ * Connection state.
+ * Stored in ibw_conn.state; changes are reported through
+ * ibw_connstate_fn_t with <ctx> == NULL.
+ */
+enum ibw_state_conn {
+ IBWC_INIT = 0, /* conn start - internal state */
+ IBWC_CONNECTED, /* after ibw_accept or ibw_connect */
+ IBWC_DISCONNECTED, /* after ibw_disconnect */
+ IBWC_ERROR /* abnormal state, e.g. after a failed completion */
+};
+
+/* One connection (incoming or outgoing) belonging to an ibw_ctx */
+struct ibw_conn {
+ struct ibw_ctx *ctx; /* the context this connection belongs to */
+ enum ibw_state_conn state; /* current connection state */
+
+ void *conn_userdata; /* see ibw_connect and ibw_accept */
+ void *internal; /* private part (struct ibw_conn_priv) - don't touch */
+
+ struct ibw_conn *prev, *next; /* links of the list starting at ctx->conn_list */
+};
+
+/*
+ * (name, value) pair for array param of ibw_init
+ *
+ * Recognised names are max_send_wr, max_recv_wr, recv_bufsize and
+ * recv_threshold; the value is a decimal number given as string.
+ * Neither name nor value may be NULL; an unknown name makes ibw_init fail.
+ */
+struct ibw_initattr {
+ const char *name;
+ const char *value;
+};
+
+/*
+ * Callback function definition which should inform you about
+ * connection state change
+ * This callback is invoked whenever server or client connection changes.
+ * Either <conn> or <ctx> is NULL: only the object whose state changed is passed.
+ * Return nonzero on error.
+ */
+typedef int (*ibw_connstate_fn_t)(struct ibw_ctx *ctx, struct ibw_conn *conn);
+
+/*
+ * Callback function definition which should process incoming packets
+ * This callback is invoked whenever any message arrives.
+ * Return nonzero on error.
+ *
+ * Important: you mustn't store buf pointer for later use.
+ * Process its contents before returning.
+ */
+typedef int (*ibw_receive_fn_t)(struct ibw_conn *conn, void *buf, int n);
+
+/*
+ * settings: array of (name, value) pairs
+ * where name is one of:
+ * max_send_wr [default is 256]
+ * max_recv_wr [default is 1024]
+ * recv_bufsize, recv_threshold [defaults are compiled in]
+ *
+ * Must be called _ONCE_ for each node.
+ *
+ * max_msg_size is the maximum size of a message
+ * (max_send_wr + max_recv_wr) * max_msg_size bytes allocated per connection
+ *
+ * returns non-NULL on success
+ *
+ * talloc_free must be called for the result in IBWS_STOPPED;
+ * it will close resources by destructor
+ * connections(ibw_conn *) must have been closed prior talloc_free
+ */
+struct ibw_ctx *ibw_init(struct ibw_initattr *attr, int nattr,
+ void *ctx_userdata,
+ ibw_connstate_fn_t ibw_connstate,
+ ibw_receive_fn_t ibw_receive,
+ struct tevent_context *ectx);
+
+/*
+ * Must be called in states of (IBWS_ERROR, IBWS_READY, IBWS_CONNECT_REQUEST)
+ *
+ * It will send out disconnect requests and free up ibw_conn structures.
+ * The ctx->state will change to IBWS_STOPPED after every connection has been disconnected.
+ * During that time, you mustn't send/recv/disconnect any more.
+ * Only after ctx->state=IBWS_STOPPED you can talloc_free the ctx.
+ */
+int ibw_stop(struct ibw_ctx *ctx);
+
+/*************** connection initiation - like stream sockets *****/
+
+/*
+ * works like socket bind
+ * needs a normal internet address here
+ *
+ * return 0 on success
+ */
+int ibw_bind(struct ibw_ctx *ctx, struct sockaddr_in *my_addr);
+
+/*
+ * works like socket listen
+ * non-blocking
+ * enables accepting incoming connections (after IBWS_READY)
+ * (it doesn't touch ctx->state by itself)
+ *
+ * returns 0 on success
+ */
+int ibw_listen(struct ibw_ctx *ctx, int backlog);
+
+/*
+ * works like socket accept
+ * initializes a connection to a client
+ * must be called when state=IBWS_CONNECT_REQUEST
+ *
+ * returns 0 on success
+ *
+ * You have +1 waiting here: you will get ibw_conn (having the
+ * same <conn_userdata> member) structure in ibw_connstate_fn_t.
+ *
+ * Important: you won't get remote IP address (only internal conn info)
+ */
+int ibw_accept(struct ibw_ctx *ctx, struct ibw_conn *conn, void *conn_userdata);
+
+/*
+ * Create a new connection structure
+ * available for queueing ibw_send
+ *
+ * <mem_ctx> (the talloc parent) is needed to be notified by talloc destruct action.
+ */
+struct ibw_conn *ibw_conn_new(struct ibw_ctx *ctx, TALLOC_CTX *mem_ctx);
+
+/*
+ * Needs a normal internet address here
+ * can be called within IBWS_READY|IBWS_CONNECT_REQUEST
+ *
+ * returns 0 on success
+ *
+ * You have +1 waiting here: you will get ibw_conn (having the
+ * same <conn_userdata> member) structure in ibw_connstate_fn_t.
+ */
+int ibw_connect(struct ibw_conn *conn, struct sockaddr_in *serv_addr, void *conn_userdata);
+
+/*
+ * Sends out a disconnect request.
+ * You should process fds after calling this function
+ * and then process it with ibw_process_event normally
+ * until you get conn->state = IBWC_DISCONNECTED
+ *
+ * You mustn't talloc_free <conn> yet right after this,
+ * first wait for IBWC_DISCONNECTED.
+ */
+int ibw_disconnect(struct ibw_conn *conn);
+
+/************ Infiniband specific event loop wrapping ******************/
+
+/*
+ * You have to use this buf to fill in before send.
+ * It's just to avoid a memcpy in ibw_send.
+ * Use the same (buf, key) pair with ibw_send.
+ * Don't use more space than maxsize (see ibw_init).
+ *
+ * Returns 0 on success.
+ */
+int ibw_alloc_send_buf(struct ibw_conn *conn, void **buf, void **key, uint32_t len);
+
+/*
+ * Send the message in one
+ * Can be invoked any times (should fit into buffers) and at any time
+ * (in conn->state=IBWC_CONNECTED)
+ * len must be less than or equal to max_msg_size (see ibw_init)
+ *
+ * You mustn't use (buf, key) any more for sending.
+ */
+int ibw_send(struct ibw_conn *conn, void *buf, void *key, uint32_t len);
+
+/*
+ * Call this after ibw_alloc_send_buf
+ * when you won't call ibw_send for (buf, key)
+ * You mustn't use (buf, key) any more.
+ */
+int ibw_cancel_send_buf(struct ibw_conn *conn, void *buf, void *key);
+
+/*
+ * Retrieves the last error
+ * result: always non-NULL, mustn't be freed (static)
+ */
+const char *ibw_getLastError(void);
diff --git a/ctdb/ib/ibwrapper_internal.h b/ctdb/ib/ibwrapper_internal.h
new file mode 100644
index 0000000..b8100a8
--- /dev/null
+++ b/ctdb/ib/ibwrapper_internal.h
@@ -0,0 +1,126 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Wrap Infiniband calls.
+ *
+ * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
+ *
+ * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Tunables, filled from defaults and ibw_init attributes */
+struct ibw_opts {
+ uint32_t max_send_wr; /* max # of sends posted at once; further ones are queued */
+ uint32_t max_recv_wr; /* # of receive buffer slots; also the offset of send wr ids */
+ uint32_t recv_bufsize; /* size of one receive slot = max size of one sent fragment */
+ uint32_t recv_threshold; /* partial-message buffer is shrunk back to this size */
+};
+
+/* Bookkeeping for one send work request (pre-allocated or "extra") */
+struct ibw_wr {
+ char *buf; /* initialized in ibw_init_memory once per connection */
+ int wr_id; /* position in wr_index list; also used as wr id */
+ /* (posted as wr_id + max_recv_wr) */
+
+ char *buf_large; /* allocated specially for "large" message */
+ struct ibv_mr *mr_large; /* memory region registered for buf_large */
+ int ref_cnt; /* reference count for ibw_wc_send to know when to release */
+
+ char *queued_msg; /* set at ibw_send - can be different than above */
+ int queued_ref_cnt; /* instead of adding the same to the queue again */
+ uint32_t queued_rlen; /* last wins when queued_ref_cnt>0; or simple msg size */
+
+ struct ibw_wr *next, *prev; /* in wr_list_avail or wr_list_used */
+ /* or extra_sent or extra_avail */
+ struct ibw_wr *qnext, *qprev; /* in queue */
+};
+
+/* Private part of struct ibw_ctx (ibw_ctx.internal) */
+struct ibw_ctx_priv {
+ struct tevent_context *ectx; /* event context given to ibw_init */
+
+ struct ibw_opts opts; /* see ibw_process_init_attrs */
+
+ struct rdma_cm_id *cm_id; /* server cm id */
+
+ struct rdma_event_channel *cm_channel; /* created in ibw_init */
+ struct tevent_fd *cm_channel_event; /* fd event watching cm_channel->fd */
+
+ ibw_connstate_fn_t connstate_func; /* see ibw_init */
+ ibw_receive_fn_t receive_func; /* see ibw_init */
+
+ long pagesize; /* sysconf result for memalign */
+};
+
+/* Collects a message that arrives split over several receive completions */
+struct ibw_part {
+ char *buf; /* talloced memory buffer */
+ uint32_t bufsize; /* allocated size of buf - grows on demand, */
+ /* shrunk back by ibw_wc_mem_threshold */
+ uint32_t len; /* message part length; 0 = no partial message pending */
+ uint32_t to_read; /* bytes still missing: of the 4 byte length header */
+ /* first, then of the rest of the message */
+};
+
+/* Private part of struct ibw_conn (ibw_conn.internal) */
+struct ibw_conn_priv {
+ struct ibv_comp_channel *verbs_channel; /* completion channel of cq */
+ struct tevent_fd *verbs_channel_event;
+
+ struct rdma_cm_id *cm_id; /* client's cm id */
+ struct ibv_pd *pd;
+ int is_accepted; /* set to 1 by ibw_accept */
+
+ struct ibv_cq *cq; /* qp is in cm_id */
+
+ char *buf_send; /* max_send_wr * avg_send_size */
+ struct ibv_mr *mr_send; /* its lkey is used for "normal" sends */
+ struct ibw_wr *wr_list_avail; /* free pre-allocated wrs */
+ struct ibw_wr *wr_list_used; /* pre-allocated wrs handed out by ibw_alloc_send_buf */
+ struct ibw_wr **wr_index; /* array[0..(qsize-1)] of (ibw_wr *) */
+ int wr_sent; /* # of send wrs in the CQ */
+
+ struct ibw_wr *extra_sent; /* "extra" wrs in use; searched by wr_id */
+ struct ibw_wr *extra_avail; /* free "extra" wrs */
+ int extra_max; /* max wr_id in the queue */
+
+ struct ibw_wr *queue; /* wrs with fragments waiting for a free send slot (qprev/qnext) */
+
+ /* buf_recv is a ring buffer */
+ char *buf_recv; /* max_recv_wr * avg_recv_size */
+ struct ibv_mr *mr_recv;
+ int recv_index; /* index of the next recv buffer when refilling */
+ struct ibw_part part; /* partially received message, see ibw_wc_recv */
+};
+
+/*
+ * remove an element from a list - element doesn't have to be in list.
+ * <prev> and <next> are the names of the link members to use, so that one
+ * struct can be a member of a second list (e.g. qprev/qnext).
+ * The links of the removed element are reset to NULL.
+ */
+#define DLIST_REMOVE2(list, p, prev, next) \
+do { \
+ if ((p) == (list)) { \
+ (list) = (p)->next; \
+ if (list) (list)->prev = NULL; \
+ } else { \
+ if ((p)->prev) (p)->prev->next = (p)->next; \
+ if ((p)->next) (p)->next->prev = (p)->prev; \
+ } \
+ if ((p) != (list)) (p)->next = (p)->prev = NULL; \
+} while (0)
+
+/*
+ * hook into the end of the list - needs a tmp pointer
+ * <type> is the pointer type of the elements, <prev> and <next> are the
+ * names of the link members to use.  The list is walked to find its end.
+ * NOTE: the macro declares a local named "tmp"; don't pass an expression
+ * using a variable of that name.
+ */
+#define DLIST_ADD_END2(list, p, type, prev, next) \
+do { \
+ if (!(list)) { \
+ (list) = (p); \
+ (p)->next = (p)->prev = NULL; \
+ } else { \
+ type tmp; \
+ for (tmp = (list); tmp->next; tmp = tmp->next) ; \
+ tmp->next = (p); \
+ (p)->next = NULL; \
+ (p)->prev = tmp; \
+ } \
+} while (0)
diff --git a/ctdb/ib/ibwrapper_test.c b/ctdb/ib/ibwrapper_test.c
new file mode 100644
index 0000000..77a5323
--- /dev/null
+++ b/ctdb/ib/ibwrapper_test.c
@@ -0,0 +1,659 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Test the infiniband wrapper.
+ *
+ * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
+ *
+ * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/time.h"
+#include "system/wait.h"
+
+#include <assert.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/time.h"
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+
+#include "ib/ibwrapper.h"
+
+/*
+ * Global state of one ibwrapper test process (client or server).
+ * Filled from the command line in main() and handed to the wrapper as
+ * context userdata.
+ */
+struct ibwtest_ctx {
+ int is_server; /* set by -s */
+ char *id; /* my id */
+
+ struct ibw_initattr *attrs; /* parsed -o name:value pairs, passed to ibw_init() */
+ int nattrs;
+ char *opts; /* option string */
+
+ struct sockaddr_in *addrs; /* dynamic array of dest addrs */
+ int naddrs;
+
+ /*
+  * NOTE(review): nsec is documented as nanoseconds but main() passes it
+  * as the second argument of timeval_current_ofs() - confirm the unit.
+  */
+ unsigned int nsec; /* delta times between messages in nanosec */
+ unsigned int sleep_usec; /* microsecs to sleep in the main loop to emulate overloading */
+ uint32_t maxsize; /* maximum variable message size */
+
+ int cnt; /* counter embedded in the periodic "hello world" text */
+ int nsent; /* incremented after every successful ibw_send() */
+
+ int nmsg; /* number of messages to send (client) */
+
+ int kill_me; /* main loop exits when non-zero */
+ int stopping; /* set once ibw_stop() has been requested */
+ int error; /* main loop exits when non-zero; process then exits with 1 */
+ struct ibw_ctx *ibwctx;
+
+ struct timeval start_time, end_time; /* used for the usec/nmsg report in main() */
+};
+
+/*
+ * Per-connection userdata of the test program.
+ */
+struct ibwtest_conn {
+ char *id; /* peer id, copied from a received TESTOP_SEND_ID message */
+};
+
+/*
+ * One-byte opcode stored right after the uint32_t length prefix of every
+ * test message.
+ */
+enum testopcode {
+ TESTOP_SEND_ID = 1, /* payload: sender id as a NUL-terminated string */
+ TESTOP_SEND_TEXT = 2, /* payload: NUL-terminated text */
+ TESTOP_SEND_RND = 3 /* payload: random bytes followed by a one-byte sum */
+};
+
+/*
+ * Client side: issue a connect request to every address in tcx->addrs.
+ *
+ * One ibwtest_conn is allocated on tcx; it is passed to ibw_conn_new()
+ * and given to ibw_connect() as the connection userdata for all
+ * connections created here.
+ *
+ * Returns 0 when all connect requests were issued, -1 on the first
+ * allocation or connect failure.
+ */
+static int ibwtest_connect_everybody(struct ibwtest_ctx *tcx)
+{
+	struct ibw_conn *conn;
+	struct ibwtest_conn *tconn;
+	int i;
+
+	tconn = talloc_zero(tcx, struct ibwtest_conn);
+	if (tconn == NULL) {
+		/* without this check a NULL userdata would be dereferenced
+		 * later in the receive handler */
+		fprintf(stderr, "ibwtest_connect_everybody: out of memory\n");
+		return -1;
+	}
+
+	for (i = 0; i < tcx->naddrs; i++) {
+		conn = ibw_conn_new(tcx->ibwctx, tconn);
+		if (conn == NULL) {
+			fprintf(stderr, "ibw_conn_new error at %d\n", i);
+			return -1;
+		}
+		if (ibw_connect(conn, &tcx->addrs[i], tconn)) {
+			fprintf(stderr, "ibw_connect error at %d\n", i);
+			return -1;
+		}
+	}
+	DEBUG(DEBUG_DEBUG, ("sent %d connect request...\n", tcx->naddrs));
+
+	return 0;
+}
+
+/*
+ * Send this process' id to the peer as a TESTOP_SEND_ID message.
+ *
+ * Wire layout: uint32_t total length, one opcode byte, then the id
+ * string including its terminating NUL.
+ *
+ * Returns 0 on success, -1 if the send buffer could not be allocated or
+ * the send failed.
+ */
+static int ibwtest_send_id(struct ibw_conn *conn)
+{
+	struct ibwtest_ctx *tcx = talloc_get_type(conn->ctx->ctx_userdata, struct ibwtest_ctx);
+	char *msg;
+	char *payload;
+	void *key;
+	uint32_t total;
+
+	DEBUG(DEBUG_DEBUG, ("ibwtest_send_id\n"));
+
+	/* length prefix + opcode byte + id + NUL */
+	total = sizeof(uint32_t)+strlen(tcx->id)+2;
+	if (ibw_alloc_send_buf(conn, (void **)&msg, &key, total) != 0) {
+		DEBUG(DEBUG_ERR, ("send_id: ibw_alloc_send_buf failed\n"));
+		return -1;
+	}
+
+	/* the first sizeof(uint32_t) bytes carry the message length */
+	*((uint32_t *)msg) = total;
+	payload = msg + sizeof(uint32_t);
+	*payload = (char)TESTOP_SEND_ID;
+	strcpy(payload + 1, tcx->id);
+
+	if (ibw_send(conn, msg, key, total) != 0) {
+		DEBUG(DEBUG_ERR, ("send_id: ibw_send error\n"));
+		return -1;
+	}
+	tcx->nsent++;
+
+	return 0;
+}
+
+/*
+ * Send a NUL-terminated text to the peer as a TESTOP_SEND_TEXT message.
+ *
+ * Nothing is sent (and 0 is returned) while the connection is not in
+ * state IBWC_CONNECTED.  Returns -1 if the send buffer could not be
+ * allocated or the send failed.
+ */
+static int ibwtest_send_test_msg(struct ibwtest_ctx *tcx, struct ibw_conn *conn, const char *msg)
+{
+	char *packet;
+	void *key;
+	uint32_t total;
+
+	if (conn->state != IBWC_CONNECTED) {
+		return 0; /* not yet up */
+	}
+
+	/* text + NUL + opcode byte + length prefix */
+	total = strlen(msg) + 2 + sizeof(uint32_t);
+	if (ibw_alloc_send_buf(conn, (void **)&packet, &key, total) != 0) {
+		fprintf(stderr, "send_test_msg: ibw_alloc_send_buf failed\n");
+		return -1;
+	}
+
+	*((uint32_t *)packet) = total;
+	packet[sizeof(uint32_t)] = (char)TESTOP_SEND_TEXT;
+	strcpy(packet + sizeof(uint32_t) + 1, msg);
+
+	if (ibw_send(conn, packet, key, total) != 0) {
+		DEBUG(DEBUG_ERR, ("send_test_msg: ibw_send error\n"));
+		return -1;
+	}
+	tcx->nsent++;
+
+	return 0;
+}
+
+/*
+ * Fill buf[0..size-1] with pseudo random bytes taken from rand(), writing
+ * from the last byte towards the first, and return the sum of all bytes
+ * written (modulo 256).
+ */
+static unsigned char ibwtest_fill_random(unsigned char *buf, uint32_t size)
+{
+	unsigned char total = 0;
+	uint32_t remaining;
+
+	for (remaining = size; remaining > 0; remaining--) {
+		/* scale rand() into the range 0..255 */
+		unsigned char byte = (unsigned char)(256.0 * (rand() / (RAND_MAX + 1.0)));
+
+		buf[remaining - 1] = byte;
+		total += byte;
+	}
+	return total;
+}
+
+/*
+ * Return the sum (modulo 256) of the first 'size' bytes of buf.
+ */
+static unsigned char ibwtest_get_sum(unsigned char *buf, uint32_t size)
+{
+	unsigned char total = 0;
+	uint32_t pos;
+
+	for (pos = 0; pos < size; pos++) {
+		total += buf[pos];
+	}
+	return total;
+}
+
+/*
+ * Send one TESTOP_SEND_RND message carrying 'size' random bytes.
+ *
+ * Wire layout: uint32_t total length, one opcode byte, 'size' random
+ * bytes, and a trailing byte holding the sum of the random bytes.
+ *
+ * Returns 0 on success, -1 if the send buffer could not be allocated or
+ * the send failed.
+ */
+static int ibwtest_do_varsize_scenario_conn_size(struct ibwtest_ctx *tcx, struct ibw_conn *conn, uint32_t size)
+{
+	unsigned char *packet;
+	unsigned char *payload;
+	void *key;
+	uint32_t total = sizeof(uint32_t) + 1 + size + 1;
+
+	if (ibw_alloc_send_buf(conn, (void **)&packet, &key, total) != 0) {
+		DEBUG(DEBUG_ERR, ("varsize/ibw_alloc_send_buf failed\n"));
+		return -1;
+	}
+
+	*((uint32_t *)packet) = total;
+	packet[sizeof(uint32_t)] = TESTOP_SEND_RND;
+
+	/* random payload followed by its one-byte checksum */
+	payload = packet + sizeof(uint32_t) + 1;
+	payload[size] = ibwtest_fill_random(payload, size);
+
+	if (ibw_send(conn, packet, key, total) != 0) {
+		DEBUG(DEBUG_ERR, ("varsize/ibw_send failed\n"));
+		return -1;
+	}
+	tcx->nsent++;
+
+	return 0;
+}
+
+/*
+ * Send tcx->nmsg random messages over conn whose payload size grows
+ * linearly up to tcx->maxsize with the last message.
+ *
+ * Returns 0 on success, -1 as soon as one send fails.
+ */
+static int ibwtest_do_varsize_scenario_conn(struct ibwtest_ctx *tcx, struct ibw_conn *conn)
+{
+	int step = 0;
+
+	while (step < tcx->nmsg) {
+		/* a random size would be: maxsize * (rand() / (RAND_MAX + 1.0)) */
+		uint32_t size = (uint32_t)((float)(tcx->maxsize) * ((float)(step+1)/(float)tcx->nmsg));
+
+		if (ibwtest_do_varsize_scenario_conn_size(tcx, conn, size) != 0) {
+			return -1;
+		}
+		step++;
+	}
+	return 0;
+}
+
+/*int ibwtest_do_varsize_scenario(ibwtest_ctx *tcx)
+{
+ int rc;
+ struct ibw_conn *conn;
+
+ for(conn=tcx->ibwctx->conn_list; conn!=NULL; conn=conn->next) {
+ if (conn->state==IBWC_CONNECTED) {
+ rc = ibwtest_do_varsize_scenario_conn(tcx, conn);
+ if (rc)
+ tcx->error = rc;
+ }
+ }
+}*/
+
+/*
+ * State change callback registered with ibw_init().
+ *
+ * Either argument may be NULL: a non-NULL ctx reports a change of the
+ * wrapper context state, a non-NULL conn reports a change of a single
+ * connection's state.
+ *
+ * When only conn is given, the test context is looked up through
+ * conn->ctx, because the connection branch needs it to record the start
+ * time.
+ *
+ * Returns 0 normally, -1 if the test context is unavailable or
+ * gettimeofday() fails on a newly connected connection.
+ */
+static int ibwtest_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
+{
+	struct ibwtest_ctx *tcx = NULL; /* userdata */
+	struct ibwtest_conn *tconn = NULL; /* userdata */
+
+	if (ctx) {
+		tcx = talloc_get_type(ctx->ctx_userdata, struct ibwtest_ctx);
+
+		switch(ctx->state) {
+		case IBWS_INIT:
+			DEBUG(DEBUG_DEBUG, ("test IBWS_INIT\n"));
+			break;
+		case IBWS_READY:
+			DEBUG(DEBUG_DEBUG, ("test IBWS_READY\n"));
+			break;
+		case IBWS_CONNECT_REQUEST:
+			DEBUG(DEBUG_DEBUG, ("test IBWS_CONNECT_REQUEST\n"));
+			tconn = talloc_zero(conn, struct ibwtest_conn);
+			if (ibw_accept(ctx, conn, tconn)) {
+				DEBUG(DEBUG_ERR, ("error accepting the connect request\n"));
+			}
+			break;
+		case IBWS_STOPPED:
+			DEBUG(DEBUG_DEBUG, ("test IBWS_STOPPED\n"));
+			tcx->kill_me = 1; /* main loop can exit */
+			break;
+		case IBWS_ERROR:
+			DEBUG(DEBUG_DEBUG, ("test IBWS_ERROR\n"));
+			ibw_stop(tcx->ibwctx);
+			break;
+		default:
+			assert(0);
+			break;
+		}
+	}
+
+	if (conn) {
+		if (tcx == NULL) {
+			/* called with ctx == NULL: reach the test context via
+			 * the connection instead of dereferencing NULL below */
+			tcx = talloc_get_type(conn->ctx->ctx_userdata,
+					      struct ibwtest_ctx);
+		}
+
+		switch(conn->state) {
+		case IBWC_INIT:
+			DEBUG(DEBUG_DEBUG, ("test IBWC_INIT\n"));
+			break;
+		case IBWC_CONNECTED:
+			if (tcx == NULL) {
+				DEBUG(DEBUG_ERR, ("no test context for connection\n"));
+				return -1;
+			}
+			if (gettimeofday(&tcx->start_time, NULL)) {
+				DEBUG(DEBUG_ERR, ("gettimeofday error %d\n", errno));
+				return -1;
+			}
+			/* a failed id send is recorded like every other failed
+			 * send so that the main loop terminates */
+			if (ibwtest_send_id(conn)) {
+				tcx->error = -1;
+			}
+			break;
+		case IBWC_DISCONNECTED:
+			DEBUG(DEBUG_DEBUG, ("test IBWC_DISCONNECTED\n"));
+			talloc_free(conn);
+			break;
+		case IBWC_ERROR:
+			DEBUG(DEBUG_DEBUG, ("test IBWC_ERROR %s\n", ibw_getLastError()));
+			break;
+		default:
+			assert(0);
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Receive callback registered with ibw_init(): handles one complete
+ * message of n bytes in buf (uint32_t length, opcode byte, payload).
+ *
+ * Server: verifies the checksum of TESTOP_SEND_RND messages and bounces
+ * every other message except TESTOP_SEND_ID back to the sender.
+ * Client: after the peer's id arrived, optionally sends the variable
+ * size series, then sends the next "hello world" text; when the message
+ * budget (tcx->nmsg) reaches zero, a stop is requested.
+ *
+ * Returns 0 on success and a non-zero value on failure; every failure is
+ * also recorded in tcx->error so that the main loop terminates.
+ */
+static int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n)
+{
+	struct ibwtest_conn *tconn;
+	struct ibwtest_ctx *tcx;
+	enum testopcode op;
+	const char *text;
+	int textlen;
+	int rc = 0;
+
+	/* validate before the first dereference; compare as signed so that
+	 * a negative n cannot slip through as a huge unsigned value */
+	assert(conn!=NULL);
+	assert(buf!=NULL);
+	assert(n>=(int)(sizeof(uint32_t)+1));
+
+	tcx = talloc_get_type(conn->ctx->ctx_userdata, struct ibwtest_ctx);
+	tconn = talloc_get_type(conn->conn_userdata, struct ibwtest_conn);
+	assert(tcx!=NULL);
+	assert(tconn!=NULL);
+
+	op = (enum testopcode)((char *)buf)[sizeof(uint32_t)];
+	text = (const char *)buf + sizeof(uint32_t) + 1;
+	textlen = n - (int)(sizeof(uint32_t) + 1);
+
+	/* the payload comes from the peer: never read past n bytes even if
+	 * the terminating NUL is missing */
+	if (op==TESTOP_SEND_ID) {
+		tconn->id = talloc_strndup(tconn, text, textlen);
+	}
+	if (op==TESTOP_SEND_ID || op==TESTOP_SEND_TEXT) {
+		DEBUG(DEBUG_DEBUG, ("[%d]msg from %s: \"%.*s\"(%d)\n", op,
+			tconn->id ? tconn->id : "NULL", textlen, text, n));
+	}
+
+	if (tcx->is_server) {
+		if (op==TESTOP_SEND_RND) {
+			uint32_t size;
+			unsigned char sum;
+
+			/* opcode byte + checksum byte are mandatory; without
+			 * this check the size below would wrap around */
+			if (n < (int)(sizeof(uint32_t) + 2)) {
+				DEBUG(DEBUG_ERR, ("ERROR: random message too short (%d)\n", n));
+				ibw_stop(tcx->ibwctx);
+				goto error;
+			}
+			size = (uint32_t)(n - sizeof(uint32_t) - 2);
+			sum = ibwtest_get_sum((unsigned char *)buf + sizeof(uint32_t) + 1,
+				size);
+			DEBUG(DEBUG_DEBUG, ("[%d]msg varsize %u/sum %u from %s\n",
+				op,
+				size,
+				(uint32_t)sum,
+				tconn->id ? tconn->id : "NULL"));
+			if (sum!=((unsigned char *)buf)[n-1]) {
+				DEBUG(DEBUG_ERR, ("ERROR: checksum mismatch %u!=%u\n",
+					(uint32_t)sum, (uint32_t)((unsigned char *)buf)[n-1]));
+				ibw_stop(tcx->ibwctx);
+				goto error;
+			}
+		} else if (op!=TESTOP_SEND_ID) {
+			char *buf2;
+			void *key2;
+
+			/* bounce message regardless what it is */
+			if (ibw_alloc_send_buf(conn, (void **)&buf2, &key2, n)) {
+				fprintf(stderr, "ibw_alloc_send_buf error #2\n");
+				goto error;
+			}
+			memcpy(buf2, buf, n);
+			if (ibw_send(conn, buf2, key2, n)) {
+				fprintf(stderr, "ibw_send error #2\n");
+				goto error;
+			}
+			tcx->nsent++;
+		}
+	} else { /* client: */
+		if (op==TESTOP_SEND_ID && tcx->maxsize) {
+			/* send them in one blow */
+			rc = ibwtest_do_varsize_scenario_conn(tcx, conn);
+		}
+
+		/* do not overwrite a failure of the varsize series */
+		if (rc==0 && tcx->nmsg) {
+			char msg[26];
+			snprintf(msg, sizeof(msg), "hello world %d", tcx->nmsg--);
+			rc = ibwtest_send_test_msg(tcx, conn, msg);
+			if (tcx->nmsg==0) {
+				ibw_stop(tcx->ibwctx);
+				tcx->stopping = 1;
+			}
+		}
+	}
+
+	if (rc)
+		tcx->error = rc;
+
+	return rc;
+error:
+	/* make the failure visible to the main loop and the exit code */
+	tcx->error = -1;
+	return -1;
+}
+
+/*
+ * Timer callback: on the client, send a numbered "hello world" text to
+ * every connection that is currently in state IBWC_CONNECTED and record
+ * a failed send in tcx->error.  The server does nothing here.
+ */
+static void ibwtest_timeout_handler(struct tevent_context *ev,
+	struct tevent_timer *te,
+	struct timeval t, void *private_data)
+{
+	struct ibwtest_ctx *tcx = talloc_get_type(private_data, struct ibwtest_ctx);
+	struct ibw_conn *cur;
+	char text[50];
+
+	if (tcx->is_server) {
+		/* nothing to do - just let the main loop run */
+		return;
+	}
+
+	/* fill it with something variable... */
+	sprintf(text, "hello world %d", tcx->cnt++);
+
+	/* send something to everybody... */
+	cur = tcx->ibwctx->conn_list;
+	while (cur != NULL) {
+		if (cur->state == IBWC_CONNECTED) {
+			int rc = ibwtest_send_test_msg(tcx, cur, text);
+
+			if (rc != 0) {
+				tcx->error = rc;
+			}
+		}
+		cur = cur->next;
+	}
+}
+
+static struct ibwtest_ctx *testctx = NULL;
+
+/*
+ * SIGINT handler.  The first signal requests a stop of the wrapper
+ * context; a second one (or a context that is not in state READY,
+ * CONNECT_REQUEST or ERROR) sets kill_me so that the main loop exits.
+ * NOTE(review): DEBUG() is called from signal context - confirm that
+ * this is acceptable for this test tool.
+ */
+static void ibwtest_sigint_handler(int sig)
+{
+	DEBUG(DEBUG_ERR, ("got SIGINT\n"));
+	if (testctx == NULL) {
+		return;
+	}
+
+	switch (testctx->ibwctx->state) {
+	case IBWS_READY:
+	case IBWS_CONNECT_REQUEST:
+	case IBWS_ERROR:
+		break;
+	default:
+		testctx->kill_me = 1;
+		return;
+	}
+
+	if (testctx->stopping) {
+		DEBUG(DEBUG_DEBUG, ("forcing exit...\n"));
+		testctx->kill_me = 1;
+		return;
+	}
+
+	/* mostly expected case */
+	ibw_stop(testctx->ibwctx);
+	testctx->stopping = 1;
+}
+
+/*
+ * Parse "name1:value1,name2:value2,..." in place into an array of
+ * ibw_initattr entries.
+ *
+ * optext is modified: the ':' and ',' separators are overwritten with
+ * NUL bytes and the returned name/value pointers point into optext, so
+ * optext must outlive the returned array.
+ *
+ * tcx     talloc parent of the returned array
+ * pattrs  receives the array (NULL on failure)
+ * nattrs  receives the number of entries that were actually filled in
+ * op      option letter, only used in the error message
+ *
+ * Returns 0 on success, -1 on allocation failure or if an entry has no
+ * ':' separator.
+ */
+static int ibwtest_parse_attrs(struct ibwtest_ctx *tcx, char *optext,
+	struct ibw_initattr **pattrs, int *nattrs, char op)
+{
+	int i = 0, n = 1;
+	int process_next = 1;
+	char *p, *q;
+	struct ibw_initattr *attrs = NULL;
+
+	*pattrs = NULL;
+	*nattrs = 0;
+
+	/* upper bound for the number of entries: commas + 1 */
+	for(p = optext; *p!='\0'; p++) {
+		if (*p==',')
+			n++;
+	}
+
+	attrs = (struct ibw_initattr *)talloc_size(tcx,
+		n * sizeof(struct ibw_initattr));
+	if (attrs == NULL) {
+		fprintf(stderr, "-%c out of memory\n", op);
+		return -1;
+	}
+
+	for(p = optext; *p!='\0'; p++) {
+		if (process_next) {
+			attrs[i].name = p;
+			q = strchr(p, ':');
+			if (q==NULL) {
+				fprintf(stderr, "-%c format error\n", op);
+				talloc_free(attrs);
+				return -1;
+			}
+			*q = '\0';
+			attrs[i].value = q + 1;
+
+			process_next = 0;
+			i++;
+			p = q; /* ++ at end */
+		}
+		if (*p==',') {
+			*p = '\0'; /* ++ at end */
+			process_next = 1;
+		}
+	}
+
+	/*
+	 * Report the entries really parsed (i), not the comma based upper
+	 * bound (n): for "" or a trailing ',' the last slots stay
+	 * uninitialised and must not be handed to the caller.
+	 */
+	*pattrs = attrs;
+	*nattrs = i;
+
+	return 0;
+}
+
+/*
+ * Convert 'address' into an IPv4 address: first as a dotted quad via
+ * inet_pton(), otherwise through a gethostbyname() lookup.
+ *
+ * Returns 0 and fills *addr on success, -1 if the name cannot be
+ * resolved or the resolved address does not fit into *addr.
+ */
+static int ibwtest_get_address(const char *address, struct in_addr *addr)
+{
+	struct hostent *host;
+
+	if (inet_pton(AF_INET, address, addr) > 0) {
+		return 0;
+	}
+
+	host = gethostbyname(address);
+	if (host == NULL || host->h_length > sizeof(*addr)) {
+		DEBUG(DEBUG_ERR, ("invalid network address '%s'\n", address));
+		return -1;
+	}
+	memcpy(addr, host->h_addr, host->h_length);
+
+	return 0;
+}
+
+/*
+ * Parse the current getopt argument (optarg) of the form
+ * "addr1:port1,addr2:port2,..." into tcx->addrs / tcx->naddrs.
+ *
+ * op is the option letter, only used in error messages.
+ *
+ * Returns 0 on success, -1 on allocation failure, format error or if an
+ * address cannot be resolved.
+ */
+static int ibwtest_getdests(struct ibwtest_ctx *tcx, char op)
+{
+	int i;
+	struct ibw_initattr *attrs = NULL;
+	struct sockaddr_in *p;
+	char *tmp;
+
+	/* the parser modifies its input, so work on a copy owned by tcx */
+	tmp = talloc_strdup(tcx, optarg);
+	if (tmp == NULL) return -1;
+	/* hack to reuse the above ibw_initattr parser */
+	if (ibwtest_parse_attrs(tcx, tmp, &attrs, &tcx->naddrs, op))
+		return -1;
+	if (attrs == NULL || tcx->naddrs < 1) {
+		fprintf(stderr, "-%c format error\n", op);
+		return -1;
+	}
+
+	/* zeroed so that no uninitialised sockaddr_in padding is passed on */
+	tcx->addrs = talloc_zero_array(tcx, struct sockaddr_in, tcx->naddrs);
+	if (tcx->addrs == NULL) {
+		fprintf(stderr, "-%c out of memory\n", op);
+		return -1;
+	}
+	for(i=0; i<tcx->naddrs; i++) {
+		p = tcx->addrs + i;
+		p->sin_family = AF_INET;
+		if (ibwtest_get_address(attrs[i].name, &p->sin_addr))
+			return -1;
+		p->sin_port = htons(atoi(attrs[i].value));
+	}
+
+	return 0;
+}
+
+/*
+ * Server side setup: bind the wrapper context to the single configured
+ * address and start listening.
+ *
+ * Returns 0 on success, -1 if not exactly one address was given or if
+ * bind/listen fails.
+ */
+static int ibwtest_init_server(struct ibwtest_ctx *tcx)
+{
+	int rc;
+
+	if (tcx->naddrs != 1) {
+		fprintf(stderr, "incorrect number of addrs(%d!=1)\n", tcx->naddrs);
+		return -1;
+	}
+
+	rc = ibw_bind(tcx->ibwctx, &tcx->addrs[0]);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR, ("ERROR: ibw_bind failed\n"));
+		return -1;
+	}
+
+	rc = ibw_listen(tcx->ibwctx, 1);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR, ("ERROR: ibw_listen failed\n"));
+		return -1;
+	}
+
+	/* continued at IBWS_READY */
+	return 0;
+}
+
+/*
+ * Print the command line help to stdout.
+ *
+ * tcx   supplies the current default values shown in the text
+ * name  program name (argv[0])
+ *
+ * The option letters match the getopt() switch in main(): addresses are
+ * given with -a, -d sets the log level.
+ */
+static void ibwtest_usage(struct ibwtest_ctx *tcx, char *name)
+{
+	printf("Usage:\n");
+	printf("\t%s -i <id> -o {name:value} -a {addr:port} -t nsec -s\n", name);
+	printf("\t-i <id> is a free text, acting as a server id, max 23 chars [mandatory]\n");
+	printf("\t-o name1:value1,name2:value2,... is a list of (name, value) pairs\n");
+	printf("\t-a addr1:port1,addr2:port2,... is a list of destination ip addresses\n");
+	/* nsec, sleep_usec and maxsize are unsigned: print them with %u */
+	printf("\t-t nsec delta time between sends in nanosec [default %u]\n", tcx->nsec);
+	printf("\t\t send message periodically and endless when nsec is non-zero\n");
+	printf("\t-s server mode (you have to give exactly one -a address:port in this case)\n");
+	printf("\t-n number of messages to send [default %d]\n", tcx->nmsg);
+	printf("\t-l usec time to sleep in the main loop [default %u]\n", tcx->sleep_usec);
+	printf("\t-v max variable msg size in bytes [default %u], 0=don't send var. size\n", (unsigned int)tcx->maxsize);
+	printf("\t-d LogLevel [default %d]\n", DEBUGLEVEL);
+	printf("Press ctrl+C to stop the program.\n");
+}
+
+/*
+ * Test driver for the infiniband wrapper.
+ *
+ * Parses the command line, initialises the wrapper, then either listens
+ * (server, -s) or connects to all given addresses (client) and runs the
+ * tevent loop until a stop is signalled (kill_me) or an error is
+ * recorded.  The client finally prints the elapsed time per sent
+ * message.
+ *
+ * Exit code: 0 if everything went fine, 1 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	int rc, op;
+	int result = 1;
+	struct tevent_context *ev = NULL;
+	struct ibwtest_ctx *tcx = NULL;
+	float usec;
+
+	/* talloc_zero() already clears the structure */
+	tcx = talloc_zero(NULL, struct ibwtest_ctx);
+	if (tcx == NULL) {
+		fprintf(stderr, "ERROR: out of memory\n");
+		goto cleanup;
+	}
+	tcx->nsec = 0;
+	tcx->nmsg = 1000;
+	debuglevel_set(0);
+
+	/* here is the only case we can't avoid using global... */
+	testctx = tcx;
+	signal(SIGINT, ibwtest_sigint_handler);
+	srand((unsigned)time(NULL));
+
+	while ((op=getopt(argc, argv, "i:o:d:m:st:n:l:v:a:")) != -1) {
+		switch (op) {
+		case 'i':
+			tcx->id = talloc_strdup(tcx, optarg);
+			break;
+		case 'o':
+			tcx->opts = talloc_strdup(tcx, optarg);
+			/* bail out only if the copy failed (the check used
+			 * to be inverted, which made every -o abort) */
+			if (tcx->opts == NULL) goto cleanup;
+			if (ibwtest_parse_attrs(tcx, tcx->opts, &tcx->attrs,
+				&tcx->nattrs, op))
+				goto cleanup;
+			break;
+		case 'a':
+			if (ibwtest_getdests(tcx, op))
+				goto cleanup;
+			break;
+		case 's':
+			tcx->is_server = 1;
+			break;
+		case 't':
+			tcx->nsec = (unsigned int)atoi(optarg);
+			break;
+		case 'n':
+			tcx->nmsg = atoi(optarg);
+			break;
+		case 'l':
+			tcx->sleep_usec = (unsigned int)atoi(optarg);
+			break;
+		case 'v':
+			tcx->maxsize = (unsigned int)atoi(optarg);
+			break;
+		case 'd':
+			debuglevel_set(atoi(optarg));
+			break;
+		default:
+			fprintf(stderr, "ERROR: unknown option -%c\n", (char)op);
+			ibwtest_usage(tcx, argv[0]);
+			goto cleanup;
+		}
+	}
+	if (tcx->id==NULL) {
+		ibwtest_usage(tcx, argv[0]);
+		goto cleanup;
+	}
+
+	ev = tevent_context_init(NULL);
+	assert(ev);
+
+	tcx->ibwctx = ibw_init(tcx->attrs, tcx->nattrs,
+		tcx,
+		ibwtest_connstate_handler,
+		ibwtest_receive_handler,
+		ev
+	);
+	if (!tcx->ibwctx)
+		goto cleanup;
+
+	if (tcx->is_server)
+		rc = ibwtest_init_server(tcx);
+	else
+		rc = ibwtest_connect_everybody(tcx);
+	if (rc)
+		goto cleanup;
+
+	while(!tcx->kill_me && !tcx->error) {
+		if (tcx->nsec) {
+			/* re-arm the periodic send timer for this iteration */
+			tevent_add_timer(ev, tcx,
+				timeval_current_ofs(0, tcx->nsec),
+				ibwtest_timeout_handler, tcx);
+		}
+
+		tevent_loop_once(ev);
+
+		if (tcx->sleep_usec)
+			usleep(tcx->sleep_usec);
+	}
+
+	if (!tcx->is_server && tcx->nsent!=0 && !tcx->error) {
+		if (gettimeofday(&tcx->end_time, NULL)) {
+			DEBUG(DEBUG_ERR, ("gettimeofday error %d\n", errno));
+			goto cleanup;
+		}
+		usec = (tcx->end_time.tv_sec - tcx->start_time.tv_sec) * 1000000 +
+			(tcx->end_time.tv_usec - tcx->start_time.tv_usec);
+		printf("usec: %f, nmsg: %d, usec/nmsg: %f\n",
+			usec, tcx->nsent, usec/(float)tcx->nsent);
+	}
+
+	if (!tcx->error)
+		result = 0; /* everything OK */
+
+cleanup:
+	if (tcx)
+		talloc_free(tcx);
+	if (ev)
+		talloc_free(ev);
+	DEBUG(DEBUG_ERR, ("exited with code %d\n", result));
+	return result;
+}
diff --git a/ctdb/include/common/srvid.h b/ctdb/include/common/srvid.h
new file mode 120000
index 0000000..5a36c27
--- /dev/null
+++ b/ctdb/include/common/srvid.h
@@ -0,0 +1 @@
+../../common/srvid.h \ No newline at end of file
diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h
new file mode 100644
index 0000000..b993620
--- /dev/null
+++ b/ctdb/include/ctdb_client.h
@@ -0,0 +1,289 @@
+/*
+ ctdb database library: old client interface
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CTDB_CLIENT_H
+#define _CTDB_CLIENT_H
+
+#include "common/srvid.h"
+#include "ctdb_protocol.h"
+
+/*
+ * Progress of a client control request; stored in the 'state' member of
+ * struct ctdb_client_control_state.
+ * NOTE(review): the exact state transitions are implemented outside this
+ * header - confirm there.
+ */
+enum control_state {
+ CTDB_CONTROL_WAIT,
+ CTDB_CONTROL_DONE,
+ CTDB_CONTROL_ERROR,
+ CTDB_CONTROL_TIMEOUT
+};
+
+/*
+ * State of one control request; returned by the *_send() functions
+ * declared in this header and consumed by the matching *_recv()
+ * functions.
+ */
+struct ctdb_client_control_state {
+ struct ctdb_context *ctdb;
+ uint32_t reqid;
+ int32_t status;
+ TDB_DATA outdata;
+ enum control_state state;
+ char *errormsg;
+ struct ctdb_req_control_old *c;
+
+ /*
+  Optional callback registered for the completion (or failure) of
+  this control.
+  If a callback is used, it MUST talloc_free the cb_data passed to it.
+ */
+ struct {
+ void (*fn)(struct ctdb_client_control_state *);
+ void *private_data;
+ } async;
+};
+
+struct tevent_context;
+struct ctdb_db_context;
+
+/*
+ allocate a packet for use in client<->daemon communication
+ */
+struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ enum ctdb_operation operation,
+ size_t length, size_t slength,
+ const char *type);
+
+/*
+ * Typed wrapper around _ctdbd_allocate_pkt(): passes sizeof(type) and
+ * the stringified type name in addition to the given arguments and casts
+ * the result to (type *).
+ */
+#define ctdbd_allocate_pkt(ctdb, mem_ctx, operation, length, type) \
+ (type *)_ctdbd_allocate_pkt(ctdb, mem_ctx, operation, length, \
+ sizeof(type), #type)
+
+int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
+ struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
+ TDB_DATA *data, bool updatetdb);
+
+void ctdb_request_message(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr);
+
+void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args);
+
+int ctdb_socket_connect(struct ctdb_context *ctdb);
+
+/*
+ make a ctdb call. The associated ctdb call function will be called on the DMASTER
+ for the given record
+*/
+struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
+ struct ctdb_call *call);
+int ctdb_call_recv(struct ctdb_client_call_state *state,
+ struct ctdb_call *call);
+int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call);
+
+/* setup a handler for ctdb messages */
+typedef void (*ctdb_msg_fn_t)(struct ctdb_context *, uint64_t srvid,
+ TDB_DATA data, void *);
+
+int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
+ srvid_handler_fn handler,
+ void *private_data);
+int ctdb_client_remove_message_handler(struct ctdb_context *ctdb,
+ uint64_t srvid, void *private_data);
+
+/* send a ctdb message */
+int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
+ uint64_t srvid, TDB_DATA data);
+
+struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
+ uint32_t destnode,
+ uint64_t srvid,
+ uint32_t opcode,
+ uint32_t flags,
+ TDB_DATA data,
+ TALLOC_CTX *mem_ctx,
+ struct timeval *timeout,
+ char **errormsg);
+int ctdb_control_recv(struct ctdb_context *ctdb,
+ struct ctdb_client_control_state *state,
+ TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
+ int32_t *status, char **errormsg);
+int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
+ uint32_t opcode, uint32_t flags, TDB_DATA data,
+ TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
+ struct timeval *timeout, char **errormsg);
+
+int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout,
+ uint32_t destnode, TALLOC_CTX *mem_ctx,
+ struct ctdb_vnn_map **vnnmap);
+
+/*
+ get the recovery mode of a remote node
+ */
+struct ctdb_client_control_state *ctdb_ctrl_getrecmode_send(
+ struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ struct timeval timeout,
+ uint32_t destnode);
+int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
+ struct ctdb_client_control_state *state,
+ uint32_t *recmode);
+int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
+ struct timeval timeout, uint32_t destnode,
+ uint32_t *recmode);
+
+/*
+ set the recovery mode of a remote node
+ */
+int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout,
+ uint32_t destnode, uint32_t recmode);
+
+int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout,
+ uint32_t destnode, TALLOC_CTX *mem_ctx,
+ struct ctdb_node_map_old **nodemap);
+
+int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, struct timeval timeout,
+ uint32_t destnode, uint32_t *runstate);
+
+int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode,
+ int32_t *level);
+
+int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout,
+ uint32_t destnode);
+
+int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout,
+ uint32_t destnode);
+
+int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ TALLOC_CTX *mem_ctx, uint32_t flags,
+ struct ctdb_public_ip_list_old **ips);
+int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_list_old **ips);
+
+int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_iface_list_old **ifaces);
+
+int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ struct ctdb_tunable_list *tunables);
+
+/*
+ set some flags
+*/
+void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags);
+
+const char *ctdb_get_socketname(struct ctdb_context *ctdb);
+
+/* return pnn of this node */
+uint32_t ctdb_get_pnn(struct ctdb_context *ctdb);
+
+typedef void (*client_async_callback)(struct ctdb_context *ctdb,
+ uint32_t node_pnn, int32_t res,
+ TDB_DATA outdata, void *callback_data);
+
+/*
+ * Bookkeeping for a batch of asynchronous controls; used with
+ * ctdb_client_async_add() and ctdb_client_async_wait().
+ * NOTE(review): presumably 'count' tracks outstanding controls and
+ * 'fail_count' the failed ones - confirm against the implementation.
+ */
+struct client_async_data {
+ enum ctdb_controls opcode;
+ bool dont_log_errors;
+ uint32_t count;
+ uint32_t fail_count;
+ client_async_callback callback;
+ client_async_callback fail_callback;
+ void *callback_data;
+};
+
+void ctdb_client_async_add(struct client_async_data *data,
+ struct ctdb_client_control_state *state);
+int ctdb_client_async_wait(struct ctdb_context *ctdb,
+ struct client_async_data *data);
+int ctdb_client_async_control(struct ctdb_context *ctdb,
+ enum ctdb_controls opcode, uint32_t *nodes,
+ uint64_t srvid, struct timeval timeout,
+ bool dont_log_errors, TDB_DATA data,
+ client_async_callback client_callback,
+ client_async_callback fail_callback,
+ void *callback_data);
+
+uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
+ struct ctdb_vnn_map *vnn_map,
+ TALLOC_CTX *mem_ctx, bool include_self);
+
+uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
+ struct ctdb_node_map_old *node_map,
+ TALLOC_CTX *mem_ctx, bool include_self);
+uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
+ struct ctdb_node_map_old *node_map,
+ TALLOC_CTX *mem_ctx, bool include_self);
+
+/*
+ get capabilities of a remote node
+ */
+
+struct ctdb_client_control_state *ctdb_ctrl_getcapabilities_send(
+ struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ struct timeval timeout,
+ uint32_t destnode);
+int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_client_control_state *state,
+ uint32_t *capabilities);
+int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ uint32_t *capabilities);
+
+/*
+ * Capabilities of one node; ctdb_get_capabilities() returns an array of
+ * these.
+ * NOTE(review): presumably 'retrieved' tells whether 'capabilities' is
+ * valid for that node - confirm against the implementation.
+ */
+struct ctdb_node_capabilities {
+ bool retrieved;
+ uint32_t capabilities;
+};
+
+/* Retrieve capabilities for all connected nodes. The length of the
+ * returned array can be calculated using talloc_array_length(). */
+struct ctdb_node_capabilities *ctdb_get_capabilities(
+ struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ struct timeval timeout,
+ struct ctdb_node_map_old *nodemap);
+
+/* Get capabilities for specified node, NULL if not found */
+uint32_t *ctdb_get_node_capabilities(struct ctdb_node_capabilities *caps,
+ uint32_t pnn);
+
+/* True if the given node has all of the required capabilities */
+bool ctdb_node_has_capabilities(struct ctdb_node_capabilities *caps,
+ uint32_t pnn, uint32_t capabilities_required);
+
+int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb);
+
+int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb,
+ struct timeval timeout, double latency);
+
+int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout,
+ uint32_t destnode, struct ctdb_ban_state *bantime);
+
+struct ctdb_client_control_state *ctdb_ctrl_updaterecord_send(
+ struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ struct timeval timeout,
+ uint32_t destnode,
+ struct ctdb_db_context *ctdb_db,
+ TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA data);
+int ctdb_ctrl_updaterecord_recv(struct ctdb_context *ctdb,
+ struct ctdb_client_control_state *state);
+int ctdb_ctrl_updaterecord(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
+ struct timeval timeout, uint32_t destnode,
+ struct ctdb_db_context *ctdb_db, TDB_DATA key,
+ struct ctdb_ltdb_header *header, TDB_DATA data);
+
+#endif /* _CTDB_CLIENT_H */
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
new file mode 100644
index 0000000..8027812
--- /dev/null
+++ b/ctdb/include/ctdb_private.h
@@ -0,0 +1,1040 @@
+/*
+ ctdb database library
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CTDB_PRIVATE_H
+#define _CTDB_PRIVATE_H
+
+#include "ctdb_client.h"
+#include <sys/socket.h>
+
+#include "common/db_hash.h"
+
+/*
+ array of tcp connections
+ */
+struct ctdb_tcp_array {
+ uint32_t num;
+ struct ctdb_connection *connections;
+};
+
+/*
+ an installed ctdb remote call
+*/
+typedef int (*ctdb_fn_t)(struct ctdb_call_info *);
+
+struct ctdb_registered_call {
+ struct ctdb_registered_call *next, *prev; /* list linkage; ctdb_db_context.calls is the list of registered calls */
+ uint32_t id;
+ ctdb_fn_t fn; /* handler; receives a struct ctdb_call_info * and returns int */
+};
+
+/*
+ check that a pnn is valid, i.e. (as uint32_t) strictly below ctdb->num_nodes
+ */
+#define ctdb_validate_pnn(ctdb, pnn) (((uint32_t)(pnn)) < (ctdb)->num_nodes)
+
+/* used for callbacks in ctdb_control requests */
+typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *,
+ int32_t status, TDB_DATA data,
+ const char *errormsg,
+ void *private_data);
+/*
+ structure describing a connected client in the daemon
+ */
+struct ctdb_client {
+ struct ctdb_context *ctdb;
+ int fd;
+ struct ctdb_queue *queue;
+ uint32_t client_id;
+ pid_t pid;
+ struct ctdb_tcp_list *tcp_list;
+ uint32_t db_id;
+ uint32_t num_persistent_updates;
+ struct ctdb_client_notify_list *notify;
+};
+
+/*
+ state associated with one node
+*/
+struct ctdb_node {
+ struct ctdb_context *ctdb;
+ ctdb_sock_addr address;
+ const char *name; /* for debug messages */
+ void *transport_data; /* private to transport */
+ uint32_t pnn;
+ uint32_t flags;
+
+ /* used by the dead node monitoring */
+ uint32_t dead_count;
+ uint32_t rx_cnt;
+ uint32_t tx_cnt;
+
+ /* a list of controls pending to this node, so we can time them out quickly
+ if the node becomes disconnected */
+ struct daemon_control_state *pending_controls;
+};
+
+/*
+ transport specific methods
+*/
+struct ctdb_methods {
+ int (*initialise)(struct ctdb_context *); /* initialise transport structures */
+ int (*start)(struct ctdb_context *); /* start the transport */
+ int (*add_node)(struct ctdb_node *); /* setup a new node */
+ int (*connect_node)(struct ctdb_node *); /* connect to node */
+ int (*queue_pkt)(struct ctdb_node *, uint8_t *data, uint32_t length); /* presumably queues length bytes of data for the node -- TODO confirm */
+ void *(*allocate_pkt)(TALLOC_CTX *mem_ctx, size_t ); /* presumably allocates a packet buffer of the given size under mem_ctx -- TODO confirm */
+ void (*shutdown)(struct ctdb_context *); /* shutdown transport */
+ void (*restart)(struct ctdb_node *); /* stop and restart the connection */
+};
+
+/*
+ transport calls up to the ctdb layer
+*/
+struct ctdb_upcalls {
+ /* recv_pkt is called when a packet comes in */
+ void (*recv_pkt)(struct ctdb_context *, uint8_t *data, uint32_t length);
+
+ /* node_dead is called when an attempt to send to a node fails */
+ void (*node_dead)(struct ctdb_node *);
+
+ /* node_connected is called when a connection to a node is established */
+ void (*node_connected)(struct ctdb_node *);
+};
+
+/* additional data required for the daemon mode */
+struct ctdb_daemon_data {
+ int sd;
+ char *name;
+ struct ctdb_queue *queue;
+};
+
+
+#define CTDB_UPDATE_STAT(ctdb, counter, value) /* raise counter to value in statistics and statistics_current when value is larger */ \
+ { /* NOTE(review): expands to a bare block and evaluates value more than once -- pass a side-effect-free expression */ \
+ if ((value) > (ctdb)->statistics.counter) { \
+ (ctdb)->statistics.counter = (value); \
+ } \
+ if ((value) > (ctdb)->statistics_current.counter) { \
+ (ctdb)->statistics_current.counter = (value); \
+ } \
+ }
+
+#define CTDB_INCREMENT_STAT(ctdb, counter) /* increment counter in both statistics and statistics_current */ \
+ { \
+ (ctdb)->statistics.counter++; \
+ (ctdb)->statistics_current.counter++; \
+ }
+
+#define CTDB_DECREMENT_STAT(ctdb, counter) /* decrement counter in statistics and statistics_current, each only while it is greater than zero */ \
+ { \
+ if ((ctdb)->statistics.counter > 0) \
+ (ctdb)->statistics.counter--; \
+ if ((ctdb)->statistics_current.counter > 0) \
+ (ctdb)->statistics_current.counter--; \
+ }
+
+#define CTDB_INCREMENT_DB_STAT(ctdb_db, counter) /* increment counter in the per-database statistics */ \
+ { \
+ (ctdb_db)->statistics.counter++; \
+ }
+
+#define CTDB_DECREMENT_DB_STAT(ctdb_db, counter) /* decrement the per-database counter only while it is greater than zero */ \
+ { \
+ if ((ctdb_db)->statistics.counter > 0) \
+ (ctdb_db)->statistics.counter--; \
+ }
+
+#define CTDB_UPDATE_RECLOCK_LATENCY(ctdb, name, counter, value) /* fold value into counter's max/min/total/num in statistics and statistics_current */ \
+ { /* NOTE(review): bare block; value is evaluated several times -- pass a side-effect-free expression */ \
+ if ((value) > (ctdb)->statistics.counter.max) \
+ (ctdb)->statistics.counter.max = (value); \
+ if ((value) > (ctdb)->statistics_current.counter.max) \
+ (ctdb)->statistics_current.counter.max = (value); \
+ \
+ if ((ctdb)->statistics.counter.num == 0 || /* first sample seeds min */ \
+ (value) < (ctdb)->statistics.counter.min) \
+ (ctdb)->statistics.counter.min = (value); \
+ if ((ctdb)->statistics_current.counter.num == 0 || \
+ (value) < (ctdb)->statistics_current.counter.min) \
+ (ctdb)->statistics_current.counter.min = (value); \
+ \
+ (ctdb)->statistics.counter.total += (value); \
+ (ctdb)->statistics_current.counter.total += (value); \
+ \
+ (ctdb)->statistics.counter.num++; \
+ (ctdb)->statistics_current.counter.num++; \
+ \
+ if ((ctdb)->tunable.reclock_latency_ms != 0) { /* a zero tunable disables the warning */ \
+ if ((value)*1000 > (ctdb)->tunable.reclock_latency_ms) { \
+ DEBUG(DEBUG_ERR, \
+ ("High RECLOCK latency %fs for operation %s\n", \
+ (value), (name))); \
+ } \
+ } \
+ }
+
+#define CTDB_UPDATE_DB_LATENCY(ctdb_db, operation, counter, value) /* fold value into the per-database counter's max/min/total/num */ \
+ { /* NOTE(review): bare block; value is evaluated several times -- pass a side-effect-free expression */ \
+ if ((value) > (ctdb_db)->statistics.counter.max) \
+ (ctdb_db)->statistics.counter.max = (value); \
+ if ((ctdb_db)->statistics.counter.num == 0 || /* first sample seeds min */ \
+ (value) < (ctdb_db)->statistics.counter.min) \
+ (ctdb_db)->statistics.counter.min = (value); \
+ \
+ (ctdb_db)->statistics.counter.total += (value); \
+ (ctdb_db)->statistics.counter.num++; \
+ \
+ if ((ctdb_db)->ctdb->tunable.log_latency_ms != 0) { /* a zero tunable disables the warning */ \
+ if ((value)*1000 > (ctdb_db)->ctdb->tunable.log_latency_ms) { \
+ DEBUG(DEBUG_ERR, \
+ ("High latency %.6fs for operation %s on database %s\n",\
+ (value), (operation), (ctdb_db)->db_name)); \
+ } \
+ } \
+ }
+
+#define CTDB_UPDATE_LATENCY(ctdb, db, operation, counter, t) /* measure timeval_elapsed(&t) and fold it into counter's max/min/total/num */ \
+ { /* NOTE(review): bare block; the local l shadows any caller variable named l used in the arguments */ \
+ double l = timeval_elapsed(&(t)); \
+ \
+ if (l > (ctdb)->statistics.counter.max) \
+ (ctdb)->statistics.counter.max = l; \
+ if (l > (ctdb)->statistics_current.counter.max) \
+ (ctdb)->statistics_current.counter.max = l; \
+ \
+ if ((ctdb)->statistics.counter.num == 0 || /* first sample seeds min */ \
+ l < (ctdb)->statistics.counter.min) \
+ (ctdb)->statistics.counter.min = l; \
+ if ((ctdb)->statistics_current.counter.num == 0 || \
+ l < (ctdb)->statistics_current.counter.min) \
+ (ctdb)->statistics_current.counter.min = l; \
+ \
+ (ctdb)->statistics.counter.total += l; \
+ (ctdb)->statistics_current.counter.total += l; \
+ \
+ (ctdb)->statistics.counter.num++; \
+ (ctdb)->statistics_current.counter.num++; \
+ \
+ if ((ctdb)->tunable.log_latency_ms != 0) { /* a zero tunable disables the warning */ \
+ if (l*1000 > (ctdb)->tunable.log_latency_ms) { \
+ DEBUG(DEBUG_WARNING, \
+ ("High latency %.6fs for operation %s on database %s\n",\
+ l, (operation), (db)->db_name)); \
+ } \
+ } \
+ }
+
+
+struct ctdb_cluster_mutex_handle;
+struct eventd_context;
+
+enum ctdb_freeze_mode {CTDB_FREEZE_NONE, CTDB_FREEZE_PENDING, CTDB_FREEZE_FROZEN}; /* type of the freeze_mode members of ctdb_context and ctdb_db_context */
+
+/* main state of the ctdb daemon */
+struct ctdb_context {
+ struct tevent_context *ev;
+ struct timeval ctdbd_start_time;
+ struct timeval last_recovery_started;
+ struct timeval last_recovery_finished;
+ uint32_t recovery_mode;
+ TALLOC_CTX *tickle_update_context;
+ TALLOC_CTX *keepalive_ctx;
+ TALLOC_CTX *check_public_ifaces_ctx;
+ struct ctdb_tunable_list tunable; /* log_latency_ms and reclock_latency_ms are read by the CTDB_UPDATE_*LATENCY macros */
+ enum ctdb_freeze_mode freeze_mode;
+ struct ctdb_freeze_handle *freeze_handle;
+ bool freeze_transaction_started;
+ uint32_t freeze_transaction_id;
+ ctdb_sock_addr *address;
+ const char *name;
+ const char *db_directory;
+ const char *db_directory_persistent;
+ const char *db_directory_state;
+ struct tdb_wrap *db_persistent_health;
+ uint32_t db_persistent_startup_generation;
+ uint64_t db_persistent_check_errors;
+ uint64_t max_persistent_check_errors;
+ const char *transport;
+ const char *recovery_lock;
+ uint32_t pnn; /* our own pnn */
+ uint32_t num_nodes; /* ctdb_validate_pnn() accepts pnn values strictly below this */
+ uint32_t num_connected;
+ unsigned flags;
+ uint32_t capabilities;
+ struct reqid_context *idr;
+ struct ctdb_node **nodes; /* array of nodes in the cluster - indexed by vnn */
+ struct ctdb_vnn *vnn; /* list of public ip addresses and interfaces */
+ struct ctdb_interface *ifaces; /* list of local interfaces */
+ char *err_msg;
+ const struct ctdb_methods *methods; /* transport methods */
+ const struct ctdb_upcalls *upcalls; /* transport upcalls */
+ void *transport_data; /* private to transport */
+ struct ctdb_db_context *db_list; /* presumably head of the list linked through ctdb_db_context next/prev -- TODO confirm */
+ struct srvid_context *srv;
+ struct srvid_context *tunnels;
+ struct ctdb_daemon_data daemon; /* additional data required for the daemon mode */
+ struct ctdb_statistics statistics; /* updated together with statistics_current by the CTDB_*_STAT and latency macros */
+ struct ctdb_statistics statistics_current;
+#define MAX_STAT_HISTORY 100
+ struct ctdb_statistics statistics_history[MAX_STAT_HISTORY];
+ struct ctdb_vnn_map *vnn_map;
+ uint32_t num_clients;
+ bool do_checkpublicip;
+ bool do_setsched;
+ const char *event_script_dir;
+ const char *notification_script;
+ pid_t ctdbd_pid;
+ pid_t recoverd_pid;
+ enum ctdb_runstate runstate;
+ struct ctdb_monitor_state *monitor;
+ int start_as_disabled;
+ int start_as_stopped;
+ bool valgrinding;
+ uint32_t *recd_ping_count;
+ TALLOC_CTX *recd_ctx; /* a context used to track recoverd monitoring events */
+ TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
+
+ struct eventd_context *ectx;
+
+ TALLOC_CTX *banning_ctx;
+
+ struct ctdb_vacuum_child_context *vacuumer;
+
+ /* mapping from pid to ctdb_client * */
+ struct ctdb_client_pid_list *client_pids;
+
+ /* Used to defer db attach requests while in recovery mode */
+ struct ctdb_deferred_attach_context *deferred_attach;
+
+ /* if we are a child process, do we have a domain socket to send controls on */
+ bool can_send_controls;
+
+ struct ctdb_reloadips_handle *reload_ips;
+
+ const char *nodes_file;
+ const char *public_addresses_file;
+ struct trbt_tree *child_processes;
+
+ /* Used for locking record/db/alldb */
+ struct lock_context *lock_current;
+ struct lock_context *lock_pending;
+};
+
+struct ctdb_db_hot_key {
+ uint32_t count;
+ TDB_DATA key;
+ uint32_t last_logged_count;
+};
+
+struct ctdb_db_context {
+ struct ctdb_db_context *next, *prev; /* presumably list linkage (see ctdb_context.db_list) -- TODO confirm */
+ struct ctdb_context *ctdb; /* ctdb context; CTDB_UPDATE_DB_LATENCY reads its tunable.log_latency_ms */
+ uint32_t db_id;
+ uint8_t db_flags;
+ const char *db_name; /* printed in the high-latency log messages */
+ const char *db_path;
+ struct tdb_wrap *ltdb;
+ struct tdb_context *rottdb; /* ReadOnly tracking TDB */
+ struct ctdb_registered_call *calls; /* list of registered calls */
+ uint32_t seqnum;
+ struct tevent_timer *seqnum_update;
+ struct ctdb_traverse_local_handle *traverse;
+ struct ctdb_vacuum_handle *vacuum_handle;
+ char *unhealthy_reason;
+ int pending_requests;
+ struct revokechild_handle *revokechild_active;
+ struct ctdb_persistent_state *persistent_state;
+ struct trbt_tree *delete_queue;
+ struct trbt_tree *fetch_queue;
+ struct trbt_tree *sticky_records;
+ int (*ctdb_ltdb_store_fn)(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA data);
+
+ /* used to track which records we are currently fetching
+ so we can avoid sending duplicate fetch requests
+ */
+ struct trbt_tree *deferred_fetch;
+ struct trbt_tree *defer_dmaster;
+
+ struct ctdb_db_statistics_old statistics; /* updated by the CTDB_*_DB_STAT and CTDB_UPDATE_DB_LATENCY macros */
+ struct ctdb_db_hot_key hot_keys[MAX_HOT_KEYS];
+
+ struct lock_context *lock_current;
+ struct lock_context *lock_pending;
+ unsigned int lock_num_current;
+ struct db_hash_context *lock_log;
+
+ struct ctdb_call_state *pending_calls;
+
+ enum ctdb_freeze_mode freeze_mode;
+ struct ctdb_db_freeze_handle *freeze_handle;
+ bool freeze_transaction_started;
+ uint32_t freeze_transaction_id;
+ uint32_t generation;
+
+ bool invalid_records;
+ bool push_started;
+ void *push_state;
+
+ struct hash_count_context *migratedb;
+};
+
+
+#define CTDB_NO_MEMORY(ctdb, p) do { if (!(p)) { /* on NULL p: log, record the error via ctdb_set_error(), make the caller return -1 */ \
+ DEBUG(0,("Out of memory for %s at %s\n", #p, __location__)); \
+ ctdb_set_error(ctdb, "Out of memory at %s:%d", __FILE__, __LINE__); \
+ return -1; }} while (0)
+
+#define CTDB_NO_MEMORY_VOID(ctdb, p) do { if (!(p)) { /* same as CTDB_NO_MEMORY, for callers returning void */ \
+ DEBUG(0,("Out of memory for %s at %s\n", #p, __location__)); \
+ ctdb_set_error(ctdb, "Out of memory at %s:%d", __FILE__, __LINE__); \
+ return; }} while (0)
+
+#define CTDB_NO_MEMORY_NULL(ctdb, p) do { if (!(p)) { /* same as CTDB_NO_MEMORY, for callers returning a pointer (returns NULL) */ \
+ DEBUG(0,("Out of memory for %s at %s\n", #p, __location__)); \
+ ctdb_set_error(ctdb, "Out of memory at %s:%d", __FILE__, __LINE__); \
+ return NULL; }} while (0)
+
+#define CTDB_NO_MEMORY_FATAL(ctdb, p) do { if (!(p)) { /* on NULL p: log and call ctdb_fatal(); contains no return statement */ \
+ DEBUG(0,("Out of memory for %s at %s\n", #p, __location__)); \
+ ctdb_fatal(ctdb, "Out of memory in " __location__ ); \
+ }} while (0)
+
+
+enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR}; /* type of the state member of ctdb_call_state and ctdb_client_call_state */
+
+/*
+ state of an in-progress ctdb call
+*/
+struct ctdb_call_state {
+ struct ctdb_call_state *next, *prev; /* presumably list linkage (see ctdb_db_context.pending_calls) -- TODO confirm */
+ enum call_state state;
+ uint32_t reqid;
+ struct ctdb_req_call_old *c;
+ struct ctdb_db_context *ctdb_db;
+ const char *errmsg;
+ struct ctdb_call *call;
+ uint32_t generation;
+ struct {
+ void (*fn)(struct ctdb_call_state *); /* callback taking this state; presumably run on completion -- TODO confirm */
+ void *private_data;
+ } async;
+};
+
+/* internal prototypes */
+
+#define CHECK_CONTROL_DATA_SIZE(size) do { /* uses indata and opcode from the enclosing scope; makes the caller return -1 unless indata.dsize == size */ \
+ if (indata.dsize != (size)) { \
+ DEBUG(0,(__location__ " Invalid data size in opcode %u. Got %u expected %u\n", \
+ opcode, (unsigned)indata.dsize, (unsigned)(size))); /* cast must cover the whole size expression */ \
+ return -1; \
+ } \
+ } while (0)
+
+#define CHECK_CONTROL_MIN_DATA_SIZE(size) do { /* uses indata and opcode from the enclosing scope; makes the caller return -1 when indata.dsize < size */ \
+ if (indata.dsize < (size)) { \
+ DEBUG(0,(__location__ " Invalid data size in opcode %u. Got %u expected >= %u\n", \
+ opcode, (unsigned)indata.dsize, (unsigned)(size))); /* cast must cover the whole size expression */ \
+ return -1; \
+ } \
+ } while (0)
+
+/*
+ state of an in-progress ctdb call in client
+*/
+struct ctdb_client_call_state {
+ enum call_state state;
+ uint32_t reqid;
+ struct ctdb_db_context *ctdb_db;
+ struct ctdb_call *call;
+ struct {
+ void (*fn)(struct ctdb_client_call_state *); /* callback taking this state; presumably run on completion -- TODO confirm */
+ void *private_data;
+ } async;
+};
+
+extern int script_log_level;
+extern bool fast_start;
+extern const char *ctdbd_pidfile;
+
+typedef void (*deferred_requeue_fn)(void *call_context, struct ctdb_req_header *hdr); /* callback type accepted by ctdb_add_revoke_deferred_call() */
+
+
+/* from tcp/ and ib/ */
+
+int ctdb_tcp_init(struct ctdb_context *ctdb);
+int ctdb_ibw_init(struct ctdb_context *ctdb);
+
+/* from ctdb_banning.c */
+
+int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
+void ctdb_ban_self(struct ctdb_context *ctdb);
+
+/* from ctdb_call.c */
+
+struct ctdb_db_context *find_ctdb_db(struct ctdb_context *ctdb, uint32_t id);
+
+void ctdb_request_dmaster(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr);
+void ctdb_reply_dmaster(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr);
+void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
+void ctdb_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
+void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
+
+void ctdb_call_resend_db(struct ctdb_db_context *ctdb);
+void ctdb_call_resend_all(struct ctdb_context *ctdb);
+
+struct ctdb_call_state *ctdb_call_local_send(struct ctdb_db_context *ctdb_db,
+ struct ctdb_call *call,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA *data);
+
+struct ctdb_call_state *ctdb_daemon_call_send_remote(
+ struct ctdb_db_context *ctdb_db,
+ struct ctdb_call *call,
+ struct ctdb_ltdb_header *header);
+int ctdb_daemon_call_recv(struct ctdb_call_state *state,
+ struct ctdb_call *call);
+
+int ctdb_start_revoke_ro_record(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db,
+ TDB_DATA key, struct ctdb_ltdb_header *header,
+ TDB_DATA data);
+
+int ctdb_add_revoke_deferred_call(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db,
+ TDB_DATA key, struct ctdb_req_header *hdr,
+ deferred_requeue_fn fn, void *call_context);
+
+int ctdb_migration_init(struct ctdb_db_context *ctdb_db);
+
+/* from server/ctdb_control.c */
+
+int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata);
+
+void ctdb_request_control_reply(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA *outdata, int32_t status,
+ const char *errormsg);
+
+void ctdb_request_control(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr);
+void ctdb_reply_control(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr);
+
+int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
+ uint64_t srvid, uint32_t opcode,
+ uint32_t client_id, uint32_t flags,
+ TDB_DATA data,
+ ctdb_control_callback_fn_t callback,
+ void *private_data);
+
+/* from server/ctdb_daemon.c */
+
+int daemon_register_message_handler(struct ctdb_context *ctdb,
+ uint32_t client_id, uint64_t srvid);
+int daemon_deregister_message_handler(struct ctdb_context *ctdb,
+ uint32_t client_id, uint64_t srvid);
+
+void daemon_tunnel_handler(uint64_t tunnel_id, TDB_DATA data,
+ void *private_data);
+
+struct ctdb_node *ctdb_find_node(struct ctdb_context *ctdb, uint32_t pnn);
+
+int ctdb_start_daemon(struct ctdb_context *ctdb,
+ bool interactive,
+ bool test_mode_enabled);
+
+struct ctdb_req_header *_ctdb_transport_allocate(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ enum ctdb_operation operation,
+ size_t length, size_t slength,
+ const char *type);
+
+#define ctdb_transport_allocate(ctdb, mem_ctx, operation, length, type) /* typed wrapper: passes sizeof(type) and the type name, casts the result to type * */ \
+ ((type *)_ctdb_transport_allocate(ctdb, mem_ctx, operation, length, \
+ sizeof(type), #type)) /* outer parentheses keep the cast bound to the call when the result is used with -> or [] */
+
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb,
+ struct ctdb_node *node);
+
+int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
+ ctdb_fn_t fn, int id);
+
+int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn,
+ uint64_t srvid, TDB_DATA data);
+
+int32_t ctdb_control_register_notify(struct ctdb_context *ctdb,
+ uint32_t client_id, TDB_DATA indata);
+int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb,
+ uint32_t client_id, TDB_DATA indata);
+
+struct ctdb_client *ctdb_find_client_by_pid(struct ctdb_context *ctdb,
+ pid_t pid);
+
+int32_t ctdb_control_process_exists(struct ctdb_context *ctdb, pid_t pid);
+int32_t ctdb_control_check_pid_srvid(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+
+int ctdb_control_getnodesfile(struct ctdb_context *ctdb, uint32_t opcode,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+void ctdb_shutdown_sequence(struct ctdb_context *ctdb, int exit_code);
+
+int switch_from_server_to_client(struct ctdb_context *ctdb);
+
+/* From server/ctdb_fork.c */
+
+void ctdb_track_child(struct ctdb_context *ctdb, pid_t pid);
+
+pid_t ctdb_fork(struct ctdb_context *ctdb);
+pid_t ctdb_vfork_exec(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ const char *helper, int helper_argc,
+ const char **helper_argv);
+
+struct tevent_signal *ctdb_init_sigchld(struct ctdb_context *ctdb);
+
+int ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum);
+
+/* from server/ctdb_freeze.c */
+
+int32_t ctdb_control_db_freeze(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ uint32_t db_id, bool *async_reply);
+int32_t ctdb_control_db_thaw(struct ctdb_context *ctdb, uint32_t db_id);
+
+int32_t ctdb_control_freeze(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c, bool *async_reply);
+int32_t ctdb_control_thaw(struct ctdb_context *ctdb, bool check_recmode);
+
+bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_db_transaction_start(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_db_transaction_cancel(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_db_transaction_commit(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+
+int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata);
+
+bool ctdb_db_frozen(struct ctdb_db_context *ctdb_db);
+bool ctdb_db_all_frozen(struct ctdb_context *ctdb);
+bool ctdb_db_allow_access(struct ctdb_db_context *ctdb_db);
+
+/* from server/ctdb_keepalive.c */
+
+void ctdb_start_keepalive(struct ctdb_context *ctdb);
+void ctdb_stop_keepalive(struct ctdb_context *ctdb);
+
+void ctdb_request_keepalive(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr);
+
+/* from server/ctdb_lock.c */
+
+struct lock_request;
+
+typedef int (*ctdb_db_handler_t)(struct ctdb_db_context *ctdb_db,
+ void *private_data); /* per-database callback type accepted by ctdb_db_iterator() */
+
+int ctdb_db_iterator(struct ctdb_context *ctdb, ctdb_db_handler_t handler,
+ void *private_data);
+
+int ctdb_lockdb_mark(struct ctdb_db_context *ctdb_db);
+
+int ctdb_lockdb_unmark(struct ctdb_db_context *ctdb_db);
+
+struct lock_request *ctdb_lock_record(TALLOC_CTX *mem_ctx,
+ struct ctdb_db_context *ctdb_db,
+ TDB_DATA key,
+ bool auto_mark,
+ void (*callback)(void *, bool),
+ void *private_data);
+
+struct lock_request *ctdb_lock_db(TALLOC_CTX *mem_ctx,
+ struct ctdb_db_context *ctdb_db,
+ bool auto_mark,
+ void (*callback)(void *, bool),
+ void *private_data);
+
+/* from ctdb_logging.c */
+
+bool ctdb_logging_init(TALLOC_CTX *mem_ctx, const char *logging,
+ const char *debug_level);
+
+int ctdb_set_child_logging(struct ctdb_context *ctdb);
+
+/* from ctdb_logging_file.c */
+
+void ctdb_log_init_file(void);
+
+/* from ctdb_logging_syslog.c */
+
+void ctdb_log_init_syslog(void);
+
+/* from ctdb_ltdb_server.c */
+
+int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key, struct ctdb_req_header *hdr,
+ void (*recv_pkt)(void *, struct ctdb_req_header *),
+ void *recv_context, bool ignore_generation);
+
+int ctdb_ltdb_lock_fetch_requeue(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key, struct ctdb_ltdb_header *header,
+ struct ctdb_req_header *hdr, TDB_DATA *data,
+ void (*recv_pkt)(void *, struct ctdb_req_header *),
+ void *recv_context, bool ignore_generation);
+
+int ctdb_load_persistent_health(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db);
+int ctdb_update_persistent_health(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db,
+ const char *reason,/* NULL means healthy */
+ unsigned int num_healthy_nodes);
+int ctdb_recheck_persistent_health(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_db_set_healthy(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_db_get_health(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+int ctdb_set_db_readonly(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db);
+
+int ctdb_process_deferred_attach(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_db_attach(struct ctdb_context *ctdb,
+ TDB_DATA indata,
+ TDB_DATA *outdata,
+ uint8_t db_flags,
+ uint32_t srcnode,
+ uint32_t client_id,
+ struct ctdb_req_control_old *c,
+ bool *async_reply);
+int32_t ctdb_control_db_detach(struct ctdb_context *ctdb, TDB_DATA indata,
+ uint32_t client_id);
+
+int ctdb_attach_databases(struct ctdb_context *ctdb);
+
+int32_t ctdb_ltdb_update_seqnum(struct ctdb_context *ctdb, uint32_t db_id,
+ uint32_t srcnode);
+int32_t ctdb_ltdb_enable_seqnum(struct ctdb_context *ctdb, uint32_t db_id);
+
+int ctdb_set_db_sticky(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db);
+
+void ctdb_db_statistics_reset(struct ctdb_db_context *ctdb_db);
+
+int32_t ctdb_control_get_db_statistics(struct ctdb_context *ctdb,
+ uint32_t db_id, TDB_DATA *outdata);
+
+/* from ctdb_monitor.c */
+
+void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event);
+
+void ctdb_stop_monitoring(struct ctdb_context *ctdb);
+
+void ctdb_wait_for_first_recovery(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata);
+
+/* from ctdb_persistent.c */
+
+void ctdb_persistent_finish_trans3_commits(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA recdata, bool *async_reply);
+
+int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA recdata);
+int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA recdata);
+
+int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+/* from ctdb_recover.c */
+
+int ctdb_control_getvnnmap(struct ctdb_context *ctdb, uint32_t opcode,
+ TDB_DATA indata, TDB_DATA *outdata);
+int ctdb_control_setvnnmap(struct ctdb_context *ctdb, uint32_t opcode,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+int ctdb_control_getdbmap(struct ctdb_context *ctdb, uint32_t opcode,
+ TDB_DATA indata, TDB_DATA *outdata);
+int ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+int ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode);
+
+int32_t ctdb_control_db_pull(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata, TDB_DATA *outdata);
+int32_t ctdb_control_db_push_start(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_db_push_confirm(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+int ctdb_deferred_drop_all_ips(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata, bool *async_reply,
+ const char **errormsg);
+
+int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ bool *async_reply);
+int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ bool *async_reply);
+
+int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb,
+ TDB_DATA *outdata);
+
+int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb);
+
+void ctdb_node_become_inactive(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_stop_node(struct ctdb_context *ctdb);
+int32_t ctdb_control_continue_node(struct ctdb_context *ctdb);
+
+/* from ctdb_recoverd.c */
+
+int ctdb_start_recoverd(struct ctdb_context *ctdb);
+void ctdb_stop_recoverd(struct ctdb_context *ctdb);
+
+/* from ctdb_server.c */
+
+int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport);
+
+struct ctdb_node *ctdb_ip_to_node(struct ctdb_context *ctdb,
+ const ctdb_sock_addr *nodeip);
+uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb,
+ const ctdb_sock_addr *nodeip);
+
+void ctdb_load_nodes_file(struct ctdb_context *ctdb);
+
+int ctdb_set_address(struct ctdb_context *ctdb, const char *address);
+
+uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb);
+
+void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *);
+
+void ctdb_node_dead(struct ctdb_node *node);
+void ctdb_node_connected(struct ctdb_node *node);
+
+void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
+void ctdb_queue_packet_opcode(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr, unsigned opcode);
+
+/* from ctdb_serverids.c */
+
+int32_t ctdb_control_register_server_id(struct ctdb_context *ctdb,
+ uint32_t client_id, TDB_DATA indata);
+int32_t ctdb_control_check_server_id(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_unregister_server_id(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_get_server_id_list(struct ctdb_context *ctdb,
+ TDB_DATA *outdata);
+
+/* from ctdb_statistics.c */
+
+int ctdb_statistics_init(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA *outdata);
+
+/* from ctdb_takeover.c */
+
+int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata,
+ bool *async_reply);
+int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata,
+ bool *async_reply);
+int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ bool *async_reply);
+
+int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses);
+
+int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
+ TDB_DATA indata);
+int32_t ctdb_control_tcp_client_disconnected(struct ctdb_context *ctdb,
+ uint32_t client_id,
+ TDB_DATA indata);
+int32_t ctdb_control_tcp_client_passed(struct ctdb_context *ctdb,
+ uint32_t client_id,
+ TDB_DATA indata);
+int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata,
+ bool tcp_update_needed);
+int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
+
+void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
+
+void ctdb_release_all_ips(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA *outdata);
+int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA *outdata);
+int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata);
+
+int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata);
+
+void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+
+int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
+ TDB_DATA recdata);
+
+int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ bool *async_reply);
+
+/* from ctdb_traverse.c */
+
+int32_t ctdb_control_traverse_all_ext(struct ctdb_context *ctdb,
+ TDB_DATA data, TDB_DATA *outdata);
+int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb,
+ TDB_DATA data, TDB_DATA *outdata);
+int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb,
+ TDB_DATA data, TDB_DATA *outdata);
+int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA indata,
+ TDB_DATA *outdata, uint32_t srcnode);
+
+int32_t ctdb_control_traverse_start_ext(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata,
+ uint32_t srcnode, uint32_t client_id);
+int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata,
+ uint32_t srcnode, uint32_t client_id);
+
+/* from ctdb_tunables.c */
+
+void ctdb_tunables_set_defaults(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
+ TDB_DATA *outdata);
+int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb,
+ TDB_DATA *outdata);
+bool ctdb_tunables_load(struct ctdb_context *ctdb);
+
+/* from ctdb_tunnel.c */
+
+int32_t ctdb_control_tunnel_register(struct ctdb_context *ctdb,
+ uint32_t client_id, uint64_t tunnel_id);
+int32_t ctdb_control_tunnel_deregister(struct ctdb_context *ctdb,
+ uint32_t client_id, uint64_t tunnel_id);
+
+int ctdb_daemon_send_tunnel(struct ctdb_context *ctdb, uint32_t destnode,
+ uint64_t tunnel_id, uint32_t client_id,
+ TDB_DATA data);
+
+void ctdb_request_tunnel(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr);
+
+/* from ctdb_update_record.c */
+
+int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA recdata, bool *async_reply);
+
+/* from ctdb_uptime.c */
+
+int32_t ctdb_control_uptime(struct ctdb_context *ctdb, TDB_DATA *outdata);
+
+/* from ctdb_vacuum.c */
+
+int32_t ctdb_control_db_vacuum(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata,
+ bool *async_reply);
+
+void ctdb_stop_vacuuming(struct ctdb_context *ctdb);
+int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
+
+int32_t ctdb_control_schedule_for_deletion(struct ctdb_context *ctdb,
+ TDB_DATA indata);
+int32_t ctdb_local_schedule_for_deletion(struct ctdb_db_context *ctdb_db,
+ const struct ctdb_ltdb_header *hdr,
+ TDB_DATA key);
+
+void ctdb_local_remove_from_delete_queue(struct ctdb_db_context *ctdb_db,
+ const struct ctdb_ltdb_header *hdr,
+ const TDB_DATA key);
+
+int32_t ctdb_control_vacuum_fetch(struct ctdb_context *ctdb, TDB_DATA indata);
+
+/* from eventscript.c */
+
+int ctdb_start_eventd(struct ctdb_context *ctdb);
+void ctdb_stop_eventd(struct ctdb_context *ctdb);
+
+int ctdb_event_script_callback(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ void (*callback)(struct ctdb_context *,
+ int, void *),
+ void *private_data,
+ enum ctdb_event call,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(6,7);
+
+int ctdb_event_script_args(struct ctdb_context *ctdb,
+ enum ctdb_event call,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
+
+int ctdb_event_script(struct ctdb_context *ctdb,
+ enum ctdb_event call);
+
+void ctdb_event_reopen_logs(struct ctdb_context *ctdb);
+
+#endif
diff --git a/ctdb/include/ctdb_protocol.h b/ctdb/include/ctdb_protocol.h
new file mode 100644
index 0000000..31af420
--- /dev/null
+++ b/ctdb/include/ctdb_protocol.h
@@ -0,0 +1,301 @@
+/*
+ ctdb database library
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CTDB_PROTOCOL_H
+#define _CTDB_PROTOCOL_H
+
+#include <sys/socket.h>
+#include "protocol/protocol.h"
+
+/* define ctdb port number */
+#define CTDB_PORT 4379
+
+/* we must align packets to ensure ctdb works on all architectures (eg. sparc) */
+#define CTDB_DS_ALIGNMENT 8
+
+/*
+ structure passed to a ctdb call backend function
+*/
+struct ctdb_call_info {
+ TDB_DATA key; /* record key */
+ struct ctdb_ltdb_header *header;
+ TDB_DATA record_data; /* current data in the record */
+ TDB_DATA *new_data; /* optionally updated record data */
+ TDB_DATA *call_data; /* optionally passed from caller */
+ TDB_DATA *reply_data; /* optionally returned by function */
+ uint32_t status; /* optional reply status - defaults to zero */
+};
+
+/*
+ ctdb flags
+*/
+#define CTDB_FLAG_TORTURE (1<<1)
+
+struct ctdb_script_list_old {
+ uint32_t num_scripts;
+ struct ctdb_script scripts[1];
+};
+
+/* Mapping from enum to names. */
+extern const char *ctdb_eventscript_call_names[];
+
+/*
+ packet structures
+*/
+struct ctdb_req_call_old {
+ struct ctdb_req_header hdr; /* packet header is the first member */
+ uint32_t flags;
+ uint32_t db_id;
+ uint32_t callid;
+ uint32_t hopcount;
+ uint32_t keylen; /* presumably the length of key[] within data[] - TODO confirm */
+ uint32_t calldatalen; /* presumably the length of calldata[] within data[] - TODO confirm */
+ uint8_t data[1]; /* key[] followed by calldata[] */
+};
+
+struct ctdb_reply_call_old {
+ struct ctdb_req_header hdr;
+ uint32_t status;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+struct ctdb_reply_error_old {
+ struct ctdb_req_header hdr;
+ uint32_t status;
+ uint32_t msglen;
+ uint8_t msg[1];
+};
+
+struct ctdb_req_dmaster_old {
+ struct ctdb_req_header hdr;
+ uint32_t db_id;
+ uint64_t rsn;
+ uint32_t dmaster;
+ uint32_t keylen;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+struct ctdb_reply_dmaster_old {
+ struct ctdb_req_header hdr;
+ uint32_t db_id;
+ uint64_t rsn;
+ uint32_t keylen;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+struct ctdb_req_message_old {
+ struct ctdb_req_header hdr;
+ uint64_t srvid;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+struct ctdb_req_control_old {
+ struct ctdb_req_header hdr; /* packet header is the first member */
+ uint32_t opcode; /* presumably an enum ctdb_controls value - TODO confirm */
+ uint32_t pad; /* NOTE(review): looks like padding so that srvid starts on an 8-byte boundary - confirm */
+ uint64_t srvid;
+ uint32_t client_id;
+ uint32_t flags;
+ uint32_t datalen; /* presumably the number of bytes carried in data[] - TODO confirm */
+ uint8_t data[1]; /* declared with one element; assumed to be a variable-length payload - TODO confirm */
+};
+
+struct ctdb_reply_control_old {
+ struct ctdb_req_header hdr; /* packet header is the first member */
+ int32_t status; /* signed here; ctdb_reply_call_old and ctdb_reply_error_old use uint32_t status */
+ uint32_t datalen;
+ uint32_t errorlen;
+ uint8_t data[1]; /* NOTE(review): presumably the reply data followed by an error string, sized by datalen and errorlen - confirm */
+};
+
+struct ctdb_req_keepalive_old {
+ struct ctdb_req_header hdr;
+ uint32_t version;
+ uint32_t uptime;
+};
+
+struct ctdb_req_tunnel_old {
+ struct ctdb_req_header hdr;
+ uint64_t tunnel_id;
+ uint32_t flags;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ Structure used for a nodemap.
+ The nodemap is the structure containing a list of all nodes
+ known to the cluster and their associated flags.
+*/
+struct ctdb_node_map_old {
+ uint32_t num;
+ struct ctdb_node_and_flags nodes[1];
+};
+
+struct ctdb_public_ip_list_old {
+ uint32_t num;
+ struct ctdb_public_ip ips[1];
+};
+
+/*
+ structure used to pass record data between the child and parent
+ */
+struct ctdb_rec_data_old {
+ uint32_t length;
+ uint32_t reqid;
+ uint32_t keylen;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * wire format for statistics history
+ */
+struct ctdb_statistics_list_old {
+ uint32_t num;
+ struct ctdb_statistics stats[1];
+};
+
+/*
+ * db statistics: same members as struct ctdb_db_statistics, plus hot_keys_wire[]
+ */
+struct ctdb_db_statistics_old {
+ struct {
+ uint32_t num_calls;
+ uint32_t num_current;
+ uint32_t num_pending;
+ uint32_t num_failed;
+ struct ctdb_latency_counter latency;
+ uint32_t buckets[MAX_COUNT_BUCKETS];
+ } locks;
+ struct {
+ struct ctdb_latency_counter latency;
+ } vacuum;
+ uint32_t db_ro_delegations;
+ uint32_t db_ro_revokes;
+ uint32_t hop_count_bucket[MAX_COUNT_BUCKETS];
+ uint32_t num_hot_keys; /* presumably how many hot_keys[] entries are in use - TODO confirm */
+ struct {
+ uint32_t count;
+ TDB_DATA key;
+ } hot_keys[MAX_HOT_KEYS];
+ char hot_keys_wire[1]; /* declared with one element; NOTE(review): presumably the marshalled hot key bytes follow here - confirm */
+};
+
+/*
+ a wire representation of the vnn map
+ */
+struct ctdb_vnn_map_wire {
+ uint32_t generation;
+ uint32_t size;
+ uint32_t map[1];
+};
+
+struct ctdb_notify_data_old {
+ uint64_t srvid;
+ uint32_t len;
+ uint8_t notify_data[1];
+};
+
+/* table that contains a list of all dbids on a node
+ */
+struct ctdb_dbid_map_old {
+ uint32_t num;
+ struct ctdb_dbid dbs[1];
+};
+
+/* the list of tcp tickles used by get/set tcp tickle list */
+struct ctdb_tickle_list_old {
+ ctdb_sock_addr addr;
+ uint32_t num;
+ struct ctdb_connection connections[1];
+};
+
+/*
+ struct holding a ctdb_sock_addr and an interface name,
+ used to add/remove public addresses and gratuitous arp
+ */
+struct ctdb_addr_info_old {
+ ctdb_sock_addr addr;
+ uint32_t mask;
+ uint32_t len;
+ char iface[1];
+};
+
+/* structure used for sending lists of records */
+struct ctdb_marshall_buffer {
+ uint32_t db_id;
+ uint32_t count;
+ uint8_t data[1];
+};
+
+/*
+ structure for setting a tunable
+ */
+struct ctdb_tunable_old {
+ uint32_t value;
+ uint32_t length;
+ uint8_t name[1];
+};
+
+/*
+ structure for getting a tunable
+ */
+struct ctdb_control_get_tunable {
+ uint32_t length;
+ uint8_t name[1];
+};
+
+/*
+ structure for listing tunables
+ */
+struct ctdb_control_list_tunable {
+ uint32_t length;
+ /* returns a : separated list of tunable names */
+ uint8_t data[1];
+};
+
+
+struct ctdb_public_ip_info_old {
+ struct ctdb_public_ip ip;
+ uint32_t active_idx;
+ uint32_t num;
+ struct ctdb_iface ifaces[1];
+};
+
+struct ctdb_iface_list_old {
+ uint32_t num;
+ struct ctdb_iface ifaces[1];
+};
+
+/**
+ * structure to pass to a schedule_for_deletion_control
+ */
+struct ctdb_control_schedule_for_deletion {
+ uint32_t db_id;
+ struct ctdb_ltdb_header hdr;
+ uint32_t keylen;
+ uint8_t key[1]; /* key[] */
+};
+
+#endif
diff --git a/ctdb/include/public/README.txt b/ctdb/include/public/README.txt
new file mode 100644
index 0000000..534e9b7
--- /dev/null
+++ b/ctdb/include/public/README.txt
@@ -0,0 +1,6 @@
+DO NOT REMOVE
+
+This is a placeholder to allow for build rules putting public headers
+in this directory. Using this directory allows us to ensure that our
+public headers will work with external applications that make use of
+Samba libraries
diff --git a/ctdb/include/public/util/README.txt b/ctdb/include/public/util/README.txt
new file mode 100644
index 0000000..534e9b7
--- /dev/null
+++ b/ctdb/include/public/util/README.txt
@@ -0,0 +1,6 @@
+DO NOT REMOVE
+
+This is a placeholder to allow for build rules putting public headers
+in this directory. Using this directory allows us to ensure that our
+public headers will work with external applications that make use of
+Samba libraries
diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h
new file mode 100644
index 0000000..009a0df
--- /dev/null
+++ b/ctdb/protocol/protocol.h
@@ -0,0 +1,1079 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PROTOCOL_H__
+#define __CTDB_PROTOCOL_H__
+
+#include <tdb.h>
+
+#define CTDB_MAGIC 0x43544442 /* CTDB */
+#define CTDB_PROTOCOL 1
+
+enum ctdb_operation {
+ CTDB_REQ_CALL = 0,
+ CTDB_REPLY_CALL = 1,
+ CTDB_REQ_DMASTER = 2,
+ CTDB_REPLY_DMASTER = 3,
+ CTDB_REPLY_ERROR = 4,
+ CTDB_REQ_MESSAGE = 5,
+ /* #6 removed */
+ CTDB_REQ_CONTROL = 7,
+ CTDB_REPLY_CONTROL = 8,
+ CTDB_REQ_KEEPALIVE = 9,
+ CTDB_REQ_TUNNEL = 10,
+};
+
+/* used on the domain socket, send a pdu to the local daemon */
+#define CTDB_CURRENT_NODE 0xF0000001
+/* send a broadcast to all nodes in the cluster, active or not */
+#define CTDB_BROADCAST_ALL 0xF0000002
+/* send a broadcast to all nodes in the current vnn map */
+#define CTDB_BROADCAST_ACTIVE 0xF0000003
+/* send a broadcast to all connected nodes */
+#define CTDB_BROADCAST_CONNECTED 0xF0000004
+/* send a broadcast to selected connected nodes */
+#define CTDB_MULTICAST 0xF0000005
+
+#define CTDB_UNKNOWN_PNN 0xFFFFFFFF
+
+/* the key used to store persistent db sequence number */
+#define CTDB_DB_SEQNUM_KEY "__db_sequence_number__"
+
+struct ctdb_req_header {
+ uint32_t length;
+ uint32_t ctdb_magic; /* presumably CTDB_MAGIC - TODO confirm */
+ uint32_t ctdb_version; /* presumably CTDB_PROTOCOL - TODO confirm */
+ uint32_t generation;
+ uint32_t operation; /* presumably an enum ctdb_operation value - TODO confirm */
+ uint32_t destnode; /* NOTE(review): CTDB_CURRENT_NODE, CTDB_BROADCAST_* and CTDB_MULTICAST look like special values for this field - confirm */
+ uint32_t srcnode;
+ uint32_t reqid;
+};
+
+struct ctdb_req_call {
+ uint32_t flags;
+ uint32_t db_id;
+ uint32_t callid;
+ uint32_t hopcount;
+ TDB_DATA key;
+ TDB_DATA calldata;
+};
+
+struct ctdb_reply_call {
+ int32_t status;
+ TDB_DATA data;
+};
+
+struct ctdb_reply_error {
+ int32_t status;
+ TDB_DATA msg;
+};
+
+struct ctdb_req_dmaster {
+ uint32_t db_id;
+ uint64_t rsn;
+ uint32_t dmaster;
+ TDB_DATA key;
+ TDB_DATA data;
+};
+
+struct ctdb_reply_dmaster {
+ uint32_t db_id;
+ uint64_t rsn;
+ TDB_DATA key;
+ TDB_DATA data;
+};
+
+#define CTDB_NULL_FUNC 0xFF000001
+#define CTDB_FETCH_FUNC 0xFF000002
+#define CTDB_FETCH_WITH_HEADER_FUNC 0xFF000003
+
+struct ctdb_call {
+ int call_id;
+ TDB_DATA key;
+ TDB_DATA call_data;
+ TDB_DATA reply_data;
+ uint32_t status;
+#define CTDB_IMMEDIATE_MIGRATION 0x00000001
+#define CTDB_CALL_FLAG_VACUUM_MIGRATION 0x00000002
+#define CTDB_WANT_READONLY 0x00000004
+ uint32_t flags; /* presumably a bitmask of the three single-bit values defined just above - TODO confirm */
+};
+
+/* SRVID to catch all messages */
+#define CTDB_SRVID_ALL (~(uint64_t)0)
+
+/* SRVID prefix used during recovery for pulling and pushing databases */
+#define CTDB_SRVID_RECOVERY 0xF001000000000000LL
+
+/* SRVID to assign banning credits */
+#define CTDB_SRVID_BANNING 0xF002000000000000LL
+
+/* SRVID to inform of election data */
+#define CTDB_SRVID_ELECTION 0xF100000000000000LL
+
+/* SRVID for broadcasting leader */
+#define CTDB_SRVID_LEADER 0xF101000000000000LL
+
+/* SRVID to inform clients that the cluster has been reconfigured */
+#define CTDB_SRVID_RECONFIGURE 0xF200000000000000LL
+
+/* SRVID to inform clients an IP address has been released */
+#define CTDB_SRVID_RELEASE_IP 0xF300000000000000LL
+
+/* SRVID to inform clients that an IP address has been taken over */
+#define CTDB_SRVID_TAKE_IP 0xF301000000000000LL
+
+/* SRVID to inform clients that CTDB_EVENT_IPREALLOCATED finished */
+#define CTDB_SRVID_IPREALLOCATED 0xF302000000000000LL
+
+/* SRVID to inform recovery daemon of the node flags - OBSOLETE */
+#define CTDB_SRVID_SET_NODE_FLAGS 0xF400000000000000LL
+
+/* SRVID to inform recovery daemon to update public ip assignment */
+#define CTDB_SRVID_RECD_UPDATE_IP 0xF500000000000000LL
+
+/* SRVID to inform recovery daemon to migrate a set of records */
+#define CTDB_SRVID_VACUUM_FETCH 0xF700000000000000LL
+
+/* SRVID to inform recovery daemon to detach a database */
+#define CTDB_SRVID_DETACH_DATABASE 0xF701000000000000LL
+
+/* SRVID to inform recovery daemon to dump talloc memdump to the log */
+#define CTDB_SRVID_MEM_DUMP 0xF800000000000000LL
+
+/* SRVID to inform recovery daemon to send logs */
+#define CTDB_SRVID_GETLOG 0xF801000000000000LL
+
+/* SRVID to inform recovery daemon to clear logs */
+#define CTDB_SRVID_CLEARLOG 0xF802000000000000LL
+
+/* SRVID to inform recovery daemon to push the node flags to other nodes */
+#define CTDB_SRVID_PUSH_NODE_FLAGS 0xF900000000000000LL
+
+/* SRVID to inform recovery daemon to reload the nodes file */
+#define CTDB_SRVID_RELOAD_NODES 0xFA00000000000000LL
+
+/* SRVID to inform recovery daemon to perform a takeover run */
+#define CTDB_SRVID_TAKEOVER_RUN 0xFB00000000000000LL
+
+/* SRVID to inform recovery daemon to rebalance ips for a node. */
+#define CTDB_SRVID_REBALANCE_NODE 0xFB01000000000000LL
+
+/* SRVID to inform recovery daemon to stop takeover runs from occurring */
+#define CTDB_SRVID_DISABLE_TAKEOVER_RUNS 0xFB03000000000000LL
+
+/* SRVID to inform recovery daemon to stop recoveries from occurring */
+#define CTDB_SRVID_DISABLE_RECOVERIES 0xFB04000000000000LL
+
+/* SRVID to inform recovery daemon to disable the public ip checks */
+#define CTDB_SRVID_DISABLE_IP_CHECK 0xFC00000000000000LL
+
+/* A range of ports reserved for registering a PID (top 8 bits)
+ * All ports matching the 8 top bits are reserved for exclusive use by
+ * registering a SRVID that matches the process-id of the requesting process
+ */
+#define CTDB_SRVID_PID_RANGE 0x0000000000000000LL
+
+/* A range of ports reserved for samba (top 8 bits)
+ * All ports matching the 8 top bits are reserved for exclusive use by
+ * CIFS server
+ */
+#define CTDB_SRVID_SAMBA_NOTIFY 0xFE00000000000000LL
+#define CTDB_SRVID_SAMBA_RANGE 0xFE00000000000000LL
+
+/* A range of ports reserved for a CTDB NFS server (top 8 bits)
+ * All ports matching the 8 top bits are reserved for exclusive use by
+ * NFS server
+ */
+#define CTDB_SRVID_NFSD_RANGE 0xEE00000000000000LL
+
+/* A range of ports reserved for a CTDB ISCSI server (top 8 bits)
+ * All ports matching the 8 top bits are reserved for exclusive use by
+ * ISCSI server
+ */
+#define CTDB_SRVID_ISCSID_RANGE 0xDE00000000000000LL
+
+/* A range of ports reserved for CTDB tool (top 8 bits)
+ * All ports matching the 8 top bits are reserved for exclusive use by
+ * CTDB tool
+ */
+#define CTDB_SRVID_TOOL_RANGE 0xCE00000000000000LL
+
+/* A range of ports reserved by client (top 8 bits)
+ * All ports matching the 8 top bits are reserved for exclusive use by
+ * CTDB client code
+ */
+#define CTDB_SRVID_CLIENT_RANGE 0xBE00000000000000LL
+
+/* Range of ports reserved for test applications (top 8 bits)
+ * All ports matching the 8 top bits are reserved for exclusive use by
+ * test applications
+ */
+#define CTDB_SRVID_TEST_RANGE 0xAE00000000000000LL
+
+
+enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
+ CTDB_CONTROL_STATISTICS = 1,
+ /* #2 removed */
+ CTDB_CONTROL_PING = 3,
+ CTDB_CONTROL_GETDBPATH = 4,
+ CTDB_CONTROL_GETVNNMAP = 5,
+ CTDB_CONTROL_SETVNNMAP = 6,
+ CTDB_CONTROL_GET_DEBUG = 7,
+ CTDB_CONTROL_SET_DEBUG = 8,
+ CTDB_CONTROL_GET_DBMAP = 9,
+ CTDB_CONTROL_GET_NODEMAPv4 = 10, /* obsolete */
+ CTDB_CONTROL_SET_DMASTER = 11, /* obsolete */
+ /* #12 removed */
+ CTDB_CONTROL_PULL_DB = 13, /* obsolete */
+ CTDB_CONTROL_PUSH_DB = 14, /* obsolete */
+ CTDB_CONTROL_GET_RECMODE = 15,
+ CTDB_CONTROL_SET_RECMODE = 16,
+ CTDB_CONTROL_STATISTICS_RESET = 17,
+ CTDB_CONTROL_DB_ATTACH = 18,
+ CTDB_CONTROL_SET_CALL = 19, /* obsolete */
+ CTDB_CONTROL_TRAVERSE_START = 20,
+ CTDB_CONTROL_TRAVERSE_ALL = 21,
+ CTDB_CONTROL_TRAVERSE_DATA = 22,
+ CTDB_CONTROL_REGISTER_SRVID = 23,
+ CTDB_CONTROL_DEREGISTER_SRVID = 24,
+ CTDB_CONTROL_GET_DBNAME = 25,
+ CTDB_CONTROL_ENABLE_SEQNUM = 26,
+ CTDB_CONTROL_UPDATE_SEQNUM = 27,
+ /* #28 removed */
+ CTDB_CONTROL_DUMP_MEMORY = 29,
+ CTDB_CONTROL_GET_PID = 30,
+ CTDB_CONTROL_GET_RECMASTER = 31, /* obsolete */
+ CTDB_CONTROL_SET_RECMASTER = 32, /* obsolete */
+ CTDB_CONTROL_FREEZE = 33,
+ CTDB_CONTROL_THAW = 34, /* obsolete */
+ CTDB_CONTROL_GET_PNN = 35,
+ CTDB_CONTROL_SHUTDOWN = 36,
+ CTDB_CONTROL_GET_MONMODE = 37, /* obsolete */
+ /* #38 removed */
+ /* #39 removed */
+ /* #40 removed */
+ /* #41 removed */
+ CTDB_CONTROL_TAKEOVER_IPv4 = 42, /* obsolete */
+ CTDB_CONTROL_RELEASE_IPv4 = 43, /* obsolete */
+ CTDB_CONTROL_TCP_CLIENT = 44,
+ CTDB_CONTROL_TCP_ADD = 45,
+ CTDB_CONTROL_TCP_REMOVE = 46,
+ CTDB_CONTROL_STARTUP = 47,
+ CTDB_CONTROL_SET_TUNABLE = 48,
+ CTDB_CONTROL_GET_TUNABLE = 49,
+ CTDB_CONTROL_LIST_TUNABLES = 50,
+ CTDB_CONTROL_GET_PUBLIC_IPSv4 = 51, /* obsolete */
+ CTDB_CONTROL_MODIFY_FLAGS = 52,
+ CTDB_CONTROL_GET_ALL_TUNABLES = 53,
+ CTDB_CONTROL_KILL_TCP = 54, /* obsolete */
+ CTDB_CONTROL_GET_TCP_TICKLE_LIST = 55,
+ CTDB_CONTROL_SET_TCP_TICKLE_LIST = 56,
+ CTDB_CONTROL_REGISTER_SERVER_ID = 57, /* obsolete */
+ CTDB_CONTROL_UNREGISTER_SERVER_ID = 58, /* obsolete */
+ CTDB_CONTROL_CHECK_SERVER_ID = 59, /* obsolete */
+ CTDB_CONTROL_GET_SERVER_ID_LIST = 60, /* obsolete */
+ CTDB_CONTROL_DB_ATTACH_PERSISTENT = 61,
+ CTDB_CONTROL_PERSISTENT_STORE = 62, /* obsolete */
+ CTDB_CONTROL_UPDATE_RECORD = 63,
+ CTDB_CONTROL_SEND_GRATUITOUS_ARP = 64,
+ CTDB_CONTROL_TRANSACTION_START = 65, /* obsolete */
+ CTDB_CONTROL_TRANSACTION_COMMIT = 66, /* obsolete */
+ CTDB_CONTROL_WIPE_DATABASE = 67,
+ /* #68 removed */
+ CTDB_CONTROL_UPTIME = 69,
+ CTDB_CONTROL_START_RECOVERY = 70,
+ CTDB_CONTROL_END_RECOVERY = 71,
+ CTDB_CONTROL_RELOAD_NODES_FILE = 72,
+ /* #73 removed */
+ CTDB_CONTROL_TRY_DELETE_RECORDS = 74,
+ CTDB_CONTROL_ENABLE_MONITOR = 75, /* obsolete */
+ CTDB_CONTROL_DISABLE_MONITOR = 76, /* obsolete */
+ CTDB_CONTROL_ADD_PUBLIC_IP = 77,
+ CTDB_CONTROL_DEL_PUBLIC_IP = 78,
+ CTDB_CONTROL_RUN_EVENTSCRIPTS = 79, /* obsolete */
+ CTDB_CONTROL_GET_CAPABILITIES = 80,
+ CTDB_CONTROL_START_PERSISTENT_UPDATE = 81, /* obsolete */
+ CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE= 82, /* obsolete */
+ CTDB_CONTROL_TRANS2_COMMIT = 83, /* obsolete */
+ CTDB_CONTROL_TRANS2_FINISHED = 84, /* obsolete */
+ CTDB_CONTROL_TRANS2_ERROR = 85, /* obsolete */
+ CTDB_CONTROL_TRANS2_COMMIT_RETRY = 86, /* obsolete */
+ CTDB_CONTROL_RECD_PING = 87,
+ CTDB_CONTROL_RELEASE_IP = 88,
+ CTDB_CONTROL_TAKEOVER_IP = 89,
+ CTDB_CONTROL_GET_PUBLIC_IPS = 90,
+ CTDB_CONTROL_GET_NODEMAP = 91,
+ /* #92 - #95 missing */
+ CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS = 96, /* obsolete */
+ CTDB_CONTROL_TRAVERSE_KILL = 97,
+ CTDB_CONTROL_RECD_RECLOCK_LATENCY = 98,
+ CTDB_CONTROL_GET_RECLOCK_FILE = 99,
+ CTDB_CONTROL_SET_RECLOCK_FILE = 100, /* obsolete */
+ CTDB_CONTROL_STOP_NODE = 101,
+ CTDB_CONTROL_CONTINUE_NODE = 102,
+ CTDB_CONTROL_SET_NATGWSTATE = 103, /* obsolete */
+ CTDB_CONTROL_SET_LMASTERROLE = 104,
+ CTDB_CONTROL_SET_RECMASTERROLE = 105,
+ CTDB_CONTROL_ENABLE_SCRIPT = 107, /* obsolete */
+ CTDB_CONTROL_DISABLE_SCRIPT = 108, /* obsolete */
+ CTDB_CONTROL_SET_BAN_STATE = 109,
+ CTDB_CONTROL_GET_BAN_STATE = 110,
+ CTDB_CONTROL_SET_DB_PRIORITY = 111, /* obsolete */
+ CTDB_CONTROL_GET_DB_PRIORITY = 112, /* obsolete */
+ CTDB_CONTROL_TRANSACTION_CANCEL = 113, /* obsolete */
+ CTDB_CONTROL_REGISTER_NOTIFY = 114,
+ CTDB_CONTROL_DEREGISTER_NOTIFY = 115,
+ CTDB_CONTROL_TRANS2_ACTIVE = 116, /* obsolete */
+ CTDB_CONTROL_GET_LOG = 117, /* obsolete */
+ CTDB_CONTROL_CLEAR_LOG = 118, /* obsolete */
+ CTDB_CONTROL_TRANS3_COMMIT = 119,
+ CTDB_CONTROL_GET_DB_SEQNUM = 120,
+ CTDB_CONTROL_DB_SET_HEALTHY = 121,
+ CTDB_CONTROL_DB_GET_HEALTH = 122,
+ CTDB_CONTROL_GET_PUBLIC_IP_INFO = 123,
+ CTDB_CONTROL_GET_IFACES = 124,
+ CTDB_CONTROL_SET_IFACE_LINK_STATE = 125,
+ CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE = 126,
+ CTDB_CONTROL_GET_STAT_HISTORY = 127,
+ CTDB_CONTROL_SCHEDULE_FOR_DELETION = 128,
+ CTDB_CONTROL_SET_DB_READONLY = 129,
+ CTDB_CONTROL_CHECK_SRVIDS = 130, /* obsolete */
+ CTDB_CONTROL_TRAVERSE_START_EXT = 131,
+ CTDB_CONTROL_GET_DB_STATISTICS = 132,
+ CTDB_CONTROL_SET_DB_STICKY = 133,
+ CTDB_CONTROL_RELOAD_PUBLIC_IPS = 134,
+ CTDB_CONTROL_TRAVERSE_ALL_EXT = 135,
+ CTDB_CONTROL_RECEIVE_RECORDS = 136, /* obsolete */
+ CTDB_CONTROL_IPREALLOCATED = 137,
+ CTDB_CONTROL_GET_RUNSTATE = 138,
+ CTDB_CONTROL_DB_DETACH = 139,
+ CTDB_CONTROL_GET_NODES_FILE = 140,
+ CTDB_CONTROL_DB_FREEZE = 141,
+ CTDB_CONTROL_DB_THAW = 142,
+ CTDB_CONTROL_DB_TRANSACTION_START = 143,
+ CTDB_CONTROL_DB_TRANSACTION_COMMIT = 144,
+ CTDB_CONTROL_DB_TRANSACTION_CANCEL = 145,
+ CTDB_CONTROL_DB_PULL = 146,
+ CTDB_CONTROL_DB_PUSH_START = 147,
+ CTDB_CONTROL_DB_PUSH_CONFIRM = 148,
+ CTDB_CONTROL_DB_OPEN_FLAGS = 149,
+ CTDB_CONTROL_DB_ATTACH_REPLICATED = 150,
+ CTDB_CONTROL_CHECK_PID_SRVID = 151,
+ CTDB_CONTROL_TUNNEL_REGISTER = 152,
+ CTDB_CONTROL_TUNNEL_DEREGISTER = 153,
+ CTDB_CONTROL_VACUUM_FETCH = 154,
+ CTDB_CONTROL_DB_VACUUM = 155,
+ CTDB_CONTROL_ECHO_DATA = 156,
+ CTDB_CONTROL_DISABLE_NODE = 157,
+ CTDB_CONTROL_ENABLE_NODE = 158,
+ CTDB_CONTROL_TCP_CLIENT_DISCONNECTED = 159,
+ CTDB_CONTROL_TCP_CLIENT_PASSED = 160,
+};
+
+#define MAX_COUNT_BUCKETS 16
+#define MAX_HOT_KEYS 10
+
+struct ctdb_latency_counter {
+ int num;
+ double min;
+ double max;
+ double total;
+};
+
+struct ctdb_statistics {
+ uint32_t num_clients;
+ uint32_t frozen;
+ uint32_t recovering;
+ uint32_t client_packets_sent;
+ uint32_t client_packets_recv;
+ uint32_t node_packets_sent;
+ uint32_t node_packets_recv;
+ uint32_t keepalive_packets_sent;
+ uint32_t keepalive_packets_recv;
+ struct {
+ uint32_t req_call;
+ uint32_t reply_call;
+ uint32_t req_dmaster;
+ uint32_t reply_dmaster;
+ uint32_t reply_error;
+ uint32_t req_message;
+ uint32_t req_control;
+ uint32_t reply_control;
+ uint32_t req_tunnel;
+ } node;
+ struct {
+ uint32_t req_call;
+ uint32_t req_message;
+ uint32_t req_control;
+ uint32_t req_tunnel;
+ } client;
+ struct {
+ uint32_t call;
+ uint32_t control;
+ uint32_t traverse;
+ } timeouts;
+ struct {
+ struct ctdb_latency_counter ctdbd;
+ struct ctdb_latency_counter recd;
+ } reclock;
+ struct {
+ uint32_t num_calls;
+ uint32_t num_current;
+ uint32_t num_pending;
+ uint32_t num_failed;
+ struct ctdb_latency_counter latency;
+ uint32_t buckets[MAX_COUNT_BUCKETS];
+ } locks;
+ uint32_t total_calls;
+ uint32_t pending_calls;
+ uint32_t childwrite_calls;
+ uint32_t pending_childwrite_calls;
+ uint32_t memory_used;
+ uint32_t __last_counter; /* hack */
+ uint32_t max_hop_count;
+ uint32_t hop_count_bucket[MAX_COUNT_BUCKETS];
+ struct ctdb_latency_counter call_latency;
+ struct ctdb_latency_counter childwrite_latency;
+ uint32_t num_recoveries;
+ struct timeval statistics_start_time;
+ struct timeval statistics_current_time;
+ uint32_t total_ro_delegations;
+ uint32_t total_ro_revokes;
+};
+
+#define INVALID_GENERATION 1
+/* table that contains the mapping between a hash value and lmaster
+ */
+struct ctdb_vnn_map {
+ uint32_t generation;
+ uint32_t size;
+ uint32_t *map;
+};
+
+struct ctdb_dbid {
+ uint32_t db_id;
+#define CTDB_DB_FLAGS_PERSISTENT 0x01
+#define CTDB_DB_FLAGS_READONLY 0x02
+#define CTDB_DB_FLAGS_STICKY 0x04
+#define CTDB_DB_FLAGS_REPLICATED 0x08
+ uint8_t flags; /* presumably a bitmask of the CTDB_DB_FLAGS_* values above - TODO confirm */
+};
+
+struct ctdb_dbid_map {
+ uint32_t num;
+ struct ctdb_dbid *dbs;
+};
+
+struct ctdb_pulldb {
+ uint32_t db_id;
+#define CTDB_LMASTER_ANY 0xffffffff
+ uint32_t lmaster; /* NOTE(review): CTDB_LMASTER_ANY looks like a wildcard value for this field - confirm */
+};
+
+struct ctdb_pulldb_ext {
+ uint32_t db_id;
+ uint32_t lmaster;
+ uint64_t srvid;
+};
+
+#define CTDB_RECOVERY_NORMAL 0
+#define CTDB_RECOVERY_ACTIVE 1
+
+/*
+ the extended header for records in the ltdb
+*/
+struct ctdb_ltdb_header {
+ uint64_t rsn;
+ uint32_t dmaster;
+ uint32_t reserved1;
+#define CTDB_REC_FLAG_DEFAULT 0x00000000
+#define CTDB_REC_FLAG_MIGRATED_WITH_DATA 0x00010000
+#define CTDB_REC_FLAG_VACUUM_MIGRATED 0x00020000
+#define CTDB_REC_FLAG_AUTOMATIC 0x00040000
+#define CTDB_REC_RO_HAVE_DELEGATIONS 0x01000000
+#define CTDB_REC_RO_HAVE_READONLY 0x02000000
+#define CTDB_REC_RO_REVOKING_READONLY 0x04000000
+#define CTDB_REC_RO_REVOKE_COMPLETE 0x08000000
+#define CTDB_REC_RO_FLAGS (CTDB_REC_RO_HAVE_DELEGATIONS|\
+ CTDB_REC_RO_HAVE_READONLY|\
+ CTDB_REC_RO_REVOKING_READONLY|\
+ CTDB_REC_RO_REVOKE_COMPLETE)
+ uint32_t flags; /* presumably holds CTDB_REC_* bits; CTDB_REC_RO_FLAGS above is the OR of the four CTDB_REC_RO_* values */
+};
+
+struct ctdb_rec_data {
+ uint32_t reqid;
+ struct ctdb_ltdb_header *header;
+ TDB_DATA key, data;
+};
+
+struct ctdb_rec_buffer {
+ uint32_t db_id;
+ uint32_t count;
+ uint8_t *buf;
+ size_t buflen;
+};
+
+typedef int (*ctdb_rec_parser_func_t)(uint32_t reqid,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA key, TDB_DATA data,
+ void *private_data);
+
+struct ctdb_traverse_start {
+ uint32_t db_id;
+ uint32_t reqid;
+ uint64_t srvid;
+};
+
+struct ctdb_traverse_all {
+ uint32_t db_id;
+ uint32_t reqid;
+ uint32_t pnn;
+ uint32_t client_reqid;
+ uint64_t srvid;
+};
+
+struct ctdb_traverse_start_ext {
+ uint32_t db_id;
+ uint32_t reqid;
+ uint64_t srvid;
+ bool withemptyrecords;
+};
+
+struct ctdb_traverse_all_ext {
+ uint32_t db_id;
+ uint32_t reqid;
+ uint32_t pnn;
+ uint32_t client_reqid;
+ uint64_t srvid;
+ bool withemptyrecords;
+};
+
+typedef union {
+ struct sockaddr sa; /* generic sockaddr view */
+ struct sockaddr_in ip; /* sockaddr_in view of the same storage */
+ struct sockaddr_in6 ip6; /* sockaddr_in6 view of the same storage */
+} ctdb_sock_addr;
+
+struct ctdb_connection {
+ union { /* anonymous union: src and server are two names for the same storage */
+ ctdb_sock_addr src;
+ ctdb_sock_addr server;
+ };
+ union { /* anonymous union: dst and client are two names for the same storage */
+ ctdb_sock_addr dst;
+ ctdb_sock_addr client;
+ };
+};
+
+struct ctdb_connection_list {
+ uint32_t num;
+ struct ctdb_connection *conn;
+};
+
+struct ctdb_tunable {
+ const char *name;
+ uint32_t value;
+};
+
+struct ctdb_var_list {
+ int count;
+ const char **var;
+};
+
+struct ctdb_node_flag_change {
+ uint32_t pnn;
+ uint32_t new_flags;
+ uint32_t old_flags;
+};
+
+/* all tunable variables go in here */
+struct ctdb_tunable_list {
+ uint32_t max_redirect_count;
+ uint32_t seqnum_interval; /* unit is ms */
+ uint32_t control_timeout;
+ uint32_t traverse_timeout;
+ uint32_t keepalive_interval;
+ uint32_t keepalive_limit;
+ uint32_t recover_timeout;
+ uint32_t recover_interval;
+ uint32_t election_timeout;
+ uint32_t takeover_timeout;
+ uint32_t monitor_interval;
+ uint32_t tickle_update_interval;
+ uint32_t script_timeout;
+ uint32_t monitor_timeout_count; /* allow dodgy scripts to hang this many times in a row before we mark the node unhealthy */
+ uint32_t script_unhealthy_on_timeout; /* obsolete */
+ uint32_t recovery_grace_period;
+ uint32_t recovery_ban_period;
+ uint32_t database_hash_size;
+ uint32_t database_max_dead;
+ uint32_t rerecovery_timeout;
+ uint32_t enable_bans;
+ uint32_t deterministic_public_ips;
+ uint32_t reclock_ping_period;
+ uint32_t no_ip_failback;
+ uint32_t disable_ip_failover;
+ uint32_t verbose_memory_names;
+ uint32_t recd_ping_timeout;
+ uint32_t recd_ping_failcount;
+ uint32_t log_latency_ms;
+ uint32_t reclock_latency_ms;
+ uint32_t recovery_drop_all_ips;
+ uint32_t verify_recovery_lock;
+ uint32_t vacuum_interval;
+ uint32_t vacuum_max_run_time;
+ uint32_t repack_limit;
+ uint32_t vacuum_limit;
+ uint32_t max_queue_depth_drop_msg;
+ uint32_t allow_unhealthy_db_read;
+ uint32_t stat_history_interval;
+ uint32_t deferred_attach_timeout;
+ uint32_t vacuum_fast_path_count;
+ uint32_t lcp2_public_ip_assignment;
+ uint32_t allow_client_db_attach;
+ uint32_t recover_pdb_by_seqnum;
+ uint32_t deferred_rebalance_on_node_add;
+ uint32_t fetch_collapse;
+ uint32_t hopcount_make_sticky;
+ uint32_t sticky_duration;
+ uint32_t sticky_pindown;
+ uint32_t no_ip_takeover;
+ uint32_t db_record_count_warn;
+ uint32_t db_record_size_warn;
+ uint32_t db_size_warn;
+ uint32_t pulldb_preallocation_size;
+ uint32_t no_ip_host_on_all_disabled;
+ uint32_t samba3_hack;
+ uint32_t mutex_enabled;
+ uint32_t lock_processes_per_db;
+ uint32_t rec_buffer_size_limit;
+ uint32_t queue_buffer_size;
+ uint32_t ip_alloc_algorithm;
+ uint32_t allow_mixed_versions;
+};
+
+struct ctdb_tickle_list {
+ ctdb_sock_addr addr;
+ uint32_t num;
+ struct ctdb_connection *conn;
+};
+
+struct ctdb_addr_info {
+ ctdb_sock_addr addr;
+ uint32_t mask;
+ const char *iface;
+};
+
+struct ctdb_transdb {
+ uint32_t db_id;
+ uint32_t tid;
+};
+
+struct ctdb_uptime {
+ struct timeval current_time;
+ struct timeval ctdbd_start_time;
+ struct timeval last_recovery_started;
+ struct timeval last_recovery_finished;
+};
+
+struct ctdb_public_ip {
+ uint32_t pnn;
+ ctdb_sock_addr addr;
+};
+
+struct ctdb_public_ip_list {
+ uint32_t num;
+ struct ctdb_public_ip *ip;
+};
+
+/*
+ * Node flags
+ */
+#define NODE_FLAGS_DISCONNECTED 0x00000001 /* node isn't connected */
+#define NODE_FLAGS_UNHEALTHY 0x00000002 /* monitoring says node is unhealthy */
+#define NODE_FLAGS_PERMANENTLY_DISABLED 0x00000004 /* administrator has disabled node */
+#define NODE_FLAGS_BANNED 0x00000008 /* recovery daemon has banned the node */
+#define NODE_FLAGS_DELETED 0x00000010 /* this node has been deleted */
+#define NODE_FLAGS_STOPPED 0x00000020 /* this node has been stopped */
+#define NODE_FLAGS_DISABLED (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED)
+#define NODE_FLAGS_INACTIVE (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)
+
+/*
+ * Node capabilities
+ */
+#define CTDB_CAP_RECMASTER 0x00000001
+#define CTDB_CAP_LMASTER 0x00000002
+#define CTDB_CAP_LVS 0x00000004 /* obsolete */
+#define CTDB_CAP_NATGW 0x00000008 /* obsolete */
+
+/*
+ * Node features
+ */
+#define CTDB_CAP_PARALLEL_RECOVERY 0x00010000
+#define CTDB_CAP_FRAGMENTED_CONTROLS 0x00020000
+
+#define CTDB_CAP_FEATURES (CTDB_CAP_PARALLEL_RECOVERY | \
+ CTDB_CAP_FRAGMENTED_CONTROLS)
+
+#define CTDB_CAP_DEFAULT (CTDB_CAP_RECMASTER | \
+ CTDB_CAP_LMASTER | \
+ CTDB_CAP_FEATURES)
+
+struct ctdb_node_and_flags {
+ uint32_t pnn;
+ uint32_t flags;
+ ctdb_sock_addr addr;
+};
+
+struct ctdb_node_map {
+ uint32_t num;
+ struct ctdb_node_and_flags *node;
+};
+
+enum ctdb_event {
+ CTDB_EVENT_INIT, /* CTDB starting up: no args */
+ CTDB_EVENT_SETUP, /* CTDB starting up after transport is ready: no args. */
+ CTDB_EVENT_STARTUP, /* CTDB starting up after initial recovery: no args. */
+ CTDB_EVENT_START_RECOVERY, /* CTDB recovery starting: no args. */
+ CTDB_EVENT_RECOVERED, /* CTDB recovery finished: no args. */
+ CTDB_EVENT_TAKE_IP, /* IP taken: interface, IP address, netmask bits. */
+ CTDB_EVENT_RELEASE_IP, /* IP released: interface, IP address, netmask bits. */
+ CTDB_EVENT_STOPPED, /* Deprecated, do not use. */
+ CTDB_EVENT_MONITOR, /* Please check if service is healthy: no args. */
+ CTDB_EVENT_STATUS, /* Deprecated, do not use. */
+ CTDB_EVENT_SHUTDOWN, /* CTDB shutting down: no args. */
+ CTDB_EVENT_RELOAD, /* Deprecated, do not use. */
+ CTDB_EVENT_UPDATE_IP, /* IP updating: old interface, new interface, IP address, netmask bits. */
+ CTDB_EVENT_IPREALLOCATED, /* when a takeover_run() completes */
+ CTDB_EVENT_MAX
+};
+
+#define MAX_SCRIPT_NAME 31
+#define MAX_SCRIPT_OUTPUT 511
+
+struct ctdb_script {
+ char name[MAX_SCRIPT_NAME+1]; /* MAX_SCRIPT_NAME bytes plus one; the extra byte is presumably for a terminating NUL - confirm */
+ struct timeval start;
+ struct timeval finished;
+ int32_t status;
+ char output[MAX_SCRIPT_OUTPUT+1]; /* MAX_SCRIPT_OUTPUT bytes plus one; the extra byte is presumably for a terminating NUL - confirm */
+};
+
+struct ctdb_script_list {
+ uint32_t num_scripts;
+ struct ctdb_script *script;
+};
+
+struct ctdb_ban_state {
+ uint32_t pnn;
+ uint32_t time;
+};
+
+struct ctdb_notify_data {
+ uint64_t srvid;
+ TDB_DATA data;
+};
+
+#ifdef IFNAMSIZ
+#define CTDB_IFACE_SIZE IFNAMSIZ
+#else
+#define CTDB_IFACE_SIZE 16
+#endif
+
+struct ctdb_iface {
+ char name[CTDB_IFACE_SIZE+2]; /* CTDB_IFACE_SIZE is IFNAMSIZ if that macro is defined at this point, otherwise 16 */
+ uint16_t link_state;
+ uint32_t references;
+};
+
+struct ctdb_iface_list {
+ uint32_t num;
+ struct ctdb_iface *iface;
+};
+
+struct ctdb_public_ip_info {
+ struct ctdb_public_ip ip;
+ uint32_t active_idx;
+ struct ctdb_iface_list *ifaces;
+};
+
+struct ctdb_statistics_list {
+ int num;
+ struct ctdb_statistics *stats;
+};
+
+struct ctdb_key_data {
+ uint32_t db_id;
+ struct ctdb_ltdb_header header;
+ TDB_DATA key;
+};
+
+struct ctdb_db_statistics {
+ struct {
+ uint32_t num_calls;
+ uint32_t num_current;
+ uint32_t num_pending;
+ uint32_t num_failed;
+ struct ctdb_latency_counter latency;
+ uint32_t buckets[MAX_COUNT_BUCKETS];
+ } locks;
+ struct {
+ struct ctdb_latency_counter latency;
+ } vacuum;
+ uint32_t db_ro_delegations;
+ uint32_t db_ro_revokes;
+ uint32_t hop_count_bucket[MAX_COUNT_BUCKETS];
+ uint32_t num_hot_keys;
+ struct {
+ uint32_t count;
+ TDB_DATA key;
+ } hot_keys[MAX_HOT_KEYS];
+};
+
+enum ctdb_runstate { /* No explicit values: enumerators number 0..6 in the order written */
+ CTDB_RUNSTATE_UNKNOWN, /* 0 */
+ CTDB_RUNSTATE_INIT, /* 1 */
+ CTDB_RUNSTATE_SETUP, /* 2 */
+ CTDB_RUNSTATE_FIRST_RECOVERY, /* 3 */
+ CTDB_RUNSTATE_STARTUP, /* 4 */
+ CTDB_RUNSTATE_RUNNING, /* 5 */
+ CTDB_RUNSTATE_SHUTDOWN, /* 6 */
+};
+
+struct ctdb_pid_srvid { /* Pairs a process id with a 64-bit srvid */
+ pid_t pid; /* process id; pid_t width is platform-defined */
+ uint64_t srvid; /* service id associated with pid */
+};
+
+struct ctdb_db_vacuum { /* Argument type for the db_vacuum member of the control request union */
+ uint32_t db_id; /* id of the database concerned */
+ bool full_vacuum_run; /* presumably requests a full rather than a partial vacuum run - confirm */
+
+};
+
+struct ctdb_echo_data { /* Appears by pointer in both the control request and control reply unions */
+ uint32_t timeout; /* NOTE(review): unit (seconds? milliseconds?) is not visible here - confirm */
+ TDB_DATA buf; /* data blob; presumably sent back unchanged in the reply - confirm */
+};
+
+struct ctdb_req_control_data { /* Opcode-tagged argument carried by a control request */
+ uint32_t opcode; /* presumably selects which member of data is valid; the mapping is not in this header */
+ union { /* scalar arguments are stored inline, structured ones are held by pointer */
+ pid_t pid;
+ uint32_t db_id;
+ struct ctdb_vnn_map *vnnmap;
+ uint32_t loglevel;
+ struct ctdb_pulldb *pulldb;
+ struct ctdb_pulldb_ext *pulldb_ext;
+ struct ctdb_rec_buffer *recbuf;
+ uint32_t recmode;
+ const char *db_name;
+ struct ctdb_traverse_start *traverse_start;
+ struct ctdb_traverse_all *traverse_all;
+ struct ctdb_rec_data *rec_data;
+ uint32_t recmaster;
+ struct ctdb_connection *conn;
+ struct ctdb_tunable *tunable;
+ const char *tun_var;
+ struct ctdb_node_flag_change *flag_change;
+ ctdb_sock_addr *addr;
+ struct ctdb_tickle_list *tickles;
+ struct ctdb_client_id *cid;
+ struct ctdb_addr_info *addr_info;
+ struct ctdb_transdb *transdb;
+ struct ctdb_public_ip *pubip;
+ enum ctdb_event event;
+ double reclock_latency; /* the only floating-point member of this union */
+ uint32_t role;
+ struct ctdb_ban_state *ban_state;
+ struct ctdb_notify_data *notify;
+ uint64_t srvid;
+ struct ctdb_iface *iface;
+ struct ctdb_key_data *key;
+ struct ctdb_traverse_start_ext *traverse_start_ext;
+ struct ctdb_traverse_all_ext *traverse_all_ext;
+ struct ctdb_pid_srvid *pid_srvid;
+ struct ctdb_db_vacuum *db_vacuum;
+ struct ctdb_echo_data *echo_data;
+ } data; /* members overlap in storage: only one is meaningful at a time */
+};
+
+struct ctdb_reply_control_data { /* Opcode-tagged result carried by a control reply */
+ uint32_t opcode; /* presumably selects which member of data is valid; the mapping is not in this header */
+ union { /* scalar results are stored inline, structured ones and strings are held by pointer */
+ struct ctdb_statistics *stats;
+ const char *db_path;
+ struct ctdb_vnn_map *vnnmap;
+ uint32_t loglevel;
+ struct ctdb_dbid_map *dbmap;
+ struct ctdb_rec_buffer *recbuf;
+ uint32_t db_id;
+ const char *db_name;
+ const char *mem_str;
+ uint32_t tun_value;
+ struct ctdb_var_list *tun_var_list;
+ struct ctdb_tunable_list *tun_list;
+ struct ctdb_tickle_list *tickles;
+ struct ctdb_client_id_map *cid_map;
+ struct ctdb_uptime *uptime;
+ uint32_t caps;
+ struct ctdb_public_ip_list *pubip_list;
+ struct ctdb_node_map *nodemap;
+ const char *reclock_file;
+ struct ctdb_ban_state *ban_state;
+ uint64_t seqnum;
+ const char *reason;
+ struct ctdb_public_ip_info *ipinfo;
+ struct ctdb_iface_list *iface_list;
+ struct ctdb_statistics_list *stats_list;
+ struct ctdb_db_statistics *dbstats;
+ enum ctdb_runstate runstate;
+ uint32_t num_records;
+ int tdb_flags; /* plain int, unlike the fixed-width integers used by the other scalar members */
+ struct ctdb_echo_data *echo_data;
+ } data; /* members overlap in storage: only one is meaningful at a time */
+};
+
+struct ctdb_req_control { /* A control request: fixed fields plus the opcode-tagged argument in rdata */
+ uint32_t opcode;
+ uint32_t pad; /* NOTE(review): looks like alignment/padding filler - confirm */
+ uint64_t srvid;
+ uint32_t client_id;
+#define CTDB_CTRL_FLAG_NOREPLY 1 /* bit 0 of flags */
+#define CTDB_CTRL_FLAG_OPCODE_SPECIFIC 0xFFFF0000 /* mask covering the upper 16 bits of flags */
+/* Ugly overloading of this field... */
+#define CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE 0x00010000 /* bit 16; falls inside the OPCODE_SPECIFIC mask */
+#define CTDB_CTRL_FLAG_ATTACH_RECOVERY 0x00020000 /* bit 17; falls inside the OPCODE_SPECIFIC mask */
+ uint32_t flags; /* presumably a bitwise OR of the flag macros defined just above */
+ struct ctdb_req_control_data rdata; /* embedded by value; rdata also carries its own opcode field */
+};
+
+struct ctdb_reply_control { /* A control reply: status, error text and the opcode-tagged result in rdata */
+ int32_t status; /* NOTE(review): sign convention (0 = success?) is not visible here - confirm */
+ const char *errmsg; /* error text; whether it may be NULL on success is not visible here */
+ struct ctdb_reply_control_data rdata; /* embedded by value */
+};
+
+struct ctdb_election_message { /* Pointed to by ctdb_message_data.election (SRVID_ELECTION) */
+ uint32_t num_connected; /* presumably the number of nodes the sender is connected to - confirm */
+ struct timeval priority_time; /* NOTE(review): how this timestamp ranks candidates is not visible here */
+ uint32_t pnn; /* presumably the sender's node number - confirm */
+ uint32_t node_flags; /* flag bits; their definitions are not in this part of the header */
+};
+
+struct ctdb_srvid_message { /* Pointed to by ctdb_message_data.msg (SRVID_MEM_DUMP, SRVID_TAKEOVER_RUN) */
+ uint32_t pnn; /* node number */
+ uint64_t srvid; /* presumably the srvid a response should be sent to - confirm */
+};
+
+struct ctdb_disable_message { /* Pointed to by ctdb_message_data.disable (SRVID_DISABLE_TAKEOVER_RUNS, SRVID_DISABLE_RECOVERIES) */
+ uint32_t pnn; /* node number */
+ uint64_t srvid; /* presumably the srvid a response should be sent to - confirm */
+ uint32_t timeout; /* NOTE(review): unit and the meaning of 0 are not visible here - confirm */
+};
+
+union ctdb_message_data { /* Payload of struct ctdb_req_message; the comment above each member names the SRVIDs that use it */
+ /* SRVID_ELECTION */
+ struct ctdb_election_message *election;
+ /* SRVID_RELEASE_IP, SRVID_TAKE_IP */
+ const char *ipaddr;
+ /* SRVID_SET_NODE_FLAGS, SRVID_PUSH_NODE_FLAGS */
+ struct ctdb_node_flag_change *flag_change;
+ /* SRVID_RECD_UPDATE_IP */
+ struct ctdb_public_ip *pubip;
+ /* SRVID_VACUUM_FETCH */
+ struct ctdb_rec_buffer *recbuf;
+ /* SRVID_DETACH_DATABASE */
+ uint32_t db_id;
+ /* SRVID_MEM_DUMP, SRVID_TAKEOVER_RUN */
+ struct ctdb_srvid_message *msg;
+ /* SRVID_BANNING, SRVID_REBALANCE_NODE, SRVID_LEADER */
+ uint32_t pnn;
+ /* SRVID_DISABLE_TAKEOVER_RUNS, SRVID_DISABLE_RECOVERIES */
+ struct ctdb_disable_message *disable;
+ /* SRVID_DISABLE_IP_CHECK */
+ uint32_t timeout;
+ /* Other */
+ TDB_DATA data;
+};
+
+struct ctdb_req_message { /* A message whose payload is the typed union ctdb_message_data */
+ uint64_t srvid; /* presumably selects which member of data is valid, per the comments in the union */
+ union ctdb_message_data data;
+};
+
+struct ctdb_req_message_data { /* Same srvid field as ctdb_req_message, but the payload is a raw TDB_DATA blob */
+ uint64_t srvid;
+ TDB_DATA data; /* payload blob, not the typed union used by ctdb_req_message */
+};
+
+struct ctdb_req_keepalive { /* Body of a keepalive request: two 32-bit fields */
+ uint32_t version; /* NOTE(review): which version number this carries is not visible here - confirm */
+ uint32_t uptime; /* NOTE(review): unit is not visible here - confirm */
+};
+
+#define CTDB_TUNNEL_TEST 0xffffffff00000000 /* 64-bit constant, upper 32 bits all set; presumably a tunnel_id for tests */
+
+#define CTDB_TUNNEL_FLAG_REQUEST 0x00000001 /* bit 0 */
+#define CTDB_TUNNEL_FLAG_REPLY 0x00000002 /* bit 1 */
+#define CTDB_TUNNEL_FLAG_NOREPLY 0x00000010 /* bit 4 (bits 2 and 3 are not defined here) */
+
+struct ctdb_req_tunnel { /* A tunnel packet body: 64-bit tunnel id, flags and a data blob */
+ uint64_t tunnel_id; /* 64 bits wide, matching the width of CTDB_TUNNEL_TEST */
+ uint32_t flags; /* presumably a combination of the CTDB_TUNNEL_FLAG_* bits defined above */
+ TDB_DATA data; /* payload blob */
+};
+
+
+/* This is equivalent to server_id */
+struct ctdb_server_id { /* Four fixed-width fields; stated above to be equivalent to server_id */
+ uint64_t pid; /* stored as 64 bits rather than as pid_t */
+ uint32_t task_id;
+ uint32_t vnn;
+ uint64_t unique_id;
+};
+
+enum ctdb_g_lock_type { /* Lock kind stored in struct ctdb_g_lock.type; values are assigned explicitly */
+ CTDB_G_LOCK_READ = 0, /* read lock */
+ CTDB_G_LOCK_WRITE = 1, /* write lock */
+};
+
+struct ctdb_g_lock { /* One lock entry: its kind plus a server id */
+ enum ctdb_g_lock_type type; /* CTDB_G_LOCK_READ or CTDB_G_LOCK_WRITE */
+ struct ctdb_server_id sid; /* embedded by value; presumably identifies the lock holder - confirm */
+};
+
+struct ctdb_g_lock_list { /* Counted collection of struct ctdb_g_lock */
+ unsigned int num; /* note: unsigned int, not uint32_t as in most other *_list structs here */
+ struct ctdb_g_lock *lock; /* pointer to the entries; allocation and ownership are not visible here */
+};
+
+/*
+ * Generic packet header
+ */
+
+struct sock_packet_header { /* Generic packet header: two 32-bit fields */
+ uint32_t length; /* NOTE(review): whether this counts the header itself is not visible here - confirm */
+ uint32_t reqid; /* request id; presumably used to match replies to requests - confirm */
+};
+
+#endif /* __CTDB_PROTOCOL_H__ */
diff --git a/ctdb/protocol/protocol_api.h b/ctdb/protocol/protocol_api.h
new file mode 100644
index 0000000..48e4e84
--- /dev/null
+++ b/ctdb/protocol/protocol_api.h
@@ -0,0 +1,682 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PROTOCOL_API_H__
+#define __CTDB_PROTOCOL_API_H__
+
+#include <talloc.h>
+
+#include "protocol/protocol.h"
+
+/* From protocol/protocol_types.c */
+
+size_t ctdb_ltdb_header_len(struct ctdb_ltdb_header *in);
+void ctdb_ltdb_header_push(struct ctdb_ltdb_header *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_ltdb_header_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_ltdb_header *out, size_t *npull);
+
+int ctdb_ltdb_header_extract(TDB_DATA *data, struct ctdb_ltdb_header *header);
+
+size_t ctdb_rec_data_len(struct ctdb_rec_data *in);
+void ctdb_rec_data_push(struct ctdb_rec_data *in, uint8_t *buf, size_t *npush);
+int ctdb_rec_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_rec_data **out, size_t *npull);
+
+size_t ctdb_rec_buffer_len(struct ctdb_rec_buffer *in);
+void ctdb_rec_buffer_push(struct ctdb_rec_buffer *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_rec_buffer_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_rec_buffer **out, size_t *npull);
+
+struct ctdb_rec_buffer *ctdb_rec_buffer_init(TALLOC_CTX *mem_ctx,
+ uint32_t db_id);
+int ctdb_rec_buffer_add(TALLOC_CTX *mem_ctx, struct ctdb_rec_buffer *recbuf,
+ uint32_t reqid, struct ctdb_ltdb_header *header,
+ TDB_DATA key, TDB_DATA data);
+int ctdb_rec_buffer_traverse(struct ctdb_rec_buffer *recbuf,
+ ctdb_rec_parser_func_t func,
+ void *private_data);
+
+int ctdb_rec_buffer_write(struct ctdb_rec_buffer *recbuf, int fd);
+int ctdb_rec_buffer_read(int fd, TALLOC_CTX *mem_ctx,
+ struct ctdb_rec_buffer **out);
+
+size_t ctdb_server_id_len(struct ctdb_server_id *in);
+void ctdb_server_id_push(struct ctdb_server_id *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_server_id_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_server_id *out, size_t *npull);
+
+size_t ctdb_g_lock_len(struct ctdb_g_lock *in);
+void ctdb_g_lock_push(struct ctdb_g_lock *in, uint8_t *buf, size_t *npush);
+int ctdb_g_lock_pull(uint8_t *buf, size_t buflen, struct ctdb_g_lock *out,
+ size_t *npull);
+
+size_t ctdb_g_lock_list_len(struct ctdb_g_lock_list *in);
+void ctdb_g_lock_list_push(struct ctdb_g_lock_list *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_g_lock_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_g_lock_list **out, size_t *npull);
+
+/* From protocol/protocol_header.c */
+
+void ctdb_req_header_fill(struct ctdb_req_header *h, uint32_t generation,
+ uint32_t operation, uint32_t destnode,
+ uint32_t srcnode, uint32_t reqid);
+
+size_t ctdb_req_header_len(struct ctdb_req_header *in);
+void ctdb_req_header_push(struct ctdb_req_header *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_req_header_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *out, size_t *npull);
+
+int ctdb_req_header_verify(struct ctdb_req_header *h, uint32_t operation);
+
+/* From protocol/protocol_call.c */
+
+size_t ctdb_req_call_len(struct ctdb_req_header *h,
+ struct ctdb_req_call *c);
+
+int ctdb_req_call_push(struct ctdb_req_header *h,
+ struct ctdb_req_call *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_req_call_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_call *c);
+
+size_t ctdb_reply_call_len(struct ctdb_req_header *h,
+ struct ctdb_reply_call *c);
+
+int ctdb_reply_call_push(struct ctdb_req_header *h,
+ struct ctdb_reply_call *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_reply_call_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_call *c);
+
+size_t ctdb_reply_error_len(struct ctdb_req_header *h,
+ struct ctdb_reply_error *c);
+
+int ctdb_reply_error_push(struct ctdb_req_header *h,
+ struct ctdb_reply_error *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_reply_error_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_error *c);
+
+size_t ctdb_req_dmaster_len(struct ctdb_req_header *h,
+ struct ctdb_req_dmaster *c);
+
+int ctdb_req_dmaster_push(struct ctdb_req_header *h,
+ struct ctdb_req_dmaster *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_req_dmaster_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_dmaster *c);
+
+size_t ctdb_reply_dmaster_len(struct ctdb_req_header *h,
+ struct ctdb_reply_dmaster *c);
+
+int ctdb_reply_dmaster_push(struct ctdb_req_header *h,
+ struct ctdb_reply_dmaster *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_reply_dmaster_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_dmaster *c);
+
+/* From protocol/protocol_control.c */
+
+size_t ctdb_req_control_len(struct ctdb_req_header *h,
+ struct ctdb_req_control *c);
+
+int ctdb_req_control_push(struct ctdb_req_header *h,
+ struct ctdb_req_control *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_req_control_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_control *c);
+
+size_t ctdb_reply_control_len(struct ctdb_req_header *h,
+ struct ctdb_reply_control *c);
+
+int ctdb_reply_control_push(struct ctdb_req_header *h,
+ struct ctdb_reply_control *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_reply_control_pull(uint8_t *buf, size_t buflen, uint32_t opcode,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_control *c);
+
+/* From protocol/protocol_client.c */
+
+void ctdb_req_control_process_exists(struct ctdb_req_control *request,
+ pid_t pid);
+int ctdb_reply_control_process_exists(struct ctdb_reply_control *reply,
+ int *status);
+
+void ctdb_req_control_statistics(struct ctdb_req_control *request);
+
+int ctdb_reply_control_statistics(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_statistics **stats);
+
+void ctdb_req_control_ping(struct ctdb_req_control *request);
+int ctdb_reply_control_ping(struct ctdb_reply_control *reply,
+ int *num_clients);
+
+void ctdb_req_control_getdbpath(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_getdbpath(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx, const char **db_path);
+
+void ctdb_req_control_getvnnmap(struct ctdb_req_control *request);
+int ctdb_reply_control_getvnnmap(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_vnn_map **vnnmap);
+
+void ctdb_req_control_setvnnmap(struct ctdb_req_control *request,
+ struct ctdb_vnn_map *vnnmap);
+int ctdb_reply_control_setvnnmap(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_debug(struct ctdb_req_control *request);
+int ctdb_reply_control_get_debug(struct ctdb_reply_control *reply,
+ int *debug_level);
+
+void ctdb_req_control_set_debug(struct ctdb_req_control *request,
+ int debug_level);
+int ctdb_reply_control_set_debug(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_dbmap(struct ctdb_req_control *request);
+int ctdb_reply_control_get_dbmap(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_dbid_map **dbmap);
+
+void ctdb_req_control_get_recmode(struct ctdb_req_control *request);
+int ctdb_reply_control_get_recmode(struct ctdb_reply_control *reply,
+ int *recmode);
+
+void ctdb_req_control_set_recmode(struct ctdb_req_control *request,
+ int recmode);
+int ctdb_reply_control_set_recmode(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_statistics_reset(struct ctdb_req_control *request);
+int ctdb_reply_control_statistics_reset(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_attach(struct ctdb_req_control *request,
+ const char *db_name);
+int ctdb_reply_control_db_attach(struct ctdb_reply_control *reply,
+ uint32_t *db_id);
+
+void ctdb_req_control_traverse_start(struct ctdb_req_control *request,
+ struct ctdb_traverse_start *traverse);
+int ctdb_reply_control_traverse_start(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_register_srvid(struct ctdb_req_control *request,
+ uint64_t srvid);
+int ctdb_reply_control_register_srvid(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_deregister_srvid(struct ctdb_req_control *request,
+ uint64_t srvid);
+int ctdb_reply_control_deregister_srvid(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_dbname(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_get_dbname(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx, const char **db_name);
+
+void ctdb_req_control_enable_seqnum(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_enable_seqnum(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_update_seqnum(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_update_seqnum(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_dump_memory(struct ctdb_req_control *request);
+int ctdb_reply_control_dump_memory(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx, const char **mem_str);
+
+void ctdb_req_control_get_pid(struct ctdb_req_control *request);
+int ctdb_reply_control_get_pid(struct ctdb_reply_control *reply,
+ pid_t *pid);
+
+void ctdb_req_control_freeze(struct ctdb_req_control *request,
+ uint32_t priority);
+int ctdb_reply_control_freeze(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_pnn(struct ctdb_req_control *request);
+int ctdb_reply_control_get_pnn(struct ctdb_reply_control *reply,
+ uint32_t *pnn);
+
+void ctdb_req_control_shutdown(struct ctdb_req_control *request);
+int ctdb_reply_control_shutdown(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_tcp_client(struct ctdb_req_control *request,
+ struct ctdb_connection *conn);
+int ctdb_reply_control_tcp_client(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_tcp_add(struct ctdb_req_control *request,
+ struct ctdb_connection *conn);
+int ctdb_reply_control_tcp_add(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_tcp_remove(struct ctdb_req_control *request,
+ struct ctdb_connection *conn);
+int ctdb_reply_control_tcp_remove(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_startup(struct ctdb_req_control *request);
+int ctdb_reply_control_startup(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_set_tunable(struct ctdb_req_control *request,
+ struct ctdb_tunable *tunable);
+int ctdb_reply_control_set_tunable(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_tunable(struct ctdb_req_control *request,
+ const char *name);
+int ctdb_reply_control_get_tunable(struct ctdb_reply_control *reply,
+ uint32_t *value);
+
+void ctdb_req_control_list_tunables(struct ctdb_req_control *request);
+int ctdb_reply_control_list_tunables(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_var_list **tun_var_list);
+
+void ctdb_req_control_modify_flags(struct ctdb_req_control *request,
+ struct ctdb_node_flag_change *flag_change);
+int ctdb_reply_control_modify_flags(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_all_tunables(struct ctdb_req_control *request);
+int ctdb_reply_control_get_all_tunables(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_tunable_list **tun_list);
+
+void ctdb_req_control_get_tcp_tickle_list(struct ctdb_req_control *request,
+ ctdb_sock_addr *addr);
+int ctdb_reply_control_get_tcp_tickle_list(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_tickle_list **tickles);
+
+void ctdb_req_control_set_tcp_tickle_list(struct ctdb_req_control *request,
+ struct ctdb_tickle_list *tickles);
+int ctdb_reply_control_set_tcp_tickle_list(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_attach_persistent(struct ctdb_req_control *request,
+ const char *name);
+int ctdb_reply_control_db_attach_persistent(struct ctdb_reply_control *reply,
+ uint32_t *db_id);
+
+void ctdb_req_control_update_record(struct ctdb_req_control *request,
+ struct ctdb_rec_buffer *recbuf);
+int ctdb_reply_control_update_record(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_send_gratuitous_arp(struct ctdb_req_control *request,
+ struct ctdb_addr_info *addr_info);
+int ctdb_reply_control_send_gratuitous_arp(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_wipe_database(struct ctdb_req_control *request,
+ struct ctdb_transdb *transdb);
+int ctdb_reply_control_wipe_database(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_uptime(struct ctdb_req_control *request);
+int ctdb_reply_control_uptime(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_uptime **uptime);
+
+void ctdb_req_control_start_recovery(struct ctdb_req_control *request);
+int ctdb_reply_control_start_recovery(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_end_recovery(struct ctdb_req_control *request);
+int ctdb_reply_control_end_recovery(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_reload_nodes_file(struct ctdb_req_control *request);
+int ctdb_reply_control_reload_nodes_file(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_try_delete_records(struct ctdb_req_control *request,
+ struct ctdb_rec_buffer *recbuf);
+int ctdb_reply_control_try_delete_records(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_rec_buffer **recbuf);
+
+void ctdb_req_control_add_public_ip(struct ctdb_req_control *request,
+ struct ctdb_addr_info *addr_info);
+int ctdb_reply_control_add_public_ip(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_del_public_ip(struct ctdb_req_control *request,
+ struct ctdb_addr_info *addr_info);
+int ctdb_reply_control_del_public_ip(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_capabilities(struct ctdb_req_control *request);
+int ctdb_reply_control_get_capabilities(struct ctdb_reply_control *reply,
+ uint32_t *caps);
+
+void ctdb_req_control_recd_ping(struct ctdb_req_control *request);
+int ctdb_reply_control_recd_ping(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_release_ip(struct ctdb_req_control *request,
+ struct ctdb_public_ip *pubip);
+int ctdb_reply_control_release_ip(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_takeover_ip(struct ctdb_req_control *request,
+ struct ctdb_public_ip *pubip);
+int ctdb_reply_control_takeover_ip(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_public_ips(struct ctdb_req_control *request,
+ bool available_only);
+int ctdb_reply_control_get_public_ips(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_list **pubip_list);
+
+void ctdb_req_control_get_nodemap(struct ctdb_req_control *request);
+int ctdb_reply_control_get_nodemap(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_node_map **nodemap);
+
+void ctdb_req_control_traverse_kill(struct ctdb_req_control *request,
+ struct ctdb_traverse_start *traverse);
+int ctdb_reply_control_traverse_kill(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_recd_reclock_latency(struct ctdb_req_control *request,
+ double reclock_latency);
+int ctdb_reply_control_recd_reclock_latency(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_reclock_file(struct ctdb_req_control *request);
+int ctdb_reply_control_get_reclock_file(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ const char **reclock_file);
+
+void ctdb_req_control_stop_node(struct ctdb_req_control *request);
+int ctdb_reply_control_stop_node(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_continue_node(struct ctdb_req_control *request);
+int ctdb_reply_control_continue_node(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_set_lmasterrole(struct ctdb_req_control *request,
+ uint32_t lmaster_role);
+int ctdb_reply_control_set_lmasterrole(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_set_recmasterrole(struct ctdb_req_control *request,
+ uint32_t recmaster_role);
+int ctdb_reply_control_set_recmasterrole(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_set_ban_state(struct ctdb_req_control *request,
+ struct ctdb_ban_state *ban_state);
+int ctdb_reply_control_set_ban_state(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_ban_state(struct ctdb_req_control *request);
+int ctdb_reply_control_get_ban_state(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_ban_state **ban_state);
+
+void ctdb_req_control_register_notify(struct ctdb_req_control *request,
+ struct ctdb_notify_data *notify);
+int ctdb_reply_control_register_notify(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_deregister_notify(struct ctdb_req_control *request,
+ uint64_t srvid);
+int ctdb_reply_control_deregister_notify(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_trans3_commit(struct ctdb_req_control *request,
+ struct ctdb_rec_buffer *recbuf);
+int ctdb_reply_control_trans3_commit(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_db_seqnum(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_get_db_seqnum(struct ctdb_reply_control *reply,
+ uint64_t *seqnum);
+
+void ctdb_req_control_db_set_healthy(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_set_healthy(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_get_health(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_get_health(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ const char **reason);
+
+void ctdb_req_control_get_public_ip_info(struct ctdb_req_control *request,
+ ctdb_sock_addr *addr);
+int ctdb_reply_control_get_public_ip_info(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_info **ipinfo);
+
+void ctdb_req_control_get_ifaces(struct ctdb_req_control *request);
+int ctdb_reply_control_get_ifaces(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_iface_list **iface_list);
+
+void ctdb_req_control_set_iface_link_state(struct ctdb_req_control *request,
+ struct ctdb_iface *iface);
+int ctdb_reply_control_set_iface_link_state(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_tcp_add_delayed_update(struct ctdb_req_control *request,
+ struct ctdb_connection *conn);
+int ctdb_reply_control_tcp_add_delayed_update(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_stat_history(struct ctdb_req_control *request);
+int ctdb_reply_control_get_stat_history(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_statistics_list **stats_list);
+
+void ctdb_req_control_schedule_for_deletion(struct ctdb_req_control *request,
+ struct ctdb_key_data *key);
+int ctdb_reply_control_schedule_for_deletion(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_set_db_readonly(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_set_db_readonly(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_traverse_start_ext(struct ctdb_req_control *request,
+ struct ctdb_traverse_start_ext *traverse);
+int ctdb_reply_control_traverse_start_ext(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_db_statistics(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_get_db_statistics(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_db_statistics **dbstats);
+
+void ctdb_req_control_set_db_sticky(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_set_db_sticky(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_reload_public_ips(struct ctdb_req_control *request);
+int ctdb_reply_control_reload_public_ips(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_ipreallocated(struct ctdb_req_control *request);
+int ctdb_reply_control_ipreallocated(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_runstate(struct ctdb_req_control *request);
+int ctdb_reply_control_get_runstate(struct ctdb_reply_control *reply,
+ enum ctdb_runstate *runstate);
+
+void ctdb_req_control_db_detach(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_detach(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_get_nodes_file(struct ctdb_req_control *request);
+int ctdb_reply_control_get_nodes_file(struct ctdb_reply_control *reply,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_node_map **nodemap);
+
+void ctdb_req_control_db_freeze(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_freeze(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_thaw(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_thaw(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_transaction_start(struct ctdb_req_control *request,
+ struct ctdb_transdb *transdb);
+int ctdb_reply_control_db_transaction_start(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_transaction_commit(struct ctdb_req_control *request,
+ struct ctdb_transdb *transdb);
+int ctdb_reply_control_db_transaction_commit(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_transaction_cancel(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_transaction_cancel(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_pull(struct ctdb_req_control *request,
+ struct ctdb_pulldb_ext *pulldb_ext);
+int ctdb_reply_control_db_pull(struct ctdb_reply_control *reply,
+ uint32_t *num_records);
+
+void ctdb_req_control_db_push_start(struct ctdb_req_control *request,
+ struct ctdb_pulldb_ext *pulldb_ext);
+int ctdb_reply_control_db_push_start(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_push_confirm(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_push_confirm(struct ctdb_reply_control *reply,
+ uint32_t *num_records);
+
+void ctdb_req_control_db_open_flags(struct ctdb_req_control *request,
+ uint32_t db_id);
+int ctdb_reply_control_db_open_flags(struct ctdb_reply_control *reply,
+ int *tdb_flags);
+
+void ctdb_req_control_db_attach_replicated(struct ctdb_req_control *request,
+ const char *db_name);
+int ctdb_reply_control_db_attach_replicated(struct ctdb_reply_control *reply,
+ uint32_t *db_id);
+
+void ctdb_req_control_check_pid_srvid(struct ctdb_req_control *request,
+ struct ctdb_pid_srvid *pid_srvid);
+int ctdb_reply_control_check_pid_srvid(struct ctdb_reply_control *reply,
+ int *status);
+
+void ctdb_req_control_tunnel_register(struct ctdb_req_control *request,
+ uint64_t tunnel_id);
+int ctdb_reply_control_tunnel_register(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_tunnel_deregister(struct ctdb_req_control *request,
+ uint64_t tunnel_id);
+int ctdb_reply_control_tunnel_deregister(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request,
+ struct ctdb_rec_buffer *recbuf);
+int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_db_vacuum(struct ctdb_req_control *request,
+ struct ctdb_db_vacuum *db_vacuum);
+int ctdb_reply_control_db_vacuum(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_echo_data(struct ctdb_req_control *request,
+ struct ctdb_echo_data *echo_data);
+int ctdb_reply_control_echo_data(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_disable_node(struct ctdb_req_control *request);
+int ctdb_reply_control_disable_node(struct ctdb_reply_control *reply);
+
+void ctdb_req_control_enable_node(struct ctdb_req_control *request);
+int ctdb_reply_control_enable_node(struct ctdb_reply_control *reply);
+
+/* From protocol/protocol_debug.c */
+
+void ctdb_packet_print(uint8_t *buf, size_t buflen, FILE *fp);
+
+/* From protocol/protocol_message.c */
+
+size_t ctdb_req_message_len(struct ctdb_req_header *h,
+ struct ctdb_req_message *c);
+
+int ctdb_req_message_push(struct ctdb_req_header *h,
+ struct ctdb_req_message *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_req_message_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_message *c);
+
+size_t ctdb_req_message_data_len(struct ctdb_req_header *h,
+ struct ctdb_req_message_data *c);
+
+int ctdb_req_message_data_push(struct ctdb_req_header *h,
+ struct ctdb_req_message_data *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_req_message_data_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_message_data *c);
+
+/* From protocol/protocol_keepalive.c */
+
+size_t ctdb_req_keepalive_len(struct ctdb_req_header *h,
+ struct ctdb_req_keepalive *c);
+
+int ctdb_req_keepalive_push(struct ctdb_req_header *h,
+ struct ctdb_req_keepalive *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_req_keepalive_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_keepalive *c);
+
+/* From protocol/protocol_tunnel.c */
+
+size_t ctdb_req_tunnel_len(struct ctdb_req_header *h,
+ struct ctdb_req_tunnel *c);
+
+int ctdb_req_tunnel_push(struct ctdb_req_header *h,
+ struct ctdb_req_tunnel *c,
+ uint8_t *buf, size_t *buflen);
+
+int ctdb_req_tunnel_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_tunnel *c);
+
+/* From protocol/protocol_packet.c */
+
+int ctdb_allocate_pkt(TALLOC_CTX *mem_ctx, size_t datalen,
+ uint8_t **buf, size_t *buflen);
+
+/* From protocol/protocol_sock.c */
+
+size_t sock_packet_header_len(struct sock_packet_header *in);
+void sock_packet_header_push(struct sock_packet_header *in, uint8_t *buf,
+ size_t *npush);
+int sock_packet_header_pull(uint8_t *buf, size_t buflen,
+ struct sock_packet_header *out, size_t *npull);
+
+void sock_packet_header_set_reqid(struct sock_packet_header *h,
+ uint32_t reqid);
+void sock_packet_header_set_length(struct sock_packet_header *h,
+ uint32_t length);
+
+#endif /* __CTDB_PROTOCOL_API_H__ */
diff --git a/ctdb/protocol/protocol_basic.c b/ctdb/protocol/protocol_basic.c
new file mode 100644
index 0000000..42f2077
--- /dev/null
+++ b/ctdb/protocol/protocol_basic.c
@@ -0,0 +1,400 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015-2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "protocol_basic.h"
+
+/*
+ * Basic data types
+ */
+
+/* Marshalled size of a uint8_t; the value behind 'in' is never read. */
+size_t ctdb_uint8_len(uint8_t *in)
+{
+	return sizeof(*in);
+}
+
+/* Store *in as the first byte of buf and report one byte written. */
+void ctdb_uint8_push(uint8_t *in, uint8_t *buf, size_t *npush)
+{
+	buf[0] = *in;
+	*npush = sizeof(*in);
+}
+
+/*
+ * Read one byte from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is 0;
+ * otherwise returns 0 with *npull set to 1.
+ */
+int ctdb_uint8_pull(uint8_t *buf, size_t buflen, uint8_t *out, size_t *npull)
+{
+	const size_t need = sizeof(uint8_t);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	out[0] = buf[0];
+	*npull = need;
+	return 0;
+}
+
+/* Marshalled size of a uint16_t; the value behind 'in' is never read. */
+size_t ctdb_uint16_len(uint16_t *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the in-memory bytes of *in to buf (no byte-order conversion is done
+ * here) and report sizeof(uint16_t) bytes written.
+ */
+void ctdb_uint16_push(uint16_t *in, uint8_t *buf, size_t *npush)
+{
+	const size_t n = sizeof(uint16_t);
+
+	memcpy(buf, in, n);
+	*npush = n;
+}
+
+/*
+ * Copy sizeof(uint16_t) bytes from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is too
+ * small; otherwise returns 0 with *npull set to the bytes consumed.
+ */
+int ctdb_uint16_pull(uint8_t *buf, size_t buflen, uint16_t *out, size_t *npull)
+{
+	const size_t need = sizeof(uint16_t);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	memcpy(out, buf, need);
+	*npull = need;
+	return 0;
+}
+
+/* Marshalled size of an int32_t; the value behind 'in' is never read. */
+size_t ctdb_int32_len(int32_t *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the in-memory bytes of *in to buf (no byte-order conversion is done
+ * here) and report sizeof(int32_t) bytes written.
+ */
+void ctdb_int32_push(int32_t *in, uint8_t *buf, size_t *npush)
+{
+	const size_t n = sizeof(int32_t);
+
+	memcpy(buf, in, n);
+	*npush = n;
+}
+
+/*
+ * Copy sizeof(int32_t) bytes from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is too
+ * small; otherwise returns 0 with *npull set to the bytes consumed.
+ */
+int ctdb_int32_pull(uint8_t *buf, size_t buflen, int32_t *out, size_t *npull)
+{
+	const size_t need = sizeof(int32_t);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	memcpy(out, buf, need);
+	*npull = need;
+	return 0;
+}
+
+/* Marshalled size of a uint32_t; the value behind 'in' is never read. */
+size_t ctdb_uint32_len(uint32_t *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the in-memory bytes of *in to buf (no byte-order conversion is done
+ * here) and report sizeof(uint32_t) bytes written.
+ */
+void ctdb_uint32_push(uint32_t *in, uint8_t *buf, size_t *npush)
+{
+	const size_t n = sizeof(uint32_t);
+
+	memcpy(buf, in, n);
+	*npush = n;
+}
+
+/*
+ * Copy sizeof(uint32_t) bytes from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is too
+ * small; otherwise returns 0 with *npull set to the bytes consumed.
+ */
+int ctdb_uint32_pull(uint8_t *buf, size_t buflen, uint32_t *out, size_t *npull)
+{
+	const size_t need = sizeof(uint32_t);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	memcpy(out, buf, need);
+	*npull = need;
+	return 0;
+}
+
+/* Marshalled size of a uint64_t; the value behind 'in' is never read. */
+size_t ctdb_uint64_len(uint64_t *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the in-memory bytes of *in to buf (no byte-order conversion is done
+ * here) and report sizeof(uint64_t) bytes written.
+ */
+void ctdb_uint64_push(uint64_t *in, uint8_t *buf, size_t *npush)
+{
+	const size_t n = sizeof(uint64_t);
+
+	memcpy(buf, in, n);
+	*npush = n;
+}
+
+/*
+ * Copy sizeof(uint64_t) bytes from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is too
+ * small; otherwise returns 0 with *npull set to the bytes consumed.
+ */
+int ctdb_uint64_pull(uint8_t *buf, size_t buflen, uint64_t *out, size_t *npull)
+{
+	const size_t need = sizeof(uint64_t);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	memcpy(out, buf, need);
+	*npull = need;
+	return 0;
+}
+
+/* Marshalled size of a double; the value behind 'in' is never read. */
+size_t ctdb_double_len(double *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the in-memory bytes of *in to buf unchanged and report
+ * sizeof(double) bytes written.
+ */
+void ctdb_double_push(double *in, uint8_t *buf, size_t *npush)
+{
+	const size_t n = sizeof(double);
+
+	memcpy(buf, in, n);
+	*npush = n;
+}
+
+/*
+ * Copy sizeof(double) bytes from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is too
+ * small; otherwise returns 0 with *npull set to the bytes consumed.
+ */
+int ctdb_double_pull(uint8_t *buf, size_t buflen, double *out, size_t *npull)
+{
+	const size_t need = sizeof(double);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	memcpy(out, buf, need);
+	*npull = need;
+	return 0;
+}
+
+/* A bool is sized as a uint8_t; the value behind 'in' is never read. */
+size_t ctdb_bool_len(bool *in)
+{
+	uint8_t proxy = 0;
+	size_t len = ctdb_uint8_len(&proxy);
+
+	return len;
+}
+
+/*
+ * Marshal *in through ctdb_uint8_push() after converting it to a uint8_t
+ * (0 for false, 1 for true).
+ */
+void ctdb_bool_push(bool *in, uint8_t *buf, size_t *npush)
+{
+	uint8_t byte = *in;
+	size_t written;
+
+	ctdb_uint8_push(&byte, buf, &written);
+	*npush = written;
+}
+
+/*
+ * Unmarshal a bool stored as one uint8_t.
+ *
+ * Only the byte values 0 and 1 are accepted; any other value yields EINVAL.
+ * A failure from ctdb_uint8_pull() is passed through unchanged.  *out and
+ * *npull are written only on success.
+ */
+int ctdb_bool_pull(uint8_t *buf, size_t buflen, bool *out, size_t *npull)
+{
+	size_t consumed;
+	uint8_t byte;
+	int ret;
+
+	ret = ctdb_uint8_pull(buf, buflen, &byte, &consumed);
+	if (ret != 0) {
+		return ret;
+	}
+
+	switch (byte) {
+	case 0:
+		*out = false;
+		break;
+	case 1:
+		*out = true;
+		break;
+	default:
+		return EINVAL;
+	}
+
+	*npull = consumed;
+	return 0;
+}
+
+/* A character array occupies exactly 'len' bytes; 'in' is not inspected. */
+size_t ctdb_chararray_len(char *in, size_t len)
+{
+	(void)in;
+
+	return len;
+}
+
+/* Copy all 'len' bytes of 'in' to buf verbatim and report 'len' written. */
+void ctdb_chararray_push(char *in, size_t len, uint8_t *buf, size_t *npush)
+{
+	const void *src = in;
+
+	memcpy(buf, src, len);
+	*npush = len;
+}
+
+/*
+ * Copy 'len' bytes from buf into the fixed-size array 'out' and force the
+ * last byte of 'out' to NUL, so the result is always a terminated string.
+ *
+ * Returns EMSGSIZE when buf holds fewer than 'len' bytes; otherwise returns
+ * 0 with *npull set to 'len'.  With len == 0 nothing is written to 'out'.
+ */
+int ctdb_chararray_pull(uint8_t *buf, size_t buflen, char *out, size_t len,
+			size_t *npull)
+{
+	if (buflen < len) {
+		return EMSGSIZE;
+	}
+
+	/*
+	 * A zero-length array has no byte to hold the terminator: indexing
+	 * out[len-1] would write one byte before the start of 'out'.
+	 */
+	if (len > 0) {
+		memcpy(out, buf, len);
+		out[len-1] = '\0';
+	}
+
+	*npull = len;
+	return 0;
+}
+
+/*
+ * Marshalled size of the string *in: its length including the terminating
+ * NUL, or 0 when *in is NULL.
+ */
+size_t ctdb_string_len(const char **in)
+{
+	const char *s = *in;
+
+	return (s != NULL) ? strlen(s) + 1 : 0;
+}
+
+/*
+ * Copy the string *in to buf using the byte count from ctdb_string_len();
+ * when that count is 0 nothing is written.  *npush receives the count.
+ */
+void ctdb_string_push(const char **in, uint8_t *buf, size_t *npush)
+{
+	size_t nbytes = ctdb_string_len(in);
+
+	if (nbytes != 0) {
+		memcpy(buf, *in, nbytes);
+	}
+
+	*npush = nbytes;
+}
+
+/*
+ * Unmarshal a string from at most buflen bytes of buf.
+ *
+ * An empty buffer produces a NULL string with *npull = 0.  Otherwise the
+ * string is duplicated with talloc_strndup() on mem_ctx and *npull is set
+ * to the duplicate's length plus one for its terminator.
+ *
+ * Returns EMSGSIZE if buflen exceeds UINT32_MAX, ENOMEM if the duplicate
+ * cannot be allocated, 0 on success.
+ *
+ * NOTE(review): when buf holds no NUL within buflen bytes, *npull comes out
+ * as buflen + 1 -- confirm that callers tolerate this.
+ */
+int ctdb_string_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		     const char **out, size_t *npull)
+{
+	const char *copy;
+
+	if (buflen == 0) {
+		*out = NULL;
+		*npull = 0;
+		return 0;
+	}
+
+	if (buflen > UINT32_MAX) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_strndup(mem_ctx, (char *)buf, buflen);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	*npull = strlen(copy) + 1;
+	return 0;
+}
+
+/*
+ * Size of a length-prefixed string: a uint32_t count followed by the
+ * number of bytes reported by ctdb_string_len().
+ */
+size_t ctdb_stringn_len(const char **in)
+{
+	uint32_t payload = ctdb_string_len(in);
+	size_t prefix = ctdb_uint32_len(&payload);
+
+	return prefix + payload;
+}
+
+/*
+ * Marshal a length-prefixed string: the uint32_t ctdb_string_len() value
+ * first, then the string bytes.  *npush receives the total written.
+ */
+void ctdb_stringn_push(const char **in, uint8_t *buf, size_t *npush)
+{
+	uint32_t payload = ctdb_string_len(in);
+	size_t pos, np;
+
+	ctdb_uint32_push(&payload, buf, &np);
+	pos = np;
+
+	ctdb_string_push(in, buf+pos, &np);
+	pos += np;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a length-prefixed string: a uint32_t byte count followed by
+ * that many bytes handed to ctdb_string_pull().
+ *
+ * Returns EMSGSIZE when the count exceeds what is left in buf, or the
+ * error of whichever helper failed; 0 on success, with *npull set to the
+ * prefix size plus the bytes ctdb_string_pull() reported.
+ */
+int ctdb_stringn_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		      const char **out, size_t *npull)
+{
+	size_t pos, np;
+	uint32_t payload;
+	int ret;
+
+	ret = ctdb_uint32_pull(buf, buflen, &payload, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	pos = np;
+
+	if (payload > buflen-pos) {
+		return EMSGSIZE;
+	}
+
+	ret = ctdb_string_pull(buf+pos, payload, mem_ctx, out, &np);
+	if (ret != 0) {
+		return ret;
+	}
+
+	*npull = pos + np;
+	return 0;
+}
+
+/*
+ * System defined data types
+ */
+
+/* Marshalled size of a pid_t; the value behind 'in' is never read. */
+size_t ctdb_pid_len(pid_t *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the in-memory bytes of *in to buf unchanged and report
+ * sizeof(pid_t) bytes written.
+ */
+void ctdb_pid_push(pid_t *in, uint8_t *buf, size_t *npush)
+{
+	const size_t n = sizeof(pid_t);
+
+	memcpy(buf, in, n);
+	*npush = n;
+}
+
+/*
+ * Copy sizeof(pid_t) bytes from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is too
+ * small; otherwise returns 0 with *npull set to the bytes consumed.
+ */
+int ctdb_pid_pull(uint8_t *buf, size_t buflen, pid_t *out, size_t *npull)
+{
+	const size_t need = sizeof(pid_t);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	memcpy(out, buf, need);
+	*npull = need;
+	return 0;
+}
+
+/* Marshalled size of a struct timeval; 'in' is never dereferenced. */
+size_t ctdb_timeval_len(struct timeval *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the whole in-memory struct timeval to buf unchanged and report
+ * sizeof(struct timeval) bytes written.
+ */
+void ctdb_timeval_push(struct timeval *in, uint8_t *buf, size_t *npush)
+{
+	const size_t n = sizeof(struct timeval);
+
+	memcpy(buf, in, n);
+	*npush = n;
+}
+
+/*
+ * Copy sizeof(struct timeval) bytes from buf into *out.
+ *
+ * Returns EMSGSIZE, leaving *out and *npull untouched, when buflen is too
+ * small; otherwise returns 0 with *npull set to the bytes consumed.
+ */
+int ctdb_timeval_pull(uint8_t *buf, size_t buflen, struct timeval *out,
+		      size_t *npull)
+{
+	const size_t need = sizeof(struct timeval);
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	memcpy(out, buf, need);
+	*npull = need;
+	return 0;
+}
+
+/*
+ * Dummy type to tackle structure padding
+ */
+
+/* Number of padding bytes for 'count': count modulo SIZEOF_VOID_P. */
+size_t ctdb_padding_len(int count)
+{
+	size_t pad = count % SIZEOF_VOID_P;
+
+	return pad;
+}
+
+/*
+ * Emit padding: (count % SIZEOF_VOID_P) zero bytes are written to buf and
+ * that number is stored in *npush.
+ */
+void ctdb_padding_push(int count, uint8_t *buf, size_t *npush)
+{
+	size_t aligned_count = count % SIZEOF_VOID_P;
+
+	/*
+	 * Zero the output in place.  Staging the zeros in a count-sized
+	 * variable-length array would be undefined for count <= 0 and adds
+	 * nothing, since the bytes are only ever copied into buf.
+	 */
+	if (aligned_count > 0) {
+		memset(buf, 0, aligned_count);
+	}
+	*npush = aligned_count;
+}
+
+/*
+ * Skip over padding: checks that buf holds at least
+ * (count % SIZEOF_VOID_P) bytes and reports that many as consumed.  The
+ * padding bytes themselves are not examined.
+ *
+ * Returns EMSGSIZE when buf is too short, 0 otherwise.
+ */
+int ctdb_padding_pull(uint8_t *buf, size_t buflen, int count, size_t *npull)
+{
+	size_t pad = count % SIZEOF_VOID_P;
+
+	if (pad > buflen) {
+		return EMSGSIZE;
+	}
+
+	*npull = pad;
+	return 0;
+}
diff --git a/ctdb/protocol/protocol_basic.h b/ctdb/protocol/protocol_basic.h
new file mode 100644
index 0000000..4086e81
--- /dev/null
+++ b/ctdb/protocol/protocol_basic.h
@@ -0,0 +1,86 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __PROTOCOL_BASIC_H__
+#define __PROTOCOL_BASIC_H__
+
+/*
+ * From protocol/protocol_basic.c
+ */
+
+size_t ctdb_uint8_len(uint8_t *in);
+void ctdb_uint8_push(uint8_t *in, uint8_t *buf, size_t *npush);
+int ctdb_uint8_pull(uint8_t *buf, size_t buflen, uint8_t *out, size_t *npull);
+
+size_t ctdb_uint16_len(uint16_t *in);
+void ctdb_uint16_push(uint16_t *in, uint8_t *buf, size_t *npush);
+int ctdb_uint16_pull(uint8_t *buf, size_t buflen, uint16_t *out,
+ size_t *npull);
+
+size_t ctdb_int32_len(int32_t *in);
+void ctdb_int32_push(int32_t *in, uint8_t *buf, size_t *npush);
+int ctdb_int32_pull(uint8_t *buf, size_t buflen, int32_t *out, size_t *npull);
+
+size_t ctdb_uint32_len(uint32_t *in);
+void ctdb_uint32_push(uint32_t *in, uint8_t *buf, size_t *npush);
+int ctdb_uint32_pull(uint8_t *buf, size_t buflen, uint32_t *out,
+ size_t *npull);
+
+size_t ctdb_uint64_len(uint64_t *in);
+void ctdb_uint64_push(uint64_t *in, uint8_t *buf, size_t *npush);
+int ctdb_uint64_pull(uint8_t *buf, size_t buflen, uint64_t *out,
+ size_t *npull);
+
+size_t ctdb_double_len(double *in);
+void ctdb_double_push(double *in, uint8_t *buf, size_t *npush);
+int ctdb_double_pull(uint8_t *buf, size_t buflen, double *out, size_t *npull);
+
+size_t ctdb_bool_len(bool *in);
+void ctdb_bool_push(bool *in, uint8_t *buf, size_t *npush);
+int ctdb_bool_pull(uint8_t *buf, size_t buflen, bool *out, size_t *npull);
+
+size_t ctdb_chararray_len(char *in, size_t len);
+void ctdb_chararray_push(char *in, size_t len, uint8_t *buf, size_t *npush);
+int ctdb_chararray_pull(uint8_t *buf, size_t buflen, char *out, size_t len,
+ size_t *npull);
+
+size_t ctdb_string_len(const char **in);
+void ctdb_string_push(const char **in, uint8_t *buf, size_t *npush);
+int ctdb_string_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ const char **out, size_t *npull);
+
+size_t ctdb_stringn_len(const char **in);
+void ctdb_stringn_push(const char **in, uint8_t *buf, size_t *npush);
+int ctdb_stringn_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ const char **out, size_t *npull);
+
+size_t ctdb_pid_len(pid_t *in);
+void ctdb_pid_push(pid_t *in, uint8_t *buf, size_t *npush);
+int ctdb_pid_pull(uint8_t *buf, size_t buflen, pid_t *out, size_t *npull);
+
+size_t ctdb_timeval_len(struct timeval *in);
+void ctdb_timeval_push(struct timeval *in, uint8_t *buf, size_t *npush);
+int ctdb_timeval_pull(uint8_t *buf, size_t buflen, struct timeval *out,
+ size_t *npull);
+
+size_t ctdb_padding_len(int count);
+void ctdb_padding_push(int count, uint8_t *buf, size_t *npush);
+int ctdb_padding_pull(uint8_t *buf, size_t buflen, int count, size_t *npull);
+
+#endif /* __PROTOCOL_BASIC_H__ */
diff --git a/ctdb/protocol/protocol_call.c b/ctdb/protocol/protocol_call.c
new file mode 100644
index 0000000..393b118
--- /dev/null
+++ b/ctdb/protocol/protocol_call.c
@@ -0,0 +1,581 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_api.h"
+#include "protocol_private.h"
+
+/*
+ * Marshalled size of a struct ctdb_req_call packet: the request header,
+ * the four uint32_t fields (flags, db_id, callid, hopcount) and the key
+ * and calldata as sized by ctdb_tdb_datan_len().
+ */
+size_t ctdb_req_call_len(struct ctdb_req_header *h, struct ctdb_req_call *c)
+{
+	size_t len = ctdb_req_header_len(h);
+
+	len += ctdb_uint32_len(&c->flags);
+	len += ctdb_uint32_len(&c->db_id);
+	len += ctdb_uint32_len(&c->callid);
+	len += ctdb_uint32_len(&c->hopcount);
+	len += ctdb_tdb_datan_len(&c->key);
+	len += ctdb_tdb_datan_len(&c->calldata);
+
+	return len;
+}
+
+/*
+ * Marshal header h followed by the fields of c into buf.
+ *
+ * Returns EINVAL when c->key.dsize is 0.  When *buflen is smaller than
+ * ctdb_req_call_len(h, c), the required size is stored in *buflen and
+ * EMSGSIZE is returned.  Otherwise returns 0.
+ */
+int ctdb_req_call_push(struct ctdb_req_header *h, struct ctdb_req_call *c,
+ uint8_t *buf, size_t *buflen)
+{
+ size_t offset = 0;
+ size_t length, np;
+ uint32_t u32;
+
+ if (c->key.dsize == 0) {
+ return EINVAL;
+ }
+
+ length = ctdb_req_call_len(h, c);
+ if (*buflen < length) {
+ *buflen = length;
+ return EMSGSIZE;
+ }
+
+ /* The header's length field takes the caller's buffer size as given */
+ h->length = *buflen;
+ ctdb_req_header_push(h, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&c->flags, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&c->db_id, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&c->callid, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&c->hopcount, buf+offset, &np);
+ offset += np;
+
+ /*
+  * Both ctdb_tdb_data_len() values are written first; the key and
+  * calldata contents follow them.
+  */
+ u32 = ctdb_tdb_data_len(&c->key);
+ ctdb_uint32_push(&u32, buf+offset, &np);
+ offset += np;
+
+ u32 = ctdb_tdb_data_len(&c->calldata);
+ ctdb_uint32_push(&u32, buf+offset, &np);
+ offset += np;
+
+ ctdb_tdb_data_push(&c->key, buf+offset, &np);
+ offset += np;
+
+ ctdb_tdb_data_push(&c->calldata, buf+offset, &np);
+ offset += np;
+
+ return 0;
+}
+
+/*
+ * Unmarshal a struct ctdb_req_call packet from buf.
+ *
+ * The header is always parsed; it is copied to *h only when h is not NULL.
+ * mem_ctx is handed to ctdb_tdb_data_pull() for the key and calldata.
+ *
+ * Returns the error of the first field pull that fails, EMSGSIZE when the
+ * key or calldata size read from the packet exceeds what is left in buf,
+ * and 0 on success.  On failure c may have been partially filled in.
+ */
+int ctdb_req_call_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_call *c)
+{
+ struct ctdb_req_header header;
+ size_t offset = 0, np;
+ uint32_t u32;
+ int ret;
+
+ ret = ctdb_req_header_pull(buf+offset, buflen-offset, &header, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ if (h != NULL) {
+ *h = header;
+ }
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &c->flags, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &c->db_id, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &c->callid, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &c->hopcount, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* The key and calldata sizes are read before either one's contents */
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ c->key.dsize = u32;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ c->calldata.dsize = u32;
+
+ /* Reject a key size larger than the bytes remaining in buf */
+ if (buflen-offset < c->key.dsize) {
+ return EMSGSIZE;
+ }
+
+ ret = ctdb_tdb_data_pull(buf+offset, c->key.dsize, mem_ctx, &c->key,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* Same bound check for the calldata size */
+ if (buflen-offset < c->calldata.dsize) {
+ return EMSGSIZE;
+ }
+
+ ret = ctdb_tdb_data_pull(buf+offset, c->calldata.dsize,
+ mem_ctx, &c->calldata, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ return 0;
+}
+
+/*
+ * Marshalled size of a struct ctdb_reply_call packet: the request header,
+ * the int32_t status and the data as sized by ctdb_tdb_datan_len().
+ */
+size_t ctdb_reply_call_len(struct ctdb_req_header *h,
+			   struct ctdb_reply_call *c)
+{
+	size_t len = ctdb_req_header_len(h);
+
+	len += ctdb_int32_len(&c->status);
+	len += ctdb_tdb_datan_len(&c->data);
+
+	return len;
+}
+
+/*
+ * Marshal header h, then c->status and c->data, into buf.
+ *
+ * When *buflen is smaller than ctdb_reply_call_len(h, c), the required
+ * size is stored in *buflen and EMSGSIZE is returned.  Otherwise
+ * h->length is set to *buflen, the packet is written and 0 is returned.
+ */
+int ctdb_reply_call_push(struct ctdb_req_header *h, struct ctdb_reply_call *c,
+			 uint8_t *buf, size_t *buflen)
+{
+	size_t needed = ctdb_reply_call_len(h, c);
+	size_t pos = 0, np;
+
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push(h, buf, &np);
+	pos += np;
+
+	ctdb_int32_push(&c->status, buf+pos, &np);
+	pos += np;
+
+	/* Last field: nothing follows, so the offset is not advanced */
+	ctdb_tdb_datan_push(&c->data, buf+pos, &np);
+
+	return 0;
+}
+
+/*
+ * Unmarshal a struct ctdb_reply_call packet from buf.
+ *
+ * The header is always parsed; it is copied to *h only when h is not NULL.
+ * mem_ctx is handed to ctdb_tdb_datan_pull() for c->data.
+ *
+ * Returns the error of the first field pull that fails, 0 on success.
+ */
+int ctdb_reply_call_pull(uint8_t *buf, size_t buflen,
+			 struct ctdb_req_header *h,
+			 TALLOC_CTX *mem_ctx,
+			 struct ctdb_reply_call *c)
+{
+	struct ctdb_req_header header;
+	size_t pos = 0, np;
+	int ret;
+
+	ret = ctdb_req_header_pull(buf, buflen, &header, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += np;
+
+	if (h != NULL) {
+		*h = header;
+	}
+
+	ret = ctdb_int32_pull(buf+pos, buflen-pos, &c->status, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += np;
+
+	/* Last field: its result is the result of the whole pull */
+	return ctdb_tdb_datan_pull(buf+pos, buflen-pos,
+				   mem_ctx, &c->data, &np);
+}
+
+/*
+ * Marshalled size of a struct ctdb_reply_error packet: the request header,
+ * the int32_t status and the msg as sized by ctdb_tdb_datan_len().
+ */
+size_t ctdb_reply_error_len(struct ctdb_req_header *h,
+			    struct ctdb_reply_error *c)
+{
+	size_t len = ctdb_req_header_len(h);
+
+	len += ctdb_int32_len(&c->status);
+	len += ctdb_tdb_datan_len(&c->msg);
+
+	return len;
+}
+
+/*
+ * Marshal header h, then c->status and c->msg, into buf.
+ *
+ * When *buflen is smaller than ctdb_reply_error_len(h, c), the required
+ * size is stored in *buflen and EMSGSIZE is returned.  Otherwise
+ * h->length is set to *buflen, the packet is written and 0 is returned.
+ */
+int ctdb_reply_error_push(struct ctdb_req_header *h, struct ctdb_reply_error *c,
+			  uint8_t *buf, size_t *buflen)
+{
+	size_t needed = ctdb_reply_error_len(h, c);
+	size_t pos = 0, np;
+
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push(h, buf, &np);
+	pos += np;
+
+	ctdb_int32_push(&c->status, buf+pos, &np);
+	pos += np;
+
+	/* Last field: nothing follows, so the offset is not advanced */
+	ctdb_tdb_datan_push(&c->msg, buf+pos, &np);
+
+	return 0;
+}
+
+/*
+ * Unmarshal a struct ctdb_reply_error packet from buf.
+ *
+ * The header is always parsed; it is copied to *h only when h is not NULL.
+ * mem_ctx is handed to ctdb_tdb_datan_pull() for c->msg.
+ *
+ * Returns the error of the first field pull that fails, 0 on success.
+ */
+int ctdb_reply_error_pull(uint8_t *buf, size_t buflen,
+			  struct ctdb_req_header *h,
+			  TALLOC_CTX *mem_ctx,
+			  struct ctdb_reply_error *c)
+{
+	struct ctdb_req_header header;
+	size_t pos = 0, np;
+	int ret;
+
+	ret = ctdb_req_header_pull(buf, buflen, &header, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += np;
+
+	if (h != NULL) {
+		*h = header;
+	}
+
+	ret = ctdb_int32_pull(buf+pos, buflen-pos, &c->status, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += np;
+
+	/* Last field: its result is the result of the whole pull */
+	return ctdb_tdb_datan_pull(buf+pos, buflen-pos,
+				   mem_ctx, &c->msg, &np);
+}
+
+/*
+ * Marshalled size of a struct ctdb_req_dmaster packet: the request header,
+ * db_id, ctdb_padding_len(4) bytes of padding, rsn, dmaster and the key
+ * and data as sized by ctdb_tdb_datan_len().
+ */
+size_t ctdb_req_dmaster_len(struct ctdb_req_header *h,
+			    struct ctdb_req_dmaster *c)
+{
+	size_t len = ctdb_req_header_len(h);
+
+	len += ctdb_uint32_len(&c->db_id);
+	len += ctdb_padding_len(4);
+	len += ctdb_uint64_len(&c->rsn);
+	len += ctdb_uint32_len(&c->dmaster);
+	len += ctdb_tdb_datan_len(&c->key);
+	len += ctdb_tdb_datan_len(&c->data);
+
+	return len;
+}
+
+/*
+ * Marshal header h followed by the fields of c into buf.
+ *
+ * When *buflen is smaller than ctdb_req_dmaster_len(h, c), the required
+ * size is stored in *buflen and EMSGSIZE is returned.  Otherwise returns 0.
+ */
+int ctdb_req_dmaster_push(struct ctdb_req_header *h, struct ctdb_req_dmaster *c,
+ uint8_t *buf, size_t *buflen)
+{
+ size_t offset = 0, np;
+ size_t length;
+ uint32_t u32;
+
+ length = ctdb_req_dmaster_len(h, c);
+ if (*buflen < length) {
+ *buflen = length;
+ return EMSGSIZE;
+ }
+
+ /* The header's length field takes the caller's buffer size as given */
+ h->length = *buflen;
+ ctdb_req_header_push(h, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&c->db_id, buf+offset, &np);
+ offset += np;
+
+ /* Padding is emitted between db_id and the 64-bit rsn */
+ ctdb_padding_push(4, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint64_push(&c->rsn, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&c->dmaster, buf+offset, &np);
+ offset += np;
+
+ /*
+  * Both ctdb_tdb_data_len() values are written first; the key and
+  * data contents follow them.
+  */
+ u32 = ctdb_tdb_data_len(&c->key);
+ ctdb_uint32_push(&u32, buf+offset, &np);
+ offset += np;
+
+ u32 = ctdb_tdb_data_len(&c->data);
+ ctdb_uint32_push(&u32, buf+offset, &np);
+ offset += np;
+
+ ctdb_tdb_data_push(&c->key, buf+offset, &np);
+ offset += np;
+
+ ctdb_tdb_data_push(&c->data, buf+offset, &np);
+ offset += np;
+
+ return 0;
+}
+
+/*
+ * Unmarshal a struct ctdb_req_dmaster packet from buf.
+ *
+ * The header is always parsed; it is copied to *h only when h is not NULL.
+ * mem_ctx is handed to ctdb_tdb_data_pull() for the key and data.
+ *
+ * Returns the error of the first field pull that fails, EMSGSIZE when the
+ * key or data size read from the packet exceeds what is left in buf, and
+ * 0 on success.  On failure c may have been partially filled in.
+ */
+int ctdb_req_dmaster_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_dmaster *c)
+{
+ struct ctdb_req_header header;
+ size_t offset = 0, np;
+ uint32_t u32;
+ int ret;
+
+ ret = ctdb_req_header_pull(buf+offset, buflen-offset, &header, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ if (h != NULL) {
+ *h = header;
+ }
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &c->db_id, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* Step over the padding between db_id and rsn */
+ ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint64_pull(buf+offset, buflen-offset, &c->rsn, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &c->dmaster, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* The key and data sizes are read before either one's contents */
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ c->key.dsize = u32;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ c->data.dsize = u32;
+
+ /* Reject a key size larger than the bytes remaining in buf */
+ if (buflen-offset < c->key.dsize) {
+ return EMSGSIZE;
+ }
+
+ ret = ctdb_tdb_data_pull(buf+offset, c->key.dsize, mem_ctx, &c->key,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* Same bound check for the data size */
+ if (buflen-offset < c->data.dsize) {
+ return EMSGSIZE;
+ }
+
+ ret = ctdb_tdb_data_pull(buf+offset, c->data.dsize, mem_ctx, &c->data,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ return 0;
+}
+
+/*
+ * Marshalled size of a struct ctdb_reply_dmaster packet: the request
+ * header, db_id, ctdb_padding_len(4) bytes of padding, rsn and the key and
+ * data as sized by ctdb_tdb_datan_len().
+ */
+size_t ctdb_reply_dmaster_len(struct ctdb_req_header *h,
+			      struct ctdb_reply_dmaster *c)
+{
+	size_t len = ctdb_req_header_len(h);
+
+	len += ctdb_uint32_len(&c->db_id);
+	len += ctdb_padding_len(4);
+	len += ctdb_uint64_len(&c->rsn);
+	len += ctdb_tdb_datan_len(&c->key);
+	len += ctdb_tdb_datan_len(&c->data);
+
+	return len;
+}
+
+/*
+ * Marshal header h followed by the fields of c into buf.
+ *
+ * When *buflen is smaller than ctdb_reply_dmaster_len(h, c), the required
+ * size is stored in *buflen and EMSGSIZE is returned.  Otherwise returns 0.
+ */
+int ctdb_reply_dmaster_push(struct ctdb_req_header *h,
+ struct ctdb_reply_dmaster *c,
+ uint8_t *buf, size_t *buflen)
+{
+ size_t offset = 0, np;
+ size_t length;
+ uint32_t u32;
+
+ length = ctdb_reply_dmaster_len(h, c);
+ if (*buflen < length) {
+ *buflen = length;
+ return EMSGSIZE;
+ }
+
+ /* The header's length field takes the caller's buffer size as given */
+ h->length = *buflen;
+ ctdb_req_header_push(h, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&c->db_id, buf+offset, &np);
+ offset += np;
+
+ /* Padding is emitted between db_id and the 64-bit rsn */
+ ctdb_padding_push(4, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint64_push(&c->rsn, buf+offset, &np);
+ offset += np;
+
+ /*
+  * Both ctdb_tdb_data_len() values are written first; the key and
+  * data contents follow them.
+  */
+ u32 = ctdb_tdb_data_len(&c->key);
+ ctdb_uint32_push(&u32, buf+offset, &np);
+ offset += np;
+
+ u32 = ctdb_tdb_data_len(&c->data);
+ ctdb_uint32_push(&u32, buf+offset, &np);
+ offset += np;
+
+ ctdb_tdb_data_push(&c->key, buf+offset, &np);
+ offset += np;
+
+ ctdb_tdb_data_push(&c->data, buf+offset, &np);
+ offset += np;
+
+ return 0;
+}
+
+/*
+ * Unmarshal a struct ctdb_reply_dmaster packet from buf.
+ *
+ * The header is always parsed; it is copied to *h only when h is not NULL.
+ * mem_ctx is handed to ctdb_tdb_data_pull() for the key and data.
+ *
+ * Returns the error of the first field pull that fails, EMSGSIZE when the
+ * key or data size read from the packet exceeds what is left in buf, and
+ * 0 on success.  On failure c may have been partially filled in.
+ */
+int ctdb_reply_dmaster_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_req_header *h,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_dmaster *c)
+{
+ struct ctdb_req_header header;
+ size_t offset = 0, np;
+ uint32_t u32;
+ int ret;
+
+ ret = ctdb_req_header_pull(buf+offset, buflen-offset, &header, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ if (h != NULL) {
+ *h = header;
+ }
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &c->db_id, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* Step over the padding between db_id and rsn */
+ ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint64_pull(buf+offset, buflen-offset, &c->rsn, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* The key and data sizes are read before either one's contents */
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ c->key.dsize = u32;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ c->data.dsize = u32;
+
+ /* Reject a key size larger than the bytes remaining in buf */
+ if (buflen-offset < c->key.dsize) {
+ return EMSGSIZE;
+ }
+
+ ret = ctdb_tdb_data_pull(buf+offset, c->key.dsize, mem_ctx, &c->key,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* Same bound check for the data size */
+ if (buflen-offset < c->data.dsize) {
+ return EMSGSIZE;
+ }
+
+ ret = ctdb_tdb_data_pull(buf+offset, c->data.dsize, mem_ctx, &c->data,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ return 0;
+}
diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c
new file mode 100644
index 0000000..9ca2d4e
--- /dev/null
+++ b/ctdb/protocol/protocol_client.c
@@ -0,0 +1,2352 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_api.h"
+#include "protocol_private.h"
+
+/*
+void ctdb_req_call_fill(struct ctdb_req_call *c,
+ uint32_t db_id, uint32_t flags,
+ uint32_t call_id, TDB_DATA key)
+{
+ c->flags = flags;
+ c->db_id = db_id;
+ c->callid = call_id;
+ c->key = key;
+ c->calldata = tdb_null;
+}
+*/
+
+/*
+ * Common reply check: EPROTO when the reply's opcode differs from
+ * 'opcode', otherwise the reply's status.
+ */
+static int ctdb_reply_control_generic(struct ctdb_reply_control *reply,
+				      uint32_t opcode)
+{
+	return (reply->rdata.opcode == opcode) ? reply->status : EPROTO;
+}
+
+/* CTDB_CONTROL_PROCESS_EXISTS */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_PROCESS_EXISTS with 'pid' as its data;
+ * pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_process_exists(struct ctdb_req_control *request,
+				     pid_t pid)
+{
+	/* Request data */
+	request->rdata.opcode = CTDB_CONTROL_PROCESS_EXISTS;
+	request->rdata.data.pid = pid;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_PROCESS_EXISTS;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_PROCESS_EXISTS reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode.  Otherwise the
+ * reply's status is handed back through *status, reply->status is reset
+ * to 0 and 0 is returned.
+ */
+int ctdb_reply_control_process_exists(struct ctdb_reply_control *reply,
+				      int *status)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_PROCESS_EXISTS) {
+		return EPROTO;
+	}
+
+	/* The status is the answer itself, not an error indication */
+	*status = reply->status;
+	reply->status = 0;
+
+	return 0;
+}
+
+/* CTDB_CONTROL_STATISTICS */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_STATISTICS; pad, srvid, client_id and
+ * flags are zeroed and no request data is set.
+ */
+void ctdb_req_control_statistics(struct ctdb_req_control *request)
+{
+	/* Request data: opcode only */
+	request->rdata.opcode = CTDB_CONTROL_STATISTICS;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_STATISTICS;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_STATISTICS reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode, otherwise the
+ * reply's status.  When the status is 0 the statistics are moved to
+ * mem_ctx with talloc_steal() and returned in *stats; otherwise *stats is
+ * left untouched.
+ */
+int ctdb_reply_control_statistics(struct ctdb_reply_control *reply,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_statistics **stats)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_STATISTICS) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*stats = talloc_steal(mem_ctx, reply->rdata.data.stats);
+	return 0;
+}
+
+/* CTDB_CONTROL_PING */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_PING; pad, srvid, client_id and flags
+ * are zeroed and no request data is set.
+ */
+void ctdb_req_control_ping(struct ctdb_req_control *request)
+{
+	/* Request data: opcode only */
+	request->rdata.opcode = CTDB_CONTROL_PING;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_PING;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_PING reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode.  A negative
+ * status is returned as is.  A non-negative status is handed back through
+ * *num_clients, reply->status is reset to 0 and 0 is returned.
+ */
+int ctdb_reply_control_ping(struct ctdb_reply_control *reply,
+			    int *num_clients)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_PING) {
+		return EPROTO;
+	}
+
+	if (reply->status < 0) {
+		return reply->status;
+	}
+
+	*num_clients = reply->status;
+	reply->status = 0;
+	return 0;
+}
+
+/* CTDB_CONTROL_GETDBPATH */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_GETDBPATH with 'db_id' as its data; pad,
+ * srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_getdbpath(struct ctdb_req_control *request,
+				uint32_t db_id)
+{
+	/* Request data */
+	request->rdata.opcode = CTDB_CONTROL_GETDBPATH;
+	request->rdata.data.db_id = db_id;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_GETDBPATH;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_GETDBPATH reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode, otherwise the
+ * reply's status.  When the status is 0 the path is moved to mem_ctx with
+ * talloc_steal() and returned in *db_path; otherwise *db_path is left
+ * untouched.
+ */
+int ctdb_reply_control_getdbpath(struct ctdb_reply_control *reply,
+				 TALLOC_CTX *mem_ctx, const char **db_path)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GETDBPATH) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*db_path = talloc_steal(mem_ctx, reply->rdata.data.db_path);
+	return 0;
+}
+
+/* CTDB_CONTROL_GETVNNMAP */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_GETVNNMAP; pad, srvid, client_id and
+ * flags are zeroed and no request data is set.
+ */
+void ctdb_req_control_getvnnmap(struct ctdb_req_control *request)
+{
+	/* Request data: opcode only */
+	request->rdata.opcode = CTDB_CONTROL_GETVNNMAP;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_GETVNNMAP;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_GETVNNMAP reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode, otherwise the
+ * reply's status.  When the status is 0 the map is moved to mem_ctx with
+ * talloc_steal() and returned in *vnnmap; otherwise *vnnmap is left
+ * untouched.
+ */
+int ctdb_reply_control_getvnnmap(struct ctdb_reply_control *reply,
+				 TALLOC_CTX *mem_ctx,
+				 struct ctdb_vnn_map **vnnmap)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GETVNNMAP) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*vnnmap = talloc_steal(mem_ctx, reply->rdata.data.vnnmap);
+	return 0;
+}
+
+/* CTDB_CONTROL_SETVNNMAP */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_SETVNNMAP; pad, srvid, client_id and
+ * flags are zeroed.  The 'vnnmap' pointer itself is stored in the request,
+ * no copy is made.
+ */
+void ctdb_req_control_setvnnmap(struct ctdb_req_control *request,
+				struct ctdb_vnn_map *vnnmap)
+{
+	/* Request data */
+	request->rdata.opcode = CTDB_CONTROL_SETVNNMAP;
+	request->rdata.data.vnnmap = vnnmap;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_SETVNNMAP;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_SETVNNMAP reply: the result of
+ * ctdb_reply_control_generic() for that opcode.
+ */
+int ctdb_reply_control_setvnnmap(struct ctdb_reply_control *reply)
+{
+	const uint32_t expected = CTDB_CONTROL_SETVNNMAP;
+
+	return ctdb_reply_control_generic(reply, expected);
+}
+
+/* CTDB_CONTROL_GET_DEBUG */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_GET_DEBUG; pad, srvid, client_id and
+ * flags are zeroed and no request data is set.
+ */
+void ctdb_req_control_get_debug(struct ctdb_req_control *request)
+{
+	/* Request data: opcode only */
+	request->rdata.opcode = CTDB_CONTROL_GET_DEBUG;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_GET_DEBUG;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_GET_DEBUG reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode, otherwise the
+ * reply's status.  When the status is 0 the reply's loglevel is cast to
+ * int and returned in *loglevel; otherwise *loglevel is left untouched.
+ */
+int ctdb_reply_control_get_debug(struct ctdb_reply_control *reply,
+				 int *loglevel)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_DEBUG) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*loglevel = (int)reply->rdata.data.loglevel;
+	return 0;
+}
+
+/* CTDB_CONTROL_SET_DEBUG */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_SET_DEBUG with 'loglevel' (cast to
+ * uint32_t) as its data; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_set_debug(struct ctdb_req_control *request,
+				int loglevel)
+{
+	/* Request data */
+	request->rdata.opcode = CTDB_CONTROL_SET_DEBUG;
+	request->rdata.data.loglevel = (uint32_t)loglevel;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_SET_DEBUG;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_SET_DEBUG reply: the result of
+ * ctdb_reply_control_generic() for that opcode.
+ */
+int ctdb_reply_control_set_debug(struct ctdb_reply_control *reply)
+{
+	const uint32_t expected = CTDB_CONTROL_SET_DEBUG;
+
+	return ctdb_reply_control_generic(reply, expected);
+}
+
+/* CTDB_CONTROL_GET_DBMAP */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_GET_DBMAP; pad, srvid, client_id and
+ * flags are zeroed and no request data is set.
+ */
+void ctdb_req_control_get_dbmap(struct ctdb_req_control *request)
+{
+	/* Request data: opcode only */
+	request->rdata.opcode = CTDB_CONTROL_GET_DBMAP;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_GET_DBMAP;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_GET_DBMAP reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode, otherwise the
+ * reply's status.  When the status is 0 the map is moved to mem_ctx with
+ * talloc_steal() and returned in *dbmap; otherwise *dbmap is left
+ * untouched.
+ */
+int ctdb_reply_control_get_dbmap(struct ctdb_reply_control *reply,
+				 TALLOC_CTX *mem_ctx,
+				 struct ctdb_dbid_map **dbmap)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_DBMAP) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*dbmap = talloc_steal(mem_ctx, reply->rdata.data.dbmap);
+	return 0;
+}
+
+/* CTDB_CONTROL_GET_RECMODE */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_GET_RECMODE; pad, srvid, client_id and
+ * flags are zeroed and no request data is set.
+ */
+void ctdb_req_control_get_recmode(struct ctdb_req_control *request)
+{
+	/* Request data: opcode only */
+	request->rdata.opcode = CTDB_CONTROL_GET_RECMODE;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_GET_RECMODE;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_GET_RECMODE reply.
+ *
+ * Returns EPROTO if the reply carries a different opcode.  A negative
+ * status is returned as is.  A non-negative status is handed back through
+ * *recmode, reply->status is reset to 0 and 0 is returned.
+ */
+int ctdb_reply_control_get_recmode(struct ctdb_reply_control *reply,
+				   int *recmode)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_RECMODE) {
+		return EPROTO;
+	}
+
+	if (reply->status < 0) {
+		return reply->status;
+	}
+
+	*recmode = reply->status;
+	reply->status = 0;
+	return 0;
+}
+
+/* CTDB_CONTROL_SET_RECMODE */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_SET_RECMODE with 'recmode' as its data;
+ * pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_set_recmode(struct ctdb_req_control *request,
+				  int recmode)
+{
+	/* Request data */
+	request->rdata.opcode = CTDB_CONTROL_SET_RECMODE;
+	request->rdata.data.recmode = recmode;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_SET_RECMODE;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_SET_RECMODE reply: the result of
+ * ctdb_reply_control_generic() for that opcode.
+ */
+int ctdb_reply_control_set_recmode(struct ctdb_reply_control *reply)
+{
+	const uint32_t expected = CTDB_CONTROL_SET_RECMODE;
+
+	return ctdb_reply_control_generic(reply, expected);
+}
+
+/* CTDB_CONTROL_STATISTICS_RESET */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_STATISTICS_RESET; pad, srvid, client_id
+ * and flags are zeroed and no request data is set.
+ */
+void ctdb_req_control_statistics_reset(struct ctdb_req_control *request)
+{
+	/* Request data: opcode only */
+	request->rdata.opcode = CTDB_CONTROL_STATISTICS_RESET;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_STATISTICS_RESET;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Interpret a CTDB_CONTROL_STATISTICS_RESET reply: the result of
+ * ctdb_reply_control_generic() for that opcode.
+ */
+int ctdb_reply_control_statistics_reset(struct ctdb_reply_control *reply)
+{
+	const uint32_t expected = CTDB_CONTROL_STATISTICS_RESET;
+
+	return ctdb_reply_control_generic(reply, expected);
+}
+
+/* CTDB_CONTROL_DB_ATTACH */
+
+/*
+ * Fill 'request' for CTDB_CONTROL_DB_ATTACH; pad, srvid, client_id and
+ * flags are zeroed.  The 'db_name' pointer itself is stored in the
+ * request, the string is not copied.
+ */
+void ctdb_req_control_db_attach(struct ctdb_req_control *request,
+				const char *db_name)
+{
+	/* Request data */
+	request->rdata.opcode = CTDB_CONTROL_DB_ATTACH;
+	request->rdata.data.db_name = db_name;
+
+	/* Control header fields */
+	request->opcode = CTDB_CONTROL_DB_ATTACH;
+	request->pad = 0;
+	request->srvid = 0;
+	request->client_id = 0;
+	request->flags = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_DB_ATTACH control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  *DB_ID is written from the payload's db_id only when
+ * the status is 0.
+ */
+int ctdb_reply_control_db_attach(struct ctdb_reply_control *reply,
+				 uint32_t *db_id)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DB_ATTACH) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *db_id alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*db_id = reply->rdata.data.db_id;
+	return 0;
+}
+
+/* CTDB_CONTROL_TRAVERSE_START */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TRAVERSE_START control.  The TRAVERSE
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_traverse_start(struct ctdb_req_control *request,
+				     struct ctdb_traverse_start *traverse)
+{
+	/* Payload: opcode plus the caller's traverse descriptor. */
+	request->rdata.opcode = CTDB_CONTROL_TRAVERSE_START;
+	request->rdata.data.traverse_start = traverse;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TRAVERSE_START;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_TRAVERSE_START control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_traverse_start(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_TRAVERSE_START);
+	return ret;
+}
+
+/* CTDB_CONTROL_TRAVERSE_ALL */
+/* CTDB_CONTROL_TRAVERSE_DATA */
+
+/* CTDB_CONTROL_REGISTER_SRVID */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_REGISTER_SRVID control.  SRVID goes
+ * into the request header's srvid field; no payload member is set and
+ * client_id, flags and pad are cleared.
+ */
+void ctdb_req_control_register_srvid(struct ctdb_req_control *request,
+				     uint64_t srvid)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_REGISTER_SRVID;
+
+	request->opcode = CTDB_CONTROL_REGISTER_SRVID;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = srvid;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_REGISTER_SRVID control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_register_srvid(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_REGISTER_SRVID);
+	return ret;
+}
+
+/* CTDB_CONTROL_DEREGISTER_SRVID */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_DEREGISTER_SRVID control.  SRVID goes
+ * into the request header's srvid field; no payload member is set and
+ * client_id, flags and pad are cleared.
+ */
+void ctdb_req_control_deregister_srvid(struct ctdb_req_control *request,
+				       uint64_t srvid)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_DEREGISTER_SRVID;
+
+	request->opcode = CTDB_CONTROL_DEREGISTER_SRVID;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = srvid;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_DEREGISTER_SRVID control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_deregister_srvid(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_DEREGISTER_SRVID);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_DBNAME */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_DBNAME control carrying DB_ID in
+ * the payload.  srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_dbname(struct ctdb_req_control *request,
+				 uint32_t db_id)
+{
+	/* Payload: opcode plus the database id. */
+	request->rdata.opcode = CTDB_CONTROL_GET_DBNAME;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_DBNAME;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_DBNAME control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's db_name
+ * handed to talloc_steal() with MEM_CTX and stored in *DB_NAME.
+ */
+int ctdb_reply_control_get_dbname(struct ctdb_reply_control *reply,
+				  TALLOC_CTX *mem_ctx, const char **db_name)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_DBNAME) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *db_name alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*db_name = talloc_steal(mem_ctx, reply->rdata.data.db_name);
+	return 0;
+}
+
+/* CTDB_CONTROL_ENABLE_SEQNUM */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_ENABLE_SEQNUM control carrying DB_ID
+ * in the payload.  srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_enable_seqnum(struct ctdb_req_control *request,
+				    uint32_t db_id)
+{
+	/* Payload: opcode plus the database id. */
+	request->rdata.opcode = CTDB_CONTROL_ENABLE_SEQNUM;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_ENABLE_SEQNUM;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_ENABLE_SEQNUM control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_enable_seqnum(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_ENABLE_SEQNUM);
+	return ret;
+}
+
+/* CTDB_CONTROL_UPDATE_SEQNUM */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_UPDATE_SEQNUM control carrying DB_ID
+ * in the payload.  srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_update_seqnum(struct ctdb_req_control *request,
+				    uint32_t db_id)
+{
+	/* Payload: opcode plus the database id. */
+	request->rdata.opcode = CTDB_CONTROL_UPDATE_SEQNUM;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_UPDATE_SEQNUM;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_UPDATE_SEQNUM control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_update_seqnum(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_UPDATE_SEQNUM);
+	return ret;
+}
+
+/* CTDB_CONTROL_DUMP_MEMORY */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_DUMP_MEMORY control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_dump_memory(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_DUMP_MEMORY;
+
+	request->opcode = CTDB_CONTROL_DUMP_MEMORY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_DUMP_MEMORY control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's mem_str
+ * handed to talloc_steal() with MEM_CTX and stored in *MEM_STR.
+ */
+int ctdb_reply_control_dump_memory(struct ctdb_reply_control *reply,
+				   TALLOC_CTX *mem_ctx, const char **mem_str)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DUMP_MEMORY) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *mem_str alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*mem_str = talloc_steal(mem_ctx, reply->rdata.data.mem_str);
+	return 0;
+}
+
+/* CTDB_CONTROL_GET_PID */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_PID control.  No payload member
+ * is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_pid(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_PID;
+
+	request->opcode = CTDB_CONTROL_GET_PID;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_PID control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode.  Otherwise the
+ * status value is copied to *PID unconditionally (no sign check is made
+ * here), reply->status is reset to 0 and 0 is returned.
+ */
+int ctdb_reply_control_get_pid(struct ctdb_reply_control *reply,
+			       pid_t *pid)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_PID) {
+		return EPROTO;
+	}
+
+	/* The result travels in the status field, not in the payload. */
+	*pid = reply->status;
+	reply->status = 0;
+	return 0;
+}
+
+/* CTDB_CONTROL_FREEZE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_FREEZE control.  PRIORITY is carried
+ * in the request header's srvid field; no payload member is set and
+ * client_id, flags and pad are cleared.
+ */
+void ctdb_req_control_freeze(struct ctdb_req_control *request,
+			     uint32_t priority)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_FREEZE;
+
+	request->opcode = CTDB_CONTROL_FREEZE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = priority;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_FREEZE control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_freeze(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_FREEZE);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_PNN */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_PNN control.  No payload member
+ * is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_pnn(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_PNN;
+
+	request->opcode = CTDB_CONTROL_GET_PNN;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_PNN control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode.  A negative
+ * reply->status is returned unchanged.  Otherwise the status value itself
+ * is stored in *PNN, reply->status is reset to 0 and 0 is returned.
+ */
+int ctdb_reply_control_get_pnn(struct ctdb_reply_control *reply,
+			       uint32_t *pnn)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_PNN) {
+		return EPROTO;
+	}
+
+	/* Negative status: hand it back without touching *pnn. */
+	if (reply->status < 0) {
+		return reply->status;
+	}
+
+	/* The result travels in the status field, not in the payload. */
+	*pnn = reply->status;
+	reply->status = 0;
+	return 0;
+}
+
+/* CTDB_CONTROL_SHUTDOWN */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_SHUTDOWN control.  Unlike the other
+ * builders here, flags is set to CTDB_CTRL_FLAG_NOREPLY rather than 0.
+ * No payload member is set; srvid, client_id and pad are cleared.
+ */
+void ctdb_req_control_shutdown(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_SHUTDOWN;
+
+	request->opcode = CTDB_CONTROL_SHUTDOWN;
+	request->flags = CTDB_CTRL_FLAG_NOREPLY;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_SHUTDOWN control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_shutdown(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_SHUTDOWN);
+	return ret;
+}
+
+/* CTDB_CONTROL_TCP_CLIENT */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TCP_CLIENT control.  The CONN pointer
+ * is stored in the payload as-is (nothing is copied); srvid, client_id,
+ * flags and pad are all cleared.
+ */
+void ctdb_req_control_tcp_client(struct ctdb_req_control *request,
+				 struct ctdb_connection *conn)
+{
+	/* Payload: opcode plus the caller's connection pointer. */
+	request->rdata.opcode = CTDB_CONTROL_TCP_CLIENT;
+	request->rdata.data.conn = conn;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TCP_CLIENT;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_TCP_CLIENT control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_tcp_client(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_TCP_CLIENT);
+	return ret;
+}
+
+/* CTDB_CONTROL_TCP_ADD */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TCP_ADD control.  The CONN pointer is
+ * stored in the payload as-is (nothing is copied); srvid, client_id,
+ * flags and pad are all cleared.
+ */
+void ctdb_req_control_tcp_add(struct ctdb_req_control *request,
+			      struct ctdb_connection *conn)
+{
+	/* Payload: opcode plus the caller's connection pointer. */
+	request->rdata.opcode = CTDB_CONTROL_TCP_ADD;
+	request->rdata.data.conn = conn;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TCP_ADD;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_TCP_ADD control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_tcp_add(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_TCP_ADD);
+	return ret;
+}
+
+/* CTDB_CONTROL_TCP_REMOVE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TCP_REMOVE control.  The CONN pointer
+ * is stored in the payload as-is (nothing is copied); srvid, client_id,
+ * flags and pad are all cleared.
+ */
+void ctdb_req_control_tcp_remove(struct ctdb_req_control *request,
+				 struct ctdb_connection *conn)
+{
+	/* Payload: opcode plus the caller's connection pointer. */
+	request->rdata.opcode = CTDB_CONTROL_TCP_REMOVE;
+	request->rdata.data.conn = conn;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TCP_REMOVE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_TCP_REMOVE control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_tcp_remove(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_TCP_REMOVE);
+	return ret;
+}
+
+/* CTDB_CONTROL_STARTUP */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_STARTUP control.  No payload member
+ * is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_startup(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_STARTUP;
+
+	request->opcode = CTDB_CONTROL_STARTUP;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_STARTUP control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_startup(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_STARTUP);
+	return ret;
+}
+
+/* CTDB_CONTROL_SET_TUNABLE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_SET_TUNABLE control.  The TUNABLE
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_set_tunable(struct ctdb_req_control *request,
+				  struct ctdb_tunable *tunable)
+{
+	/* Payload: opcode plus the caller's tunable pointer. */
+	request->rdata.opcode = CTDB_CONTROL_SET_TUNABLE;
+	request->rdata.data.tunable = tunable;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_TUNABLE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_SET_TUNABLE control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_set_tunable(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_SET_TUNABLE);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_TUNABLE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_TUNABLE control.  NAME is stored
+ * in the payload's tun_var member through discard_const(); the string is
+ * not copied.  srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_tunable(struct ctdb_req_control *request,
+				  const char *name)
+{
+	/* Payload: opcode plus the caller's name pointer. */
+	request->rdata.opcode = CTDB_CONTROL_GET_TUNABLE;
+	request->rdata.data.tun_var = discard_const(name);
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_TUNABLE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_TUNABLE control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  *VALUE is written from the payload's tun_value only
+ * when the status is 0.
+ */
+int ctdb_reply_control_get_tunable(struct ctdb_reply_control *reply,
+				   uint32_t *value)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_TUNABLE) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *value alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*value = reply->rdata.data.tun_value;
+	return 0;
+}
+
+/* CTDB_CONTROL_LIST_TUNABLES */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_LIST_TUNABLES control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_list_tunables(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_LIST_TUNABLES;
+
+	request->opcode = CTDB_CONTROL_LIST_TUNABLES;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_LIST_TUNABLES control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's tun_var_list
+ * handed to talloc_steal() with MEM_CTX and stored in *TUN_VAR_LIST.
+ */
+int ctdb_reply_control_list_tunables(struct ctdb_reply_control *reply,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_var_list **tun_var_list)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_LIST_TUNABLES) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *tun_var_list alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*tun_var_list = talloc_steal(mem_ctx,
+				     reply->rdata.data.tun_var_list);
+	return 0;
+}
+
+/* CTDB_CONTROL_MODIFY_FLAGS */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_MODIFY_FLAGS control.  The FLAG_CHANGE
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_modify_flags(struct ctdb_req_control *request,
+				   struct ctdb_node_flag_change *flag_change)
+{
+	/* Payload: opcode plus the caller's flag-change pointer. */
+	request->rdata.opcode = CTDB_CONTROL_MODIFY_FLAGS;
+	request->rdata.data.flag_change = flag_change;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_MODIFY_FLAGS;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_MODIFY_FLAGS control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_modify_flags(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_MODIFY_FLAGS);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_ALL_TUNABLES */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_ALL_TUNABLES control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_all_tunables(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_ALL_TUNABLES;
+
+	request->opcode = CTDB_CONTROL_GET_ALL_TUNABLES;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_ALL_TUNABLES control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's tun_list
+ * handed to talloc_steal() with MEM_CTX and stored in *TUN_LIST.
+ */
+int ctdb_reply_control_get_all_tunables(struct ctdb_reply_control *reply,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_tunable_list **tun_list)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_ALL_TUNABLES) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *tun_list alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*tun_list = talloc_steal(mem_ctx, reply->rdata.data.tun_list);
+	return 0;
+}
+
+/* CTDB_CONTROL_GET_TCP_TICKLE_LIST */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_TCP_TICKLE_LIST control.  The ADDR
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_tcp_tickle_list(struct ctdb_req_control *request,
+					  ctdb_sock_addr *addr)
+{
+	/* Payload: opcode plus the caller's address pointer. */
+	request->rdata.opcode = CTDB_CONTROL_GET_TCP_TICKLE_LIST;
+	request->rdata.data.addr = addr;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_TCP_TICKLE_LIST;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_TCP_TICKLE_LIST control out of
+ * REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's tickles
+ * handed to talloc_steal() with MEM_CTX and stored in *TICKLES.
+ */
+int ctdb_reply_control_get_tcp_tickle_list(struct ctdb_reply_control *reply,
+					   TALLOC_CTX *mem_ctx,
+					   struct ctdb_tickle_list **tickles)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_TCP_TICKLE_LIST) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *tickles alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*tickles = talloc_steal(mem_ctx, reply->rdata.data.tickles);
+	return 0;
+}
+
+/* CTDB_CONTROL_SET_TCP_TICKLE_LIST */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control.  The
+ * TICKLES pointer is stored in the payload as-is (nothing is copied);
+ * srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_set_tcp_tickle_list(struct ctdb_req_control *request,
+					  struct ctdb_tickle_list *tickles)
+{
+	/* Payload: opcode plus the caller's tickle list pointer. */
+	request->rdata.opcode = CTDB_CONTROL_SET_TCP_TICKLE_LIST;
+	request->rdata.data.tickles = tickles;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_TCP_TICKLE_LIST;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_SET_TCP_TICKLE_LIST control by
+ * delegating to ctdb_reply_control_generic() and returning its result
+ * unchanged.
+ */
+int ctdb_reply_control_set_tcp_tickle_list(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_SET_TCP_TICKLE_LIST);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_ATTACH_PERSISTENT */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_DB_ATTACH_PERSISTENT control.  The
+ * DB_NAME pointer is stored in the payload as-is (the string is not
+ * copied); srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_db_attach_persistent(struct ctdb_req_control *request,
+					   const char *db_name)
+{
+	/* Payload: opcode plus the caller's name pointer. */
+	request->rdata.opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
+	request->rdata.data.db_name = db_name;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_DB_ATTACH_PERSISTENT control out of
+ * REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  *DB_ID is written from the payload's db_id only when
+ * the status is 0.
+ */
+int ctdb_reply_control_db_attach_persistent(struct ctdb_reply_control *reply,
+					    uint32_t *db_id)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DB_ATTACH_PERSISTENT) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *db_id alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*db_id = reply->rdata.data.db_id;
+	return 0;
+}
+
+/* CTDB_CONTROL_UPDATE_RECORD */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_UPDATE_RECORD control.  The RECBUF
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_update_record(struct ctdb_req_control *request,
+				    struct ctdb_rec_buffer *recbuf)
+{
+	/* Payload: opcode plus the caller's record buffer pointer. */
+	request->rdata.opcode = CTDB_CONTROL_UPDATE_RECORD;
+	request->rdata.data.recbuf = recbuf;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_UPDATE_RECORD;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_UPDATE_RECORD control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_update_record(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_UPDATE_RECORD);
+	return ret;
+}
+
+/* CTDB_CONTROL_SEND_GRATUITOUS_ARP */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_SEND_GRATUITOUS_ARP control.  The
+ * ADDR_INFO pointer is stored in the payload as-is (nothing is copied);
+ * srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_send_gratuitous_arp(struct ctdb_req_control *request,
+					  struct ctdb_addr_info *addr_info)
+{
+	/* Payload: opcode plus the caller's address info pointer. */
+	request->rdata.opcode = CTDB_CONTROL_SEND_GRATUITOUS_ARP;
+	request->rdata.data.addr_info = addr_info;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SEND_GRATUITOUS_ARP;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_SEND_GRATUITOUS_ARP control by
+ * delegating to ctdb_reply_control_generic() and returning its result
+ * unchanged.
+ */
+int ctdb_reply_control_send_gratuitous_arp(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_SEND_GRATUITOUS_ARP);
+	return ret;
+}
+
+/* CTDB_CONTROL_WIPE_DATABASE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_WIPE_DATABASE control.  The TRANSDB
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_wipe_database(struct ctdb_req_control *request,
+				    struct ctdb_transdb *transdb)
+{
+	/* Payload: opcode plus the caller's transdb pointer. */
+	request->rdata.opcode = CTDB_CONTROL_WIPE_DATABASE;
+	request->rdata.data.transdb = transdb;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_WIPE_DATABASE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_WIPE_DATABASE control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_wipe_database(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_WIPE_DATABASE);
+	return ret;
+}
+
+/* CTDB_CONTROL_UPTIME */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_UPTIME control.  No payload member is
+ * set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_uptime(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_UPTIME;
+
+	request->opcode = CTDB_CONTROL_UPTIME;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_UPTIME control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's uptime
+ * handed to talloc_steal() with MEM_CTX and stored in *UPTIME.
+ */
+int ctdb_reply_control_uptime(struct ctdb_reply_control *reply,
+			      TALLOC_CTX *mem_ctx, struct ctdb_uptime **uptime)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_UPTIME) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *uptime alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*uptime = talloc_steal(mem_ctx, reply->rdata.data.uptime);
+	return 0;
+}
+
+/* CTDB_CONTROL_START_RECOVERY */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_START_RECOVERY control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_start_recovery(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_START_RECOVERY;
+
+	request->opcode = CTDB_CONTROL_START_RECOVERY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_START_RECOVERY control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_start_recovery(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_START_RECOVERY);
+	return ret;
+}
+
+/* CTDB_CONTROL_END_RECOVERY */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_END_RECOVERY control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_end_recovery(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_END_RECOVERY;
+
+	request->opcode = CTDB_CONTROL_END_RECOVERY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_END_RECOVERY control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_end_recovery(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_END_RECOVERY);
+	return ret;
+}
+
+/* CTDB_CONTROL_RELOAD_NODES_FILE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_RELOAD_NODES_FILE control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_reload_nodes_file(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_RELOAD_NODES_FILE;
+
+	request->opcode = CTDB_CONTROL_RELOAD_NODES_FILE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_RELOAD_NODES_FILE control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_reload_nodes_file(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_RELOAD_NODES_FILE);
+	return ret;
+}
+
+/* CTDB_CONTROL_TRY_DELETE_RECORDS */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TRY_DELETE_RECORDS control.  The
+ * RECBUF pointer is stored in the payload as-is (nothing is copied);
+ * srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_try_delete_records(struct ctdb_req_control *request,
+					 struct ctdb_rec_buffer *recbuf)
+{
+	/* Payload: opcode plus the caller's record buffer pointer. */
+	request->rdata.opcode = CTDB_CONTROL_TRY_DELETE_RECORDS;
+	request->rdata.data.recbuf = recbuf;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TRY_DELETE_RECORDS;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_TRY_DELETE_RECORDS control out of
+ * REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's recbuf
+ * handed to talloc_steal() with MEM_CTX and stored in *RECBUF.
+ */
+int ctdb_reply_control_try_delete_records(struct ctdb_reply_control *reply,
+					  TALLOC_CTX *mem_ctx,
+					  struct ctdb_rec_buffer **recbuf)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_TRY_DELETE_RECORDS) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *recbuf alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*recbuf = talloc_steal(mem_ctx, reply->rdata.data.recbuf);
+	return 0;
+}
+
+/* CTDB_CONTROL_ADD_PUBLIC_IP */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_ADD_PUBLIC_IP control.  The ADDR_INFO
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_add_public_ip(struct ctdb_req_control *request,
+				    struct ctdb_addr_info *addr_info)
+{
+	/* Payload: opcode plus the caller's address info pointer. */
+	request->rdata.opcode = CTDB_CONTROL_ADD_PUBLIC_IP;
+	request->rdata.data.addr_info = addr_info;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_ADD_PUBLIC_IP;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_ADD_PUBLIC_IP control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_add_public_ip(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_ADD_PUBLIC_IP);
+	return ret;
+}
+
+/* CTDB_CONTROL_DEL_PUBLIC_IP */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_DEL_PUBLIC_IP control.  The ADDR_INFO
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_del_public_ip(struct ctdb_req_control *request,
+				    struct ctdb_addr_info *addr_info)
+{
+	/* Payload: opcode plus the caller's address info pointer. */
+	request->rdata.opcode = CTDB_CONTROL_DEL_PUBLIC_IP;
+	request->rdata.data.addr_info = addr_info;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DEL_PUBLIC_IP;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_DEL_PUBLIC_IP control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_del_public_ip(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_DEL_PUBLIC_IP);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_CAPABILITIES */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_CAPABILITIES control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_capabilities(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_CAPABILITIES;
+
+	request->opcode = CTDB_CONTROL_GET_CAPABILITIES;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_CAPABILITIES control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  *CAPS is written from the payload's caps only when the
+ * status is 0.
+ */
+int ctdb_reply_control_get_capabilities(struct ctdb_reply_control *reply,
+					uint32_t *caps)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_CAPABILITIES) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *caps alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*caps = reply->rdata.data.caps;
+	return 0;
+}
+
+/* CTDB_CONTROL_RECD_PING */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_RECD_PING control.  No payload member
+ * is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_recd_ping(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_RECD_PING;
+
+	request->opcode = CTDB_CONTROL_RECD_PING;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_RECD_PING control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_recd_ping(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_RECD_PING);
+	return ret;
+}
+
+/* CTDB_CONTROL_RELEASE_IP */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_RELEASE_IP control.  The PUBIP pointer
+ * is stored in the payload as-is (nothing is copied); srvid, client_id,
+ * flags and pad are all cleared.
+ */
+void ctdb_req_control_release_ip(struct ctdb_req_control *request,
+				 struct ctdb_public_ip *pubip)
+{
+	/* Payload: opcode plus the caller's public IP pointer. */
+	request->rdata.opcode = CTDB_CONTROL_RELEASE_IP;
+	request->rdata.data.pubip = pubip;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_RELEASE_IP;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_RELEASE_IP control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_release_ip(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_RELEASE_IP);
+	return ret;
+}
+
+/* CTDB_CONTROL_TAKEOVER_IP */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TAKEOVER_IP control.  The PUBIP
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_takeover_ip(struct ctdb_req_control *request,
+				  struct ctdb_public_ip *pubip)
+{
+	/* Payload: opcode plus the caller's public IP pointer. */
+	request->rdata.opcode = CTDB_CONTROL_TAKEOVER_IP;
+	request->rdata.data.pubip = pubip;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TAKEOVER_IP;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_TAKEOVER_IP control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_takeover_ip(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_TAKEOVER_IP);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_PUBLIC_IPS */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_PUBLIC_IPS control.  No payload
+ * member is set.  The header flags field is
+ * CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE when AVAILABLE_ONLY is true and 0
+ * otherwise; srvid, client_id and pad are cleared.
+ */
+void ctdb_req_control_get_public_ips(struct ctdb_req_control *request,
+				     bool available_only)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_PUBLIC_IPS;
+
+	request->opcode = CTDB_CONTROL_GET_PUBLIC_IPS;
+	request->flags = available_only ?
+		CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE : 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_PUBLIC_IPS control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's pubip_list
+ * handed to talloc_steal() with MEM_CTX and stored in *PUBIP_LIST.
+ */
+int ctdb_reply_control_get_public_ips(struct ctdb_reply_control *reply,
+				      TALLOC_CTX *mem_ctx,
+				      struct ctdb_public_ip_list **pubip_list)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_PUBLIC_IPS) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *pubip_list alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*pubip_list = talloc_steal(mem_ctx, reply->rdata.data.pubip_list);
+	return 0;
+}
+
+/* CTDB_CONTROL_GET_NODEMAP */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_NODEMAP control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_nodemap(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_NODEMAP;
+
+	request->opcode = CTDB_CONTROL_GET_NODEMAP;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_NODEMAP control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's nodemap
+ * handed to talloc_steal() with MEM_CTX and stored in *NODEMAP.
+ */
+int ctdb_reply_control_get_nodemap(struct ctdb_reply_control *reply,
+				   TALLOC_CTX *mem_ctx,
+				   struct ctdb_node_map **nodemap)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_NODEMAP) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *nodemap alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*nodemap = talloc_steal(mem_ctx, reply->rdata.data.nodemap);
+	return 0;
+}
+
+/* CTDB_CONTROL_TRAVERSE_KILL */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TRAVERSE_KILL control.  The TRAVERSE
+ * pointer is stored in the payload's traverse_start member as-is (nothing
+ * is copied); srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_traverse_kill(struct ctdb_req_control *request,
+				    struct ctdb_traverse_start *traverse)
+{
+	/* Payload: opcode plus the caller's traverse descriptor. */
+	request->rdata.opcode = CTDB_CONTROL_TRAVERSE_KILL;
+	request->rdata.data.traverse_start = traverse;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TRAVERSE_KILL;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_TRAVERSE_KILL control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_traverse_kill(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_TRAVERSE_KILL);
+	return ret;
+}
+
+/* CTDB_CONTROL_RECD_RECLOCK_LATENCY */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_RECD_RECLOCK_LATENCY control carrying
+ * the RECLOCK_LATENCY value in the payload.  srvid, client_id, flags and
+ * pad are all cleared.
+ */
+void ctdb_req_control_recd_reclock_latency(struct ctdb_req_control *request,
+					   double reclock_latency)
+{
+	/* Payload: opcode plus the latency value. */
+	request->rdata.opcode = CTDB_CONTROL_RECD_RECLOCK_LATENCY;
+	request->rdata.data.reclock_latency = reclock_latency;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_RECD_RECLOCK_LATENCY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_RECD_RECLOCK_LATENCY control by
+ * delegating to ctdb_reply_control_generic() and returning its result
+ * unchanged.
+ */
+int ctdb_reply_control_recd_reclock_latency(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_RECD_RECLOCK_LATENCY);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_RECLOCK_FILE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_RECLOCK_FILE control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_reclock_file(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_RECLOCK_FILE;
+
+	request->opcode = CTDB_CONTROL_GET_RECLOCK_FILE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_RECLOCK_FILE control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's reclock_file
+ * handed to talloc_steal() with MEM_CTX and stored in *RECLOCK_FILE.
+ */
+int ctdb_reply_control_get_reclock_file(struct ctdb_reply_control *reply,
+					TALLOC_CTX *mem_ctx,
+					const char **reclock_file)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_RECLOCK_FILE) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *reclock_file alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*reclock_file = talloc_steal(mem_ctx,
+				     reply->rdata.data.reclock_file);
+	return 0;
+}
+
+/* CTDB_CONTROL_STOP_NODE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_STOP_NODE control.  No payload member
+ * is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_stop_node(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_STOP_NODE;
+
+	request->opcode = CTDB_CONTROL_STOP_NODE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_STOP_NODE control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_stop_node(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_STOP_NODE);
+	return ret;
+}
+
+/* CTDB_CONTROL_CONTINUE_NODE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_CONTINUE_NODE control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_continue_node(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_CONTINUE_NODE;
+
+	request->opcode = CTDB_CONTROL_CONTINUE_NODE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_CONTINUE_NODE control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_continue_node(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_CONTINUE_NODE);
+	return ret;
+}
+
+/* CTDB_CONTROL_SET_LMASTERROLE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_SET_LMASTERROLE control carrying
+ * LMASTER_ROLE in the payload's role member.  srvid, client_id, flags and
+ * pad are all cleared.
+ */
+void ctdb_req_control_set_lmasterrole(struct ctdb_req_control *request,
+				      uint32_t lmaster_role)
+{
+	/* Payload: opcode plus the role value. */
+	request->rdata.opcode = CTDB_CONTROL_SET_LMASTERROLE;
+	request->rdata.data.role = lmaster_role;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_LMASTERROLE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_SET_LMASTERROLE control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_set_lmasterrole(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_SET_LMASTERROLE);
+	return ret;
+}
+
+/* CTDB_CONTROL_SET_RECMASTERROLE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_SET_RECMASTERROLE control carrying
+ * RECMASTER_ROLE in the payload's role member.  srvid, client_id, flags
+ * and pad are all cleared.
+ */
+void ctdb_req_control_set_recmasterrole(struct ctdb_req_control *request,
+					uint32_t recmaster_role)
+{
+	/* Payload: opcode plus the role value. */
+	request->rdata.opcode = CTDB_CONTROL_SET_RECMASTERROLE;
+	request->rdata.data.role = recmaster_role;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_RECMASTERROLE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_SET_RECMASTERROLE control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_set_recmasterrole(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_SET_RECMASTERROLE);
+	return ret;
+}
+
+/* CTDB_CONTROL_SET_BAN_STATE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_SET_BAN_STATE control.  The BAN_STATE
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_set_ban_state(struct ctdb_req_control *request,
+				    struct ctdb_ban_state *ban_state)
+{
+	/* Payload: opcode plus the caller's ban state pointer. */
+	request->rdata.opcode = CTDB_CONTROL_SET_BAN_STATE;
+	request->rdata.data.ban_state = ban_state;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_BAN_STATE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_SET_BAN_STATE control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_set_ban_state(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_SET_BAN_STATE);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_BAN_STATE */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_BAN_STATE control.  No payload
+ * member is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_ban_state(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_BAN_STATE;
+
+	request->opcode = CTDB_CONTROL_GET_BAN_STATE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_BAN_STATE control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's ban_state
+ * handed to talloc_steal() with MEM_CTX and stored in *BAN_STATE.
+ */
+int ctdb_reply_control_get_ban_state(struct ctdb_reply_control *reply,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_ban_state **ban_state)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_BAN_STATE) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *ban_state alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*ban_state = talloc_steal(mem_ctx, reply->rdata.data.ban_state);
+	return 0;
+}
+
+/* CTDB_CONTROL_REGISTER_NOTIFY */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_REGISTER_NOTIFY control.  The NOTIFY
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_register_notify(struct ctdb_req_control *request,
+				      struct ctdb_notify_data *notify)
+{
+	/* Payload: opcode plus the caller's notify data pointer. */
+	request->rdata.opcode = CTDB_CONTROL_REGISTER_NOTIFY;
+	request->rdata.data.notify = notify;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_REGISTER_NOTIFY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_REGISTER_NOTIFY control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_register_notify(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_REGISTER_NOTIFY);
+	return ret;
+}
+
+/* CTDB_CONTROL_DEREGISTER_NOTIFY */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_DEREGISTER_NOTIFY control.  SRVID is
+ * carried in the payload's srvid member; the header's own srvid field is
+ * cleared along with client_id, flags and pad.
+ */
+void ctdb_req_control_deregister_notify(struct ctdb_req_control *request,
+					uint64_t srvid)
+{
+	/* Payload: opcode plus the srvid value. */
+	request->rdata.opcode = CTDB_CONTROL_DEREGISTER_NOTIFY;
+	request->rdata.data.srvid = srvid;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DEREGISTER_NOTIFY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_DEREGISTER_NOTIFY control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_deregister_notify(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_DEREGISTER_NOTIFY);
+	return ret;
+}
+
+/* CTDB_CONTROL_TRANS3_COMMIT */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_TRANS3_COMMIT control.  The RECBUF
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_trans3_commit(struct ctdb_req_control *request,
+				    struct ctdb_rec_buffer *recbuf)
+{
+	/* Payload: opcode plus the caller's record buffer pointer. */
+	request->rdata.opcode = CTDB_CONTROL_TRANS3_COMMIT;
+	request->rdata.data.recbuf = recbuf;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TRANS3_COMMIT;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_TRANS3_COMMIT control by delegating to
+ * ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_trans3_commit(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_TRANS3_COMMIT);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_DB_SEQNUM */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_DB_SEQNUM control carrying DB_ID
+ * in the payload.  srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_db_seqnum(struct ctdb_req_control *request,
+				    uint32_t db_id)
+{
+	/* Payload: opcode plus the database id. */
+	request->rdata.opcode = CTDB_CONTROL_GET_DB_SEQNUM;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_DB_SEQNUM;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_DB_SEQNUM control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  *SEQNUM is written from the payload's seqnum only when
+ * the status is 0.
+ */
+int ctdb_reply_control_get_db_seqnum(struct ctdb_reply_control *reply,
+				     uint64_t *seqnum)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_DB_SEQNUM) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *seqnum alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*seqnum = reply->rdata.data.seqnum;
+	return 0;
+}
+
+/* CTDB_CONTROL_DB_SET_HEALTHY */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_DB_SET_HEALTHY control carrying DB_ID
+ * in the payload.  srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_db_set_healthy(struct ctdb_req_control *request,
+				     uint32_t db_id)
+{
+	/* Payload: opcode plus the database id. */
+	request->rdata.opcode = CTDB_CONTROL_DB_SET_HEALTHY;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_SET_HEALTHY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Process REPLY for a CTDB_CONTROL_DB_SET_HEALTHY control by delegating
+ * to ctdb_reply_control_generic() and returning its result unchanged.
+ */
+int ctdb_reply_control_db_set_healthy(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_DB_SET_HEALTHY);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_GET_HEALTH */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_DB_GET_HEALTH control carrying DB_ID
+ * in the payload.  srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_db_get_health(struct ctdb_req_control *request,
+				    uint32_t db_id)
+{
+	/* Payload: opcode plus the database id. */
+	request->rdata.opcode = CTDB_CONTROL_DB_GET_HEALTH;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_GET_HEALTH;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_DB_GET_HEALTH control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's reason
+ * handed to talloc_steal() with MEM_CTX and stored in *REASON.
+ */
+int ctdb_reply_control_db_get_health(struct ctdb_reply_control *reply,
+				     TALLOC_CTX *mem_ctx, const char **reason)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DB_GET_HEALTH) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *reason alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*reason = talloc_steal(mem_ctx, reply->rdata.data.reason);
+	return 0;
+}
+
+/* CTDB_CONTROL_GET_PUBLIC_IP_INFO */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_PUBLIC_IP_INFO control.  The ADDR
+ * pointer is stored in the payload as-is (nothing is copied); srvid,
+ * client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_public_ip_info(struct ctdb_req_control *request,
+					 ctdb_sock_addr *addr)
+{
+	/* Payload: opcode plus the caller's address pointer. */
+	request->rdata.opcode = CTDB_CONTROL_GET_PUBLIC_IP_INFO;
+	request->rdata.data.addr = addr;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_PUBLIC_IP_INFO;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_PUBLIC_IP_INFO control out of
+ * REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's ipinfo
+ * handed to talloc_steal() with MEM_CTX and stored in *IPINFO.
+ */
+int ctdb_reply_control_get_public_ip_info(struct ctdb_reply_control *reply,
+					  TALLOC_CTX *mem_ctx,
+					  struct ctdb_public_ip_info **ipinfo)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_PUBLIC_IP_INFO) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *ipinfo alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*ipinfo = talloc_steal(mem_ctx, reply->rdata.data.ipinfo);
+	return 0;
+}
+
+/* CTDB_CONTROL_GET_IFACES */
+
+/*
+ * Prepare REQUEST as a CTDB_CONTROL_GET_IFACES control.  No payload member
+ * is set; srvid, client_id, flags and pad are all cleared.
+ */
+void ctdb_req_control_get_ifaces(struct ctdb_req_control *request)
+{
+	/* Payload opcode first, then the header fields. */
+	request->rdata.opcode = CTDB_CONTROL_GET_IFACES;
+
+	request->opcode = CTDB_CONTROL_GET_IFACES;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Read the result of a CTDB_CONTROL_GET_IFACES control out of REPLY.
+ *
+ * Returns EPROTO when REPLY carries a different opcode, otherwise
+ * reply->status.  Only when the status is 0 is the payload's iface_list
+ * handed to talloc_steal() with MEM_CTX and stored in *IFACE_LIST.
+ */
+int ctdb_reply_control_get_ifaces(struct ctdb_reply_control *reply,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_iface_list **iface_list)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_IFACES) {
+		return EPROTO;
+	}
+
+	/* Non-zero status: report it and leave *iface_list alone. */
+	if (reply->status != 0) {
+		return reply->status;
+	}
+
+	*iface_list = talloc_steal(mem_ctx, reply->rdata.data.iface_list);
+	return 0;
+}
+
+/* CTDB_CONTROL_SET_IFACE_LINK_STATE */
+
+/*
+ * Set up @request as a CTDB_CONTROL_SET_IFACE_LINK_STATE control.
+ * @iface is stored by pointer (no copy is made); pad, srvid, client_id and
+ * flags are zeroed.
+ */
+void ctdb_req_control_set_iface_link_state(struct ctdb_req_control *request,
+					   struct ctdb_iface *iface)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_SET_IFACE_LINK_STATE;
+	request->rdata.data.iface = iface;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_IFACE_LINK_STATE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_SET_IFACE_LINK_STATE reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_set_iface_link_state(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_SET_IFACE_LINK_STATE);
+	return ret;
+}
+
+/* CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE */
+
+/*
+ * Set up @request as a CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE control.
+ * @conn is stored by pointer (no copy is made); pad, srvid, client_id and
+ * flags are zeroed.
+ */
+void ctdb_req_control_tcp_add_delayed_update(struct ctdb_req_control *request,
+					     struct ctdb_connection *conn)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE;
+	request->rdata.data.conn = conn;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_tcp_add_delayed_update(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_STAT_HISTORY */
+
+/*
+ * Set up @request as a CTDB_CONTROL_GET_STAT_HISTORY control.  No payload
+ * field is set; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_get_stat_history(struct ctdb_req_control *request)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_GET_STAT_HISTORY;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_STAT_HISTORY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_GET_STAT_HISTORY reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@stats_list set, to the reply's statistics list moved onto
+ * @mem_ctx with talloc_steal().
+ */
+int ctdb_reply_control_get_stat_history(struct ctdb_reply_control *reply,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_statistics_list **stats_list)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_STAT_HISTORY) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *stats_list is left untouched. */
+		return reply->status;
+	}
+
+	*stats_list = talloc_steal(mem_ctx, reply->rdata.data.stats_list);
+	return 0;
+}
+
+/* CTDB_CONTROL_SCHEDULE_FOR_DELETION */
+
+/*
+ * Set up @request as a CTDB_CONTROL_SCHEDULE_FOR_DELETION control.
+ * @key is stored by pointer (no copy is made); pad, srvid, client_id and
+ * flags are zeroed.
+ */
+void ctdb_req_control_schedule_for_deletion(struct ctdb_req_control *request,
+					    struct ctdb_key_data *key)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_SCHEDULE_FOR_DELETION;
+	request->rdata.data.key = key;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SCHEDULE_FOR_DELETION;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_SCHEDULE_FOR_DELETION reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_schedule_for_deletion(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_SCHEDULE_FOR_DELETION);
+	return ret;
+}
+
+/* CTDB_CONTROL_SET_DB_READONLY */
+
+/*
+ * Set up @request as a CTDB_CONTROL_SET_DB_READONLY control carrying
+ * @db_id.  The remaining header fields (pad, srvid, client_id, flags) are
+ * zeroed.
+ */
+void ctdb_req_control_set_db_readonly(struct ctdb_req_control *request,
+				      uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_SET_DB_READONLY;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_DB_READONLY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_SET_DB_READONLY reply.  No reply data is extracted;
+ * the result is whatever ctdb_reply_control_generic() returns for this
+ * opcode.
+ */
+int ctdb_reply_control_set_db_readonly(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_SET_DB_READONLY);
+	return ret;
+}
+
+/* CTDB_CONTROL_TRAVERSE_START_EXT */
+
+/*
+ * Set up @request as a CTDB_CONTROL_TRAVERSE_START_EXT control.
+ * @traverse is stored by pointer (no copy is made); pad, srvid, client_id
+ * and flags are zeroed.
+ */
+void ctdb_req_control_traverse_start_ext(struct ctdb_req_control *request,
+					 struct ctdb_traverse_start_ext *traverse)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_TRAVERSE_START_EXT;
+	request->rdata.data.traverse_start_ext = traverse;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_TRAVERSE_START_EXT;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_TRAVERSE_START_EXT reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_traverse_start_ext(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_TRAVERSE_START_EXT);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_DB_STATISTICS */
+
+/*
+ * Set up @request as a CTDB_CONTROL_GET_DB_STATISTICS control carrying
+ * @db_id.  The remaining header fields (pad, srvid, client_id, flags) are
+ * zeroed.
+ */
+void ctdb_req_control_get_db_statistics(struct ctdb_req_control *request,
+					uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_GET_DB_STATISTICS;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_DB_STATISTICS;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_GET_DB_STATISTICS reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@dbstats set, to the reply's dbstats moved onto @mem_ctx
+ * with talloc_steal().
+ */
+int ctdb_reply_control_get_db_statistics(struct ctdb_reply_control *reply,
+					 TALLOC_CTX *mem_ctx,
+					 struct ctdb_db_statistics **dbstats)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_DB_STATISTICS) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *dbstats is left untouched. */
+		return reply->status;
+	}
+
+	*dbstats = talloc_steal(mem_ctx, reply->rdata.data.dbstats);
+	return 0;
+}
+
+/* CTDB_CONTROL_SET_DB_STICKY */
+
+/*
+ * Set up @request as a CTDB_CONTROL_SET_DB_STICKY control carrying @db_id.
+ * The remaining header fields (pad, srvid, client_id, flags) are zeroed.
+ */
+void ctdb_req_control_set_db_sticky(struct ctdb_req_control *request,
+				    uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_SET_DB_STICKY;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_SET_DB_STICKY;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_SET_DB_STICKY reply.  No reply data is extracted;
+ * the result is whatever ctdb_reply_control_generic() returns for this
+ * opcode.
+ */
+int ctdb_reply_control_set_db_sticky(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_SET_DB_STICKY);
+	return ret;
+}
+
+/* CTDB_CONTROL_RELOAD_PUBLIC_IPS */
+
+/*
+ * Set up @request as a CTDB_CONTROL_RELOAD_PUBLIC_IPS control.  No payload
+ * field is set; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_reload_public_ips(struct ctdb_req_control *request)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_RELOAD_PUBLIC_IPS;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_RELOAD_PUBLIC_IPS;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_RELOAD_PUBLIC_IPS reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_reload_public_ips(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_RELOAD_PUBLIC_IPS);
+	return ret;
+}
+
+/* CTDB_CONTROL_TRAVERSE_ALL_EXT */
+
+/* CTDB_CONTROL_IPREALLOCATED */
+
+/*
+ * Set up @request as a CTDB_CONTROL_IPREALLOCATED control.  No payload
+ * field is set; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_ipreallocated(struct ctdb_req_control *request)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_IPREALLOCATED;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_IPREALLOCATED;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_IPREALLOCATED reply.  No reply data is extracted;
+ * the result is whatever ctdb_reply_control_generic() returns for this
+ * opcode.
+ */
+int ctdb_reply_control_ipreallocated(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_IPREALLOCATED);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_RUNSTATE */
+
+/*
+ * Set up @request as a CTDB_CONTROL_GET_RUNSTATE control.  No payload
+ * field is set; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_get_runstate(struct ctdb_req_control *request)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_GET_RUNSTATE;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_RUNSTATE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_GET_RUNSTATE reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@runstate set, from the reply's runstate value.
+ */
+int ctdb_reply_control_get_runstate(struct ctdb_reply_control *reply,
+				    enum ctdb_runstate *runstate)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_RUNSTATE) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *runstate is left untouched. */
+		return reply->status;
+	}
+
+	*runstate = reply->rdata.data.runstate;
+	return 0;
+}
+
+/* CTDB_CONTROL_DB_DETACH */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_DETACH control carrying @db_id.
+ * The remaining header fields (pad, srvid, client_id, flags) are zeroed.
+ */
+void ctdb_req_control_db_detach(struct ctdb_req_control *request,
+				uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_DETACH;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_DETACH;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_DETACH reply.  No reply data is extracted; the
+ * result is whatever ctdb_reply_control_generic() returns for this opcode.
+ */
+int ctdb_reply_control_db_detach(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_DB_DETACH);
+	return ret;
+}
+
+/* CTDB_CONTROL_GET_NODES_FILE */
+
+/*
+ * Set up @request as a CTDB_CONTROL_GET_NODES_FILE control.  No payload
+ * field is set; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_get_nodes_file(struct ctdb_req_control *request)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_GET_NODES_FILE;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_GET_NODES_FILE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_GET_NODES_FILE reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@nodemap set, to the reply's node map moved onto @mem_ctx
+ * with talloc_steal().
+ */
+int ctdb_reply_control_get_nodes_file(struct ctdb_reply_control *reply,
+				      TALLOC_CTX *mem_ctx,
+				      struct ctdb_node_map **nodemap)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_GET_NODES_FILE) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *nodemap is left untouched. */
+		return reply->status;
+	}
+
+	*nodemap = talloc_steal(mem_ctx, reply->rdata.data.nodemap);
+	return 0;
+}
+
+/* CTDB_CONTROL_DB_FREEZE */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_FREEZE control carrying @db_id.
+ * The remaining header fields (pad, srvid, client_id, flags) are zeroed.
+ */
+void ctdb_req_control_db_freeze(struct ctdb_req_control *request,
+				uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_FREEZE;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_FREEZE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_FREEZE reply.  No reply data is extracted; the
+ * result is whatever ctdb_reply_control_generic() returns for this opcode.
+ */
+int ctdb_reply_control_db_freeze(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_DB_FREEZE);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_THAW */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_THAW control carrying @db_id.
+ * The remaining header fields (pad, srvid, client_id, flags) are zeroed.
+ */
+void ctdb_req_control_db_thaw(struct ctdb_req_control *request,
+			      uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_THAW;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_THAW;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_THAW reply.  No reply data is extracted; the
+ * result is whatever ctdb_reply_control_generic() returns for this opcode.
+ */
+int ctdb_reply_control_db_thaw(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_DB_THAW);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_TRANSACTION_START */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_TRANSACTION_START control.
+ * @transdb is stored by pointer (no copy is made); pad, srvid, client_id
+ * and flags are zeroed.
+ */
+void ctdb_req_control_db_transaction_start(struct ctdb_req_control *request,
+					   struct ctdb_transdb *transdb)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_TRANSACTION_START;
+	request->rdata.data.transdb = transdb;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_TRANSACTION_START;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_TRANSACTION_START reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_db_transaction_start(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_DB_TRANSACTION_START);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_TRANSACTION_COMMIT */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_TRANSACTION_COMMIT control.
+ * @transdb is stored by pointer (no copy is made); pad, srvid, client_id
+ * and flags are zeroed.
+ */
+void ctdb_req_control_db_transaction_commit(struct ctdb_req_control *request,
+					    struct ctdb_transdb *transdb)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_TRANSACTION_COMMIT;
+	request->rdata.data.transdb = transdb;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_TRANSACTION_COMMIT;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_TRANSACTION_COMMIT reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_db_transaction_commit(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_DB_TRANSACTION_COMMIT);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_TRANSACTION_CANCEL */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_TRANSACTION_CANCEL control carrying
+ * @db_id.  The remaining header fields (pad, srvid, client_id, flags) are
+ * zeroed.
+ */
+void ctdb_req_control_db_transaction_cancel(struct ctdb_req_control *request,
+					    uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_TRANSACTION_CANCEL;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_TRANSACTION_CANCEL;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_TRANSACTION_CANCEL reply.  No reply data is
+ * extracted; the result is whatever ctdb_reply_control_generic() returns
+ * for this opcode.
+ */
+int ctdb_reply_control_db_transaction_cancel(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply,
+					 CTDB_CONTROL_DB_TRANSACTION_CANCEL);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_PULL */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_PULL control.
+ * @pulldb_ext is stored by pointer (no copy is made); pad, srvid,
+ * client_id and flags are zeroed.
+ */
+void ctdb_req_control_db_pull(struct ctdb_req_control *request,
+			      struct ctdb_pulldb_ext *pulldb_ext)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_PULL;
+	request->rdata.data.pulldb_ext = pulldb_ext;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_PULL;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_DB_PULL reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@num_records set, from the reply's num_records value.
+ */
+int ctdb_reply_control_db_pull(struct ctdb_reply_control *reply,
+			       uint32_t *num_records)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DB_PULL) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *num_records is left untouched. */
+		return reply->status;
+	}
+
+	*num_records = reply->rdata.data.num_records;
+	return 0;
+}
+
+/* CTDB_CONTROL_DB_PUSH_START */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_PUSH_START control.
+ * @pulldb_ext is stored by pointer (no copy is made); pad, srvid,
+ * client_id and flags are zeroed.
+ */
+void ctdb_req_control_db_push_start(struct ctdb_req_control *request,
+				    struct ctdb_pulldb_ext *pulldb_ext)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_PUSH_START;
+	request->rdata.data.pulldb_ext = pulldb_ext;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_PUSH_START;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_PUSH_START reply.  No reply data is extracted;
+ * the result is whatever ctdb_reply_control_generic() returns for this
+ * opcode.
+ */
+int ctdb_reply_control_db_push_start(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_DB_PUSH_START);
+	return ret;
+}
+
+/* CTDB_CONTROL_DB_PUSH_CONFIRM */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_PUSH_CONFIRM control carrying
+ * @db_id.  The remaining header fields (pad, srvid, client_id, flags) are
+ * zeroed.
+ */
+void ctdb_req_control_db_push_confirm(struct ctdb_req_control *request,
+				      uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_PUSH_CONFIRM;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_PUSH_CONFIRM;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_DB_PUSH_CONFIRM reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@num_records set, from the reply's num_records value.
+ */
+int ctdb_reply_control_db_push_confirm(struct ctdb_reply_control *reply,
+				       uint32_t *num_records)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DB_PUSH_CONFIRM) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *num_records is left untouched. */
+		return reply->status;
+	}
+
+	*num_records = reply->rdata.data.num_records;
+	return 0;
+}
+
+/* CTDB_CONTROL_DB_OPEN_FLAGS */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_OPEN_FLAGS control carrying @db_id.
+ * The remaining header fields (pad, srvid, client_id, flags) are zeroed.
+ */
+void ctdb_req_control_db_open_flags(struct ctdb_req_control *request,
+				    uint32_t db_id)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_OPEN_FLAGS;
+	request->rdata.data.db_id = db_id;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_OPEN_FLAGS;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_DB_OPEN_FLAGS reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@tdb_flags set, from the reply's tdb_flags value.
+ */
+int ctdb_reply_control_db_open_flags(struct ctdb_reply_control *reply,
+				     int *tdb_flags)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DB_OPEN_FLAGS) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *tdb_flags is left untouched. */
+		return reply->status;
+	}
+
+	*tdb_flags = reply->rdata.data.tdb_flags;
+	return 0;
+}
+
+/* CTDB_CONTROL_DB_ATTACH_REPLICATED */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_ATTACH_REPLICATED control.
+ * @db_name is stored by pointer (the string is not copied); pad, srvid,
+ * client_id and flags are zeroed.
+ */
+void ctdb_req_control_db_attach_replicated(struct ctdb_req_control *request,
+					   const char *db_name)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_ATTACH_REPLICATED;
+	request->rdata.data.db_name = db_name;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_ATTACH_REPLICATED;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_DB_ATTACH_REPLICATED reply.
+ *
+ * Returns EPROTO on an opcode mismatch, otherwise reply->status.  Only on
+ * status 0 is *@db_id set, from the reply's db_id value.
+ */
+int ctdb_reply_control_db_attach_replicated(struct ctdb_reply_control *reply,
+					    uint32_t *db_id)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_DB_ATTACH_REPLICATED) {
+		return EPROTO;
+	}
+
+	if (reply->status != 0) {
+		/* Failed control: *db_id is left untouched. */
+		return reply->status;
+	}
+
+	*db_id = reply->rdata.data.db_id;
+	return 0;
+}
+
+/* CTDB_CONTROL_CHECK_PID_SRVID */
+
+/*
+ * Set up @request as a CTDB_CONTROL_CHECK_PID_SRVID control.
+ * @pid_srvid is stored by pointer (no copy is made); pad, srvid, client_id
+ * and flags are zeroed.
+ */
+void ctdb_req_control_check_pid_srvid(struct ctdb_req_control *request,
+				      struct ctdb_pid_srvid *pid_srvid)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_CHECK_PID_SRVID;
+	request->rdata.data.pid_srvid = pid_srvid;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_CHECK_PID_SRVID;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Extract the result of a CTDB_CONTROL_CHECK_PID_SRVID reply.
+ *
+ * Returns EPROTO on an opcode mismatch.  Otherwise the reply's status is
+ * copied to *@status, reply->status is then reset to 0 (note: this modifies
+ * @reply), and 0 is returned whatever the original status was.
+ */
+int ctdb_reply_control_check_pid_srvid(struct ctdb_reply_control *reply,
+				       int *status)
+{
+	if (reply->rdata.opcode != CTDB_CONTROL_CHECK_PID_SRVID) {
+		return EPROTO;
+	}
+
+	/* Hand the status to the caller before clearing it in the reply. */
+	*status = reply->status;
+	reply->status = 0;
+
+	return 0;
+}
+
+/* CTDB_CONTROL_TUNNEL_REGISTER */
+
+/*
+ * Set up @request as a CTDB_CONTROL_TUNNEL_REGISTER control.
+ * @tunnel_id is carried in the header's srvid field rather than in the
+ * payload; pad, client_id and flags are zeroed.
+ */
+void ctdb_req_control_tunnel_register(struct ctdb_req_control *request,
+				      uint64_t tunnel_id)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_TUNNEL_REGISTER;
+
+	/* Header, with the tunnel id in srvid. */
+	request->opcode = CTDB_CONTROL_TUNNEL_REGISTER;
+	request->srvid = tunnel_id;
+	request->flags = 0;
+	request->client_id = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_TUNNEL_REGISTER reply.
+ *
+ * Returns reply->status when the reply carries the expected opcode, and
+ * EPROTO otherwise.  No reply data is extracted.
+ */
+int ctdb_reply_control_tunnel_register(struct ctdb_reply_control *reply)
+{
+	return (reply->rdata.opcode == CTDB_CONTROL_TUNNEL_REGISTER)
+		? reply->status
+		: EPROTO;
+}
+
+/* CTDB_CONTROL_TUNNEL_DEREGISTER */
+
+/*
+ * Set up @request as a CTDB_CONTROL_TUNNEL_DEREGISTER control.
+ * @tunnel_id is carried in the header's srvid field rather than in the
+ * payload; pad, client_id and flags are zeroed.
+ */
+void ctdb_req_control_tunnel_deregister(struct ctdb_req_control *request,
+					uint64_t tunnel_id)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_TUNNEL_DEREGISTER;
+
+	/* Header, with the tunnel id in srvid. */
+	request->opcode = CTDB_CONTROL_TUNNEL_DEREGISTER;
+	request->srvid = tunnel_id;
+	request->flags = 0;
+	request->client_id = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_TUNNEL_DEREGISTER reply.
+ *
+ * Returns reply->status when the reply carries the expected opcode, and
+ * EPROTO otherwise.  No reply data is extracted.
+ */
+int ctdb_reply_control_tunnel_deregister(struct ctdb_reply_control *reply)
+{
+	return (reply->rdata.opcode == CTDB_CONTROL_TUNNEL_DEREGISTER)
+		? reply->status
+		: EPROTO;
+}
+
+/* CTDB_CONTROL_VACUUM_FETCH */
+
+/*
+ * Set up @request as a CTDB_CONTROL_VACUUM_FETCH control.
+ * @recbuf is stored by pointer (no copy is made); pad, srvid, client_id
+ * and flags are zeroed.
+ */
+void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request,
+				   struct ctdb_rec_buffer *recbuf)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_VACUUM_FETCH;
+	request->rdata.data.recbuf = recbuf;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_VACUUM_FETCH;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_VACUUM_FETCH reply.
+ *
+ * Returns reply->status when the reply carries the expected opcode, and
+ * EPROTO otherwise.  No reply data is extracted.
+ */
+int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply)
+{
+	return (reply->rdata.opcode == CTDB_CONTROL_VACUUM_FETCH)
+		? reply->status
+		: EPROTO;
+}
+
+/* CTDB_CONTROL_DB_VACUUM */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DB_VACUUM control.
+ * @db_vacuum is stored by pointer (no copy is made); pad, srvid, client_id
+ * and flags are zeroed.
+ */
+void ctdb_req_control_db_vacuum(struct ctdb_req_control *request,
+				struct ctdb_db_vacuum *db_vacuum)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_DB_VACUUM;
+	request->rdata.data.db_vacuum = db_vacuum;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DB_VACUUM;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DB_VACUUM reply.
+ *
+ * Returns reply->status when the reply carries the expected opcode, and
+ * EPROTO otherwise.  No reply data is extracted.
+ */
+int ctdb_reply_control_db_vacuum(struct ctdb_reply_control *reply)
+{
+	return (reply->rdata.opcode == CTDB_CONTROL_DB_VACUUM)
+		? reply->status
+		: EPROTO;
+}
+
+/* CTDB_CONTROL_ECHO_DATA */
+
+/*
+ * Set up @request as a CTDB_CONTROL_ECHO_DATA control.
+ * @echo_data is stored by pointer (no copy is made); pad, srvid, client_id
+ * and flags are zeroed.
+ */
+void ctdb_req_control_echo_data(struct ctdb_req_control *request,
+				struct ctdb_echo_data *echo_data)
+{
+	/* Payload: opcode tag plus the argument. */
+	request->rdata.opcode = CTDB_CONTROL_ECHO_DATA;
+	request->rdata.data.echo_data = echo_data;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_ECHO_DATA;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_ECHO_DATA reply.
+ *
+ * Returns reply->status when the reply carries the expected opcode, and
+ * EPROTO otherwise.  No reply data is extracted by this function.
+ */
+int ctdb_reply_control_echo_data(struct ctdb_reply_control *reply)
+{
+	return (reply->rdata.opcode == CTDB_CONTROL_ECHO_DATA)
+		? reply->status
+		: EPROTO;
+}
+
+/* CTDB_CONTROL_DISABLE_NODE */
+
+/*
+ * Set up @request as a CTDB_CONTROL_DISABLE_NODE control.  No payload
+ * field is set; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_disable_node(struct ctdb_req_control *request)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_DISABLE_NODE;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_DISABLE_NODE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_DISABLE_NODE reply.  No reply data is extracted;
+ * the result is whatever ctdb_reply_control_generic() returns for this
+ * opcode.
+ */
+int ctdb_reply_control_disable_node(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_DISABLE_NODE);
+	return ret;
+}
+
+/* CTDB_CONTROL_ENABLE_NODE */
+
+/*
+ * Set up @request as a CTDB_CONTROL_ENABLE_NODE control.  No payload
+ * field is set; pad, srvid, client_id and flags are zeroed.
+ */
+void ctdb_req_control_enable_node(struct ctdb_req_control *request)
+{
+	/* Payload: opcode tag only. */
+	request->rdata.opcode = CTDB_CONTROL_ENABLE_NODE;
+
+	/* Header. */
+	request->opcode = CTDB_CONTROL_ENABLE_NODE;
+	request->flags = 0;
+	request->client_id = 0;
+	request->srvid = 0;
+	request->pad = 0;
+}
+
+/*
+ * Check a CTDB_CONTROL_ENABLE_NODE reply.  No reply data is extracted;
+ * the result is whatever ctdb_reply_control_generic() returns for this
+ * opcode.
+ */
+int ctdb_reply_control_enable_node(struct ctdb_reply_control *reply)
+{
+	int ret;
+
+	ret = ctdb_reply_control_generic(reply, CTDB_CONTROL_ENABLE_NODE);
+	return ret;
+}
diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c
new file mode 100644
index 0000000..e449115
--- /dev/null
+++ b/ctdb/protocol/protocol_control.c
@@ -0,0 +1,2036 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_api.h"
+#include "protocol_private.h"
+
+/*
+ * Return the number of bytes needed to marshal the opcode-specific payload
+ * of a control request.
+ *
+ * The size is obtained from the ctdb_*_len() helper matching the member of
+ * cd->data that the opcode uses.  Opcodes that share a member are grouped
+ * under one computation.  A NULL @cd, an opcode without payload and an
+ * opcode not listed below all yield 0.
+ */
+static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd)
+{
+	uint32_t zero = 0;
+
+	if (cd == NULL) {
+		return 0;
+	}
+
+	switch (cd->opcode) {
+	case CTDB_CONTROL_PROCESS_EXISTS:
+		return ctdb_pid_len(&cd->data.pid);
+
+	/* Payload is a single database id. */
+	case CTDB_CONTROL_GETDBPATH:
+	case CTDB_CONTROL_GET_DBNAME:
+	case CTDB_CONTROL_ENABLE_SEQNUM:
+	case CTDB_CONTROL_UPDATE_SEQNUM:
+	case CTDB_CONTROL_DB_SET_HEALTHY:
+	case CTDB_CONTROL_DB_GET_HEALTH:
+	case CTDB_CONTROL_SET_DB_READONLY:
+	case CTDB_CONTROL_GET_DB_STATISTICS:
+	case CTDB_CONTROL_SET_DB_STICKY:
+	case CTDB_CONTROL_DB_DETACH:
+	case CTDB_CONTROL_DB_FREEZE:
+	case CTDB_CONTROL_DB_THAW:
+	case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
+	case CTDB_CONTROL_DB_PUSH_CONFIRM:
+	case CTDB_CONTROL_DB_OPEN_FLAGS:
+		return ctdb_uint32_len(&cd->data.db_id);
+
+	/* Database id followed by a second uint32_t. */
+	case CTDB_CONTROL_GET_DB_SEQNUM:
+		return ctdb_uint32_len(&cd->data.db_id) +
+		       ctdb_uint32_len(&zero);
+
+	case CTDB_CONTROL_SETVNNMAP:
+		return ctdb_vnn_map_len(cd->data.vnnmap);
+
+	case CTDB_CONTROL_SET_DEBUG:
+		return ctdb_uint32_len(&cd->data.loglevel);
+
+	case CTDB_CONTROL_SET_RECMODE:
+		return ctdb_uint32_len(&cd->data.recmode);
+
+	/* Payload is a database name. */
+	case CTDB_CONTROL_DB_ATTACH:
+	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+	case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+		return ctdb_string_len(&cd->data.db_name);
+
+	case CTDB_CONTROL_TRAVERSE_START:
+	case CTDB_CONTROL_TRAVERSE_KILL:
+		return ctdb_traverse_start_len(cd->data.traverse_start);
+
+	case CTDB_CONTROL_TRAVERSE_ALL:
+		return ctdb_traverse_all_len(cd->data.traverse_all);
+
+	case CTDB_CONTROL_TRAVERSE_DATA:
+		return ctdb_rec_data_len(cd->data.rec_data);
+
+	/* Payload is a connection. */
+	case CTDB_CONTROL_TCP_CLIENT:
+	case CTDB_CONTROL_TCP_ADD:
+	case CTDB_CONTROL_TCP_REMOVE:
+	case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+	case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+	case CTDB_CONTROL_TCP_CLIENT_PASSED:
+		return ctdb_connection_len(cd->data.conn);
+
+	case CTDB_CONTROL_SET_TUNABLE:
+		return ctdb_tunable_len(cd->data.tunable);
+
+	case CTDB_CONTROL_GET_TUNABLE:
+		return ctdb_stringn_len(&cd->data.tun_var);
+
+	case CTDB_CONTROL_MODIFY_FLAGS:
+		return ctdb_node_flag_change_len(cd->data.flag_change);
+
+	/* Payload is a socket address. */
+	case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+	case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+		return ctdb_sock_addr_len(cd->data.addr);
+
+	case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+		return ctdb_tickle_list_len(cd->data.tickles);
+
+	/* Payload is a record buffer. */
+	case CTDB_CONTROL_UPDATE_RECORD:
+	case CTDB_CONTROL_TRY_DELETE_RECORDS:
+	case CTDB_CONTROL_TRANS3_COMMIT:
+	case CTDB_CONTROL_VACUUM_FETCH:
+		return ctdb_rec_buffer_len(cd->data.recbuf);
+
+	case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+	case CTDB_CONTROL_ADD_PUBLIC_IP:
+	case CTDB_CONTROL_DEL_PUBLIC_IP:
+		return ctdb_addr_info_len(cd->data.addr_info);
+
+	case CTDB_CONTROL_WIPE_DATABASE:
+	case CTDB_CONTROL_DB_TRANSACTION_START:
+	case CTDB_CONTROL_DB_TRANSACTION_COMMIT:
+		return ctdb_transdb_len(cd->data.transdb);
+
+	case CTDB_CONTROL_RELEASE_IP:
+	case CTDB_CONTROL_TAKEOVER_IP:
+		return ctdb_public_ip_len(cd->data.pubip);
+
+	case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+		return ctdb_double_len(&cd->data.reclock_latency);
+
+	case CTDB_CONTROL_SET_LMASTERROLE:
+	case CTDB_CONTROL_SET_RECMASTERROLE:
+		return ctdb_uint32_len(&cd->data.role);
+
+	case CTDB_CONTROL_SET_BAN_STATE:
+		return ctdb_ban_state_len(cd->data.ban_state);
+
+	case CTDB_CONTROL_REGISTER_NOTIFY:
+		return ctdb_notify_data_len(cd->data.notify);
+
+	case CTDB_CONTROL_DEREGISTER_NOTIFY:
+		return ctdb_uint64_len(&cd->data.srvid);
+
+	case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+		return ctdb_iface_len(cd->data.iface);
+
+	case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+		return ctdb_key_data_len(cd->data.key);
+
+	case CTDB_CONTROL_TRAVERSE_START_EXT:
+		return ctdb_traverse_start_ext_len(cd->data.traverse_start_ext);
+
+	case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+		return ctdb_traverse_all_ext_len(cd->data.traverse_all_ext);
+
+	case CTDB_CONTROL_DB_PULL:
+	case CTDB_CONTROL_DB_PUSH_START:
+		return ctdb_pulldb_ext_len(cd->data.pulldb_ext);
+
+	case CTDB_CONTROL_CHECK_PID_SRVID:
+		return ctdb_pid_srvid_len(cd->data.pid_srvid);
+
+	case CTDB_CONTROL_DB_VACUUM:
+		return ctdb_db_vacuum_len(cd->data.db_vacuum);
+
+	case CTDB_CONTROL_ECHO_DATA:
+		return ctdb_echo_data_len(cd->data.echo_data);
+	}
+
+	/* Every other opcode has nothing to marshal. */
+	return 0;
+}
+
+/*
+ * Marshal the opcode-specific payload of a control request into @buf.
+ *
+ * The ctdb_*_push() helper matching the member of cd->data used by the
+ * opcode is called; opcodes sharing a member are grouped.  *@npush is set
+ * to the byte count reported by the helper(s), or to 0 for opcodes not
+ * listed below.
+ *
+ * NOTE(review): unlike ctdb_req_control_data_len(), @cd is dereferenced
+ * without a NULL check.  @buf is presumably sized by the caller from
+ * ctdb_req_control_data_len() - confirm against the callers.
+ */
+static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd,
+				       uint8_t *buf, size_t *npush)
+{
+	size_t np = 0, first;
+	uint32_t zero = 0;
+
+	switch (cd->opcode) {
+	case CTDB_CONTROL_PROCESS_EXISTS:
+		ctdb_pid_push(&cd->data.pid, buf, &np);
+		break;
+
+	/* Payload is a single database id. */
+	case CTDB_CONTROL_GETDBPATH:
+	case CTDB_CONTROL_GET_DBNAME:
+	case CTDB_CONTROL_ENABLE_SEQNUM:
+	case CTDB_CONTROL_UPDATE_SEQNUM:
+	case CTDB_CONTROL_DB_SET_HEALTHY:
+	case CTDB_CONTROL_DB_GET_HEALTH:
+	case CTDB_CONTROL_SET_DB_READONLY:
+	case CTDB_CONTROL_GET_DB_STATISTICS:
+	case CTDB_CONTROL_SET_DB_STICKY:
+	case CTDB_CONTROL_DB_DETACH:
+	case CTDB_CONTROL_DB_FREEZE:
+	case CTDB_CONTROL_DB_THAW:
+	case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
+	case CTDB_CONTROL_DB_PUSH_CONFIRM:
+	case CTDB_CONTROL_DB_OPEN_FLAGS:
+		ctdb_uint32_push(&cd->data.db_id, buf, &np);
+		break;
+
+	/* Database id followed by a uint32_t holding zero. */
+	case CTDB_CONTROL_GET_DB_SEQNUM:
+		ctdb_uint32_push(&cd->data.db_id, buf, &np);
+		first = np;
+		ctdb_uint32_push(&zero, buf + first, &np);
+		/* Report the combined size of both fields. */
+		np += first;
+		break;
+
+	case CTDB_CONTROL_SETVNNMAP:
+		ctdb_vnn_map_push(cd->data.vnnmap, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SET_DEBUG:
+		ctdb_uint32_push(&cd->data.loglevel, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SET_RECMODE:
+		ctdb_uint32_push(&cd->data.recmode, buf, &np);
+		break;
+
+	/* Payload is a database name. */
+	case CTDB_CONTROL_DB_ATTACH:
+	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+	case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+		ctdb_string_push(&cd->data.db_name, buf, &np);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_START:
+	case CTDB_CONTROL_TRAVERSE_KILL:
+		ctdb_traverse_start_push(cd->data.traverse_start, buf, &np);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_ALL:
+		ctdb_traverse_all_push(cd->data.traverse_all, buf, &np);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_DATA:
+		ctdb_rec_data_push(cd->data.rec_data, buf, &np);
+		break;
+
+	/* Payload is a connection. */
+	case CTDB_CONTROL_TCP_CLIENT:
+	case CTDB_CONTROL_TCP_ADD:
+	case CTDB_CONTROL_TCP_REMOVE:
+	case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+	case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+	case CTDB_CONTROL_TCP_CLIENT_PASSED:
+		ctdb_connection_push(cd->data.conn, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SET_TUNABLE:
+		ctdb_tunable_push(cd->data.tunable, buf, &np);
+		break;
+
+	case CTDB_CONTROL_GET_TUNABLE:
+		ctdb_stringn_push(&cd->data.tun_var, buf, &np);
+		break;
+
+	case CTDB_CONTROL_MODIFY_FLAGS:
+		ctdb_node_flag_change_push(cd->data.flag_change, buf, &np);
+		break;
+
+	/* Payload is a socket address. */
+	case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+	case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+		ctdb_sock_addr_push(cd->data.addr, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+		ctdb_tickle_list_push(cd->data.tickles, buf, &np);
+		break;
+
+	/* Payload is a record buffer. */
+	case CTDB_CONTROL_UPDATE_RECORD:
+	case CTDB_CONTROL_TRY_DELETE_RECORDS:
+	case CTDB_CONTROL_TRANS3_COMMIT:
+	case CTDB_CONTROL_VACUUM_FETCH:
+		ctdb_rec_buffer_push(cd->data.recbuf, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+	case CTDB_CONTROL_ADD_PUBLIC_IP:
+	case CTDB_CONTROL_DEL_PUBLIC_IP:
+		ctdb_addr_info_push(cd->data.addr_info, buf, &np);
+		break;
+
+	case CTDB_CONTROL_WIPE_DATABASE:
+	case CTDB_CONTROL_DB_TRANSACTION_START:
+	case CTDB_CONTROL_DB_TRANSACTION_COMMIT:
+		ctdb_transdb_push(cd->data.transdb, buf, &np);
+		break;
+
+	case CTDB_CONTROL_RELEASE_IP:
+	case CTDB_CONTROL_TAKEOVER_IP:
+		ctdb_public_ip_push(cd->data.pubip, buf, &np);
+		break;
+
+	case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+		ctdb_double_push(&cd->data.reclock_latency, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SET_LMASTERROLE:
+	case CTDB_CONTROL_SET_RECMASTERROLE:
+		ctdb_uint32_push(&cd->data.role, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SET_BAN_STATE:
+		ctdb_ban_state_push(cd->data.ban_state, buf, &np);
+		break;
+
+	case CTDB_CONTROL_REGISTER_NOTIFY:
+		ctdb_notify_data_push(cd->data.notify, buf, &np);
+		break;
+
+	case CTDB_CONTROL_DEREGISTER_NOTIFY:
+		ctdb_uint64_push(&cd->data.srvid, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+		ctdb_iface_push(cd->data.iface, buf, &np);
+		break;
+
+	case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+		ctdb_key_data_push(cd->data.key, buf, &np);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_START_EXT:
+		ctdb_traverse_start_ext_push(cd->data.traverse_start_ext,
+					     buf, &np);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+		ctdb_traverse_all_ext_push(cd->data.traverse_all_ext,
+					   buf, &np);
+		break;
+
+	case CTDB_CONTROL_DB_PULL:
+	case CTDB_CONTROL_DB_PUSH_START:
+		ctdb_pulldb_ext_push(cd->data.pulldb_ext, buf, &np);
+		break;
+
+	case CTDB_CONTROL_CHECK_PID_SRVID:
+		ctdb_pid_srvid_push(cd->data.pid_srvid, buf, &np);
+		break;
+
+	case CTDB_CONTROL_DB_VACUUM:
+		ctdb_db_vacuum_push(cd->data.db_vacuum, buf, &np);
+		break;
+
+	case CTDB_CONTROL_ECHO_DATA:
+		ctdb_echo_data_push(cd->data.echo_data, buf, &np);
+		break;
+	}
+
+	*npush = np;
+}
+
+/*
+ * Unmarshall the opcode-specific payload of a control request.
+ *
+ * Stores opcode in cd->opcode and then hands buf/buflen to the pull
+ * routine for the cd->data member that this opcode uses.  An opcode with
+ * no case below pulls nothing, so zero bytes are reported as consumed.
+ * mem_ctx is passed through to the pull routines that take it
+ * (presumably as the talloc parent for what they allocate -- confirm
+ * against those routines).
+ *
+ * Returns 0 on success and sets *npull to the number of bytes consumed.
+ * On failure returns the non-zero result of the failing pull routine and
+ * leaves *npull unmodified.
+ */
+static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen,
+ uint32_t opcode,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_req_control_data *cd,
+ size_t *npull)
+{
+ size_t np = 0, offset;
+ uint32_t u32;
+ int ret = 0;
+
+ cd->opcode = opcode;
+
+ switch (opcode) {
+ case CTDB_CONTROL_PROCESS_EXISTS:
+ ret = ctdb_pid_pull(buf, buflen, &cd->data.pid, &np);
+ break;
+
+ case CTDB_CONTROL_GETDBPATH:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_SETVNNMAP:
+ ret = ctdb_vnn_map_pull(buf, buflen, mem_ctx,
+ &cd->data.vnnmap, &np);
+ break;
+
+ case CTDB_CONTROL_SET_DEBUG:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.loglevel, &np);
+ break;
+
+ case CTDB_CONTROL_SET_RECMODE:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.recmode, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.db_name, &np);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_START:
+ ret = ctdb_traverse_start_pull(buf, buflen, mem_ctx,
+ &cd->data.traverse_start, &np);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_ALL:
+ ret = ctdb_traverse_all_pull(buf, buflen, mem_ctx,
+ &cd->data.traverse_all, &np);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_DATA:
+ ret = ctdb_rec_data_pull(buf, buflen, mem_ctx,
+ &cd->data.rec_data, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DBNAME:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_ENABLE_SEQNUM:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_UPDATE_SEQNUM:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_TCP_CLIENT:
+ ret = ctdb_connection_pull(buf, buflen, mem_ctx,
+ &cd->data.conn, &np);
+ break;
+
+ case CTDB_CONTROL_TCP_ADD:
+ ret = ctdb_connection_pull(buf, buflen, mem_ctx,
+ &cd->data.conn, &np);
+ break;
+
+ case CTDB_CONTROL_TCP_REMOVE:
+ ret = ctdb_connection_pull(buf, buflen, mem_ctx,
+ &cd->data.conn, &np);
+ break;
+
+ case CTDB_CONTROL_SET_TUNABLE:
+ ret = ctdb_tunable_pull(buf, buflen, mem_ctx,
+ &cd->data.tunable, &np);
+ break;
+
+ case CTDB_CONTROL_GET_TUNABLE:
+ ret = ctdb_stringn_pull(buf, buflen, mem_ctx,
+ &cd->data.tun_var, &np);
+ break;
+
+ case CTDB_CONTROL_MODIFY_FLAGS:
+ ret = ctdb_node_flag_change_pull(buf, buflen, mem_ctx,
+ &cd->data.flag_change, &np);
+ break;
+
+ case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+ ret = ctdb_sock_addr_pull(buf, buflen, mem_ctx,
+ &cd->data.addr, &np);
+ break;
+
+ case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+ ret = ctdb_tickle_list_pull(buf, buflen, mem_ctx,
+ &cd->data.tickles, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.db_name, &np);
+ break;
+
+ case CTDB_CONTROL_UPDATE_RECORD:
+ ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx,
+ &cd->data.recbuf, &np);
+ break;
+
+ case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+ ret = ctdb_addr_info_pull(buf, buflen, mem_ctx,
+ &cd->data.addr_info, &np);
+ break;
+
+ case CTDB_CONTROL_WIPE_DATABASE:
+ ret = ctdb_transdb_pull(buf, buflen, mem_ctx,
+ &cd->data.transdb, &np);
+ break;
+
+ case CTDB_CONTROL_TRY_DELETE_RECORDS:
+ ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx,
+ &cd->data.recbuf, &np);
+ break;
+
+ case CTDB_CONTROL_ADD_PUBLIC_IP:
+ ret = ctdb_addr_info_pull(buf, buflen, mem_ctx,
+ &cd->data.addr_info, &np);
+ break;
+
+ case CTDB_CONTROL_DEL_PUBLIC_IP:
+ ret = ctdb_addr_info_pull(buf, buflen, mem_ctx,
+ &cd->data.addr_info, &np);
+ break;
+
+ case CTDB_CONTROL_RELEASE_IP:
+ ret = ctdb_public_ip_pull(buf, buflen, mem_ctx,
+ &cd->data.pubip, &np);
+ break;
+
+ case CTDB_CONTROL_TAKEOVER_IP:
+ ret = ctdb_public_ip_pull(buf, buflen, mem_ctx,
+ &cd->data.pubip, &np);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_KILL:
+ ret = ctdb_traverse_start_pull(buf, buflen, mem_ctx,
+ &cd->data.traverse_start, &np);
+ break;
+
+ case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+ ret = ctdb_double_pull(buf, buflen, &cd->data.reclock_latency,
+ &np);
+ break;
+
+ case CTDB_CONTROL_SET_LMASTERROLE:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.role, &np);
+ break;
+
+ case CTDB_CONTROL_SET_RECMASTERROLE:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.role, &np);
+ break;
+
+ case CTDB_CONTROL_SET_BAN_STATE:
+ ret = ctdb_ban_state_pull(buf, buflen, mem_ctx,
+ &cd->data.ban_state, &np);
+ break;
+
+ case CTDB_CONTROL_REGISTER_NOTIFY:
+ ret = ctdb_notify_data_pull(buf, buflen, mem_ctx,
+ &cd->data.notify, &np);
+ break;
+
+ case CTDB_CONTROL_DEREGISTER_NOTIFY:
+ ret = ctdb_uint64_pull(buf, buflen, &cd->data.srvid, &np);
+ break;
+
+ case CTDB_CONTROL_TRANS3_COMMIT:
+ ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx,
+ &cd->data.recbuf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DB_SEQNUM:
+ /*
+ * The payload is two uint32 values: db_id, then a second word
+ * that is read into the local u32 and not stored in cd.  If the
+ * second pull fails the offset arithmetic below still runs, but
+ * the non-zero ret makes the function return before np is used.
+ */
+ offset = 0;
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ if (ret != 0) {
+ break;
+ }
+ offset += np;
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+ offset += np;
+ np = offset;
+ break;
+
+ case CTDB_CONTROL_DB_SET_HEALTHY:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_DB_GET_HEALTH:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+ ret = ctdb_sock_addr_pull(buf, buflen, mem_ctx,
+ &cd->data.addr, &np);
+ break;
+
+ case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+ ret = ctdb_iface_pull(buf, buflen, mem_ctx,
+ &cd->data.iface, &np);
+ break;
+
+ case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+ ret = ctdb_connection_pull(buf, buflen, mem_ctx,
+ &cd->data.conn, &np);
+ break;
+
+ case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+ ret = ctdb_key_data_pull(buf, buflen, mem_ctx,
+ &cd->data.key, &np);
+ break;
+
+ case CTDB_CONTROL_SET_DB_READONLY:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_START_EXT:
+ ret = ctdb_traverse_start_ext_pull(buf, buflen, mem_ctx,
+ &cd->data.traverse_start_ext,
+ &np);
+ break;
+
+ case CTDB_CONTROL_GET_DB_STATISTICS:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_SET_DB_STICKY:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+ ret = ctdb_traverse_all_ext_pull(buf, buflen, mem_ctx,
+ &cd->data.traverse_all_ext,
+ &np);
+ break;
+
+ case CTDB_CONTROL_DB_DETACH:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_DB_FREEZE:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_DB_THAW:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_DB_TRANSACTION_START:
+ ret = ctdb_transdb_pull(buf, buflen, mem_ctx,
+ &cd->data.transdb, &np);
+ break;
+
+ case CTDB_CONTROL_DB_TRANSACTION_COMMIT:
+ ret = ctdb_transdb_pull(buf, buflen, mem_ctx,
+ &cd->data.transdb, &np);
+ break;
+
+ case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_DB_PULL:
+ ret = ctdb_pulldb_ext_pull(buf, buflen, mem_ctx,
+ &cd->data.pulldb_ext, &np);
+ break;
+
+ case CTDB_CONTROL_DB_PUSH_START:
+ ret = ctdb_pulldb_ext_pull(buf, buflen, mem_ctx,
+ &cd->data.pulldb_ext, &np);
+ break;
+
+ case CTDB_CONTROL_DB_PUSH_CONFIRM:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_DB_OPEN_FLAGS:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.db_name, &np);
+ break;
+
+ case CTDB_CONTROL_CHECK_PID_SRVID:
+ ret = ctdb_pid_srvid_pull(buf, buflen, mem_ctx,
+ &cd->data.pid_srvid, &np);
+ break;
+
+ case CTDB_CONTROL_VACUUM_FETCH:
+ ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx,
+ &cd->data.recbuf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_VACUUM:
+ ret = ctdb_db_vacuum_pull(buf,
+ buflen,
+ mem_ctx,
+ &cd->data.db_vacuum,
+ &np);
+ break;
+
+ case CTDB_CONTROL_ECHO_DATA:
+ ret = ctdb_echo_data_pull(buf,
+ buflen,
+ mem_ctx,
+ &cd->data.echo_data,
+ &np);
+ break;
+
+ case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+ ret = ctdb_connection_pull(buf,
+ buflen,
+ mem_ctx,
+ &cd->data.conn,
+ &np);
+ break;
+
+ case CTDB_CONTROL_TCP_CLIENT_PASSED:
+ ret = ctdb_connection_pull(buf,
+ buflen,
+ mem_ctx,
+ &cd->data.conn,
+ &np);
+ break;
+ }
+
+ /* Only report a consumed byte count when the pull succeeded. */
+ if (ret != 0) {
+ return ret;
+ }
+
+ *npull = np;
+ return 0;
+}
+
+/*
+ * Compute the marshalled size of the opcode-specific payload of a control
+ * reply.
+ *
+ * Returns 0 when cd is NULL, when the case for cd->opcode is a bare break
+ * (nothing is sized for that opcode) or when the opcode has no case at
+ * all.  Otherwise returns the result of the length routine for the
+ * cd->data member that cd->opcode uses.
+ */
+static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd)
+{
+ size_t len = 0;
+
+ if (cd == NULL) {
+ return 0;
+ }
+
+ /* Cases consisting only of "break" leave len at its initial 0. */
+ switch (cd->opcode) {
+ case CTDB_CONTROL_PROCESS_EXISTS:
+ break;
+
+ case CTDB_CONTROL_STATISTICS:
+ len = ctdb_statistics_len(cd->data.stats);
+ break;
+
+ case CTDB_CONTROL_PING:
+ break;
+
+ case CTDB_CONTROL_GETDBPATH:
+ len = ctdb_string_len(&cd->data.db_path);
+ break;
+
+ case CTDB_CONTROL_GETVNNMAP:
+ len = ctdb_vnn_map_len(cd->data.vnnmap);
+ break;
+
+ case CTDB_CONTROL_SETVNNMAP:
+ break;
+
+ case CTDB_CONTROL_GET_DEBUG:
+ len = ctdb_uint32_len(&cd->data.loglevel);
+ break;
+
+ case CTDB_CONTROL_SET_DEBUG:
+ break;
+
+ case CTDB_CONTROL_GET_DBMAP:
+ len = ctdb_dbid_map_len(cd->data.dbmap);
+ break;
+
+ case CTDB_CONTROL_GET_RECMODE:
+ break;
+
+ case CTDB_CONTROL_SET_RECMODE:
+ break;
+
+ case CTDB_CONTROL_STATISTICS_RESET:
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH:
+ len = ctdb_uint32_len(&cd->data.db_id);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_START:
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_ALL:
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_DATA:
+ break;
+
+ case CTDB_CONTROL_REGISTER_SRVID:
+ break;
+
+ case CTDB_CONTROL_DEREGISTER_SRVID:
+ break;
+
+ case CTDB_CONTROL_GET_DBNAME:
+ len = ctdb_string_len(&cd->data.db_name);
+ break;
+
+ case CTDB_CONTROL_ENABLE_SEQNUM:
+ break;
+
+ case CTDB_CONTROL_UPDATE_SEQNUM:
+ break;
+
+ case CTDB_CONTROL_DUMP_MEMORY:
+ len = ctdb_string_len(&cd->data.mem_str);
+ break;
+
+ case CTDB_CONTROL_GET_PID:
+ break;
+
+ case CTDB_CONTROL_FREEZE:
+ break;
+
+ case CTDB_CONTROL_GET_PNN:
+ break;
+
+ case CTDB_CONTROL_SHUTDOWN:
+ break;
+
+ case CTDB_CONTROL_TCP_CLIENT:
+ break;
+
+ case CTDB_CONTROL_TCP_ADD:
+ break;
+
+ case CTDB_CONTROL_TCP_REMOVE:
+ break;
+
+ case CTDB_CONTROL_STARTUP:
+ break;
+
+ case CTDB_CONTROL_SET_TUNABLE:
+ break;
+
+ case CTDB_CONTROL_GET_TUNABLE:
+ len = ctdb_uint32_len(&cd->data.tun_value);
+ break;
+
+ case CTDB_CONTROL_LIST_TUNABLES:
+ len = ctdb_var_list_len(cd->data.tun_var_list);
+ break;
+
+ case CTDB_CONTROL_MODIFY_FLAGS:
+ break;
+
+ case CTDB_CONTROL_GET_ALL_TUNABLES:
+ len = ctdb_tunable_list_len(cd->data.tun_list);
+ break;
+
+ case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+ len = ctdb_tickle_list_len(cd->data.tickles);
+ break;
+
+ case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+ len = ctdb_uint32_len(&cd->data.db_id);
+ break;
+
+ case CTDB_CONTROL_UPDATE_RECORD:
+ break;
+
+ case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+ break;
+
+ case CTDB_CONTROL_WIPE_DATABASE:
+ break;
+
+ case CTDB_CONTROL_UPTIME:
+ len = ctdb_uptime_len(cd->data.uptime);
+ break;
+
+ case CTDB_CONTROL_START_RECOVERY:
+ break;
+
+ case CTDB_CONTROL_END_RECOVERY:
+ break;
+
+ case CTDB_CONTROL_RELOAD_NODES_FILE:
+ break;
+
+ case CTDB_CONTROL_TRY_DELETE_RECORDS:
+ len = ctdb_rec_buffer_len(cd->data.recbuf);
+ break;
+
+ case CTDB_CONTROL_ADD_PUBLIC_IP:
+ break;
+
+ case CTDB_CONTROL_DEL_PUBLIC_IP:
+ break;
+
+ case CTDB_CONTROL_GET_CAPABILITIES:
+ len = ctdb_uint32_len(&cd->data.caps);
+ break;
+
+ case CTDB_CONTROL_RECD_PING:
+ break;
+
+ case CTDB_CONTROL_RELEASE_IP:
+ break;
+
+ case CTDB_CONTROL_TAKEOVER_IP:
+ break;
+
+ case CTDB_CONTROL_GET_PUBLIC_IPS:
+ len = ctdb_public_ip_list_len(cd->data.pubip_list);
+ break;
+
+ case CTDB_CONTROL_GET_NODEMAP:
+ len = ctdb_node_map_len(cd->data.nodemap);
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_KILL:
+ break;
+
+ case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+ break;
+
+ case CTDB_CONTROL_GET_RECLOCK_FILE:
+ len = ctdb_string_len(&cd->data.reclock_file);
+ break;
+
+ case CTDB_CONTROL_STOP_NODE:
+ break;
+
+ case CTDB_CONTROL_CONTINUE_NODE:
+ break;
+
+ case CTDB_CONTROL_SET_LMASTERROLE:
+ break;
+
+ case CTDB_CONTROL_SET_RECMASTERROLE:
+ break;
+
+ case CTDB_CONTROL_SET_BAN_STATE:
+ break;
+
+ case CTDB_CONTROL_GET_BAN_STATE:
+ len = ctdb_ban_state_len(cd->data.ban_state);
+ break;
+
+ case CTDB_CONTROL_REGISTER_NOTIFY:
+ break;
+
+ case CTDB_CONTROL_DEREGISTER_NOTIFY:
+ break;
+
+ case CTDB_CONTROL_TRANS3_COMMIT:
+ break;
+
+ case CTDB_CONTROL_GET_DB_SEQNUM:
+ len = ctdb_uint64_len(&cd->data.seqnum);
+ break;
+
+ case CTDB_CONTROL_DB_SET_HEALTHY:
+ break;
+
+ case CTDB_CONTROL_DB_GET_HEALTH:
+ len = ctdb_string_len(&cd->data.reason);
+ break;
+
+ case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+ len = ctdb_public_ip_info_len(cd->data.ipinfo);
+ break;
+
+ case CTDB_CONTROL_GET_IFACES:
+ len = ctdb_iface_list_len(cd->data.iface_list);
+ break;
+
+ case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+ break;
+
+ case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+ break;
+
+ case CTDB_CONTROL_GET_STAT_HISTORY:
+ len = ctdb_statistics_list_len(cd->data.stats_list);
+ break;
+
+ case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+ break;
+
+ case CTDB_CONTROL_SET_DB_READONLY:
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_START_EXT:
+ break;
+
+ case CTDB_CONTROL_GET_DB_STATISTICS:
+ len = ctdb_db_statistics_len(cd->data.dbstats);
+ break;
+
+ case CTDB_CONTROL_SET_DB_STICKY:
+ break;
+
+ case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
+ break;
+
+ case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+ break;
+
+ case CTDB_CONTROL_IPREALLOCATED:
+ break;
+
+ case CTDB_CONTROL_GET_RUNSTATE:
+ len = ctdb_uint32_len(&cd->data.runstate);
+ break;
+
+ case CTDB_CONTROL_DB_DETACH:
+ break;
+
+ case CTDB_CONTROL_GET_NODES_FILE:
+ len = ctdb_node_map_len(cd->data.nodemap);
+ break;
+
+ case CTDB_CONTROL_DB_FREEZE:
+ break;
+
+ case CTDB_CONTROL_DB_THAW:
+ break;
+
+ case CTDB_CONTROL_DB_TRANSACTION_START:
+ break;
+
+ case CTDB_CONTROL_DB_TRANSACTION_COMMIT:
+ break;
+
+ case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
+ break;
+
+ case CTDB_CONTROL_DB_PULL:
+ len = ctdb_uint32_len(&cd->data.num_records);
+ break;
+
+ case CTDB_CONTROL_DB_PUSH_START:
+ break;
+
+ case CTDB_CONTROL_DB_PUSH_CONFIRM:
+ len = ctdb_uint32_len(&cd->data.num_records);
+ break;
+
+ case CTDB_CONTROL_DB_OPEN_FLAGS:
+ len = ctdb_int32_len(&cd->data.tdb_flags);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+ len = ctdb_uint32_len(&cd->data.db_id);
+ break;
+
+ case CTDB_CONTROL_CHECK_PID_SRVID:
+ break;
+
+ case CTDB_CONTROL_TUNNEL_REGISTER:
+ break;
+
+ case CTDB_CONTROL_TUNNEL_DEREGISTER:
+ break;
+
+ case CTDB_CONTROL_VACUUM_FETCH:
+ break;
+
+ case CTDB_CONTROL_DB_VACUUM:
+ break;
+
+ case CTDB_CONTROL_ECHO_DATA:
+ len = ctdb_echo_data_len(cd->data.echo_data);
+ break;
+
+ case CTDB_CONTROL_DISABLE_NODE:
+ break;
+
+ case CTDB_CONTROL_ENABLE_NODE:
+ break;
+
+ case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+ break;
+
+ case CTDB_CONTROL_TCP_CLIENT_PASSED:
+ break;
+ }
+
+ return len;
+}
+
+/*
+ * Marshall the opcode-specific payload of a control reply into buf.
+ *
+ * Dispatches on cd->opcode to the push routine for the matching cd->data
+ * member.  An opcode whose case is a bare break, or that has no case
+ * here, writes nothing.  *npush is always set: to the byte count reported
+ * by the push routine, or to 0 when nothing was written.
+ *
+ * No buffer size is passed in and cd is dereferenced without a NULL
+ * check, so the caller must supply a valid cd and a large enough buf.
+ */
+static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd,
+ uint8_t *buf, size_t *npush)
+{
+ size_t np = 0;
+
+ switch (cd->opcode) {
+ case CTDB_CONTROL_STATISTICS:
+ ctdb_statistics_push(cd->data.stats, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GETDBPATH:
+ ctdb_string_push(&cd->data.db_path, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GETVNNMAP:
+ ctdb_vnn_map_push(cd->data.vnnmap, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DEBUG:
+ ctdb_uint32_push(&cd->data.loglevel, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DBMAP:
+ ctdb_dbid_map_push(cd->data.dbmap, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH:
+ ctdb_uint32_push(&cd->data.db_id, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DBNAME:
+ ctdb_string_push(&cd->data.db_name, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DUMP_MEMORY:
+ ctdb_string_push(&cd->data.mem_str, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_PID:
+ break;
+
+ case CTDB_CONTROL_GET_TUNABLE:
+ ctdb_uint32_push(&cd->data.tun_value, buf, &np);
+ break;
+
+ case CTDB_CONTROL_LIST_TUNABLES:
+ ctdb_var_list_push(cd->data.tun_var_list, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_ALL_TUNABLES:
+ ctdb_tunable_list_push(cd->data.tun_list, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+ ctdb_tickle_list_push(cd->data.tickles, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+ ctdb_uint32_push(&cd->data.db_id, buf, &np);
+ break;
+
+ case CTDB_CONTROL_UPTIME:
+ ctdb_uptime_push(cd->data.uptime, buf, &np);
+ break;
+
+ case CTDB_CONTROL_TRY_DELETE_RECORDS:
+ ctdb_rec_buffer_push(cd->data.recbuf, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_CAPABILITIES:
+ ctdb_uint32_push(&cd->data.caps, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_PUBLIC_IPS:
+ ctdb_public_ip_list_push(cd->data.pubip_list, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_NODEMAP:
+ ctdb_node_map_push(cd->data.nodemap, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_RECLOCK_FILE:
+ ctdb_string_push(&cd->data.reclock_file, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_BAN_STATE:
+ ctdb_ban_state_push(cd->data.ban_state, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DB_SEQNUM:
+ ctdb_uint64_push(&cd->data.seqnum, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_GET_HEALTH:
+ ctdb_string_push(&cd->data.reason, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+ ctdb_public_ip_info_push(cd->data.ipinfo, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_IFACES:
+ ctdb_iface_list_push(cd->data.iface_list, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_STAT_HISTORY:
+ ctdb_statistics_list_push(cd->data.stats_list, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DB_STATISTICS:
+ ctdb_db_statistics_push(cd->data.dbstats, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_RUNSTATE:
+ ctdb_uint32_push(&cd->data.runstate, buf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_NODES_FILE:
+ ctdb_node_map_push(cd->data.nodemap, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_PULL:
+ ctdb_uint32_push(&cd->data.num_records, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_PUSH_CONFIRM:
+ ctdb_uint32_push(&cd->data.num_records, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_OPEN_FLAGS:
+ ctdb_int32_push(&cd->data.tdb_flags, buf, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+ ctdb_uint32_push(&cd->data.db_id, buf, &np);
+ break;
+
+ case CTDB_CONTROL_CHECK_PID_SRVID:
+ break;
+
+ case CTDB_CONTROL_VACUUM_FETCH:
+ break;
+
+ case CTDB_CONTROL_DB_VACUUM:
+ break;
+
+ case CTDB_CONTROL_ECHO_DATA:
+ ctdb_echo_data_push(cd->data.echo_data, buf, &np);
+ break;
+ }
+
+ *npush = np;
+}
+
+/*
+ * Unmarshall the opcode-specific payload of a control reply.
+ *
+ * Stores opcode in cd->opcode and then hands buf/buflen to the pull
+ * routine for the cd->data member that this opcode uses.  An opcode whose
+ * case is a bare break, or that has no case here, pulls nothing and
+ * reports zero bytes consumed.  mem_ctx is passed through to the pull
+ * routines that take it (presumably as the talloc parent for what they
+ * allocate -- confirm against those routines).
+ *
+ * Returns 0 on success and sets *npull to the number of bytes consumed.
+ * On failure returns the non-zero result of the failing pull routine and
+ * leaves *npull unmodified.
+ */
+static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen,
+ uint32_t opcode, TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_control_data *cd,
+ size_t *npull)
+{
+ size_t np = 0;
+ int ret = 0;
+
+ cd->opcode = opcode;
+
+ switch (opcode) {
+ case CTDB_CONTROL_STATISTICS:
+ ret = ctdb_statistics_pull(buf, buflen, mem_ctx,
+ &cd->data.stats, &np);
+ break;
+
+ case CTDB_CONTROL_GETDBPATH:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.db_path, &np);
+ break;
+
+ case CTDB_CONTROL_GETVNNMAP:
+ ret = ctdb_vnn_map_pull(buf, buflen, mem_ctx,
+ &cd->data.vnnmap, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DEBUG:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.loglevel, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DBMAP:
+ ret = ctdb_dbid_map_pull(buf, buflen, mem_ctx,
+ &cd->data.dbmap, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DBNAME:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.db_name, &np);
+ break;
+
+ case CTDB_CONTROL_DUMP_MEMORY:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.mem_str, &np);
+ break;
+
+ case CTDB_CONTROL_GET_PID:
+ break;
+
+ case CTDB_CONTROL_GET_TUNABLE:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.tun_value,
+ &np);
+ break;
+
+ case CTDB_CONTROL_LIST_TUNABLES:
+ ret = ctdb_var_list_pull(buf, buflen, mem_ctx,
+ &cd->data.tun_var_list, &np);
+ break;
+
+ case CTDB_CONTROL_GET_ALL_TUNABLES:
+ ret = ctdb_tunable_list_pull(buf, buflen, mem_ctx,
+ &cd->data.tun_list, &np);
+ break;
+
+ case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+ ret = ctdb_tickle_list_pull(buf, buflen, mem_ctx,
+ &cd->data.tickles, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_UPTIME:
+ ret = ctdb_uptime_pull(buf, buflen, mem_ctx,
+ &cd->data.uptime, &np);
+ break;
+
+ case CTDB_CONTROL_TRY_DELETE_RECORDS:
+ ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx,
+ &cd->data.recbuf, &np);
+ break;
+
+ case CTDB_CONTROL_GET_CAPABILITIES:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.caps, &np);
+ break;
+
+ case CTDB_CONTROL_GET_PUBLIC_IPS:
+ ret = ctdb_public_ip_list_pull(buf, buflen, mem_ctx,
+ &cd->data.pubip_list, &np);
+ break;
+
+ case CTDB_CONTROL_GET_NODEMAP:
+ ret = ctdb_node_map_pull(buf, buflen, mem_ctx,
+ &cd->data.nodemap, &np);
+ break;
+
+ case CTDB_CONTROL_GET_RECLOCK_FILE:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.reclock_file, &np);
+ break;
+
+ case CTDB_CONTROL_GET_BAN_STATE:
+ ret = ctdb_ban_state_pull(buf, buflen, mem_ctx,
+ &cd->data.ban_state, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DB_SEQNUM:
+ ret = ctdb_uint64_pull(buf, buflen, &cd->data.seqnum, &np);
+ break;
+
+ case CTDB_CONTROL_DB_GET_HEALTH:
+ ret = ctdb_string_pull(buf, buflen, mem_ctx,
+ &cd->data.reason, &np);
+ break;
+
+ case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+ ret = ctdb_public_ip_info_pull(buf, buflen, mem_ctx,
+ &cd->data.ipinfo, &np);
+ break;
+
+ case CTDB_CONTROL_GET_IFACES:
+ ret = ctdb_iface_list_pull(buf, buflen, mem_ctx,
+ &cd->data.iface_list, &np);
+ break;
+
+ case CTDB_CONTROL_GET_STAT_HISTORY:
+ ret = ctdb_statistics_list_pull(buf, buflen, mem_ctx,
+ &cd->data.stats_list, &np);
+ break;
+
+ case CTDB_CONTROL_GET_DB_STATISTICS:
+ ret = ctdb_db_statistics_pull(buf, buflen, mem_ctx,
+ &cd->data.dbstats, &np);
+ break;
+
+ case CTDB_CONTROL_GET_RUNSTATE:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.runstate, &np);
+ break;
+
+ case CTDB_CONTROL_GET_NODES_FILE:
+ ret = ctdb_node_map_pull(buf, buflen, mem_ctx,
+ &cd->data.nodemap, &np);
+ break;
+
+ case CTDB_CONTROL_DB_PULL:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.num_records,
+ &np);
+ break;
+
+ case CTDB_CONTROL_DB_PUSH_CONFIRM:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.num_records,
+ &np);
+ break;
+
+ case CTDB_CONTROL_DB_OPEN_FLAGS:
+ ret = ctdb_int32_pull(buf, buflen, &cd->data.tdb_flags, &np);
+ break;
+
+ case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+ ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np);
+ break;
+
+ case CTDB_CONTROL_CHECK_PID_SRVID:
+ break;
+
+ case CTDB_CONTROL_VACUUM_FETCH:
+ break;
+
+ case CTDB_CONTROL_DB_VACUUM:
+ break;
+
+ case CTDB_CONTROL_ECHO_DATA:
+ ret = ctdb_echo_data_pull(buf,
+ buflen,
+ mem_ctx,
+ &cd->data.echo_data,
+ &np);
+ break;
+ }
+
+ /* Only report a consumed byte count when the pull succeeded. */
+ if (ret != 0) {
+ return ret;
+ }
+
+ *npull = np;
+ return 0;
+}
+
+/*
+ * Return the number of bytes needed to marshall control request c
+ * together with its packet header h.
+ *
+ * The total is the header, the fixed control fields (opcode, pad, srvid,
+ * client_id, flags), one uint32 that carries the payload length, and the
+ * opcode-specific payload itself.
+ */
+size_t ctdb_req_control_len(struct ctdb_req_header *h,
+			    struct ctdb_req_control *c)
+{
+	/* Placeholder for the payload-length field; only its size is used. */
+	uint32_t datalen_field = 0;
+	size_t total;
+
+	total = ctdb_req_header_len(h);
+	total += ctdb_uint32_len(&c->opcode);
+	total += ctdb_uint32_len(&c->pad);
+	total += ctdb_uint64_len(&c->srvid);
+	total += ctdb_uint32_len(&c->client_id);
+	total += ctdb_uint32_len(&c->flags);
+	total += ctdb_uint32_len(&datalen_field);
+	total += ctdb_req_control_data_len(&c->rdata);
+
+	return total;
+}
+
+/*
+ * Marshall header h and control request c into buf.
+ *
+ * On entry *buflen is the capacity of buf.  If that is smaller than
+ * ctdb_req_control_len(h, c), the required size is stored in *buflen and
+ * EMSGSIZE is returned without writing anything.  Otherwise h->length is
+ * set to *buflen (the caller-supplied size, not the computed length) and
+ * the header, fixed fields, payload length and payload are written in
+ * that order.
+ *
+ * Returns 0 on success, or EMSGSIZE if the bytes written exceed *buflen.
+ */
+int ctdb_req_control_push(struct ctdb_req_header *h,
+			  struct ctdb_req_control *c,
+			  uint8_t *buf, size_t *buflen)
+{
+	size_t needed, used;
+	size_t pos = 0;
+	uint32_t datalen;
+
+	needed = ctdb_req_control_len(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push(h, buf + pos, &used);
+	pos += used;
+
+	/* Fixed-size control fields, in wire order. */
+	ctdb_uint32_push(&c->opcode, buf + pos, &used);
+	pos += used;
+	ctdb_uint32_push(&c->pad, buf + pos, &used);
+	pos += used;
+	ctdb_uint64_push(&c->srvid, buf + pos, &used);
+	pos += used;
+	ctdb_uint32_push(&c->client_id, buf + pos, &used);
+	pos += used;
+	ctdb_uint32_push(&c->flags, buf + pos, &used);
+	pos += used;
+
+	/* Payload length, followed by the payload itself. */
+	datalen = ctdb_req_control_data_len(&c->rdata);
+	ctdb_uint32_push(&datalen, buf + pos, &used);
+	pos += used;
+
+	ctdb_req_control_data_push(&c->rdata, buf + pos, &used);
+	pos += used;
+
+	return (pos > *buflen) ? EMSGSIZE : 0;
+}
+
+/*
+ * Unmarshall a control request packet from buf (buflen bytes).
+ *
+ * The packet header is parsed first and copied to *h when h is not NULL;
+ * this happens even if a later field fails to parse.  The fixed control
+ * fields are then read into c, followed by a uint32 payload length and
+ * the opcode-specific payload, which is parsed into c->rdata.
+ *
+ * Returns 0 on success, the error of the first failing pull routine, or
+ * EMSGSIZE when the declared payload length exceeds the remaining bytes
+ * or more bytes were consumed than buflen.
+ */
+int ctdb_req_control_pull(uint8_t *buf, size_t buflen,
+			  struct ctdb_req_header *h,
+			  TALLOC_CTX *mem_ctx,
+			  struct ctdb_req_control *c)
+{
+	struct ctdb_req_header hdr;
+	uint8_t *p = buf;		/* read cursor */
+	size_t left = buflen;		/* bytes not yet consumed */
+	size_t used;
+	uint32_t datalen;
+	int ret;
+
+	ret = ctdb_req_header_pull(p, left, &hdr, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	if (h != NULL) {
+		*h = hdr;
+	}
+
+	ret = ctdb_uint32_pull(p, left, &c->opcode, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	ret = ctdb_uint32_pull(p, left, &c->pad, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	ret = ctdb_uint64_pull(p, left, &c->srvid, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	ret = ctdb_uint32_pull(p, left, &c->client_id, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	ret = ctdb_uint32_pull(p, left, &c->flags, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	ret = ctdb_uint32_pull(p, left, &datalen, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	/* The declared payload must fit in what remains of the packet. */
+	if (datalen > left) {
+		return EMSGSIZE;
+	}
+
+	ret = ctdb_req_control_data_pull(p, datalen, c->opcode, mem_ctx,
+					 &c->rdata, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+
+	if ((size_t)(p - buf) > buflen) {
+		return EMSGSIZE;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the number of bytes needed to marshall control reply c together
+ * with its packet header h.
+ *
+ * A reply carries either payload data (status == 0) or an error string
+ * (status != 0), never both; the size of the absent part counts as 0.
+ * Both 32-bit size fields are always included in the total.
+ */
+size_t ctdb_reply_control_len(struct ctdb_req_header *h,
+			      struct ctdb_reply_control *c)
+{
+	uint32_t datalen = 0;
+	uint32_t errlen = 0;
+	size_t total;
+
+	if (c->status != 0) {
+		errlen = ctdb_string_len(&c->errmsg);
+	} else {
+		datalen = ctdb_reply_control_data_len(&c->rdata);
+	}
+
+	total = ctdb_req_header_len(h);
+	total += ctdb_int32_len(&c->status);
+	total += ctdb_uint32_len(&datalen);
+	total += ctdb_uint32_len(&errlen);
+	total += datalen;
+	total += errlen;
+
+	return total;
+}
+
+/*
+ * Marshall header h and control reply c into buf.
+ *
+ * On entry *buflen is the capacity of buf.  If that is smaller than
+ * ctdb_reply_control_len(h, c), the required size is stored in *buflen
+ * and EMSGSIZE is returned without writing anything.  Otherwise
+ * h->length is set to *buflen and the header, status, data size, error
+ * size and then either the payload (status == 0) or the error string
+ * (status != 0) are written.
+ *
+ * Returns 0 on success.
+ */
+int ctdb_reply_control_push(struct ctdb_req_header *h,
+			    struct ctdb_reply_control *c,
+			    uint8_t *buf, size_t *buflen)
+{
+	size_t needed, used;
+	size_t pos = 0;
+	uint32_t datalen = 0;
+	uint32_t errlen = 0;
+
+	needed = ctdb_reply_control_len(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push(h, buf + pos, &used);
+	pos += used;
+
+	ctdb_int32_push(&c->status, buf + pos, &used);
+	pos += used;
+
+	/* Exactly one of the two sizes is non-zero, chosen by status. */
+	if (c->status != 0) {
+		errlen = ctdb_string_len(&c->errmsg);
+	} else {
+		datalen = ctdb_reply_control_data_len(&c->rdata);
+	}
+
+	ctdb_uint32_push(&datalen, buf + pos, &used);
+	pos += used;
+
+	ctdb_uint32_push(&errlen, buf + pos, &used);
+	pos += used;
+
+	if (c->status != 0) {
+		ctdb_string_push(&c->errmsg, buf + pos, &used);
+	} else {
+		ctdb_reply_control_data_push(&c->rdata, buf + pos, &used);
+	}
+
+	return 0;
+}
+
+/*
+ * Unmarshall a control reply packet from buf (buflen bytes).
+ *
+ * opcode names the control this reply answers; the caller supplies it,
+ * it is not read from the packet.  The header is parsed first and copied
+ * to *h when h is not NULL, even if a later field fails to parse.  Then
+ * status, data size and error size are read.  c->errmsg is reset to
+ * NULL; when status is 0 the payload is parsed into c->rdata, otherwise
+ * the error string is parsed into c->errmsg.
+ *
+ * Returns 0 on success, the error of the first failing pull routine, or
+ * EMSGSIZE when the declared size exceeds the remaining bytes.
+ */
+int ctdb_reply_control_pull(uint8_t *buf, size_t buflen, uint32_t opcode,
+			    struct ctdb_req_header *h,
+			    TALLOC_CTX *mem_ctx,
+			    struct ctdb_reply_control *c)
+{
+	struct ctdb_req_header hdr;
+	uint8_t *p = buf;		/* read cursor */
+	size_t left = buflen;		/* bytes not yet consumed */
+	size_t used;
+	uint32_t datalen, errlen;
+	int ret;
+
+	ret = ctdb_req_header_pull(p, left, &hdr, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	if (h != NULL) {
+		*h = hdr;
+	}
+
+	ret = ctdb_int32_pull(p, left, &c->status, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	ret = ctdb_uint32_pull(p, left, &datalen, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	ret = ctdb_uint32_pull(p, left, &errlen, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	p += used;
+	left -= used;
+
+	c->errmsg = NULL;
+
+	if (c->status != 0) {
+		/* Failed control: only the error string follows. */
+		if (left < errlen) {
+			return EMSGSIZE;
+		}
+		return ctdb_string_pull(p, errlen, mem_ctx, &c->errmsg,
+					&used);
+	}
+
+	/* Successful control: only the opcode-specific payload follows. */
+	if (left < datalen) {
+		return EMSGSIZE;
+	}
+	return ctdb_reply_control_data_pull(p, datalen, opcode, mem_ctx,
+					    &c->rdata, &used);
+}
diff --git a/ctdb/protocol/protocol_debug.c b/ctdb/protocol/protocol_debug.c
new file mode 100644
index 0000000..f1e1fc0
--- /dev/null
+++ b/ctdb/protocol/protocol_debug.c
@@ -0,0 +1,746 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/locale.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include <protocol/protocol.h>
+#include <protocol/protocol_api.h>
+
+/*
+ * Utility functions
+ */
+/* Key value that marks the last entry of a uint32_map table */
+#define MAP_END		0xffffffff
+
+/* One table entry pairing a 32-bit value with its printable name */
+struct uint32_map {
+	uint32_t key;
+	const char *name;
+};
+
+
+/*
+ * Write to fp the name stored alongside key in map.  The table is
+ * scanned until an entry with key MAP_END is reached; if nothing matched
+ * by then, "UNKNOWN(<key>)" is written instead.
+ */
+static void uint32_map_print(struct uint32_map *map, uint32_t key, FILE *fp)
+{
+	struct uint32_map *entry;
+
+	for (entry = map; entry->key != MAP_END; entry++) {
+		if (entry->key == key) {
+			fprintf(fp, "%s", entry->name);
+			return;
+		}
+	}
+
+	fprintf(fp, "UNKNOWN(%u)", key);
+}
+
+/*
+ * Write the bytes of d to fp.  Printable characters other than '"' and
+ * '\' are written as-is; every other byte is written as a backslash
+ * followed by two upper-case hex digits.
+ *
+ * The byte count is kept in a size_t: storing d.dsize in an int would
+ * truncate large sizes, and a negative count would make a
+ * "while (len--)" loop run far beyond the buffer.
+ */
+static void tdb_data_print(TDB_DATA d, FILE *fp)
+{
+	const unsigned char *p = (const unsigned char *)d.dptr;
+	size_t i;
+
+	for (i = 0; i < d.dsize; i++) {
+		unsigned char ch = p[i];
+
+		if (isprint(ch) && ch != '"' && ch != '\\') {
+			fputc(ch, fp);
+		} else {
+			fprintf(fp, "\\%02X", ch);
+		}
+	}
+}
+
+/*
+ * Data types
+ */
+
+/*
+ * Write the symbolic name of a packet operation to fp, or
+ * "UNKNOWN(<n>)" if the value is not in the table.
+ *
+ * CTDB_REQ_TUNNEL is listed because ctdb_packet_print() decodes tunnel
+ * requests; without the entry their header would show up as UNKNOWN.
+ */
+static void ctdb_operation_print(uint32_t operation, FILE *fp)
+{
+	struct uint32_map map[] = {
+		{ CTDB_REQ_CALL, "REQ_CALL" },
+		{ CTDB_REPLY_CALL, "REPLY_CALL" },
+		{ CTDB_REQ_DMASTER, "REQ_DMASTER" },
+		{ CTDB_REPLY_DMASTER, "REPLY_DMASTER" },
+		{ CTDB_REPLY_ERROR, "REPLY_ERROR" },
+		{ CTDB_REQ_MESSAGE, "REQ_MESSAGE" },
+		{ CTDB_REQ_CONTROL, "REQ_CONTROL" },
+		{ CTDB_REPLY_CONTROL, "REPLY_CONTROL" },
+		{ CTDB_REQ_KEEPALIVE, "REQ_KEEPALIVE" },
+		{ CTDB_REQ_TUNNEL, "REQ_TUNNEL" },
+		{ MAP_END, "" },
+	};
+
+	uint32_map_print(map, operation, fp);
+}
+
+/*
+ * Write the symbolic name of a call id to fp, or "UNKNOWN(<n>)" if the
+ * value is not one of the ids listed below.
+ */
+static void ctdb_callid_print(uint32_t callid, FILE *fp)
+{
+	struct uint32_map callid_names[] = {
+		{ CTDB_NULL_FUNC,              "NULL" },
+		{ CTDB_FETCH_FUNC,             "FETCH" },
+		{ CTDB_FETCH_WITH_HEADER_FUNC, "FETCH_WITH_HEADER" },
+		{ MAP_END,                     "" },
+	};
+
+	uint32_map_print(callid_names, callid, fp);
+}
+
+static void ctdb_opcode_print(uint32_t opcode, FILE *fp)
+{
+ struct uint32_map map[] = {
+ { CTDB_CONTROL_PROCESS_EXISTS, "PROCESS_EXISTS" },
+ { CTDB_CONTROL_STATISTICS, "STATISTICS" },
+ { CTDB_CONTROL_PING, "PING" },
+ { CTDB_CONTROL_GETDBPATH, "GETDBPATH" },
+ { CTDB_CONTROL_GETVNNMAP, "GETVNNMAP" },
+ { CTDB_CONTROL_SETVNNMAP, "SETVNNMAP" },
+ { CTDB_CONTROL_GET_DEBUG, "GET_DEBUG" },
+ { CTDB_CONTROL_SET_DEBUG, "SET_DEBUG" },
+ { CTDB_CONTROL_GET_DBMAP, "GET_DBMAP" },
+ { CTDB_CONTROL_GET_NODEMAPv4, "GET_NODEMAPv4" },
+ { CTDB_CONTROL_SET_DMASTER, "SET_DMASTER" },
+ { CTDB_CONTROL_GET_RECMODE, "GET_RECMODE" },
+ { CTDB_CONTROL_SET_RECMODE, "SET_RECMODE" },
+ { CTDB_CONTROL_STATISTICS_RESET, "STATISTICS_RESET" },
+ { CTDB_CONTROL_DB_ATTACH, "DB_ATTACH" },
+ { CTDB_CONTROL_SET_CALL, "SET_CALL" },
+ { CTDB_CONTROL_TRAVERSE_START, "TRAVERSE_START" },
+ { CTDB_CONTROL_TRAVERSE_ALL, "TRAVERSE_ALL" },
+ { CTDB_CONTROL_TRAVERSE_DATA, "TRAVERSE_DATA" },
+ { CTDB_CONTROL_REGISTER_SRVID, "REGISTER_SRVID" },
+ { CTDB_CONTROL_DEREGISTER_SRVID, "DEREGISTER_SRVID" },
+ { CTDB_CONTROL_GET_DBNAME, "GET_DBNAME" },
+ { CTDB_CONTROL_ENABLE_SEQNUM, "ENABLE_SEQNUM" },
+ { CTDB_CONTROL_UPDATE_SEQNUM, "UPDATE_SEQNUM" },
+ { CTDB_CONTROL_DUMP_MEMORY, "DUMP_MEMORY" },
+ { CTDB_CONTROL_GET_PID, "GET_PID" },
+ { CTDB_CONTROL_GET_RECMASTER, "GET_RECMASTER" },
+ { CTDB_CONTROL_SET_RECMASTER, "SET_RECMASTER" },
+ { CTDB_CONTROL_FREEZE, "FREEZE" },
+ { CTDB_CONTROL_THAW, "THAW" },
+ { CTDB_CONTROL_GET_PNN, "GET_PNN" },
+ { CTDB_CONTROL_SHUTDOWN, "SHUTDOWN" },
+ { CTDB_CONTROL_GET_MONMODE, "GET_MONMODE" },
+ { CTDB_CONTROL_TAKEOVER_IPv4, "TAKEOVER_IPv4" },
+ { CTDB_CONTROL_RELEASE_IPv4, "RELEASE_IPv4" },
+ { CTDB_CONTROL_TCP_CLIENT, "TCP_CLIENT" },
+ { CTDB_CONTROL_TCP_ADD, "TCP_ADD" },
+ { CTDB_CONTROL_TCP_REMOVE, "TCP_REMOVE" },
+ { CTDB_CONTROL_STARTUP, "STARTUP" },
+ { CTDB_CONTROL_SET_TUNABLE, "SET_TUNABLE" },
+ { CTDB_CONTROL_GET_TUNABLE, "GET_TUNABLE" },
+ { CTDB_CONTROL_LIST_TUNABLES, "LIST_TUNABLES" },
+ { CTDB_CONTROL_GET_PUBLIC_IPSv4, "GET_PUBLIC_IPSv4" },
+ { CTDB_CONTROL_MODIFY_FLAGS, "MODIFY_FLAGS" },
+ { CTDB_CONTROL_GET_ALL_TUNABLES, "GET_ALL_TUNABLES" },
+ { CTDB_CONTROL_KILL_TCP, "KILL_TCP" },
+ { CTDB_CONTROL_GET_TCP_TICKLE_LIST, "GET_TCP_TICKLE_LIST" },
+ { CTDB_CONTROL_SET_TCP_TICKLE_LIST, "SET_TCP_TICKLE_LIST" },
+ { CTDB_CONTROL_REGISTER_SERVER_ID, "REGISTER_SERVER_ID" },
+ { CTDB_CONTROL_UNREGISTER_SERVER_ID, "UNREGISTER_SERVER_ID" },
+ { CTDB_CONTROL_CHECK_SERVER_ID, "CHECK_SERVER_ID" },
+ { CTDB_CONTROL_GET_SERVER_ID_LIST, "GET_SERVER_ID_LIST" },
+ { CTDB_CONTROL_DB_ATTACH_PERSISTENT, "DB_ATTACH_PERSISTENT" },
+ { CTDB_CONTROL_PERSISTENT_STORE, "PERSISTENT_STORE" },
+ { CTDB_CONTROL_UPDATE_RECORD, "UPDATE_RECORD" },
+ { CTDB_CONTROL_SEND_GRATUITOUS_ARP, "SEND_GRATUITOUS_ARP" },
+ { CTDB_CONTROL_TRANSACTION_START, "TRANSACTION_START" },
+ { CTDB_CONTROL_TRANSACTION_COMMIT, "TRANSACTION_COMMIT" },
+ { CTDB_CONTROL_WIPE_DATABASE, "WIPE_DATABASE" },
+ { CTDB_CONTROL_UPTIME, "UPTIME" },
+ { CTDB_CONTROL_START_RECOVERY, "START_RECOVERY" },
+ { CTDB_CONTROL_END_RECOVERY, "END_RECOVERY" },
+ { CTDB_CONTROL_RELOAD_NODES_FILE, "RELOAD_NODES_FILE" },
+ { CTDB_CONTROL_TRY_DELETE_RECORDS, "TRY_DELETE_RECORDS" },
+ { CTDB_CONTROL_ENABLE_MONITOR, "ENABLE_MONITOR" },
+ { CTDB_CONTROL_DISABLE_MONITOR, "DISABLE_MONITOR" },
+ { CTDB_CONTROL_ADD_PUBLIC_IP, "ADD_PUBLIC_IP" },
+ { CTDB_CONTROL_DEL_PUBLIC_IP, "DEL_PUBLIC_IP" },
+ { CTDB_CONTROL_RUN_EVENTSCRIPTS, "RUN_EVENTSCRIPTS" },
+ { CTDB_CONTROL_GET_CAPABILITIES, "GET_CAPABILITIES" },
+ { CTDB_CONTROL_START_PERSISTENT_UPDATE, "START_PERSISTENT_UPDATE" },
+ { CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE, "CANCEL_PERSISTENT_UPDATE" },
+ { CTDB_CONTROL_TRANS2_COMMIT, "TRANS2_COMMIT" },
+ { CTDB_CONTROL_TRANS2_FINISHED, "TRANS2_FINISHED" },
+ { CTDB_CONTROL_TRANS2_ERROR, "TRANS2_ERROR" },
+ { CTDB_CONTROL_TRANS2_COMMIT_RETRY, "TRANS2_COMMIT_RETRY" },
+ { CTDB_CONTROL_RECD_PING, "RECD_PING" },
+ { CTDB_CONTROL_RELEASE_IP, "RELEASE_IP" },
+ { CTDB_CONTROL_TAKEOVER_IP, "TAKEOVER_IP" },
+ { CTDB_CONTROL_GET_PUBLIC_IPS, "GET_PUBLIC_IPS" },
+ { CTDB_CONTROL_GET_NODEMAP, "GET_NODEMAP" },
+ { CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, "GET_EVENT_SCRIPT_STATUS" },
+ { CTDB_CONTROL_TRAVERSE_KILL, "TRAVERSE_KILL" },
+ { CTDB_CONTROL_RECD_RECLOCK_LATENCY, "RECD_RECLOCK_LATENCY" },
+ { CTDB_CONTROL_GET_RECLOCK_FILE, "GET_RECLOCK_FILE" },
+ { CTDB_CONTROL_STOP_NODE, "STOP_NODE" },
+ { CTDB_CONTROL_CONTINUE_NODE, "CONTINUE_NODE" },
+ { CTDB_CONTROL_SET_NATGWSTATE, "SET_NATGWSTATE" },
+ { CTDB_CONTROL_SET_LMASTERROLE, "SET_LMASTERROLE" },
+ { CTDB_CONTROL_SET_RECMASTERROLE, "SET_RECMASTERROLE" },
+ { CTDB_CONTROL_ENABLE_SCRIPT, "ENABLE_SCRIPT" },
+ { CTDB_CONTROL_DISABLE_SCRIPT, "DISABLE_SCRIPT" },
+ { CTDB_CONTROL_SET_BAN_STATE, "SET_BAN_STATE" },
+ { CTDB_CONTROL_GET_BAN_STATE, "GET_BAN_STATE" },
+ { CTDB_CONTROL_SET_DB_PRIORITY, "SET_DB_PRIORITY" },
+ { CTDB_CONTROL_GET_DB_PRIORITY, "GET_DB_PRIORITY" },
+ { CTDB_CONTROL_TRANSACTION_CANCEL, "TRANSACTION_CANCEL" },
+ { CTDB_CONTROL_REGISTER_NOTIFY, "REGISTER_NOTIFY" },
+ { CTDB_CONTROL_DEREGISTER_NOTIFY, "DEREGISTER_NOTIFY" },
+ { CTDB_CONTROL_TRANS2_ACTIVE, "TRANS2_ACTIVE" },
+ { CTDB_CONTROL_GET_LOG, "GET_LOG" },
+ { CTDB_CONTROL_CLEAR_LOG, "CLEAR_LOG" },
+ { CTDB_CONTROL_TRANS3_COMMIT, "TRANS3_COMMIT" },
+ { CTDB_CONTROL_GET_DB_SEQNUM, "GET_DB_SEQNUM" },
+ { CTDB_CONTROL_DB_SET_HEALTHY, "DB_SET_HEALTHY" },
+ { CTDB_CONTROL_DB_GET_HEALTH, "DB_GET_HEALTH" },
+ { CTDB_CONTROL_GET_PUBLIC_IP_INFO, "GET_PUBLIC_IP_INFO" },
+ { CTDB_CONTROL_GET_IFACES, "GET_IFACES" },
+ { CTDB_CONTROL_SET_IFACE_LINK_STATE, "SET_IFACE_LINK_STATE" },
+ { CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE, "TCP_ADD_DELAYED_UPDATE" },
+ { CTDB_CONTROL_GET_STAT_HISTORY, "GET_STAT_HISTORY" },
+ { CTDB_CONTROL_SCHEDULE_FOR_DELETION, "SCHEDULE_FOR_DELETION" },
+ { CTDB_CONTROL_SET_DB_READONLY, "SET_DB_READONLY" },
+ { CTDB_CONTROL_CHECK_SRVIDS, "CHECK_SRVIDS" },
+ { CTDB_CONTROL_TRAVERSE_START_EXT, "TRAVERSE_START_EXT" },
+ { CTDB_CONTROL_GET_DB_STATISTICS, "GET_DB_STATISTICS" },
+ { CTDB_CONTROL_SET_DB_STICKY, "SET_DB_STICKY" },
+ { CTDB_CONTROL_RELOAD_PUBLIC_IPS, "RELOAD_PUBLIC_IPS" },
+ { CTDB_CONTROL_TRAVERSE_ALL_EXT, "TRAVERSE_ALL_EXT" },
+ { CTDB_CONTROL_RECEIVE_RECORDS, "RECEIVE_RECORDS" },
+ { CTDB_CONTROL_IPREALLOCATED, "IPREALLOCATED" },
+ { CTDB_CONTROL_GET_RUNSTATE, "GET_RUNSTATE" },
+ { CTDB_CONTROL_DB_DETACH, "DB_DETACH" },
+ { CTDB_CONTROL_GET_NODES_FILE, "GET_NODES_FILE" },
+ { CTDB_CONTROL_DB_FREEZE, "DB_FREEZE" },
+ { CTDB_CONTROL_DB_THAW, "DB_THAW" },
+ { CTDB_CONTROL_DB_TRANSACTION_START, "DB_TRANSACTION_START" },
+ { CTDB_CONTROL_DB_TRANSACTION_COMMIT, "DB_TRANSACTION_COMMIT" },
+ { CTDB_CONTROL_DB_TRANSACTION_CANCEL, "DB_TRANSACTION_CANCEL" },
+ { CTDB_CONTROL_DB_PULL, "DB_PULL" },
+ { CTDB_CONTROL_DB_PUSH_START, "DB_PUSH_START" },
+ { CTDB_CONTROL_DB_PUSH_CONFIRM, "DB_PUSH_CONFIRM" },
+ { CTDB_CONTROL_DB_OPEN_FLAGS, "DB_OPEN_FLAGS" },
+ { CTDB_CONTROL_DB_ATTACH_REPLICATED, "DB_ATTACH_REPLICATED" },
+ { CTDB_CONTROL_CHECK_PID_SRVID, "CHECK_PID_SRVID" },
+ { CTDB_CONTROL_TUNNEL_REGISTER, "TUNNEL_REGISTER" },
+ { CTDB_CONTROL_TUNNEL_DEREGISTER, "TUNNEL_DEREGISTER" },
+ { CTDB_CONTROL_VACUUM_FETCH, "VACUUM_FETCH" },
+ { CTDB_CONTROL_DB_VACUUM, "DB_VACUUM" },
+ { CTDB_CONTROL_ECHO_DATA, "ECHO_DATA" },
+ { CTDB_CONTROL_DISABLE_NODE, "DISABLE_NODE" },
+ { CTDB_CONTROL_ENABLE_NODE, "ENABLE_NODE" },
+ { CTDB_CONTROL_TCP_CLIENT_DISCONNECTED, "TCP_CLIENT_DISCONNECTED" },
+ { CTDB_CONTROL_TCP_CLIENT_PASSED, "TCP_CLIENT_PASSED" },
+ { MAP_END, "" },
+ };
+
+ uint32_map_print(map, opcode, fp);
+}
+
+/*
+ * Write the names of the control flags set in flags to fp, each followed
+ * by a single space.  Nothing is written when neither flag is set.
+ */
+static void ctdb_control_flags_print(uint32_t flags, FILE *fp)
+{
+	if ((flags & CTDB_CTRL_FLAG_NOREPLY) != 0) {
+		fputs("NOREPLY ", fp);
+	}
+
+	if ((flags & CTDB_CTRL_FLAG_OPCODE_SPECIFIC) != 0) {
+		fputs("OPCODE_SPECIFIC ", fp);
+	}
+}
+
+/*
+ * Write a node number to fp.  The special values compared against below
+ * are written by name; any other value is written in decimal.
+ */
+static void ctdb_pnn_print(uint32_t pnn, FILE *fp)
+{
+	const char *special = NULL;
+
+	if (pnn == CTDB_CURRENT_NODE) {
+		special = "CURRENT";
+	} else if (pnn == CTDB_BROADCAST_ALL) {
+		special = "ALL";
+	} else if (pnn == CTDB_BROADCAST_ACTIVE) {
+		special = "ACTIVE";
+	} else if (pnn == CTDB_BROADCAST_CONNECTED) {
+		special = "CONNECTED";
+	} else if (pnn == CTDB_MULTICAST) {
+		special = "MULTICAST";
+	} else if (pnn == CTDB_UNKNOWN_PNN) {
+		special = "UNKNOWN";
+	}
+
+	if (special == NULL) {
+		fprintf(fp, "%u", pnn);
+		return;
+	}
+
+	fprintf(fp, "%s", special);
+}
+
+/*
+ * Write a human readable form of a server id to fp.
+ *
+ * Checks are made in this order: the ALL id, the RECOVERY prefix, the
+ * exact ids in the table below, the per-service prefixes (samba, nfsd,
+ * iscsi, tool, test, pid), and finally a plain hex fallback.  A prefix
+ * is the top 16 bits of the id; for prefixed ids the remaining bits are
+ * printed after the service name.
+ *
+ * CTDB_SRVID_LEADER is included in the exact-match table so that leader
+ * messages are shown by name rather than as a raw number.
+ */
+static void ctdb_srvid_print(uint64_t srvid, FILE *fp)
+{
+	const struct {
+		uint64_t id;
+		const char *name;
+	} exact[] = {
+		{ CTDB_SRVID_BANNING, "BANNING" },
+		{ CTDB_SRVID_ELECTION, "ELECTION" },
+		{ CTDB_SRVID_LEADER, "LEADER" },
+		{ CTDB_SRVID_RECONFIGURE, "RECONFIGURE" },
+		{ CTDB_SRVID_RELEASE_IP, "RELEASE_IP" },
+		{ CTDB_SRVID_TAKE_IP, "TAKE_IP" },
+		{ CTDB_SRVID_IPREALLOCATED, "IPREALLOCATED" },
+		{ CTDB_SRVID_SET_NODE_FLAGS, "SET_NODE_FLAGS" },
+		{ CTDB_SRVID_RECD_UPDATE_IP, "RECD_UPDATE_IP" },
+		{ CTDB_SRVID_VACUUM_FETCH, "VACUUM_FETCH" },
+		{ CTDB_SRVID_DETACH_DATABASE, "DETACH_DATABASE" },
+		{ CTDB_SRVID_MEM_DUMP, "MEM_DUMP" },
+		{ CTDB_SRVID_GETLOG, "GETLOG" },
+		{ CTDB_SRVID_CLEARLOG, "CLEARLOG" },
+		{ CTDB_SRVID_PUSH_NODE_FLAGS, "PUSH_NODE_FLAGS" },
+		{ CTDB_SRVID_RELOAD_NODES, "RELOAD_NODES" },
+		{ CTDB_SRVID_TAKEOVER_RUN, "TAKEOVER_RUN" },
+		{ CTDB_SRVID_REBALANCE_NODE, "REBALANCE_NODE" },
+		{ CTDB_SRVID_DISABLE_TAKEOVER_RUNS, "DISABLE_TAKEOVER_RUNS" },
+		{ CTDB_SRVID_DISABLE_RECOVERIES, "DISABLE_RECOVERIES" },
+		{ CTDB_SRVID_DISABLE_IP_CHECK, "DISABLE_IP_CHECK" },
+	};
+	const uint64_t prefix = 0xFFFF000000000000ULL;
+	size_t i;
+
+	if (srvid == CTDB_SRVID_ALL) {
+		fprintf(fp, "ALL");
+		return;
+	}
+
+	/* The RECOVERY prefix is tested before any exact id */
+	if ((srvid & prefix) == CTDB_SRVID_RECOVERY) {
+		srvid = srvid & ~CTDB_SRVID_RECOVERY;
+		fprintf(fp, "RECOVERY-%"PRIx64"", srvid);
+		return;
+	}
+
+	for (i = 0; i < sizeof(exact) / sizeof(exact[0]); i++) {
+		if (srvid == exact[i].id) {
+			fprintf(fp, "%s", exact[i].name);
+			return;
+		}
+	}
+
+	if ((srvid & prefix) == CTDB_SRVID_SAMBA_RANGE) {
+		if (srvid == CTDB_SRVID_SAMBA_NOTIFY) {
+			fprintf(fp, "SAMBA_NOTIFY");
+		} else {
+			srvid &= ~CTDB_SRVID_SAMBA_RANGE;
+			fprintf(fp, "samba-0x%"PRIx64"", srvid);
+		}
+	} else if ((srvid & prefix) == CTDB_SRVID_NFSD_RANGE) {
+		srvid &= ~CTDB_SRVID_NFSD_RANGE;
+		fprintf(fp, "nfsd-0x%"PRIx64"", srvid);
+	} else if ((srvid & prefix) == CTDB_SRVID_ISCSID_RANGE) {
+		srvid &= ~CTDB_SRVID_ISCSID_RANGE;
+		fprintf(fp, "iscsi-0x%"PRIx64"", srvid);
+	} else if ((srvid & prefix) == CTDB_SRVID_TOOL_RANGE) {
+		srvid &= ~CTDB_SRVID_TOOL_RANGE;
+		fprintf(fp, "tool-0x%"PRIx64"", srvid);
+	} else if ((srvid & prefix) == CTDB_SRVID_TEST_RANGE) {
+		srvid &= ~CTDB_SRVID_TEST_RANGE;
+		fprintf(fp, "test-0x%"PRIx64"", srvid);
+	} else if ((srvid & prefix) == CTDB_SRVID_PID_RANGE) {
+		/* Small values are shown in decimal, larger ones in hex */
+		if (srvid < UINT16_MAX) {
+			fprintf(fp, "pid-%"PRIu64, srvid);
+		} else {
+			fprintf(fp, "pid-0x%"PRIx64, srvid);
+		}
+	} else {
+		fprintf(fp, "0x%"PRIx64, srvid);
+	}
+}
+
+/*
+ * Write a tunnel id to fp in hex.  Ids that have every CTDB_TUNNEL_TEST
+ * bit set get a "TEST-" prefix; all others get a "0x" prefix.
+ */
+static void ctdb_tunnel_id_print(uint64_t tunnel_id, FILE *fp)
+{
+	if ((tunnel_id & CTDB_TUNNEL_TEST) != CTDB_TUNNEL_TEST) {
+		fprintf(fp, "0x%"PRIx64, tunnel_id);
+		return;
+	}
+
+	fprintf(fp, "TEST-%"PRIx64, tunnel_id);
+}
+
+/*
+ * Write the names of the tunnel flags set in flags to fp, each followed
+ * by a single space.  Nothing is written when no known flag is set.
+ */
+static void ctdb_tunnel_flags_print(uint32_t flags, FILE *fp)
+{
+	if ((flags & CTDB_TUNNEL_FLAG_REQUEST) != 0) {
+		fputs("REQUEST ", fp);
+	}
+
+	if ((flags & CTDB_TUNNEL_FLAG_REPLY) != 0) {
+		fputs("REPLY ", fp);
+	}
+
+	if ((flags & CTDB_TUNNEL_FLAG_NOREPLY) != 0) {
+		fputs("NOREPLY ", fp);
+	}
+}
+
+/*
+ * Print routines
+ */
+
+/*
+ * Write the fields of a packet header to fp: a "Header" line, a line
+ * with length, magic, version and generation, and a line with the
+ * operation name, destination node, source node and request id.
+ */
+static void ctdb_req_header_print(struct ctdb_req_header *h, FILE *fp)
+{
+	/* The trailing space starts the line that carries the operation */
+	fprintf(fp,
+		"Header\n"
+		" length:%u magic:0x%"PRIx32" version:%u generation:0x%"PRIx32"\n"
+		" ",
+		h->length, h->ctdb_magic, h->ctdb_version, h->generation);
+
+	ctdb_operation_print(h->operation, fp);
+
+	fputs(" dst:", fp);
+	ctdb_pnn_print(h->destnode, fp);
+
+	fputs(" src:", fp);
+	ctdb_pnn_print(h->srcnode, fp);
+
+	fprintf(fp, " reqid:0x%"PRIx32"\n", h->reqid);
+}
+
+/*
+ * Write a call request to fp: database id and call id name on one line,
+ * the escaped record key on the next.
+ */
+static void ctdb_req_call_print(struct ctdb_req_call *c, FILE *fp)
+{
+	fprintf(fp, "Data\n db:0x%"PRIx32" ", c->db_id);
+	ctdb_callid_print(c->callid, fp);
+
+	fputs("\n key:", fp);
+	tdb_data_print(c->key, fp);
+	fputc('\n', fp);
+}
+
+/*
+ * Write a call reply to fp: the status, and the escaped reply data only
+ * when the status is 0.
+ */
+static void ctdb_reply_call_print(struct ctdb_reply_call *c, FILE *fp)
+{
+	fprintf(fp, "Data\n status:%d\n", c->status);
+
+	if (c->status != 0) {
+		return;
+	}
+
+	fputs(" data:", fp);
+	tdb_data_print(c->data, fp);
+	fputc('\n', fp);
+}
+
+/*
+ * Write a dmaster request to fp: database id, rsn and dmaster on one
+ * line, followed by the escaped key and the escaped data on their own
+ * lines.
+ */
+static void ctdb_req_dmaster_print(struct ctdb_req_dmaster *c, FILE *fp)
+{
+	fprintf(fp,
+		"Data\n"
+		" db:0x%"PRIx32" rsn:0x%"PRIx64" dmaster:%u\n"
+		" key:",
+		c->db_id, c->rsn, c->dmaster);
+	tdb_data_print(c->key, fp);
+
+	fputs("\n data:", fp);
+	tdb_data_print(c->data, fp);
+	fputc('\n', fp);
+}
+
+/*
+ * Write a dmaster reply to fp: database id and rsn on one line, followed
+ * by the escaped key and the escaped data on their own lines.
+ */
+static void ctdb_reply_dmaster_print(struct ctdb_reply_dmaster *c, FILE *fp)
+{
+	fprintf(fp,
+		"Data\n"
+		" db:0x%"PRIx32" rsn:0x%"PRIx64"\n"
+		" key:",
+		c->db_id, c->rsn);
+	tdb_data_print(c->key, fp);
+
+	fputs("\n data:", fp);
+	tdb_data_print(c->data, fp);
+	fputc('\n', fp);
+}
+
+/*
+ * Write an error reply to fp: the status, and the escaped message only
+ * when the status is non-zero.
+ */
+static void ctdb_reply_error_print(struct ctdb_reply_error *c, FILE *fp)
+{
+	fprintf(fp, "Data\n status:%d\n", c->status);
+
+	if (c->status == 0) {
+		return;
+	}
+
+	fputs(" msg:", fp);
+	tdb_data_print(c->msg, fp);
+	fputc('\n', fp);
+}
+
+/*
+ * Write a message request to fp: the server id on one line and the
+ * escaped message data on the next.
+ */
+static void ctdb_req_message_data_print(struct ctdb_req_message_data *c,
+					FILE *fp)
+{
+	fputs("Data\n srvid:", fp);
+	ctdb_srvid_print(c->srvid, fp);
+
+	fputs("\n data:", fp);
+	tdb_data_print(c->data, fp);
+	fputc('\n', fp);
+}
+
+/*
+ * Write a control request to fp on a single line: opcode name, server
+ * id, client id and the names of the flags that are set.
+ */
+static void ctdb_req_control_print(struct ctdb_req_control *c, FILE *fp)
+{
+	fputs("Data\n ", fp);
+	ctdb_opcode_print(c->opcode, fp);
+
+	fputs(" srvid:", fp);
+	ctdb_srvid_print(c->srvid, fp);
+
+	fprintf(fp, " client_id:0x%"PRIx32" ", c->client_id);
+	ctdb_control_flags_print(c->flags, fp);
+
+	fputc('\n', fp);
+}
+
+/*
+ * Write a control reply to fp: the status, followed by the error message
+ * when one is attached.
+ */
+static void ctdb_reply_control_print(struct ctdb_reply_control *c, FILE *fp)
+{
+	fprintf(fp, "Data\n status:%d ", c->status);
+
+	if (c->errmsg != NULL) {
+		fprintf(fp, "errmsg: %s", c->errmsg);
+	}
+
+	fputc('\n', fp);
+}
+
+/* Write a keepalive request to fp: version in hex, uptime in decimal */
+static void ctdb_req_keepalive_print(struct ctdb_req_keepalive *c, FILE *fp)
+{
+	fprintf(fp,
+		"Data\n"
+		" version:0x%"PRIx32" uptime:%"PRIu32"\n",
+		c->version, c->uptime);
+}
+
+/*
+ * Write a tunnel request to fp: tunnel id and flag names on one line,
+ * the escaped payload on the next.
+ *
+ * The id, the flags and the payload are separated and the payload is
+ * labelled in the same way as in the other print routines; writing them
+ * back to back makes the three fields run together in the output.
+ */
+static void ctdb_req_tunnel_print(struct ctdb_req_tunnel *c, FILE *fp)
+{
+	fprintf(fp, "Data\n");
+	fprintf(fp, " tunnel_id:");
+	ctdb_tunnel_id_print(c->tunnel_id, fp);
+	fprintf(fp, " ");
+	ctdb_tunnel_flags_print(c->flags, fp);
+	fprintf(fp, "\n");
+	fprintf(fp, " data:");
+	tdb_data_print(c->data, fp);
+	fprintf(fp, "\n");
+}
+
+/*
+ * Parse routines
+ */
+
+/*
+ * Decode buf as a CTDB_REQ_CALL packet and print its payload to fp.  A
+ * failure line is printed instead when decoding fails.
+ */
+static void ctdb_req_call_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_req_call call;
+
+	if (ctdb_req_call_pull(buf, buflen, NULL, mem_ctx, &call) == 0) {
+		ctdb_req_call_print(&call, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REQ_CALL\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REPLY_CALL packet and print its payload to fp.  A
+ * failure line is printed instead when decoding fails.
+ */
+static void ctdb_reply_call_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				  TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_reply_call reply;
+
+	if (ctdb_reply_call_pull(buf, buflen, NULL, mem_ctx, &reply) == 0) {
+		ctdb_reply_call_print(&reply, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REPLY_CALL\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REQ_DMASTER packet and print its payload to fp.
+ * A failure line is printed instead when decoding fails.
+ */
+static void ctdb_req_dmaster_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				   TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_req_dmaster req;
+
+	if (ctdb_req_dmaster_pull(buf, buflen, NULL, mem_ctx, &req) == 0) {
+		ctdb_req_dmaster_print(&req, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REQ_DMASTER\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REPLY_DMASTER packet and print its payload to fp.
+ * A failure line is printed instead when decoding fails.
+ */
+static void ctdb_reply_dmaster_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				     TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_reply_dmaster reply;
+
+	if (ctdb_reply_dmaster_pull(buf, buflen, NULL, mem_ctx, &reply) == 0) {
+		ctdb_reply_dmaster_print(&reply, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REPLY_DMASTER\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REPLY_ERROR packet and print its payload to fp.
+ * A failure line is printed instead when decoding fails.
+ */
+static void ctdb_reply_error_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				   TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_reply_error reply;
+
+	if (ctdb_reply_error_pull(buf, buflen, NULL, mem_ctx, &reply) == 0) {
+		ctdb_reply_error_print(&reply, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REPLY_ERROR\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REQ_MESSAGE packet, keeping the message payload
+ * as raw data, and print it to fp.  A failure line is printed instead
+ * when decoding fails.
+ */
+static void ctdb_req_message_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				   TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_req_message_data msg;
+
+	if (ctdb_req_message_data_pull(buf, buflen, NULL, mem_ctx, &msg) == 0) {
+		ctdb_req_message_data_print(&msg, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REQ_MESSAGE\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REQ_CONTROL packet and print its payload to fp.
+ * A failure line is printed instead when decoding fails.
+ */
+static void ctdb_req_control_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				   TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_req_control req;
+
+	if (ctdb_req_control_pull(buf, buflen, NULL, mem_ctx, &req) == 0) {
+		ctdb_req_control_print(&req, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REQ_CONTROL\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REPLY_CONTROL packet and print its payload to fp.
+ * A failure line is printed instead when decoding fails.
+ *
+ * NOTE(review): the opcode is passed as -1, presumably because the
+ * opcode of the matching request is not available here -- confirm.
+ */
+static void ctdb_reply_control_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				     TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_reply_control reply;
+
+	if (ctdb_reply_control_pull(buf, buflen, -1, NULL, mem_ctx,
+				    &reply) == 0) {
+		ctdb_reply_control_print(&reply, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REPLY_CONTROL\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REQ_KEEPALIVE packet and print its payload to fp.
+ * A failure line is printed instead when decoding fails.
+ */
+static void ctdb_req_keepalive_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				     TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_req_keepalive req;
+
+	if (ctdb_req_keepalive_pull(buf, buflen, NULL, mem_ctx, &req) == 0) {
+		ctdb_req_keepalive_print(&req, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REQ_KEEPALIVE\n");
+	}
+}
+
+/*
+ * Decode buf as a CTDB_REQ_TUNNEL packet and print its payload to fp.  A
+ * failure line is printed instead when decoding fails.
+ */
+static void ctdb_req_tunnel_parse(uint8_t *buf, size_t buflen, FILE *fp,
+				  TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_req_tunnel req;
+
+	if (ctdb_req_tunnel_pull(buf, buflen, NULL, mem_ctx, &req) == 0) {
+		ctdb_req_tunnel_print(&req, fp);
+	} else {
+		fprintf(fp, "Failed to parse CTDB_REQ_TUNNEL\n");
+	}
+}
+
+/*
+ * Packet print
+ */
+
+/*
+ * Print a human readable dump of the CTDB packet in buf to fp.
+ *
+ * The header is decoded and printed first.  A header length larger than
+ * buflen is reported but does not stop the dump; a header that fails
+ * ctdb_req_header_verify() does.  The payload is then decoded and
+ * printed according to the operation in the header.
+ *
+ * The temporary talloc context is created only after the header has been
+ * accepted, so the early returns above it have nothing to release.
+ * Creating it up front leaks the context on those paths.
+ */
+void ctdb_packet_print(uint8_t *buf, size_t buflen, FILE *fp)
+{
+	TALLOC_CTX *mem_ctx;
+	struct ctdb_req_header h;
+	size_t np;
+	int ret;
+
+	fprintf(fp, "Buffer len:%zu\n", buflen);
+
+	ret = ctdb_req_header_pull(buf, buflen, &h, &np);
+	if (ret != 0) {
+		fprintf(fp, "Failed to parse ctdb packet header\n");
+		return;
+	}
+
+	ctdb_req_header_print(&h, fp);
+
+	if (h.length > buflen) {
+		fprintf(fp, "Packet length mismatch\n");
+	}
+
+	/* operation 0: accept any operation, only check the framing */
+	ret = ctdb_req_header_verify(&h, 0);
+	if (ret != 0) {
+		fprintf(fp, "Invalid ctdb packet header\n");
+		return;
+	}
+
+	/* Scratch context for anything the payload decoders allocate */
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(fp, "Memory allocation error\n");
+		return;
+	}
+
+	switch (h.operation) {
+	case CTDB_REQ_CALL:
+		ctdb_req_call_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REPLY_CALL:
+		ctdb_reply_call_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REQ_DMASTER:
+		ctdb_req_dmaster_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REPLY_DMASTER:
+		ctdb_reply_dmaster_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REPLY_ERROR:
+		ctdb_reply_error_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REQ_MESSAGE:
+		ctdb_req_message_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REQ_CONTROL:
+		ctdb_req_control_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REPLY_CONTROL:
+		ctdb_reply_control_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REQ_KEEPALIVE:
+		ctdb_req_keepalive_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	case CTDB_REQ_TUNNEL:
+		ctdb_req_tunnel_parse(buf, buflen, fp, mem_ctx);
+		break;
+
+	default:
+		fprintf(fp, "Invalid ctdb operation\n");
+		break;
+	}
+
+	talloc_free(mem_ctx);
+}
diff --git a/ctdb/protocol/protocol_header.c b/ctdb/protocol/protocol_header.c
new file mode 100644
index 0000000..a6be7f5
--- /dev/null
+++ b/ctdb/protocol/protocol_header.c
@@ -0,0 +1,169 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_api.h"
+#include "protocol_private.h"
+
+/*
+ * Sanity-check a packet header.
+ *
+ * Returns EMSGSIZE if the length field is smaller than the header
+ * structure, EPROTO if the magic or protocol version is wrong or the
+ * operation differs from the expected one, and 0 otherwise.  Passing 0
+ * as operation skips the operation check.
+ */
+int ctdb_req_header_verify(struct ctdb_req_header *h, uint32_t operation)
+{
+	if (h->length < sizeof(struct ctdb_req_header)) {
+		return EMSGSIZE;
+	}
+
+	if (h->ctdb_magic != CTDB_MAGIC ||
+	    h->ctdb_version != CTDB_PROTOCOL) {
+		return EPROTO;
+	}
+
+	if (operation == 0 || h->operation == operation) {
+		return 0;
+	}
+
+	return EPROTO;
+}
+
+/*
+ * Initialise a packet header.  Magic and protocol version are set to the
+ * current constants and length to the size of the header structure; the
+ * remaining fields are taken from the arguments.
+ */
+void ctdb_req_header_fill(struct ctdb_req_header *h, uint32_t generation,
+			  uint32_t operation, uint32_t destnode,
+			  uint32_t srcnode, uint32_t reqid)
+{
+	/* Fixed values */
+	h->ctdb_magic = CTDB_MAGIC;
+	h->ctdb_version = CTDB_PROTOCOL;
+	h->length = sizeof(struct ctdb_req_header);
+
+	/* Caller supplied values */
+	h->generation = generation;
+	h->operation = operation;
+	h->srcnode = srcnode;
+	h->destnode = destnode;
+	h->reqid = reqid;
+}
+
+/*
+ * Return the number of bytes a marshalled header occupies, computed as
+ * the sum of the marshalled sizes of its eight uint32 fields.
+ */
+size_t ctdb_req_header_len(struct ctdb_req_header *in)
+{
+	size_t total = 0;
+
+	total += ctdb_uint32_len(&in->length);
+	total += ctdb_uint32_len(&in->ctdb_magic);
+	total += ctdb_uint32_len(&in->ctdb_version);
+	total += ctdb_uint32_len(&in->generation);
+	total += ctdb_uint32_len(&in->operation);
+	total += ctdb_uint32_len(&in->destnode);
+	total += ctdb_uint32_len(&in->srcnode);
+	total += ctdb_uint32_len(&in->reqid);
+
+	return total;
+}
+
+/*
+ * Marshall a packet header into buf.  The fields are written in the
+ * order listed below and the total number of bytes written is stored in
+ * *npush.  No bounds checking is done here.
+ */
+void ctdb_req_header_push(struct ctdb_req_header *in, uint8_t *buf,
+			  size_t *npush)
+{
+	uint32_t *fields[] = {
+		&in->length,
+		&in->ctdb_magic,
+		&in->ctdb_version,
+		&in->generation,
+		&in->operation,
+		&in->destnode,
+		&in->srcnode,
+		&in->reqid,
+	};
+	size_t pos = 0, used, i;
+
+	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
+		ctdb_uint32_push(fields[i], buf+pos, &used);
+		pos += used;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshall a packet header from buf into out.  The fields are read in
+ * the order listed below.  On success 0 is returned and the number of
+ * bytes consumed is stored in *npull.  If reading a field fails, the
+ * error is returned at once and *npull is left untouched; fields read
+ * before the failure have already been stored in out.
+ */
+int ctdb_req_header_pull(uint8_t *buf, size_t buflen,
+			 struct ctdb_req_header *out, size_t *npull)
+{
+	uint32_t *fields[] = {
+		&out->length,
+		&out->ctdb_magic,
+		&out->ctdb_version,
+		&out->generation,
+		&out->operation,
+		&out->destnode,
+		&out->srcnode,
+		&out->reqid,
+	};
+	size_t pos = 0, used, i;
+	int ret;
+
+	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
+		ret = ctdb_uint32_pull(buf+pos, buflen-pos, fields[i],
+				       &used);
+		if (ret != 0) {
+			return ret;
+		}
+		pos += used;
+	}
+
+	*npull = pos;
+	return 0;
+}
diff --git a/ctdb/protocol/protocol_keepalive.c b/ctdb/protocol/protocol_keepalive.c
new file mode 100644
index 0000000..3a1fc0e
--- /dev/null
+++ b/ctdb/protocol/protocol_keepalive.c
@@ -0,0 +1,95 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_api.h"
+#include "protocol_private.h"
+
+/*
+ * Return the number of bytes needed to marshall a keepalive request:
+ * the header plus the version and uptime fields.
+ */
+size_t ctdb_req_keepalive_len(struct ctdb_req_header *h,
+			      struct ctdb_req_keepalive *c)
+{
+	size_t total = ctdb_req_header_len(h);
+
+	total += ctdb_uint32_len(&c->version);
+	total += ctdb_uint32_len(&c->uptime);
+
+	return total;
+}
+
+/*
+ * Marshall a keepalive request into buf.
+ *
+ * *buflen is the size of buf on entry.  If it is smaller than required,
+ * the required size is stored in *buflen and EMSGSIZE is returned.
+ * Otherwise h->length is set to *buflen, the header, version and uptime
+ * are written in that order, and 0 is returned.
+ */
+int ctdb_req_keepalive_push(struct ctdb_req_header *h,
+			    struct ctdb_req_keepalive *c,
+			    uint8_t *buf, size_t *buflen)
+{
+	size_t needed = ctdb_req_keepalive_len(h, c);
+	size_t pos = 0, used;
+
+	if (*buflen < needed) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+
+	ctdb_req_header_push(h, buf+pos, &used);
+	pos += used;
+
+	ctdb_uint32_push(&c->version, buf+pos, &used);
+	pos += used;
+
+	ctdb_uint32_push(&c->uptime, buf+pos, &used);
+
+	return 0;
+}
+
+/*
+ * Unmarshall a keepalive request from buf into c.  The header is read
+ * first and copied to h when h is not NULL, then version and uptime.
+ * Returns 0 on success or the error code of the pull helper that
+ * failed.  mem_ctx is not used by this function.
+ */
+int ctdb_req_keepalive_pull(uint8_t *buf, size_t buflen,
+			    struct ctdb_req_header *h,
+			    TALLOC_CTX *mem_ctx,
+			    struct ctdb_req_keepalive *c)
+{
+	struct ctdb_req_header hdr;
+	size_t pos = 0, used;
+	int ret;
+
+	ret = ctdb_req_header_pull(buf, buflen, &hdr, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += used;
+
+	if (h != NULL) {
+		*h = hdr;
+	}
+
+	ret = ctdb_uint32_pull(buf+pos, buflen-pos, &c->version, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += used;
+
+	return ctdb_uint32_pull(buf+pos, buflen-pos, &c->uptime, &used);
+}
diff --git a/ctdb/protocol/protocol_message.c b/ctdb/protocol/protocol_message.c
new file mode 100644
index 0000000..8d32322
--- /dev/null
+++ b/ctdb/protocol/protocol_message.c
@@ -0,0 +1,485 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_api.h"
+#include "protocol_private.h"
+
+
+/*
+ * Size in bytes of the marshalled message payload for the given srvid.
+ *
+ * srvid selects which member of the union is measured.  Srvids that carry
+ * no payload yield 0; any srvid not listed here is treated as a raw
+ * TDB_DATA blob.
+ */
+static size_t ctdb_message_data_len(union ctdb_message_data *mdata,
+				    uint64_t srvid)
+{
+	switch (srvid) {
+	/* No payload */
+	case CTDB_SRVID_RECONFIGURE:
+	case CTDB_SRVID_IPREALLOCATED:
+	case CTDB_SRVID_GETLOG:
+	case CTDB_SRVID_CLEARLOG:
+	case CTDB_SRVID_RELOAD_NODES:
+		return 0;
+
+	/* Payload is a node number */
+	case CTDB_SRVID_BANNING:
+	case CTDB_SRVID_LEADER:
+	case CTDB_SRVID_REBALANCE_NODE:
+		return ctdb_uint32_len(&mdata->pnn);
+
+	case CTDB_SRVID_ELECTION:
+		return ctdb_election_message_len(mdata->election);
+
+	/* Payload is an IP address string */
+	case CTDB_SRVID_RELEASE_IP:
+	case CTDB_SRVID_TAKE_IP:
+		return ctdb_string_len(&mdata->ipaddr);
+
+	case CTDB_SRVID_SET_NODE_FLAGS:
+	case CTDB_SRVID_PUSH_NODE_FLAGS:
+		return ctdb_node_flag_change_len(mdata->flag_change);
+
+	case CTDB_SRVID_RECD_UPDATE_IP:
+		return ctdb_public_ip_len(mdata->pubip);
+
+	case CTDB_SRVID_VACUUM_FETCH:
+		return ctdb_rec_buffer_len(mdata->recbuf);
+
+	case CTDB_SRVID_DETACH_DATABASE:
+		return ctdb_uint32_len(&mdata->db_id);
+
+	case CTDB_SRVID_MEM_DUMP:
+	case CTDB_SRVID_TAKEOVER_RUN:
+		return ctdb_srvid_message_len(mdata->msg);
+
+	case CTDB_SRVID_DISABLE_TAKEOVER_RUNS:
+	case CTDB_SRVID_DISABLE_RECOVERIES:
+		return ctdb_disable_message_len(mdata->disable);
+
+	case CTDB_SRVID_DISABLE_IP_CHECK:
+		return ctdb_uint32_len(&mdata->timeout);
+
+	/* Unknown srvid: opaque data */
+	default:
+		return ctdb_tdb_data_len(&mdata->data);
+	}
+}
+
+/*
+ * Marshall the srvid-specific message payload into buf.
+ *
+ * srvid selects which member of the union is written; any srvid not
+ * listed here is written as a raw TDB_DATA blob.  *npush receives the
+ * byte count reported by the underlying push routine, or 0 for srvids
+ * that carry no payload.  This function takes no buffer length, so sizing
+ * buf is up to the caller.
+ */
+static void ctdb_message_data_push(union ctdb_message_data *mdata,
+				   uint64_t srvid, uint8_t *buf,
+				   size_t *npush)
+{
+	size_t written = 0;
+
+	switch (srvid) {
+	/* No payload: nothing to write */
+	case CTDB_SRVID_RECONFIGURE:
+	case CTDB_SRVID_IPREALLOCATED:
+	case CTDB_SRVID_GETLOG:
+	case CTDB_SRVID_CLEARLOG:
+	case CTDB_SRVID_RELOAD_NODES:
+		break;
+
+	/* Payload is a node number */
+	case CTDB_SRVID_BANNING:
+	case CTDB_SRVID_LEADER:
+	case CTDB_SRVID_REBALANCE_NODE:
+		ctdb_uint32_push(&mdata->pnn, buf, &written);
+		break;
+
+	case CTDB_SRVID_ELECTION:
+		ctdb_election_message_push(mdata->election, buf, &written);
+		break;
+
+	/* Payload is an IP address string */
+	case CTDB_SRVID_RELEASE_IP:
+	case CTDB_SRVID_TAKE_IP:
+		ctdb_string_push(&mdata->ipaddr, buf, &written);
+		break;
+
+	case CTDB_SRVID_SET_NODE_FLAGS:
+	case CTDB_SRVID_PUSH_NODE_FLAGS:
+		ctdb_node_flag_change_push(mdata->flag_change, buf, &written);
+		break;
+
+	case CTDB_SRVID_RECD_UPDATE_IP:
+		ctdb_public_ip_push(mdata->pubip, buf, &written);
+		break;
+
+	case CTDB_SRVID_VACUUM_FETCH:
+		ctdb_rec_buffer_push(mdata->recbuf, buf, &written);
+		break;
+
+	case CTDB_SRVID_DETACH_DATABASE:
+		ctdb_uint32_push(&mdata->db_id, buf, &written);
+		break;
+
+	case CTDB_SRVID_MEM_DUMP:
+	case CTDB_SRVID_TAKEOVER_RUN:
+		ctdb_srvid_message_push(mdata->msg, buf, &written);
+		break;
+
+	case CTDB_SRVID_DISABLE_TAKEOVER_RUNS:
+	case CTDB_SRVID_DISABLE_RECOVERIES:
+		ctdb_disable_message_push(mdata->disable, buf, &written);
+		break;
+
+	case CTDB_SRVID_DISABLE_IP_CHECK:
+		ctdb_uint32_push(&mdata->timeout, buf, &written);
+		break;
+
+	/* Unknown srvid: opaque data */
+	default:
+		ctdb_tdb_data_push(&mdata->data, buf, &written);
+		break;
+	}
+
+	*npush = written;
+}
+
+/*
+ * Unmarshall the srvid-specific message payload from buf (buflen bytes).
+ *
+ * srvid selects which member of the union is filled in; any srvid not
+ * listed here is read as a raw TDB_DATA blob.  mem_ctx is handed to the
+ * pull routines that take a talloc context.
+ *
+ * Returns 0 and stores the consumed byte count in *npull on success
+ * (0 bytes for srvids that carry no payload).  On failure the error from
+ * the underlying pull routine is returned and *npull is left untouched.
+ */
+static int ctdb_message_data_pull(uint8_t *buf, size_t buflen,
+				  uint64_t srvid, TALLOC_CTX *mem_ctx,
+				  union ctdb_message_data *mdata,
+				  size_t *npull)
+{
+	size_t consumed = 0;
+	int ret = 0;
+
+	switch (srvid) {
+	/* No payload: nothing to read */
+	case CTDB_SRVID_RECONFIGURE:
+	case CTDB_SRVID_IPREALLOCATED:
+	case CTDB_SRVID_GETLOG:
+	case CTDB_SRVID_CLEARLOG:
+	case CTDB_SRVID_RELOAD_NODES:
+		break;
+
+	/* Payload is a node number */
+	case CTDB_SRVID_BANNING:
+	case CTDB_SRVID_LEADER:
+	case CTDB_SRVID_REBALANCE_NODE:
+		ret = ctdb_uint32_pull(buf, buflen, &mdata->pnn, &consumed);
+		break;
+
+	case CTDB_SRVID_ELECTION:
+		ret = ctdb_election_message_pull(buf, buflen, mem_ctx,
+						 &mdata->election, &consumed);
+		break;
+
+	/* Payload is an IP address string */
+	case CTDB_SRVID_RELEASE_IP:
+	case CTDB_SRVID_TAKE_IP:
+		ret = ctdb_string_pull(buf, buflen, mem_ctx, &mdata->ipaddr,
+				       &consumed);
+		break;
+
+	case CTDB_SRVID_SET_NODE_FLAGS:
+	case CTDB_SRVID_PUSH_NODE_FLAGS:
+		ret = ctdb_node_flag_change_pull(buf, buflen, mem_ctx,
+						 &mdata->flag_change,
+						 &consumed);
+		break;
+
+	case CTDB_SRVID_RECD_UPDATE_IP:
+		ret = ctdb_public_ip_pull(buf, buflen, mem_ctx,
+					  &mdata->pubip, &consumed);
+		break;
+
+	case CTDB_SRVID_VACUUM_FETCH:
+		ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx,
+					   &mdata->recbuf, &consumed);
+		break;
+
+	case CTDB_SRVID_DETACH_DATABASE:
+		ret = ctdb_uint32_pull(buf, buflen, &mdata->db_id, &consumed);
+		break;
+
+	case CTDB_SRVID_MEM_DUMP:
+	case CTDB_SRVID_TAKEOVER_RUN:
+		ret = ctdb_srvid_message_pull(buf, buflen, mem_ctx,
+					      &mdata->msg, &consumed);
+		break;
+
+	case CTDB_SRVID_DISABLE_TAKEOVER_RUNS:
+	case CTDB_SRVID_DISABLE_RECOVERIES:
+		ret = ctdb_disable_message_pull(buf, buflen, mem_ctx,
+						&mdata->disable, &consumed);
+		break;
+
+	case CTDB_SRVID_DISABLE_IP_CHECK:
+		ret = ctdb_uint32_pull(buf, buflen, &mdata->timeout,
+				       &consumed);
+		break;
+
+	/* Unknown srvid: opaque data */
+	default:
+		ret = ctdb_tdb_data_pull(buf, buflen, mem_ctx, &mdata->data,
+					 &consumed);
+		break;
+	}
+
+	if (ret == 0) {
+		*npull = consumed;
+	}
+
+	return ret;
+}
+
+/*
+ * Size in bytes of a marshalled message request: packet header, 64-bit
+ * srvid, a uint32 payload length and the srvid-specific payload itself.
+ */
+size_t ctdb_req_message_len(struct ctdb_req_header *h,
+			    struct ctdb_req_message *c)
+{
+	/* The payload size travels on the wire as a uint32 */
+	uint32_t payload_len = ctdb_message_data_len(&c->data, c->srvid);
+	size_t total;
+
+	total = ctdb_req_header_len(h);
+	total += ctdb_uint64_len(&c->srvid);
+	total += ctdb_uint32_len(&payload_len);
+	total += payload_len;
+
+	return total;
+}
+
+/*
+ * Marshall a message request into buf.
+ *
+ * If *buflen is smaller than ctdb_req_message_len(), *buflen is updated
+ * to the required size and EMSGSIZE is returned without writing to buf.
+ * Otherwise h->length is set to *buflen and the header, srvid, payload
+ * length (uint32) and srvid-specific payload are written in that order;
+ * 0 is returned.
+ */
+int ctdb_req_message_push(struct ctdb_req_header *h,
+			  struct ctdb_req_message *c,
+			  uint8_t *buf, size_t *buflen)
+{
+	size_t needed = ctdb_req_message_len(h, c);
+	uint8_t *cursor = buf;
+	uint32_t payload_len;
+	size_t n;
+
+	if (needed > *buflen) {
+		/* Tell the caller how much room is required */
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push(h, cursor, &n);
+	cursor += n;
+
+	ctdb_uint64_push(&c->srvid, cursor, &n);
+	cursor += n;
+
+	payload_len = ctdb_message_data_len(&c->data, c->srvid);
+	ctdb_uint32_push(&payload_len, cursor, &n);
+	cursor += n;
+
+	ctdb_message_data_push(&c->data, c->srvid, cursor, &n);
+
+	return 0;
+}
+
+/*
+ * Unmarshall a message request from buf (buflen bytes).
+ *
+ * Reads the packet header (copied to *h when h is not NULL), the srvid
+ * and the uint32 payload length, then hands exactly that many bytes to
+ * the srvid-specific payload parser together with mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if the advertised payload length exceeds
+ * what is left in buf, or the error of whichever pull routine failed.
+ */
+int ctdb_req_message_pull(uint8_t *buf, size_t buflen,
+			  struct ctdb_req_header *h,
+			  TALLOC_CTX *mem_ctx,
+			  struct ctdb_req_message *c)
+{
+	struct ctdb_req_header hdr;
+	uint32_t payload_len;
+	size_t pos = 0, n;
+	int ret;
+
+	ret = ctdb_req_header_pull(buf + pos, buflen - pos, &hdr, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	if (h != NULL) {
+		*h = hdr;
+	}
+
+	ret = ctdb_uint64_pull(buf + pos, buflen - pos, &c->srvid, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &payload_len, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	/* The payload must fit in the remainder of the buffer */
+	if (payload_len > buflen - pos) {
+		return EMSGSIZE;
+	}
+
+	return ctdb_message_data_pull(buf + pos, payload_len, c->srvid,
+				      mem_ctx, &c->data, &n);
+}
+
+/*
+ * Size in bytes of a marshalled raw-data message request: packet header,
+ * 64-bit srvid and the data as measured by ctdb_tdb_datan_len().
+ */
+size_t ctdb_req_message_data_len(struct ctdb_req_header *h,
+				 struct ctdb_req_message_data *c)
+{
+	size_t total = ctdb_req_header_len(h);
+
+	total += ctdb_uint64_len(&c->srvid);
+	total += ctdb_tdb_datan_len(&c->data);
+
+	return total;
+}
+
+/*
+ * Marshall a raw-data message request into buf.
+ *
+ * If *buflen is smaller than ctdb_req_message_data_len(), *buflen is
+ * updated to the required size and EMSGSIZE is returned without writing
+ * to buf.  Otherwise h->length is set to *buflen and the header, srvid
+ * and data are written in that order; 0 is returned.
+ */
+int ctdb_req_message_data_push(struct ctdb_req_header *h,
+			       struct ctdb_req_message_data *c,
+			       uint8_t *buf, size_t *buflen)
+{
+	size_t needed = ctdb_req_message_data_len(h, c);
+	uint8_t *cursor = buf;
+	size_t n;
+
+	if (needed > *buflen) {
+		/* Tell the caller how much room is required */
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push(h, cursor, &n);
+	cursor += n;
+
+	ctdb_uint64_push(&c->srvid, cursor, &n);
+	cursor += n;
+
+	ctdb_tdb_datan_push(&c->data, cursor, &n);
+
+	return 0;
+}
+
+/*
+ * Unmarshall a raw-data message request from buf (buflen bytes).
+ *
+ * Reads the packet header (copied to *h when h is not NULL), the srvid
+ * and then the data via ctdb_tdb_datan_pull(), which receives mem_ctx.
+ * Returns 0 on success or the error of whichever pull routine failed.
+ */
+int ctdb_req_message_data_pull(uint8_t *buf, size_t buflen,
+			       struct ctdb_req_header *h,
+			       TALLOC_CTX *mem_ctx,
+			       struct ctdb_req_message_data *c)
+{
+	struct ctdb_req_header hdr;
+	size_t pos = 0, n;
+	int ret;
+
+	ret = ctdb_req_header_pull(buf + pos, buflen - pos, &hdr, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	if (h != NULL) {
+		*h = hdr;
+	}
+
+	ret = ctdb_uint64_pull(buf + pos, buflen - pos, &c->srvid, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	/* Last field: its status is the status of the whole pull */
+	return ctdb_tdb_datan_pull(buf + pos, buflen - pos,
+				   mem_ctx, &c->data, &n);
+}
diff --git a/ctdb/protocol/protocol_packet.c b/ctdb/protocol/protocol_packet.c
new file mode 100644
index 0000000..d5fd96b
--- /dev/null
+++ b/ctdb/protocol/protocol_packet.c
@@ -0,0 +1,48 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol_api.h"
+
+#define CTDB_DS_ALIGNMENT 8
+
+/*
+ * Allocate a zero-filled packet buffer able to hold length bytes.
+ *
+ * The size is rounded up to the next multiple of CTDB_DS_ALIGNMENT.  The
+ * rounded size is stored in *buflen and the buffer, allocated with
+ * talloc_zero_size() on mem_ctx, in *buf.
+ *
+ * Returns 0 on success, EINVAL if buf or buflen is NULL, and ENOMEM if
+ * the allocation fails or if rounding length up would overflow size_t.
+ * On ENOMEM *buf is NULL.
+ */
+int ctdb_allocate_pkt(TALLOC_CTX *mem_ctx, size_t length,
+		      uint8_t **buf, size_t *buflen)
+{
+	size_t new_length;
+
+	if (buf == NULL || buflen == NULL) {
+		return EINVAL;
+	}
+
+	/*
+	 * Guard the rounding below: if length + (CTDB_DS_ALIGNMENT-1) wrapped
+	 * around, the "rounded up" size would be smaller than the request and
+	 * an undersized buffer would be returned as success.
+	 */
+	if (length > SIZE_MAX - (CTDB_DS_ALIGNMENT-1)) {
+		*buf = NULL;
+		*buflen = 0;
+		return ENOMEM;
+	}
+
+	new_length = (length + CTDB_DS_ALIGNMENT-1) &
+		     ~((size_t)CTDB_DS_ALIGNMENT-1);
+
+	*buflen = new_length;
+	*buf = talloc_zero_size(mem_ctx, new_length);
+	if (*buf == NULL) {
+		return ENOMEM;
+	}
+
+	return 0;
+}
diff --git a/ctdb/protocol/protocol_private.h b/ctdb/protocol/protocol_private.h
new file mode 100644
index 0000000..cbbba39
--- /dev/null
+++ b/ctdb/protocol/protocol_private.h
@@ -0,0 +1,300 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __PROTOCOL_PRIVATE_H__
+#define __PROTOCOL_PRIVATE_H__
+
+#include "protocol.h"
+#include "protocol_basic.h"
+
+/*
+ * From protocol/protocol_types.c
+ */
+
+/*
+ * ctdb_tdb_data_*: marshalling of the bytes of a TDB_DATA with no length
+ * prefix.  The pull side duplicates buflen bytes onto mem_ctx.
+ */
+size_t ctdb_tdb_data_len(TDB_DATA *in);
+void ctdb_tdb_data_push(TDB_DATA *in, uint8_t *buf, size_t *npush);
+int ctdb_tdb_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ TDB_DATA *out, size_t *npull);
+
+/*
+ * ctdb_tdb_datan_*: TDB_DATA preceded by a uint32 length field.
+ * NOTE(review): the pull side presumably reads that length first -
+ * confirm against protocol_types.c.
+ */
+size_t ctdb_tdb_datan_len(TDB_DATA *in);
+void ctdb_tdb_datan_push(TDB_DATA *in, uint8_t *buf, size_t *npush);
+int ctdb_tdb_datan_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ TDB_DATA *out, size_t *npull);
+
+/*
+ * struct ctdb_latency_counter: len/push/pull prototypes.  Unlike most
+ * pull prototypes in this header, this one fills a caller-supplied
+ * structure and takes no talloc context.
+ */
+size_t ctdb_latency_counter_len(struct ctdb_latency_counter *in);
+void ctdb_latency_counter_push(struct ctdb_latency_counter *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_latency_counter_pull(uint8_t *buf, size_t buflen,
+ struct ctdb_latency_counter *out, size_t *npull);
+
+/*
+ * struct ctdb_statistics: len/push/pull prototypes.  The pull prototype
+ * returns the structure through a pointer-to-pointer and takes mem_ctx;
+ * presumably the result is allocated on mem_ctx - TODO confirm.
+ */
+size_t ctdb_statistics_len(struct ctdb_statistics *in);
+void ctdb_statistics_push(struct ctdb_statistics *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_statistics_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_statistics **out, size_t *npull);
+
+/*
+ * struct ctdb_statistics_list: len/push/pull prototypes.
+ *
+ * The output parameter of the push prototype is named npush, matching
+ * every other push prototype in this header.
+ */
+size_t ctdb_statistics_list_len(struct ctdb_statistics_list *in);
+void ctdb_statistics_list_push(struct ctdb_statistics_list *in,
+			       uint8_t *buf, size_t *npush);
+int ctdb_statistics_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			      struct ctdb_statistics_list **out,
+			      size_t *npull);
+
+/*
+ * Each structure below is declared with three prototypes:
+ *   <type>_len(in)
+ *   <type>_push(in, buf, npush)
+ *   <type>_pull(buf, buflen, mem_ctx, out, npull)
+ *
+ * NOTE(review): by analogy with the ctdb_tdb_data_* helpers, _len
+ * presumably reports the marshalled size, _push writes to buf and reports
+ * the bytes written via npush, and _pull parses at most buflen bytes and
+ * reports the bytes consumed via npull - confirm against
+ * protocol_types.c.
+ */
+size_t ctdb_vnn_map_len(struct ctdb_vnn_map *in);
+void ctdb_vnn_map_push(struct ctdb_vnn_map *in, uint8_t *buf, size_t *npush);
+int ctdb_vnn_map_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_vnn_map **out, size_t *npull);
+
+size_t ctdb_dbid_len(struct ctdb_dbid *in);
+void ctdb_dbid_push(struct ctdb_dbid *in, uint8_t *buf, size_t *npush);
+int ctdb_dbid_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_dbid **out, size_t *npull);
+
+size_t ctdb_dbid_map_len(struct ctdb_dbid_map *in);
+void ctdb_dbid_map_push(struct ctdb_dbid_map *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_dbid_map_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_dbid_map **out, size_t *npull);
+
+size_t ctdb_pulldb_len(struct ctdb_pulldb *in);
+void ctdb_pulldb_push(struct ctdb_pulldb *in, uint8_t *buf, size_t *npush);
+int ctdb_pulldb_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_pulldb **out, size_t *npull);
+
+size_t ctdb_pulldb_ext_len(struct ctdb_pulldb_ext *in);
+void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_pulldb_ext **out, size_t *npull);
+
+size_t ctdb_db_vacuum_len(struct ctdb_db_vacuum *in);
+void ctdb_db_vacuum_push(struct ctdb_db_vacuum *in,
+ uint8_t *buf,
+ size_t *npush);
+int ctdb_db_vacuum_pull(uint8_t *buf,
+ size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_db_vacuum **out,
+ size_t *npull);
+
+size_t ctdb_echo_data_len(struct ctdb_echo_data *in);
+void ctdb_echo_data_push(struct ctdb_echo_data *in,
+ uint8_t *buf,
+ size_t *npush);
+int ctdb_echo_data_pull(uint8_t *buf,
+ size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_echo_data **out,
+ size_t *npull);
+
+size_t ctdb_traverse_start_len(struct ctdb_traverse_start *in);
+void ctdb_traverse_start_push(struct ctdb_traverse_start *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_traverse_start_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_start **out, size_t *npull);
+
+size_t ctdb_traverse_all_len(struct ctdb_traverse_all *in);
+void ctdb_traverse_all_push(struct ctdb_traverse_all *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_traverse_all_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_all **out, size_t *npull);
+
+size_t ctdb_traverse_start_ext_len(struct ctdb_traverse_start_ext *in);
+void ctdb_traverse_start_ext_push(struct ctdb_traverse_start_ext *in,
+ uint8_t *buf, size_t *npush);
+int ctdb_traverse_start_ext_pull(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_start_ext **out,
+ size_t *npull);
+
+size_t ctdb_traverse_all_ext_len(struct ctdb_traverse_all_ext *in);
+void ctdb_traverse_all_ext_push(struct ctdb_traverse_all_ext *in,
+ uint8_t *buf, size_t *npush);
+int ctdb_traverse_all_ext_pull(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_all_ext **out,
+ size_t *npull);
+
+/*
+ * ctdb_sock_addr has two pull prototypes: ctdb_sock_addr_pull_elems()
+ * takes a caller-supplied ctdb_sock_addr, ctdb_sock_addr_pull() returns
+ * one through a pointer-to-pointer.
+ */
+size_t ctdb_sock_addr_len(ctdb_sock_addr *in);
+void ctdb_sock_addr_push(ctdb_sock_addr *in, uint8_t *buf, size_t *npush);
+int ctdb_sock_addr_pull_elems(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx, ctdb_sock_addr *out,
+ size_t *npull);
+int ctdb_sock_addr_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ ctdb_sock_addr **out, size_t *npull);
+
+/*
+ * Connection, tunable, node-flag, variable-list, tickle, address,
+ * transaction-db, uptime, public-IP and node-map structures.
+ *
+ * Each one is declared with a <type>_len / <type>_push / <type>_pull
+ * triple; every pull prototype here takes a talloc context and returns
+ * the structure through a pointer-to-pointer.
+ * NOTE(review): presumably the pulled structure is allocated on mem_ctx -
+ * confirm against protocol_types.c.
+ */
+size_t ctdb_connection_len(struct ctdb_connection *in);
+void ctdb_connection_push(struct ctdb_connection *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_connection_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_connection **out, size_t *npull);
+
+size_t ctdb_connection_list_len(struct ctdb_connection_list *in);
+void ctdb_connection_list_push(struct ctdb_connection_list *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_connection_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_connection_list **out, size_t *npull);
+
+size_t ctdb_tunable_len(struct ctdb_tunable *in);
+void ctdb_tunable_push(struct ctdb_tunable *in, uint8_t *buf, size_t *npush);
+int ctdb_tunable_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_tunable **out, size_t *npull);
+
+size_t ctdb_node_flag_change_len(struct ctdb_node_flag_change *in);
+void ctdb_node_flag_change_push(struct ctdb_node_flag_change *in,
+ uint8_t *buf, size_t *npush);
+int ctdb_node_flag_change_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_node_flag_change **out,
+ size_t *npull);
+
+size_t ctdb_var_list_len(struct ctdb_var_list *in);
+void ctdb_var_list_push(struct ctdb_var_list *in, uint8_t *buf, size_t *npush);
+int ctdb_var_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_var_list **out, size_t *npull);
+
+size_t ctdb_tunable_list_len(struct ctdb_tunable_list *in);
+void ctdb_tunable_list_push(struct ctdb_tunable_list *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_tunable_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_tunable_list **out, size_t *npull);
+
+size_t ctdb_tickle_list_len(struct ctdb_tickle_list *in);
+void ctdb_tickle_list_push(struct ctdb_tickle_list *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_tickle_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_tickle_list **out, size_t *npull);
+
+size_t ctdb_addr_info_len(struct ctdb_addr_info *in);
+void ctdb_addr_info_push(struct ctdb_addr_info *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_addr_info_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_addr_info **out, size_t *npull);
+
+size_t ctdb_transdb_len(struct ctdb_transdb *in);
+void ctdb_transdb_push(struct ctdb_transdb *in, uint8_t *buf, size_t *npush);
+int ctdb_transdb_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_transdb **out, size_t *npull);
+
+size_t ctdb_uptime_len(struct ctdb_uptime *in);
+void ctdb_uptime_push(struct ctdb_uptime *in, uint8_t *buf, size_t *npush);
+int ctdb_uptime_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_uptime **out, size_t *npull);
+
+size_t ctdb_public_ip_len(struct ctdb_public_ip *in);
+void ctdb_public_ip_push(struct ctdb_public_ip *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_public_ip_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip **out, size_t *npull);
+
+size_t ctdb_public_ip_list_len(struct ctdb_public_ip_list *in);
+void ctdb_public_ip_list_push(struct ctdb_public_ip_list *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_public_ip_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_list **out, size_t *npull);
+
+size_t ctdb_node_and_flags_len(struct ctdb_node_and_flags *in);
+void ctdb_node_and_flags_push(struct ctdb_node_and_flags *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_node_and_flags_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_node_and_flags **out, size_t *npull);
+
+size_t ctdb_node_map_len(struct ctdb_node_map *in);
+void ctdb_node_map_push(struct ctdb_node_map *in, uint8_t *buf, size_t *npush);
+int ctdb_node_map_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_node_map **out, size_t *npull);
+
+/*
+ * Script, ban-state, notify, interface, public-IP-info, key-data,
+ * db-statistics and pid/srvid structures, followed by the structures
+ * carried in srvid messages (election, srvid and disable messages).
+ *
+ * Each one is declared with a <type>_len / <type>_push / <type>_pull
+ * triple; every pull prototype here takes a talloc context and returns
+ * the structure through a pointer-to-pointer.
+ * NOTE(review): presumably the pulled structure is allocated on mem_ctx -
+ * confirm against protocol_types.c.
+ */
+size_t ctdb_script_len(struct ctdb_script *in);
+void ctdb_script_push(struct ctdb_script *in, uint8_t *buf, size_t *npush);
+int ctdb_script_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_script **out, size_t *npull);
+
+size_t ctdb_script_list_len(struct ctdb_script_list *in);
+void ctdb_script_list_push(struct ctdb_script_list *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_script_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_script_list **out, size_t *npull);
+
+size_t ctdb_ban_state_len(struct ctdb_ban_state *in);
+void ctdb_ban_state_push(struct ctdb_ban_state *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_ban_state_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_ban_state **out, size_t *npull);
+
+size_t ctdb_notify_data_len(struct ctdb_notify_data *in);
+void ctdb_notify_data_push(struct ctdb_notify_data *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_notify_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_notify_data **out, size_t *npull);
+
+size_t ctdb_iface_len(struct ctdb_iface *in);
+void ctdb_iface_push(struct ctdb_iface *in, uint8_t *buf, size_t *npush);
+int ctdb_iface_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_iface **out, size_t *npull);
+
+size_t ctdb_iface_list_len(struct ctdb_iface_list *in);
+void ctdb_iface_list_push(struct ctdb_iface_list *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_iface_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_iface_list **out, size_t *npull);
+
+size_t ctdb_public_ip_info_len(struct ctdb_public_ip_info *in);
+void ctdb_public_ip_info_push(struct ctdb_public_ip_info *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_public_ip_info_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_info **out, size_t *npull);
+
+size_t ctdb_key_data_len(struct ctdb_key_data *in);
+void ctdb_key_data_push(struct ctdb_key_data *in, uint8_t *buf, size_t *npush);
+int ctdb_key_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_key_data **out, size_t *npull);
+
+size_t ctdb_db_statistics_len(struct ctdb_db_statistics *in);
+void ctdb_db_statistics_push(struct ctdb_db_statistics *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_db_statistics_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_db_statistics **out, size_t *npull);
+
+size_t ctdb_pid_srvid_len(struct ctdb_pid_srvid *in);
+void ctdb_pid_srvid_push(struct ctdb_pid_srvid *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_pid_srvid_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_pid_srvid **out, size_t *npull);
+
+size_t ctdb_election_message_len(struct ctdb_election_message *in);
+void ctdb_election_message_push(struct ctdb_election_message *in,
+ uint8_t *buf, size_t *npush);
+int ctdb_election_message_pull(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_election_message **out,
+ size_t *npull);
+
+size_t ctdb_srvid_message_len(struct ctdb_srvid_message *in);
+void ctdb_srvid_message_push(struct ctdb_srvid_message *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_srvid_message_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_srvid_message **out, size_t *npull);
+
+size_t ctdb_disable_message_len(struct ctdb_disable_message *in);
+void ctdb_disable_message_push(struct ctdb_disable_message *in, uint8_t *buf,
+ size_t *npush);
+int ctdb_disable_message_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+ struct ctdb_disable_message **out,
+ size_t *npull);
+
+#endif /* __PROTOCOL_PRIVATE_H__ */
diff --git a/ctdb/protocol/protocol_sock.c b/ctdb/protocol/protocol_sock.c
new file mode 100644
index 0000000..e32f087
--- /dev/null
+++ b/ctdb/protocol/protocol_sock.c
@@ -0,0 +1,81 @@
+/*
+ CTDB generic sock packet marshalling
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "protocol.h"
+#include "protocol_private.h"
+#include "protocol_api.h"
+
+/*
+ * Size in bytes of a marshalled sock packet header: the length and reqid
+ * fields, one uint32 each.
+ */
+size_t sock_packet_header_len(struct sock_packet_header *in)
+{
+	size_t total = ctdb_uint32_len(&in->length);
+
+	total += ctdb_uint32_len(&in->reqid);
+
+	return total;
+}
+
+/*
+ * Marshall a sock packet header into buf: length first, then reqid.
+ * *npush receives the total number of bytes written.  No buffer length
+ * is taken, so sizing buf is up to the caller.
+ */
+void sock_packet_header_push(struct sock_packet_header *in, uint8_t *buf,
+			     size_t *npush)
+{
+	size_t written, n;
+
+	ctdb_uint32_push(&in->length, buf, &n);
+	written = n;
+
+	ctdb_uint32_push(&in->reqid, buf + written, &n);
+	written += n;
+
+	*npush = written;
+}
+
+/*
+ * Unmarshall a sock packet header from buf (buflen bytes): length first,
+ * then reqid.
+ *
+ * Returns 0 and stores the consumed byte count in *npull on success.  On
+ * failure the error from ctdb_uint32_pull() is returned and *npull is
+ * left untouched.
+ */
+int sock_packet_header_pull(uint8_t *buf, size_t buflen,
+			    struct sock_packet_header *out, size_t *npull)
+{
+	size_t pos = 0, n;
+	int ret;
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &out->length, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &out->reqid, &n);
+	if (ret == 0) {
+		*npull = pos + n;
+	}
+
+	return ret;
+}
+
+/* Store reqid in the reqid field of the sock packet header h. */
+void sock_packet_header_set_reqid(struct sock_packet_header *h,
+ uint32_t reqid)
+{
+ h->reqid = reqid;
+}
+
+/* Store length in the length field of the sock packet header h. */
+void sock_packet_header_set_length(struct sock_packet_header *h,
+ uint32_t length)
+{
+ h->length = length;
+}
diff --git a/ctdb/protocol/protocol_tunnel.c b/ctdb/protocol/protocol_tunnel.c
new file mode 100644
index 0000000..d31d9d5
--- /dev/null
+++ b/ctdb/protocol/protocol_tunnel.c
@@ -0,0 +1,114 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_api.h"
+#include "protocol_private.h"
+
+/*
+ * Size in bytes of a marshalled tunnel request: packet header, 64-bit
+ * tunnel id, 32-bit flags and the data as measured by
+ * ctdb_tdb_datan_len().
+ */
+size_t ctdb_req_tunnel_len(struct ctdb_req_header *h,
+			   struct ctdb_req_tunnel *c)
+{
+	size_t total = ctdb_req_header_len(h);
+
+	total += ctdb_uint64_len(&c->tunnel_id);
+	total += ctdb_uint32_len(&c->flags);
+	total += ctdb_tdb_datan_len(&c->data);
+
+	return total;
+}
+
+/*
+ * Marshall a tunnel request into buf.
+ *
+ * If *buflen is smaller than ctdb_req_tunnel_len(), *buflen is updated to
+ * the required size and EMSGSIZE is returned without writing to buf.
+ * Otherwise h->length is set to *buflen and the header, tunnel id, flags
+ * and data are written in that order.  EMSGSIZE is also returned if the
+ * bytes written add up to more than *buflen; otherwise 0 is returned.
+ */
+int ctdb_req_tunnel_push(struct ctdb_req_header *h,
+			 struct ctdb_req_tunnel *c,
+			 uint8_t *buf, size_t *buflen)
+{
+	size_t needed = ctdb_req_tunnel_len(h, c);
+	size_t pos = 0, n;
+
+	if (needed > *buflen) {
+		/* Tell the caller how much room is required */
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push(h, buf, &n);
+	pos += n;
+
+	ctdb_uint64_push(&c->tunnel_id, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint32_push(&c->flags, buf + pos, &n);
+	pos += n;
+
+	ctdb_tdb_datan_push(&c->data, buf + pos, &n);
+	pos += n;
+
+	/* Sanity check on the number of bytes actually written */
+	return (pos > *buflen) ? EMSGSIZE : 0;
+}
+
+/*
+ * Unmarshall a tunnel request from buf (buflen bytes).
+ *
+ * Reads the packet header (copied to *h when h is not NULL), the tunnel
+ * id, the flags and then the data via ctdb_tdb_datan_pull(), which
+ * receives mem_ctx.
+ *
+ * Returns 0 on success, the error of whichever pull routine failed, or
+ * EMSGSIZE if the bytes consumed add up to more than buflen.
+ */
+int ctdb_req_tunnel_pull(uint8_t *buf, size_t buflen,
+			 struct ctdb_req_header *h,
+			 TALLOC_CTX *mem_ctx,
+			 struct ctdb_req_tunnel *c)
+{
+	struct ctdb_req_header hdr;
+	size_t pos = 0, n;
+	int ret;
+
+	ret = ctdb_req_header_pull(buf, buflen, &hdr, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	if (h != NULL) {
+		*h = hdr;
+	}
+
+	ret = ctdb_uint64_pull(buf + pos, buflen - pos, &c->tunnel_id, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &c->flags, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	ret = ctdb_tdb_datan_pull(buf + pos, buflen - pos, mem_ctx,
+				  &c->data, &n);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += n;
+
+	/* Sanity check on the number of bytes actually consumed */
+	return (pos > buflen) ? EMSGSIZE : 0;
+}
diff --git a/ctdb/protocol/protocol_types.c b/ctdb/protocol/protocol_types.c
new file mode 100644
index 0000000..0eb1923
--- /dev/null
+++ b/ctdb/protocol/protocol_types.c
@@ -0,0 +1,5348 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015-2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol.h"
+#include "protocol_private.h"
+#include "protocol_api.h"
+
+/* Marshalled size of a raw TDB_DATA blob, capped at UINT32_MAX bytes. */
+size_t ctdb_tdb_data_len(TDB_DATA *in)
+{
+	if (in->dsize > UINT32_MAX) {
+		return UINT32_MAX;
+	}
+
+	return in->dsize;
+}
+
+/*
+ * Copy the blob's bytes (as many as ctdb_tdb_data_len() reports) into buf.
+ * No length prefix is written; *npush receives the number of bytes copied.
+ */
+void ctdb_tdb_data_push(TDB_DATA *in, uint8_t *buf, size_t *npush)
+{
+	size_t nbytes = ctdb_tdb_data_len(in);
+
+	*npush = nbytes;
+
+	if (nbytes == 0) {
+		/* Nothing to copy; in->dptr is not dereferenced */
+		return;
+	}
+
+	memcpy(buf, in->dptr, nbytes);
+}
+
+/*
+ * Build a TDB_DATA from exactly buflen bytes of buf.
+ *
+ * The bytes are duplicated onto mem_ctx; an empty input yields a NULL
+ * dptr.  Returns EMSGSIZE if buflen exceeds UINT32_MAX, ENOMEM if the
+ * copy cannot be allocated, 0 otherwise.  *out and *npull are only
+ * written on success.
+ */
+int ctdb_tdb_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		       TDB_DATA *out, size_t *npull)
+{
+	TDB_DATA blob;
+
+	if (buflen > UINT32_MAX) {
+		return EMSGSIZE;
+	}
+
+	blob.dsize = buflen;
+	blob.dptr = NULL;
+
+	if (buflen != 0) {
+		blob.dptr = talloc_memdup(mem_ctx, buf, buflen);
+		if (blob.dptr == NULL) {
+			return ENOMEM;
+		}
+	}
+
+	*out = blob;
+	*npull = buflen;
+	return 0;
+}
+
+/*
+ * Marshalled size of a length-prefixed TDB_DATA: a uint32 byte count
+ * followed by that many bytes.
+ */
+size_t ctdb_tdb_datan_len(TDB_DATA *in)
+{
+	uint32_t payload = ctdb_tdb_data_len(in);
+
+	return payload + ctdb_uint32_len(&payload);
+}
+
+/*
+ * Write a length-prefixed TDB_DATA: the uint32 byte count first, then the
+ * bytes themselves.  *npush receives the total number of bytes written.
+ */
+void ctdb_tdb_datan_push(TDB_DATA *in, uint8_t *buf, size_t *npush)
+{
+	uint32_t payload = ctdb_tdb_data_len(in);
+	size_t prefix_len, body_len;
+
+	ctdb_uint32_push(&payload, buf, &prefix_len);
+	ctdb_tdb_data_push(in, buf + prefix_len, &body_len);
+
+	*npush = prefix_len + body_len;
+}
+
+/*
+ * Read a length-prefixed TDB_DATA.
+ *
+ * The uint32 byte count is parsed first and validated against what is
+ * left in the buffer; the payload is then duplicated onto mem_ctx.
+ * Returns 0 on success, EMSGSIZE if the buffer is too short for the
+ * advertised payload, or the error of the failing helper.
+ */
+int ctdb_tdb_datan_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			TDB_DATA *out, size_t *npull)
+{
+	size_t prefix_len, body_len;
+	uint32_t payload;
+	int err;
+
+	err = ctdb_uint32_pull(buf, buflen, &payload, &prefix_len);
+	if (err != 0) {
+		return err;
+	}
+
+	if (payload > buflen - prefix_len) {
+		return EMSGSIZE;
+	}
+
+	err = ctdb_tdb_data_pull(buf + prefix_len, payload, mem_ctx, out,
+				 &body_len);
+	if (err != 0) {
+		return err;
+	}
+
+	*npull = prefix_len + body_len;
+	return 0;
+}
+
+/*
+ * Marshalled size of a latency counter: num, a padding element of 4,
+ * then min, max and total.
+ */
+size_t ctdb_latency_counter_len(struct ctdb_latency_counter *in)
+{
+	size_t len;
+
+	len = ctdb_int32_len(&in->num);
+	len += ctdb_padding_len(4);
+	len += ctdb_double_len(&in->min);
+	len += ctdb_double_len(&in->max);
+	len += ctdb_double_len(&in->total);
+
+	return len;
+}
+
+/*
+ * Marshal a latency counter into buf in the order num, padding, min, max,
+ * total.  *npush receives the number of bytes written.
+ */
+void ctdb_latency_counter_push(struct ctdb_latency_counter *in, uint8_t *buf,
+			       size_t *npush)
+{
+	size_t pos, n;
+
+	ctdb_int32_push(&in->num, buf, &n);
+	pos = n;
+
+	ctdb_padding_push(4, buf + pos, &n);
+	pos += n;
+
+	ctdb_double_push(&in->min, buf + pos, &n);
+	pos += n;
+
+	ctdb_double_push(&in->max, buf + pos, &n);
+	pos += n;
+
+	ctdb_double_push(&in->total, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a latency counter from buf into *out.
+ *
+ * Elements are read in the order num, padding, min, max, total.  The
+ * first failing element's error is returned; *npull is set only on
+ * success, to the number of bytes consumed.
+ */
+int ctdb_latency_counter_pull(uint8_t *buf, size_t buflen,
+			      struct ctdb_latency_counter *out, size_t *npull)
+{
+	size_t pos, n;
+	int err;
+
+	err = ctdb_int32_pull(buf, buflen, &out->num, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos = n;
+
+	err = ctdb_padding_pull(buf + pos, buflen - pos, 4, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	err = ctdb_double_pull(buf + pos, buflen - pos, &out->min, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	err = ctdb_double_pull(buf + pos, buflen - pos, &out->max, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	err = ctdb_double_pull(buf + pos, buflen - pos, &out->total, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_statistics structure.
+ *
+ * The terms are added in the same order in which ctdb_statistics_push()
+ * writes the corresponding elements.
+ */
+size_t ctdb_statistics_len(struct ctdb_statistics *in)
+{
+	size_t len = 0;
+
+	/* Top-level state and packet counters */
+	len += ctdb_uint32_len(&in->num_clients);
+	len += ctdb_uint32_len(&in->frozen);
+	len += ctdb_uint32_len(&in->recovering);
+	len += ctdb_uint32_len(&in->client_packets_sent);
+	len += ctdb_uint32_len(&in->client_packets_recv);
+	len += ctdb_uint32_len(&in->node_packets_sent);
+	len += ctdb_uint32_len(&in->node_packets_recv);
+	len += ctdb_uint32_len(&in->keepalive_packets_sent);
+	len += ctdb_uint32_len(&in->keepalive_packets_recv);
+
+	/* Per-type counters for node packets */
+	len += ctdb_uint32_len(&in->node.req_call);
+	len += ctdb_uint32_len(&in->node.reply_call);
+	len += ctdb_uint32_len(&in->node.req_dmaster);
+	len += ctdb_uint32_len(&in->node.reply_dmaster);
+	len += ctdb_uint32_len(&in->node.reply_error);
+	len += ctdb_uint32_len(&in->node.req_message);
+	len += ctdb_uint32_len(&in->node.req_control);
+	len += ctdb_uint32_len(&in->node.reply_control);
+	len += ctdb_uint32_len(&in->node.req_tunnel);
+
+	/* Per-type counters for client packets */
+	len += ctdb_uint32_len(&in->client.req_call);
+	len += ctdb_uint32_len(&in->client.req_message);
+	len += ctdb_uint32_len(&in->client.req_control);
+	len += ctdb_uint32_len(&in->client.req_tunnel);
+
+	/* Timeouts */
+	len += ctdb_uint32_len(&in->timeouts.call);
+	len += ctdb_uint32_len(&in->timeouts.control);
+	len += ctdb_uint32_len(&in->timeouts.traverse);
+	len += ctdb_padding_len(4);
+
+	/* Reclock latencies */
+	len += ctdb_latency_counter_len(&in->reclock.ctdbd);
+	len += ctdb_latency_counter_len(&in->reclock.recd);
+
+	/* Lock statistics */
+	len += ctdb_uint32_len(&in->locks.num_calls);
+	len += ctdb_uint32_len(&in->locks.num_current);
+	len += ctdb_uint32_len(&in->locks.num_pending);
+	len += ctdb_uint32_len(&in->locks.num_failed);
+	len += ctdb_latency_counter_len(&in->locks.latency);
+	len += MAX_COUNT_BUCKETS * ctdb_uint32_len(&in->locks.buckets[0]);
+
+	/* Call counters and hop count buckets */
+	len += ctdb_uint32_len(&in->total_calls);
+	len += ctdb_uint32_len(&in->pending_calls);
+	len += ctdb_uint32_len(&in->childwrite_calls);
+	len += ctdb_uint32_len(&in->pending_childwrite_calls);
+	len += ctdb_uint32_len(&in->memory_used);
+	len += ctdb_uint32_len(&in->__last_counter);
+	len += ctdb_uint32_len(&in->max_hop_count);
+	len += MAX_COUNT_BUCKETS *
+		ctdb_uint32_len(&in->hop_count_bucket[0]);
+	len += ctdb_padding_len(4);
+
+	/* Call latencies */
+	len += ctdb_latency_counter_len(&in->call_latency);
+	len += ctdb_latency_counter_len(&in->childwrite_latency);
+
+	/* Recovery count, timestamps and read-only record counters */
+	len += ctdb_uint32_len(&in->num_recoveries);
+	len += ctdb_padding_len(4);
+	len += ctdb_timeval_len(&in->statistics_start_time);
+	len += ctdb_timeval_len(&in->statistics_current_time);
+	len += ctdb_uint32_len(&in->total_ro_delegations);
+	len += ctdb_uint32_len(&in->total_ro_revokes);
+
+	return len;
+}
+
+/*
+ * Marshal a ctdb_statistics structure into buf.
+ *
+ * Elements are written back to back, in the same order in which
+ * ctdb_statistics_len() sums them and ctdb_statistics_pull_elems() reads
+ * them.  This function takes no buffer length and performs no bounds
+ * checks.  *npush receives the number of bytes written.
+ */
+void ctdb_statistics_push(struct ctdb_statistics *in, uint8_t *buf,
+ size_t *npush)
+{
+ size_t offset = 0, np;
+ int i;
+
+ ctdb_uint32_push(&in->num_clients, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->frozen, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recovering, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->client_packets_sent, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->client_packets_recv, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node_packets_sent, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node_packets_recv, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->keepalive_packets_sent, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->keepalive_packets_recv, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.req_call, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.reply_call, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.req_dmaster, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.reply_dmaster, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.reply_error, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.req_message, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.req_control, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.reply_control, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->node.req_tunnel, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->client.req_call, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->client.req_message, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->client.req_control, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->client.req_tunnel, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->timeouts.call, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->timeouts.control, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->timeouts.traverse, buf+offset, &np);
+ offset += np;
+
+ /* Padding element between the counters above and the reclock latencies */
+ ctdb_padding_push(4, buf+offset, &np);
+ offset += np;
+
+ ctdb_latency_counter_push(&in->reclock.ctdbd, buf+offset, &np);
+ offset += np;
+
+ ctdb_latency_counter_push(&in->reclock.recd, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->locks.num_calls, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->locks.num_current, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->locks.num_pending, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->locks.num_failed, buf+offset, &np);
+ offset += np;
+
+ ctdb_latency_counter_push(&in->locks.latency, buf+offset, &np);
+ offset += np;
+
+ /* All MAX_COUNT_BUCKETS lock buckets are always written */
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ ctdb_uint32_push(&in->locks.buckets[i], buf+offset, &np);
+ offset += np;
+ }
+
+ ctdb_uint32_push(&in->total_calls, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->pending_calls, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->childwrite_calls, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->pending_childwrite_calls, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->memory_used, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->__last_counter, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->max_hop_count, buf+offset, &np);
+ offset += np;
+
+ /* All MAX_COUNT_BUCKETS hop count buckets are always written */
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ ctdb_uint32_push(&in->hop_count_bucket[i], buf+offset, &np);
+ offset += np;
+ }
+
+ ctdb_padding_push(4, buf+offset, &np);
+ offset += np;
+
+ ctdb_latency_counter_push(&in->call_latency, buf+offset, &np);
+ offset += np;
+
+ ctdb_latency_counter_push(&in->childwrite_latency, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->num_recoveries, buf+offset, &np);
+ offset += np;
+
+ ctdb_padding_push(4, buf+offset, &np);
+ offset += np;
+
+ ctdb_timeval_push(&in->statistics_start_time, buf+offset, &np);
+ offset += np;
+
+ ctdb_timeval_push(&in->statistics_current_time, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->total_ro_delegations, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->total_ro_revokes, buf+offset, &np);
+ offset += np;
+
+ *npush = offset;
+}
+
+/*
+ * Unmarshal the elements of a ctdb_statistics structure from buf into a
+ * caller-provided structure.
+ *
+ * Elements are read in the same order in which ctdb_statistics_push()
+ * writes them.  Parsing stops at the first element that fails and that
+ * element's error code is returned; *out may then be partially filled
+ * and *npull is not set.  On success 0 is returned and *npull receives
+ * the number of bytes consumed.
+ *
+ * mem_ctx is not referenced by this function.
+ */
+static int ctdb_statistics_pull_elems(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_statistics *out,
+ size_t *npull)
+{
+ size_t offset = 0, np;
+ int ret, i;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &out->num_clients,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &out->frozen, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &out->recovering,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->client_packets_sent, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->client_packets_recv, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node_packets_sent, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node_packets_recv, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->keepalive_packets_sent, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->keepalive_packets_recv, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.req_call, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.reply_call, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.req_dmaster, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.reply_dmaster, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.reply_error, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.req_message, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.req_control, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.reply_control, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->node.req_tunnel, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->client.req_call, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->client.req_message, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->client.req_control, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->client.req_tunnel, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->timeouts.call, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->timeouts.control, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->timeouts.traverse, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* Padding element between the counters above and the reclock latencies */
+ ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_latency_counter_pull(buf+offset, buflen-offset,
+ &out->reclock.ctdbd, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_latency_counter_pull(buf+offset, buflen-offset,
+ &out->reclock.recd, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->locks.num_calls, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->locks.num_current, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->locks.num_pending, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->locks.num_failed, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_latency_counter_pull(buf+offset, buflen-offset,
+ &out->locks.latency, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* All MAX_COUNT_BUCKETS lock buckets are always read */
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->locks.buckets[i], &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ }
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->total_calls, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->pending_calls, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->childwrite_calls, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->pending_childwrite_calls, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset, &out->memory_used,
+ &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->__last_counter, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->max_hop_count, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ /* All MAX_COUNT_BUCKETS hop count buckets are always read */
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->hop_count_bucket[i], &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+ }
+
+ ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_latency_counter_pull(buf+offset, buflen-offset,
+ &out->call_latency, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_latency_counter_pull(buf+offset, buflen-offset,
+ &out->childwrite_latency, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->num_recoveries, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_timeval_pull(buf+offset, buflen-offset,
+ &out->statistics_start_time, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_timeval_pull(buf+offset, buflen-offset,
+ &out->statistics_current_time, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->total_ro_delegations, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+ &out->total_ro_revokes, &np);
+ if (ret != 0) {
+ return ret;
+ }
+ offset += np;
+
+ *npull = offset;
+ return 0;
+}
+
+/*
+ * Allocate a ctdb_statistics on mem_ctx and fill it from buf.
+ *
+ * Returns ENOMEM if the allocation fails, or the parse error; in both
+ * cases nothing is left allocated and *out / *npull are untouched.
+ */
+int ctdb_statistics_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			 struct ctdb_statistics **out, size_t *npull)
+{
+	struct ctdb_statistics *stats;
+	size_t consumed;
+	int err;
+
+	stats = talloc(mem_ctx, struct ctdb_statistics);
+	if (stats == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_statistics_pull_elems(buf, buflen, stats, stats,
+					 &consumed);
+	if (err == 0) {
+		*out = stats;
+		*npull = consumed;
+		return 0;
+	}
+
+	talloc_free(stats);
+	return err;
+}
+
+/*
+ * Marshalled size of a statistics list: the count, a padding element of
+ * 4 and, when the count is positive, that many statistics entries sized
+ * from the first entry.
+ */
+size_t ctdb_statistics_list_len(struct ctdb_statistics_list *in)
+{
+	size_t fixed = ctdb_int32_len(&in->num) + ctdb_padding_len(4);
+
+	if (in->num <= 0) {
+		return fixed;
+	}
+
+	return fixed + in->num * ctdb_statistics_len(&in->stats[0]);
+}
+
+/*
+ * Marshal a statistics list: the count, a padding element of 4, then
+ * each of the in->num entries in turn.  *npush receives the number of
+ * bytes written.
+ */
+void ctdb_statistics_list_push(struct ctdb_statistics_list *in,
+			       uint8_t *buf, size_t *npush)
+{
+	size_t pos, n;
+	int idx;
+
+	ctdb_int32_push(&in->num, buf, &n);
+	pos = n;
+
+	ctdb_padding_push(4, buf + pos, &n);
+	pos += n;
+
+	for (idx = 0; idx < in->num; idx++) {
+		ctdb_statistics_push(&in->stats[idx], buf + pos, &n);
+		pos += n;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a statistics list from buf into a structure allocated on
+ * mem_ctx.
+ *
+ * Wire layout: int32 count, a padding element of 4, then count
+ * statistics entries.  A count of zero yields a list with stats == NULL.
+ *
+ * Returns 0 on success with *out and *npull set.  On failure nothing is
+ * left allocated and one of the following is returned:
+ *   ENOMEM  - allocation failure
+ *   EINVAL  - the count read from the buffer is negative
+ *   other   - error from the element that failed to parse
+ */
+int ctdb_statistics_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			      struct ctdb_statistics_list **out,
+			      size_t *npull)
+{
+	struct ctdb_statistics_list *val;
+	size_t offset = 0, np;
+	int ret, i;
+
+	val = talloc(mem_ctx, struct ctdb_statistics_list);
+	if (val == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_int32_pull(buf+offset, buflen-offset, &val->num, &np);
+	if (ret != 0) {
+		goto fail;
+	}
+	offset += np;
+
+	ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+	if (ret != 0) {
+		goto fail;
+	}
+	offset += np;
+
+	/*
+	 * The count is signed and comes straight from the buffer.  Reject
+	 * a negative value here instead of handing it to talloc_array()
+	 * as an element count.
+	 */
+	if (val->num < 0) {
+		ret = EINVAL;
+		goto fail;
+	}
+
+	if (val->num == 0) {
+		val->stats = NULL;
+		goto done;
+	}
+
+	val->stats = talloc_array(val, struct ctdb_statistics, val->num);
+	if (val->stats == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	for (i=0; i<val->num; i++) {
+		ret = ctdb_statistics_pull_elems(buf+offset, buflen-offset,
+						 val, &val->stats[i], &np);
+		if (ret != 0) {
+			goto fail;
+		}
+		offset += np;
+	}
+
+done:
+	*out = val;
+	*npull = offset;
+	return 0;
+
+fail:
+	talloc_free(val);
+	return ret;
+}
+
+/*
+ * Marshalled size of a VNN map: generation, size, then in->size map
+ * entries sized from the first entry.
+ */
+size_t ctdb_vnn_map_len(struct ctdb_vnn_map *in)
+{
+	size_t entries = 0;
+
+	if (in->size > 0) {
+		entries = in->size * ctdb_uint32_len(&in->map[0]);
+	}
+
+	return ctdb_uint32_len(&in->generation) +
+	       ctdb_uint32_len(&in->size) +
+	       entries;
+}
+
+/*
+ * Marshal a VNN map: generation, size, then each of the in->size map
+ * entries.  *npush receives the number of bytes written.
+ */
+void ctdb_vnn_map_push(struct ctdb_vnn_map *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos, n;
+	uint32_t idx;
+
+	ctdb_uint32_push(&in->generation, buf, &n);
+	pos = n;
+
+	ctdb_uint32_push(&in->size, buf + pos, &n);
+	pos += n;
+
+	for (idx = 0; idx < in->size; idx++) {
+		ctdb_uint32_push(&in->map[idx], buf + pos, &n);
+		pos += n;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a VNN map from buf into a structure allocated on mem_ctx.
+ *
+ * A size of zero yields map == NULL.  On failure the partially built
+ * structure is freed and the error is returned; *out and *npull are set
+ * only on success.
+ */
+int ctdb_vnn_map_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		      struct ctdb_vnn_map **out, size_t *npull)
+{
+	struct ctdb_vnn_map *vmap;
+	size_t pos = 0, n;
+	uint32_t idx;
+	int err;
+
+	vmap = talloc(mem_ctx, struct ctdb_vnn_map);
+	if (vmap == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &vmap->generation,
+			       &n);
+	if (err != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &vmap->size, &n);
+	if (err != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	vmap->map = NULL;
+	if (vmap->size != 0) {
+		vmap->map = talloc_array(vmap, uint32_t, vmap->size);
+		if (vmap->map == NULL) {
+			err = ENOMEM;
+			goto fail;
+		}
+
+		for (idx = 0; idx < vmap->size; idx++) {
+			err = ctdb_uint32_pull(buf + pos, buflen - pos,
+					       &vmap->map[idx], &n);
+			if (err != 0) {
+				goto fail;
+			}
+			pos += n;
+		}
+	}
+
+	*out = vmap;
+	*npull = pos;
+	return 0;
+
+fail:
+	talloc_free(vmap);
+	return err;
+}
+
+/* Marshalled size of a dbid: db_id, flags and a padding element of 3. */
+size_t ctdb_dbid_len(struct ctdb_dbid *in)
+{
+	size_t len;
+
+	len = ctdb_uint32_len(&in->db_id);
+	len += ctdb_uint8_len(&in->flags);
+	len += ctdb_padding_len(3);
+
+	return len;
+}
+
+/*
+ * Marshal a dbid: db_id, flags, then a padding element of 3.  *npush
+ * receives the number of bytes written.
+ */
+void ctdb_dbid_push(struct ctdb_dbid *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos, n;
+
+	ctdb_uint32_push(&in->db_id, buf, &n);
+	pos = n;
+
+	ctdb_uint8_push(&in->flags, buf + pos, &n);
+	pos += n;
+
+	ctdb_padding_push(3, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal the elements of a dbid (db_id, flags, padding) into a
+ * caller-provided structure.  mem_ctx is not referenced.  *npull is set
+ * only on success.
+ */
+static int ctdb_dbid_pull_elems(uint8_t *buf, size_t buflen,
+				TALLOC_CTX *mem_ctx, struct ctdb_dbid *out,
+				size_t *npull)
+{
+	size_t pos, n;
+	int err;
+
+	err = ctdb_uint32_pull(buf, buflen, &out->db_id, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos = n;
+
+	err = ctdb_uint8_pull(buf + pos, buflen - pos, &out->flags, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	err = ctdb_padding_pull(buf + pos, buflen - pos, 3, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Allocate a ctdb_dbid on mem_ctx and fill it from buf.  On failure the
+ * allocation is released and *out / *npull are untouched.
+ */
+int ctdb_dbid_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		   struct ctdb_dbid **out, size_t *npull)
+{
+	struct ctdb_dbid *dbid;
+	size_t consumed;
+	int err;
+
+	dbid = talloc(mem_ctx, struct ctdb_dbid);
+	if (dbid == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_dbid_pull_elems(buf, buflen, dbid, dbid, &consumed);
+	if (err == 0) {
+		*out = dbid;
+		*npull = consumed;
+		return 0;
+	}
+
+	talloc_free(dbid);
+	return err;
+}
+
+/*
+ * Marshalled size of a dbid map: the count followed by in->num dbid
+ * entries sized from the first entry.
+ */
+size_t ctdb_dbid_map_len(struct ctdb_dbid_map *in)
+{
+	size_t entries = 0;
+
+	if (in->num > 0) {
+		entries = in->num * ctdb_dbid_len(&in->dbs[0]);
+	}
+
+	return ctdb_uint32_len(&in->num) + entries;
+}
+
+/*
+ * Marshal a dbid map: the count, then each of the in->num entries.
+ * *npush receives the number of bytes written.
+ */
+void ctdb_dbid_map_push(struct ctdb_dbid_map *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos, n;
+	uint32_t idx;
+
+	ctdb_uint32_push(&in->num, buf, &n);
+	pos = n;
+
+	for (idx = 0; idx < in->num; idx++) {
+		ctdb_dbid_push(&in->dbs[idx], buf + pos, &n);
+		pos += n;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a dbid map from buf into a structure allocated on mem_ctx.
+ *
+ * A count of zero yields dbs == NULL.  On failure the partially built
+ * structure is freed and the error is returned; *out and *npull are set
+ * only on success.
+ */
+int ctdb_dbid_map_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		       struct ctdb_dbid_map **out, size_t *npull)
+{
+	struct ctdb_dbid_map *dbmap;
+	size_t pos = 0, n;
+	uint32_t idx;
+	int err;
+
+	dbmap = talloc(mem_ctx, struct ctdb_dbid_map);
+	if (dbmap == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &dbmap->num, &n);
+	if (err != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	dbmap->dbs = NULL;
+	if (dbmap->num != 0) {
+		dbmap->dbs = talloc_array(dbmap, struct ctdb_dbid,
+					  dbmap->num);
+		if (dbmap->dbs == NULL) {
+			err = ENOMEM;
+			goto fail;
+		}
+
+		for (idx = 0; idx < dbmap->num; idx++) {
+			err = ctdb_dbid_pull_elems(buf + pos, buflen - pos,
+						   dbmap, &dbmap->dbs[idx],
+						   &n);
+			if (err != 0) {
+				goto fail;
+			}
+			pos += n;
+		}
+	}
+
+	*out = dbmap;
+	*npull = pos;
+	return 0;
+
+fail:
+	talloc_free(dbmap);
+	return err;
+}
+
+/* Marshalled size of a pulldb request: db_id followed by lmaster. */
+size_t ctdb_pulldb_len(struct ctdb_pulldb *in)
+{
+	size_t len;
+
+	len = ctdb_uint32_len(&in->db_id);
+	len += ctdb_uint32_len(&in->lmaster);
+
+	return len;
+}
+
+/*
+ * Marshal a pulldb request: db_id, then lmaster.  *npush receives the
+ * number of bytes written.
+ */
+void ctdb_pulldb_push(struct ctdb_pulldb *in, uint8_t *buf, size_t *npush)
+{
+	size_t first, second;
+
+	ctdb_uint32_push(&in->db_id, buf, &first);
+	ctdb_uint32_push(&in->lmaster, buf + first, &second);
+
+	*npush = first + second;
+}
+
+/*
+ * Unmarshal a pulldb request from buf into a structure allocated on
+ * mem_ctx.  On failure the allocation is released and *out / *npull are
+ * untouched.
+ */
+int ctdb_pulldb_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		     struct ctdb_pulldb **out, size_t *npull)
+{
+	struct ctdb_pulldb *req;
+	size_t pos = 0, n;
+	int err;
+
+	req = talloc(mem_ctx, struct ctdb_pulldb);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &req->db_id, &n);
+	if (err != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &req->lmaster, &n);
+	if (err != 0) {
+		goto fail;
+	}
+	pos += n;
+
+	*out = req;
+	*npull = pos;
+	return 0;
+
+fail:
+	talloc_free(req);
+	return err;
+}
+
+/* Marshalled size of an extended pulldb request: db_id, lmaster, srvid. */
+size_t ctdb_pulldb_ext_len(struct ctdb_pulldb_ext *in)
+{
+	size_t len;
+
+	len = ctdb_uint32_len(&in->db_id);
+	len += ctdb_uint32_len(&in->lmaster);
+	len += ctdb_uint64_len(&in->srvid);
+
+	return len;
+}
+
+/*
+ * Marshal an extended pulldb request: db_id, lmaster, then srvid.
+ * *npush receives the number of bytes written.
+ */
+void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *in, uint8_t *buf,
+			  size_t *npush)
+{
+	size_t pos, n;
+
+	ctdb_uint32_push(&in->db_id, buf, &n);
+	pos = n;
+
+	ctdb_uint32_push(&in->lmaster, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint64_push(&in->srvid, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal an extended pulldb request from buf into a structure
+ * allocated on mem_ctx.  On failure the allocation is released and
+ * *out / *npull are untouched.
+ */
+int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			 struct ctdb_pulldb_ext **out, size_t *npull)
+{
+	struct ctdb_pulldb_ext *req;
+	size_t pos = 0, n;
+	int err;
+
+	req = talloc(mem_ctx, struct ctdb_pulldb_ext);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step runs only if every earlier one succeeded */
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &req->db_id, &n);
+	if (err == 0) {
+		pos += n;
+		err = ctdb_uint32_pull(buf + pos, buflen - pos,
+				       &req->lmaster, &n);
+	}
+	if (err == 0) {
+		pos += n;
+		err = ctdb_uint64_pull(buf + pos, buflen - pos,
+				       &req->srvid, &n);
+	}
+	if (err != 0) {
+		talloc_free(req);
+		return err;
+	}
+	pos += n;
+
+	*out = req;
+	*npull = pos;
+	return 0;
+}
+
+/* Marshalled size of a db vacuum request: db_id and full_vacuum_run. */
+size_t ctdb_db_vacuum_len(struct ctdb_db_vacuum *in)
+{
+	size_t len;
+
+	len = ctdb_uint32_len(&in->db_id);
+	len += ctdb_bool_len(&in->full_vacuum_run);
+
+	return len;
+}
+
+/*
+ * Marshal a db vacuum request: db_id, then full_vacuum_run.  *npush
+ * receives the number of bytes written.
+ */
+void ctdb_db_vacuum_push(struct ctdb_db_vacuum *in,
+			 uint8_t *buf,
+			 size_t *npush)
+{
+	size_t first, second;
+
+	ctdb_uint32_push(&in->db_id, buf, &first);
+	ctdb_bool_push(&in->full_vacuum_run, buf + first, &second);
+
+	*npush = first + second;
+}
+
+/*
+ * Unmarshal a db vacuum request from buf into a structure allocated on
+ * mem_ctx.  On failure the allocation is released and *out / *npull are
+ * untouched.
+ */
+int ctdb_db_vacuum_pull(uint8_t *buf,
+			size_t buflen,
+			TALLOC_CTX *mem_ctx,
+			struct ctdb_db_vacuum **out,
+			size_t *npull)
+{
+	struct ctdb_db_vacuum *req;
+	size_t pos = 0, n;
+	int err;
+
+	req = talloc(mem_ctx, struct ctdb_db_vacuum);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &req->db_id, &n);
+	if (err == 0) {
+		pos += n;
+		err = ctdb_bool_pull(buf + pos, buflen - pos,
+				     &req->full_vacuum_run, &n);
+	}
+	if (err != 0) {
+		talloc_free(req);
+		return err;
+	}
+	pos += n;
+
+	*out = req;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of an echo request: timeout plus the length-prefixed
+ * buffer.
+ */
+size_t ctdb_echo_data_len(struct ctdb_echo_data *in)
+{
+	size_t len;
+
+	/*
+	 * The sum is not checked for overflow: no routine in this file
+	 * does so, and this interface cannot report it.
+	 */
+	len = ctdb_uint32_len(&in->timeout);
+	len += ctdb_tdb_datan_len(&in->buf);
+
+	return len;
+}
+
+/*
+ * Marshal an echo request: timeout, then the length-prefixed buffer.
+ * *npush receives the number of bytes written.
+ */
+void ctdb_echo_data_push(struct ctdb_echo_data *in,
+			 uint8_t *buf,
+			 size_t *npush)
+{
+	size_t first, second;
+
+	/*
+	 * Offsets are not checked for overflow: no routine in this file
+	 * does so, and this interface cannot report it.
+	 */
+	ctdb_uint32_push(&in->timeout, buf, &first);
+	ctdb_tdb_datan_push(&in->buf, buf + first, &second);
+
+	*npush = first + second;
+}
+
+/*
+ * Unmarshal an echo request from buf into a structure allocated on
+ * mem_ctx; the buffer contents are duplicated as a child of that
+ * structure.  On failure the allocation is released and *out / *npull
+ * are untouched.
+ */
+int ctdb_echo_data_pull(uint8_t *buf,
+			size_t buflen,
+			TALLOC_CTX *mem_ctx,
+			struct ctdb_echo_data **out,
+			size_t *npull)
+{
+	struct ctdb_echo_data *echo;
+	size_t pos = 0, n;
+	int err;
+
+	echo = talloc(mem_ctx, struct ctdb_echo_data);
+	if (echo == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &echo->timeout, &n);
+	if (err == 0) {
+		pos += n;
+		err = ctdb_tdb_datan_pull(buf + pos, buflen - pos, echo,
+					  &echo->buf, &n);
+	}
+	if (err != 0) {
+		talloc_free(echo);
+		return err;
+	}
+	pos += n;
+
+	*out = echo;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of an ltdb header: rsn, dmaster, reserved1, flags and
+ * a padding element of 4.
+ */
+size_t ctdb_ltdb_header_len(struct ctdb_ltdb_header *in)
+{
+	size_t len;
+
+	len = ctdb_uint64_len(&in->rsn);
+	len += ctdb_uint32_len(&in->dmaster);
+	len += ctdb_uint32_len(&in->reserved1);
+	len += ctdb_uint32_len(&in->flags);
+	len += ctdb_padding_len(4);
+
+	return len;
+}
+
+/*
+ * Marshal an ltdb header in the order rsn, dmaster, reserved1, flags,
+ * padding.  *npush receives the number of bytes written.
+ */
+void ctdb_ltdb_header_push(struct ctdb_ltdb_header *in, uint8_t *buf,
+			   size_t *npush)
+{
+	size_t pos, n;
+
+	ctdb_uint64_push(&in->rsn, buf, &n);
+	pos = n;
+
+	ctdb_uint32_push(&in->dmaster, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint32_push(&in->reserved1, buf + pos, &n);
+	pos += n;
+
+	ctdb_uint32_push(&in->flags, buf + pos, &n);
+	pos += n;
+
+	ctdb_padding_push(4, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal an ltdb header from buf into *out.
+ *
+ * Elements are read in the order rsn, dmaster, reserved1, flags,
+ * padding.  The first failing element's error is returned; *npull is set
+ * only on success, to the number of bytes consumed.
+ */
+int ctdb_ltdb_header_pull(uint8_t *buf, size_t buflen,
+			  struct ctdb_ltdb_header *out, size_t *npull)
+{
+	size_t pos, n;
+	int err;
+
+	err = ctdb_uint64_pull(buf, buflen, &out->rsn, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos = n;
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &out->dmaster, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &out->reserved1,
+			       &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	err = ctdb_uint32_pull(buf + pos, buflen - pos, &out->flags, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	err = ctdb_padding_pull(buf + pos, buflen - pos, 4, &n);
+	if (err != 0) {
+		return err;
+	}
+	pos += n;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Parse an ltdb header from the front of data and, on success, advance
+ * data past it (dptr moved forward, dsize reduced by the bytes
+ * consumed).  On failure data is left unchanged and the parse error is
+ * returned.
+ */
+int ctdb_ltdb_header_extract(TDB_DATA *data, struct ctdb_ltdb_header *header)
+{
+	size_t consumed;
+	int err;
+
+	err = ctdb_ltdb_header_pull(data->dptr, data->dsize, header,
+				    &consumed);
+	if (err == 0) {
+		data->dptr += consumed;
+		data->dsize -= consumed;
+	}
+
+	return err;
+}
+
+/*
+ * Marshalled size of a record: a uint32 length word, reqid, the
+ * length-prefixed key and data and, when in->header is set, the ltdb
+ * header.  The body size is accumulated in a uint32_t.
+ */
+size_t ctdb_rec_data_len(struct ctdb_rec_data *in)
+{
+	uint32_t body;
+
+	body = ctdb_uint32_len(&in->reqid) +
+	       ctdb_tdb_datan_len(&in->key) +
+	       ctdb_tdb_datan_len(&in->data);
+
+	if (in->header != NULL) {
+		body += ctdb_ltdb_header_len(in->header);
+	}
+
+	return body + ctdb_uint32_len(&body);
+}
+
+/*
+ * Marshal a record into buf.
+ *
+ * Layout: total record length, reqid, key length, data length, key
+ * bytes, optional ltdb header, data bytes.  *npush receives the number
+ * of bytes written.
+ */
+void ctdb_rec_data_push(struct ctdb_rec_data *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos, n;
+	uint32_t word;
+
+	/* Total record length, as computed by ctdb_rec_data_len() */
+	word = ctdb_rec_data_len(in);
+	ctdb_uint32_push(&word, buf, &n);
+	pos = n;
+
+	ctdb_uint32_push(&in->reqid, buf + pos, &n);
+	pos += n;
+
+	/* Key length */
+	word = ctdb_tdb_data_len(&in->key);
+	ctdb_uint32_push(&word, buf + pos, &n);
+	pos += n;
+
+	/* Data length; a non-NULL ltdb header is counted as data */
+	word = ctdb_tdb_data_len(&in->data);
+	if (in->header != NULL) {
+		word += ctdb_ltdb_header_len(in->header);
+	}
+	ctdb_uint32_push(&word, buf + pos, &n);
+	pos += n;
+
+	ctdb_tdb_data_push(&in->key, buf + pos, &n);
+	pos += n;
+
+	/* The header, when present, goes immediately before the data */
+	if (in->header != NULL) {
+		ctdb_ltdb_header_push(in->header, buf + pos, &n);
+		pos += n;
+	}
+
+	ctdb_tdb_data_push(&in->data, buf + pos, &n);
+	pos += n;
+
+	*npush = pos;
+}
+
+/*
+ * Parse the framing of one marshalled record without copying anything.
+ *
+ * Layout: total record length, reqid, key length, data length, key
+ * bytes, data bytes.  On success key->dptr and data->dptr point into
+ * buf, *reqid holds the request id and *npull the number of bytes
+ * consumed.
+ *
+ * Returns 0 on success, EMSGSIZE if the advertised record length is
+ * larger than buflen, smaller than the length word itself, or too small
+ * for the key/data it describes, or the error from a failing uint32
+ * pull.
+ */
+static int ctdb_rec_data_pull_data(uint8_t *buf, size_t buflen,
+				   uint32_t *reqid,
+				   TDB_DATA *key, TDB_DATA *data,
+				   size_t *npull)
+{
+	size_t offset = 0, np;
+	size_t len;
+	uint32_t u32;
+	int ret;
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	/*
+	 * The record length counts the length word just read.  A value
+	 * below offset would make every "len-offset" below wrap around
+	 * to a huge size_t and disable the remaining bounds checks, so
+	 * it is rejected together with a length exceeding the buffer.
+	 */
+	if (u32 < offset || buflen < u32) {
+		return EMSGSIZE;
+	}
+	len = u32;
+
+	ret = ctdb_uint32_pull(buf+offset, len-offset, reqid, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_uint32_pull(buf+offset, len-offset, &u32, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+	key->dsize = u32;
+
+	ret = ctdb_uint32_pull(buf+offset, len-offset, &u32, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+	data->dsize = u32;
+
+	if (len-offset < key->dsize) {
+		return EMSGSIZE;
+	}
+
+	/* Key and data are referenced in place, not copied */
+	key->dptr = buf+offset;
+	offset += key->dsize;
+
+	if (len-offset < data->dsize) {
+		return EMSGSIZE;
+	}
+
+	data->dptr = buf+offset;
+	offset += data->dsize;
+
+	*npull = offset;
+	return 0;
+}
+
+/*
+ * Parse one serialised record from buf into the caller-provided out,
+ * duplicating key and data onto mem_ctx.
+ *
+ * out->header is always set to NULL; a caller that needs the ltdb
+ * header extracts it from the data separately.  Empty key or data
+ * yields a NULL dptr with dsize 0.
+ *
+ * Returns 0 and stores the bytes consumed in *npull, ENOMEM if a copy
+ * cannot be allocated, or the error from ctdb_rec_data_pull_data().
+ */
+static int ctdb_rec_data_pull_elems(uint8_t *buf, size_t buflen,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_rec_data *out,
+				    size_t *npull)
+{
+	uint32_t reqid;
+	TDB_DATA key, data;
+	size_t np;
+	int ret;
+
+	ret = ctdb_rec_data_pull_data(buf, buflen, &reqid, &key, &data, &np);
+	if (ret != 0) {
+		return ret;
+	}
+
+	out->reqid = reqid;
+
+	/* Always set header to NULL. If it is required, extract it using
+	 * ctdb_rec_data_extract_header()
+	 */
+	out->header = NULL;
+
+	/*
+	 * out may come from plain talloc() and so hold garbage; give dptr
+	 * a defined value even when there is nothing to copy.
+	 */
+	out->key.dsize = key.dsize;
+	out->key.dptr = NULL;
+	if (key.dsize > 0) {
+		out->key.dptr = talloc_memdup(mem_ctx, key.dptr, key.dsize);
+		if (out->key.dptr == NULL) {
+			return ENOMEM;
+		}
+	}
+
+	out->data.dsize = data.dsize;
+	out->data.dptr = NULL;
+	if (data.dsize > 0) {
+		out->data.dptr = talloc_memdup(mem_ctx, data.dptr, data.dsize);
+		if (out->data.dptr == NULL) {
+			return ENOMEM;
+		}
+	}
+
+	*npull = np;
+	return 0;
+}
+
+/*
+ * Allocate a ctdb_rec_data on mem_ctx and fill it from buf.
+ *
+ * Key and data copies are made children of the new record.  On success
+ * *out receives the record and *npull the number of bytes consumed; on
+ * failure the record is freed and the error code is returned.
+ */
+int ctdb_rec_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		       struct ctdb_rec_data **out, size_t *npull)
+{
+	struct ctdb_rec_data *rec;
+	size_t consumed;
+	int err;
+
+	rec = talloc(mem_ctx, struct ctdb_rec_data);
+	if (rec == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_rec_data_pull_elems(buf, buflen, rec, rec, &consumed);
+	if (err == 0) {
+		*out = rec;
+		*npull = consumed;
+	} else {
+		TALLOC_FREE(rec);
+	}
+
+	return err;
+}
+
+/*
+ * Return the serialised size of a record buffer: db_id, count and the
+ * already-marshalled record bytes.
+ */
+size_t ctdb_rec_buffer_len(struct ctdb_rec_buffer *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->db_id);
+	total += ctdb_uint32_len(&in->count);
+	total += in->buflen;
+
+	return total;
+}
+
+/*
+ * Serialise a record buffer into buf: db_id, count, then the raw
+ * in->buf bytes.  The number of bytes written is stored in *npush.
+ */
+void ctdb_rec_buffer_push(struct ctdb_rec_buffer *in, uint8_t *buf,
+			  size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->db_id, p, &n);
+	p += n;
+
+	ctdb_uint32_push(&in->count, p, &n);
+	p += n;
+
+	/* Records are stored pre-marshalled, so they are copied verbatim */
+	memcpy(p, in->buf, in->buflen);
+	p += in->buflen;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a record buffer from buf into a new object allocated on
+ * mem_ctx.
+ *
+ * The wire format carries no explicit byte length for the records, so
+ * the records are walked once (directly in the input) to find out how
+ * many bytes belong to them; exactly that many bytes are then copied.
+ *
+ * Returns 0 with *out and *npull set, or ENOMEM / EMSGSIZE / the error
+ * from the helpers, in which case nothing is left allocated.
+ */
+int ctdb_rec_buffer_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			 struct ctdb_rec_buffer **out, size_t *npull)
+{
+	struct ctdb_rec_buffer *rb;
+	size_t used = 0, n;
+	size_t reclen = 0;
+	int err;
+
+	rb = talloc(mem_ctx, struct ctdb_rec_buffer);
+	if (rb == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &rb->db_id, &n);
+	if (err != 0) {
+		goto fail;
+	}
+	used += n;
+
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &rb->count, &n);
+	if (err != 0) {
+		goto fail;
+	}
+	used += n;
+
+	/* Temporarily alias the input so the records can be measured */
+	rb->buf = &buf[used];
+	rb->buflen = buflen - used;
+
+	err = ctdb_rec_buffer_traverse(rb, NULL, &reclen);
+	if (err != 0) {
+		goto fail;
+	}
+
+	if (reclen > buflen - used) {
+		err = EMSGSIZE;
+		goto fail;
+	}
+
+	/* Replace the alias with a private copy owned by rb */
+	rb->buf = talloc_memdup(rb, &buf[used], reclen);
+	if (rb->buf == NULL) {
+		err = ENOMEM;
+		goto fail;
+	}
+	rb->buflen = reclen;
+	used += reclen;
+
+	*out = rb;
+	*npull = used;
+	return 0;
+
+fail:
+	talloc_free(rb);
+	return err;
+}
+
+/*
+ * Allocate an empty, zero-initialised record buffer for database db_id
+ * on mem_ctx.  Returns NULL if the allocation fails.
+ */
+struct ctdb_rec_buffer *ctdb_rec_buffer_init(TALLOC_CTX *mem_ctx,
+					     uint32_t db_id)
+{
+	struct ctdb_rec_buffer *rb;
+
+	rb = talloc_zero(mem_ctx, struct ctdb_rec_buffer);
+	if (rb != NULL) {
+		rb->db_id = db_id;
+	}
+
+	return rb;
+}
+
+/*
+ * Append one record (reqid, optional header, key, data) to recbuf.
+ *
+ * The backing buffer is grown with talloc_realloc() on mem_ctx and the
+ * record is marshalled at its end.  Returns 0 on success or ENOMEM, in
+ * which case recbuf is unchanged.
+ */
+int ctdb_rec_buffer_add(TALLOC_CTX *mem_ctx, struct ctdb_rec_buffer *recbuf,
+			uint32_t reqid, struct ctdb_ltdb_header *header,
+			TDB_DATA key, TDB_DATA data)
+{
+	struct ctdb_rec_data rec = {
+		.reqid = reqid,
+		.header = header,
+		.key = key,
+		.data = data,
+	};
+	uint8_t *grown;
+	size_t need, written;
+
+	need = ctdb_rec_data_len(&rec);
+
+	grown = talloc_realloc(mem_ctx, recbuf->buf, uint8_t,
+			       recbuf->buflen + need);
+	if (grown == NULL) {
+		return ENOMEM;
+	}
+
+	ctdb_rec_data_push(&rec, grown + recbuf->buflen, &written);
+
+	recbuf->buf = grown;
+	recbuf->buflen += written;
+	recbuf->count++;
+	return 0;
+}
+
+/*
+ * Walk the recbuf->count records stored in recbuf.
+ *
+ * With a non-NULL func, it is called for every record (the header
+ * argument is always NULL) and a non-zero return stops the walk and is
+ * returned.  With func == NULL, private_data is treated as a size_t *
+ * that receives the total number of bytes the records occupy.
+ *
+ * Returns 0 on success or the first error from parsing or from func.
+ */
+int ctdb_rec_buffer_traverse(struct ctdb_rec_buffer *recbuf,
+			     ctdb_rec_parser_func_t func,
+			     void *private_data)
+{
+	size_t pos = 0;
+	unsigned int idx;
+
+	for (idx = 0; idx < recbuf->count; idx++) {
+		TDB_DATA key, data;
+		uint32_t reqid;
+		size_t consumed;
+		int err;
+
+		err = ctdb_rec_data_pull_data(&recbuf->buf[pos],
+					      recbuf->buflen - pos,
+					      &reqid, &key, &data, &consumed);
+		if (err != 0) {
+			return err;
+		}
+
+		if (func != NULL) {
+			err = func(reqid, NULL, key, data, private_data);
+			if (err != 0) {
+				return err;
+			}
+		}
+
+		pos += consumed;
+	}
+
+	/* Measuring mode: report how many bytes the records span */
+	if (func == NULL) {
+		*(size_t *)private_data = pos;
+	}
+
+	return 0;
+}
+
+/*
+ * Write exactly len bytes from ptr to fd with a single write() call.
+ * Returns 0 on success, errno (or EIO) if write() fails, and EIO on a
+ * short write.
+ */
+static int ctdb_rec_buffer_write_chunk(int fd, const void *ptr, size_t len)
+{
+	ssize_t n;
+
+	n = write(fd, ptr, len);
+	if (n == -1) {
+		return (errno != 0 ? errno : EIO);
+	}
+
+	/*
+	 * errno is only meaningful when write() fails; a short write is
+	 * reported as EIO rather than whatever stale value errno holds.
+	 */
+	if ((size_t)n != len) {
+		return EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump recbuf to fd in host representation: db_id (uint32), count
+ * (uint32), buflen (size_t), then buflen bytes of record data.
+ *
+ * Returns 0 on success or an errno-style code on the first failed or
+ * short write.
+ */
+int ctdb_rec_buffer_write(struct ctdb_rec_buffer *recbuf, int fd)
+{
+	int ret;
+
+	ret = ctdb_rec_buffer_write_chunk(fd, &recbuf->db_id,
+					  sizeof(uint32_t));
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_rec_buffer_write_chunk(fd, &recbuf->count,
+					  sizeof(uint32_t));
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_rec_buffer_write_chunk(fd, &recbuf->buflen,
+					  sizeof(size_t));
+	if (ret != 0) {
+		return ret;
+	}
+
+	return ctdb_rec_buffer_write_chunk(fd, recbuf->buf, recbuf->buflen);
+}
+
+/*
+ * Read exactly len bytes from fd into ptr with a single read() call.
+ * Returns 0 on success, errno (or EIO) if read() fails, and EIO on a
+ * short read or end of file.
+ */
+static int ctdb_rec_buffer_read_chunk(int fd, void *ptr, size_t len)
+{
+	ssize_t n;
+
+	n = read(fd, ptr, len);
+	if (n == -1) {
+		return (errno != 0 ? errno : EIO);
+	}
+
+	/* errno is not set by a short read, so do not report it */
+	if ((size_t)n != len) {
+		return EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Read a record buffer from fd in the layout produced by
+ * ctdb_rec_buffer_write(): db_id (uint32), count (uint32), buflen
+ * (size_t), then buflen bytes of record data.
+ *
+ * On success *out receives a new buffer allocated on mem_ctx and 0 is
+ * returned.  On failure an errno-style code is returned, *out is not
+ * modified and the partially built buffer is released.
+ */
+int ctdb_rec_buffer_read(int fd, TALLOC_CTX *mem_ctx,
+			 struct ctdb_rec_buffer **out)
+{
+	struct ctdb_rec_buffer *recbuf;
+	int ret;
+
+	recbuf = talloc(mem_ctx, struct ctdb_rec_buffer);
+	if (recbuf == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_rec_buffer_read_chunk(fd, &recbuf->db_id,
+					 sizeof(uint32_t));
+	if (ret != 0) {
+		goto fail;
+	}
+
+	ret = ctdb_rec_buffer_read_chunk(fd, &recbuf->count,
+					 sizeof(uint32_t));
+	if (ret != 0) {
+		goto fail;
+	}
+
+	ret = ctdb_rec_buffer_read_chunk(fd, &recbuf->buflen,
+					 sizeof(size_t));
+	if (ret != 0) {
+		goto fail;
+	}
+
+	recbuf->buf = talloc_size(recbuf, recbuf->buflen);
+	if (recbuf->buf == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	ret = ctdb_rec_buffer_read_chunk(fd, recbuf->buf, recbuf->buflen);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	*out = recbuf;
+	return 0;
+
+fail:
+	/* recbuf->buf, if allocated, is a child of recbuf */
+	talloc_free(recbuf);
+	return ret;
+}
+
+/* Return the serialised size of a traverse_start: db_id, reqid, srvid. */
+size_t ctdb_traverse_start_len(struct ctdb_traverse_start *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->db_id);
+	total += ctdb_uint32_len(&in->reqid);
+	total += ctdb_uint64_len(&in->srvid);
+
+	return total;
+}
+
+/*
+ * Serialise a traverse_start into buf as db_id, reqid, srvid and store
+ * the number of bytes written in *npush.
+ */
+void ctdb_traverse_start_push(struct ctdb_traverse_start *in, uint8_t *buf,
+			      size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->db_id, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->reqid, p, &n);
+	p += n;
+	ctdb_uint64_push(&in->srvid, p, &n);
+	p += n;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a traverse_start (db_id, reqid, srvid) from buf into a new
+ * object allocated on mem_ctx.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the first error from
+ * the field parsers, in which case the object is freed.
+ */
+int ctdb_traverse_start_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			     struct ctdb_traverse_start **out, size_t *npull)
+{
+	struct ctdb_traverse_start *ts;
+	size_t used = 0, n;
+	int err;
+
+	ts = talloc(mem_ctx, struct ctdb_traverse_start);
+	if (ts == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous field parsed cleanly */
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &ts->db_id, &n);
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ts->reqid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint64_pull(&buf[used], buflen - used,
+				       &ts->srvid, &n);
+	}
+	if (err != 0) {
+		talloc_free(ts);
+		return err;
+	}
+	used += n;
+
+	*out = ts;
+	*npull = used;
+	return 0;
+}
+
+/*
+ * Return the serialised size of a traverse_all: db_id, reqid, pnn,
+ * client_reqid, srvid.
+ */
+size_t ctdb_traverse_all_len(struct ctdb_traverse_all *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->db_id);
+	total += ctdb_uint32_len(&in->reqid);
+	total += ctdb_uint32_len(&in->pnn);
+	total += ctdb_uint32_len(&in->client_reqid);
+	total += ctdb_uint64_len(&in->srvid);
+
+	return total;
+}
+
+/*
+ * Serialise a traverse_all into buf as db_id, reqid, pnn, client_reqid,
+ * srvid and store the number of bytes written in *npush.
+ */
+void ctdb_traverse_all_push(struct ctdb_traverse_all *in, uint8_t *buf,
+			    size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->db_id, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->reqid, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->pnn, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->client_reqid, p, &n);
+	p += n;
+	ctdb_uint64_push(&in->srvid, p, &n);
+	p += n;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a traverse_all (db_id, reqid, pnn, client_reqid, srvid) from
+ * buf into a new object allocated on mem_ctx.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the first error from
+ * the field parsers, in which case the object is freed.
+ */
+int ctdb_traverse_all_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			   struct ctdb_traverse_all **out, size_t *npull)
+{
+	struct ctdb_traverse_all *ta;
+	size_t used = 0, n;
+	int err;
+
+	ta = talloc(mem_ctx, struct ctdb_traverse_all);
+	if (ta == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous field parsed cleanly */
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &ta->db_id, &n);
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ta->reqid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ta->pnn, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ta->client_reqid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint64_pull(&buf[used], buflen - used,
+				       &ta->srvid, &n);
+	}
+	if (err != 0) {
+		talloc_free(ta);
+		return err;
+	}
+	used += n;
+
+	*out = ta;
+	*npull = used;
+	return 0;
+}
+
+/*
+ * Return the serialised size of a traverse_start_ext: db_id, reqid,
+ * srvid, withemptyrecords and 7 bytes of padding.
+ */
+size_t ctdb_traverse_start_ext_len(struct ctdb_traverse_start_ext *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->db_id);
+	total += ctdb_uint32_len(&in->reqid);
+	total += ctdb_uint64_len(&in->srvid);
+	total += ctdb_bool_len(&in->withemptyrecords);
+	total += ctdb_padding_len(7);
+
+	return total;
+}
+
+/*
+ * Serialise a traverse_start_ext into buf as db_id, reqid, srvid,
+ * withemptyrecords, 7 bytes of padding, and store the number of bytes
+ * written in *npush.
+ */
+void ctdb_traverse_start_ext_push(struct ctdb_traverse_start_ext *in,
+				  uint8_t *buf, size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->db_id, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->reqid, p, &n);
+	p += n;
+	ctdb_uint64_push(&in->srvid, p, &n);
+	p += n;
+	ctdb_bool_push(&in->withemptyrecords, p, &n);
+	p += n;
+	ctdb_padding_push(7, p, &n);
+	p += n;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a traverse_start_ext (db_id, reqid, srvid, withemptyrecords,
+ * 7 bytes of padding) from buf into a new object allocated on mem_ctx.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the first error from
+ * the field parsers, in which case the object is freed.
+ */
+int ctdb_traverse_start_ext_pull(uint8_t *buf, size_t buflen,
+				 TALLOC_CTX *mem_ctx,
+				 struct ctdb_traverse_start_ext **out,
+				 size_t *npull)
+{
+	struct ctdb_traverse_start_ext *ts;
+	size_t used = 0, n;
+	int err;
+
+	ts = talloc(mem_ctx, struct ctdb_traverse_start_ext);
+	if (ts == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous field parsed cleanly */
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &ts->db_id, &n);
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ts->reqid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint64_pull(&buf[used], buflen - used,
+				       &ts->srvid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_bool_pull(&buf[used], buflen - used,
+				     &ts->withemptyrecords, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_padding_pull(&buf[used], buflen - used, 7, &n);
+	}
+	if (err != 0) {
+		talloc_free(ts);
+		return err;
+	}
+	used += n;
+
+	*out = ts;
+	*npull = used;
+	return 0;
+}
+
+/*
+ * Return the serialised size of a traverse_all_ext: db_id, reqid, pnn,
+ * client_reqid, srvid, withemptyrecords and 7 bytes of padding.
+ */
+size_t ctdb_traverse_all_ext_len(struct ctdb_traverse_all_ext *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->db_id);
+	total += ctdb_uint32_len(&in->reqid);
+	total += ctdb_uint32_len(&in->pnn);
+	total += ctdb_uint32_len(&in->client_reqid);
+	total += ctdb_uint64_len(&in->srvid);
+	total += ctdb_bool_len(&in->withemptyrecords);
+	total += ctdb_padding_len(7);
+
+	return total;
+}
+
+/*
+ * Serialise a traverse_all_ext into buf as db_id, reqid, pnn,
+ * client_reqid, srvid, withemptyrecords, 7 bytes of padding, and store
+ * the number of bytes written in *npush.
+ */
+void ctdb_traverse_all_ext_push(struct ctdb_traverse_all_ext *in,
+				uint8_t *buf, size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->db_id, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->reqid, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->pnn, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->client_reqid, p, &n);
+	p += n;
+	ctdb_uint64_push(&in->srvid, p, &n);
+	p += n;
+	ctdb_bool_push(&in->withemptyrecords, p, &n);
+	p += n;
+	ctdb_padding_push(7, p, &n);
+	p += n;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a traverse_all_ext (db_id, reqid, pnn, client_reqid, srvid,
+ * withemptyrecords, 7 bytes of padding) from buf into a new object
+ * allocated on mem_ctx.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the first error from
+ * the field parsers, in which case the object is freed.
+ */
+int ctdb_traverse_all_ext_pull(uint8_t *buf, size_t buflen,
+			       TALLOC_CTX *mem_ctx,
+			       struct ctdb_traverse_all_ext **out,
+			       size_t *npull)
+{
+	struct ctdb_traverse_all_ext *ta;
+	size_t used = 0, n;
+	int err;
+
+	ta = talloc(mem_ctx, struct ctdb_traverse_all_ext);
+	if (ta == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous field parsed cleanly */
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &ta->db_id, &n);
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ta->reqid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ta->pnn, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &ta->client_reqid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint64_pull(&buf[used], buflen - used,
+				       &ta->srvid, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_bool_pull(&buf[used], buflen - used,
+				     &ta->withemptyrecords, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_padding_pull(&buf[used], buflen - used, 7, &n);
+	}
+	if (err != 0) {
+		talloc_free(ta);
+		return err;
+	}
+	used += n;
+
+	*out = ta;
+	*npull = used;
+	return 0;
+}
+
+/*
+ * Return the serialised size of a socket address, which is the size of
+ * the in-memory ctdb_sock_addr (it is copied verbatim).
+ */
+size_t ctdb_sock_addr_len(ctdb_sock_addr *in)
+{
+	return sizeof(*in);
+}
+
+/*
+ * Copy the in-memory socket address verbatim into buf and report the
+ * number of bytes written in *npush.
+ */
+void ctdb_sock_addr_push(ctdb_sock_addr *in, uint8_t *buf, size_t *npush)
+{
+	const size_t len = sizeof(*in);
+
+	memcpy(buf, in, len);
+	*npush = len;
+}
+
+/*
+ * Copy a socket address out of buf into the caller-provided out.
+ *
+ * mem_ctx is not used.  Returns 0 and stores the bytes consumed in
+ * *npull, or EMSGSIZE if buf is too short to hold a ctdb_sock_addr.
+ */
+int ctdb_sock_addr_pull_elems(uint8_t *buf, size_t buflen,
+			      TALLOC_CTX *mem_ctx, ctdb_sock_addr *out,
+			      size_t *npull)
+{
+	const size_t need = sizeof(*out);
+
+	if (buflen >= need) {
+		memcpy(out, buf, need);
+		*npull = need;
+		return 0;
+	}
+
+	return EMSGSIZE;
+}
+
+/*
+ * Allocate a ctdb_sock_addr on mem_ctx and fill it from buf.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the error from
+ * ctdb_sock_addr_pull_elems(), in which case the object is freed.
+ */
+int ctdb_sock_addr_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			ctdb_sock_addr **out, size_t *npull)
+{
+	ctdb_sock_addr *addr;
+	size_t consumed;
+	int err;
+
+	addr = talloc(mem_ctx, ctdb_sock_addr);
+	if (addr == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_sock_addr_pull_elems(buf, buflen, addr, addr, &consumed);
+	if (err == 0) {
+		*out = addr;
+		*npull = consumed;
+	} else {
+		talloc_free(addr);
+	}
+
+	return err;
+}
+
+/* Return the serialised size of a connection: source then destination. */
+size_t ctdb_connection_len(struct ctdb_connection *in)
+{
+	size_t total;
+
+	total = ctdb_sock_addr_len(&in->src);
+	total += ctdb_sock_addr_len(&in->dst);
+
+	return total;
+}
+
+/*
+ * Serialise a connection into buf as source address followed by
+ * destination address; the bytes written are stored in *npush.
+ */
+void ctdb_connection_push(struct ctdb_connection *in, uint8_t *buf,
+			  size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_sock_addr_push(&in->src, p, &n);
+	p += n;
+	ctdb_sock_addr_push(&in->dst, p, &n);
+	p += n;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a connection (source address, destination address) from buf
+ * into the caller-provided out.
+ *
+ * Returns 0 and stores the bytes consumed in *npull, or the first
+ * error from ctdb_sock_addr_pull_elems().
+ */
+static int ctdb_connection_pull_elems(uint8_t *buf, size_t buflen,
+				      TALLOC_CTX *mem_ctx,
+				      struct ctdb_connection *out,
+				      size_t *npull)
+{
+	size_t used = 0, n;
+	int err;
+
+	err = ctdb_sock_addr_pull_elems(&buf[used], buflen - used,
+					mem_ctx, &out->src, &n);
+	if (err == 0) {
+		used += n;
+		err = ctdb_sock_addr_pull_elems(&buf[used], buflen - used,
+						mem_ctx, &out->dst, &n);
+	}
+	if (err != 0) {
+		return err;
+	}
+	used += n;
+
+	*npull = used;
+	return 0;
+}
+
+/*
+ * Allocate a ctdb_connection on mem_ctx and fill it from buf.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the error from
+ * ctdb_connection_pull_elems(), in which case the object is freed.
+ */
+int ctdb_connection_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			 struct ctdb_connection **out, size_t *npull)
+{
+	struct ctdb_connection *conn;
+	size_t consumed;
+	int err;
+
+	conn = talloc(mem_ctx, struct ctdb_connection);
+	if (conn == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_connection_pull_elems(buf, buflen, conn, conn, &consumed);
+	if (err == 0) {
+		*out = conn;
+		*npull = consumed;
+	} else {
+		talloc_free(conn);
+	}
+
+	return err;
+}
+
+/*
+ * Return the serialised size of a connection list: the uint32 element
+ * count followed by in->num fixed-size connections.
+ */
+size_t ctdb_connection_list_len(struct ctdb_connection_list *in)
+{
+	size_t total = ctdb_uint32_len(&in->num);
+
+	/* in->conn is only looked at when there is at least one entry */
+	if (in->num == 0) {
+		return total;
+	}
+
+	return total + in->num * ctdb_connection_len(&in->conn[0]);
+}
+
+/*
+ * Serialise a connection list into buf: the element count followed by
+ * each connection in array order.  The bytes written are stored in
+ * *npush.
+ */
+void ctdb_connection_list_push(struct ctdb_connection_list *in, uint8_t *buf,
+			       size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+	uint32_t idx = 0;
+
+	ctdb_uint32_push(&in->num, p, &n);
+	p += n;
+
+	while (idx < in->num) {
+		ctdb_connection_push(&in->conn[idx], p, &n);
+		p += n;
+		idx++;
+	}
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a connection list from buf into a new object allocated on
+ * mem_ctx.  The connection array is a talloc child of the list; an
+ * empty list has conn == NULL.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the first error from
+ * the element parsers, in which case everything is freed.
+ */
+int ctdb_connection_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			      struct ctdb_connection_list **out, size_t *npull)
+{
+	struct ctdb_connection_list *cl;
+	size_t used = 0, n;
+	uint32_t idx;
+	int err;
+
+	cl = talloc(mem_ctx, struct ctdb_connection_list);
+	if (cl == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &cl->num, &n);
+	if (err != 0) {
+		goto fail;
+	}
+	used += n;
+
+	cl->conn = NULL;
+	if (cl->num > 0) {
+		cl->conn = talloc_array(cl, struct ctdb_connection, cl->num);
+		if (cl->conn == NULL) {
+			err = ENOMEM;
+			goto fail;
+		}
+
+		for (idx = 0; idx < cl->num; idx++) {
+			err = ctdb_connection_pull_elems(&buf[used],
+							 buflen - used,
+							 cl, &cl->conn[idx],
+							 &n);
+			if (err != 0) {
+				goto fail;
+			}
+			used += n;
+		}
+	}
+
+	*out = cl;
+	*npull = used;
+	return 0;
+
+fail:
+	talloc_free(cl);
+	return err;
+}
+
+/*
+ * Return the serialised size of a tunable: its uint32 value followed
+ * by its length-prefixed name.
+ */
+size_t ctdb_tunable_len(struct ctdb_tunable *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->value);
+	total += ctdb_stringn_len(&in->name);
+
+	return total;
+}
+
+/*
+ * Serialise a tunable into buf as value followed by name and store the
+ * number of bytes written in *npush.
+ */
+void ctdb_tunable_push(struct ctdb_tunable *in, uint8_t *buf, size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->value, p, &n);
+	p += n;
+	ctdb_stringn_push(&in->name, p, &n);
+	p += n;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a tunable (value, name) from buf into a new object allocated
+ * on mem_ctx.
+ *
+ * Note that the name is pulled with mem_ctx, not the new object, as
+ * its talloc context.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the first error from
+ * the field parsers, in which case the object is freed.
+ */
+int ctdb_tunable_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		      struct ctdb_tunable **out, size_t *npull)
+{
+	struct ctdb_tunable *tun;
+	size_t used = 0, n;
+	int err;
+
+	tun = talloc(mem_ctx, struct ctdb_tunable);
+	if (tun == NULL) {
+		return ENOMEM;
+	}
+
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &tun->value, &n);
+	if (err == 0) {
+		used += n;
+		err = ctdb_stringn_pull(&buf[used], buflen - used, mem_ctx,
+					&tun->name, &n);
+	}
+	if (err != 0) {
+		talloc_free(tun);
+		return err;
+	}
+	used += n;
+
+	*out = tun;
+	*npull = used;
+	return 0;
+}
+
+/*
+ * Return the serialised size of a node flag change: pnn, new_flags,
+ * old_flags.
+ */
+size_t ctdb_node_flag_change_len(struct ctdb_node_flag_change *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->pnn);
+	total += ctdb_uint32_len(&in->new_flags);
+	total += ctdb_uint32_len(&in->old_flags);
+
+	return total;
+}
+
+/*
+ * Serialise a node flag change into buf as pnn, new_flags, old_flags
+ * and store the number of bytes written in *npush.
+ */
+void ctdb_node_flag_change_push(struct ctdb_node_flag_change *in,
+				uint8_t *buf, size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->pnn, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->new_flags, p, &n);
+	p += n;
+	ctdb_uint32_push(&in->old_flags, p, &n);
+	p += n;
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a node flag change (pnn, new_flags, old_flags) from buf into a
+ * new object allocated on mem_ctx.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM, or the first error from
+ * the field parsers, in which case the object is freed.
+ */
+int ctdb_node_flag_change_pull(uint8_t *buf, size_t buflen,
+			       TALLOC_CTX *mem_ctx,
+			       struct ctdb_node_flag_change **out,
+			       size_t *npull)
+{
+	struct ctdb_node_flag_change *fc;
+	size_t used = 0, n;
+	int err;
+
+	fc = talloc(mem_ctx, struct ctdb_node_flag_change);
+	if (fc == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous field parsed cleanly */
+	err = ctdb_uint32_pull(&buf[used], buflen - used, &fc->pnn, &n);
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &fc->new_flags, &n);
+	}
+	if (err == 0) {
+		used += n;
+		err = ctdb_uint32_pull(&buf[used], buflen - used,
+				       &fc->old_flags, &n);
+	}
+	if (err != 0) {
+		talloc_free(fc);
+		return err;
+	}
+	used += n;
+
+	*out = fc;
+	*npull = used;
+	return 0;
+}
+
+/*
+ * Return the serialised size of a variable list: a uint32 payload
+ * length followed by the summed ctdb_string_len() of every entry.
+ */
+size_t ctdb_var_list_len(struct ctdb_var_list *in)
+{
+	uint32_t payload = 0;
+	int idx = 0;
+
+	while (idx < in->count) {
+		payload += ctdb_string_len(&in->var[idx]);
+		idx++;
+	}
+
+	return ctdb_uint32_len(&payload) + payload;
+}
+
+/*
+ * Serialise a variable list into buf: a uint32 payload length followed
+ * by the variables joined with ':' as one NUL-terminated string.  The
+ * bytes written are stored in *npush.
+ */
+void ctdb_var_list_push(struct ctdb_var_list *in, uint8_t *buf, size_t *npush)
+{
+	uint8_t *p = buf;
+	size_t n;
+	uint32_t payload;
+	uint8_t colon = ':';
+	int idx;
+
+	/* The length prefix describes the payload only, not itself */
+	payload = ctdb_var_list_len(in);
+	payload -= ctdb_uint32_len(&payload);
+
+	ctdb_uint32_push(&payload, p, &n);
+	p += n;
+
+	for (idx = 0; idx < in->count; idx++) {
+		ctdb_string_push(&in->var[idx], p, &n);
+		p += n;
+
+		/*
+		 * Every string but the last has its terminating '\0'
+		 * overwritten with the ':' separator.
+		 */
+		if (idx < in->count - 1) {
+			ctdb_uint8_push(&colon, p - 1, &n);
+		}
+	}
+
+	*npush = (size_t)(p - buf);
+}
+
+/*
+ * Parse a variable list from buf into a new object allocated on
+ * mem_ctx.
+ *
+ * The payload is a uint32 length followed by a single string in which
+ * the variables are separated by ':'.  Each variable is duplicated as
+ * a talloc child of the returned list.
+ *
+ * Returns 0 with *out and *npull set, EMSGSIZE if the declared payload
+ * does not fit in buf, ENOMEM, or the error from the helpers; on
+ * failure everything allocated here is freed.
+ */
+int ctdb_var_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		       struct ctdb_var_list **out, size_t *npull)
+{
+	struct ctdb_var_list *val;
+	const char *str = NULL, **list;
+	char *s, *tok, *copy, *ptr = NULL;
+	size_t offset = 0, np;
+	uint32_t u32;
+	int ret;
+
+	val = talloc_zero(mem_ctx, struct ctdb_var_list);
+	if (val == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset, &u32, &np);
+	if (ret != 0) {
+		goto fail;
+	}
+	offset += np;
+
+	if (buflen-offset < u32) {
+		ret = EMSGSIZE;
+		goto fail;
+	}
+
+	ret = ctdb_string_pull(buf+offset, u32, val, &str, &np);
+	if (ret != 0) {
+		goto fail;
+	}
+	offset += np;
+
+	/*
+	 * An empty list is pushed with a zero-length payload.  If the
+	 * string helper hands back NULL in that case there is nothing to
+	 * tokenise, and strtok_r(NULL, ...) with a NULL save pointer must
+	 * not be called.
+	 */
+	if (str == NULL) {
+		goto done;
+	}
+
+	/* Only the first strtok_r() call gets the string itself */
+	s = discard_const(str);
+	while ((tok = strtok_r(s, ":", &ptr)) != NULL) {
+		list = talloc_realloc(val, val->var, const char *,
+				      val->count+1);
+		if (list == NULL) {
+			ret = ENOMEM;
+			goto fail;
+		}
+
+		val->var = list;
+
+		copy = talloc_strdup(val, tok);
+		if (copy == NULL) {
+			ret = ENOMEM;
+			goto fail;
+		}
+
+		val->var[val->count] = copy;
+		val->count += 1;
+		s = NULL;
+	}
+
+	/* The joined string is no longer needed once it has been split */
+	talloc_free(discard_const(str));
+
+done:
+	*out = val;
+	*npull = offset;
+	return 0;
+
+fail:
+	talloc_free(val);
+	return ret;
+}
+/*
+ * Return the serialised size of a tunable list.
+ *
+ * The result is the sum of ctdb_uint32_len() over every tunable field
+ * named below; no other data contributes to the size.
+ */
+size_t ctdb_tunable_list_len(struct ctdb_tunable_list *in)
+{
+ return ctdb_uint32_len(&in->max_redirect_count) +
+ ctdb_uint32_len(&in->seqnum_interval) +
+ ctdb_uint32_len(&in->control_timeout) +
+ ctdb_uint32_len(&in->traverse_timeout) +
+ ctdb_uint32_len(&in->keepalive_interval) +
+ ctdb_uint32_len(&in->keepalive_limit) +
+ ctdb_uint32_len(&in->recover_timeout) +
+ ctdb_uint32_len(&in->recover_interval) +
+ ctdb_uint32_len(&in->election_timeout) +
+ ctdb_uint32_len(&in->takeover_timeout) +
+ ctdb_uint32_len(&in->monitor_interval) +
+ ctdb_uint32_len(&in->tickle_update_interval) +
+ ctdb_uint32_len(&in->script_timeout) +
+ ctdb_uint32_len(&in->monitor_timeout_count) +
+ ctdb_uint32_len(&in->script_unhealthy_on_timeout) +
+ ctdb_uint32_len(&in->recovery_grace_period) +
+ ctdb_uint32_len(&in->recovery_ban_period) +
+ ctdb_uint32_len(&in->database_hash_size) +
+ ctdb_uint32_len(&in->database_max_dead) +
+ ctdb_uint32_len(&in->rerecovery_timeout) +
+ ctdb_uint32_len(&in->enable_bans) +
+ ctdb_uint32_len(&in->deterministic_public_ips) +
+ ctdb_uint32_len(&in->reclock_ping_period) +
+ ctdb_uint32_len(&in->no_ip_failback) +
+ ctdb_uint32_len(&in->disable_ip_failover) +
+ ctdb_uint32_len(&in->verbose_memory_names) +
+ ctdb_uint32_len(&in->recd_ping_timeout) +
+ ctdb_uint32_len(&in->recd_ping_failcount) +
+ ctdb_uint32_len(&in->log_latency_ms) +
+ ctdb_uint32_len(&in->reclock_latency_ms) +
+ ctdb_uint32_len(&in->recovery_drop_all_ips) +
+ ctdb_uint32_len(&in->verify_recovery_lock) +
+ ctdb_uint32_len(&in->vacuum_interval) +
+ ctdb_uint32_len(&in->vacuum_max_run_time) +
+ ctdb_uint32_len(&in->repack_limit) +
+ ctdb_uint32_len(&in->vacuum_limit) +
+ ctdb_uint32_len(&in->max_queue_depth_drop_msg) +
+ ctdb_uint32_len(&in->allow_unhealthy_db_read) +
+ ctdb_uint32_len(&in->stat_history_interval) +
+ ctdb_uint32_len(&in->deferred_attach_timeout) +
+ ctdb_uint32_len(&in->vacuum_fast_path_count) +
+ ctdb_uint32_len(&in->lcp2_public_ip_assignment) +
+ ctdb_uint32_len(&in->allow_client_db_attach) +
+ ctdb_uint32_len(&in->recover_pdb_by_seqnum) +
+ ctdb_uint32_len(&in->deferred_rebalance_on_node_add) +
+ ctdb_uint32_len(&in->fetch_collapse) +
+ ctdb_uint32_len(&in->hopcount_make_sticky) +
+ ctdb_uint32_len(&in->sticky_duration) +
+ ctdb_uint32_len(&in->sticky_pindown) +
+ ctdb_uint32_len(&in->no_ip_takeover) +
+ ctdb_uint32_len(&in->db_record_count_warn) +
+ ctdb_uint32_len(&in->db_record_size_warn) +
+ ctdb_uint32_len(&in->db_size_warn) +
+ ctdb_uint32_len(&in->pulldb_preallocation_size) +
+ ctdb_uint32_len(&in->no_ip_host_on_all_disabled) +
+ ctdb_uint32_len(&in->samba3_hack) +
+ ctdb_uint32_len(&in->mutex_enabled) +
+ ctdb_uint32_len(&in->lock_processes_per_db) +
+ ctdb_uint32_len(&in->rec_buffer_size_limit) +
+ ctdb_uint32_len(&in->queue_buffer_size) +
+ ctdb_uint32_len(&in->ip_alloc_algorithm) +
+ ctdb_uint32_len(&in->allow_mixed_versions);
+}
+/*
+ * Serialise a tunable list into buf.
+ *
+ * Every tunable is written with ctdb_uint32_push() one after another,
+ * in the same field order that ctdb_tunable_list_len() sums them; the
+ * statement order below is therefore the layout of the output.  The
+ * total number of bytes written is stored in *npush.
+ */
+void ctdb_tunable_list_push(struct ctdb_tunable_list *in, uint8_t *buf,
+ size_t *npush)
+{
+ size_t offset = 0, np;
+
+ ctdb_uint32_push(&in->max_redirect_count, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->seqnum_interval, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->control_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->traverse_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->keepalive_interval, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->keepalive_limit, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recover_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recover_interval, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->election_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->takeover_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->monitor_interval, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->tickle_update_interval, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->script_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->monitor_timeout_count, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->script_unhealthy_on_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recovery_grace_period, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recovery_ban_period, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->database_hash_size, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->database_max_dead, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->rerecovery_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->enable_bans, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->deterministic_public_ips, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->reclock_ping_period, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->no_ip_failback, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->disable_ip_failover, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->verbose_memory_names, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recd_ping_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recd_ping_failcount, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->log_latency_ms, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->reclock_latency_ms, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recovery_drop_all_ips, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->verify_recovery_lock, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->vacuum_interval, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->vacuum_max_run_time, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->repack_limit, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->vacuum_limit, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->max_queue_depth_drop_msg, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->allow_unhealthy_db_read, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->stat_history_interval, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->deferred_attach_timeout, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->vacuum_fast_path_count, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->lcp2_public_ip_assignment, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->allow_client_db_attach, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->recover_pdb_by_seqnum, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->deferred_rebalance_on_node_add, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->fetch_collapse, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->hopcount_make_sticky, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->sticky_duration, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->sticky_pindown, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->no_ip_takeover, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->db_record_count_warn, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->db_record_size_warn, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->db_size_warn, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->pulldb_preallocation_size, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->no_ip_host_on_all_disabled, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->samba3_hack, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->mutex_enabled, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->lock_processes_per_db, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->rec_buffer_size_limit, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->queue_buffer_size, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->ip_alloc_algorithm, buf+offset, &np);
+ offset += np;
+
+ ctdb_uint32_push(&in->allow_mixed_versions, buf+offset, &np);
+ offset += np;
+
+ *npush = offset;
+}
+
+/*
+ * Unmarshal all members of a struct ctdb_tunable_list from buf.
+ *
+ * Every tunable is read with ctdb_uint32_pull(), in exactly the order
+ * given by the table below (this assumes each tunable is a uint32_t, as
+ * the ctdb_uint32_pull() calls require).  mem_ctx is not used here.
+ *
+ * Returns 0 and stores the number of bytes consumed in *npull on success.
+ * If a pull fails, its error code is returned immediately, *npull is left
+ * untouched and tunables handled by earlier pulls are not reset.
+ */
+static int ctdb_tunable_list_pull_elems(uint8_t *buf, size_t buflen,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_tunable_list *out,
+					size_t *npull)
+{
+	/* Destination of each value, in the order it appears in buf */
+	uint32_t *field[] = {
+		&out->max_redirect_count,
+		&out->seqnum_interval,
+		&out->control_timeout,
+		&out->traverse_timeout,
+		&out->keepalive_interval,
+		&out->keepalive_limit,
+		&out->recover_timeout,
+		&out->recover_interval,
+		&out->election_timeout,
+		&out->takeover_timeout,
+		&out->monitor_interval,
+		&out->tickle_update_interval,
+		&out->script_timeout,
+		&out->monitor_timeout_count,
+		&out->script_unhealthy_on_timeout,
+		&out->recovery_grace_period,
+		&out->recovery_ban_period,
+		&out->database_hash_size,
+		&out->database_max_dead,
+		&out->rerecovery_timeout,
+		&out->enable_bans,
+		&out->deterministic_public_ips,
+		&out->reclock_ping_period,
+		&out->no_ip_failback,
+		&out->disable_ip_failover,
+		&out->verbose_memory_names,
+		&out->recd_ping_timeout,
+		&out->recd_ping_failcount,
+		&out->log_latency_ms,
+		&out->reclock_latency_ms,
+		&out->recovery_drop_all_ips,
+		&out->verify_recovery_lock,
+		&out->vacuum_interval,
+		&out->vacuum_max_run_time,
+		&out->repack_limit,
+		&out->vacuum_limit,
+		&out->max_queue_depth_drop_msg,
+		&out->allow_unhealthy_db_read,
+		&out->stat_history_interval,
+		&out->deferred_attach_timeout,
+		&out->vacuum_fast_path_count,
+		&out->lcp2_public_ip_assignment,
+		&out->allow_client_db_attach,
+		&out->recover_pdb_by_seqnum,
+		&out->deferred_rebalance_on_node_add,
+		&out->fetch_collapse,
+		&out->hopcount_make_sticky,
+		&out->sticky_duration,
+		&out->sticky_pindown,
+		&out->no_ip_takeover,
+		&out->db_record_count_warn,
+		&out->db_record_size_warn,
+		&out->db_size_warn,
+		&out->pulldb_preallocation_size,
+		&out->no_ip_host_on_all_disabled,
+		&out->samba3_hack,
+		&out->mutex_enabled,
+		&out->lock_processes_per_db,
+		&out->rec_buffer_size_limit,
+		&out->queue_buffer_size,
+		&out->ip_alloc_algorithm,
+		&out->allow_mixed_versions,
+	};
+	const size_t num_fields = sizeof(field) / sizeof(field[0]);
+	size_t pos = 0, used, idx;
+	int rc;
+
+	for (idx = 0; idx < num_fields; idx++) {
+		rc = ctdb_uint32_pull(buf+pos, buflen-pos, field[idx], &used);
+		if (rc != 0) {
+			return rc;
+		}
+		pos += used;
+	}
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Allocate a struct ctdb_tunable_list on mem_ctx and fill it from buf
+ * using ctdb_tunable_list_pull_elems().
+ *
+ * On success returns 0, hands the new structure back through *out and the
+ * number of bytes consumed through *npull.  Returns ENOMEM if the
+ * allocation fails; on a parse error the allocation is freed and the
+ * error from ctdb_tunable_list_pull_elems() is returned.
+ */
+int ctdb_tunable_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			   struct ctdb_tunable_list **out, size_t *npull)
+{
+	struct ctdb_tunable_list *tun_list;
+	size_t used;
+	int rc;
+
+	tun_list = talloc(mem_ctx, struct ctdb_tunable_list);
+	if (tun_list == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_tunable_list_pull_elems(buf, buflen, tun_list, tun_list,
+					  &used);
+	if (rc == 0) {
+		*out = tun_list;
+		*npull = used;
+		return 0;
+	}
+
+	talloc_free(tun_list);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a tickle list: the socket address, the
+ * uint32 connection count and, when the count is non-zero, count times
+ * the length reported for the first connection.
+ */
+size_t ctdb_tickle_list_len(struct ctdb_tickle_list *in)
+{
+	size_t header;
+	size_t body = 0;
+
+	header = ctdb_sock_addr_len(&in->addr) +
+		 ctdb_uint32_len(&in->num);
+
+	/* conn[0] is only looked at when there is at least one entry */
+	if (in->num > 0) {
+		body = in->num * ctdb_connection_len(&in->conn[0]);
+	}
+
+	return header + body;
+}
+
+/*
+ * Marshal a tickle list into buf: socket address, uint32 connection
+ * count, then each of the in->num connections in index order.
+ * The total number of bytes written is stored in *npush.
+ */
+void ctdb_tickle_list_push(struct ctdb_tickle_list *in, uint8_t *buf,
+			   size_t *npush)
+{
+	size_t pos = 0, written;
+	uint32_t idx;
+
+	ctdb_sock_addr_push(&in->addr, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint32_push(&in->num, buf+pos, &written);
+	pos += written;
+
+	for (idx = 0; idx < in->num; idx++) {
+		ctdb_connection_push(&in->conn[idx], buf+pos, &written);
+		pos += written;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a tickle list from buf into a structure allocated on mem_ctx.
+ *
+ * Reads the socket address, the uint32 connection count and then that
+ * many connections.  With a count of zero, conn is set to NULL and no
+ * array is allocated.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM when an allocation fails, otherwise the error of the
+ * failing pull helper; in both cases the structure is freed.
+ */
+int ctdb_tickle_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			  struct ctdb_tickle_list **out, size_t *npull)
+{
+	struct ctdb_tickle_list *tickles;
+	size_t pos = 0, used;
+	uint32_t idx;
+	int rc;
+
+	tickles = talloc(mem_ctx, struct ctdb_tickle_list);
+	if (tickles == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_sock_addr_pull_elems(buf+pos, buflen-pos, tickles,
+				       &tickles->addr, &used);
+	if (rc != 0) {
+		goto error;
+	}
+	pos += used;
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &tickles->num, &used);
+	if (rc != 0) {
+		goto error;
+	}
+	pos += used;
+
+	tickles->conn = NULL;
+	if (tickles->num != 0) {
+		tickles->conn = talloc_array(tickles, struct ctdb_connection,
+					     tickles->num);
+		if (tickles->conn == NULL) {
+			rc = ENOMEM;
+			goto error;
+		}
+
+		for (idx = 0; idx < tickles->num; idx++) {
+			rc = ctdb_connection_pull_elems(buf+pos, buflen-pos,
+							tickles,
+							&tickles->conn[idx],
+							&used);
+			if (rc != 0) {
+				goto error;
+			}
+			pos += used;
+		}
+	}
+
+	*out = tickles;
+	*npull = pos;
+	return 0;
+
+error:
+	/* conn, if allocated, hangs off tickles and goes with it */
+	talloc_free(tickles);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of an address info record: socket address,
+ * uint32 mask and the interface name as a stringn.
+ */
+size_t ctdb_addr_info_len(struct ctdb_addr_info *in)
+{
+	size_t total;
+
+	total = ctdb_sock_addr_len(&in->addr);
+	total += ctdb_uint32_len(&in->mask);
+	total += ctdb_stringn_len(&in->iface);
+
+	return total;
+}
+
+/*
+ * Marshal an address info record into buf: socket address, uint32 mask,
+ * then the interface name as a stringn.  The number of bytes written is
+ * stored in *npush.
+ */
+void ctdb_addr_info_push(struct ctdb_addr_info *in, uint8_t *buf,
+			 size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_sock_addr_push(&in->addr, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint32_push(&in->mask, buf+pos, &written);
+	pos += written;
+
+	ctdb_stringn_push(&in->iface, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal an address info record from buf into a structure allocated
+ * on mem_ctx: socket address, uint32 mask, then the interface name.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails, otherwise the error of the
+ * first failing pull helper after freeing the structure.
+ */
+int ctdb_addr_info_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			struct ctdb_addr_info **out, size_t *npull)
+{
+	struct ctdb_addr_info *info;
+	size_t pos = 0, used;
+	int rc;
+
+	info = talloc(mem_ctx, struct ctdb_addr_info);
+	if (info == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous one succeeded */
+	rc = ctdb_sock_addr_pull_elems(buf+pos, buflen-pos, info,
+				       &info->addr, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_uint32_pull(buf+pos, buflen-pos, &info->mask,
+				      &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_stringn_pull(buf+pos, buflen-pos, info,
+				       &info->iface, &used);
+	}
+	if (rc != 0) {
+		talloc_free(info);
+		return rc;
+	}
+	pos += used;
+
+	*out = info;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Return the marshalled size of a transdb record: two uint32 values,
+ * db_id and tid.
+ */
+size_t ctdb_transdb_len(struct ctdb_transdb *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->db_id);
+	total += ctdb_uint32_len(&in->tid);
+
+	return total;
+}
+
+/*
+ * Marshal a transdb record into buf: db_id followed by tid, both as
+ * uint32.  The number of bytes written is stored in *npush.
+ */
+void ctdb_transdb_push(struct ctdb_transdb *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_uint32_push(&in->db_id, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint32_push(&in->tid, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a transdb record (uint32 db_id, uint32 tid) from buf into a
+ * structure allocated on mem_ctx.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails, otherwise the error of the
+ * failing ctdb_uint32_pull() after freeing the structure.
+ */
+int ctdb_transdb_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		      struct ctdb_transdb **out, size_t *npull)
+{
+	struct ctdb_transdb *transdb;
+	size_t pos = 0, used;
+	int rc;
+
+	transdb = talloc(mem_ctx, struct ctdb_transdb);
+	if (transdb == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &transdb->db_id, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_uint32_pull(buf+pos, buflen-pos, &transdb->tid,
+				      &used);
+	}
+	if (rc != 0) {
+		talloc_free(transdb);
+		return rc;
+	}
+	pos += used;
+
+	*out = transdb;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Return the marshalled size of an uptime record, which is made of four
+ * timevals: current_time, ctdbd_start_time, last_recovery_started and
+ * last_recovery_finished.
+ */
+size_t ctdb_uptime_len(struct ctdb_uptime *in)
+{
+	size_t total;
+
+	total = ctdb_timeval_len(&in->current_time);
+	total += ctdb_timeval_len(&in->ctdbd_start_time);
+	total += ctdb_timeval_len(&in->last_recovery_started);
+	total += ctdb_timeval_len(&in->last_recovery_finished);
+
+	return total;
+}
+
+/*
+ * Marshal an uptime record into buf as four consecutive timevals:
+ * current_time, ctdbd_start_time, last_recovery_started,
+ * last_recovery_finished.  The number of bytes written is stored in
+ * *npush.
+ */
+void ctdb_uptime_push(struct ctdb_uptime *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_timeval_push(&in->current_time, buf+pos, &written);
+	pos += written;
+
+	ctdb_timeval_push(&in->ctdbd_start_time, buf+pos, &written);
+	pos += written;
+
+	ctdb_timeval_push(&in->last_recovery_started, buf+pos, &written);
+	pos += written;
+
+	ctdb_timeval_push(&in->last_recovery_finished, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal an uptime record (four timevals) from buf into a structure
+ * allocated on mem_ctx.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails, otherwise the error of the
+ * failing ctdb_timeval_pull() after freeing the structure.
+ */
+int ctdb_uptime_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		     struct ctdb_uptime **out, size_t *npull)
+{
+	struct ctdb_uptime *uptime;
+	size_t pos = 0, used;
+	int rc;
+
+	uptime = talloc(mem_ctx, struct ctdb_uptime);
+	if (uptime == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous one succeeded */
+	rc = ctdb_timeval_pull(buf+pos, buflen-pos, &uptime->current_time,
+			       &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_timeval_pull(buf+pos, buflen-pos,
+				       &uptime->ctdbd_start_time, &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_timeval_pull(buf+pos, buflen-pos,
+				       &uptime->last_recovery_started, &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_timeval_pull(buf+pos, buflen-pos,
+				       &uptime->last_recovery_finished, &used);
+	}
+	if (rc != 0) {
+		talloc_free(uptime);
+		return rc;
+	}
+	pos += used;
+
+	*out = uptime;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Return the marshalled size of a public IP record: uint32 pnn followed
+ * by the socket address.
+ */
+size_t ctdb_public_ip_len(struct ctdb_public_ip *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->pnn);
+	total += ctdb_sock_addr_len(&in->addr);
+
+	return total;
+}
+
+/*
+ * Marshal a public IP record into buf: uint32 pnn, then the socket
+ * address.  The number of bytes written is stored in *npush.
+ */
+void ctdb_public_ip_push(struct ctdb_public_ip *in, uint8_t *buf,
+			 size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_uint32_push(&in->pnn, buf+pos, &written);
+	pos += written;
+
+	ctdb_sock_addr_push(&in->addr, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal the members of a public IP record (uint32 pnn, socket
+ * address) from buf into the caller-provided structure out.  mem_ctx is
+ * passed on to ctdb_sock_addr_pull_elems().
+ *
+ * Returns 0 and stores the number of bytes consumed in *npull on success,
+ * otherwise the error of the failing pull helper (*npull untouched).
+ */
+static int ctdb_public_ip_pull_elems(uint8_t *buf, size_t buflen,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_public_ip *out, size_t *npull)
+{
+	size_t pos = 0, used;
+	int rc;
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &out->pnn, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_sock_addr_pull_elems(buf+pos, buflen-pos, mem_ctx,
+					       &out->addr, &used);
+	}
+	if (rc != 0) {
+		return rc;
+	}
+	pos += used;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Allocate a struct ctdb_public_ip on mem_ctx and fill it from buf using
+ * ctdb_public_ip_pull_elems().
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails; on a parse error the
+ * allocation is freed and the helper's error is returned.
+ */
+int ctdb_public_ip_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			struct ctdb_public_ip **out, size_t *npull)
+{
+	struct ctdb_public_ip *pubip;
+	size_t used;
+	int rc;
+
+	pubip = talloc(mem_ctx, struct ctdb_public_ip);
+	if (pubip == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_public_ip_pull_elems(buf, buflen, pubip, pubip, &used);
+	if (rc == 0) {
+		*out = pubip;
+		*npull = used;
+		return 0;
+	}
+
+	TALLOC_FREE(pubip);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a public IP list: the uint32 count and,
+ * when the count is non-zero, count times the length reported for the
+ * first entry.
+ */
+size_t ctdb_public_ip_list_len(struct ctdb_public_ip_list *in)
+{
+	size_t header = ctdb_uint32_len(&in->num);
+	size_t body = 0;
+
+	/* ip[0] is only looked at when there is at least one entry */
+	if (in->num > 0) {
+		body = in->num * ctdb_public_ip_len(&in->ip[0]);
+	}
+
+	return header + body;
+}
+
+/*
+ * Marshal a public IP list into buf: uint32 count, then each of the
+ * in->num entries in index order.  The number of bytes written is stored
+ * in *npush.
+ */
+void ctdb_public_ip_list_push(struct ctdb_public_ip_list *in, uint8_t *buf,
+			      size_t *npush)
+{
+	size_t pos = 0, written;
+	uint32_t idx;
+
+	ctdb_uint32_push(&in->num, buf+pos, &written);
+	pos += written;
+
+	for (idx = 0; idx < in->num; idx++) {
+		ctdb_public_ip_push(&in->ip[idx], buf+pos, &written);
+		pos += written;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a public IP list from buf into a structure allocated on
+ * mem_ctx: uint32 count followed by that many public IP records.  With a
+ * count of zero, ip is set to NULL and no array is allocated.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM when an allocation fails, otherwise the error of the
+ * failing pull helper; in both cases the structure is freed.
+ */
+int ctdb_public_ip_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			     struct ctdb_public_ip_list **out, size_t *npull)
+{
+	struct ctdb_public_ip_list *iplist;
+	size_t pos = 0, used;
+	uint32_t idx;
+	int rc;
+
+	iplist = talloc(mem_ctx, struct ctdb_public_ip_list);
+	if (iplist == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &iplist->num, &used);
+	if (rc != 0) {
+		goto error;
+	}
+	pos += used;
+
+	iplist->ip = NULL;
+	if (iplist->num != 0) {
+		iplist->ip = talloc_array(iplist, struct ctdb_public_ip,
+					  iplist->num);
+		if (iplist->ip == NULL) {
+			rc = ENOMEM;
+			goto error;
+		}
+
+		for (idx = 0; idx < iplist->num; idx++) {
+			/* The array itself is the context for the entries */
+			rc = ctdb_public_ip_pull_elems(buf+pos, buflen-pos,
+						       iplist->ip,
+						       &iplist->ip[idx],
+						       &used);
+			if (rc != 0) {
+				goto error;
+			}
+			pos += used;
+		}
+	}
+
+	*out = iplist;
+	*npull = pos;
+	return 0;
+
+error:
+	talloc_free(iplist);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a node-and-flags record: uint32 pnn,
+ * uint32 flags and the socket address.
+ */
+size_t ctdb_node_and_flags_len(struct ctdb_node_and_flags *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->pnn);
+	total += ctdb_uint32_len(&in->flags);
+	total += ctdb_sock_addr_len(&in->addr);
+
+	return total;
+}
+
+/*
+ * Marshal a node-and-flags record into buf: uint32 pnn, uint32 flags,
+ * then the socket address.  The number of bytes written is stored in
+ * *npush.
+ */
+void ctdb_node_and_flags_push(struct ctdb_node_and_flags *in, uint8_t *buf,
+			      size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_uint32_push(&in->pnn, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint32_push(&in->flags, buf+pos, &written);
+	pos += written;
+
+	ctdb_sock_addr_push(&in->addr, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal the members of a node-and-flags record (uint32 pnn, uint32
+ * flags, socket address) from buf into the caller-provided structure
+ * out.  mem_ctx is passed on to ctdb_sock_addr_pull_elems().
+ *
+ * Returns 0 and stores the number of bytes consumed in *npull on success,
+ * otherwise the error of the failing pull helper (*npull untouched).
+ */
+static int ctdb_node_and_flags_pull_elems(uint8_t *buf, size_t buflen,
+					  TALLOC_CTX *mem_ctx,
+					  struct ctdb_node_and_flags *out,
+					  size_t *npull)
+{
+	size_t pos = 0, used;
+	int rc;
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &out->pnn, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_uint32_pull(buf+pos, buflen-pos, &out->flags,
+				      &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_sock_addr_pull_elems(buf+pos, buflen-pos, mem_ctx,
+					       &out->addr, &used);
+	}
+	if (rc != 0) {
+		return rc;
+	}
+	pos += used;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Allocate a struct ctdb_node_and_flags on mem_ctx and fill it from buf
+ * using ctdb_node_and_flags_pull_elems().
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails; on a parse error the
+ * allocation is freed and the helper's error is returned.
+ */
+int ctdb_node_and_flags_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			     struct ctdb_node_and_flags **out, size_t *npull)
+{
+	struct ctdb_node_and_flags *node;
+	size_t used;
+	int rc;
+
+	node = talloc(mem_ctx, struct ctdb_node_and_flags);
+	if (node == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_node_and_flags_pull_elems(buf, buflen, node, node, &used);
+	if (rc == 0) {
+		*out = node;
+		*npull = used;
+		return 0;
+	}
+
+	TALLOC_FREE(node);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a node map: the uint32 count and, when
+ * the count is non-zero, count times the length reported for the first
+ * node.
+ */
+size_t ctdb_node_map_len(struct ctdb_node_map *in)
+{
+	size_t header = ctdb_uint32_len(&in->num);
+	size_t body = 0;
+
+	/* node[0] is only looked at when there is at least one entry */
+	if (in->num > 0) {
+		body = in->num * ctdb_node_and_flags_len(&in->node[0]);
+	}
+
+	return header + body;
+}
+
+/*
+ * Marshal a node map into buf: uint32 count, then each of the in->num
+ * nodes in index order.  The number of bytes written is stored in
+ * *npush.
+ */
+void ctdb_node_map_push(struct ctdb_node_map *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos = 0, written;
+	uint32_t idx;
+
+	ctdb_uint32_push(&in->num, buf+pos, &written);
+	pos += written;
+
+	for (idx = 0; idx < in->num; idx++) {
+		ctdb_node_and_flags_push(&in->node[idx], buf+pos, &written);
+		pos += written;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a node map from buf into a structure allocated on mem_ctx:
+ * uint32 count followed by that many node-and-flags records.  With a
+ * count of zero, node is set to NULL and no array is allocated.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM when an allocation fails, otherwise the error of the
+ * failing pull helper; in both cases the structure is freed.
+ */
+int ctdb_node_map_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		       struct ctdb_node_map **out, size_t *npull)
+{
+	struct ctdb_node_map *nodemap;
+	size_t pos = 0, used;
+	uint32_t idx;
+	int rc;
+
+	nodemap = talloc(mem_ctx, struct ctdb_node_map);
+	if (nodemap == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &nodemap->num, &used);
+	if (rc != 0) {
+		goto error;
+	}
+	pos += used;
+
+	nodemap->node = NULL;
+	if (nodemap->num != 0) {
+		nodemap->node = talloc_array(nodemap,
+					     struct ctdb_node_and_flags,
+					     nodemap->num);
+		if (nodemap->node == NULL) {
+			rc = ENOMEM;
+			goto error;
+		}
+
+		for (idx = 0; idx < nodemap->num; idx++) {
+			/* The array itself is the context for the entries */
+			rc = ctdb_node_and_flags_pull_elems(buf+pos,
+							    buflen-pos,
+							    nodemap->node,
+							    &nodemap->node[idx],
+							    &used);
+			if (rc != 0) {
+				goto error;
+			}
+			pos += used;
+		}
+	}
+
+	*out = nodemap;
+	*npull = pos;
+	return 0;
+
+error:
+	talloc_free(nodemap);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a script record: name as a char array
+ * of MAX_SCRIPT_NAME+1, start and finished timevals, int32 status,
+ * output as a char array of MAX_SCRIPT_OUTPUT+1, and 4 bytes of padding.
+ */
+size_t ctdb_script_len(struct ctdb_script *in)
+{
+	size_t total;
+
+	total = ctdb_chararray_len(in->name, MAX_SCRIPT_NAME+1);
+	total += ctdb_timeval_len(&in->start);
+	total += ctdb_timeval_len(&in->finished);
+	total += ctdb_int32_len(&in->status);
+	total += ctdb_chararray_len(in->output, MAX_SCRIPT_OUTPUT+1);
+	total += ctdb_padding_len(4);
+
+	return total;
+}
+
+/*
+ * Marshal a script record into buf in this order: name, start,
+ * finished, status, output, 4 bytes of padding.  The number of bytes
+ * written is stored in *npush.
+ */
+void ctdb_script_push(struct ctdb_script *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_chararray_push(in->name, MAX_SCRIPT_NAME+1, buf+pos, &written);
+	pos += written;
+
+	ctdb_timeval_push(&in->start, buf+pos, &written);
+	pos += written;
+
+	ctdb_timeval_push(&in->finished, buf+pos, &written);
+	pos += written;
+
+	ctdb_int32_push(&in->status, buf+pos, &written);
+	pos += written;
+
+	ctdb_chararray_push(in->output, MAX_SCRIPT_OUTPUT+1, buf+pos,
+			    &written);
+	pos += written;
+
+	ctdb_padding_push(4, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal the members of a script record from buf into the
+ * caller-provided structure out, in this order: name, start, finished,
+ * status, output, 4 bytes of padding.  mem_ctx is not used here.
+ *
+ * Returns 0 and stores the number of bytes consumed in *npull on success,
+ * otherwise the error of the failing pull helper (*npull untouched).
+ */
+static int ctdb_script_pull_elems(uint8_t *buf, size_t buflen,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_script *out, size_t *npull)
+{
+	size_t pos = 0, used;
+	int rc;
+
+	/* Each step only runs if the previous one succeeded */
+	rc = ctdb_chararray_pull(buf+pos, buflen-pos,
+				 out->name, MAX_SCRIPT_NAME+1, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_timeval_pull(buf+pos, buflen-pos, &out->start,
+				       &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_timeval_pull(buf+pos, buflen-pos, &out->finished,
+				       &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_int32_pull(buf+pos, buflen-pos, &out->status,
+				     &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_chararray_pull(buf+pos, buflen-pos,
+					 out->output, MAX_SCRIPT_OUTPUT+1,
+					 &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_padding_pull(buf+pos, buflen-pos, 4, &used);
+	}
+	if (rc != 0) {
+		return rc;
+	}
+	pos += used;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Allocate a struct ctdb_script on mem_ctx and fill it from buf using
+ * ctdb_script_pull_elems().
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails; on a parse error the
+ * allocation is freed and the helper's error is returned.
+ */
+int ctdb_script_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		     struct ctdb_script **out, size_t *npull)
+{
+	struct ctdb_script *script;
+	size_t used;
+	int rc;
+
+	script = talloc(mem_ctx, struct ctdb_script);
+	if (script == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_script_pull_elems(buf, buflen, script, script, &used);
+	if (rc == 0) {
+		*out = script;
+		*npull = used;
+		return 0;
+	}
+
+	TALLOC_FREE(script);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a script list.  A NULL list has size 0.
+ * Otherwise the size is the uint32 script count, 4 bytes of padding and,
+ * when the count is non-zero, count times the length reported for the
+ * first script.
+ */
+size_t ctdb_script_list_len(struct ctdb_script_list *in)
+{
+	size_t header;
+	size_t body = 0;
+
+	if (in == NULL) {
+		return 0;
+	}
+
+	header = ctdb_uint32_len(&in->num_scripts) + ctdb_padding_len(4);
+
+	/* script[0] is only looked at when there is at least one entry */
+	if (in->num_scripts > 0) {
+		body = in->num_scripts * ctdb_script_len(&in->script[0]);
+	}
+
+	return header + body;
+}
+
+/*
+ * Marshal a script list into buf: uint32 script count, 4 bytes of
+ * padding, then each script in index order.  A NULL list writes nothing
+ * and reports 0 bytes.  The number of bytes written is stored in *npush.
+ */
+void ctdb_script_list_push(struct ctdb_script_list *in, uint8_t *buf,
+			   size_t *npush)
+{
+	size_t pos = 0, written;
+	uint32_t idx;
+
+	if (in == NULL) {
+		*npush = 0;
+		return;
+	}
+
+	ctdb_uint32_push(&in->num_scripts, buf+pos, &written);
+	pos += written;
+
+	ctdb_padding_push(4, buf+pos, &written);
+	pos += written;
+
+	for (idx = 0; idx < in->num_scripts; idx++) {
+		ctdb_script_push(&in->script[idx], buf+pos, &written);
+		pos += written;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a script list from buf into a structure allocated on
+ * mem_ctx: uint32 script count, 4 bytes of padding, then that many
+ * script records.
+ *
+ * An empty buffer (buflen == 0) is valid and yields *out = NULL with
+ * *npull = 0.  With a count of zero, script is set to NULL and no array
+ * is allocated.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM when an allocation fails, otherwise the error of the
+ * failing pull helper; in both cases the structure is freed.
+ */
+int ctdb_script_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			  struct ctdb_script_list **out, size_t *npull)
+{
+	struct ctdb_script_list *scripts;
+	size_t pos = 0, used;
+	uint32_t idx;
+	int rc;
+
+	/* If event scripts have never been run, the result will be NULL */
+	if (buflen == 0) {
+		*out = NULL;
+		*npull = 0;
+		return 0;
+	}
+
+	scripts = talloc(mem_ctx, struct ctdb_script_list);
+	if (scripts == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &scripts->num_scripts,
+			      &used);
+	if (rc != 0) {
+		goto error;
+	}
+	pos += used;
+
+	rc = ctdb_padding_pull(buf+pos, buflen-pos, 4, &used);
+	if (rc != 0) {
+		goto error;
+	}
+	pos += used;
+
+	scripts->script = NULL;
+	if (scripts->num_scripts != 0) {
+		scripts->script = talloc_array(scripts, struct ctdb_script,
+					       scripts->num_scripts);
+		if (scripts->script == NULL) {
+			rc = ENOMEM;
+			goto error;
+		}
+
+		for (idx = 0; idx < scripts->num_scripts; idx++) {
+			rc = ctdb_script_pull_elems(buf+pos, buflen-pos,
+						    scripts,
+						    &scripts->script[idx],
+						    &used);
+			if (rc != 0) {
+				goto error;
+			}
+			pos += used;
+		}
+	}
+
+	*out = scripts;
+	*npull = pos;
+	return 0;
+
+error:
+	talloc_free(scripts);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a ban state record: two uint32 values,
+ * pnn and time.
+ */
+size_t ctdb_ban_state_len(struct ctdb_ban_state *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->pnn);
+	total += ctdb_uint32_len(&in->time);
+
+	return total;
+}
+
+/*
+ * Marshal a ban state record into buf: pnn followed by time, both as
+ * uint32.  The number of bytes written is stored in *npush.
+ */
+void ctdb_ban_state_push(struct ctdb_ban_state *in, uint8_t *buf,
+			 size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_uint32_push(&in->pnn, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint32_push(&in->time, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a ban state record (uint32 pnn, uint32 time) from buf into
+ * a structure allocated on mem_ctx.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails, otherwise the error of the
+ * failing ctdb_uint32_pull() after freeing the structure.
+ */
+int ctdb_ban_state_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			struct ctdb_ban_state **out, size_t *npull)
+{
+	struct ctdb_ban_state *ban;
+	size_t pos = 0, used;
+	int rc;
+
+	ban = talloc(mem_ctx, struct ctdb_ban_state);
+	if (ban == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &ban->pnn, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_uint32_pull(buf+pos, buflen-pos, &ban->time, &used);
+	}
+	if (rc != 0) {
+		talloc_free(ban);
+		return rc;
+	}
+	pos += used;
+
+	*out = ban;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Return the marshalled size of a notify data record: uint64 srvid
+ * followed by the data blob as a tdb_datan.
+ */
+size_t ctdb_notify_data_len(struct ctdb_notify_data *in)
+{
+	size_t total;
+
+	total = ctdb_uint64_len(&in->srvid);
+	total += ctdb_tdb_datan_len(&in->data);
+
+	return total;
+}
+
+/*
+ * Marshal a notify data record into buf: uint64 srvid, then the data
+ * blob as a tdb_datan.  The number of bytes written is stored in *npush.
+ */
+void ctdb_notify_data_push(struct ctdb_notify_data *in, uint8_t *buf,
+			   size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_uint64_push(&in->srvid, buf+pos, &written);
+	pos += written;
+
+	ctdb_tdb_datan_push(&in->data, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a notify data record (uint64 srvid, tdb_datan data) from buf
+ * into a structure allocated on mem_ctx.  The structure is passed as the
+ * context to ctdb_tdb_datan_pull().
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails, otherwise the error of the
+ * failing pull helper after freeing the structure.
+ */
+int ctdb_notify_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			  struct ctdb_notify_data **out, size_t *npull)
+{
+	struct ctdb_notify_data *notify;
+	size_t pos = 0, used;
+	int rc;
+
+	notify = talloc(mem_ctx, struct ctdb_notify_data);
+	if (notify == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_uint64_pull(buf+pos, buflen-pos, &notify->srvid, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_tdb_datan_pull(buf+pos, buflen-pos, notify,
+					 &notify->data, &used);
+	}
+	if (rc != 0) {
+		talloc_free(notify);
+		return rc;
+	}
+	pos += used;
+
+	*out = notify;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Return the marshalled size of an interface record: name as a char
+ * array of CTDB_IFACE_SIZE+2, uint16 link_state and uint32 references.
+ */
+size_t ctdb_iface_len(struct ctdb_iface *in)
+{
+	size_t total;
+
+	total = ctdb_chararray_len(in->name, CTDB_IFACE_SIZE+2);
+	total += ctdb_uint16_len(&in->link_state);
+	total += ctdb_uint32_len(&in->references);
+
+	return total;
+}
+
+/*
+ * Marshal an interface record into buf: name, uint16 link_state, then
+ * uint32 references.  The number of bytes written is stored in *npush.
+ */
+void ctdb_iface_push(struct ctdb_iface *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_chararray_push(in->name, CTDB_IFACE_SIZE+2, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint16_push(&in->link_state, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint32_push(&in->references, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal the members of an interface record (name, uint16
+ * link_state, uint32 references) from buf into the caller-provided
+ * structure out.  mem_ctx is not used here.
+ *
+ * Returns 0 and stores the number of bytes consumed in *npull on success,
+ * otherwise the error of the failing pull helper (*npull untouched).
+ */
+static int ctdb_iface_pull_elems(uint8_t *buf, size_t buflen,
+				 TALLOC_CTX *mem_ctx,
+				 struct ctdb_iface *out, size_t *npull)
+{
+	size_t pos = 0, used;
+	int rc;
+
+	rc = ctdb_chararray_pull(buf+pos, buflen-pos,
+				 out->name, CTDB_IFACE_SIZE+2, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_uint16_pull(buf+pos, buflen-pos, &out->link_state,
+				      &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_uint32_pull(buf+pos, buflen-pos, &out->references,
+				      &used);
+	}
+	if (rc != 0) {
+		return rc;
+	}
+	pos += used;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Allocate a struct ctdb_iface on mem_ctx and fill it from buf using
+ * ctdb_iface_pull_elems().
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails; on a parse error the
+ * allocation is freed and the helper's error is returned.
+ */
+int ctdb_iface_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		    struct ctdb_iface **out, size_t *npull)
+{
+	struct ctdb_iface *iface;
+	size_t used;
+	int rc;
+
+	iface = talloc(mem_ctx, struct ctdb_iface);
+	if (iface == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_iface_pull_elems(buf, buflen, iface, iface, &used);
+	if (rc == 0) {
+		*out = iface;
+		*npull = used;
+		return 0;
+	}
+
+	talloc_free(iface);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of an interface list: the uint32 count
+ * and, when the count is non-zero, count times the length reported for
+ * the first interface.
+ */
+size_t ctdb_iface_list_len(struct ctdb_iface_list *in)
+{
+	size_t header = ctdb_uint32_len(&in->num);
+	size_t body = 0;
+
+	/* iface[0] is only looked at when there is at least one entry */
+	if (in->num > 0) {
+		body = in->num * ctdb_iface_len(&in->iface[0]);
+	}
+
+	return header + body;
+}
+
+/*
+ * Marshal an interface list into buf: uint32 count, then each of the
+ * in->num interfaces in index order.  The number of bytes written is
+ * stored in *npush.
+ */
+void ctdb_iface_list_push(struct ctdb_iface_list *in, uint8_t *buf,
+			  size_t *npush)
+{
+	size_t pos = 0, written;
+	uint32_t idx;
+
+	ctdb_uint32_push(&in->num, buf+pos, &written);
+	pos += written;
+
+	for (idx = 0; idx < in->num; idx++) {
+		ctdb_iface_push(&in->iface[idx], buf+pos, &written);
+		pos += written;
+	}
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal an interface list from buf into a structure allocated on
+ * mem_ctx: uint32 count followed by that many interface records.  With
+ * a count of zero, iface is set to NULL and no array is allocated.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM when an allocation fails, otherwise the error of the
+ * failing pull helper; in both cases the structure is freed.
+ */
+int ctdb_iface_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			 struct ctdb_iface_list **out, size_t *npull)
+{
+	struct ctdb_iface_list *ifaces;
+	size_t pos = 0, used;
+	uint32_t idx;
+	int rc;
+
+	ifaces = talloc(mem_ctx, struct ctdb_iface_list);
+	if (ifaces == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &ifaces->num, &used);
+	if (rc != 0) {
+		goto error;
+	}
+	pos += used;
+
+	ifaces->iface = NULL;
+	if (ifaces->num != 0) {
+		ifaces->iface = talloc_array(ifaces, struct ctdb_iface,
+					     ifaces->num);
+		if (ifaces->iface == NULL) {
+			rc = ENOMEM;
+			goto error;
+		}
+
+		for (idx = 0; idx < ifaces->num; idx++) {
+			rc = ctdb_iface_pull_elems(buf+pos, buflen-pos,
+						   ifaces,
+						   &ifaces->iface[idx],
+						   &used);
+			if (rc != 0) {
+				goto error;
+			}
+			pos += used;
+		}
+	}
+
+	*out = ifaces;
+	*npull = pos;
+	return 0;
+
+error:
+	talloc_free(ifaces);
+	return rc;
+}
+
+/*
+ * Return the marshalled size of a public IP info record: the public IP,
+ * uint32 active_idx and the interface list that in->ifaces points to.
+ */
+size_t ctdb_public_ip_info_len(struct ctdb_public_ip_info *in)
+{
+	size_t total;
+
+	total = ctdb_public_ip_len(&in->ip);
+	total += ctdb_uint32_len(&in->active_idx);
+	total += ctdb_iface_list_len(in->ifaces);
+
+	return total;
+}
+
+/*
+ * Marshal a public IP info record into buf: the public IP, uint32
+ * active_idx, then the interface list that in->ifaces points to.  The
+ * number of bytes written is stored in *npush.
+ */
+void ctdb_public_ip_info_push(struct ctdb_public_ip_info *in, uint8_t *buf,
+			      size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_public_ip_push(&in->ip, buf+pos, &written);
+	pos += written;
+
+	ctdb_uint32_push(&in->active_idx, buf+pos, &written);
+	pos += written;
+
+	ctdb_iface_list_push(in->ifaces, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a public IP info record from buf into a structure allocated
+ * on mem_ctx: the public IP, uint32 active_idx, then an interface list
+ * pulled with the new structure as its context.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails, otherwise the error of the
+ * failing pull helper after freeing the structure.
+ */
+int ctdb_public_ip_info_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			     struct ctdb_public_ip_info **out, size_t *npull)
+{
+	struct ctdb_public_ip_info *ipinfo;
+	size_t pos = 0, used;
+	int rc;
+
+	ipinfo = talloc(mem_ctx, struct ctdb_public_ip_info);
+	if (ipinfo == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous one succeeded */
+	rc = ctdb_public_ip_pull_elems(buf+pos, buflen-pos, ipinfo,
+				       &ipinfo->ip, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_uint32_pull(buf+pos, buflen-pos,
+				      &ipinfo->active_idx, &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_iface_list_pull(buf+pos, buflen-pos, ipinfo,
+					  &ipinfo->ifaces, &used);
+	}
+	if (rc != 0) {
+		talloc_free(ipinfo);
+		return rc;
+	}
+	pos += used;
+
+	*out = ipinfo;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Return the marshalled size of a key data record: uint32 db_id, 4 bytes
+ * of padding, the ltdb header and the key as a tdb_datan.
+ */
+size_t ctdb_key_data_len(struct ctdb_key_data *in)
+{
+	size_t total;
+
+	total = ctdb_uint32_len(&in->db_id);
+	total += ctdb_padding_len(4);
+	total += ctdb_ltdb_header_len(&in->header);
+	total += ctdb_tdb_datan_len(&in->key);
+
+	return total;
+}
+
+/*
+ * Marshal a key data record into buf: uint32 db_id, 4 bytes of padding,
+ * the ltdb header, then the key as a tdb_datan.  The number of bytes
+ * written is stored in *npush.
+ */
+void ctdb_key_data_push(struct ctdb_key_data *in, uint8_t *buf, size_t *npush)
+{
+	size_t pos = 0, written;
+
+	ctdb_uint32_push(&in->db_id, buf+pos, &written);
+	pos += written;
+
+	ctdb_padding_push(4, buf+pos, &written);
+	pos += written;
+
+	ctdb_ltdb_header_push(&in->header, buf+pos, &written);
+	pos += written;
+
+	ctdb_tdb_datan_push(&in->key, buf+pos, &written);
+	pos += written;
+
+	*npush = pos;
+}
+
+/*
+ * Unmarshal a key data record from buf into a structure allocated on
+ * mem_ctx: uint32 db_id, 4 bytes of padding, the ltdb header, then the
+ * key as a tdb_datan pulled with the new structure as its context.
+ *
+ * On success returns 0 and sets *out and *npull (bytes consumed).
+ * Returns ENOMEM if the allocation fails, otherwise the error of the
+ * failing pull helper after freeing the structure.
+ */
+int ctdb_key_data_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+		       struct ctdb_key_data **out, size_t *npull)
+{
+	struct ctdb_key_data *keydata;
+	size_t pos = 0, used;
+	int rc;
+
+	keydata = talloc(mem_ctx, struct ctdb_key_data);
+	if (keydata == NULL) {
+		return ENOMEM;
+	}
+
+	/* Each step only runs if the previous one succeeded */
+	rc = ctdb_uint32_pull(buf+pos, buflen-pos, &keydata->db_id, &used);
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_padding_pull(buf+pos, buflen-pos, 4, &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_ltdb_header_pull(buf+pos, buflen-pos,
+					   &keydata->header, &used);
+	}
+	if (rc == 0) {
+		pos += used;
+		rc = ctdb_tdb_datan_pull(buf+pos, buflen-pos, keydata,
+					 &keydata->key, &used);
+	}
+	if (rc != 0) {
+		talloc_free(keydata);
+		return rc;
+	}
+	pos += used;
+
+	*out = keydata;
+	*npull = pos;
+	return 0;
+}
+
+/* In the tdb_data structure marshalling, we are only interested in dsize.
+ * The dptr value is ignored. The actual tdb_data blob is stored separately.
+ *
+ * This is only required for ctdb_db_statistics and will be dropped in future.
+ */
+
+/*
+ * Return the marshalled size of a raw TDB_DATA structure: one pointer
+ * plus one size_t.  The argument is not inspected.
+ */
+static size_t tdb_data_struct_len(TDB_DATA *data)
+{
+	size_t total = sizeof(void *);
+
+	total += sizeof(size_t);
+
+	return total;
+}
+
+/*
+ * Marshal a raw TDB_DATA structure into buf: sizeof(void *) bytes taken
+ * from dptr followed by sizeof(size_t) bytes taken from dsize.  The
+ * number of bytes written is stored in *npush.
+ */
+static void tdb_data_struct_push(TDB_DATA *data, uint8_t *buf, size_t *npush)
+{
+	const size_t ptr_size = sizeof(void *);
+	const size_t len_size = sizeof(size_t);
+
+	memcpy(buf, &data->dptr, ptr_size);
+	memcpy(buf + ptr_size, &data->dsize, len_size);
+
+	*npush = ptr_size + len_size;
+}
+
+/*
+ * Unmarshal a raw TDB_DATA structure from buf.
+ *
+ * The leading sizeof(void *) bytes are skipped and dptr is set to NULL;
+ * only dsize is taken from the buffer.
+ *
+ * Returns 0 and stores the number of bytes consumed in *npull on success.
+ * Returns EMSGSIZE if buf is too short for the pointer or for the size;
+ * in the latter case dptr has already been set to NULL.
+ */
+static int tdb_data_struct_pull(uint8_t *buf, size_t buflen, TDB_DATA *data,
+				size_t *npull)
+{
+	const size_t ptr_size = sizeof(void *);
+	const size_t len_size = sizeof(size_t);
+
+	if (buflen < ptr_size) {
+		return EMSGSIZE;
+	}
+
+	/* The marshalled pointer value is not used */
+	data->dptr = NULL;
+
+	if (buflen - ptr_size < len_size) {
+		return EMSGSIZE;
+	}
+
+	memcpy(&data->dsize, buf + ptr_size, len_size);
+
+	*npull = ptr_size + len_size;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_db_statistics structure.
+ *
+ * The fixed part always accounts for MAX_COUNT_BUCKETS buckets (twice)
+ * and MAX_HOT_KEYS hot-key slots, each slot being a count, 4 bytes of
+ * padding and a raw TDB_DATA structure.  The key blobs of all
+ * MAX_HOT_KEYS slots are added on top.
+ */
+size_t ctdb_db_statistics_len(struct ctdb_db_statistics *in)
+{
+	TDB_DATA key_struct = { 0 };
+	uint32_t count = 0;
+	size_t per_key;
+	size_t total = 0;
+	int k;
+
+	/* Lock statistics */
+	total += ctdb_uint32_len(&in->locks.num_calls);
+	total += ctdb_uint32_len(&in->locks.num_current);
+	total += ctdb_uint32_len(&in->locks.num_pending);
+	total += ctdb_uint32_len(&in->locks.num_failed);
+	total += ctdb_latency_counter_len(&in->locks.latency);
+	total += MAX_COUNT_BUCKETS * ctdb_uint32_len(&in->locks.buckets[0]);
+
+	/* Vacuuming, read-only delegation and hop count statistics */
+	total += ctdb_latency_counter_len(&in->vacuum.latency);
+	total += ctdb_uint32_len(&in->db_ro_delegations);
+	total += ctdb_uint32_len(&in->db_ro_revokes);
+	total += MAX_COUNT_BUCKETS *
+		 ctdb_uint32_len(&in->hop_count_bucket[0]);
+
+	/* Hot key table header and fixed-size slots */
+	total += ctdb_uint32_len(&in->num_hot_keys);
+	total += ctdb_padding_len(4);
+
+	per_key = ctdb_uint32_len(&count) +
+		  ctdb_padding_len(4) +
+		  tdb_data_struct_len(&key_struct);
+	total += MAX_HOT_KEYS * per_key;
+
+	/* Hot key blobs */
+	for (k = 0; k < MAX_HOT_KEYS; k++) {
+		total += ctdb_tdb_data_len(&in->hot_keys[k].key);
+	}
+
+	return total;
+}
+
+/*
+ * Marshall a ctdb_db_statistics structure into buf.
+ *
+ * The hot key count written to the buffer is always MAX_HOT_KEYS,
+ * regardless of in->num_hot_keys, and all MAX_HOT_KEYS slots are
+ * emitted: first the fixed-size slot records, then the key blobs.
+ * *npush receives the number of bytes written.
+ */
+void ctdb_db_statistics_push(struct ctdb_db_statistics *in, uint8_t *buf,
+			     size_t *npush)
+{
+	uint8_t *cur = buf;
+	uint32_t key_slots = MAX_HOT_KEYS;
+	size_t n;
+	int k;
+
+	/* Lock statistics */
+	ctdb_uint32_push(&in->locks.num_calls, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->locks.num_current, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->locks.num_pending, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->locks.num_failed, cur, &n);
+	cur += n;
+
+	ctdb_latency_counter_push(&in->locks.latency, cur, &n);
+	cur += n;
+
+	for (k = 0; k < MAX_COUNT_BUCKETS; k++) {
+		ctdb_uint32_push(&in->locks.buckets[k], cur, &n);
+		cur += n;
+	}
+
+	/* Vacuuming, read-only delegation and hop count statistics */
+	ctdb_latency_counter_push(&in->vacuum.latency, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->db_ro_delegations, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->db_ro_revokes, cur, &n);
+	cur += n;
+
+	for (k = 0; k < MAX_COUNT_BUCKETS; k++) {
+		ctdb_uint32_push(&in->hop_count_bucket[k], cur, &n);
+		cur += n;
+	}
+
+	/* Hot key table header */
+	ctdb_uint32_push(&key_slots, cur, &n);
+	cur += n;
+
+	ctdb_padding_push(4, cur, &n);
+	cur += n;
+
+	/* Fixed-size slot records */
+	for (k = 0; k < MAX_HOT_KEYS; k++) {
+		ctdb_uint32_push(&in->hot_keys[k].count, cur, &n);
+		cur += n;
+
+		ctdb_padding_push(4, cur, &n);
+		cur += n;
+
+		tdb_data_struct_push(&in->hot_keys[k].key, cur, &n);
+		cur += n;
+	}
+
+	/* Key blobs */
+	for (k = 0; k < MAX_HOT_KEYS; k++) {
+		ctdb_tdb_data_push(&in->hot_keys[k].key, cur, &n);
+		cur += n;
+	}
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall the members of a ctdb_db_statistics structure into *out.
+ *
+ * Members are read in the order ctdb_db_statistics_push() writes them:
+ * lock counters, lock latency, lock buckets, vacuum latency, read-only
+ * delegation/revoke counters, hop count buckets, the hot key count and
+ * padding, MAX_HOT_KEYS fixed-size slot records and finally
+ * MAX_HOT_KEYS key blobs.
+ *
+ * mem_ctx is not used: key blobs are pulled with out as the context
+ * argument.
+ *
+ * Returns 0 with *npull set to the bytes consumed, or the error of the
+ * first failing step.  EMSGSIZE is returned if a hot key claims more
+ * bytes than remain in the buffer.  On error *out may be partially
+ * filled; the caller is expected to dispose of it.
+ */
+static int ctdb_db_statistics_pull_elems(uint8_t *buf, size_t buflen,
+					 TALLOC_CTX *mem_ctx,
+					 struct ctdb_db_statistics *out,
+					 size_t *npull)
+{
+	size_t offset = 0, np;
+	int ret, i;
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+			       &out->locks.num_calls, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+			       &out->locks.num_current, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+			       &out->locks.num_pending, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+			       &out->locks.num_failed, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_latency_counter_pull(buf+offset, buflen-offset,
+					&out->locks.latency, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+		ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+				       &out->locks.buckets[i], &np);
+		if (ret != 0) {
+			return ret;
+		}
+		offset += np;
+	}
+
+	ret = ctdb_latency_counter_pull(buf+offset, buflen-offset,
+					&out->vacuum.latency, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+			       &out->db_ro_delegations, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+			       &out->db_ro_revokes, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+		ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+				       &out->hop_count_bucket[i], &np);
+		if (ret != 0) {
+			return ret;
+		}
+		offset += np;
+	}
+
+	ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+			       &out->num_hot_keys, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+	if (ret != 0) {
+		return ret;
+	}
+	offset += np;
+
+	/* Fixed-size slot records: count, padding, raw TDB_DATA (dsize) */
+	for (i=0; i<MAX_HOT_KEYS; i++) {
+		ret = ctdb_uint32_pull(buf+offset, buflen-offset,
+				       &out->hot_keys[i].count, &np);
+		if (ret != 0) {
+			return ret;
+		}
+		offset += np;
+
+		ret = ctdb_padding_pull(buf+offset, buflen-offset, 4, &np);
+		if (ret != 0) {
+			return ret;
+		}
+		offset += np;
+
+		ret = tdb_data_struct_pull(buf+offset, buflen-offset,
+					   &out->hot_keys[i].key, &np);
+		if (ret != 0) {
+			return ret;
+		}
+		offset += np;
+	}
+
+	/* Key blobs, each as long as the dsize pulled for its slot */
+	for (i=0; i<MAX_HOT_KEYS; i++) {
+		size_t keylen = out->hot_keys[i].key.dsize;
+
+		/*
+		 * keylen came from the buffer and cannot be trusted:
+		 * refuse a key that claims to extend beyond the data
+		 * actually supplied instead of reading past the end.
+		 */
+		if (keylen > buflen-offset) {
+			return EMSGSIZE;
+		}
+
+		ret = ctdb_tdb_data_pull(buf+offset, keylen,
+					 out, &out->hot_keys[i].key, &np);
+		if (ret != 0) {
+			return ret;
+		}
+		offset += np;
+	}
+
+	*npull = offset;
+	return 0;
+}
+
+/*
+ * Allocate a ctdb_db_statistics structure on mem_ctx and fill it from
+ * buf.  The new structure is passed to the element pull both as its
+ * context argument and as the destination.
+ *
+ * Returns 0 with *out and *npull set, ENOMEM if allocation fails, or
+ * the element pull's error (the structure is freed in that case).
+ */
+int ctdb_db_statistics_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			    struct ctdb_db_statistics **out, size_t *npull)
+{
+	struct ctdb_db_statistics *stats;
+	size_t used;
+	int ret;
+
+	stats = talloc(mem_ctx, struct ctdb_db_statistics);
+	if (stats == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_db_statistics_pull_elems(buf, buflen, stats, stats, &used);
+	if (ret == 0) {
+		*out = stats;
+		*npull = used;
+		return 0;
+	}
+
+	talloc_free(stats);
+	return ret;
+}
+
+/* Marshalled size of a ctdb_pid_srvid: the pid followed by the srvid. */
+size_t ctdb_pid_srvid_len(struct ctdb_pid_srvid *in)
+{
+	size_t total = ctdb_pid_len(&in->pid);
+
+	total += ctdb_uint64_len(&in->srvid);
+
+	return total;
+}
+
+/*
+ * Marshall a ctdb_pid_srvid (pid, then srvid) into buf; *npush
+ * receives the number of bytes written.
+ */
+void ctdb_pid_srvid_push(struct ctdb_pid_srvid *in, uint8_t *buf,
+			 size_t *npush)
+{
+	uint8_t *cur = buf;
+	size_t n;
+
+	ctdb_pid_push(&in->pid, cur, &n);
+	cur += n;
+
+	ctdb_uint64_push(&in->srvid, cur, &n);
+	cur += n;
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall a ctdb_pid_srvid (pid, then srvid) into a structure
+ * allocated on mem_ctx.
+ *
+ * Returns 0 with *out and *npull (bytes consumed) set.  On failure the
+ * structure is freed and ENOMEM or the failing pull's error is
+ * returned.
+ */
+int ctdb_pid_srvid_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			struct ctdb_pid_srvid **out, size_t *npull)
+{
+	struct ctdb_pid_srvid *ps;
+	size_t pos = 0;
+	size_t used = 0;
+	int ret;
+
+	ps = talloc(mem_ctx, struct ctdb_pid_srvid);
+	if (ps == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_pid_pull(buf + pos, buflen - pos, &ps->pid, &used);
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint64_pull(buf + pos, buflen - pos, &ps->srvid,
+				       &used);
+	}
+	if (ret != 0) {
+		talloc_free(ps);
+		return ret;
+	}
+	pos += used;
+
+	*out = ps;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_election_message: num_connected, 4 bytes
+ * of padding, priority_time, pnn and node_flags.
+ */
+size_t ctdb_election_message_len(struct ctdb_election_message *in)
+{
+	size_t total = 0;
+
+	total += ctdb_uint32_len(&in->num_connected);
+	total += ctdb_padding_len(4);
+	total += ctdb_timeval_len(&in->priority_time);
+	total += ctdb_uint32_len(&in->pnn);
+	total += ctdb_uint32_len(&in->node_flags);
+
+	return total;
+}
+
+/*
+ * Marshall a ctdb_election_message into buf in the order
+ * num_connected, padding(4), priority_time, pnn, node_flags.
+ * *npush receives the number of bytes written.
+ */
+void ctdb_election_message_push(struct ctdb_election_message *in,
+				uint8_t *buf, size_t *npush)
+{
+	uint8_t *cur = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->num_connected, cur, &n);
+	cur += n;
+
+	ctdb_padding_push(4, cur, &n);
+	cur += n;
+
+	ctdb_timeval_push(&in->priority_time, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->pnn, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->node_flags, cur, &n);
+	cur += n;
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall a ctdb_election_message into a structure allocated on
+ * mem_ctx, reading num_connected, padding(4), priority_time, pnn and
+ * node_flags in that order.
+ *
+ * Returns 0 with *out and *npull (bytes consumed) set.  On failure the
+ * structure is freed and ENOMEM or the failing pull's error is
+ * returned.
+ */
+int ctdb_election_message_pull(uint8_t *buf, size_t buflen,
+			       TALLOC_CTX *mem_ctx,
+			       struct ctdb_election_message **out,
+			       size_t *npull)
+{
+	struct ctdb_election_message *msg;
+	size_t pos = 0;
+	size_t used = 0;
+	int ret;
+
+	msg = talloc(mem_ctx, struct ctdb_election_message);
+	if (msg == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &msg->num_connected,
+			       &used);
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_padding_pull(buf + pos, buflen - pos, 4, &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_timeval_pull(buf + pos, buflen - pos,
+					&msg->priority_time, &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint32_pull(buf + pos, buflen - pos, &msg->pnn,
+				       &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint32_pull(buf + pos, buflen - pos,
+				       &msg->node_flags, &used);
+	}
+	if (ret != 0) {
+		talloc_free(msg);
+		return ret;
+	}
+	pos += used;
+
+	*out = msg;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_srvid_message: pnn, 4 bytes of padding and
+ * the srvid.
+ */
+size_t ctdb_srvid_message_len(struct ctdb_srvid_message *in)
+{
+	size_t total = 0;
+
+	total += ctdb_uint32_len(&in->pnn);
+	total += ctdb_padding_len(4);
+	total += ctdb_uint64_len(&in->srvid);
+
+	return total;
+}
+
+/*
+ * Marshall a ctdb_srvid_message (pnn, padding(4), srvid) into buf;
+ * *npush receives the number of bytes written.
+ */
+void ctdb_srvid_message_push(struct ctdb_srvid_message *in, uint8_t *buf,
+			     size_t *npush)
+{
+	uint8_t *cur = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->pnn, cur, &n);
+	cur += n;
+
+	ctdb_padding_push(4, cur, &n);
+	cur += n;
+
+	ctdb_uint64_push(&in->srvid, cur, &n);
+	cur += n;
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall a ctdb_srvid_message (pnn, padding(4), srvid) into a
+ * structure allocated on mem_ctx.
+ *
+ * Returns 0 with *out and *npull (bytes consumed) set.  On failure the
+ * structure is freed and ENOMEM or the failing pull's error is
+ * returned.
+ */
+int ctdb_srvid_message_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			    struct ctdb_srvid_message **out, size_t *npull)
+{
+	struct ctdb_srvid_message *msg;
+	size_t pos = 0;
+	size_t used = 0;
+	int ret;
+
+	msg = talloc(mem_ctx, struct ctdb_srvid_message);
+	if (msg == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &msg->pnn, &used);
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_padding_pull(buf + pos, buflen - pos, 4, &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint64_pull(buf + pos, buflen - pos, &msg->srvid,
+				       &used);
+	}
+	if (ret != 0) {
+		talloc_free(msg);
+		return ret;
+	}
+	pos += used;
+
+	*out = msg;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_disable_message: pnn, padding(4), srvid,
+ * timeout and a trailing padding(4).
+ */
+size_t ctdb_disable_message_len(struct ctdb_disable_message *in)
+{
+	size_t total = 0;
+
+	total += ctdb_uint32_len(&in->pnn);
+	total += ctdb_padding_len(4);
+	total += ctdb_uint64_len(&in->srvid);
+	total += ctdb_uint32_len(&in->timeout);
+	total += ctdb_padding_len(4);
+
+	return total;
+}
+
+/*
+ * Marshall a ctdb_disable_message into buf in the order pnn,
+ * padding(4), srvid, timeout, padding(4).  *npush receives the number
+ * of bytes written.
+ */
+void ctdb_disable_message_push(struct ctdb_disable_message *in, uint8_t *buf,
+			       size_t *npush)
+{
+	uint8_t *cur = buf;
+	size_t n;
+
+	ctdb_uint32_push(&in->pnn, cur, &n);
+	cur += n;
+
+	ctdb_padding_push(4, cur, &n);
+	cur += n;
+
+	ctdb_uint64_push(&in->srvid, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->timeout, cur, &n);
+	cur += n;
+
+	ctdb_padding_push(4, cur, &n);
+	cur += n;
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall a ctdb_disable_message into a structure allocated on
+ * mem_ctx, reading pnn, padding(4), srvid, timeout and padding(4) in
+ * that order.
+ *
+ * Returns 0 with *out and *npull (bytes consumed) set.  On failure the
+ * structure is freed and ENOMEM or the failing pull's error is
+ * returned.
+ */
+int ctdb_disable_message_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			      struct ctdb_disable_message **out,
+			      size_t *npull)
+{
+	struct ctdb_disable_message *msg;
+	size_t pos = 0;
+	size_t used = 0;
+	int ret;
+
+	msg = talloc(mem_ctx, struct ctdb_disable_message);
+	if (msg == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &msg->pnn, &used);
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_padding_pull(buf + pos, buflen - pos, 4, &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint64_pull(buf + pos, buflen - pos, &msg->srvid,
+				       &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint32_pull(buf + pos, buflen - pos, &msg->timeout,
+				       &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_padding_pull(buf + pos, buflen - pos, 4, &used);
+	}
+	if (ret != 0) {
+		talloc_free(msg);
+		return ret;
+	}
+	pos += used;
+
+	*out = msg;
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_server_id: pid, task_id, vnn and
+ * unique_id.
+ */
+size_t ctdb_server_id_len(struct ctdb_server_id *in)
+{
+	size_t total = 0;
+
+	total += ctdb_uint64_len(&in->pid);
+	total += ctdb_uint32_len(&in->task_id);
+	total += ctdb_uint32_len(&in->vnn);
+	total += ctdb_uint64_len(&in->unique_id);
+
+	return total;
+}
+
+/*
+ * Marshall a ctdb_server_id (pid, task_id, vnn, unique_id) into buf;
+ * *npush receives the number of bytes written.
+ */
+void ctdb_server_id_push(struct ctdb_server_id *in, uint8_t *buf,
+			 size_t *npush)
+{
+	uint8_t *cur = buf;
+	size_t n;
+
+	ctdb_uint64_push(&in->pid, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->task_id, cur, &n);
+	cur += n;
+
+	ctdb_uint32_push(&in->vnn, cur, &n);
+	cur += n;
+
+	ctdb_uint64_push(&in->unique_id, cur, &n);
+	cur += n;
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall a ctdb_server_id (pid, task_id, vnn, unique_id) into the
+ * caller-provided *out.
+ *
+ * Returns 0 with *npull set to the bytes consumed, or the error of the
+ * first failing pull.  Members pulled before a failure remain written
+ * in *out.
+ */
+int ctdb_server_id_pull(uint8_t *buf, size_t buflen,
+			struct ctdb_server_id *out, size_t *npull)
+{
+	size_t pos = 0;
+	size_t used = 0;
+	int ret;
+
+	ret = ctdb_uint64_pull(buf + pos, buflen - pos, &out->pid, &used);
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint32_pull(buf + pos, buflen - pos, &out->task_id,
+				       &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint32_pull(buf + pos, buflen - pos, &out->vnn,
+				       &used);
+	}
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_uint64_pull(buf + pos, buflen - pos,
+				       &out->unique_id, &used);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+	pos += used;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_g_lock: the type, 4 bytes of padding and
+ * the server id.
+ */
+size_t ctdb_g_lock_len(struct ctdb_g_lock *in)
+{
+	size_t total = 0;
+
+	total += ctdb_uint32_len(&in->type);
+	total += ctdb_padding_len(4);
+	total += ctdb_server_id_len(&in->sid);
+
+	return total;
+}
+
+/*
+ * Marshall a ctdb_g_lock into buf: the lock type (copied into a
+ * uint32_t first), 4 bytes of padding, then the server id.  *npush
+ * receives the number of bytes written.
+ */
+void ctdb_g_lock_push(struct ctdb_g_lock *in, uint8_t *buf, size_t *npush)
+{
+	uint8_t *cur = buf;
+	uint32_t wire_type = in->type;
+	size_t n;
+
+	ctdb_uint32_push(&wire_type, cur, &n);
+	cur += n;
+
+	ctdb_padding_push(4, cur, &n);
+	cur += n;
+
+	ctdb_server_id_push(&in->sid, cur, &n);
+	cur += n;
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall a ctdb_g_lock into the caller-provided *out.
+ *
+ * The type value 0 maps to CTDB_G_LOCK_READ and 1 to
+ * CTDB_G_LOCK_WRITE; any other value is rejected with EPROTO.  The
+ * type is followed by 4 bytes of padding and the server id.
+ *
+ * Returns 0 with *npull set to the bytes consumed, or the error of the
+ * first failing step.
+ */
+int ctdb_g_lock_pull(uint8_t *buf, size_t buflen, struct ctdb_g_lock *out,
+		     size_t *npull)
+{
+	uint32_t wire_type;
+	size_t pos = 0;
+	size_t used = 0;
+	int ret;
+
+	ret = ctdb_uint32_pull(buf + pos, buflen - pos, &wire_type, &used);
+	if (ret != 0) {
+		return ret;
+	}
+	pos += used;
+
+	switch (wire_type) {
+	case 0:
+		out->type = CTDB_G_LOCK_READ;
+		break;
+	case 1:
+		out->type = CTDB_G_LOCK_WRITE;
+		break;
+	default:
+		return EPROTO;
+	}
+
+	ret = ctdb_padding_pull(buf + pos, buflen - pos, 4, &used);
+	if (ret == 0) {
+		pos += used;
+		ret = ctdb_server_id_pull(buf + pos, buflen - pos, &out->sid,
+					  &used);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+	pos += used;
+
+	*npull = pos;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_g_lock_list: in->num times the size of the
+ * first lock, or 0 for an empty list.  No element count is included.
+ */
+size_t ctdb_g_lock_list_len(struct ctdb_g_lock_list *in)
+{
+	if (in->num == 0) {
+		return 0;
+	}
+
+	return in->num * ctdb_g_lock_len(&in->lock[0]);
+}
+
+/*
+ * Marshall the in->num locks of a ctdb_g_lock_list back to back into
+ * buf; *npush receives the number of bytes written.
+ */
+void ctdb_g_lock_list_push(struct ctdb_g_lock_list *in, uint8_t *buf,
+			   size_t *npush)
+{
+	uint8_t *cur = buf;
+	uint32_t idx;
+	size_t n;
+
+	for (idx = 0; idx < in->num; idx++) {
+		ctdb_g_lock_push(&in->lock[idx], cur, &n);
+		cur += n;
+	}
+
+	*npush = (size_t)(cur - buf);
+}
+
+/*
+ * Unmarshall a ctdb_g_lock_list into a structure allocated on mem_ctx.
+ *
+ * The buffer carries no element count: the number of locks is derived
+ * as buflen divided by the marshalled size of one lock.  An empty
+ * buffer yields a list with num 0 and a NULL lock array.
+ *
+ * Returns 0 with *out and *npull (bytes consumed) set.  On failure the
+ * list is freed and ENOMEM or the failing lock pull's error is
+ * returned.
+ */
+int ctdb_g_lock_list_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
+			  struct ctdb_g_lock_list **out, size_t *npull)
+{
+	struct ctdb_g_lock_list *list;
+	struct ctdb_g_lock sample = { 0 };
+	size_t pos = 0;
+	size_t used;
+	uint32_t idx;
+	int ret;
+
+	list = talloc(mem_ctx, struct ctdb_g_lock_list);
+	if (list == NULL) {
+		return ENOMEM;
+	}
+
+	if (buflen == 0) {
+		list->lock = NULL;
+		list->num = 0;
+
+		*out = list;
+		*npull = pos;
+		return 0;
+	}
+
+	list->num = buflen / ctdb_g_lock_len(&sample);
+
+	list->lock = talloc_array(list, struct ctdb_g_lock, list->num);
+	if (list->lock == NULL) {
+		talloc_free(list);
+		return ENOMEM;
+	}
+
+	for (idx = 0; idx < list->num; idx++) {
+		ret = ctdb_g_lock_pull(buf + pos, buflen - pos,
+				       &list->lock[idx], &used);
+		if (ret != 0) {
+			talloc_free(list);
+			return ret;
+		}
+		pos += used;
+	}
+
+	*out = list;
+	*npull = pos;
+	return 0;
+}
diff --git a/ctdb/protocol/protocol_util.c b/ctdb/protocol/protocol_util.c
new file mode 100644
index 0000000..87ecc87
--- /dev/null
+++ b/ctdb/protocol/protocol_util.c
@@ -0,0 +1,773 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "common/line.h"
+
+#include "protocol.h"
+#include "protocol_util.h"
+#include "lib/util/util.h"
+#include "lib/util/smb_strtox.h"
+
+/*
+ * Table pairing each run state with its textual label.  The table is
+ * terminated by an entry whose label is NULL; lookups below stop at
+ * that entry.  Entry 0 (UNKNOWN) is also what ctdb_runstate_to_string()
+ * returns for a run state that is not listed.
+ */
+static struct {
+ enum ctdb_runstate runstate;
+ const char * label;
+} runstate_map[] = {
+ { CTDB_RUNSTATE_UNKNOWN, "UNKNOWN" },
+ { CTDB_RUNSTATE_INIT, "INIT" },
+ { CTDB_RUNSTATE_SETUP, "SETUP" },
+ { CTDB_RUNSTATE_FIRST_RECOVERY, "FIRST_RECOVERY" },
+ { CTDB_RUNSTATE_STARTUP, "STARTUP" },
+ { CTDB_RUNSTATE_RUNNING, "RUNNING" },
+ { CTDB_RUNSTATE_SHUTDOWN, "SHUTDOWN" },
+ { -1, NULL }, /* terminator */
+};
+
+/*
+ * Return the label for a run state.  A run state that is not in
+ * runstate_map yields the label of the first table entry.
+ */
+const char *ctdb_runstate_to_string(enum ctdb_runstate runstate)
+{
+	int idx = 0;
+
+	while (runstate_map[idx].label != NULL) {
+		if (runstate_map[idx].runstate == runstate) {
+			return runstate_map[idx].label;
+		}
+		idx++;
+	}
+
+	return runstate_map[0].label;
+}
+
+/*
+ * Map a label to its run state, ignoring case.  An unrecognised label
+ * yields CTDB_RUNSTATE_UNKNOWN.
+ */
+enum ctdb_runstate ctdb_runstate_from_string(const char *runstate_str)
+{
+	int idx = 0;
+
+	while (runstate_map[idx].label != NULL) {
+		const char *label = runstate_map[idx].label;
+
+		if (strcasecmp(label, runstate_str) == 0) {
+			return runstate_map[idx].runstate;
+		}
+		idx++;
+	}
+
+	return CTDB_RUNSTATE_UNKNOWN;
+}
+
+/*
+ * Table pairing each event with its textual label.  The table is
+ * terminated by an entry whose label is NULL; lookups below stop at
+ * that entry.  CTDB_EVENT_MAX is given the label "all".
+ */
+static struct {
+ enum ctdb_event event;
+ const char *label;
+} event_map[] = {
+ { CTDB_EVENT_INIT, "init" },
+ { CTDB_EVENT_SETUP, "setup" },
+ { CTDB_EVENT_STARTUP, "startup" },
+ { CTDB_EVENT_START_RECOVERY, "startrecovery" },
+ { CTDB_EVENT_RECOVERED, "recovered" },
+ { CTDB_EVENT_TAKE_IP, "takeip" },
+ { CTDB_EVENT_RELEASE_IP, "releaseip" },
+ { CTDB_EVENT_MONITOR, "monitor" },
+ { CTDB_EVENT_SHUTDOWN, "shutdown" },
+ { CTDB_EVENT_UPDATE_IP, "updateip" },
+ { CTDB_EVENT_IPREALLOCATED, "ipreallocated" },
+ { CTDB_EVENT_MAX, "all" },
+ { -1, NULL }, /* terminator */
+};
+
+/*
+ * Return the label for an event, or "unknown" if the event is not in
+ * event_map.
+ */
+const char *ctdb_event_to_string(enum ctdb_event event)
+{
+	int idx = 0;
+
+	while (event_map[idx].label != NULL) {
+		if (event_map[idx].event == event) {
+			return event_map[idx].label;
+		}
+		idx++;
+	}
+
+	return "unknown";
+}
+
+/*
+ * Map a label to its event using an exact, case-sensitive match.  An
+ * unrecognised label yields CTDB_EVENT_MAX.
+ */
+enum ctdb_event ctdb_event_from_string(const char *event_str)
+{
+	int idx = 0;
+
+	while (event_map[idx].label != NULL) {
+		const char *label = event_map[idx].label;
+
+		if (strcmp(label, event_str) == 0) {
+			return event_map[idx].event;
+		}
+		idx++;
+	}
+
+	return CTDB_EVENT_MAX;
+}
+
+/*
+ * Format a socket address as text into buf (capacity buflen).
+ *
+ * Without a port the plain address is written.  With a port the result
+ * is "a.b.c.d:port" for IPv4 and "[v6-address]:port" for IPv6.
+ *
+ * Returns 0 on success, errno if inet_ntop() fails, ENOSPC if the text
+ * does not fit and EAFNOSUPPORT for any other address family.
+ */
+int ctdb_sock_addr_to_buf(char *buf, socklen_t buflen,
+			  ctdb_sock_addr *addr, bool with_port)
+{
+	size_t used = 0;
+	int n;
+
+	if (addr->sa.sa_family == AF_INET) {
+		if (inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr,
+			      buf, buflen) == NULL) {
+			return errno;
+		}
+		if (with_port) {
+			used = strlen(buf);
+		}
+	} else if (addr->sa.sa_family == AF_INET6) {
+		char text[INET6_ADDRSTRLEN];
+
+		if (inet_ntop(addr->ip6.sin6_family,
+			      &addr->ip6.sin6_addr,
+			      text,
+			      sizeof(text)) == NULL) {
+			return errno;
+		}
+
+		if (with_port) {
+			/* Brackets keep the port separator unambiguous */
+			n = snprintf(buf, buflen, "[%s]", text);
+			if (n < 0) {
+				return ENOSPC;
+			}
+			used = (size_t)n;
+		} else {
+			used = strlcpy(buf, text, buflen);
+		}
+		if (used >= buflen) {
+			return ENOSPC;
+		}
+	} else {
+		return EAFNOSUPPORT;
+	}
+
+	if (!with_port) {
+		return 0;
+	}
+
+	n = snprintf(buf + used, buflen - used,
+		     ":%u", ctdb_sock_addr_port(addr));
+	if (n < 0 || (size_t)n >= buflen - used) {
+		return ENOSPC;
+	}
+
+	return 0;
+}
+
+/*
+ * Format a socket address into a 64 byte buffer allocated on mem_ctx.
+ * Returns the string, or NULL if allocation or formatting fails (the
+ * buffer is freed in the latter case).
+ */
+char *ctdb_sock_addr_to_string(TALLOC_CTX *mem_ctx,
+			       ctdb_sock_addr *addr,
+			       bool with_port)
+{
+	const size_t capacity = 64;
+	char *text;
+
+	text = talloc_size(mem_ctx, capacity);
+	if (text == NULL) {
+		return NULL;
+	}
+
+	if (ctdb_sock_addr_to_buf(text, capacity, addr, with_port) != 0) {
+		talloc_free(text);
+		return NULL;
+	}
+
+	return text;
+}
+
+/*
+ * Parse a textual IPv4 address into *ip.  *ip is reset to an otherwise
+ * empty AF_INET address first.  Returns 0 on success, EINVAL if
+ * inet_pton() does not accept the string.
+ */
+static int ipv4_from_string(const char *str, struct sockaddr_in *ip)
+{
+	*ip = (struct sockaddr_in) {
+		.sin_family = AF_INET,
+	};
+
+	if (inet_pton(AF_INET, str, &ip->sin_addr) != 1) {
+		return EINVAL;
+	}
+
+#ifdef HAVE_SOCK_SIN_LEN
+	ip->sin_len = sizeof(*ip);
+#endif
+	return 0;
+}
+
+/*
+ * Parse a textual IPv6 address into *ip6.  *ip6 is reset to an
+ * otherwise empty AF_INET6 address first.  Returns 0 on success,
+ * EINVAL if inet_pton() does not accept the string.
+ */
+static int ipv6_from_string(const char *str, struct sockaddr_in6 *ip6)
+{
+	*ip6 = (struct sockaddr_in6) {
+		.sin6_family = AF_INET6,
+	};
+
+	if (inet_pton(AF_INET6, str, &ip6->sin6_addr) != 1) {
+		return EINVAL;
+	}
+
+#ifdef HAVE_SOCK_SIN6_LEN
+	ip6->sin6_len = sizeof(*ip6);
+#endif
+	return 0;
+}
+
+static int ip_from_string(const char *str, ctdb_sock_addr *addr)
+{
+ char *p;
+ int ret;
+
+ if (addr == NULL) {
+ return EINVAL;
+ }
+
+ ZERO_STRUCTP(addr); /* valgrind :-) */
+
+ /* IPv4 or IPv6 address?
+ *
+ * Use strrchr() because we need the right-most ':' below for
+ * IPv4-mapped IPv6 addresses anyway...
+ */
+ p = strrchr(str, ':');
+ if (p == NULL) {
+ ret = ipv4_from_string(str, &addr->ip);
+ } else {
+ static const uint8_t ipv4_mapped_prefix[12] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff
+ };
+ size_t len = strlen(str);
+ char s[64];
+
+ len = strlcpy(s, str, sizeof(s));
+ if (len >= sizeof(s)) {
+ return EINVAL;
+ }
+
+ if ((len >= 2) && (s[0] == '[') && (s[len-1] == ']')) {
+ s[len-1] = '\0';
+ str = s+1;
+ p = strrchr(str, ':');
+ }
+
+ ret = ipv6_from_string(str, &addr->ip6);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /*
+ * Check for IPv4-mapped IPv6 address
+ * (e.g. ::ffff:192.0.2.128) - reparse as IPv4 if
+ * necessary
+ */
+ if (memcmp(&addr->ip6.sin6_addr.s6_addr[0],
+ ipv4_mapped_prefix,
+ sizeof(ipv4_mapped_prefix)) == 0) {
+ /* Initialize addr struct to zero before reparsing as IPV4 */
+ ZERO_STRUCTP(addr);
+
+ /* Reparse as IPv4 */
+ ret = ipv4_from_string(p+1, &addr->ip);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Parse a textual socket address into *addr.
+ *
+ * Without a port the whole string is the IP address.  With a port the
+ * text after the right-most ':' is the decimal port number and the
+ * text before it is the IP address (IPv6 may be bracketed).
+ *
+ * Returns 0 on success.  EINVAL is returned if the string is too long,
+ * has no ':' separator, has an empty/garbled port or a port larger
+ * than 65535; otherwise the result of the address parse is returned.
+ * The port is only stored in *addr when the address parsed correctly.
+ */
+int ctdb_sock_addr_from_string(const char *str,
+			       ctdb_sock_addr *addr, bool with_port)
+{
+	char *p;
+	char s[64]; /* Much longer than INET6_ADDRSTRLEN */
+	unsigned long port;
+	size_t len;
+	int ret;
+
+	if (! with_port) {
+		ret = ip_from_string(str, addr);
+		return ret;
+	}
+
+	/* Parse out port number and then IP address */
+
+	len = strlcpy(s, str, sizeof(s));
+	if (len >= sizeof(s)) {
+		return EINVAL;
+	}
+
+	p = strrchr(s, ':');
+	if (p == NULL) {
+		return EINVAL;
+	}
+
+	port = smb_strtoul(p+1, NULL, 10, &ret, SMB_STR_FULL_STR_CONV);
+	if (ret != 0) {
+		/* Empty string or trailing garbage */
+		return EINVAL;
+	}
+	if (port > UINT16_MAX) {
+		/* Would be silently truncated by htons() */
+		return EINVAL;
+	}
+
+	*p = '\0';
+	ret = ip_from_string(s, addr);
+	if (ret != 0) {
+		/*
+		 * Do not touch addr on failure: it may be NULL, and the
+		 * address family is not meaningful anyway.
+		 */
+		return ret;
+	}
+
+	ctdb_sock_addr_set_port(addr, (unsigned int)port);
+
+	return 0;
+}
+
+/*
+ * Parse "address/mask" into *addr and *mask.
+ *
+ * The text after the right-most '/' is the decimal mask and the text
+ * before it is the IP address.  The mask is not range-checked against
+ * the address family.  *mask is written only when the address parses.
+ *
+ * Returns 0 on success; EINVAL if addr or mask is NULL, the string is
+ * too long, has no '/' or has an empty/garbled mask; otherwise the
+ * result of the address parse.
+ */
+int ctdb_sock_addr_mask_from_string(const char *str,
+				    ctdb_sock_addr *addr,
+				    unsigned int *mask)
+{
+	char copy[64]; /* Much longer than INET6_ADDRSTRLEN */
+	char *slash;
+	unsigned int bits;
+	int ret = 0;
+
+	if (addr == NULL || mask == NULL) {
+		return EINVAL;
+	}
+
+	if (strlcpy(copy, str, sizeof(copy)) >= sizeof(copy)) {
+		return EINVAL;
+	}
+
+	slash = strrchr(copy, '/');
+	if (slash == NULL) {
+		return EINVAL;
+	}
+
+	bits = smb_strtoul(slash+1, NULL, 10, &ret, SMB_STR_FULL_STR_CONV);
+	if (ret != 0) {
+		/* Empty string or trailing garbage */
+		return EINVAL;
+	}
+
+	/* Cut the copy at the separator, leaving just the address */
+	*slash = '\0';
+
+	ret = ip_from_string(copy, addr);
+	if (ret != 0) {
+		return ret;
+	}
+
+	*mask = bits;
+	return 0;
+}
+
+/*
+ * Return the port of a socket address in host byte order, or 0 if the
+ * address family is neither AF_INET nor AF_INET6.
+ */
+unsigned int ctdb_sock_addr_port(ctdb_sock_addr *addr)
+{
+	if (addr->sa.sa_family == AF_INET) {
+		return ntohs(addr->ip.sin_port);
+	}
+
+	if (addr->sa.sa_family == AF_INET6) {
+		return ntohs(addr->ip6.sin6_port);
+	}
+
+	return 0;
+}
+
+/*
+ * Store a host byte order port in a socket address.  Nothing is done
+ * if the address family is neither AF_INET nor AF_INET6.
+ */
+void ctdb_sock_addr_set_port(ctdb_sock_addr *addr, unsigned int port)
+{
+	if (addr->sa.sa_family == AF_INET) {
+		addr->ip.sin_port = htons(port);
+	} else if (addr->sa.sa_family == AF_INET6) {
+		addr->ip6.sin6_port = htons(port);
+	}
+}
+
+/*
+ * Order two addresses by the numeric value of their address family:
+ * -1, 0 or 1.  The ordering is somewhat arbitrary, but when used for
+ * sorting it only needs to be consistent.
+ */
+static int ctdb_sock_addr_cmp_family(const ctdb_sock_addr *addr1,
+				     const ctdb_sock_addr *addr2)
+{
+	int greater = (addr1->sa.sa_family > addr2->sa.sa_family);
+	int less = (addr1->sa.sa_family < addr2->sa.sa_family);
+
+	return greater - less;
+}
+
+/*
+ * Compare the IP parts of two addresses, ignoring ports.
+ *
+ * Addresses of different families are ordered by family.  Within a
+ * family the raw address bytes are compared with memcmp() (4 bytes for
+ * IPv4, 16 for IPv6).  Two addresses of the same, unsupported family
+ * compare as -1.
+ */
+int ctdb_sock_addr_cmp_ip(const ctdb_sock_addr *addr1,
+			  const ctdb_sock_addr *addr2)
+{
+	int ret = ctdb_sock_addr_cmp_family(addr1, addr2);
+
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (addr1->sa.sa_family == AF_INET) {
+		return memcmp(&addr1->ip.sin_addr.s_addr,
+			      &addr2->ip.sin_addr.s_addr, 4);
+	}
+
+	if (addr1->sa.sa_family == AF_INET6) {
+		return memcmp(addr1->ip6.sin6_addr.s6_addr,
+			      addr2->ip6.sin6_addr.s6_addr, 16);
+	}
+
+	return -1;
+}
+
+/*
+ * Compare two socket addresses: first by IP (see
+ * ctdb_sock_addr_cmp_ip()), then by port.  Ports are compared as
+ * stored in the structure, without byte order conversion.
+ */
+int ctdb_sock_addr_cmp(const ctdb_sock_addr *addr1,
+		       const ctdb_sock_addr *addr2)
+{
+	unsigned int port1, port2;
+	int ret;
+
+	ret = ctdb_sock_addr_cmp_ip(addr1, addr2);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (addr1->sa.sa_family == AF_INET) {
+		port1 = addr1->ip.sin_port;
+		port2 = addr2->ip.sin_port;
+	} else if (addr1->sa.sa_family == AF_INET6) {
+		port1 = addr1->ip6.sin6_port;
+		port2 = addr2->ip6.sin6_port;
+	} else {
+		return -1;
+	}
+
+	return (port1 > port2) - (port1 < port2);
+}
+
+/* True if both addresses have the same family and IP; ports ignored. */
+bool ctdb_sock_addr_same_ip(const ctdb_sock_addr *addr1,
+			    const ctdb_sock_addr *addr2)
+{
+	int diff = ctdb_sock_addr_cmp_ip(addr1, addr2);
+
+	return diff == 0;
+}
+
+/* True if both addresses have the same family, IP and port. */
+bool ctdb_sock_addr_same(const ctdb_sock_addr *addr1,
+			 const ctdb_sock_addr *addr2)
+{
+	int diff = ctdb_sock_addr_cmp(addr1, addr2);
+
+	return diff == 0;
+}
+
+/*
+ * True if two connections have the same src and the same dst socket
+ * address.  dst is only compared when src matches.
+ */
+bool ctdb_connection_same(const struct ctdb_connection *conn1,
+			  const struct ctdb_connection *conn2)
+{
+	return ctdb_sock_addr_same(&conn1->src, &conn2->src) &&
+	       ctdb_sock_addr_same(&conn1->dst, &conn2->dst);
+}
+
+/*
+ * Format a connection as "<addr:port><sep><addr:port>" into buf.
+ *
+ * The server address is formatted first, then the client address; the
+ * client_first flag selects which of the two appears first in the
+ * output.
+ *
+ * Returns 0 on success, the error from formatting either address, or
+ * ENOSPC if the result does not fit in buflen bytes.
+ */
+int ctdb_connection_to_buf(char *buf,
+			   size_t buflen,
+			   struct ctdb_connection *conn,
+			   bool client_first,
+			   const char *sep)
+{
+	char server[64], client[64];
+	const char *lead, *trail;
+	int ret;
+
+	ret = ctdb_sock_addr_to_buf(server, sizeof(server),
+				    &conn->server, true);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_sock_addr_to_buf(client, sizeof(client),
+				    &conn->client, true);
+	if (ret != 0) {
+		return ret;
+	}
+
+	lead = client_first ? client : server;
+	trail = client_first ? server : client;
+
+	ret = snprintf(buf, buflen, "%s%s%s", lead, sep, trail);
+	if (ret < 0 || (size_t)ret >= buflen) {
+		return ENOSPC;
+	}
+
+	return 0;
+}
+
+/*
+ * Format a connection, with a single space between the two addresses,
+ * into a 128 byte buffer allocated on mem_ctx.  Returns the string, or
+ * NULL if allocation or formatting fails (the buffer is freed in the
+ * latter case).
+ */
+char *ctdb_connection_to_string(TALLOC_CTX *mem_ctx,
+				struct ctdb_connection *conn,
+				bool client_first)
+{
+	const size_t capacity = 128;
+	char *text;
+
+	text = talloc_size(mem_ctx, capacity);
+	if (text == NULL) {
+		return NULL;
+	}
+
+	if (ctdb_connection_to_buf(text, capacity, conn,
+				   client_first, " ") != 0) {
+		talloc_free(text);
+		return NULL;
+	}
+
+	return text;
+}
+
+/*
+ * Parse "<addr:port> <addr:port>" into *conn.
+ *
+ * The two tokens are separated by spaces, tabs or newlines.  With
+ * client_first the first token is the client and the second the
+ * server; otherwise the other way round.  Both addresses must belong
+ * to the same address family.
+ *
+ * Tokenising uses strtok_r() so that any strtok() sequence a caller
+ * has in progress is left undisturbed.
+ *
+ * Returns 0 on success.  EINVAL is returned if str or conn is NULL,
+ * the string is too long, a token is missing or the families differ;
+ * otherwise the error from parsing an address is returned.
+ */
+int ctdb_connection_from_string(const char *str, bool client_first,
+				struct ctdb_connection *conn)
+{
+	char s[128];
+	char *t1 = NULL, *t2 = NULL;
+	char *saveptr = NULL;
+	size_t len;
+	ctdb_sock_addr *first;
+	ctdb_sock_addr *second;
+	int ret;
+
+	if (str == NULL || conn == NULL) {
+		return EINVAL;
+	}
+
+	first = (client_first ? &conn->client : &conn->server);
+	second = (client_first ? &conn->server : &conn->client);
+
+	len = strlcpy(s, str, sizeof(s));
+	if (len >= sizeof(s)) {
+		return EINVAL;
+	}
+
+	t1 = strtok_r(s, " \t\n", &saveptr);
+	if (t1 == NULL) {
+		return EINVAL;
+	}
+
+	t2 = strtok_r(NULL, " \t\n", &saveptr);
+	if (t2 == NULL) {
+		return EINVAL;
+	}
+
+	ret = ctdb_sock_addr_from_string(t1, first, true);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_sock_addr_from_string(t2, second, true);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_sock_addr_cmp_family(first, second);
+	if (ret != 0) {
+		return EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Append a copy of *conn to conn_list.
+ *
+ * When the array is full it is grown by 128 entries.  If growing
+ * fails the list is left exactly as it was.
+ *
+ * Returns 0 on success, EINVAL if conn_list or conn is NULL and ENOMEM
+ * if the array cannot be grown.
+ */
+int ctdb_connection_list_add(struct ctdb_connection_list *conn_list,
+			     struct ctdb_connection *conn)
+{
+	uint32_t len;
+
+	if (conn_list == NULL || conn == NULL) {
+		return EINVAL;
+	}
+
+	/* Ensure array is big enough */
+	len = talloc_array_length(conn_list->conn);
+	if (conn_list->num == len) {
+		struct ctdb_connection *grown;
+
+		/*
+		 * Keep the old pointer until the reallocation has
+		 * succeeded, so a failure cannot leave the list with a
+		 * NULL array and a non-zero count.
+		 */
+		grown = talloc_realloc(conn_list, conn_list->conn,
+				       struct ctdb_connection,
+				       len+128);
+		if (grown == NULL) {
+			return ENOMEM;
+		}
+		conn_list->conn = grown;
+	}
+
+	conn_list->conn[conn_list->num] = *conn;
+	conn_list->num++;
+
+	return 0;
+}
+
+/*
+ * qsort() comparison for connections: order by server address, then by
+ * client address.
+ */
+static int connection_cmp(const void *a, const void *b)
+{
+	const struct ctdb_connection *lhs = a;
+	const struct ctdb_connection *rhs = b;
+	int diff;
+
+	diff = ctdb_sock_addr_cmp(&lhs->server, &rhs->server);
+	if (diff != 0) {
+		return diff;
+	}
+
+	return ctdb_sock_addr_cmp(&lhs->client, &rhs->client);
+}
+
+/*
+ * Sort a connection list in place using connection_cmp().  An empty
+ * list is left alone.  Returns EINVAL if conn_list is NULL, else 0.
+ */
+int ctdb_connection_list_sort(struct ctdb_connection_list *conn_list)
+{
+	if (conn_list == NULL) {
+		return EINVAL;
+	}
+
+	if (conn_list->num == 0) {
+		return 0;
+	}
+
+	qsort(conn_list->conn, conn_list->num,
+	      sizeof(struct ctdb_connection), connection_cmp);
+
+	return 0;
+}
+
+/*
+ * Format a connection list as text allocated on mem_ctx, one
+ * connection per line with a single space between the two addresses.
+ *
+ * A NULL or empty list yields an empty string.  Returns NULL if
+ * allocation or formatting fails; nothing built so far is left behind
+ * on mem_ctx in that case.
+ */
+char *ctdb_connection_list_to_string(
+	TALLOC_CTX *mem_ctx,
+	struct ctdb_connection_list *conn_list, bool client_first)
+{
+	uint32_t i;
+	char *out;
+
+	out = talloc_strdup(mem_ctx, "");
+	if (out == NULL) {
+		return NULL;
+	}
+
+	if (conn_list == NULL || conn_list->num == 0) {
+		return out;
+	}
+
+	for (i = 0; i < conn_list->num; i++) {
+		char buf[128];
+		char *grown;
+		int ret;
+
+		ret = ctdb_connection_to_buf(buf,
+					     sizeof(buf),
+					     &conn_list->conn[i],
+					     client_first,
+					     " ");
+		if (ret != 0) {
+			talloc_free(out);
+			return NULL;
+		}
+
+		/*
+		 * Append via a temporary: on failure the string built
+		 * so far is still allocated and must be freed here.
+		 */
+		grown = talloc_asprintf_append(out, "%s\n", buf);
+		if (grown == NULL) {
+			talloc_free(out);
+			return NULL;
+		}
+		out = grown;
+	}
+
+	return out;
+}
+
+/*
+ * State handed to ctdb_connection_list_read_line() through the
+ * private_data pointer of the line reader.
+ */
+struct ctdb_connection_list_read_state {
+ struct ctdb_connection_list *list; /* connections parsed so far */
+ bool client_first; /* passed on to ctdb_connection_from_string() */
+};
+
+/*
+ * Per-line callback: parse one line as a connection and append it to
+ * the list in the read state.  Empty lines and lines starting with '#'
+ * are skipped.  Returns 0 on success or for a skipped line, otherwise
+ * the error from parsing or appending.
+ */
+static int ctdb_connection_list_read_line(char *line, void *private_data)
+{
+	struct ctdb_connection_list_read_state *state = private_data;
+	struct ctdb_connection conn;
+	int ret;
+
+	/* Skip empty lines and comments */
+	if (line[0] == '\0' || line[0] == '#') {
+		return 0;
+	}
+
+	ret = ctdb_connection_from_string(line, state->client_first, &conn);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return ctdb_connection_list_add(state->list, &conn);
+}
+
+/*
+ * Read a connection list from file descriptor fd, one connection per
+ * line, into a new list allocated on mem_ctx.
+ *
+ * Returns EINVAL if conn_list is NULL and ENOMEM if the list cannot be
+ * allocated.  Otherwise the result of line_read() is returned and
+ * *conn_list is set to the list - even when line_read() reports an
+ * error, in which case it holds whatever was added before the failure.
+ */
+int ctdb_connection_list_read(TALLOC_CTX *mem_ctx,
+			      int fd,
+			      bool client_first,
+			      struct ctdb_connection_list **conn_list)
+{
+	struct ctdb_connection_list_read_state state;
+	struct ctdb_connection_list *list;
+	int ret;
+
+	if (conn_list == NULL) {
+		return EINVAL;
+	}
+
+	list = talloc_zero(mem_ctx, struct ctdb_connection_list);
+	if (list == NULL) {
+		return ENOMEM;
+	}
+
+	state.list = list;
+	state.client_first = client_first;
+
+	ret = line_read(fd,
+			128,
+			mem_ctx,
+			ctdb_connection_list_read_line,
+			&state,
+			NULL);
+
+	*conn_list = state.list;
+
+	return ret;
+}
diff --git a/ctdb/protocol/protocol_util.h b/ctdb/protocol/protocol_util.h
new file mode 100644
index 0000000..70f35d1
--- /dev/null
+++ b/ctdb/protocol/protocol_util.h
@@ -0,0 +1,83 @@
+/*
+ CTDB protocol marshalling
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PROTOCOL_UTIL_H__
+#define __CTDB_PROTOCOL_UTIL_H__
+
+#include <talloc.h>
+
+#include "protocol/protocol.h"
+
+const char *ctdb_runstate_to_string(enum ctdb_runstate runstate);
+enum ctdb_runstate ctdb_runstate_from_string(const char *runstate_str);
+
+const char *ctdb_event_to_string(enum ctdb_event event);
+enum ctdb_event ctdb_event_from_string(const char *event_str);
+
+/*
+ * buflen must be long enough to hold the longest possible "address:port".
+ * For example, 1122:3344:5566:7788:99aa:bbcc:ddee:ff00:12345.
+ * 64 is a sane value for buflen.
+ */
+int ctdb_sock_addr_to_buf(char *buf, socklen_t buflen,
+ ctdb_sock_addr *addr, bool with_port);
+char *ctdb_sock_addr_to_string(TALLOC_CTX *mem_ctx,
+ ctdb_sock_addr *addr, bool with_port);
+int ctdb_sock_addr_from_string(const char *str,
+ ctdb_sock_addr *addr, bool with_port);
+int ctdb_sock_addr_mask_from_string(const char *str,
+ ctdb_sock_addr *addr,
+ unsigned int *mask);
+unsigned int ctdb_sock_addr_port(ctdb_sock_addr *addr);
+void ctdb_sock_addr_set_port(ctdb_sock_addr *addr, unsigned int port);
+int ctdb_sock_addr_cmp_ip(const ctdb_sock_addr *addr1,
+ const ctdb_sock_addr *addr2);
+int ctdb_sock_addr_cmp(const ctdb_sock_addr *addr1,
+ const ctdb_sock_addr *addr2);
+bool ctdb_sock_addr_same_ip(const ctdb_sock_addr *addr1,
+ const ctdb_sock_addr *addr2);
+bool ctdb_sock_addr_same(const ctdb_sock_addr *addr1,
+ const ctdb_sock_addr *addr2);
+
+bool ctdb_connection_same(const struct ctdb_connection *conn1,
+ const struct ctdb_connection *conn2);
+
+int ctdb_connection_to_buf(char *buf,
+ size_t buflen,
+ struct ctdb_connection * conn,
+ bool client_first,
+ const char *sep);
+char *ctdb_connection_to_string(TALLOC_CTX *mem_ctx,
+ struct ctdb_connection * conn,
+ bool client_first);
+int ctdb_connection_from_string(const char *str, bool client_first,
+ struct ctdb_connection *conn);
+
+int ctdb_connection_list_add(struct ctdb_connection_list *conn_list,
+ struct ctdb_connection *conn);
+int ctdb_connection_list_sort(struct ctdb_connection_list *conn_list);
+char *ctdb_connection_list_to_string(
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_connection_list *conn_list, bool client_first);
+int ctdb_connection_list_read(TALLOC_CTX *mem_ctx,
+ int fd,
+ bool client_first,
+ struct ctdb_connection_list **conn_list);
+
+#endif /* __CTDB_PROTOCOL_UTIL_H__ */
diff --git a/ctdb/server/ctdb_banning.c b/ctdb/server/ctdb_banning.c
new file mode 100644
index 0000000..3c71157
--- /dev/null
+++ b/ctdb/server/ctdb_banning.c
@@ -0,0 +1,146 @@
+/*
+ ctdb banning code
+
+ Copyright (C) Ronnie Sahlberg 2009
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "system/time.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+ * Timer callback fired when the ban period of this node has elapsed.
+ *
+ * If not all databases are frozen yet, the node bans itself again via
+ * ctdb_ban_self().  Otherwise the BANNED flag of this node is cleared
+ * and the banning context (if any) is released.
+ */
+static void ctdb_ban_node_event(struct tevent_context *ev,
+				struct tevent_timer *te,
+				struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+	if (ctdb_db_all_frozen(ctdb)) {
+		DEBUG(DEBUG_ERR,("Banning timed out\n"));
+		ctdb->nodes[ctdb->pnn]->flags &= ~NODE_FLAGS_BANNED;
+
+		if (ctdb->banning_ctx != NULL) {
+			talloc_free(ctdb->banning_ctx);
+			ctdb->banning_ctx = NULL;
+		}
+		return;
+	}
+
+	/* We were not able to freeze the databases during the ban */
+	DEBUG(DEBUG_ERR, ("Banning timed out, but not all databases "
+			  "frozen yet - banning this node again.\n"));
+	ctdb_ban_self(ctdb);
+}
+
+/*
+ * Handle a SET_BAN_STATE control: ban or unban this node.
+ *
+ * indata.dptr is interpreted as a struct ctdb_ban_state.  Requests for
+ * a PNN other than our own are rejected with -1.  Any existing banning
+ * context (and with it the pending unban timer) is dropped first.  A
+ * ban time of 0 clears the BANNED flag; a non-zero time sets the flag
+ * and arms a timer that runs ctdb_ban_node_event() after that many
+ * seconds, unless bans are disabled by the enable_bans tunable.
+ *
+ * ctdb_node_become_inactive() is only called when there was no banning
+ * context before this call.
+ *
+ * Returns 0 on success, -1 on a PNN mismatch or allocation failure.
+ */
+int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_ban_state *bantime = (struct ctdb_ban_state *)indata.dptr;
+	struct ctdb_ban_state *new_state;
+	bool already_banned;
+
+	DEBUG(DEBUG_INFO,("SET BAN STATE\n"));
+
+	if (bantime->pnn != ctdb->pnn) {
+		DEBUG(DEBUG_WARNING,
+		      ("SET_BAN_STATE control for PNN %d ignored\n",
+		       bantime->pnn));
+		return -1;
+	}
+
+	/* Remember whether a ban was in progress, then cancel it */
+	already_banned = (ctdb->banning_ctx != NULL);
+	if (already_banned) {
+		talloc_free(ctdb->banning_ctx);
+		ctdb->banning_ctx = NULL;
+	}
+
+	if (bantime->time == 0) {
+		DEBUG(DEBUG_ERR,("Unbanning this node\n"));
+		ctdb->nodes[bantime->pnn]->flags &= ~NODE_FLAGS_BANNED;
+		return 0;
+	}
+
+	if (ctdb->tunable.enable_bans == 0) {
+		DEBUG(DEBUG_ERR,("Bans are disabled - ignoring ban of node %u\n", bantime->pnn));
+		return 0;
+	}
+
+	new_state = talloc(ctdb, struct ctdb_ban_state);
+	ctdb->banning_ctx = new_state;
+	if (new_state == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " ERROR Failed to allocate new banning state\n"));
+		return -1;
+	}
+	*new_state = *bantime;
+
+	DEBUG(DEBUG_ERR,("Banning this node for %d seconds\n", bantime->time));
+	ctdb->nodes[bantime->pnn]->flags |= NODE_FLAGS_BANNED;
+
+	/* The timer hangs off the banning context, so dropping the
+	 * context also cancels the pending unban */
+	tevent_add_timer(ctdb->ev, ctdb->banning_ctx,
+			 timeval_current_ofs(bantime->time,0),
+			 ctdb_ban_node_event, ctdb);
+
+	if (!already_banned) {
+		ctdb_node_become_inactive(ctdb);
+	}
+	return 0;
+}
+
+/*
+ * Handle a GET_BAN_STATE control: report the current ban state.
+ *
+ * The reply is a struct ctdb_ban_state allocated with outdata as its
+ * talloc parent.  It is a copy of the active banning context, or
+ * { our pnn, time 0 } when this node is not banned.
+ *
+ * Returns 0 on success; CTDB_NO_MEMORY() handles allocation failure.
+ */
+int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata)
+{
+	struct ctdb_ban_state *bantime;
+
+	bantime = talloc(outdata, struct ctdb_ban_state);
+	CTDB_NO_MEMORY(ctdb, bantime);
+
+	if (ctdb->banning_ctx == NULL) {
+		/* not banned */
+		bantime->pnn = ctdb->pnn;
+		bantime->time = 0;
+	} else {
+		*bantime = *(struct ctdb_ban_state *)(ctdb->banning_ctx);
+	}
+
+	outdata->dsize = sizeof(struct ctdb_ban_state);
+	outdata->dptr = (uint8_t *)bantime;
+
+	return 0;
+}
+
+/*
+ * Ban this node for recovery_ban_period seconds (taken from the
+ * tunables) by feeding a locally built ban state into
+ * ctdb_control_set_ban_state().  The result of that call is ignored.
+ */
+void ctdb_ban_self(struct ctdb_context *ctdb)
+{
+	struct ctdb_ban_state bantime = {
+		.pnn = ctdb->pnn,
+		.time = ctdb->tunable.recovery_ban_period,
+	};
+	TDB_DATA data = {
+		.dptr = (uint8_t *)&bantime,
+		.dsize = sizeof(bantime),
+	};
+
+	ctdb_control_set_ban_state(ctdb, data);
+}
diff --git a/ctdb/server/ctdb_call.c b/ctdb/server/ctdb_call.c
new file mode 100644
index 0000000..a51a92d
--- /dev/null
+++ b/ctdb/server/ctdb_call.c
@@ -0,0 +1,2086 @@
+/*
+ ctdb_call protocol code
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ see http://wiki.samba.org/index.php/Samba_%26_Clustering for
+ protocol design and packet details
+*/
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/rb_tree.h"
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+#include "common/hash_count.h"
+
+/*
+ * Per-key state of a "sticky" record.  Instances are created by
+ * ctdb_make_record_sticky() and stored in ctdb_db->sticky_records.
+ */
+struct ctdb_sticky_record {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db;
+ /*
+ * Non-NULL while the record is pinned down.  Used as a talloc
+ * context: created by ctdb_set_sticky_pindown(), parent of the
+ * deferred requests, freed by ctdb_sticky_pindown_timeout().
+ */
+ TDB_CONTEXT *pindown;
+};
+
+/*
+  find the ctdb_db from a db index
+
+  Walks ctdb->db_list and returns the first database context whose
+  db_id equals id, or NULL when the list holds no such database.
+ */
+ struct ctdb_db_context *find_ctdb_db(struct ctdb_context *ctdb, uint32_t id)
+{
+	struct ctdb_db_context *db = ctdb->db_list;
+
+	while (db != NULL && db->db_id != id) {
+		db = db->next;
+	}
+	return db;
+}
+
+/*
+  Adapter around ctdb_input_pkt() taking the ctdb context as an untyped
+  pointer, so that it can be handed out as the requeue callback of a
+  lock request.
+*/
+static void ctdb_call_input_pkt(void *p, struct ctdb_req_header *hdr)
+{
+	ctdb_input_pkt(talloc_get_type(p, struct ctdb_context), hdr);
+}
+
+
+/*
+ send an error reply
+
+ Formats the printf-style message and sends it, together with status,
+ as a CTDB_REPLY_ERROR packet. The reply is addressed to hdr->srcnode
+ and carries hdr->reqid. Nothing is sent (only a log entry is written)
+ when the transport is down, i.e. ctdb->methods is NULL.
+*/
+static void ctdb_send_error(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr, uint32_t status,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(4,5);
+static void ctdb_send_error(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr, uint32_t status,
+ const char *fmt, ...)
+{
+ va_list ap;
+ struct ctdb_reply_error_old *r;
+ char *msg;
+ int msglen, len;
+
+ if (ctdb->methods == NULL) {
+ DEBUG(DEBUG_INFO,(__location__ " Failed to send error. Transport is DOWN\n"));
+ return;
+ }
+
+ va_start(ap, fmt);
+ msg = talloc_vasprintf(ctdb, fmt, ap);
+ if (msg == NULL) {
+ ctdb_fatal(ctdb, "Unable to allocate error in ctdb_send_error\n");
+ }
+ va_end(ap);
+
+ /* msglen counts the terminating NUL, which is sent as well */
+ msglen = strlen(msg)+1;
+ len = offsetof(struct ctdb_reply_error_old, msg);
+ /* msg is passed as the allocation context of the packet, so the
+ talloc_free(msg) at the end releases the packet too */
+ r = ctdb_transport_allocate(ctdb, msg, CTDB_REPLY_ERROR, len + msglen,
+ struct ctdb_reply_error_old);
+ CTDB_NO_MEMORY_FATAL(ctdb, r);
+
+ r->hdr.destnode = hdr->srcnode;
+ r->hdr.reqid = hdr->reqid;
+ r->status = status;
+ r->msglen = msglen;
+ memcpy(&r->msg[0], msg, msglen);
+
+ ctdb_queue_packet(ctdb, &r->hdr);
+
+ talloc_free(msg);
+}
+
+
+/**
+ * send a redirect reply
+ *
+ * Background:
+ *
+ * A client that wants a record sends a CTDB_REQ_CALL packet to its
+ * local ctdb (ctdb_request_call).  If that node is not the record's
+ * DMASTER, the packet is first redirected to the record's LMASTER,
+ * which in turn redirects it to the current DMASTER.  This works
+ * because a node that migrates a record away stores the new DMASTER
+ * in its own copy of the record.
+ *
+ * Here the destination is therefore the LMASTER of the key, unless
+ * this node is the LMASTER itself, in which case the packet goes to
+ * header->dmaster.  The hop count of the call is incremented and a
+ * warning is logged whenever hopcount % 100 exceeds 95.
+ */
+static void ctdb_call_send_redirect(struct ctdb_context *ctdb,
+				    struct ctdb_db_context *ctdb_db,
+				    TDB_DATA key,
+				    struct ctdb_req_call_old *c,
+				    struct ctdb_ltdb_header *header)
+{
+	uint32_t lmaster = ctdb_lmaster(ctdb, &key);
+
+	c->hdr.destnode = (ctdb->pnn == lmaster) ? header->dmaster : lmaster;
+	c->hopcount++;
+
+	if (c->hopcount%100 > 95) {
+		DEBUG(DEBUG_WARNING,("High hopcount %d dbid:%s "
+			"key:0x%08x reqid=%08x pnn:%d src:%d lmaster:%d "
+			"header->dmaster:%d dst:%d\n",
+			c->hopcount, ctdb_db->db_name, ctdb_hash(&key),
+			c->hdr.reqid, ctdb->pnn, c->hdr.srcnode, lmaster,
+			header->dmaster, c->hdr.destnode));
+	}
+
+	ctdb_queue_packet(ctdb, &c->hdr);
+}
+
+
+/*
+ send a dmaster reply
+
+ caller must have the chainlock before calling this routine. Caller must be
+ the lmaster
+
+ Records new_dmaster as the dmaster in the local copy of the record and
+ then sends a CTDB_REPLY_DMASTER packet, carrying key, data and the
+ record flags, to new_dmaster. Returns without doing anything (apart
+ from logging) if this node is not the lmaster of the key.
+*/
+static void ctdb_send_dmaster_reply(struct ctdb_db_context *ctdb_db,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA key, TDB_DATA data,
+ uint32_t new_dmaster,
+ uint32_t reqid)
+{
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+ struct ctdb_reply_dmaster_old *r;
+ int ret, len;
+ TALLOC_CTX *tmp_ctx;
+
+ if (ctdb->pnn != ctdb_lmaster(ctdb, &key)) {
+ DEBUG(DEBUG_ALERT,(__location__ " Caller is not lmaster!\n"));
+ return;
+ }
+
+ /* update the local record before anything is sent */
+ header->dmaster = new_dmaster;
+ ret = ctdb_ltdb_store(ctdb_db, key, header, data);
+ if (ret != 0) {
+ ctdb_fatal(ctdb, "ctdb_send_dmaster_reply unable to update dmaster");
+ return;
+ }
+
+ if (ctdb->methods == NULL) {
+ ctdb_fatal(ctdb, "ctdb_send_dmaster_reply can't update dmaster since transport is down");
+ return;
+ }
+
+ /* put the packet on a temporary context, allowing us to safely free
+ it below even if ctdb_reply_dmaster() has freed it already */
+ /* NOTE(review): the result of talloc_new() is not checked */
+ tmp_ctx = talloc_new(ctdb);
+
+ /* send the CTDB_REPLY_DMASTER */
+ /* payload layout: key, then data, then the 32 bit record flags */
+ len = offsetof(struct ctdb_reply_dmaster_old, data) + key.dsize + data.dsize + sizeof(uint32_t);
+ r = ctdb_transport_allocate(ctdb, tmp_ctx, CTDB_REPLY_DMASTER, len,
+ struct ctdb_reply_dmaster_old);
+ CTDB_NO_MEMORY_FATAL(ctdb, r);
+
+ r->hdr.destnode = new_dmaster;
+ r->hdr.reqid = reqid;
+ r->hdr.generation = ctdb_db->generation;
+ r->rsn = header->rsn;
+ r->keylen = key.dsize;
+ r->datalen = data.dsize;
+ r->db_id = ctdb_db->db_id;
+ memcpy(&r->data[0], key.dptr, key.dsize);
+ memcpy(&r->data[key.dsize], data.dptr, data.dsize);
+ memcpy(&r->data[key.dsize+data.dsize], &header->flags, sizeof(uint32_t));
+
+ ctdb_queue_packet(ctdb, &r->hdr);
+
+ talloc_free(tmp_ctx);
+}
+
+/*
+ send a dmaster request (give another node the dmaster for a record)
+
+ This is always sent to the lmaster, which ensures that the lmaster
+ always knows who the dmaster is. The lmaster will then send a
+ CTDB_REPLY_DMASTER to the new dmaster
+
+ The new dmaster is the node the call came from (c->hdr.srcnode). If
+ this node is itself the lmaster, the CTDB_REQ_DMASTER step is skipped
+ and ctdb_send_dmaster_reply() is called directly.
+*/
+static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db,
+ struct ctdb_req_call_old *c,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA *key, TDB_DATA *data)
+{
+ struct ctdb_req_dmaster_old *r;
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+ int len;
+ uint32_t lmaster = ctdb_lmaster(ctdb, key);
+
+ if (ctdb->methods == NULL) {
+ ctdb_fatal(ctdb, "Failed ctdb_call_send_dmaster since transport is down");
+ return;
+ }
+
+ /* flag the record when it is handed over with non-empty data */
+ if (data->dsize != 0) {
+ header->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA;
+ }
+
+ if (lmaster == ctdb->pnn) {
+ ctdb_send_dmaster_reply(ctdb_db, header, *key, *data,
+ c->hdr.srcnode, c->hdr.reqid);
+ return;
+ }
+
+ /* payload layout: key, then data, then the 32 bit record flags */
+ len = offsetof(struct ctdb_req_dmaster_old, data) + key->dsize + data->dsize
+ + sizeof(uint32_t);
+ r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_DMASTER, len,
+ struct ctdb_req_dmaster_old);
+ CTDB_NO_MEMORY_FATAL(ctdb, r);
+ r->hdr.destnode = lmaster;
+ r->hdr.reqid = c->hdr.reqid;
+ r->hdr.generation = ctdb_db->generation;
+ r->db_id = c->db_id;
+ r->rsn = header->rsn;
+ r->dmaster = c->hdr.srcnode;
+ r->keylen = key->dsize;
+ r->datalen = data->dsize;
+ memcpy(&r->data[0], key->dptr, key->dsize);
+ memcpy(&r->data[key->dsize], data->dptr, data->dsize);
+ memcpy(&r->data[key->dsize + data->dsize], &header->flags, sizeof(uint32_t));
+
+ /* record the new dmaster in the local copy before the request
+ is queued */
+ header->dmaster = c->hdr.srcnode;
+ if (ctdb_ltdb_store(ctdb_db, *key, header, *data) != 0) {
+ ctdb_fatal(ctdb, "Failed to store record in ctdb_call_send_dmaster");
+ }
+
+ ctdb_queue_packet(ctdb, &r->hdr);
+
+ talloc_free(r);
+}
+
+/*
+ * Timer callback ending the pin-down of a sticky record: logs the
+ * event and frees the pindown context of the record, if it has one.
+ */
+static void ctdb_sticky_pindown_timeout(struct tevent_context *ev,
+					struct tevent_timer *te,
+					struct timeval t, void *private_data)
+{
+	struct ctdb_sticky_record *sr = talloc_get_type(private_data,
+							struct ctdb_sticky_record);
+
+	DEBUG(DEBUG_ERR,("Pindown timeout db:%s unstick record\n", sr->ctdb_db->db_name));
+	TALLOC_FREE(sr->pindown);
+}
+
+/*
+ * Pin down the record for key if it is registered as a sticky record.
+ *
+ * Looks the key up in ctdb_db->sticky_records.  If a sticky record
+ * exists and is not pinned down yet, a pindown context is created and
+ * a timer is armed that runs ctdb_sticky_pindown_timeout() after
+ * tunable.sticky_pindown milliseconds.
+ *
+ * Returns 0 if the record is not sticky, is already pinned down or was
+ * pinned down now; -1 if the lookup key, the pindown context or the
+ * timer could not be allocated.  In the failure cases the record is
+ * left without a pindown context.
+ */
+static int
+ctdb_set_sticky_pindown(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+	uint32_t *k;
+	struct ctdb_sticky_record *sr;
+	struct tevent_timer *te;
+
+	k = ctdb_key_to_idkey(tmp_ctx, key);
+	if (k == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for sticky record\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	sr = trbt_lookuparray32(ctdb_db->sticky_records, k[0], &k[0]);
+	if (sr == NULL) {
+		talloc_free(tmp_ctx);
+		return 0;
+	}
+
+	talloc_free(tmp_ctx);
+
+	if (sr->pindown == NULL) {
+		DEBUG(DEBUG_ERR,("Pinning down record in %s for %d ms\n", ctdb_db->db_name, ctdb->tunable.sticky_pindown));
+		sr->pindown = talloc_new(sr);
+		if (sr->pindown == NULL) {
+			DEBUG(DEBUG_ERR,("Failed to allocate pindown context for sticky record\n"));
+			return -1;
+		}
+
+		/*
+		 * sticky_pindown is in milliseconds.  Take the remainder
+		 * before scaling to microseconds so that the
+		 * multiplication cannot wrap for large tunable values.
+		 */
+		te = tevent_add_timer(ctdb->ev, sr->pindown,
+				      timeval_current_ofs(ctdb->tunable.sticky_pindown / 1000,
+							  (ctdb->tunable.sticky_pindown % 1000) * 1000),
+				      ctdb_sticky_pindown_timeout, sr);
+		if (te == NULL) {
+			/*
+			 * Without the timer nothing would end the
+			 * pin-down, so do not pin the record at all.
+			 */
+			DEBUG(DEBUG_ERR,("Failed to add pindown timer for sticky record\n"));
+			TALLOC_FREE(sr->pindown);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+  called when a CTDB_REPLY_DMASTER packet comes in, or when the lmaster
+  gets a CTDB_REQUEST_DMASTER for itself. We become the dmaster.
+
+  must be called with the chainlock held. This function releases the chainlock
+
+  The record is stored locally with this node as dmaster, using the
+  given rsn and record_flags.  hdr->reqid is then used to find the call
+  that is waiting for the record.  The call is only run locally and
+  completed if such a call exists, its key matches the key of the
+  packet and its reqid matches hdr->reqid; otherwise the packet is
+  logged and dropped.  The chainlock is released on all of these paths.
+*/
+static void ctdb_become_dmaster(struct ctdb_db_context *ctdb_db,
+				struct ctdb_req_header *hdr,
+				TDB_DATA key, TDB_DATA data,
+				uint64_t rsn, uint32_t record_flags)
+{
+	struct ctdb_call_state *state;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_ltdb_header header;
+	int ret;
+
+	DEBUG(DEBUG_DEBUG,("pnn %u dmaster response %08x\n", ctdb->pnn, ctdb_hash(&key)));
+
+	ZERO_STRUCT(header);
+	header.rsn = rsn;
+	header.dmaster = ctdb->pnn;
+	header.flags = record_flags;
+
+	state = reqid_find(ctdb->idr, hdr->reqid, struct ctdb_call_state);
+
+	if (state) {
+		if (state->call->flags & CTDB_CALL_FLAG_VACUUM_MIGRATION) {
+			/*
+			 * We temporarily add the VACUUM_MIGRATED flag to
+			 * the record flags, so that ctdb_ltdb_store can
+			 * decide whether the record should be stored or
+			 * deleted.
+			 */
+			header.flags |= CTDB_REC_FLAG_VACUUM_MIGRATED;
+		}
+	}
+
+	if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
+		ctdb_fatal(ctdb, "ctdb_reply_dmaster store failed\n");
+
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+		return;
+	}
+
+	/* we just became DMASTER and this database is "sticky",
+	   see if the record is flagged as "hot" and set up a pin-down
+	   context to stop migrations for a little while if so
+	*/
+	if (ctdb_db_sticky(ctdb_db)) {
+		ctdb_set_sticky_pindown(ctdb, ctdb_db, key);
+	}
+
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR,("pnn %u Invalid reqid %u in ctdb_become_dmaster from node %u\n",
+			 ctdb->pnn, hdr->reqid, hdr->srcnode));
+
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+		return;
+	}
+
+	if (key.dsize != state->call->key.dsize || memcmp(key.dptr, state->call->key.dptr, key.dsize)) {
+		DEBUG(DEBUG_ERR, ("Got bogus DMASTER packet reqid:%u from node %u. Key does not match key held in matching idr.\n", hdr->reqid, hdr->srcnode));
+
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+		return;
+	}
+
+	if (hdr->reqid != state->reqid) {
+		/* we found a record but it was the wrong one */
+		/* the newline belongs at the end of the message, not in
+		   the middle of it */
+		DEBUG(DEBUG_ERR, ("Dropped orphan in ctdb_become_dmaster with reqid:%u from node %u\n", hdr->reqid, hdr->srcnode));
+
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+		return;
+	}
+
+	/* count this migration of the key; the result is not needed */
+	(void) hash_count_increment(ctdb_db->migratedb, key);
+
+	ctdb_call_local(ctdb_db, state->call, &header, state, &data, true);
+
+	ret = ctdb_ltdb_unlock(ctdb_db, state->call->key);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+	}
+
+	/* the call is complete - notify whoever is waiting for it */
+	state->state = CTDB_CALL_DONE;
+	if (state->async.fn) {
+		state->async.fn(state);
+	}
+}
+
+/*
+ * One request that has been deferred on a dmaster_defer_queue, see
+ * dmaster_defer_add().
+ */
+struct dmaster_defer_call {
+ /* links for the deferred_calls list of the owning queue */
+ struct dmaster_defer_call *next, *prev;
+ struct ctdb_context *ctdb;
+ /* the deferred packet; stolen onto this structure when it is queued */
+ struct ctdb_req_header *hdr;
+};
+
+/*
+ * Queue of requests deferred for one key, stored in
+ * ctdb_db->defer_dmaster by dmaster_defer_setup().
+ */
+struct dmaster_defer_queue {
+ struct ctdb_db_context *ctdb_db;
+ /* generation of the request header the queue was set up for */
+ uint32_t generation;
+ /* list of deferred requests, rescheduled by the queue's destructor */
+ struct dmaster_defer_call *deferred_calls;
+};
+
+/*
+ * Timer callback that feeds a previously deferred request back into
+ * ctdb_input_pkt() and then frees the deferral record.
+ */
+static void dmaster_defer_reprocess(struct tevent_context *ev,
+				    struct tevent_timer *te,
+				    struct timeval t,
+				    void *private_data)
+{
+	struct dmaster_defer_call *deferred;
+
+	deferred = talloc_get_type(private_data, struct dmaster_defer_call);
+
+	ctdb_input_pkt(deferred->ctdb, deferred->hdr);
+	talloc_free(deferred);
+}
+
+/*
+ * talloc destructor of a dmaster_defer_queue.
+ *
+ * If the database generation still matches the one stored in the
+ * queue, every deferred call is taken off the list, reparented to its
+ * ctdb context and scheduled for immediate reprocessing through
+ * dmaster_defer_reprocess().  If the generation has changed (a
+ * database recovery happened in between) the deferred calls are left
+ * untouched.  Always returns 0.
+ */
+static int dmaster_defer_queue_destructor(struct dmaster_defer_queue *ddq)
+{
+	struct dmaster_defer_call *call;
+
+	if (ddq->generation == ddq->ctdb_db->generation) {
+		while ((call = ddq->deferred_calls) != NULL) {
+			DLIST_REMOVE(ddq->deferred_calls, call);
+
+			/* keep the call alive beyond the queue */
+			talloc_steal(call->ctdb, call);
+			tevent_add_timer(call->ctdb->ev, call, timeval_zero(),
+					 dmaster_defer_reprocess, call);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Callback passed to trbt_insertarray32_callback() by
+ * dmaster_defer_setup(): frees data when it is non-NULL and returns
+ * parm.
+ */
+static void *insert_ddq_callback(void *parm, void *data)
+{
+	if (data != NULL) {
+		talloc_free(data);
+	}
+
+	return parm;
+}
+
+/**
+ * This function is used to register a key in database that needs to be updated.
+ * Any requests for that key should get deferred till this is completed.
+ *
+ * A defer queue for the key is inserted into ctdb_db->defer_dmaster.
+ * The queue is allocated as a talloc child of hdr.
+ *
+ * Returns 0 if a queue is in place afterwards (a new one, or an
+ * existing one of the current database generation), -1 on allocation
+ * failure.
+ */
+static int dmaster_defer_setup(struct ctdb_db_context *ctdb_db,
+ struct ctdb_req_header *hdr,
+ TDB_DATA key)
+{
+ uint32_t *k;
+ struct dmaster_defer_queue *ddq;
+
+ k = ctdb_key_to_idkey(hdr, key);
+ if (k == NULL) {
+ DEBUG(DEBUG_ERR, ("Failed to allocate key for dmaster defer setup\n"));
+ return -1;
+ }
+
+ /* Already exists */
+ ddq = trbt_lookuparray32(ctdb_db->defer_dmaster, k[0], k);
+ if (ddq != NULL) {
+ /* a queue of the current generation is simply reused */
+ if (ddq->generation == ctdb_db->generation) {
+ talloc_free(k);
+ return 0;
+ }
+
+ /* Recovery occurred - get rid of old queue. All the deferred
+ * requests will be resent anyway from ctdb_call_resend_db.
+ */
+ talloc_free(ddq);
+ }
+
+ ddq = talloc(hdr, struct dmaster_defer_queue);
+ if (ddq == NULL) {
+ DEBUG(DEBUG_ERR, ("Failed to allocate dmaster defer queue\n"));
+ talloc_free(k);
+ return -1;
+ }
+ ddq->ctdb_db = ctdb_db;
+ ddq->generation = hdr->generation;
+ ddq->deferred_calls = NULL;
+
+ trbt_insertarray32_callback(ctdb_db->defer_dmaster, k[0], k,
+ insert_ddq_callback, ddq);
+ /* the destructor reschedules the deferred calls when ddq is freed */
+ talloc_set_destructor(ddq, dmaster_defer_queue_destructor);
+
+ talloc_free(k);
+ return 0;
+}
+
+/*
+ * Defer the request hdr on the defer queue registered for key.
+ *
+ * Returns 0 if the request was queued; hdr has then been stolen onto
+ * the queue entry and is no longer owned by the caller. Returns -1 if
+ * there is no queue for the key, if the queue belongs to a different
+ * generation than hdr (the queue is then freed without running its
+ * destructor), or on allocation failure.
+ */
+static int dmaster_defer_add(struct ctdb_db_context *ctdb_db,
+ struct ctdb_req_header *hdr,
+ TDB_DATA key)
+{
+ struct dmaster_defer_queue *ddq;
+ struct dmaster_defer_call *call;
+ uint32_t *k;
+
+ k = ctdb_key_to_idkey(hdr, key);
+ if (k == NULL) {
+ DEBUG(DEBUG_ERR, ("Failed to allocate key for dmaster defer add\n"));
+ return -1;
+ }
+
+ ddq = trbt_lookuparray32(ctdb_db->defer_dmaster, k[0], k);
+ if (ddq == NULL) {
+ talloc_free(k);
+ return -1;
+ }
+
+ talloc_free(k);
+
+ if (ddq->generation != hdr->generation) {
+ /* clear the destructor first so that freeing the stale queue
+ does not reschedule its deferred calls */
+ talloc_set_destructor(ddq, NULL);
+ talloc_free(ddq);
+ return -1;
+ }
+
+ call = talloc(ddq, struct dmaster_defer_call);
+ if (call == NULL) {
+ DEBUG(DEBUG_ERR, ("Failed to allocate dmaster defer call\n"));
+ return -1;
+ }
+
+ call->ctdb = ctdb_db->ctdb;
+ /* the queue entry takes ownership of the packet */
+ call->hdr = talloc_steal(call, hdr);
+
+ DLIST_ADD_END(ddq->deferred_calls, call);
+
+ return 0;
+}
+
+/*
+ called when a CTDB_REQ_DMASTER packet comes in
+
+ this comes into the lmaster for a record when the current dmaster
+ wants to give up the dmaster role and give it to someone else
+
+ The local copy of the record is locked and fetched, the request is
+ checked against it, and then either this node becomes the dmaster
+ itself (when c->dmaster is our own pnn) or a CTDB_REPLY_DMASTER is
+ sent to the new dmaster.
+*/
+void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+ struct ctdb_req_dmaster_old *c = (struct ctdb_req_dmaster_old *)hdr;
+ TDB_DATA key, data, data2;
+ struct ctdb_ltdb_header header;
+ struct ctdb_db_context *ctdb_db;
+ uint32_t record_flags = 0;
+ size_t len;
+ int ret;
+
+ /* key and data point into the packet: key first, data right after */
+ key.dptr = c->data;
+ key.dsize = c->keylen;
+ data.dptr = c->data + c->keylen;
+ data.dsize = c->datalen;
+ /* the record flags follow the data; they are only read when the
+ packet is long enough to contain them, otherwise they stay 0 */
+ len = offsetof(struct ctdb_req_dmaster_old, data) + key.dsize + data.dsize
+ + sizeof(uint32_t);
+ if (len <= c->hdr.length) {
+ memcpy(&record_flags, &c->data[c->keylen + c->datalen],
+ sizeof(record_flags));
+ }
+
+ ctdb_db = find_ctdb_db(ctdb, c->db_id);
+ if (!ctdb_db) {
+ ctdb_send_error(ctdb, hdr, -1,
+ "Unknown database in request. db_id==0x%08x",
+ c->db_id);
+ return;
+ }
+
+ /* register the key so that requests for it can be deferred; the
+ return value is not checked */
+ dmaster_defer_setup(ctdb_db, hdr, key);
+
+ /* fetch the current record */
+ ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header, hdr, &data2,
+ ctdb_call_input_pkt, ctdb, false);
+ if (ret == -1) {
+ ctdb_fatal(ctdb, "ctdb_req_dmaster failed to fetch record");
+ return;
+ }
+ if (ret == -2) {
+ DEBUG(DEBUG_INFO,(__location__ " deferring ctdb_request_dmaster\n"));
+ return;
+ }
+
+ if (ctdb_lmaster(ctdb, &key) != ctdb->pnn) {
+ DEBUG(DEBUG_ERR, ("dmaster request to non-lmaster "
+ "db=%s lmaster=%u gen=%u curgen=%u\n",
+ ctdb_db->db_name, ctdb_lmaster(ctdb, &key),
+ hdr->generation, ctdb_db->generation));
+ ctdb_fatal(ctdb, "ctdb_req_dmaster to non-lmaster");
+ }
+
+ DEBUG(DEBUG_DEBUG,("pnn %u dmaster request on %08x for %u from %u\n",
+ ctdb->pnn, ctdb_hash(&key), c->dmaster, c->hdr.srcnode));
+
+ /* it's a protocol error if the sending node is not the current dmaster */
+ if (header.dmaster != hdr->srcnode) {
+ DEBUG(DEBUG_ALERT,("pnn %u dmaster request for new-dmaster %u from non-master %u real-dmaster=%u key %08x dbid 0x%08x gen=%u curgen=%u c->rsn=%llu header.rsn=%llu reqid=%u keyval=0x%08x\n",
+ ctdb->pnn, c->dmaster, hdr->srcnode, header.dmaster, ctdb_hash(&key),
+ ctdb_db->db_id, hdr->generation, ctdb->vnn_map->generation,
+ (unsigned long long)c->rsn, (unsigned long long)header.rsn, c->hdr.reqid,
+ (key.dsize >= 4)?(*(uint32_t *)key.dptr):0));
+ /* the mismatch is only tolerated when the local record has
+ rsn 0 and names this node as dmaster */
+ if (header.rsn != 0 || header.dmaster != ctdb->pnn) {
+ DEBUG(DEBUG_ERR,("ctdb_req_dmaster from non-master. Force a recovery.\n"));
+
+ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+ ctdb_ltdb_unlock(ctdb_db, key);
+ return;
+ }
+ }
+
+ /* an older RSN in the request is only logged, not rejected */
+ if (header.rsn > c->rsn) {
+ DEBUG(DEBUG_ALERT,("pnn %u dmaster request with older RSN new-dmaster %u from %u real-dmaster=%u key %08x dbid 0x%08x gen=%u curgen=%u c->rsn=%llu header.rsn=%llu reqid=%u\n",
+ ctdb->pnn, c->dmaster, hdr->srcnode, header.dmaster, ctdb_hash(&key),
+ ctdb_db->db_id, hdr->generation, ctdb->vnn_map->generation,
+ (unsigned long long)c->rsn, (unsigned long long)header.rsn, c->hdr.reqid));
+ }
+
+ /* use the rsn from the sending node */
+ header.rsn = c->rsn;
+
+ /* store the record flags from the sending node */
+ header.flags = record_flags;
+
+ /* check if the new dmaster is the lmaster, in which case we
+ skip the dmaster reply */
+ if (c->dmaster == ctdb->pnn) {
+ /* ctdb_become_dmaster() releases the chainlock itself */
+ ctdb_become_dmaster(ctdb_db, hdr, key, data, c->rsn, record_flags);
+ } else {
+ ctdb_send_dmaster_reply(ctdb_db, &header, key, data, c->dmaster, hdr->reqid);
+
+ ret = ctdb_ltdb_unlock(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+ }
+ }
+}
+
+/*
+ * Timer callback run when the sticky period of a record is over:
+ * frees the sticky record passed as private_data.
+ */
+static void ctdb_sticky_record_timeout(struct tevent_context *ev,
+				       struct tevent_timer *te,
+				       struct timeval t, void *private_data)
+{
+	talloc_free(talloc_get_type(private_data, struct ctdb_sticky_record));
+}
+
+/*
+ * Callback passed to trbt_insertarray32_callback() by
+ * ctdb_make_record_sticky(): when data is non-NULL it is logged and
+ * freed; parm is returned in either case.
+ */
+static void *ctdb_make_sticky_record_callback(void *parm, void *data)
+{
+	if (data == NULL) {
+		return parm;
+	}
+
+	DEBUG(DEBUG_ERR,("Already have sticky record registered. Free old %p and create new %p\n", data, parm));
+	talloc_free(data);
+	return parm;
+}
+
+/*
+ * Register the record for key as sticky.
+ *
+ * Nothing is done if the key already has an entry in
+ * ctdb_db->sticky_records.  Otherwise a new sticky record is inserted
+ * and a timer is armed that frees it again, via
+ * ctdb_sticky_record_timeout(), after tunable.sticky_duration seconds.
+ *
+ * Returns 0 on success or if the record is already sticky, -1 on
+ * allocation failure.
+ */
+static int
+ctdb_make_record_sticky(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+	struct ctdb_sticky_record *sr;
+	uint32_t *k;
+	int ret = -1;
+
+	k = ctdb_key_to_idkey(tmp_ctx, key);
+	if (k == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for sticky record\n"));
+		goto done;
+	}
+
+	if (trbt_lookuparray32(ctdb_db->sticky_records, k[0], &k[0]) != NULL) {
+		/* already sticky */
+		ret = 0;
+		goto done;
+	}
+
+	sr = talloc(ctdb_db->sticky_records, struct ctdb_sticky_record);
+	if (sr == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate sticky record structure\n"));
+		goto done;
+	}
+
+	sr->ctdb = ctdb;
+	sr->ctdb_db = ctdb_db;
+	sr->pindown = NULL;
+
+	DEBUG(DEBUG_ERR,("Make record sticky for %d seconds in db %s key:0x%08x.\n",
+			 ctdb->tunable.sticky_duration,
+			 ctdb_db->db_name, ctdb_hash(&key)));
+
+	trbt_insertarray32_callback(ctdb_db->sticky_records, k[0], &k[0], ctdb_make_sticky_record_callback, sr);
+
+	/* the timer is owned by the record and expires it */
+	tevent_add_timer(ctdb->ev, sr,
+			 timeval_current_ofs(ctdb->tunable.sticky_duration, 0),
+			 ctdb_sticky_record_timeout, sr);
+
+	ret = 0;
+
+done:
+	talloc_free(tmp_ctx);
+	return ret;
+}
+
+/*
+ * Carries a deferred request from pinned_down_destructor() to the
+ * timer callback pinned_down_requeue(), which processes it again.
+ */
+struct pinned_down_requeue_handle {
+ struct ctdb_context *ctdb;
+ /* the request packet to be processed again */
+ struct ctdb_req_header *hdr;
+};
+
+/*
+ * A request deferred while its record is pinned down. Allocated under
+ * the pindown context of the sticky record by
+ * ctdb_defer_pinned_down_request(); its destructor requeues the request.
+ */
+struct pinned_down_deferred_call {
+ struct ctdb_context *ctdb;
+ /* the deferred request packet */
+ struct ctdb_req_header *hdr;
+};
+
+/*
+ * Timer callback that processes a request again after its pin-down
+ * deferral has ended.  The packet is moved onto the ctdb context
+ * before it is processed, so it is not freed together with the handle.
+ */
+static void pinned_down_requeue(struct tevent_context *ev,
+				struct tevent_timer *te,
+				struct timeval t, void *private_data)
+{
+	struct pinned_down_requeue_handle *handle = talloc_get_type(private_data, struct pinned_down_requeue_handle);
+	struct ctdb_context *ctdb = handle->ctdb;
+	struct ctdb_req_header *hdr = handle->hdr;
+
+	talloc_steal(ctdb, hdr);
+	ctdb_call_input_pkt(ctdb, hdr);
+
+	talloc_free(handle);
+}
+
+/*
+ * talloc destructor of a deferred pinned-down request.
+ *
+ * Runs when the deferral is freed and schedules the deferred packet
+ * for immediate reprocessing by pinned_down_requeue().  The packet is
+ * moved onto a freshly allocated requeue handle for that purpose.
+ *
+ * If the handle or the timer cannot be allocated the request cannot be
+ * requeued: this is logged and the packet is dropped.  The destructor
+ * always returns 0, so freeing the deferral is never refused.
+ */
+static int pinned_down_destructor(struct pinned_down_deferred_call *pinned_down)
+{
+	struct ctdb_context *ctdb = pinned_down->ctdb;
+	struct pinned_down_requeue_handle *handle;
+	struct tevent_timer *te;
+
+	handle = talloc(ctdb, struct pinned_down_requeue_handle);
+	if (handle == NULL) {
+		/*
+		 * The packet was stolen onto pinned_down by
+		 * ctdb_defer_pinned_down_request(), so it stays attached
+		 * to the structure that is being freed.
+		 */
+		DEBUG(DEBUG_ERR,("Failed to allocate requeue handle for deferred pinned down request\n"));
+		return 0;
+	}
+
+	handle->ctdb = pinned_down->ctdb;
+	handle->hdr = pinned_down->hdr;
+	talloc_steal(handle, handle->hdr);
+
+	te = tevent_add_timer(ctdb->ev, handle, timeval_zero(),
+			      pinned_down_requeue, handle);
+	if (te == NULL) {
+		/* nothing would ever pick the handle up again */
+		DEBUG(DEBUG_ERR,("Failed to add timer to requeue deferred pinned down request\n"));
+		talloc_free(handle);
+	}
+
+	return 0;
+}
+
+/*
+ * Defer the request hdr while the record for key is pinned down.
+ *
+ * Returns 0 if the request was deferred; hdr is then owned by the
+ * deferral, which hangs off the pindown context of the sticky record.
+ * Returns -1 if the key has no sticky record, if the record is not
+ * pinned down at the moment, or on allocation failure.
+ */
+static int
+ctdb_defer_pinned_down_request(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_req_header *hdr)
+{
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+	struct pinned_down_deferred_call *pinned_down;
+	struct ctdb_sticky_record *sr;
+	uint32_t *k;
+
+	k = ctdb_key_to_idkey(tmp_ctx, key);
+	if (k == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for sticky record\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	sr = trbt_lookuparray32(ctdb_db->sticky_records, k[0], &k[0]);
+
+	/* the lookup key is not needed any more */
+	talloc_free(tmp_ctx);
+
+	if (sr == NULL || sr->pindown == NULL) {
+		return -1;
+	}
+
+	pinned_down = talloc(sr->pindown, struct pinned_down_deferred_call);
+	if (pinned_down == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate structure for deferred pinned down request\n"));
+		return -1;
+	}
+
+	pinned_down->ctdb = ctdb;
+	pinned_down->hdr = hdr;
+
+	/* freeing the deferral requeues the request */
+	talloc_set_destructor(pinned_down, pinned_down_destructor);
+	talloc_steal(pinned_down, hdr);
+
+	return 0;
+}
+
+/*
+ * qsort() comparison function ordering struct ctdb_db_hot_key entries
+ * by ascending count.  Returns -1, 0 or 1.
+ */
+static int hot_key_cmp(const void *a, const void *b)
+{
+	const struct ctdb_db_hot_key *lhs = a;
+	const struct ctdb_db_hot_key *rhs = b;
+
+	/* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
+	return (lhs->count > rhs->count) - (lhs->count < rhs->count);
+}
+
+/*
+ * Record count for key in the hot key table of the database.
+ *
+ * The table holds up to MAX_HOT_KEYS entries and is kept sorted by
+ * ascending count, so slot 0 always holds the smallest count.
+ *
+ *  - A key that is already in the table has its count raised (counts
+ *    are never lowered); the update is logged once the count has at
+ *    least doubled since it was last logged.
+ *  - A new key takes a free slot or, when the table is full, replaces
+ *    the entry in slot 0, provided its count is larger.
+ *
+ * If the copy of a new key cannot be allocated the table is left
+ * unchanged.
+ */
+static void
+ctdb_update_db_stat_hot_keys(struct ctdb_db_context *ctdb_db, TDB_DATA key,
+			     unsigned int count)
+{
+	unsigned int i, id;
+	char *keystr;
+	void *keycopy;
+
+	/*
+	 * If all slots are being used then only need to compare
+	 * against the count in the 0th slot, since it contains the
+	 * smallest count.
+	 */
+	if (ctdb_db->statistics.num_hot_keys == MAX_HOT_KEYS &&
+	    count <= ctdb_db->hot_keys[0].count) {
+		return;
+	}
+
+	/* see if we already know this key */
+	for (i = 0; i < MAX_HOT_KEYS; i++) {
+		if (key.dsize != ctdb_db->hot_keys[i].key.dsize) {
+			continue;
+		}
+		if (memcmp(key.dptr, ctdb_db->hot_keys[i].key.dptr, key.dsize)) {
+			continue;
+		}
+		/* found an entry for this key */
+		if (count <= ctdb_db->hot_keys[i].count) {
+			return;
+		}
+		if (count >= (2 * ctdb_db->hot_keys[i].last_logged_count)) {
+			keystr = hex_encode_talloc(ctdb_db,
+						   (unsigned char *)key.dptr,
+						   key.dsize);
+			D_NOTICE("Updated hot key database=%s key=%s count=%d\n",
+				 ctdb_db->db_name,
+				 keystr ? keystr : "" ,
+				 count);
+			TALLOC_FREE(keystr);
+			ctdb_db->hot_keys[i].last_logged_count = count;
+		}
+		ctdb_db->hot_keys[i].count = count;
+		goto sort_keys;
+	}
+
+	/*
+	 * Copy the key before the table is touched.  A slot with a
+	 * non-zero dsize but a NULL dptr would be dereferenced by the
+	 * memcmp() above on a later call, so on failure leave the
+	 * table exactly as it is.
+	 */
+	keycopy = talloc_memdup(ctdb_db, key.dptr, key.dsize);
+	if (keycopy == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for hot key statistics\n"));
+		return;
+	}
+
+	if (ctdb_db->statistics.num_hot_keys < MAX_HOT_KEYS) {
+		id = ctdb_db->statistics.num_hot_keys;
+		ctdb_db->statistics.num_hot_keys++;
+	} else {
+		/* table is full: replace the entry with the smallest count */
+		id = 0;
+	}
+
+	if (ctdb_db->hot_keys[id].key.dptr != NULL) {
+		talloc_free(ctdb_db->hot_keys[id].key.dptr);
+	}
+	ctdb_db->hot_keys[id].key.dsize = key.dsize;
+	ctdb_db->hot_keys[id].key.dptr = keycopy;
+	ctdb_db->hot_keys[id].count = count;
+
+	keystr = hex_encode_talloc(ctdb_db,
+				   (unsigned char *)key.dptr, key.dsize);
+	D_NOTICE("Added hot key database=%s key=%s count=%d\n",
+		 ctdb_db->db_name,
+		 keystr ? keystr : "" ,
+		 count);
+	talloc_free(keystr);
+	ctdb_db->hot_keys[id].last_logged_count = count;
+
+sort_keys:
+	qsort(&ctdb_db->hot_keys[0],
+	      ctdb_db->statistics.num_hot_keys,
+	      sizeof(struct ctdb_db_hot_key),
+	      hot_key_cmp);
+}
+
+/*
+  called when a CTDB_REQ_CALL packet comes in
+
+  The request is handled in one of these ways, checked in this order:
+   - deferred, because the record is pinned down, a dmaster migration
+     is in flight, the record lock is contended, or a read-only revoke
+     is in progress
+   - redirected, because this node is not the dmaster and holds no
+     delegations
+   - answered with a read-only delegation (CTDB_WANT_READONLY with
+     CTDB_FETCH_WITH_HEADER_FUNC)
+   - answered by migrating the record to the calling node
+   - answered by running the call locally and sending a CTDB_REPLY_CALL
+
+  Once ctdb_ltdb_lock_fetch_requeue() has returned 0 the record lock is
+  held; every return path after that point releases it.
+*/
+void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	struct ctdb_req_call_old *c = (struct ctdb_req_call_old *)hdr;
+	TDB_DATA data;
+	struct ctdb_reply_call_old *r;
+	int ret, len;
+	struct ctdb_ltdb_header header;
+	struct ctdb_call *call;
+	struct ctdb_db_context *ctdb_db;
+	int tmp_count, bucket;
+
+	if (ctdb->methods == NULL) {
+		DEBUG(DEBUG_INFO,(__location__ " Failed ctdb_request_call. Transport is DOWN\n"));
+		return;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, c->db_id);
+	if (!ctdb_db) {
+		ctdb_send_error(ctdb, hdr, -1,
+				"Unknown database in request. db_id==0x%08x",
+				c->db_id);
+		return;
+	}
+
+	call = talloc(hdr, struct ctdb_call);
+	CTDB_NO_MEMORY_FATAL(ctdb, call);
+
+	/* key and call data point into the request packet, no copies */
+	call->call_id  = c->callid;
+	call->key.dptr = c->data;
+	call->key.dsize = c->keylen;
+	call->call_data.dptr = c->data + c->keylen;
+	call->call_data.dsize = c->calldatalen;
+	call->reply_data.dptr  = NULL;
+	call->reply_data.dsize = 0;
+
+	/* If this record is pinned down we should defer the
+	   request until the pindown times out
+	*/
+	if (ctdb_db_sticky(ctdb_db)) {
+		if (ctdb_defer_pinned_down_request(ctdb, ctdb_db, call->key, hdr) == 0) {
+			DEBUG(DEBUG_WARNING,
+			      ("Defer request for pinned down record in %s\n", ctdb_db->db_name));
+			talloc_free(call);
+			return;
+		}
+	}
+
+	if (dmaster_defer_add(ctdb_db, hdr, call->key) == 0) {
+		talloc_free(call);
+		return;
+	}
+
+	/* determine if we are the dmaster for this key. This also
+	   fetches the record data (if any), thus avoiding a 2nd fetch of the data
+	   if the call will be answered locally */
+
+	ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, call->key, &header, hdr, &data,
+					   ctdb_call_input_pkt, ctdb, false);
+	if (ret == -1) {
+		ctdb_send_error(ctdb, hdr, ret, "ltdb fetch failed in ctdb_request_call");
+		talloc_free(call);
+		return;
+	}
+	if (ret == -2) {
+		DEBUG(DEBUG_INFO,(__location__ " deferred ctdb_request_call\n"));
+		talloc_free(call);
+		return;
+	}
+
+	/* Dont do READONLY if we don't have a tracking database */
+	if ((c->flags & CTDB_WANT_READONLY) && !ctdb_db_readonly(ctdb_db)) {
+		c->flags &= ~CTDB_WANT_READONLY;
+	}
+
+	if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) {
+		header.flags &= ~CTDB_REC_RO_FLAGS;
+		CTDB_INCREMENT_STAT(ctdb, total_ro_revokes);
+		CTDB_INCREMENT_DB_STAT(ctdb_db, db_ro_revokes);
+		if (ctdb_ltdb_store(ctdb_db, call->key, &header, data) != 0) {
+			ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag");
+		}
+		/* and clear out the tracking data */
+		if (tdb_delete(ctdb_db->rottdb, call->key) != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to clear out trackingdb record\n"));
+		}
+	}
+
+	/* if we are revoking, we must defer all other calls until the revoke
+	 * had completed.
+	 */
+	if (header.flags & CTDB_REC_RO_REVOKING_READONLY) {
+		talloc_free(data.dptr);
+		ret = ctdb_ltdb_unlock(ctdb_db, call->key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+
+		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, call->key, hdr, ctdb_call_input_pkt, ctdb) != 0) {
+			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
+		}
+		talloc_free(call);
+		return;
+	}
+
+	/*
+	 * If we are not the dmaster and are not hosting any delegations,
+	 * then we redirect the request to the node than can answer it
+	 * (the lmaster or the dmaster).
+	 */
+	if ((header.dmaster != ctdb->pnn)
+	    && (!(header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) ) {
+		talloc_free(data.dptr);
+		ctdb_call_send_redirect(ctdb, ctdb_db, call->key, c, &header);
+
+		ret = ctdb_ltdb_unlock(ctdb_db, call->key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+		talloc_free(call);
+		return;
+	}
+
+	/* a non read-only request for a delegated record starts a revoke */
+	if ( (!(c->flags & CTDB_WANT_READONLY))
+	     && (header.flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY)) ) {
+		header.flags |= CTDB_REC_RO_REVOKING_READONLY;
+		if (ctdb_ltdb_store(ctdb_db, call->key, &header, data) != 0) {
+			ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set");
+		}
+		ret = ctdb_ltdb_unlock(ctdb_db, call->key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+
+		if (ctdb_start_revoke_ro_record(ctdb, ctdb_db, call->key, &header, data) != 0) {
+			ctdb_fatal(ctdb, "Failed to start record revoke");
+		}
+		talloc_free(data.dptr);
+
+		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, call->key, hdr, ctdb_call_input_pkt, ctdb) != 0) {
+			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
+		}
+		talloc_free(call);
+
+		return;
+	}
+
+	/* If this is the first request for delegation. bump rsn and set
+	 * the delegations flag
+	 */
+	if ((c->flags & CTDB_WANT_READONLY)
+	    && (c->callid == CTDB_FETCH_WITH_HEADER_FUNC)
+	    && (!(header.flags & CTDB_REC_RO_HAVE_DELEGATIONS))) {
+		header.rsn     += 3;
+		header.flags   |= CTDB_REC_RO_HAVE_DELEGATIONS;
+		if (ctdb_ltdb_store(ctdb_db, call->key, &header, data) != 0) {
+			ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set");
+		}
+	}
+	if ((c->flags & CTDB_WANT_READONLY)
+	    && ((unsigned int)call->call_id == CTDB_FETCH_WITH_HEADER_FUNC)) {
+		TDB_DATA tdata;
+
+		/* remember the requesting node in the tracking database */
+		tdata = tdb_fetch(ctdb_db->rottdb, call->key);
+		if (ctdb_trackingdb_add_pnn(ctdb, &tdata, c->hdr.srcnode) != 0) {
+			ctdb_fatal(ctdb, "Failed to add node to trackingdb");
+		}
+		if (tdb_store(ctdb_db->rottdb, call->key, tdata, TDB_REPLACE) != 0) {
+			ctdb_fatal(ctdb, "Failed to store trackingdb data");
+		}
+		free(tdata.dptr);
+
+		ret = ctdb_ltdb_unlock(ctdb_db, call->key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+
+		/* reply payload is the ltdb header followed by the record data */
+		len = offsetof(struct ctdb_reply_call_old, data) + data.dsize + sizeof(struct ctdb_ltdb_header);
+		r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REPLY_CALL, len,
+					    struct ctdb_reply_call_old);
+		CTDB_NO_MEMORY_FATAL(ctdb, r);
+		r->hdr.destnode  = c->hdr.srcnode;
+		r->hdr.reqid     = c->hdr.reqid;
+		r->hdr.generation = ctdb_db->generation;
+		r->status        = 0;
+		r->datalen       = data.dsize + sizeof(struct ctdb_ltdb_header);
+		header.rsn      -= 2;
+		header.flags   |= CTDB_REC_RO_HAVE_READONLY;
+		header.flags   &= ~CTDB_REC_RO_HAVE_DELEGATIONS;
+		memcpy(&r->data[0], &header, sizeof(struct ctdb_ltdb_header));
+
+		if (data.dsize) {
+			memcpy(&r->data[sizeof(struct ctdb_ltdb_header)], data.dptr, data.dsize);
+		}
+
+		ctdb_queue_packet(ctdb, &r->hdr);
+		CTDB_INCREMENT_STAT(ctdb, total_ro_delegations);
+		CTDB_INCREMENT_DB_STAT(ctdb_db, db_ro_delegations);
+
+		talloc_free(r);
+		talloc_free(call);
+		return;
+	}
+
+	/* hop count statistics: bucket index is the bit length of hopcount */
+	CTDB_UPDATE_STAT(ctdb, max_hop_count, c->hopcount);
+	tmp_count = c->hopcount;
+	bucket = 0;
+	while (tmp_count) {
+		tmp_count >>= 1;
+		bucket++;
+	}
+	if (bucket >= MAX_COUNT_BUCKETS) {
+		bucket = MAX_COUNT_BUCKETS - 1;
+	}
+	CTDB_INCREMENT_STAT(ctdb, hop_count_bucket[bucket]);
+	CTDB_INCREMENT_DB_STAT(ctdb_db, hop_count_bucket[bucket]);
+
+	/* If this database supports sticky records, then check if the
+	   hopcount is big. If it is it means the record is hot and we
+	   should make it sticky.
+	*/
+	if (ctdb_db_sticky(ctdb_db) &&
+	    c->hopcount >= ctdb->tunable.hopcount_make_sticky) {
+		ctdb_make_record_sticky(ctdb, ctdb_db, call->key);
+	}
+
+	/* Try if possible to migrate the record off to the caller node.
+	 * From the clients perspective a fetch of the data is just as
+	 * expensive as a migration.
+	 */
+	if (c->hdr.srcnode != ctdb->pnn) {
+		if (ctdb_db->persistent_state) {
+			/*
+			 * The key is not NUL terminated, so bound the
+			 * print by its length.
+			 */
+			DEBUG(DEBUG_INFO, (__location__ " refusing migration"
+			      " of key %.*s while transaction is active\n",
+			      (int)call->key.dsize,
+			      (const char *)call->key.dptr));
+		} else {
+			DEBUG(DEBUG_DEBUG,("pnn %u starting migration of %08x to %u\n",
+				 ctdb->pnn, ctdb_hash(&(call->key)), c->hdr.srcnode));
+			ctdb_call_send_dmaster(ctdb_db, c, &header, &(call->key), &data);
+		}
+
+		/*
+		 * Both branches must drop the fetched data and the record
+		 * lock taken above, otherwise a refused migration would
+		 * leave the chain locked.
+		 */
+		talloc_free(data.dptr);
+
+		ret = ctdb_ltdb_unlock(ctdb_db, call->key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+		talloc_free(call);
+		return;
+	}
+
+	ret = ctdb_call_local(ctdb_db, call, &header, hdr, &data, true);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_call_local failed\n"));
+		call->status = -1;
+	}
+
+	ret = ctdb_ltdb_unlock(ctdb_db, call->key);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+	}
+
+	len = offsetof(struct ctdb_reply_call_old, data) + call->reply_data.dsize;
+	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REPLY_CALL, len,
+				    struct ctdb_reply_call_old);
+	CTDB_NO_MEMORY_FATAL(ctdb, r);
+	r->hdr.destnode  = hdr->srcnode;
+	r->hdr.reqid     = hdr->reqid;
+	r->hdr.generation = ctdb_db->generation;
+	r->status        = call->status;
+	r->datalen       = call->reply_data.dsize;
+	if (call->reply_data.dsize) {
+		memcpy(&r->data[0], call->reply_data.dptr, call->reply_data.dsize);
+	}
+
+	ctdb_queue_packet(ctdb, &r->hdr);
+
+	talloc_free(r);
+	talloc_free(call);
+}
+
+/**
+ * called when a CTDB_REPLY_CALL packet comes in
+ *
+ * This packet comes in response to a CTDB_REQ_CALL request packet. It
+ * contains any reply data from the call
+ *
+ * For CTDB_FETCH_WITH_HEADER_FUNC replies the payload starts with a
+ * struct ctdb_ltdb_header.  When that header carries
+ * CTDB_REC_RO_HAVE_READONLY and a higher rsn than the local copy, the
+ * record is stored locally before the reply is handed to the waiting
+ * call state.
+ */
+void ctdb_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	struct ctdb_reply_call_old *c = (struct ctdb_reply_call_old *)hdr;
+	struct ctdb_call_state *state;
+
+	state = reqid_find(ctdb->idr, hdr->reqid, struct ctdb_call_state);
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " reqid %u not found\n", hdr->reqid));
+		return;
+	}
+
+	if (hdr->reqid != state->reqid) {
+		/* we found a record but it was the wrong one */
+		DEBUG(DEBUG_ERR, ("Dropped orphaned call reply with reqid:%u\n",hdr->reqid));
+		return;
+	}
+
+
+	/* read only delegation processing */
+	/* If we got a FETCH_WITH_HEADER we should check if this is a ro
+	 * delegation since we may need to update the record header
+	 */
+	if (state->c->callid == CTDB_FETCH_WITH_HEADER_FUNC) {
+		struct ctdb_db_context *ctdb_db = state->ctdb_db;
+		struct ctdb_ltdb_header *header;
+		struct ctdb_ltdb_header oldheader;
+		TDB_DATA key, data, olddata;
+		int ret;
+
+		/*
+		 * Validate the payload size before looking at the
+		 * embedded header, so a short reply is never read past
+		 * its end.
+		 */
+		if (c->datalen < sizeof(struct ctdb_ltdb_header)) {
+			DEBUG(DEBUG_ERR,(__location__ " Got FETCH_WITH_HEADER reply with too little data: %u bytes\n", (unsigned)c->datalen));
+			goto finished_ro;
+		}
+		header = (struct ctdb_ltdb_header *)&c->data[0];
+
+		if (!(header->flags & CTDB_REC_RO_HAVE_READONLY)) {
+			goto finished_ro;
+		}
+
+		key.dsize = state->c->keylen;
+		key.dptr  = state->c->data;
+		ret = ctdb_ltdb_lock_requeue(ctdb_db, key, hdr,
+					     ctdb_call_input_pkt, ctdb, false);
+		if (ret == -2) {
+			/* packet was requeued, it is processed again later */
+			return;
+		}
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to get lock in ctdb_reply_call\n"));
+			return;
+		}
+
+		ret = ctdb_ltdb_fetch(ctdb_db, key, &oldheader, state, &olddata);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR, ("Failed to fetch old record in ctdb_reply_call\n"));
+			ctdb_ltdb_unlock(ctdb_db, key);
+			goto finished_ro;
+		}
+
+		/* only store the delegated copy if it is newer than ours */
+		if (header->rsn <= oldheader.rsn) {
+			ctdb_ltdb_unlock(ctdb_db, key);
+			goto finished_ro;
+		}
+
+		data.dsize = c->datalen - sizeof(struct ctdb_ltdb_header);
+		data.dptr  = &c->data[sizeof(struct ctdb_ltdb_header)];
+		ret = ctdb_ltdb_store(ctdb_db, key, header, data);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR, ("Failed to store new record in ctdb_reply_call\n"));
+			ctdb_ltdb_unlock(ctdb_db, key);
+			goto finished_ro;
+		}
+
+		ctdb_ltdb_unlock(ctdb_db, key);
+	}
+finished_ro:
+
+	/* reply data points into the packet, which the state now owns */
+	state->call->reply_data.dptr = c->data;
+	state->call->reply_data.dsize = c->datalen;
+	state->call->status = c->status;
+
+	talloc_steal(state, c);
+
+	state->state = CTDB_CALL_DONE;
+	if (state->async.fn) {
+		state->async.fn(state);
+	}
+}
+
+
+/**
+ * Handle an incoming CTDB_REPLY_DMASTER packet.
+ *
+ * The lmaster sends this in response to a CTDB_REQ_CALL request: the
+ * current dmaster wants to hand the dmaster role for the record over
+ * to this node.  The packet carries the key, the record data and,
+ * when the packet is long enough, a trailing uint32_t of record flags.
+ */
+void ctdb_reply_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	struct ctdb_reply_dmaster_old *c = (struct ctdb_reply_dmaster_old *)hdr;
+	struct ctdb_db_context *ctdb_db;
+	uint32_t record_flags = 0;
+	TDB_DATA key, data;
+	size_t len;
+	int ret;
+
+	ctdb_db = find_ctdb_db(ctdb, c->db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,("Unknown db_id 0x%x in ctdb_reply_dmaster\n", c->db_id));
+		return;
+	}
+
+	/* payload layout: key bytes, then data bytes, then optional flags */
+	key.dsize = c->keylen;
+	key.dptr = c->data;
+	data.dsize = c->datalen;
+	data.dptr = c->data + key.dsize;
+
+	len = offsetof(struct ctdb_reply_dmaster_old, data)
+		+ key.dsize + data.dsize + sizeof(uint32_t);
+	if (c->hdr.length >= len) {
+		memcpy(&record_flags, &c->data[c->keylen + c->datalen],
+		       sizeof(record_flags));
+	}
+
+	dmaster_defer_setup(ctdb_db, hdr, key);
+
+	ret = ctdb_ltdb_lock_requeue(ctdb_db, key, hdr,
+				     ctdb_call_input_pkt, ctdb, false);
+	switch (ret) {
+	case 0:
+		break;
+	case -2:
+		/* packet was requeued; nothing more to do for now */
+		return;
+	default:
+		DEBUG(DEBUG_ERR,(__location__ " Failed to get lock in ctdb_reply_dmaster\n"));
+		return;
+	}
+
+	ctdb_become_dmaster(ctdb_db, hdr, key, data, c->rsn, record_flags);
+}
+
+
+/*
+  Handle an incoming CTDB_REPLY_ERROR packet.
+
+  The matching call state is marked CTDB_CALL_ERROR, takes ownership
+  of the packet (the error message points into it) and its async
+  callback, if any, is invoked.
+*/
+void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	struct ctdb_reply_error_old *reply = (struct ctdb_reply_error_old *)hdr;
+	struct ctdb_call_state *state =
+		reqid_find(ctdb->idr, hdr->reqid, struct ctdb_call_state);
+
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR,("pnn %u Invalid reqid %u in ctdb_reply_error\n",
+			 ctdb->pnn, hdr->reqid));
+		return;
+	}
+
+	if (state->reqid != hdr->reqid) {
+		/* the lookup succeeded but returned a different request */
+		DEBUG(DEBUG_ERR, ("Dropped orphaned error reply with reqid:%u\n",hdr->reqid));
+		return;
+	}
+
+	talloc_steal(state, reply);
+
+	state->state  = CTDB_CALL_ERROR;
+	state->errmsg = (char *)reply->msg;
+
+	if (state->async.fn == NULL) {
+		return;
+	}
+	state->async.fn(state);
+}
+
+
+/*
+  talloc destructor for a ctdb_call_state: unlink it from the
+  database's pending call list and release its reqid
+*/
+static int ctdb_call_destructor(struct ctdb_call_state *state)
+{
+	struct ctdb_db_context *ctdb_db = state->ctdb_db;
+
+	DLIST_REMOVE(ctdb_db->pending_calls, state);
+	reqid_remove(ctdb_db->ctdb->idr, state->reqid);
+
+	return 0;
+}
+
+
+/*
+  Resend a pending ctdb_call after a reconfigure event.
+
+  The call gets a fresh reqid and the database's current generation,
+  and is queued to this node, from where it is redirected as needed.
+*/
+static void ctdb_call_resend(struct ctdb_call_state *state)
+{
+	struct ctdb_db_context *ctdb_db = state->ctdb_db;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_req_header *pkt = &state->c->hdr;
+
+	state->generation = ctdb_db->generation;
+
+	/* switch to a new reqid so that a late reply to the old one is dropped */
+	reqid_remove(ctdb->idr, state->reqid);
+	state->reqid = reqid_new(ctdb->idr, state);
+	pkt->reqid = state->reqid;
+
+	/* stamp the new generation so the request is valid with the new vnn_map */
+	pkt->generation = state->generation;
+
+	/* address the packet to ourselves, it will be redirected appropriately */
+	pkt->destnode = ctdb->pnn;
+
+	ctdb_queue_packet(ctdb, pkt);
+	D_INFO("resent ctdb_call for db %s reqid %u generation %u\n",
+	       ctdb_db->db_name,
+	       state->reqid,
+	       state->generation);
+}
+
+/*
+  Resend every pending call of one database (used on recovery) and
+  log how many were resent, unless there were none.
+ */
+void ctdb_call_resend_db(struct ctdb_db_context *ctdb_db)
+{
+	struct ctdb_call_state *cur = ctdb_db->pending_calls;
+	unsigned int resent = 0;
+
+	while (cur != NULL) {
+		/* fetch the successor before resending the current entry */
+		struct ctdb_call_state *following = cur->next;
+
+		ctdb_call_resend(cur);
+		resent++;
+		cur = following;
+	}
+
+	/* Only log when something was actually resent */
+	if (resent != 0) {
+		D_NOTICE("Resent calls for database=%s, generation=%u, count=%u\n",
+			 ctdb_db->db_name,
+			 ctdb_db->generation,
+			 resent);
+	}
+}
+
+/*
+  Resend the pending calls of every database attached to this context.
+ */
+void ctdb_call_resend_all(struct ctdb_context *ctdb)
+{
+	struct ctdb_db_context *db = ctdb->db_list;
+
+	while (db != NULL) {
+		ctdb_call_resend_db(db);
+		db = db->next;
+	}
+}
+
+/*
+  Timer callback scheduled by ctdb_call_local_send().
+
+  Running from the event loop gives the caller time to set up async.fn
+  on the returned state before it is invoked here.
+
+  private_data must be a struct ctdb_call_state; a pointer of any other
+  talloc type aborts instead of being dereferenced as NULL.
+*/
+static void call_local_trigger(struct tevent_context *ev,
+			       struct tevent_timer *te,
+			       struct timeval t, void *private_data)
+{
+	struct ctdb_call_state *state = talloc_get_type_abort(
+		private_data, struct ctdb_call_state);
+
+	if (state->async.fn) {
+		state->async.fn(state);
+	}
+}
+
+
+/*
+  Build an event driven local ctdb_call.
+
+  The call is executed straight away through ctdb_call_local(), but
+  completion is signalled from a zero-delay timer, so locally processed
+  requests follow the same event driven pattern as remote ones.  The
+  returned state takes ownership of data->dptr.
+*/
+struct ctdb_call_state *ctdb_call_local_send(struct ctdb_db_context *ctdb_db,
+					     struct ctdb_call *call,
+					     struct ctdb_ltdb_header *header,
+					     TDB_DATA *data)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_call_state *state;
+	int rc;
+
+	state = talloc_zero(ctdb_db, struct ctdb_call_state);
+	CTDB_NO_MEMORY_NULL(ctdb, state);
+
+	talloc_steal(state, data->dptr);
+
+	state->ctdb_db = ctdb_db;
+	state->state = CTDB_CALL_DONE;
+
+	/* the state carries its own copy of the call description */
+	state->call = talloc(state, struct ctdb_call);
+	CTDB_NO_MEMORY_NULL(ctdb, state->call);
+	*(state->call) = *call;
+
+	rc = ctdb_call_local(ctdb_db, state->call, header, state, data, true);
+	if (rc != 0) {
+		DEBUG(DEBUG_DEBUG,("ctdb_call_local() failed, ignoring return code %d\n", rc));
+	}
+
+	/* fire the completion callback from the event loop */
+	tevent_add_timer(ctdb->ev, state, timeval_zero(),
+			 call_local_trigger, state);
+
+	return state;
+}
+
+
+/*
+  Start a remote ctdb call (async send), called in daemon context.
+
+  Builds a CTDB_REQ_CALL packet addressed to header->dmaster, adds the
+  call state to the database's pending list and queues the packet.
+  Never blocks.  Returns NULL when the transport is down or an
+  allocation fails.
+*/
+struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctdb_db,
+						     struct ctdb_call *call,
+						     struct ctdb_ltdb_header *header)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_call_state *state;
+	struct ctdb_req_call_old *pkt;
+	uint32_t len;
+
+	if (ctdb->methods == NULL) {
+		DEBUG(DEBUG_INFO,(__location__ " Failed send packet. Transport is down\n"));
+		return NULL;
+	}
+
+	state = talloc_zero(ctdb_db, struct ctdb_call_state);
+	CTDB_NO_MEMORY_NULL(ctdb, state);
+	state->call = talloc(state, struct ctdb_call);
+	CTDB_NO_MEMORY_NULL(ctdb, state->call);
+
+	state->ctdb_db = ctdb_db;
+	state->generation = ctdb_db->generation;
+	state->state = CTDB_CALL_WAIT;
+	state->reqid = reqid_new(ctdb->idr, state);
+
+	/* the packet payload is the key followed directly by the call data */
+	len = offsetof(struct ctdb_req_call_old, data) + call->key.dsize +
+		call->call_data.dsize;
+
+	pkt = ctdb_transport_allocate(ctdb,
+				      state,
+				      CTDB_REQ_CALL,
+				      len,
+				      struct ctdb_req_call_old);
+	CTDB_NO_MEMORY_NULL(ctdb, pkt);
+	state->c = pkt;
+
+	pkt->hdr.destnode = header->dmaster;
+	pkt->hdr.reqid = state->reqid;
+	pkt->hdr.generation = ctdb_db->generation;
+	pkt->flags = call->flags;
+	pkt->db_id = ctdb_db->db_id;
+	pkt->callid = call->call_id;
+	pkt->hopcount = 0;
+	pkt->keylen = call->key.dsize;
+	pkt->calldatalen = call->call_data.dsize;
+
+	memcpy(&pkt->data[0], call->key.dptr, call->key.dsize);
+	memcpy(&pkt->data[call->key.dsize],
+	       call->call_data.dptr,
+	       call->call_data.dsize);
+
+	/* the saved call refers to the packet's copies of key and data */
+	*(state->call) = *call;
+	state->call->key.dptr = &pkt->data[0];
+	state->call->call_data.dptr = &pkt->data[call->key.dsize];
+
+	DLIST_ADD(ctdb_db->pending_calls, state);
+
+	talloc_set_destructor(state, ctdb_call_destructor);
+	ctdb_queue_packet(ctdb, &state->c->hdr);
+
+	return state;
+}
+
+/*
+  Finish a remote ctdb call (async recv), called in daemon context.
+
+  Runs the event loop until the call has left the waiting state, then
+  copies reply data and status into *call.  The state is freed in all
+  cases.  Returns 0 on success, or -1 after recording the error message
+  when the call ended in any state other than CTDB_CALL_DONE.
+*/
+int ctdb_daemon_call_recv(struct ctdb_call_state *state, struct ctdb_call *call)
+{
+	struct ctdb_context *ctdb = state->ctdb_db->ctdb;
+
+	while (state->state < CTDB_CALL_DONE) {
+		tevent_loop_once(state->ctdb_db->ctdb->ev);
+	}
+
+	if (state->state != CTDB_CALL_DONE) {
+		ctdb_set_error(ctdb, "%s", state->errmsg);
+		talloc_free(state);
+		return -1;
+	}
+
+	if (state->call->reply_data.dsize == 0) {
+		call->reply_data.dptr = NULL;
+		call->reply_data.dsize = 0;
+	} else {
+		/* the reply lives in the state, so hand the caller a copy */
+		call->reply_data.dptr = talloc_memdup(call,
+						      state->call->reply_data.dptr,
+						      state->call->reply_data.dsize);
+		call->reply_data.dsize = state->call->reply_data.dsize;
+	}
+	call->status = state->call->status;
+
+	talloc_free(state);
+	return 0;
+}
+
+
+struct revokechild_deferred_call { /* one request held back while a read-only revoke is in progress */
+ struct revokechild_deferred_call *prev, *next; /* linkage used by the DLIST_* macros */
+ struct ctdb_context *ctdb; /* context passed to ctdb_add_revoke_deferred_call() */
+ struct ctdb_req_header *hdr; /* deferred request packet, talloc_steal()ed onto this struct */
+ deferred_requeue_fn fn; /* invoked as fn(ctx, hdr) by deferred_call_requeue() */
+ void *ctx; /* first argument for fn; also the talloc parent of this struct */
+ struct revokechild_handle *rev_hdl; /* handle whose deferred_call_list the destructor unlinks from */
+};
+
+struct revokechild_handle { /* tracks one forked revoke child and the calls waiting on it */
+ struct revokechild_handle *next, *prev; /* linkage in ctdb_db->revokechild_active */
+ struct ctdb_context *ctdb; /* used to kill the child and schedule requeue timers */
+ struct ctdb_db_context *ctdb_db; /* database the revoked record belongs to */
+ struct tevent_fd *fde; /* read event on fd[0], handled by revokechild_handler() */
+ int status; /* 0 initially; revokechild_handler() sets -1 on failure */
+ int fd[2]; /* pipe: parent reads fd[0], child writes its result to fd[1]; -1 when not open */
+ pid_t child; /* pid returned by ctdb_fork() */
+ TDB_DATA key; /* talloc copy of the key of the record being revoked */
+ struct revokechild_deferred_call *deferred_call_list; /* calls requeued by revokechild_destructor() */
+};
+
+/*
+ * Timer callback: requeue every deferred call on the list passed as
+ * private_data.  Each entry is unlinked, handed to its requeue
+ * function and then freed.
+ */
+static void deferred_call_requeue(struct tevent_context *ev,
+				  struct tevent_timer *te,
+				  struct timeval t, void *private_data)
+{
+	struct revokechild_deferred_call *pending = talloc_get_type_abort(
+		private_data, struct revokechild_deferred_call);
+	struct revokechild_deferred_call *entry;
+
+	while ((entry = pending) != NULL) {
+		/*
+		 * Clear the destructor first so that freeing the entry
+		 * does not run deferred_call_destructor() on it.
+		 */
+		talloc_set_destructor(entry, NULL);
+		DLIST_REMOVE(pending, entry);
+		entry->fn(entry->ctx, entry->hdr);
+		talloc_free(entry);
+	}
+}
+
+/*
+ * talloc destructor for a deferred call: unlink it from the deferred
+ * list of the revoke handle it is attached to.
+ */
+static int deferred_call_destructor(struct revokechild_deferred_call *dcall)
+{
+	DLIST_REMOVE(dcall->rev_hdl->deferred_call_list, dcall);
+
+	return 0;
+}
+
+/*
+ * talloc destructor for a revoke child handle.
+ *
+ * Releases the fd event and any open pipe ends, kills the child,
+ * removes the handle from the database's active list and reschedules
+ * the deferred calls.
+ */
+static int revokechild_destructor(struct revokechild_handle *rev_hdl)
+{
+	struct revokechild_deferred_call *immediate = NULL;
+	struct revokechild_deferred_call *postponed = NULL;
+	struct revokechild_deferred_call *dcall;
+	int i;
+
+	if (rev_hdl->fde != NULL) {
+		talloc_free(rev_hdl->fde);
+	}
+
+	for (i = 0; i < 2; i++) {
+		if (rev_hdl->fd[i] != -1) {
+			close(rev_hdl->fd[i]);
+		}
+	}
+	ctdb_kill(rev_hdl->ctdb, rev_hdl->child, SIGKILL);
+
+	DLIST_REMOVE(rev_hdl->ctdb_db->revokechild_active, rev_hdl);
+
+	/*
+	 * After a successful revoke, calls needing write access are
+	 * processed first and read-only requests get a 1 second grace
+	 * delay.
+	 *
+	 * After a failed revoke, most likely caused by node failure,
+	 * every pending request is delayed so that the database can be
+	 * recovered.
+	 */
+	while ((dcall = rev_hdl->deferred_call_list) != NULL) {
+		bool delay = true;
+
+		DLIST_REMOVE(rev_hdl->deferred_call_list, dcall);
+
+		if (rev_hdl->status == 0) {
+			struct ctdb_req_call_old *c =
+				(struct ctdb_req_call_old *)dcall->hdr;
+
+			delay = (c->flags & CTDB_WANT_READONLY) != 0;
+		}
+
+		if (delay) {
+			DLIST_ADD(postponed, dcall);
+		} else {
+			DLIST_ADD(immediate, dcall);
+		}
+	}
+
+	if (immediate != NULL) {
+		tevent_add_timer(rev_hdl->ctdb->ev,
+				 rev_hdl->ctdb_db,
+				 tevent_timeval_current_ofs(0, 0),
+				 deferred_call_requeue,
+				 immediate);
+	}
+
+	if (postponed != NULL) {
+		tevent_add_timer(rev_hdl->ctdb->ev,
+				 rev_hdl->ctdb_db,
+				 tevent_timeval_current_ofs(1, 0),
+				 deferred_call_requeue,
+				 postponed);
+	}
+
+	return 0;
+}
+
+/*
+ * fd event handler for the pipe from the revoke child.
+ *
+ * Reads the one byte status written by the child, marks the handle as
+ * failed when the read fails or the status is non-zero, and frees the
+ * handle.
+ */
+static void revokechild_handler(struct tevent_context *ev,
+				struct tevent_fd *fde,
+				uint16_t flags, void *private_data)
+{
+	struct revokechild_handle *rev_hdl =
+		talloc_get_type(private_data, struct revokechild_handle);
+	char result;
+	int nread;
+
+	nread = sys_read(rev_hdl->fd[0], &result, 1);
+	if (nread != 1) {
+		DEBUG(DEBUG_ERR,("Failed to read status from revokechild. errno:%d\n", errno));
+		rev_hdl->status = -1;
+	} else if (result != 0) {
+		DEBUG(DEBUG_ERR,("revokechild returned failure. status:%d\n", result));
+		rev_hdl->status = -1;
+	}
+
+	/* success or failure, the handle is finished with */
+	talloc_free(rev_hdl);
+}
+
+struct ctdb_revoke_state { /* bookkeeping for ctdb_revoke_all_delegations() */
+ struct ctdb_db_context *ctdb_db; /* database holding the record */
+ TDB_DATA key; /* record key sent in every update-record control */
+ struct ctdb_ltdb_header *header; /* header sent in every update-record control */
+ TDB_DATA data; /* record data sent in every update-record control */
+ int count; /* controls sent by revoke_send_cb() and not yet answered */
+ int status; /* 0 until a send, a reply or the timeout reports failure, then -1 */
+ int finished; /* set to 1 when count drops to 0 or the timeout fires */
+};
+
+/*
+ * Completion callback for one update-record control sent during a
+ * revoke.  Records a failure in the revoke state and marks the revoke
+ * finished once no controls remain outstanding.
+ */
+static void update_record_cb(struct ctdb_client_control_state *state)
+{
+	struct ctdb_revoke_state *revoke_state;
+	int32_t res;
+	int ret;
+
+	if (state == NULL) {
+		return;
+	}
+	revoke_state = state->async.private_data;
+
+	/* clear the callback before collecting the result */
+	state->async.fn = NULL;
+	ret = ctdb_control_recv(state->ctdb, state, state, NULL, &res, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,("Recv for revoke update record failed ret:%d res:%d\n", ret, res));
+		revoke_state->status = -1;
+	}
+
+	if (--revoke_state->count <= 0) {
+		revoke_state->finished = 1;
+	}
+}
+
+/*
+ * Tracking database traverse callback: send an update-record control
+ * for the record being revoked to node "pnn" and count it as
+ * outstanding.  A send failure is recorded in the revoke state.
+ */
+static void revoke_send_cb(struct ctdb_context *ctdb, uint32_t pnn, void *private_data)
+{
+	struct ctdb_revoke_state *revoke_state = private_data;
+	struct ctdb_client_control_state *ctrl;
+	struct timeval timeout;
+
+	timeout = timeval_current_ofs(ctdb->tunable.control_timeout,0);
+
+	ctrl = ctdb_ctrl_updaterecord_send(ctdb, revoke_state, timeout, pnn,
+					   revoke_state->ctdb_db,
+					   revoke_state->key,
+					   revoke_state->header,
+					   revoke_state->data);
+	if (ctrl == NULL) {
+		DEBUG(DEBUG_ERR,("Failure to send update record to revoke readonly delegation\n"));
+		revoke_state->status = -1;
+		return;
+	}
+
+	ctrl->async.private_data = revoke_state;
+	ctrl->async.fn = update_record_cb;
+
+	revoke_state->count++;
+}
+
+/*
+ * Timer callback fired when the revoke has run out of time: mark the
+ * revoke as failed and finished so the waiting loop stops.
+ */
+static void ctdb_revoke_timeout_handler(struct tevent_context *ev,
+					struct tevent_timer *te,
+					struct timeval yt, void *private_data)
+{
+	struct ctdb_revoke_state *revoke_state = private_data;
+
+	DEBUG(DEBUG_ERR,("Timed out waiting for revoke to finish\n"));
+	revoke_state->status = -1;
+	revoke_state->finished = 1;
+}
+
+/*
+ * Revoke all read-only delegations of one record.
+ *
+ * Sends an update-record control to every node listed in the tracking
+ * data "tdata", waits until all of them have answered or the control
+ * timeout expires, and then rewrites the local record: on success the
+ * rsn is bumped and CTDB_REC_RO_REVOKE_COMPLETE is set, on failure
+ * CTDB_REC_RO_REVOKING_READONLY is cleared so the revoke is retried.
+ *
+ * Note that header->rsn is incremented as a side effect.
+ *
+ * Returns 0 when the local record was rewritten, -1 otherwise.
+ */
+static int ctdb_revoke_all_delegations(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA tdata, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	const uint32_t revoking_flags =
+		CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_HAVE_DELEGATIONS;
+	struct ctdb_revoke_state *state;
+	struct ctdb_ltdb_header new_header;
+	TDB_DATA new_data;
+	int ret = -1;
+
+	state = talloc_zero(ctdb, struct ctdb_revoke_state);
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate revoke state in revokechild\n"));
+		return -1;
+	}
+
+	state->ctdb_db = ctdb_db;
+	state->key = key;
+	state->header = header;
+	state->data = data;
+
+	ctdb_trackingdb_traverse(ctdb, tdata, revoke_send_cb, state);
+
+	tevent_add_timer(ctdb->ev, state,
+			 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
+			 ctdb_revoke_timeout_handler, state);
+
+	/* wait for every control to be answered, or for the timeout */
+	while (state->finished == 0) {
+		tevent_loop_once(ctdb->ev);
+	}
+
+	if (ctdb_ltdb_lock(ctdb_db, key) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to chainlock the database in revokechild\n"));
+		goto done;
+	}
+	if (ctdb_ltdb_fetch(ctdb_db, key, &new_header, state, &new_data) != 0) {
+		DEBUG(DEBUG_ERR,("Failed for fetch tdb record in revokechild\n"));
+		goto unlock;
+	}
+	header->rsn++;
+	if (new_header.rsn > header->rsn) {
+		DEBUG(DEBUG_ERR,("RSN too high in tdb record in revokechild\n"));
+		goto unlock;
+	}
+	/* the record must still be marked as being revoked */
+	if ((new_header.flags & revoking_flags) != revoking_flags) {
+		DEBUG(DEBUG_ERR,("Flags are wrong in tdb record in revokechild\n"));
+		goto unlock;
+	}
+
+	/*
+	 * If revoke on all nodes succeed, revoke is complete.  Otherwise,
+	 * remove CTDB_REC_RO_REVOKING_READONLY flag and retry.
+	 */
+	if (state->status == 0) {
+		new_header.rsn++;
+		new_header.flags |= CTDB_REC_RO_REVOKE_COMPLETE;
+	} else {
+		DEBUG(DEBUG_NOTICE, ("Revoke all delegations failed, retrying.\n"));
+		new_header.flags &= ~CTDB_REC_RO_REVOKING_READONLY;
+	}
+	if (ctdb_ltdb_store(ctdb_db, key, &new_header, new_data) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to write new record in revokechild\n"));
+		goto unlock;
+	}
+	ret = 0;
+
+unlock:
+	ctdb_ltdb_unlock(ctdb_db, key);
+done:
+	talloc_free(state);
+	return ret;
+}
+
+
+/*
+ * Start revoking the read-only delegations of a record.
+ *
+ * Forks a child that runs ctdb_revoke_all_delegations() and reports a
+ * one byte status over a pipe.  The parent registers a handle for the
+ * child in ctdb_db->revokechild_active; revokechild_handler() runs
+ * when the status arrives.
+ *
+ * The header is modified in place before the child is started: the
+ * read-only flags are cleared, CTDB_REC_FLAG_MIGRATED_WITH_DATA is set
+ * and the rsn is decremented.
+ *
+ * Returns 0 when the child was started, -1 on failure.
+ */
+int ctdb_start_revoke_ro_record(struct ctdb_context *ctdb,
+				struct ctdb_db_context *ctdb_db,
+				TDB_DATA key,
+				struct ctdb_ltdb_header *header,
+				TDB_DATA data)
+{
+	TDB_DATA tdata;
+	struct revokechild_handle *rev_hdl;
+	pid_t parent = getpid();
+	int ret;
+
+	header->flags &= ~(CTDB_REC_RO_REVOKING_READONLY |
+			   CTDB_REC_RO_HAVE_DELEGATIONS |
+			   CTDB_REC_RO_HAVE_READONLY);
+
+	header->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA;
+	header->rsn   -= 1;
+
+	rev_hdl = talloc_zero(ctdb_db, struct revokechild_handle);
+	if (rev_hdl == NULL) {
+		D_ERR("Failed to allocate revokechild_handle\n");
+		return -1;
+	}
+
+	/*
+	 * Initialise the handle before anything can fail, so that the
+	 * error path can tell which descriptors are open.
+	 */
+	rev_hdl->status    = 0;
+	rev_hdl->ctdb      = ctdb;
+	rev_hdl->ctdb_db   = ctdb_db;
+	rev_hdl->fd[0]     = -1;
+	rev_hdl->fd[1]     = -1;
+
+	/* move the malloc'ed tracking data onto the handle */
+	tdata = tdb_fetch(ctdb_db->rottdb, key);
+	if (tdata.dsize > 0) {
+		uint8_t *tmp;
+
+		tmp = tdata.dptr;
+		tdata.dptr = talloc_memdup(rev_hdl, tdata.dptr, tdata.dsize);
+		free(tmp);
+		if (tdata.dptr == NULL) {
+			D_ERR("Failed to allocate tracking data for "
+			      "revokechild_handle\n");
+			goto err_out;
+		}
+	}
+
+	rev_hdl->key.dsize = key.dsize;
+	rev_hdl->key.dptr  = talloc_memdup(rev_hdl, key.dptr, key.dsize);
+	if (rev_hdl->key.dptr == NULL) {
+		D_ERR("Failed to allocate key for revokechild_handle\n");
+		goto err_out;
+	}
+
+	ret = pipe(rev_hdl->fd);
+	if (ret != 0) {
+		D_ERR("Failed to create pipe for revokechild_handle\n");
+		goto err_out;
+	}
+
+	rev_hdl->child = ctdb_fork(ctdb);
+	if (rev_hdl->child == (pid_t)-1) {
+		D_ERR("Failed to fork child for revokechild\n");
+		goto err_out;
+	}
+
+	if (rev_hdl->child == 0) {
+		/* child: do the revoke and report a one byte status */
+		char c = 0;
+		close(rev_hdl->fd[0]);
+
+		prctl_set_comment("ctdb_revokechild");
+		if (switch_from_server_to_client(ctdb) != 0) {
+			D_ERR("Failed to switch from server to client "
+			      "for revokechild process\n");
+			c = 1;
+			goto child_finished;
+		}
+
+		c = ctdb_revoke_all_delegations(ctdb,
+						ctdb_db,
+						tdata,
+						key,
+						header,
+						data);
+
+child_finished:
+		sys_write(rev_hdl->fd[1], &c, 1);
+		ctdb_wait_for_process_to_exit(parent);
+		_exit(0);
+	}
+
+	/* parent: keep only the read end of the pipe */
+	close(rev_hdl->fd[1]);
+	rev_hdl->fd[1] = -1;
+	set_close_on_exec(rev_hdl->fd[0]);
+
+	rev_hdl->fde = tevent_add_fd(ctdb->ev,
+				     rev_hdl,
+				     rev_hdl->fd[0],
+				     TEVENT_FD_READ,
+				     revokechild_handler,
+				     (void *)rev_hdl);
+
+	if (rev_hdl->fde == NULL) {
+		D_ERR("Failed to set up fd event for revokechild process\n");
+		/*
+		 * The destructor is not installed yet, so stop the child
+		 * here and leave the rest of the cleanup to err_out
+		 * rather than carrying on with a freed handle.
+		 */
+		ctdb_kill(ctdb, rev_hdl->child, SIGKILL);
+		goto err_out;
+	}
+	/* from here on the fd event owns fd[0] and closes it when freed */
+	tevent_fd_set_auto_close(rev_hdl->fde);
+
+	/* This is an active revokechild child process */
+	DLIST_ADD_END(ctdb_db->revokechild_active, rev_hdl);
+	talloc_set_destructor(rev_hdl, revokechild_destructor);
+
+	return 0;
+
+err_out:
+	/* close whichever pipe ends are still open, then drop the handle */
+	if (rev_hdl->fd[0] != -1) {
+		close(rev_hdl->fd[0]);
+	}
+	if (rev_hdl->fd[1] != -1) {
+		close(rev_hdl->fd[1]);
+	}
+	talloc_free(rev_hdl);
+	return -1;
+}
+
+/*
+ * Attach a request to the active revoke of the record "key", so that
+ * it is requeued through fn(call_context, hdr) once the revoke child
+ * handle is destroyed.  The packet is stolen onto the deferred call.
+ *
+ * Returns 0 on success, -1 when no active revoke exists for the key or
+ * the allocation fails.
+ */
+int ctdb_add_revoke_deferred_call(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_req_header *hdr, deferred_requeue_fn fn, void *call_context)
+{
+	struct revokechild_deferred_call *deferred_call;
+	struct revokechild_handle *rev_hdl;
+
+	/* look for the active revoke whose (non-empty) key matches */
+	for (rev_hdl = ctdb_db->revokechild_active;
+	     rev_hdl != NULL;
+	     rev_hdl = rev_hdl->next) {
+		if (rev_hdl->key.dsize != 0 &&
+		    rev_hdl->key.dsize == key.dsize &&
+		    memcmp(rev_hdl->key.dptr, key.dptr, key.dsize) == 0) {
+			break;
+		}
+	}
+
+	if (rev_hdl == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to add deferred call to revoke list. revoke structure not found\n"));
+		return -1;
+	}
+
+	deferred_call = talloc(call_context, struct revokechild_deferred_call);
+	if (deferred_call == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate deferred call structure for revoking record\n"));
+		return -1;
+	}
+
+	deferred_call->rev_hdl = rev_hdl;
+	deferred_call->ctdb = ctdb;
+	deferred_call->fn = fn;
+	deferred_call->ctx = call_context;
+	deferred_call->hdr = talloc_steal(deferred_call, hdr);
+
+	talloc_set_destructor(deferred_call, deferred_call_destructor);
+
+	DLIST_ADD(rev_hdl->deferred_call_list, deferred_call);
+
+	return 0;
+}
+
+/*
+ * hash_count callback: feed the counter of a key, capped at INT_MAX,
+ * into the database's hot key table.
+ */
+static void ctdb_migration_count_handler(TDB_DATA key, uint64_t counter,
+					 void *private_data)
+{
+	struct ctdb_db_context *ctdb_db = talloc_get_type_abort(
+		private_data, struct ctdb_db_context);
+	unsigned int capped = INT_MAX;
+
+	if (counter < INT_MAX) {
+		capped = counter;
+	}
+
+	ctdb_update_db_stat_hot_keys(ctdb_db, key, capped);
+}
+
+/*
+ * Periodic timer callback: expire old entries from the migration
+ * counter database and re-arm itself to run again in 10 seconds.  If
+ * the timer cannot be re-armed the migration database is freed.
+ */
+static void ctdb_migration_cleandb_event(struct tevent_context *ev,
+					 struct tevent_timer *te,
+					 struct timeval current_time,
+					 void *private_data)
+{
+	struct ctdb_db_context *ctdb_db = talloc_get_type_abort(
+		private_data, struct ctdb_db_context);
+
+	if (ctdb_db->migratedb == NULL) {
+		return;
+	}
+
+	hash_count_expire(ctdb_db->migratedb, NULL);
+
+	te = tevent_add_timer(ctdb_db->ctdb->ev, ctdb_db->migratedb,
+			      tevent_timeval_current_ofs(10, 0),
+			      ctdb_migration_cleandb_event, ctdb_db);
+	if (te != NULL) {
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Memory error in migration cleandb event for %s\n",
+	       ctdb_db->db_name));
+	TALLOC_FREE(ctdb_db->migratedb);
+}
+
+/*
+ * Set up migration counting for a database.
+ *
+ * Nothing is done for databases that are not volatile.  Otherwise a
+ * hash_count database is created with ctdb_migration_count_handler()
+ * as its callback, and a timer is armed to run the cleanup event
+ * after 10 seconds.
+ *
+ * Returns 0 on success (or when there is nothing to do), -1 on failure.
+ */
+int ctdb_migration_init(struct ctdb_db_context *ctdb_db)
+{
+	struct timeval interval = { 1, 0 };
+	struct tevent_timer *timer;
+	int rc;
+
+	if (! ctdb_db_volatile(ctdb_db)) {
+		return 0;
+	}
+
+	rc = hash_count_init(ctdb_db, interval,
+			     ctdb_migration_count_handler, ctdb_db,
+			     &ctdb_db->migratedb);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Memory error in migration init for %s\n",
+		       ctdb_db->db_name));
+		return -1;
+	}
+
+	/* the timer hangs off migratedb, so freeing that cancels it */
+	timer = tevent_add_timer(ctdb_db->ctdb->ev, ctdb_db->migratedb,
+				 tevent_timeval_current_ofs(10, 0),
+				 ctdb_migration_cleandb_event, ctdb_db);
+	if (timer != NULL) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Memory error in migration init for %s\n",
+	       ctdb_db->db_name));
+	TALLOC_FREE(ctdb_db->migratedb);
+	return -1;
+}
diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c
new file mode 100644
index 0000000..c9edb1d
--- /dev/null
+++ b/ctdb/server/ctdb_client.c
@@ -0,0 +1,1709 @@
+/*
+ ctdb daemon code
+
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/locale.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/time.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+  allocate a packet for use in client<->daemon communication
+
+  The buffer is zero-filled, at least MAX(length, slength) bytes long and
+  rounded up to a multiple of CTDB_DS_ALIGNMENT. The common header
+  fields (length, operation, magic, protocol version, source node and,
+  when a vnn map is known, the generation) are filled in. The talloc
+  name of the packet is set to "type".
+
+  Returns NULL if the allocation fails.
+ */
+struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
+					    TALLOC_CTX *mem_ctx,
+					    enum ctdb_operation operation,
+					    size_t length, size_t slength,
+					    const char *type)
+{
+	/* size_t, not int: the value is derived from size_t lengths and is
+	   handed straight to the allocator */
+	size_t size;
+	struct ctdb_req_header *hdr;
+
+	length = MAX(length, slength);
+	size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
+
+	hdr = (struct ctdb_req_header *)talloc_zero_size(mem_ctx, size);
+	if (hdr == NULL) {
+		DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
+			 operation, (unsigned)length));
+		return NULL;
+	}
+	talloc_set_name_const(hdr, type);
+	hdr->length       = length;
+	hdr->operation    = operation;
+	hdr->ctdb_magic   = CTDB_MAGIC;
+	hdr->ctdb_version = CTDB_PROTOCOL;
+	hdr->srcnode      = ctdb->pnn;
+	if (ctdb->vnn_map) {
+		hdr->generation = ctdb->vnn_map->generation;
+	}
+
+	return hdr;
+}
+
+/*
+ local version of ctdb_call
+
+ Finds the handler registered on ctdb_db for call->call_id and runs it
+ against a private copy of the record data. When updatetdb is true the
+ resulting record is written back with ctdb_ltdb_store(). On success
+ call->reply_data and call->status are filled in and 0 is returned.
+ -1 is returned when the call id is unknown, the handler fails or the
+ store fails; allocation failures are handled by CTDB_NO_MEMORY().
+*/
+int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
+ struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
+ TDB_DATA *data, bool updatetdb)
+{
+ struct ctdb_call_info *c;
+ struct ctdb_registered_call *fn;
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+
+ /* c is a scratch context; every allocation below hangs off it */
+ c = talloc_zero(mem_ctx, struct ctdb_call_info);
+ CTDB_NO_MEMORY(ctdb, c);
+
+ c->key = call->key;
+ c->call_data = &call->call_data;
+ /* the handler works on a copy of the record data, not on *data */
+ c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
+ c->record_data.dsize = data->dsize;
+ /* NOTE(review): c is not freed here if this check returns - confirm */
+ CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
+ c->header = header;
+
+ /* linear search of the calls registered on this database */
+ for (fn=ctdb_db->calls;fn;fn=fn->next) {
+ if (fn->id == (uint32_t)call->call_id) {
+ break;
+ }
+ }
+ if (fn == NULL) {
+ ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
+ talloc_free(c);
+ return -1;
+ }
+
+ if (fn->fn(c) != 0) {
+ ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
+ talloc_free(c);
+ return -1;
+ }
+
+ /* we need to force the record to be written out if this was a remote access */
+ if (c->new_data == NULL) {
+ c->new_data = &c->record_data;
+ }
+
+ /* c->new_data cannot be NULL here because of the fallback above,
+ so this effectively tests updatetdb only */
+ if (c->new_data && updatetdb) {
+ /* XXX check that we always have the lock here? */
+ if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
+ ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
+ talloc_free(c);
+ return -1;
+ }
+ }
+
+ if (c->reply_data) {
+ call->reply_data = *c->reply_data;
+
+ /* move the reply buffer onto call so it survives talloc_free(c) */
+ talloc_steal(call, call->reply_data.dptr);
+ talloc_set_name_const(call->reply_data.dptr, __location__);
+ } else {
+ call->reply_data.dptr = NULL;
+ call->reply_data.dsize = 0;
+ }
+ call->status = c->status;
+
+ talloc_free(c);
+
+ return 0;
+}
+
+
+/*
+  hand a packet to the queue that sends from the client to the daemon;
+  the number of bytes queued is taken from hdr->length
+*/
+static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	uint8_t *raw = (uint8_t *)hdr;
+
+	return ctdb_queue_send(ctdb->daemon.queue, raw, hdr->length);
+}
+
+
+/*
+ called when a CTDB_REPLY_CALL packet comes in to the client
+
+ This packet comes in response to a CTDB_REQ_CALL request packet. It
+ contains any reply data from the call.
+
+ The pending call is looked up by hdr->reqid. Its reply data, status
+ and state are updated, and its async callback (if any) is invoked.
+*/
+static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+ struct ctdb_reply_call_old *c = (struct ctdb_reply_call_old *)hdr;
+ struct ctdb_client_call_state *state;
+
+ state = reqid_find(ctdb->idr, hdr->reqid, struct ctdb_client_call_state);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
+ return;
+ }
+
+ if (hdr->reqid != state->reqid) {
+ /* we found a record but it was the wrong one */
+ DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
+ return;
+ }
+
+ /* reply_data points into the packet itself, no copy is made */
+ state->call->reply_data.dptr = c->data;
+ state->call->reply_data.dsize = c->datalen;
+ state->call->status = c->status;
+
+ /* keep the packet alive for as long as state references its data */
+ talloc_steal(state, c);
+
+ state->state = CTDB_CALL_DONE;
+
+ if (state->async.fn) {
+ state->async.fn(state);
+ }
+}
+
+/*
+  handle a CTDB_REQ_MESSAGE packet: copy its payload and pass it to
+  srvid_dispatch() for the srvid carried in the packet
+*/
+void ctdb_request_message(struct ctdb_context *ctdb,
+			  struct ctdb_req_header *hdr)
+{
+	struct ctdb_req_message_old *msg = (struct ctdb_req_message_old *)hdr;
+	TDB_DATA payload;
+
+	/* the copy is a talloc child of the packet */
+	payload.dptr = talloc_memdup(msg, &msg->data[0], msg->datalen);
+	if (payload.dptr == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Memory allocation failure\n"));
+		return;
+	}
+	payload.dsize = msg->datalen;
+
+	srvid_dispatch(ctdb->srv, msg->srvid, CTDB_SRVID_ALL, payload);
+}
+
+static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
+
+/*
+ this is called in the client, when data comes in from the daemon
+
+ data/cnt describe one received packet and args is the ctdb context.
+ The packet is validated (minimum size, header length, magic, protocol
+ version) and then dispatched on hdr->operation. A zero-length read is
+ treated as the daemon having exited and terminates the process.
+ */
+void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
+{
+ struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
+ struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
+ TALLOC_CTX *tmp_ctx;
+
+ /* place the packet as a child of a tmp_ctx. We then use
+ talloc_free() below to free it. If any of the calls want
+ to keep it, then they will steal it somewhere else, and the
+ talloc_free() will be a no-op */
+ tmp_ctx = talloc_new(ctdb);
+ talloc_steal(tmp_ctx, hdr);
+
+ if (cnt == 0) {
+ DEBUG(DEBUG_CRIT,("Daemon has exited - shutting down client\n"));
+ exit(1);
+ }
+
+ /* the header fields may only be read once a full header is present */
+ if (cnt < sizeof(*hdr)) {
+ DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
+ goto done;
+ }
+ if (cnt != hdr->length) {
+ ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
+ (unsigned)hdr->length, (unsigned)cnt);
+ goto done;
+ }
+
+ if (hdr->ctdb_magic != CTDB_MAGIC) {
+ ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
+ goto done;
+ }
+
+ if (hdr->ctdb_version != CTDB_PROTOCOL) {
+ ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
+ goto done;
+ }
+
+ switch (hdr->operation) {
+ case CTDB_REPLY_CALL:
+ ctdb_client_reply_call(ctdb, hdr);
+ break;
+
+ case CTDB_REQ_MESSAGE:
+ ctdb_request_message(ctdb, hdr);
+ break;
+
+ case CTDB_REPLY_CONTROL:
+ ctdb_client_reply_control(ctdb, hdr);
+ break;
+
+ default:
+ DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
+ }
+
+done:
+ /* frees the packet too, unless a handler stole it away from tmp_ctx */
+ talloc_free(tmp_ctx);
+}
+
+/*
+  connect to a unix domain socket
+
+  Opens a stream socket to the path in ctdb->daemon.name, makes it
+  non-blocking and close-on-exec, and attaches a packet queue whose
+  incoming data is delivered to ctdb_client_read_cb().
+
+  Returns 0 on success. On any failure the socket is closed,
+  ctdb->daemon.sd is reset to -1 and -1 is returned.
+*/
+int ctdb_socket_connect(struct ctdb_context *ctdb)
+{
+	struct sockaddr_un addr;
+	int ret;
+
+	/* addr is zeroed first, so the copied path is always terminated */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);
+
+	ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (ctdb->daemon.sd == -1) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
+		return -1;
+	}
+
+	if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		/* leading space separates the message from __location__ */
+		DEBUG(DEBUG_ERR,
+		      (__location__
+		       " Failed to connect client socket to daemon (%s)\n",
+		       strerror(errno)));
+		close(ctdb->daemon.sd);
+		ctdb->daemon.sd = -1;
+		return -1;
+	}
+
+	ret = set_blocking(ctdb->daemon.sd, false);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      (__location__
+		       " failed to set socket non-blocking (%s)\n",
+		       strerror(errno)));
+		close(ctdb->daemon.sd);
+		ctdb->daemon.sd = -1;
+		return -1;
+	}
+
+	set_close_on_exec(ctdb->daemon.sd);
+
+	ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
+					      CTDB_DS_ALIGNMENT,
+					      ctdb_client_read_cb, ctdb, "to-ctdbd");
+	if (ctdb->daemon.queue == NULL) {
+		/* without a queue nothing can be sent; do not report success */
+		DEBUG(DEBUG_ERR,
+		      (__location__
+		       " failed to set up queue to daemon\n"));
+		close(ctdb->daemon.sd);
+		ctdb->daemon.sd = -1;
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ NOTE(review): nothing in the visible part of this file references this
+ struct. The per-field notes below are inferred from names and types
+ only - confirm against the users of ctdb_record_handle.
+ */
+struct ctdb_record_handle {
+ struct ctdb_db_context *ctdb_db; /* presumably the database holding the record */
+ TDB_DATA key; /* presumably the record key */
+ TDB_DATA *data; /* presumably the record data */
+ struct ctdb_ltdb_header header; /* presumably the record's ltdb header */
+};
+
+
+/*
+  wait for a ctdb_call started with ctdb_call_send() and collect its result
+
+  Runs the event loop until the call leaves the waiting states, so this
+  blocks unless the call has already completed. On completion the reply
+  data is duplicated onto state->ctdb_db, call->status is filled in and
+  returned, and state is freed. Returns -1 if state is NULL or the call
+  ended in any state other than CTDB_CALL_DONE.
+*/
+int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
+{
+	struct ctdb_call *done;
+
+	if (state == NULL) {
+		return -1;
+	}
+
+	for (;;) {
+		if (state->state >= CTDB_CALL_DONE) {
+			break;
+		}
+		tevent_loop_once(state->ctdb_db->ctdb->ev);
+	}
+
+	if (state->state != CTDB_CALL_DONE) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
+		talloc_free(state);
+		return -1;
+	}
+
+	done = state->call;
+	if (done->reply_data.dsize == 0) {
+		call->reply_data.dptr = NULL;
+		call->reply_data.dsize = 0;
+	} else {
+		/* copy the reply out before state (which owns it) goes away */
+		call->reply_data.dptr = talloc_memdup(state->ctdb_db,
+						      done->reply_data.dptr,
+						      done->reply_data.dsize);
+		call->reply_data.dsize = done->reply_data.dsize;
+	}
+	call->status = done->status;
+	talloc_free(state);
+
+	return call->status;
+}
+
+
+
+
+/*
+  talloc destructor for a client call state: drop its request id from
+  the id registry of the owning ctdb context
+*/
+static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
+{
+	struct ctdb_context *ctdb = state->ctdb_db->ctdb;
+
+	reqid_remove(ctdb->idr, state->reqid);
+
+	return 0;
+}
+
+/*
+  construct an event driven local ctdb_call
+
+  this is used so that locally processed ctdb_call requests are processed
+  in an event driven manner
+
+  The call is executed immediately through ctdb_call_local() and the
+  returned state is already in CTDB_CALL_DONE. The record buffer in
+  data->dptr is moved onto the returned state. A failure of
+  ctdb_call_local() is only logged. Returns NULL on allocation failure.
+*/
+static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
+								  struct ctdb_call *call,
+								  struct ctdb_ltdb_header *header,
+								  TDB_DATA *data)
+{
+	struct ctdb_client_call_state *state;
+	struct ctdb_call *call_copy;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	int ret;
+
+	state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
+	CTDB_NO_MEMORY_NULL(ctdb, state);
+
+	call_copy = talloc_zero(state, struct ctdb_call);
+	if (call_copy == NULL) {
+		/* state hangs off the long-lived ctdb_db; release it before
+		   the macro below reports the failure and returns */
+		talloc_free(state);
+	}
+	CTDB_NO_MEMORY_NULL(ctdb, call_copy);
+	state->call = call_copy;
+
+	talloc_steal(state, data->dptr);
+
+	state->state   = CTDB_CALL_DONE;
+	*(state->call) = *call;
+	state->ctdb_db = ctdb_db;
+
+	ret = ctdb_call_local(ctdb_db, state->call, header, state, data, true);
+	if (ret != 0) {
+		DEBUG(DEBUG_DEBUG,("ctdb_call_local() failed, ignoring return code %d\n", ret));
+	}
+
+	return state;
+}
+
+/*
+  make a ctdb call to the local daemon - async send. Called from client context.
+
+  This constructs a ctdb_call request and queues it for processing.
+  This call never blocks.
+
+  If the record is found locally with this node as dmaster, the call is
+  run in-process through ctdb_client_call_local_send(). Otherwise a
+  CTDB_REQ_CALL packet is built and queued to the daemon, and the
+  returned state is in CTDB_CALL_WAIT.
+
+  Returns NULL if the chainlock cannot be taken or on allocation failure.
+*/
+struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
+					      struct ctdb_call *call)
+{
+	struct ctdb_client_call_state *state;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_ltdb_header header;
+	TDB_DATA data;
+	int ret;
+	size_t len;
+	struct ctdb_req_call_old *c;
+
+	/* if the domain socket is not yet open, open it */
+	if (ctdb->daemon.sd==-1) {
+		ctdb_socket_connect(ctdb);
+	}
+
+	ret = ctdb_ltdb_lock(ctdb_db, call->key);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
+		return NULL;
+	}
+
+	ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
+
+	/* Only inspect the header after a successful fetch; its contents
+	   are not known to be meaningful otherwise. A failed fetch already
+	   leaves ret non-zero, so the outcome is unchanged. */
+	if (ret == 0 &&
+	    (call->flags & CTDB_IMMEDIATE_MIGRATION) &&
+	    (header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
+		ret = -1;
+	}
+
+	if (ret == 0 && header.dmaster == ctdb->pnn) {
+		state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
+		talloc_free(data.dptr);
+		ctdb_ltdb_unlock(ctdb_db, call->key);
+		return state;
+	}
+
+	ctdb_ltdb_unlock(ctdb_db, call->key);
+	talloc_free(data.dptr);
+
+	state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
+		return NULL;
+	}
+	state->call = talloc_zero(state, struct ctdb_call);
+	if (state->call == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
+		/* state hangs off the long-lived ctdb_db: do not leak it */
+		talloc_free(state);
+		return NULL;
+	}
+
+	len = offsetof(struct ctdb_req_call_old, data) + call->key.dsize + call->call_data.dsize;
+	c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call_old);
+	if (c == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
+		/* no destructor is installed yet, so this is a plain free */
+		talloc_free(state);
+		return NULL;
+	}
+
+	state->reqid     = reqid_new(ctdb->idr, state);
+	state->ctdb_db = ctdb_db;
+	talloc_set_destructor(state, ctdb_client_call_destructor);
+
+	c->hdr.reqid     = state->reqid;
+	c->flags         = call->flags;
+	c->db_id         = ctdb_db->db_id;
+	c->callid        = call->call_id;
+	c->hopcount      = 0;
+	c->keylen        = call->key.dsize;
+	c->calldatalen   = call->call_data.dsize;
+	/* packet payload layout: key bytes followed by call data bytes */
+	memcpy(&c->data[0], call->key.dptr, call->key.dsize);
+	memcpy(&c->data[call->key.dsize],
+	       call->call_data.dptr, call->call_data.dsize);
+	*(state->call)              = *call;
+	/* point the saved call at the copies inside the packet, which is
+	   owned by state */
+	state->call->call_data.dptr = &c->data[call->key.dsize];
+	state->call->key.dptr       = &c->data[0];
+
+	state->state = CTDB_CALL_WAIT;
+
+
+	ctdb_client_queue_pkt(ctdb, &c->hdr);
+
+	return state;
+}
+
+
+/*
+  synchronous ctdb_call: ctdb_call_send() immediately followed by
+  ctdb_call_recv() on the resulting state
+*/
+int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
+{
+	/* ctdb_call_recv() copes with a NULL state from a failed send */
+	return ctdb_call_recv(ctdb_call_send(ctdb_db, call), call);
+}
+
+
+/*
+  register a messaging srvid with the daemon and install the matching
+  handler function in the client
+
+  Returns -1 if the daemon refuses the registration, otherwise the
+  result of srvid_register().
+*/
+int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
+				    srvid_handler_fn handler,
+				    void *private_data)
+{
+	int32_t status;
+	int res;
+
+	res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid,
+			   CTDB_CONTROL_REGISTER_SRVID, 0,
+			   tdb_null, NULL, NULL, &status, NULL, NULL);
+	if (res == 0 && status == 0) {
+		/* the daemon knows the srvid; record the handler locally too */
+		return srvid_register(ctdb->srv, ctdb, srvid, handler,
+				      private_data);
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Failed to register srvid %llu\n",
+	       (unsigned long long)srvid));
+	return -1;
+}
+
+/*
+  tell the daemon we no longer want a srvid and drop the matching
+  handler from the client
+
+  Returns 0 on success, -1 if the daemon refuses the deregistration.
+*/
+int ctdb_client_remove_message_handler(struct ctdb_context *ctdb,
+				       uint64_t srvid, void *private_data)
+{
+	int32_t status;
+	int res;
+
+	res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid,
+			   CTDB_CONTROL_DEREGISTER_SRVID, 0,
+			   tdb_null, NULL, NULL, &status, NULL, NULL);
+	if (res == 0 && status == 0) {
+		/* also remove the handler from our own ctdb structure */
+		srvid_deregister(ctdb->srv, srvid, private_data);
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Failed to deregister srvid %llu\n",
+	       (unsigned long long)srvid));
+	return -1;
+}
+
+/*
+  send a message - from client context
+
+  Builds a CTDB_REQ_MESSAGE packet addressed to node pnn and service id
+  srvid, copies data into it and queues it to the daemon. The packet is
+  freed again once it has been handed to the queue.
+
+  Returns the result of queueing the packet; allocation failure is
+  handled by CTDB_NO_MEMORY().
+ */
+int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
+			     uint64_t srvid, TDB_DATA data)
+{
+	struct ctdb_req_message_old *r;
+	/* size_t, as in ctdb_call_send(): the sum includes a size_t dsize */
+	size_t len;
+	int res;
+
+	len = offsetof(struct ctdb_req_message_old, data) + data.dsize;
+	r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
+			       len, struct ctdb_req_message_old);
+	CTDB_NO_MEMORY(ctdb, r);
+
+	r->hdr.destnode  = pnn;
+	r->srvid         = srvid;
+	r->datalen       = data.dsize;
+	/* an empty message may carry a NULL dptr; skip the copy then, as
+	   ctdb_control_send() does */
+	if (data.dsize) {
+		memcpy(&r->data[0], data.dptr, data.dsize);
+	}
+
+	res = ctdb_client_queue_pkt(ctdb, &r->hdr);
+	talloc_free(r);
+	return res;
+}
+
+
+/*
+ called when a control completes or times out to invoke the callback
+ function the user provided
+
+ Runs as a zero-delay timer event with the control state as
+ private_data. The callback itself is invoked from within
+ ctdb_control_recv(); the reply data, status and error message are
+ not collected here (NULL is passed for all three).
+*/
+static void invoke_control_callback(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_client_control_state *state;
+ TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+ int ret;
+
+ state = talloc_get_type(private_data, struct ctdb_client_control_state);
+ /* reparent state so that freeing tmp_ctx below disposes of it */
+ talloc_steal(tmp_ctx, state);
+
+ ret = ctdb_control_recv(state->ctdb, state, state,
+ NULL,
+ NULL,
+ NULL);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_control_recv() failed, ignoring return code %d\n", ret));
+ }
+
+ talloc_free(tmp_ctx);
+}
+
+/*
+ called when a CTDB_REPLY_CONTROL packet comes in to the client
+
+ This packet comes in response to a CTDB_REQ_CONTROL request packet. It
+ contains any reply data from the control.
+
+ The pending control is looked up by hdr->reqid and marked
+ CTDB_CONTROL_DONE. If it has an async callback, a zero-delay timer is
+ scheduled to run it.
+*/
+static void ctdb_client_reply_control(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr)
+{
+ struct ctdb_reply_control_old *c = (struct ctdb_reply_control_old *)hdr;
+ struct ctdb_client_control_state *state;
+
+ state = reqid_find(ctdb->idr, hdr->reqid, struct ctdb_client_control_state);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
+ return;
+ }
+
+ if (hdr->reqid != state->reqid) {
+ /* we found a record but it was the wrong one */
+ DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
+ return;
+ }
+
+ state->outdata.dptr = c->data;
+ state->outdata.dsize = c->datalen;
+ state->status = c->status;
+ /* the error text, if any, follows the reply data in the packet */
+ if (c->errorlen) {
+ state->errormsg = talloc_strndup(state,
+ (char *)&c->data[c->datalen],
+ c->errorlen);
+ }
+
+ /* state->outdata now uses resources from c so we don't want c
+ to just disappear from under us while state is still alive
+ */
+ talloc_steal(state, c);
+
+ state->state = CTDB_CONTROL_DONE;
+
+ /* if we had a callback registered for this control, pull the response
+ and call the callback.
+ */
+ if (state->async.fn) {
+ tevent_add_timer(ctdb->ev, state, timeval_zero(),
+ invoke_control_callback, state);
+ }
+}
+
+
+/*
+  talloc destructor for a client control state: drop its request id
+  from the id registry of the ctdb context
+*/
+static int ctdb_client_control_destructor(struct ctdb_client_control_state *state)
+{
+	struct ctdb_context *ctdb = state->ctdb;
+
+	reqid_remove(ctdb->idr, state->reqid);
+
+	return 0;
+}
+
+
+/*
+  timer handler fired when a control has not been answered in time:
+  marks the control CTDB_CONTROL_TIMEOUT and, if the control has an
+  async callback, schedules it to run
+*/
+static void control_timeout_func(struct tevent_context *ev,
+				 struct tevent_timer *te,
+				 struct timeval t, void *private_data)
+{
+	struct ctdb_client_control_state *ctl;
+
+	ctl = talloc_get_type(private_data, struct ctdb_client_control_state);
+
+	DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
+			 "dstnode:%u\n", ctl->reqid, ctl->c->opcode,
+			 ctl->c->hdr.destnode));
+
+	ctl->state = CTDB_CONTROL_TIMEOUT;
+
+	if (!ctl->async.fn) {
+		return;
+	}
+
+	/* the callback runs from a zero-delay timer, not from here */
+	tevent_add_timer(ctl->ctdb->ev, ctl, timeval_zero(),
+			 invoke_control_callback, ctl);
+}
+
+/*
+  async version of send control request
+
+  Allocates a control state on mem_ctx, builds a CTDB_REQ_CONTROL packet
+  for destnode and queues it to the daemon. If timeout is non-NULL and
+  non-zero, a timer is armed that marks the control as timed out.
+
+  Returns the state to pass to ctdb_control_recv(). Returns NULL on
+  allocation or queueing failure, and also (after sending) when
+  CTDB_CTRL_FLAG_NOREPLY is set. *errormsg, if given, is reset to NULL.
+*/
+struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
+		uint32_t destnode, uint64_t srvid,
+		uint32_t opcode, uint32_t flags, TDB_DATA data,
+		TALLOC_CTX *mem_ctx,
+		struct timeval *timeout,
+		char **errormsg)
+{
+	struct ctdb_client_control_state *state;
+	size_t len;
+	struct ctdb_req_control_old *c;
+	int ret;
+
+	if (errormsg) {
+		*errormsg = NULL;
+	}
+
+	/* if the domain socket is not yet open, open it */
+	if (ctdb->daemon.sd==-1) {
+		ctdb_socket_connect(ctdb);
+	}
+
+	state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
+	CTDB_NO_MEMORY_NULL(ctdb, state);
+
+	state->ctdb       = ctdb;
+	state->reqid      = reqid_new(ctdb->idr, state);
+	state->state      = CTDB_CONTROL_WAIT;
+	state->errormsg   = NULL;
+
+	talloc_set_destructor(state, ctdb_client_control_destructor);
+
+	len = offsetof(struct ctdb_req_control_old, data) + data.dsize;
+	c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
+			       len, struct ctdb_req_control_old);
+	if (c == NULL) {
+		/* free state (its destructor releases the reqid) before the
+		   macro below reports the failure and returns NULL */
+		talloc_free(state);
+	}
+	CTDB_NO_MEMORY_NULL(ctdb, c);
+	state->c            = c;
+	c->hdr.reqid        = state->reqid;
+	c->hdr.destnode     = destnode;
+	c->opcode           = opcode;
+	c->client_id        = 0;
+	c->flags            = flags;
+	c->srvid            = srvid;
+	c->datalen          = data.dsize;
+	if (data.dsize) {
+		memcpy(&c->data[0], data.dptr, data.dsize);
+	}
+
+	/* timeout */
+	if (timeout && !timeval_is_zero(timeout)) {
+		tevent_add_timer(ctdb->ev, state, *timeout,
+				 control_timeout_func, state);
+	}
+
+	ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
+	if (ret != 0) {
+		talloc_free(state);
+		return NULL;
+	}
+
+	/* no reply will arrive, so there is nothing to wait on */
+	if (flags & CTDB_CTRL_FLAG_NOREPLY) {
+		talloc_free(state);
+		return NULL;
+	}
+
+	return state;
+}
+
+
+/*
+ async version of receive control reply
+
+ Runs the event loop until the control leaves CTDB_CONTROL_WAIT, then
+ reports the result. On success (return 0) *outdata receives a copy of
+ the reply data allocated on mem_ctx and *status the control status.
+ Returns -1 if state is NULL or the control did not complete. If the
+ reply carried an error message, that message is moved to *errormsg
+ and the control status is returned (-1 if the status was 0).
+ state is always consumed; its async callback, if set, is invoked on
+ every path that got past the NULL check.
+*/
+int ctdb_control_recv(struct ctdb_context *ctdb,
+ struct ctdb_client_control_state *state,
+ TALLOC_CTX *mem_ctx,
+ TDB_DATA *outdata, int32_t *status, char **errormsg)
+{
+ TALLOC_CTX *tmp_ctx;
+
+ /* give the out parameters defined values for the early returns */
+ if (status != NULL) {
+ *status = -1;
+ }
+ if (errormsg != NULL) {
+ *errormsg = NULL;
+ }
+
+ if (state == NULL) {
+ return -1;
+ }
+
+ /* prevent double free of state */
+ tmp_ctx = talloc_new(ctdb);
+ talloc_steal(tmp_ctx, state);
+
+ /* loop one event at a time until we either timeout or the control
+ completes.
+ */
+ while (state->state == CTDB_CONTROL_WAIT) {
+ tevent_loop_once(ctdb->ev);
+ }
+
+ if (state->state != CTDB_CONTROL_DONE) {
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
+ if (state->async.fn) {
+ state->async.fn(state);
+ }
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ if (state->errormsg) {
+ /* an error message with status 0 is still reported as failure */
+ int s = (state->status == 0 ? -1 : state->status);
+ DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
+ if (errormsg) {
+ (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
+ }
+ if (state->async.fn) {
+ state->async.fn(state);
+ }
+ talloc_free(tmp_ctx);
+ return s;
+ }
+
+ if (outdata) {
+ *outdata = state->outdata;
+ /* NOTE(review): the result of this copy is not checked for NULL */
+ outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
+ }
+
+ if (status) {
+ *status = state->status;
+ }
+
+ if (state->async.fn) {
+ state->async.fn(state);
+ }
+
+ talloc_free(tmp_ctx);
+ return 0;
+}
+
+
+
+/*
+  send a ctdb control message and wait for its reply
+
+  timeout specifies how long we should wait for a reply; if timeout is
+  NULL we wait indefinitely. With CTDB_CTRL_FLAG_NOREPLY set nothing is
+  waited for: *status (if given) is set to 0 and 0 is returned.
+ */
+int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
+		 uint32_t opcode, uint32_t flags, TDB_DATA data,
+		 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
+		 struct timeval *timeout,
+		 char **errormsg)
+{
+	struct ctdb_client_control_state *pending;
+
+	pending = ctdb_control_send(ctdb, destnode, srvid, opcode,
+				    flags, data, mem_ctx,
+				    timeout, errormsg);
+
+	if (!(flags & CTDB_CTRL_FLAG_NOREPLY)) {
+		return ctdb_control_recv(ctdb, pending, mem_ctx, outdata,
+					 status, errormsg);
+	}
+
+	/* FIXME: Error conditions in ctdb_control_send return NULL without
+	 * setting errormsg.  So, there is no way to distinguish between success
+	 * and failure when CTDB_CTRL_FLAG_NOREPLY is set */
+	if (status != NULL) {
+		*status = 0;
+	}
+
+	return 0;
+}
+
+/*
+  get vnn map from a remote node
+
+  On success *vnnmap points to a newly allocated map (talloc child of
+  mem_ctx) and 0 is returned. On failure -1 is returned, *vnnmap is
+  left untouched and the reply buffer is released.
+ */
+int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+	struct ctdb_vnn_map_wire *map;
+	struct ctdb_vnn_map *result;
+	uint32_t *nodes;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
+		return -1;
+	}
+
+	/* the first test guards the map->size read in the second one */
+	map = (struct ctdb_vnn_map_wire *)outdata.dptr;
+	if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
+	    outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
+		DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
+		/* the reply was allocated on mem_ctx; do not leak it */
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	result = talloc(mem_ctx, struct ctdb_vnn_map);
+	if (result == NULL) {
+		talloc_free(outdata.dptr);
+	}
+	CTDB_NO_MEMORY(ctdb, result);
+
+	nodes = talloc_array(result, uint32_t, map->size);
+	if (nodes == NULL) {
+		/* drop the half-built map and the reply before bailing out */
+		talloc_free(result);
+		talloc_free(outdata.dptr);
+	}
+	CTDB_NO_MEMORY(ctdb, nodes);
+
+	result->generation = map->generation;
+	result->size       = map->size;
+	result->map        = nodes;
+	memcpy(result->map, map->map, sizeof(uint32_t)*map->size);
+	talloc_free(outdata.dptr);
+
+	*vnnmap = result;
+
+	return 0;
+}
+
+
+/*
+  start an asynchronous CTDB_CONTROL_GET_RECMODE request to a node;
+  the returned state is completed with ctdb_ctrl_getrecmode_recv()
+ */
+struct ctdb_client_control_state *
+ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
+{
+	struct ctdb_client_control_state *pending;
+
+	pending = ctdb_control_send(ctdb, destnode, 0,
+				    CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
+				    mem_ctx, &timeout, NULL);
+
+	return pending;
+}
+
+/*
+  complete a request started with ctdb_ctrl_getrecmode_send()
+
+  The recovery mode is carried in the control status. It is stored in
+  *recmode when recmode is not NULL. Returns 0 on success, -1 on failure.
+ */
+int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
+{
+	int32_t mode;
+
+	if (ctdb_control_recv(ctdb, state, mem_ctx, NULL, &mode, NULL) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
+		return -1;
+	}
+
+	if (recmode != NULL) {
+		*recmode = (uint32_t)mode;
+	}
+
+	return 0;
+}
+
+/*
+  synchronous query of a node's recovery mode: the _send call followed
+  directly by the _recv call
+ */
+int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
+{
+	return ctdb_ctrl_getrecmode_recv(
+		ctdb, mem_ctx,
+		ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode),
+		recmode);
+}
+
+
+
+
+/*
+  set the recovery mode of a remote node
+
+  Returns 0 on success, -1 if the control fails or reports a non-zero
+  status.
+ */
+int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
+{
+	TDB_DATA payload;
+	int32_t res;
+	int ret;
+
+	/* the control payload is the raw 32-bit recovery mode */
+	payload.dptr = (unsigned char *)&recmode;
+	payload.dsize = sizeof(uint32_t);
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_SET_RECMODE, 0, payload,
+			   NULL, NULL, &res, &timeout, NULL);
+	if (ret == 0 && res == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
+	return -1;
+}
+
+
+
+/*
+  get a list of nodes (vnn and flags) from a remote node
+
+  On success *nodemap points to a copy of the reply allocated on
+  mem_ctx and 0 is returned. Returns -1 if the control fails, reports a
+  non-zero status, returns no data, or the copy cannot be allocated.
+ */
+int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
+		struct timeval timeout, uint32_t destnode,
+		TALLOC_CTX *mem_ctx, struct ctdb_node_map_old **nodemap)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0 || outdata.dsize == 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
+		return -1;
+	}
+
+	*nodemap = (struct ctdb_node_map_old *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+	talloc_free(outdata.dptr);
+	if (*nodemap == NULL) {
+		/* do not report success with a NULL node map */
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes "
+				 "talloc_memdup size %u failed\n",
+				 (unsigned int)outdata.dsize));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  get the run state of a node
+
+  Stores the 32-bit run state in *runstate when runstate is not NULL.
+  Returns 0 on success, the non-zero control result or status if the
+  control fails, and -1 if the reply has an unexpected size.
+ */
+int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb,
+			   struct timeval timeout,
+			   uint32_t destnode,
+			   uint32_t *runstate)
+{
+	TDB_DATA reply;
+	int32_t res;
+	int ret;
+	int result;
+
+	ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_RUNSTATE, 0,
+			   tdb_null, ctdb, &reply, &res, &timeout, NULL);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,("ctdb_control for get_runstate failed\n"));
+		return ret;
+	}
+	if (res != 0) {
+		DEBUG(DEBUG_ERR,("ctdb_control for get_runstate failed\n"));
+		return res;
+	}
+
+	if (reply.dsize == sizeof(uint32_t)) {
+		if (runstate != NULL) {
+			*runstate = *(uint32_t *)reply.dptr;
+		}
+		result = 0;
+	} else {
+		DEBUG(DEBUG_ERR,("Invalid return data in get_runstate\n"));
+		result = -1;
+	}
+
+	/* the reply buffer is released on both outcomes */
+	talloc_free(reply.dptr);
+
+	return result;
+}
+
+/*
+  get debug level on a node
+
+  Stores the 32-bit debug level in *level. Returns 0 on success, -1 if
+  the control fails, reports a non-zero status or returns a reply of
+  unexpected size.
+ */
+int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
+{
+	int ret;
+	int32_t res;
+	TDB_DATA data;
+
+	ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
+			   ctdb, &data, &res, NULL, NULL);
+	if (ret != 0 || res != 0) {
+		return -1;
+	}
+	if (data.dsize != sizeof(int32_t)) {
+		DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
+			 (unsigned)data.dsize));
+		/* the reply was allocated on the long-lived ctdb context;
+		   free it on this path too */
+		talloc_free(data.dptr);
+		return -1;
+	}
+	*level = *(int32_t *)data.dptr;
+	talloc_free(data.dptr);
+	return 0;
+}
+
+/*
+  Freeze all databases on a node
+
+  Returns 0 on success, -1 if the control fails or reports a non-zero
+  status.
+ */
+int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout,
+		     uint32_t destnode)
+{
+	int ret;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_FREEZE, 0, tdb_null,
+			   NULL, NULL, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		/* name the function that actually failed */
+		DEBUG(DEBUG_ERR, ("ctdb_ctrl_freeze failed\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  get pnn of a node, or -1
+
+  The pnn is carried in the control status, which is returned as is
+  when the control itself succeeds.
+ */
+int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+	int32_t pnn;
+
+	if (ctdb_control(ctdb, destnode, 0,
+			 CTDB_CONTROL_GET_PNN, 0, tdb_null,
+			 NULL, NULL, &pnn, &timeout, NULL) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
+		return -1;
+	}
+
+	return pnn;
+}
+
+/*
+  get the public IP list from a node, passing flags through to the
+  CTDB_CONTROL_GET_PUBLIC_IPS control
+
+  On success *ips points to a copy of the reply allocated on mem_ctx
+  and 0 is returned. Returns -1 if the control fails, reports a
+  non-zero status, or the copy cannot be allocated.
+ */
+int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
+				   struct timeval timeout, uint32_t destnode,
+				   TALLOC_CTX *mem_ctx,
+				   uint32_t flags,
+				   struct ctdb_public_ip_list_old **ips)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__
+				 " ctdb_control for getpublicips failed ret:%d res:%d\n",
+				 ret, res));
+		return -1;
+	}
+
+	*ips = (struct ctdb_public_ip_list_old *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+	talloc_free(outdata.dptr);
+	if (*ips == NULL) {
+		/* do not report success with a NULL list */
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips "
+				 "talloc_memdup size %u failed\n",
+				 (unsigned int)outdata.dsize));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  get the public IP list from a node with no control flags set
+ */
+int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
+			     struct timeval timeout, uint32_t destnode,
+			     TALLOC_CTX *mem_ctx,
+			     struct ctdb_public_ip_list_old **ips)
+{
+	const uint32_t no_flags = 0;
+
+	return ctdb_ctrl_get_public_ips_flags(ctdb, timeout, destnode,
+					      mem_ctx, no_flags, ips);
+}
+
+/*
+  get the interface list from a node
+
+  The reply is validated against its own element count before use and
+  every interface name is forcibly NUL terminated. On success *_ifaces
+  points to a copy allocated on mem_ctx and 0 is returned. On any
+  failure -1 is returned and the reply buffer is released.
+ */
+int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
+			 struct timeval timeout, uint32_t destnode,
+			 TALLOC_CTX *mem_ctx,
+			 struct ctdb_iface_list_old **_ifaces)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+	struct ctdb_iface_list_old *ifaces;
+	size_t hdr_len;
+	uint64_t need;
+	uint32_t i;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_IFACES, 0, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				 "failed ret:%d res:%d\n",
+				 ret, res));
+		return -1;
+	}
+
+	hdr_len = offsetof(struct ctdb_iface_list_old, ifaces);
+	if (hdr_len > outdata.dsize) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				 "returned invalid data with size %u > %u\n",
+				 (unsigned int)outdata.dsize,
+				 (unsigned int)hdr_len));
+		dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	ifaces = (struct ctdb_iface_list_old *)outdata.dptr;
+
+	/* computed in 64 bits so that a huge num cannot wrap the required
+	   length around and slip past the bounds check below */
+	need = (uint64_t)hdr_len +
+	       (uint64_t)ifaces->num * sizeof(struct ctdb_iface);
+	if (need > outdata.dsize) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				 "returned invalid data with size %u > %u\n",
+				 (unsigned int)outdata.dsize,
+				 (unsigned int)need));
+		dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	/* make sure we null terminate the returned strings */
+	for (i=0; i < ifaces->num; i++) {
+		ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
+	}
+
+	*_ifaces = (struct ctdb_iface_list_old *)talloc_memdup(mem_ctx,
+								outdata.dptr,
+								outdata.dsize);
+	talloc_free(outdata.dptr);
+	if (*_ifaces == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				 "talloc_memdup size %u failed\n",
+				 (unsigned int)outdata.dsize));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  get all tunables
+
+  Copies the tunable list returned by the node into *tunables. Returns
+  0 on success, -1 if the control fails, reports a non-zero status or
+  returns a reply whose size differs from sizeof(*tunables).
+ */
+int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
+			       struct timeval timeout,
+			       uint32_t destnode,
+			       struct ctdb_tunable_list *tunables)
+{
+	TDB_DATA outdata;
+	int ret;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
+			   &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
+		return -1;
+	}
+
+	if (outdata.dsize != sizeof(*tunables)) {
+		DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
+			 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
+		/* the reply was allocated on the long-lived ctdb context;
+		   free it on this path too */
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	*tunables = *(struct ctdb_tunable_list *)outdata.dptr;
+	talloc_free(outdata.dptr);
+	return 0;
+}
+
+/*
+  OR the given bits into the flags of the ctdb context; bits that are
+  already set stay set
+*/
+void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
+{
+	ctdb->flags = ctdb->flags | flags;
+}
+
+/*
+  return the daemon socket name stored in the ctdb context
+*/
+const char *ctdb_get_socketname(struct ctdb_context *ctdb)
+{
+	const char *name = ctdb->daemon.name;
+
+	return name;
+}
+
+/*
+  return the pnn stored in the ctdb context, i.e. the pnn of this node
+*/
+uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
+{
+	uint32_t pnn = ctdb->pnn;
+
+	return pnn;
+}
+
+/*
+ callback for the async helpers used when sending the same control
+ to multiple nodes in parallel.
+
+ Invoked once per control with that control's state. It decrements the
+ outstanding count in the shared client_async_data, bumps fail_count on
+ failure and calls the user's callback or fail_callback with the
+ destination node, the result and any reply data.
+*/
+static void async_callback(struct ctdb_client_control_state *state)
+{
+ struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
+ struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
+ int ret;
+ TDB_DATA outdata;
+ int32_t res = -1;
+ uint32_t destnode = state->c->hdr.destnode;
+
+ outdata.dsize = 0;
+ outdata.dptr = NULL;
+
+ /* one more node has responded */
+ data->count--;
+
+ /* if the control did not complete, report the failure and let
+ the caller decide whether to try again.
+ */
+ if (state->state != CTDB_CONTROL_DONE) {
+ if ( !data->dont_log_errors) {
+ DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
+ }
+ data->fail_count++;
+ if (state->state == CTDB_CONTROL_TIMEOUT) {
+ res = -ETIMEDOUT;
+ } else {
+ res = -1;
+ }
+ if (data->fail_callback) {
+ data->fail_callback(ctdb, destnode, res, outdata,
+ data->callback_data);
+ }
+ return;
+ }
+
+ /* ctdb_control_recv() invokes state->async.fn; clear it first so
+ this function is not re-entered */
+ state->async.fn = NULL;
+
+ /* the reply data is copied onto data, the shared async context */
+ ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
+ if ((ret != 0) || (res != 0)) {
+ if ( !data->dont_log_errors) {
+ DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
+ }
+ data->fail_count++;
+ if (data->fail_callback) {
+ data->fail_callback(ctdb, destnode, res, outdata,
+ data->callback_data);
+ }
+ }
+ /* note: with ret == 0 and res != 0 both fail_callback and callback run */
+ if ((ret == 0) && (data->callback != NULL)) {
+ data->callback(ctdb, destnode, res, outdata,
+ data->callback_data);
+ }
+}
+
+
+/*
+  Attach an in-flight control to an async batch: the batch's
+  outstanding counter is bumped and the control's completion is routed
+  to async_callback() with the batch as private data.
+*/
+void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
+{
+	/* one more reply to wait for */
+	data->count += 1;
+
+	/* hook the control up to the shared completion callback */
+	state->async.private_data = data;
+	state->async.fn = async_callback;
+}
+
+
+/* Run the event loop until every control registered on data has
+   completed (data->count drops to zero).
+
+   Returns 0 if no control failed, -1 if data->fail_count is non-zero.
+*/
+int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
+{
+	while (data->count > 0) {
+		tevent_loop_once(ctdb->ev);
+	}
+
+	if (data->fail_count == 0) {
+		return 0;
+	}
+
+	if (!data->dont_log_errors) {
+		DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
+			 data->fail_count));
+	}
+	return -1;
+}
+
+
+/*
+  Send the same control to every node in the talloc'd array nodes and
+  wait for all replies.
+
+  The number of nodes is derived from the talloc size of nodes.
+  client_callback / fail_callback (either may be NULL) are invoked per
+  node with callback_data as replies arrive.
+
+  Returns 0 if every control succeeded, -1 if a control could not be
+  sent or any node failed.
+ */
+int ctdb_client_async_control(struct ctdb_context *ctdb,
+			      enum ctdb_controls opcode,
+			      uint32_t *nodes,
+			      uint64_t srvid,
+			      struct timeval timeout,
+			      bool dont_log_errors,
+			      TDB_DATA data,
+			      client_async_callback client_callback,
+			      client_async_callback fail_callback,
+			      void *callback_data)
+{
+	struct client_async_data *batch;
+	int node_count;
+	int idx;
+	int status;
+
+	batch = talloc_zero(ctdb, struct client_async_data);
+	CTDB_NO_MEMORY_FATAL(ctdb, batch);
+	batch->opcode = opcode;
+	batch->dont_log_errors = dont_log_errors;
+	batch->callback = client_callback;
+	batch->fail_callback = fail_callback;
+	batch->callback_data = callback_data;
+
+	node_count = talloc_get_size(nodes) / sizeof(uint32_t);
+
+	/* fire off one async control per listed node */
+	for (idx = 0; idx < node_count; idx++) {
+		struct ctdb_client_control_state *state;
+
+		state = ctdb_control_send(ctdb, nodes[idx], srvid, opcode,
+					  0, data, batch, &timeout, NULL);
+		if (state == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
+			talloc_free(batch);
+			return -1;
+		}
+
+		ctdb_client_async_add(batch, state);
+	}
+
+	status = (ctdb_client_async_wait(ctdb, batch) != 0) ? -1 : 0;
+
+	talloc_free(batch);
+	return status;
+}
+
+/*
+  Build a talloc'd array (on mem_ctx) holding the entries of
+  vnn_map->map, leaving out this node's own PNN unless include_self
+  is true.
+ */
+uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
+				struct ctdb_vnn_map *vnn_map,
+				TALLOC_CTX *mem_ctx,
+				bool include_self)
+{
+	unsigned int idx;
+	unsigned int wanted = 0;
+	unsigned int filled = 0;
+	uint32_t *nodes;
+
+	/* first pass: work out how many entries will be kept */
+	for (idx = 0; idx < vnn_map->size; idx++) {
+		if (include_self || vnn_map->map[idx] != ctdb->pnn) {
+			wanted++;
+		}
+	}
+
+	nodes = talloc_array(mem_ctx, uint32_t, wanted);
+	CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+	/* second pass: copy the kept entries */
+	for (idx = 0; idx < vnn_map->size; idx++) {
+		if (include_self || vnn_map->map[idx] != ctdb->pnn) {
+			nodes[filled] = vnn_map->map[idx];
+			filled++;
+		}
+	}
+
+	return nodes;
+}
+
+/* Build a talloc'd array (on mem_ctx) of the PNNs in node_map,
+ * skipping every node that has any flag from mask set and, unless
+ * include_self is true, this node's own PNN */
+static uint32_t *list_of_nodes(struct ctdb_context *ctdb,
+			       struct ctdb_node_map_old *node_map,
+			       TALLOC_CTX *mem_ctx,
+			       uint32_t mask,
+			       bool include_self)
+{
+	const uint32_t skip_pnn = include_self ? CTDB_UNKNOWN_PNN : ctdb->pnn;
+	unsigned int idx;
+	unsigned int wanted = 0;
+	unsigned int filled = 0;
+	uint32_t *nodes;
+
+	/* first pass: count the nodes that survive both filters */
+	for (idx = 0; idx < node_map->num; idx++) {
+		if ((node_map->nodes[idx].flags & mask) == 0 &&
+		    node_map->nodes[idx].pnn != skip_pnn) {
+			wanted++;
+		}
+	}
+
+	nodes = talloc_array(mem_ctx, uint32_t, wanted);
+	CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+	/* second pass: record their PNNs */
+	for (idx = 0; idx < node_map->num; idx++) {
+		if ((node_map->nodes[idx].flags & mask) == 0 &&
+		    node_map->nodes[idx].pnn != skip_pnn) {
+			nodes[filled] = node_map->nodes[idx].pnn;
+			filled++;
+		}
+	}
+
+	return nodes;
+}
+
+/* List the PNNs of all nodes in node_map that have none of the
+ * NODE_FLAGS_INACTIVE bits set; see list_of_nodes() */
+uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
+				struct ctdb_node_map_old *node_map,
+				TALLOC_CTX *mem_ctx,
+				bool include_self)
+{
+	const uint32_t skip_flags = NODE_FLAGS_INACTIVE;
+
+	return list_of_nodes(ctdb, node_map, mem_ctx, skip_flags, include_self);
+}
+
+/* List the PNNs of all nodes in node_map that do not have
+ * NODE_FLAGS_DISCONNECTED set; see list_of_nodes() */
+uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
+				  struct ctdb_node_map_old *node_map,
+				  TALLOC_CTX *mem_ctx,
+				  bool include_self)
+{
+	const uint32_t skip_flags = NODE_FLAGS_DISCONNECTED;
+
+	return list_of_nodes(ctdb, node_map, mem_ctx, skip_flags, include_self);
+}
+
+/*
+  start an async CTDB_CONTROL_GET_CAPABILITIES request to destnode;
+  the returned state is allocated on mem_ctx and is to be completed
+  with ctdb_ctrl_getcapabilities_recv()
+ */
+struct ctdb_client_control_state *
+ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
+{
+	struct ctdb_client_control_state *state;
+
+	state = ctdb_control_send(ctdb, destnode, 0,
+				  CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
+				  mem_ctx, &timeout, NULL);
+	return state;
+}
+
+/*
+  Complete a request started with ctdb_ctrl_getcapabilities_send().
+
+  On success the capability word from the reply is stored in
+  *capabilities (if capabilities is not NULL).
+
+  Returns 0 on success, -1 if the control failed or - when the caller
+  asked for the value - the reply is not exactly one uint32_t.
+ */
+int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
+{
+	int ret;
+	int32_t res;
+	TDB_DATA outdata;
+
+	ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
+	if ( (ret != 0) || (res != 0) ) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
+		return -1;
+	}
+
+	if (capabilities) {
+		/*
+		 * Same sanity check as get_capabilities_callback():
+		 * never read through a missing or wrongly sized reply.
+		 */
+		if ( (outdata.dsize != sizeof(uint32_t)) || (outdata.dptr == NULL) ) {
+			DEBUG(DEBUG_ERR, (__location__ " Invalid length/pointer for getcap reply : %u %p\n", (unsigned)outdata.dsize, outdata.dptr));
+			return -1;
+		}
+		*capabilities = *((uint32_t *)outdata.dptr);
+	}
+
+	return 0;
+}
+
+/*
+  synchronous wrapper: send GET_CAPABILITIES to destnode and wait for
+  the answer, using a temporary talloc context for the request
+ */
+int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
+{
+	TALLOC_CTX *scratch = talloc_new(NULL);
+	struct ctdb_client_control_state *request;
+	int status;
+
+	request = ctdb_ctrl_getcapabilities_send(ctdb, scratch, timeout, destnode);
+	status = ctdb_ctrl_getcapabilities_recv(ctdb, scratch, request, capabilities);
+
+	talloc_free(scratch);
+	return status;
+}
+
+/*
+  per-node success callback used by ctdb_get_capabilities():
+  callback_data is the talloc'd capabilities array, indexed by PNN.
+  Replies that are not exactly one uint32_t, or that come from a PNN
+  beyond the end of the array, are logged and ignored.
+ */
+static void get_capabilities_callback(struct ctdb_context *ctdb,
+				      uint32_t node_pnn, int32_t res,
+				      TDB_DATA outdata, void *callback_data)
+{
+	struct ctdb_node_capabilities *caps =
+		talloc_get_type(callback_data,
+				struct ctdb_node_capabilities);
+	bool reply_ok;
+
+	reply_ok = (outdata.dsize == sizeof(uint32_t)) &&
+		   (outdata.dptr != NULL);
+	if (!reply_ok) {
+		DEBUG(DEBUG_ERR, (__location__ " Invalid length/pointer for getcap callback : %u %p\n", (unsigned)outdata.dsize, outdata.dptr));
+		return;
+	}
+
+	if (!(node_pnn < talloc_array_length(caps))) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " unexpected PNN %u\n", node_pnn));
+		return;
+	}
+
+	caps[node_pnn].capabilities = *((uint32_t *)outdata.dptr);
+	caps[node_pnn].retrieved = true;
+}
+
+/*
+  Query the capabilities of all active nodes in nodemap.
+
+  Returns a talloc'd array (on mem_ctx) with nodemap->num entries,
+  indexed by PNN. Entries whose node answered have .retrieved set to
+  true; use ctdb_get_node_capabilities() to read them.
+
+  Returns NULL on allocation failure or if the control failed on any
+  node.
+ */
+struct ctdb_node_capabilities *
+ctdb_get_capabilities(struct ctdb_context *ctdb,
+		      TALLOC_CTX *mem_ctx,
+		      struct timeval timeout,
+		      struct ctdb_node_map_old *nodemap)
+{
+	uint32_t *nodes;
+	uint32_t i;
+	int res;
+	struct ctdb_node_capabilities *ret;
+
+	ret = talloc_array(mem_ctx, struct ctdb_node_capabilities,
+			   nodemap->num);
+	CTDB_NO_MEMORY_NULL(ctdb, ret);
+	/* Prepopulate the expected PNNs */
+	for (i = 0; i < talloc_array_length(ret); i++) {
+		ret[i].retrieved = false;
+	}
+
+	/* Allocated after ret so the early return above cannot leak it */
+	nodes = list_of_active_nodes(ctdb, nodemap, mem_ctx, true);
+
+	res = ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_CAPABILITIES,
+					nodes, 0, timeout,
+					false, tdb_null,
+					get_capabilities_callback, NULL,
+					ret);
+
+	/*
+	 * The node list is only needed while the controls are in
+	 * flight; don't leave it behind on the caller's context.
+	 */
+	talloc_free(nodes);
+
+	if (res != 0) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to read node capabilities.\n"));
+		TALLOC_FREE(ret);
+	}
+
+	return ret;
+}
+
+/*
+  return a pointer to the capability word recorded for pnn, or NULL if
+  pnn is outside the array or no reply was retrieved for that node
+ */
+uint32_t *
+ctdb_get_node_capabilities(struct ctdb_node_capabilities *caps,
+			   uint32_t pnn)
+{
+	if (pnn >= talloc_array_length(caps)) {
+		return NULL;
+	}
+	if (!caps[pnn].retrieved) {
+		return NULL;
+	}
+
+	return &caps[pnn].capabilities;
+}
+
+/*
+  true only if capabilities were retrieved for pnn and every bit in
+  capabilities_required is set in them
+ */
+bool ctdb_node_has_capabilities(struct ctdb_node_capabilities *caps,
+				uint32_t pnn,
+				uint32_t capabilities_required)
+{
+	uint32_t *node_caps = ctdb_get_node_capabilities(caps, pnn);
+
+	if (node_caps == NULL) {
+		return false;
+	}
+
+	return (*node_caps & capabilities_required) == capabilities_required;
+}
+
+/*
+  recovery daemon ping to main daemon: sends CTDB_CONTROL_RECD_PING
+  to the current node. Returns 0 on success, -1 on failure.
+ */
+int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
+{
+	int32_t res;
+	int ret;
+
+	ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
+			   ctdb, NULL, &res, NULL, NULL);
+	if (ret == 0 && res == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
+	return -1;
+}
+
+/*
+  tell the main daemon how long it took to lock the reclock file:
+  the raw bytes of latency are sent to the current node with
+  CTDB_CONTROL_RECD_RECLOCK_LATENCY.
+
+  Note that the timeout argument is not forwarded to ctdb_control().
+  Returns 0 on success, -1 on failure.
+ */
+int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
+{
+	TDB_DATA data = {
+		.dptr = (uint8_t *)&latency,
+		.dsize = sizeof(latency),
+	};
+	int32_t res;
+	int ret;
+
+	ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
+			   ctdb, NULL, &res, NULL, NULL);
+	if (ret == 0 && res == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
+	return -1;
+}
+
+/*
+  send the given ban state to destnode with
+  CTDB_CONTROL_SET_BAN_STATE. Returns 0 on success, -1 on failure.
+ */
+int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout,
+		      uint32_t destnode, struct ctdb_ban_state *bantime)
+{
+	TDB_DATA data = {
+		.dptr = (uint8_t *)bantime,
+		.dsize = sizeof(*bantime),
+	};
+	int32_t res;
+	int ret;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_SET_BAN_STATE, 0, data,
+			   NULL, NULL, &res, &timeout, NULL);
+	if (ret == 0 && res == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
+	return -1;
+}
+
+/*
+  Start an async CTDB_CONTROL_UPDATE_RECORD to destnode.
+
+  The record (key, header, data) is marshalled into a one-record
+  ctdb_marshall_buffer for ctdb_db and sent as the control payload.
+  The temporary marshall buffer is freed before returning.
+
+  Returns the control state (allocated on mem_ctx), or NULL if
+  marshalling or sending failed.
+ */
+struct ctdb_client_control_state *
+ctdb_ctrl_updaterecord_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	struct ctdb_client_control_state *handle;
+	struct ctdb_marshall_buffer *m;
+	struct ctdb_marshall_buffer *expanded;
+	struct ctdb_rec_data_old *rec;
+	TDB_DATA outdata;
+
+	m = talloc_zero(mem_ctx, struct ctdb_marshall_buffer);
+	if (m == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to allocate marshall buffer for update record\n"));
+		return NULL;
+	}
+
+	m->db_id = ctdb_db->db_id;
+
+	rec = ctdb_marshall_record(m, 0, key, header, data);
+	if (rec == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to marshall record for update record\n"));
+		talloc_free(m);
+		return NULL;
+	}
+
+	/*
+	 * Grow via a separate pointer: if the realloc fails the old
+	 * buffer is still valid and must be freed, which is impossible
+	 * once m itself has been overwritten with NULL.
+	 */
+	expanded = talloc_realloc_size(mem_ctx, m, rec->length + offsetof(struct ctdb_marshall_buffer, data));
+	if (expanded == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata\n"));
+		talloc_free(m);
+		return NULL;
+	}
+	m = expanded;
+
+	m->count++;
+	memcpy((uint8_t *)m + offsetof(struct ctdb_marshall_buffer, data), rec, rec->length);
+
+	outdata.dptr = (uint8_t *)m;
+	outdata.dsize = talloc_get_size(m);
+
+	handle = ctdb_control_send(ctdb, destnode, 0,
+				   CTDB_CONTROL_UPDATE_RECORD, 0, outdata,
+				   mem_ctx, &timeout, NULL);
+	talloc_free(m);
+	return handle;
+}
+
+/*
+  complete a request started with ctdb_ctrl_updaterecord_send();
+  returns 0 on success, -1 if the control or the remote operation
+  failed
+ */
+int ctdb_ctrl_updaterecord_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
+{
+	int32_t res;
+	int ret;
+
+	ret = ctdb_control_recv(ctdb, state, state, NULL, &res, NULL);
+	if ( (ret == 0) && (res == 0) ){
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_update_record_recv failed\n"));
+	return -1;
+}
+
+/*
+  synchronous wrapper around ctdb_ctrl_updaterecord_send() /
+  ctdb_ctrl_updaterecord_recv()
+ */
+int
+ctdb_ctrl_updaterecord(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	return ctdb_ctrl_updaterecord_recv(
+		ctdb,
+		ctdb_ctrl_updaterecord_send(ctdb, mem_ctx, timeout, destnode,
+					    ctdb_db, key, header, data));
+}
diff --git a/ctdb/server/ctdb_cluster_mutex.c b/ctdb/server/ctdb_cluster_mutex.c
new file mode 100644
index 0000000..2fbe301
--- /dev/null
+++ b/ctdb/server/ctdb_cluster_mutex.c
@@ -0,0 +1,382 @@
+/*
+ CTDB cluster mutex handling
+
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Martin Schwenke 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/strv.h"
+#include "lib/util/strv_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/blocking.h"
+
+#include "ctdb_private.h"
+
+#include "ctdb_cluster_mutex.h"
+
+/*
+ * State for one attempt to take the cluster mutex via a helper process.
+ *
+ * ctdb - daemon context, passed to ctdb_kill() by the destructor
+ * handler - called with the initial status from the helper, or
+ * with '2' when the timeout timer fires
+ * private_data - opaque argument for handler
+ * lost_handler - called when the pipe reaches EOF after the initial
+ * response has already been processed
+ * lost_data - opaque argument for lost_handler
+ * fd - pipe to the helper; fd[0] is the read end watched by
+ * fde, fd[1] becomes the helper's stdout
+ * te - timeout timer, NULL when no timeout was requested
+ * fde - fd event watching fd[0]
+ * child - pid of the helper process
+ * start_time - when the attempt started, used to compute latency
+ * have_response - set once the first read event has been handled
+ */
+struct ctdb_cluster_mutex_handle {
+ struct ctdb_context *ctdb;
+ cluster_mutex_handler_t handler;
+ void *private_data;
+ cluster_mutex_lost_handler_t lost_handler;
+ void *lost_data;
+ int fd[2];
+ struct tevent_timer *te;
+ struct tevent_fd *fde;
+ pid_t child;
+ struct timeval start_time;
+ bool have_response;
+};
+
+/* Timer callback: the helper did not answer in time, so report
+ * status '2' together with the elapsed time to the handler (if any) */
+static void cluster_mutex_timeout(struct tevent_context *ev,
+				  struct tevent_timer *te,
+				  struct timeval t, void *private_data)
+{
+	struct ctdb_cluster_mutex_handle *handle =
+		talloc_get_type(private_data, struct ctdb_cluster_mutex_handle);
+	double elapsed = timeval_elapsed(&handle->start_time);
+
+	if (handle->handler == NULL) {
+		return;
+	}
+
+	handle->handler('2', elapsed, handle->private_data);
+}
+
+
+/* Talloc destructor for the handle: forget the read end of the pipe
+ * and send SIGTERM to the helper child, so that a child holding the
+ * mutex releases it when the handle goes away */
+static int cluster_mutex_destructor(struct ctdb_cluster_mutex_handle *h)
+{
+	/* only the stored descriptor number is reset here, nothing is closed */
+	h->fd[0] = -1;
+
+	ctdb_kill(h->ctdb, h->child, SIGTERM);
+
+	return 0;
+}
+
+/* fd event callback for the read end of the helper pipe.
+
+   The first event carries the helper's initial response: the status
+   byte it wrote is passed to the handler, or '3' if no byte could be
+   read. Any later event is only interesting when it is EOF, which is
+   reported through the lost handler.
+*/
+static void cluster_mutex_handler(struct tevent_context *ev,
+				  struct tevent_fd *fde,
+				  uint16_t flags, void *private_data)
+{
+	struct ctdb_cluster_mutex_handle *handle =
+		talloc_get_type(private_data, struct ctdb_cluster_mutex_handle);
+	double elapsed = timeval_elapsed(&handle->start_time);
+	char status = '0';
+	int nread;
+
+	/* The child has produced something, so the timeout is moot */
+	TALLOC_FREE(handle->te);
+
+	nread = sys_read(handle->fd[0], &status, 1);
+
+	if (!handle->have_response) {
+		/* Initial response: hand the status (or the generic
+		 * error code '3') to the handler exactly once */
+		handle->have_response = true;
+
+		if (handle->handler != NULL) {
+			handle->handler(nread == 1 ? status : '3',
+					elapsed,
+					handle->private_data);
+		}
+		return;
+	}
+
+	/* After the initial response only EOF (helper exit) matters;
+	 * any other output is silently ignored */
+	if (nread == 0 && handle->lost_handler != NULL) {
+		handle->lost_handler(handle->lost_data);
+	}
+}
+
+/* Path of the default mutex helper; empty until the first call to
+ * cluster_mutex_helper_args_file() fills it in, then reused */
+static char cluster_mutex_helper[PATH_MAX+1] = "";
+
+/* Build the argv for the default helper: { helper path, argstring, NULL }.
+ *
+ * On first use the helper path is taken from the environment variable
+ * CTDB_CLUSTER_MUTEX_HELPER or, if that is unset, built from
+ * CTDB_HELPER_BINDIR; it must exist and be executable by its owner,
+ * otherwise the process exits. Later calls reuse the stored path.
+ *
+ * Returns false on memory allocation failure. */
+static bool cluster_mutex_helper_args_file(TALLOC_CTX *mem_ctx,
+					   const char *argstring,
+					   char ***argv)
+{
+	char **args = NULL;
+
+	if (cluster_mutex_helper[0] == '\0') {
+		const size_t size = sizeof(cluster_mutex_helper);
+		const char *env = getenv("CTDB_CLUSTER_MUTEX_HELPER");
+		struct stat st;
+		int ret;
+
+		if (env == NULL) {
+			ret = snprintf(cluster_mutex_helper,
+				       size,
+				       "%s/%s",
+				       CTDB_HELPER_BINDIR,
+				       "ctdb_mutex_fcntl_helper");
+			if (ret < 0 || (size_t)ret >= size) {
+				D_ERR("Unable to set cluster mutex helper - "
+				      "path too long\n");
+				exit(1);
+			}
+		} else {
+			size_t len = strlcpy(cluster_mutex_helper, env, size);
+
+			if (len >= size) {
+				DBG_ERR("error: CTDB_CLUSTER_MUTEX_HELPER too long\n");
+				exit(1);
+			}
+		}
+
+		ret = stat(cluster_mutex_helper, &st);
+		if (ret != 0) {
+			D_ERR("Unable to set cluster mutex helper \"%s\" - %s\n",
+			      cluster_mutex_helper,
+			      strerror(errno));
+			exit(1);
+		}
+
+		if ((st.st_mode & S_IXUSR) == 0) {
+			D_ERR("Unable to set cluster_mutex helper \"%s\" - "
+			      "not executable\n",
+			      cluster_mutex_helper);
+			exit(1);
+		}
+
+		D_NOTICE("Set cluster mutex helper to \"%s\"\n", cluster_mutex_helper);
+	}
+
+	/* Three slots: helper path, the file argument and the NULL terminator */
+	args = talloc_array(mem_ctx, char *, 3);
+	if (args == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		return false;
+	}
+
+	args[0] = cluster_mutex_helper;
+	args[1] = talloc_strdup(args, argstring);
+	if (args[1] == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		return false;
+	}
+	args[2] = NULL;
+
+	*argv = args;
+	return true;
+}
+
+/* Build the argv for a user supplied helper command: argstring is
+ * split on spaces and tabs and each word becomes one argument. The
+ * split buffer is re-parented to the argv array so both are freed
+ * together.
+ *
+ * Returns false if the string cannot be split, is empty, or memory
+ * allocation fails. */
+static bool cluster_mutex_helper_args_cmd(TALLOC_CTX *mem_ctx,
+					  const char *argstring,
+					  char ***argv)
+{
+	char **args = NULL;
+	char *strv = NULL;
+	char *word = NULL;
+	int count;
+	int idx;
+	int ret;
+
+	ret = strv_split(mem_ctx, &strv, argstring, " \t");
+	if (ret != 0) {
+		D_ERR("Unable to parse mutex helper command \"%s\" (%s)\n",
+		      argstring,
+		      strerror(ret));
+		return false;
+	}
+
+	count = strv_count(strv);
+	if (count == 0) {
+		D_ERR("Mutex helper command is empty \"%s\"\n", argstring);
+		return false;
+	}
+
+	/* One slot per word plus the terminating NULL */
+	args = talloc_array(mem_ctx, char *, count + 1);
+	if (args == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		return false;
+	}
+
+	talloc_steal(args, strv);
+
+	for (idx = 0; idx < count; idx++) {
+		word = strv_next(strv, word);
+		args[idx] = word;
+	}
+	args[count] = NULL;
+
+	*argv = args;
+	return true;
+}
+
+/* Choose how to build the helper argv: an argstring starting with
+ * '!' is a complete helper command (the '!' is stripped), anything
+ * else is handed to the default helper as its argument */
+static bool cluster_mutex_helper_args(TALLOC_CTX *mem_ctx,
+				      const char *argstring,
+				      char ***argv)
+{
+	if (argstring != NULL && argstring[0] == '!') {
+		return cluster_mutex_helper_args_cmd(mem_ctx,
+						     argstring + 1,
+						     argv);
+	}
+
+	return cluster_mutex_helper_args_file(mem_ctx, argstring, argv);
+}
+
+/*
+ * Start an attempt to take the cluster mutex.
+ *
+ * A helper process is forked and exec'd with its stdout connected to
+ * a pipe; its first status byte is delivered to handler, and EOF on
+ * the pipe after that is reported through lost_handler.
+ *
+ * mem_ctx      - talloc parent of the returned handle
+ * ctdb         - daemon context (event loop, fork/kill tracking)
+ * argstring    - "!command args..." for a custom helper, otherwise
+ *                the argument for the default helper
+ * timeout      - seconds before handler is called with status '2';
+ *                0 disables the timeout
+ * handler      - initial status callback, may be NULL
+ * private_data - argument for handler
+ * lost_handler - callback for loss of the helper, may be NULL
+ * lost_data    - argument for lost_handler
+ *
+ * Returns the handle, or NULL on failure. Freeing the handle sends
+ * SIGTERM to the helper.
+ */
+struct ctdb_cluster_mutex_handle *
+ctdb_cluster_mutex(TALLOC_CTX *mem_ctx,
+		   struct ctdb_context *ctdb,
+		   const char *argstring,
+		   int timeout,
+		   cluster_mutex_handler_t handler,
+		   void *private_data,
+		   cluster_mutex_lost_handler_t lost_handler,
+		   void *lost_data)
+{
+	struct ctdb_cluster_mutex_handle *h;
+	char **args;
+	sigset_t sigset_term;
+	int ret;
+
+	h = talloc(mem_ctx, struct ctdb_cluster_mutex_handle);
+	if (h == NULL) {
+		DBG_ERR("out of memory\n");
+		return NULL;
+	}
+
+	/*
+	 * Fill in every field up front. In particular the destructor
+	 * passes h->ctdb to ctdb_kill(), so it must be valid before
+	 * the destructor is installed and any error path frees h.
+	 */
+	h->ctdb = ctdb;
+	h->handler = handler;
+	h->private_data = private_data;
+	h->lost_handler = lost_handler;
+	h->lost_data = lost_data;
+	h->te = NULL;
+	h->fde = NULL;
+	h->start_time = timeval_current();
+	h->fd[0] = -1;
+	h->fd[1] = -1;
+	h->have_response = false;
+
+	ret = pipe(h->fd);
+	if (ret != 0) {
+		talloc_free(h);
+		DBG_ERR("Failed to open pipe\n");
+		return NULL;
+	}
+	set_close_on_exec(h->fd[0]);
+
+	/* Create arguments for lock helper */
+	if (!cluster_mutex_helper_args(h, argstring, &args)) {
+		close(h->fd[0]);
+		close(h->fd[1]);
+		talloc_free(h);
+		return NULL;
+	}
+
+	/* Keep SIGTERM blocked across the fork; both sides unblock below */
+	sigemptyset(&sigset_term);
+	sigaddset(&sigset_term, SIGTERM);
+	ret = sigprocmask(SIG_BLOCK, &sigset_term, NULL);
+	if (ret != 0) {
+		DBG_WARNING("Failed to block SIGTERM (%d)\n", errno);
+	}
+
+	h->child = ctdb_fork(ctdb);
+	if (h->child == (pid_t)-1) {
+		close(h->fd[0]);
+		close(h->fd[1]);
+		talloc_free(h);
+		ret = sigprocmask(SIG_UNBLOCK, &sigset_term, NULL);
+		if (ret != 0) {
+			DBG_WARNING("Failed to unblock SIGTERM (%d)\n", errno);
+		}
+		return NULL;
+	}
+
+	if (h->child == 0) {
+		struct sigaction sa = {
+			.sa_handler = SIG_DFL,
+		};
+
+		ret = sigaction(SIGTERM, &sa, NULL);
+		if (ret != 0) {
+			DBG_WARNING("Failed to reset signal handler (%d)\n",
+				    errno);
+		}
+
+		ret = sigprocmask(SIG_UNBLOCK, &sigset_term, NULL);
+		if (ret != 0) {
+			DBG_WARNING("Failed to unblock SIGTERM (%d)\n", errno);
+		}
+
+		/* Make stdout point to the pipe */
+		close(STDOUT_FILENO);
+		dup2(h->fd[1], STDOUT_FILENO);
+		close(h->fd[1]);
+
+		execv(args[0], args);
+
+		/* Only happens on error */
+		DBG_ERR("execv() failed\n");
+		_exit(1);
+	}
+
+	/* Parent */
+
+	ret = sigprocmask(SIG_UNBLOCK, &sigset_term, NULL);
+	if (ret != 0) {
+		DBG_WARNING("Failed to unblock SIGTERM (%d)\n", errno);
+	}
+
+	DBG_DEBUG("Created PIPE FD:%d\n", h->fd[0]);
+
+	close(h->fd[1]);
+	h->fd[1] = -1;
+
+	/* From here on freeing h terminates the helper */
+	talloc_set_destructor(h, cluster_mutex_destructor);
+
+	if (timeout != 0) {
+		h->te = tevent_add_timer(ctdb->ev, h,
+					 timeval_current_ofs(timeout, 0),
+					 cluster_mutex_timeout, h);
+		if (h->te == NULL) {
+			/* A requested timeout must not be silently dropped */
+			DBG_ERR("Failed to add timeout timer\n");
+			close(h->fd[0]);
+			h->fd[0] = -1;
+			talloc_free(h);
+			return NULL;
+		}
+	}
+
+	h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
+			       cluster_mutex_handler, (void *)h);
+	if (h->fde == NULL) {
+		/* No fd event exists to auto-close the pipe, do it here */
+		close(h->fd[0]);
+		h->fd[0] = -1;
+		talloc_free(h);
+		return NULL;
+	}
+	tevent_fd_set_auto_close(h->fde);
+
+	return h;
+}
diff --git a/ctdb/server/ctdb_cluster_mutex.h b/ctdb/server/ctdb_cluster_mutex.h
new file mode 100644
index 0000000..4587290
--- /dev/null
+++ b/ctdb/server/ctdb_cluster_mutex.h
@@ -0,0 +1,51 @@
+/*
+ CTDB cluster mutex handling
+
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Martin Schwenke 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CLUSTER_MUTEX_H__
+#define __CTDB_CLUSTER_MUTEX_H__
+
+#include <talloc.h>
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "ctdb_private.h"
+
+/* Opaque handle for an attempt to take the cluster mutex */
+struct ctdb_cluster_mutex_handle;
+
+/*
+ * Callback receiving the initial result of a mutex attempt.
+ *
+ * status is the single byte written by the helper process; the
+ * implementation passes '2' when the timeout expires and '3' when no
+ * status byte could be read. latency is the time elapsed since the
+ * attempt was started.
+ */
+typedef void (*cluster_mutex_handler_t) (
+ char status,
+ double latency,
+ void *private_data);
+
+/*
+ * Callback invoked when the helper's pipe reaches EOF after the
+ * initial result has already been delivered.
+ */
+typedef void (*cluster_mutex_lost_handler_t) (void *private_data);
+
+/*
+ * Start an attempt to take the cluster mutex using a helper process.
+ *
+ * argstring starting with '!' is run as a complete helper command,
+ * otherwise it is passed as the argument to the default helper.
+ * timeout is in seconds; 0 means no timeout. handler and lost_handler
+ * may be NULL.
+ *
+ * Returns a handle allocated on mem_ctx, or NULL on failure. Freeing
+ * the handle sends SIGTERM to the helper process.
+ */
+struct ctdb_cluster_mutex_handle *
+ctdb_cluster_mutex(TALLOC_CTX *mem_ctx,
+ struct ctdb_context *ctdb,
+ const char *argstring,
+ int timeout,
+ cluster_mutex_handler_t handler,
+ void *private_data,
+ cluster_mutex_lost_handler_t lost_handler,
+ void *lost_data);
+
+#endif /* __CTDB_CLUSTER_MUTEX_H__ */
diff --git a/ctdb/server/ctdb_config.c b/ctdb/server/ctdb_config.c
new file mode 100644
index 0000000..3f61fda
--- /dev/null
+++ b/ctdb/server/ctdb_config.c
@@ -0,0 +1,183 @@
+/*
+ CTDB daemon config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "lib/util/debug.h"
+
+#include "common/conf.h"
+#include "common/logging_conf.h"
+#include "common/path.h"
+
+#include "cluster/cluster_conf.h"
+#include "database/database_conf.h"
+#include "event/event_conf.h"
+#include "failover/failover_conf.h"
+#include "legacy_conf.h"
+
+#include "ctdb_config.h"
+
+/* Global daemon configuration; its fields are registered with the
+ * configuration context by setup_config_pointers() */
+struct ctdb_config ctdb_config;
+
+/*
+ * Register the address of each ctdb_config field with the matching
+ * section/key of the configuration context, one call per option,
+ * using the string, integer or boolean variant according to the
+ * field's type.
+ */
+static void setup_config_pointers(struct conf_context *conf)
+{
+ /*
+ * Cluster
+ */
+
+ conf_assign_string_pointer(conf,
+ CLUSTER_CONF_SECTION,
+ CLUSTER_CONF_TRANSPORT,
+ &ctdb_config.transport);
+ conf_assign_string_pointer(conf,
+ CLUSTER_CONF_SECTION,
+ CLUSTER_CONF_NODE_ADDRESS,
+ &ctdb_config.node_address);
+ conf_assign_string_pointer(conf,
+ CLUSTER_CONF_SECTION,
+ CLUSTER_CONF_CLUSTER_LOCK,
+ &ctdb_config.cluster_lock);
+ conf_assign_string_pointer(conf,
+ CLUSTER_CONF_SECTION,
+ CLUSTER_CONF_RECOVERY_LOCK,
+ &ctdb_config.recovery_lock);
+ conf_assign_integer_pointer(conf,
+ CLUSTER_CONF_SECTION,
+ CLUSTER_CONF_LEADER_TIMEOUT,
+ &ctdb_config.leader_timeout);
+ conf_assign_boolean_pointer(conf,
+ CLUSTER_CONF_SECTION,
+ CLUSTER_CONF_LEADER_CAPABILITY,
+ &ctdb_config.leader_capability);
+
+ /*
+ * Database
+ */
+
+ conf_assign_string_pointer(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_VOLATILE_DB_DIR,
+ &ctdb_config.dbdir_volatile);
+ conf_assign_string_pointer(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_PERSISTENT_DB_DIR,
+ &ctdb_config.dbdir_persistent);
+ conf_assign_string_pointer(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_STATE_DB_DIR,
+ &ctdb_config.dbdir_state);
+ conf_assign_string_pointer(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_LOCK_DEBUG_SCRIPT,
+ &ctdb_config.lock_debug_script);
+ conf_assign_boolean_pointer(conf,
+ DATABASE_CONF_SECTION,
+ DATABASE_CONF_TDB_MUTEXES,
+ &ctdb_config.tdb_mutexes);
+
+ /*
+ * Event
+ */
+ conf_assign_string_pointer(conf,
+ EVENT_CONF_SECTION,
+ EVENT_CONF_DEBUG_SCRIPT,
+ &ctdb_config.event_debug_script);
+
+ /*
+ * Failover
+ */
+ conf_assign_boolean_pointer(conf,
+ FAILOVER_CONF_SECTION,
+ FAILOVER_CONF_DISABLED,
+ &ctdb_config.failover_disabled);
+
+ /*
+ * Legacy
+ */
+
+ conf_assign_boolean_pointer(conf,
+ LEGACY_CONF_SECTION,
+ LEGACY_CONF_REALTIME_SCHEDULING,
+ &ctdb_config.realtime_scheduling);
+ conf_assign_boolean_pointer(conf,
+ LEGACY_CONF_SECTION,
+ LEGACY_CONF_LMASTER_CAPABILITY,
+ &ctdb_config.lmaster_capability);
+ conf_assign_boolean_pointer(conf,
+ LEGACY_CONF_SECTION,
+ LEGACY_CONF_START_AS_STOPPED,
+ &ctdb_config.start_as_stopped);
+ conf_assign_boolean_pointer(conf,
+ LEGACY_CONF_SECTION,
+ LEGACY_CONF_START_AS_DISABLED,
+ &ctdb_config.start_as_disabled);
+ conf_assign_string_pointer(conf,
+ LEGACY_CONF_SECTION,
+ LEGACY_CONF_SCRIPT_LOG_LEVEL,
+ &ctdb_config.script_log_level);
+}
+
+/*
+ * Create the daemon's configuration context, register all sections
+ * and the ctdb_config pointers, and load the configuration file.
+ *
+ * A missing configuration file (ENOENT) is not an error. On success
+ * the context is returned in *result and 0 is returned; otherwise an
+ * errno-style value is returned and the context is freed.
+ */
+int ctdbd_config_load(TALLOC_CTX *mem_ctx,
+		      struct conf_context **result)
+{
+	struct conf_context *conf = NULL;
+	char *conf_file = NULL;
+	int ret;
+
+	ret = conf_init(mem_ctx, &conf);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Register every configuration section with the context */
+	logging_conf_init(conf, NULL);
+	cluster_conf_init(conf);
+	database_conf_init(conf);
+	event_conf_init(conf);
+	failover_conf_init(conf);
+	legacy_conf_init(conf);
+
+	setup_config_pointers(conf);
+
+	if (! conf_valid(conf)) {
+		talloc_free(conf);
+		return EINVAL;
+	}
+
+	conf_file = path_config(conf);
+	if (conf_file == NULL) {
+		D_ERR("Memory allocation error\n");
+		talloc_free(conf);
+		return ENOMEM;
+	}
+
+	ret = conf_load(conf, conf_file, true);
+	/* Configuration file does not need to exist */
+	if (ret != 0 && ret != ENOENT) {
+		D_ERR("Failed to load configuration file %s\n", conf_file);
+		talloc_free(conf);
+		return ret;
+	}
+
+	talloc_free(conf_file);
+	*result = conf;
+
+	return 0;
+}
diff --git a/ctdb/server/ctdb_config.h b/ctdb/server/ctdb_config.h
new file mode 100644
index 0000000..7ccda7d
--- /dev/null
+++ b/ctdb/server/ctdb_config.h
@@ -0,0 +1,59 @@
+/*
+ CTDB daemon config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CONFIG_H__
+#define __CTDB_CONFIG_H__
+
+#include "common/conf.h"
+
+/*
+ * Daemon configuration values, grouped by configuration section.
+ * Each field is bound to its configuration option in
+ * setup_config_pointers() (ctdb_config.c).
+ */
+struct ctdb_config {
+ /* Cluster */
+ const char *transport;
+ const char *node_address;
+ const char *cluster_lock;
+ const char *recovery_lock;
+ int leader_timeout;
+ bool leader_capability;
+
+ /* Database */
+ const char *dbdir_volatile;
+ const char *dbdir_persistent;
+ const char *dbdir_state;
+ const char *lock_debug_script;
+ bool tdb_mutexes;
+
+ /* Event */
+ const char *event_debug_script;
+
+ /* Failover */
+ bool failover_disabled;
+
+ /* Legacy */
+ bool realtime_scheduling;
+ bool lmaster_capability;
+ bool start_as_stopped;
+ bool start_as_disabled;
+ const char *script_log_level;
+};
+
+/* The single global configuration instance, defined in ctdb_config.c */
+extern struct ctdb_config ctdb_config;
+
+/*
+ * Initialise and load the daemon configuration; on success the new
+ * configuration context is returned in *conf and 0 is returned,
+ * otherwise an errno-style value is returned.
+ */
+int ctdbd_config_load(TALLOC_CTX *mem_ctx, struct conf_context **conf);
+
+#endif /* __CTDB_CONFIG_H__ */
diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c
new file mode 100644
index 0000000..422c4cf
--- /dev/null
+++ b/ctdb/server/ctdb_control.c
@@ -0,0 +1,1097 @@
+/*
+ ctdb_control protocol code
+
+ Copyright (C) Andrew Tridgell 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/talloc_report.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "protocol/protocol_private.h"
+
+#include "common/reqid.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+
+/*
+ * Book-keeping for a control sent by ctdb_daemon_send_control() that
+ * is waiting for its reply or for the control timeout to fire.
+ */
+struct ctdb_control_state {
+ struct ctdb_context *ctdb;
+ uint32_t reqid; /* allocated with reqid_new(); matched against the reply header */
+ ctdb_control_callback_fn_t callback; /* invoked with the reply, or with -1 on timeout */
+ void *private_data; /* handed back to callback unchanged */
+ unsigned flags; /* CTDB_CTRL_FLAG_* the control was sent with */
+};
+
+
+/*
+ Produce a talloc report of the whole memory hierarchy and hand it
+ back in outdata. The report is allocated as a child of outdata and
+ the terminating NUL is not counted in dsize.
+
+ Returns 0 on success, -1 if the report could not be generated.
+ */
+int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata)
+{
+ char *report = talloc_report_str(outdata, NULL);
+ size_t reportlen;
+
+ if (report == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " talloc_report_str failed\n"));
+ return -1;
+ }
+
+ /* talloc size includes the trailing zero, which is not sent */
+ reportlen = talloc_get_size(report);
+ if (reportlen != 0) {
+ reportlen--;
+ }
+
+ *outdata = (TDB_DATA) {
+ .dptr = (uint8_t *)report,
+ .dsize = reportlen,
+ };
+ return 0;
+}
+
+/*
+ Log that a retired control was requested and fail it.
+
+ unsupported is the name of the control that was requested; alternate,
+ when not NULL, names the control that should be used instead.
+ Always returns -1.
+ */
+static int32_t control_not_implemented(const char *unsupported,
+ const char *alternate)
+{
+ if (alternate != NULL) {
+ DEBUG(DEBUG_ERR,
+ ("Control %s is not implemented any more, use %s instead\n",
+ unsupported, alternate));
+ return -1;
+ }
+
+ DEBUG(DEBUG_ERR,
+ ("Control %s is not implemented any more\n",
+ unsupported));
+ return -1;
+}
+
+/* State kept between receiving an ECHO_DATA control and replying to it */
+struct ctdb_echo_data_state {
+ struct ctdb_context *ctdb;
+ struct ctdb_req_control_old *c; /* the request, taken over so it can be replied to later */
+ struct ctdb_echo_data *data; /* payload parsed from the request */
+};
+
+static void ctdb_echo_data_timeout(
+ struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval now,
+ void *private_data);
+
+/*
+ Handle CTDB_CONTROL_ECHO_DATA: parse the payload and schedule a timer
+ whose handler sends the payload back as the reply.
+
+ On success the request c is moved under the state, *async_reply is set
+ to true and 0 is returned. Returns -1 if the payload cannot be parsed
+ or the timer cannot be created.
+ */
+static int32_t ctdb_control_echo_data(
+ struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata,
+ bool *async_reply)
+{
+ struct ctdb_echo_data_state *state = NULL;
+ struct tevent_timer *te = NULL;
+ /*
+ * NOTE(review): delay is never assigned after this initialisation,
+ * so the timer below always fires immediately. If the request is
+ * meant to carry a delay, it is not applied here - confirm.
+ */
+ uint32_t delay = 0;
+ size_t np = 0;
+ int ret;
+
+ state = talloc_zero(ctdb, struct ctdb_echo_data_state);
+ CTDB_NO_MEMORY(ctdb, state);
+ state->ctdb = ctdb;
+
+ /* the parsed data is allocated as a child of state */
+ ret = ctdb_echo_data_pull(
+ indata.dptr, indata.dsize, state, &state->data, &np);
+ if (ret != 0) {
+ DBG_DEBUG("ctdb_echo_data_pull failed: %s\n",
+ strerror(ret));
+ TALLOC_FREE(state);
+ return -1;
+ }
+
+ /* the timer is owned by state, so freeing state cancels it */
+ te = tevent_add_timer(
+ ctdb->ev,
+ state,
+ timeval_current_ofs_msec(delay),
+ ctdb_echo_data_timeout,
+ state);
+ if (te == NULL) {
+ DBG_DEBUG("tevent_add_timer failed\n");
+ TALLOC_FREE(state);
+ return -1;
+ }
+
+ /* only take ownership of the request once nothing can fail any more */
+ state->c = talloc_move(state, &c);
+ *async_reply = true;
+
+ return 0;
+}
+
+/*
+ Timer handler for ECHO_DATA: serialise the stored payload and send it
+ back as the reply to the saved request, then free the state.
+
+ If the reply buffer cannot be allocated no reply is sent at all.
+ */
+static void ctdb_echo_data_timeout(
+ struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval now,
+ void *private_data)
+{
+ struct ctdb_echo_data_state *state = talloc_get_type_abort(
+ private_data, struct ctdb_echo_data_state);
+ size_t len = ctdb_echo_data_len(state->data);
+ uint8_t *buf = NULL;
+ size_t np;
+ TDB_DATA data;
+
+ DBG_DEBUG("reqid=%"PRIu32" len=%zu\n", state->c->hdr.reqid, len);
+
+ buf = talloc_array(state, uint8_t, len);
+ if (buf == NULL) {
+ DBG_WARNING("talloc_array(%zu) failed\n", len);
+ goto done;
+ }
+ ctdb_echo_data_push(state->data, buf, &np);
+ data = (TDB_DATA) { .dptr = buf, .dsize = np };
+
+ ctdb_request_control_reply(state->ctdb, state->c, &data, 0, NULL);
+
+done:
+ /* frees the saved request, the parsed payload and buf with it */
+ TALLOC_FREE(state);
+}
+
+/*
+ Set NODE_FLAGS_PERMANENTLY_DISABLED on the current node.
+ Returns 0 on success, -1 if the current node cannot be found.
+ */
+static int ctdb_control_disable_node(struct ctdb_context *ctdb)
+{
+ struct ctdb_node *self = ctdb_find_node(ctdb, CTDB_CURRENT_NODE);
+
+ if (self != NULL) {
+ D_ERR("Disable node\n");
+ self->flags |= NODE_FLAGS_PERMANENTLY_DISABLED;
+ return 0;
+ }
+
+ /* Can't happen */
+ DBG_ERR("Unable to find current node\n");
+ return -1;
+}
+
+/*
+ Clear NODE_FLAGS_PERMANENTLY_DISABLED on the current node.
+ Returns 0 on success, -1 if the current node cannot be found.
+ */
+static int ctdb_control_enable_node(struct ctdb_context *ctdb)
+{
+ struct ctdb_node *self = ctdb_find_node(ctdb, CTDB_CURRENT_NODE);
+
+ if (self != NULL) {
+ D_ERR("Enable node\n");
+ self->flags &= ~NODE_FLAGS_PERMANENTLY_DISABLED;
+ return 0;
+ }
+
+ /* Can't happen */
+ DBG_ERR("Unable to find current node\n");
+ return -1;
+}
+
+/*
+ process a control request
+
+ Dispatches on c->opcode. indata is the request payload; reply data,
+ if any, is returned through outdata. A handler that will reply later
+ sets *async_reply to true. Handlers may set *errormsg to a message
+ that is sent back with the reply.
+
+ The return value becomes the status of the reply; for some controls
+ (e.g. GET_RECMODE, GET_PID, GET_PNN, PING) it is the result itself.
+ Unknown opcodes and controls that are no longer implemented return -1.
+ */
+static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata,
+ TDB_DATA *outdata, uint32_t srcnode,
+ const char **errormsg,
+ bool *async_reply)
+{
+ uint32_t opcode = c->opcode;
+ uint64_t srvid = c->srvid;
+ uint32_t client_id = c->client_id;
+ /* static: GET_DEBUG returns a pointer to this, so it must outlive the call */
+ static int level = DEBUG_ERR;
+
+ switch (opcode) {
+ case CTDB_CONTROL_PROCESS_EXISTS: {
+ CHECK_CONTROL_DATA_SIZE(sizeof(pid_t));
+ return ctdb_control_process_exists(ctdb, *(pid_t *)indata.dptr);
+ }
+
+ case CTDB_CONTROL_SET_DEBUG: {
+ union {
+ uint8_t *ptr;
+ int32_t *level;
+ } debug;
+ CHECK_CONTROL_DATA_SIZE(sizeof(int32_t));
+ debug.ptr = indata.dptr;
+ debuglevel_set(*debug.level);
+ return 0;
+ }
+
+ case CTDB_CONTROL_GET_DEBUG: {
+ CHECK_CONTROL_DATA_SIZE(0);
+ level = debuglevel_get();
+ outdata->dptr = (uint8_t *)&(level);
+ /* size of the buffer actually being returned */
+ outdata->dsize = sizeof(level);
+ return 0;
+ }
+
+ case CTDB_CONTROL_STATISTICS: {
+ CHECK_CONTROL_DATA_SIZE(0);
+ ctdb->statistics.memory_used = talloc_total_size(NULL);
+ ctdb->statistics.num_clients = ctdb->num_clients;
+ ctdb->statistics.frozen = (ctdb_db_all_frozen(ctdb) ? 1 : 0);
+ ctdb->statistics.recovering = (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE);
+ ctdb->statistics.statistics_current_time = timeval_current();
+
+ outdata->dptr = (uint8_t *)&ctdb->statistics;
+ outdata->dsize = sizeof(ctdb->statistics);
+ return 0;
+ }
+
+ case CTDB_CONTROL_GET_ALL_TUNABLES: {
+ CHECK_CONTROL_DATA_SIZE(0);
+ outdata->dptr = (uint8_t *)&ctdb->tunable;
+ outdata->dsize = sizeof(ctdb->tunable);
+ return 0;
+ }
+
+ case CTDB_CONTROL_DUMP_MEMORY: {
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_dump_memory(ctdb, outdata);
+ }
+
+ case CTDB_CONTROL_STATISTICS_RESET: {
+ struct ctdb_db_context *ctdb_db;
+
+ CHECK_CONTROL_DATA_SIZE(0);
+ ZERO_STRUCT(ctdb->statistics);
+ for (ctdb_db = ctdb->db_list;
+ ctdb_db != NULL;
+ ctdb_db = ctdb_db->next) {
+ ctdb_db_statistics_reset(ctdb_db);
+ }
+ ctdb->statistics.statistics_start_time = timeval_current();
+ return 0;
+ }
+
+ case CTDB_CONTROL_GETVNNMAP:
+ return ctdb_control_getvnnmap(ctdb, opcode, indata, outdata);
+
+ case CTDB_CONTROL_GET_DBMAP:
+ return ctdb_control_getdbmap(ctdb, opcode, indata, outdata);
+
+ case CTDB_CONTROL_GET_NODEMAPv4:
+ return control_not_implemented("GET_NODEMAPv4", "GET_NODEMAP");
+
+ case CTDB_CONTROL_GET_NODEMAP:
+ return ctdb_control_getnodemap(ctdb, opcode, indata, outdata);
+
+ case CTDB_CONTROL_GET_NODES_FILE:
+ return ctdb_control_getnodesfile(ctdb, opcode, indata, outdata);
+
+ case CTDB_CONTROL_RELOAD_NODES_FILE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_reload_nodes_file(ctdb, opcode);
+
+ case CTDB_CONTROL_SET_DB_STICKY: {
+ uint32_t db_id;
+ struct ctdb_db_context *ctdb_db;
+
+ CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
+ db_id = *(uint32_t *)indata.dptr;
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (ctdb_db == NULL) return -1;
+ return ctdb_set_db_sticky(ctdb, ctdb_db);
+ }
+
+ case CTDB_CONTROL_SETVNNMAP:
+ return ctdb_control_setvnnmap(ctdb, opcode, indata, outdata);
+
+ case CTDB_CONTROL_PULL_DB:
+ return control_not_implemented("PULL_DB", NULL);
+
+ case CTDB_CONTROL_SET_DMASTER:
+ return control_not_implemented("SET_DMASTER", NULL);
+
+ case CTDB_CONTROL_PUSH_DB:
+ return control_not_implemented("PUSH_DB", NULL);
+
+ case CTDB_CONTROL_GET_RECMODE: {
+ return ctdb->recovery_mode;
+ }
+
+ case CTDB_CONTROL_SET_RECMASTER:
+ return control_not_implemented("SET_RECMASTER", NULL);
+
+ case CTDB_CONTROL_GET_RECMASTER:
+ return control_not_implemented("GET_RECMASTER", NULL);
+
+ case CTDB_CONTROL_GET_PID:
+ return getpid();
+
+ case CTDB_CONTROL_GET_PNN:
+ return ctdb->pnn;
+
+ case CTDB_CONTROL_PING:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb->num_clients;
+
+ case CTDB_CONTROL_GET_RUNSTATE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ outdata->dptr = (uint8_t *)&ctdb->runstate;
+ outdata->dsize = sizeof(uint32_t);
+ return 0;
+
+
+ case CTDB_CONTROL_SET_DB_READONLY: {
+ uint32_t db_id;
+ struct ctdb_db_context *ctdb_db;
+
+ CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
+ db_id = *(uint32_t *)indata.dptr;
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (ctdb_db == NULL) return -1;
+ return ctdb_set_db_readonly(ctdb, ctdb_db);
+ }
+ case CTDB_CONTROL_GET_DBNAME: {
+ uint32_t db_id;
+ struct ctdb_db_context *ctdb_db;
+
+ CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
+ db_id = *(uint32_t *)indata.dptr;
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (ctdb_db == NULL) return -1;
+ outdata->dptr = discard_const(ctdb_db->db_name);
+ outdata->dsize = strlen(ctdb_db->db_name)+1;
+ return 0;
+ }
+
+ case CTDB_CONTROL_GETDBPATH: {
+ uint32_t db_id;
+ struct ctdb_db_context *ctdb_db;
+
+ CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
+ db_id = *(uint32_t *)indata.dptr;
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (ctdb_db == NULL) return -1;
+ outdata->dptr = discard_const(ctdb_db->db_path);
+ outdata->dsize = strlen(ctdb_db->db_path)+1;
+ return 0;
+ }
+
+ case CTDB_CONTROL_DB_ATTACH:
+ return ctdb_control_db_attach(ctdb,
+ indata,
+ outdata,
+ 0,
+ srcnode,
+ client_id,
+ c,
+ async_reply);
+
+ case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+ return ctdb_control_db_attach(ctdb,
+ indata,
+ outdata,
+ CTDB_DB_FLAGS_PERSISTENT,
+ srcnode,
+ client_id,
+ c,
+ async_reply);
+
+ case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+ return ctdb_control_db_attach(ctdb,
+ indata,
+ outdata,
+ CTDB_DB_FLAGS_REPLICATED,
+ srcnode,
+ client_id,
+ c,
+ async_reply);
+
+ case CTDB_CONTROL_SET_CALL:
+ return control_not_implemented("SET_CALL", NULL);
+
+ case CTDB_CONTROL_TRAVERSE_START:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start));
+ return ctdb_control_traverse_start(ctdb, indata, outdata, srcnode, client_id);
+
+ case CTDB_CONTROL_TRAVERSE_START_EXT:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start_ext));
+ return ctdb_control_traverse_start_ext(ctdb, indata, outdata, srcnode, client_id);
+
+ case CTDB_CONTROL_TRAVERSE_ALL:
+ return ctdb_control_traverse_all(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+ return ctdb_control_traverse_all_ext(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_TRAVERSE_DATA:
+ return ctdb_control_traverse_data(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_TRAVERSE_KILL:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start));
+ return ctdb_control_traverse_kill(ctdb, indata, outdata, srcnode);
+
+ case CTDB_CONTROL_REGISTER_SRVID:
+ return daemon_register_message_handler(ctdb, client_id, srvid);
+
+ case CTDB_CONTROL_DEREGISTER_SRVID:
+ return daemon_deregister_message_handler(ctdb, client_id, srvid);
+
+ case CTDB_CONTROL_CHECK_SRVIDS:
+ return control_not_implemented("CHECK_SRVIDS", NULL);
+
+ case CTDB_CONTROL_ENABLE_SEQNUM:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_ltdb_enable_seqnum(ctdb, *(uint32_t *)indata.dptr);
+
+ case CTDB_CONTROL_UPDATE_SEQNUM:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_ltdb_update_seqnum(ctdb, *(uint32_t *)indata.dptr, srcnode);
+
+ case CTDB_CONTROL_FREEZE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_freeze(ctdb, c, async_reply);
+
+ case CTDB_CONTROL_THAW:
+ return control_not_implemented("THAW", NULL);
+
+ case CTDB_CONTROL_SET_RECMODE:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_set_recmode(ctdb, c, indata, async_reply, errormsg);
+
+ case CTDB_CONTROL_GET_MONMODE:
+ return control_not_implemented("GET_MONMODE", NULL);
+
+ case CTDB_CONTROL_ENABLE_MONITOR:
+ return control_not_implemented("ENABLE_MONITOR", NULL);
+
+ case CTDB_CONTROL_RUN_EVENTSCRIPTS:
+ return control_not_implemented("RUN_EVENTSCRIPTS", NULL);
+
+ case CTDB_CONTROL_DISABLE_MONITOR:
+ return control_not_implemented("DISABLE_MONITOR", NULL);
+
+ case CTDB_CONTROL_SHUTDOWN:
+ DEBUG(DEBUG_NOTICE,("Received SHUTDOWN command.\n"));
+ ctdb_shutdown_sequence(ctdb, 0);
+ /* In case above returns due to duplicate shutdown */
+ return 0;
+
+ case CTDB_CONTROL_TAKEOVER_IPv4:
+ return control_not_implemented("TAKEOVER_IPv4", "TAKEOVER_IP");
+
+ case CTDB_CONTROL_TAKEOVER_IP:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_public_ip));
+ return ctdb_control_takeover_ip(ctdb, c, indata, async_reply);
+
+ case CTDB_CONTROL_RELEASE_IPv4:
+ return control_not_implemented("RELEASE_IPv4", "RELEASE_IP");
+
+ case CTDB_CONTROL_RELEASE_IP:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_public_ip));
+ return ctdb_control_release_ip(ctdb, c, indata, async_reply);
+
+ case CTDB_CONTROL_IPREALLOCATED:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_ipreallocated(ctdb, c, async_reply);
+
+ case CTDB_CONTROL_GET_PUBLIC_IPSv4:
+ return control_not_implemented("GET_PUBLIC_IPSv4",
+ "GET_PUBLIC_IPS");
+
+ case CTDB_CONTROL_GET_PUBLIC_IPS:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_get_public_ips(ctdb, c, outdata);
+
+ case CTDB_CONTROL_TCP_CLIENT:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_connection));
+ return ctdb_control_tcp_client(ctdb, client_id, indata);
+
+ case CTDB_CONTROL_STARTUP:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_startup(ctdb, srcnode);
+
+ case CTDB_CONTROL_TCP_ADD:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_connection));
+ return ctdb_control_tcp_add(ctdb, indata, false);
+
+ case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_connection));
+ return ctdb_control_tcp_add(ctdb, indata, true);
+
+ case CTDB_CONTROL_TCP_REMOVE:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_connection));
+ return ctdb_control_tcp_remove(ctdb, indata);
+
+ case CTDB_CONTROL_SET_TUNABLE:
+ return ctdb_control_set_tunable(ctdb, indata);
+
+ case CTDB_CONTROL_GET_TUNABLE:
+ return ctdb_control_get_tunable(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_LIST_TUNABLES:
+ return ctdb_control_list_tunables(ctdb, outdata);
+
+ case CTDB_CONTROL_MODIFY_FLAGS:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_node_flag_change));
+ return ctdb_control_modflags(ctdb, indata);
+
+ case CTDB_CONTROL_KILL_TCP:
+ return control_not_implemented("KILL_TCP", NULL);
+
+ case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+ CHECK_CONTROL_DATA_SIZE(sizeof(ctdb_sock_addr));
+ return ctdb_control_get_tcp_tickle_list(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+ /* data size is verified in the called function */
+ return ctdb_control_set_tcp_tickle_list(ctdb, indata);
+
+ case CTDB_CONTROL_REGISTER_SERVER_ID:
+ return control_not_implemented("REGISTER_SERVER_ID", NULL);
+
+ case CTDB_CONTROL_UNREGISTER_SERVER_ID:
+ return control_not_implemented("UNREGISTER_SERVER_ID", NULL);
+
+ case CTDB_CONTROL_CHECK_SERVER_ID:
+ return control_not_implemented("CHECK_SERVER_ID", NULL);
+
+ case CTDB_CONTROL_GET_SERVER_ID_LIST:
+ return control_not_implemented("GET_SERVER_ID_LIST", NULL);
+
+ case CTDB_CONTROL_PERSISTENT_STORE:
+ return control_not_implemented("PERSISTENT_STORE", NULL);
+
+ case CTDB_CONTROL_UPDATE_RECORD:
+ return ctdb_control_update_record(ctdb, c, indata, async_reply);
+
+ case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+ return ctdb_control_send_gratious_arp(ctdb, indata);
+
+ case CTDB_CONTROL_TRANSACTION_START:
+ return control_not_implemented("TRANSACTION_START", NULL);
+
+ case CTDB_CONTROL_TRANSACTION_COMMIT:
+ return control_not_implemented("TRANSACTION_COMMIT", NULL);
+
+ case CTDB_CONTROL_WIPE_DATABASE:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_transdb));
+ return ctdb_control_wipe_database(ctdb, indata);
+
+ case CTDB_CONTROL_UPTIME:
+ return ctdb_control_uptime(ctdb, outdata);
+
+ case CTDB_CONTROL_START_RECOVERY:
+ return ctdb_control_start_recovery(ctdb, c, async_reply);
+
+ case CTDB_CONTROL_END_RECOVERY:
+ return ctdb_control_end_recovery(ctdb, c, async_reply);
+
+ case CTDB_CONTROL_TRY_DELETE_RECORDS:
+ return ctdb_control_try_delete_records(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_ADD_PUBLIC_IP:
+ return ctdb_control_add_public_address(ctdb, indata);
+
+ case CTDB_CONTROL_DEL_PUBLIC_IP:
+ return ctdb_control_del_public_address(ctdb, indata);
+
+ case CTDB_CONTROL_GET_CAPABILITIES:
+ return ctdb_control_get_capabilities(ctdb, outdata);
+
+ case CTDB_CONTROL_START_PERSISTENT_UPDATE:
+ return ctdb_control_start_persistent_update(ctdb, c, indata);
+
+ case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE:
+ return ctdb_control_cancel_persistent_update(ctdb, c, indata);
+
+ case CTDB_CONTROL_TRANS2_COMMIT:
+ case CTDB_CONTROL_TRANS2_COMMIT_RETRY:
+ return control_not_implemented("TRANS2_COMMIT", "TRANS3_COMMIT");
+
+ case CTDB_CONTROL_TRANS2_ERROR:
+ return control_not_implemented("TRANS2_ERROR", NULL);
+
+ case CTDB_CONTROL_TRANS2_FINISHED:
+ return control_not_implemented("TRANS2_FINISHED", NULL);
+
+ case CTDB_CONTROL_TRANS2_ACTIVE:
+ return control_not_implemented("TRANS2_ACTIVE", NULL);
+
+ case CTDB_CONTROL_TRANS3_COMMIT:
+ return ctdb_control_trans3_commit(ctdb, c, indata, async_reply);
+
+ case CTDB_CONTROL_RECD_PING:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_recd_ping(ctdb);
+
+ case CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS:
+ return control_not_implemented("GET_EVENT_SCRIPT_STATUS", NULL);
+
+ case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+ CHECK_CONTROL_DATA_SIZE(sizeof(double));
+ CTDB_UPDATE_RECLOCK_LATENCY(ctdb, "recd reclock", reclock.recd, *((double *)indata.dptr));
+ return 0;
+ case CTDB_CONTROL_GET_RECLOCK_FILE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ /* with no recovery lock configured the reply carries no data */
+ if (ctdb->recovery_lock != NULL) {
+ outdata->dptr = discard_const(ctdb->recovery_lock);
+ outdata->dsize = strlen(ctdb->recovery_lock) + 1;
+ }
+ return 0;
+ case CTDB_CONTROL_SET_RECLOCK_FILE:
+ return control_not_implemented("SET_RECLOCK_FILE", NULL);
+
+ case CTDB_CONTROL_STOP_NODE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_stop_node(ctdb);
+
+ case CTDB_CONTROL_CONTINUE_NODE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_continue_node(ctdb);
+
+ case CTDB_CONTROL_SET_NATGWSTATE:
+ return control_not_implemented("SET_NATGWSTATE", NULL);
+
+ case CTDB_CONTROL_SET_LMASTERROLE: {
+ uint32_t lmasterrole;
+
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ lmasterrole = *(uint32_t *)indata.dptr;
+ if (lmasterrole == 0) {
+ ctdb->capabilities &= ~CTDB_CAP_LMASTER;
+ } else {
+ ctdb->capabilities |= CTDB_CAP_LMASTER;
+ }
+ return 0;
+ }
+
+ case CTDB_CONTROL_SET_RECMASTERROLE: {
+ uint32_t recmasterrole;
+
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ recmasterrole = *(uint32_t *)indata.dptr;
+ if (recmasterrole == 0) {
+ ctdb->capabilities &= ~CTDB_CAP_RECMASTER;
+ } else {
+ ctdb->capabilities |= CTDB_CAP_RECMASTER;
+ }
+ return 0;
+ }
+
+ case CTDB_CONTROL_ENABLE_SCRIPT:
+ return control_not_implemented("ENABLE_SCRIPT", NULL);
+
+ case CTDB_CONTROL_DISABLE_SCRIPT:
+ return control_not_implemented("DISABLE_SCRIPT", NULL);
+
+ case CTDB_CONTROL_SET_BAN_STATE:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_ban_state));
+ return ctdb_control_set_ban_state(ctdb, indata);
+
+ case CTDB_CONTROL_GET_BAN_STATE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_get_ban_state(ctdb, outdata);
+
+ case CTDB_CONTROL_SET_DB_PRIORITY:
+ return control_not_implemented("SET_DB_PRIORITY", NULL);
+
+ case CTDB_CONTROL_GET_DB_PRIORITY:
+ return control_not_implemented("GET_DB_PRIORITY", NULL);
+
+ case CTDB_CONTROL_TRANSACTION_CANCEL:
+ return control_not_implemented("TRANSACTION_CANCEL", NULL);
+
+ case CTDB_CONTROL_REGISTER_NOTIFY:
+ return ctdb_control_register_notify(ctdb, client_id, indata);
+
+ case CTDB_CONTROL_DEREGISTER_NOTIFY:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint64_t));
+ return ctdb_control_deregister_notify(ctdb, client_id, indata);
+
+ case CTDB_CONTROL_GET_LOG:
+ return control_not_implemented("GET_LOG", NULL);
+
+ case CTDB_CONTROL_CLEAR_LOG:
+ return control_not_implemented("CLEAR_LOG", NULL);
+
+ case CTDB_CONTROL_GET_DB_SEQNUM:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint64_t));
+ return ctdb_control_get_db_seqnum(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_DB_SET_HEALTHY:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_db_set_healthy(ctdb, indata);
+
+ case CTDB_CONTROL_DB_GET_HEALTH:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_db_get_health(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+ CHECK_CONTROL_DATA_SIZE(sizeof(ctdb_sock_addr));
+ return ctdb_control_get_public_ip_info(ctdb, c, indata, outdata);
+
+ case CTDB_CONTROL_GET_IFACES:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_get_ifaces(ctdb, c, outdata);
+
+ case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_iface));
+ return ctdb_control_set_iface_link(ctdb, c, indata);
+
+ case CTDB_CONTROL_GET_STAT_HISTORY:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_get_stat_history(ctdb, c, outdata);
+
+ case CTDB_CONTROL_SCHEDULE_FOR_DELETION: {
+ struct ctdb_control_schedule_for_deletion *d;
+ size_t size = offsetof(struct ctdb_control_schedule_for_deletion, key);
+ /* the fixed part must be present before keylen can be read */
+ CHECK_CONTROL_MIN_DATA_SIZE(size);
+ d = (struct ctdb_control_schedule_for_deletion *)indata.dptr;
+ size += d->keylen;
+ CHECK_CONTROL_DATA_SIZE(size);
+ return ctdb_control_schedule_for_deletion(ctdb, indata);
+ }
+ case CTDB_CONTROL_GET_DB_STATISTICS:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_get_db_statistics(ctdb, *(uint32_t *)indata.dptr, outdata);
+
+ case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_reload_public_ips(ctdb, c, async_reply);
+
+ case CTDB_CONTROL_RECEIVE_RECORDS:
+ return control_not_implemented("RECEIVE_RECORDS", NULL);
+
+ case CTDB_CONTROL_DB_DETACH:
+ return ctdb_control_db_detach(ctdb, indata, client_id);
+
+ case CTDB_CONTROL_DB_FREEZE:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_db_freeze(ctdb, c, *(uint32_t *)indata.dptr,
+ async_reply);
+
+ case CTDB_CONTROL_DB_THAW:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_db_thaw(ctdb, *(uint32_t *)indata.dptr);
+
+ case CTDB_CONTROL_DB_TRANSACTION_START:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_transdb));
+ return ctdb_control_db_transaction_start(ctdb, indata);
+
+ case CTDB_CONTROL_DB_TRANSACTION_COMMIT:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_transdb));
+ return ctdb_control_db_transaction_commit(ctdb, indata);
+
+ case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_db_transaction_cancel(ctdb, indata);
+
+ case CTDB_CONTROL_DB_PULL:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_pulldb_ext));
+ return ctdb_control_db_pull(ctdb, c, indata, outdata);
+
+ case CTDB_CONTROL_DB_PUSH_START:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_pulldb_ext));
+ return ctdb_control_db_push_start(ctdb, indata);
+
+ case CTDB_CONTROL_DB_PUSH_CONFIRM:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_db_push_confirm(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_DB_OPEN_FLAGS: {
+ uint32_t db_id;
+ struct ctdb_db_context *ctdb_db;
+ int tdb_flags;
+
+ CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
+ db_id = *(uint32_t *)indata.dptr;
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (ctdb_db == NULL) {
+ return -1;
+ }
+
+ tdb_flags = tdb_get_flags(ctdb_db->ltdb->tdb);
+
+ /* tdb_flags is a local, so the reply needs its own copy */
+ outdata->dptr = talloc_size(outdata, sizeof(tdb_flags));
+ if (outdata->dptr == NULL) {
+ return -1;
+ }
+
+ outdata->dsize = sizeof(tdb_flags);
+ memcpy(outdata->dptr, &tdb_flags, outdata->dsize);
+ return 0;
+ }
+
+ case CTDB_CONTROL_CHECK_PID_SRVID:
+ CHECK_CONTROL_DATA_SIZE((sizeof(pid_t) + sizeof(uint64_t)));
+ return ctdb_control_check_pid_srvid(ctdb, indata);
+
+ case CTDB_CONTROL_TUNNEL_REGISTER:
+ return ctdb_control_tunnel_register(ctdb, client_id, srvid);
+
+ case CTDB_CONTROL_TUNNEL_DEREGISTER:
+ return ctdb_control_tunnel_deregister(ctdb, client_id, srvid);
+
+ case CTDB_CONTROL_VACUUM_FETCH:
+ return ctdb_control_vacuum_fetch(ctdb, indata);
+
+ case CTDB_CONTROL_DB_VACUUM: {
+ /*
+ * NOTE(review): db_vacuum is passed uninitialised; this presumably
+ * relies on ctdb_db_vacuum_len() depending only on the type and
+ * not on the member values - confirm.
+ */
+ struct ctdb_db_vacuum db_vacuum;
+
+ CHECK_CONTROL_DATA_SIZE(ctdb_db_vacuum_len(&db_vacuum));
+ return ctdb_control_db_vacuum(ctdb, c, indata, async_reply);
+ }
+ case CTDB_CONTROL_ECHO_DATA: {
+ return ctdb_control_echo_data(ctdb, c, indata, async_reply);
+ }
+
+ case CTDB_CONTROL_DISABLE_NODE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_disable_node(ctdb);
+
+ case CTDB_CONTROL_ENABLE_NODE:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_enable_node(ctdb);
+
+ case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_connection));
+ return ctdb_control_tcp_client_disconnected(ctdb, client_id, indata);
+
+ case CTDB_CONTROL_TCP_CLIENT_PASSED:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_connection));
+ return ctdb_control_tcp_client_passed(ctdb, client_id, indata);
+
+ default:
+ DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
+ return -1;
+ }
+}
+
+/*
+ send a reply for a ctdb control
+
+ The reply is addressed to the source node of request c and reuses its
+ reqid. Its payload is outdata (may be NULL) immediately followed by
+ errormsg (may be NULL; sent without a terminating NUL).
+
+ Nothing is sent if the request has CTDB_CTRL_FLAG_NOREPLY set, or if
+ the reply packet cannot be allocated.
+ */
+void ctdb_request_control_reply(struct ctdb_context *ctdb, struct ctdb_req_control_old *c,
+ TDB_DATA *outdata, int32_t status, const char *errormsg)
+{
+ struct ctdb_reply_control_old *r;
+ size_t datalen;
+ size_t errorlen;
+ size_t len;
+
+ /* some controls send no reply */
+ if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+ return;
+ }
+
+ datalen = (outdata != NULL) ? outdata->dsize : 0;
+ errorlen = (errormsg != NULL) ? strlen(errormsg) : 0;
+
+ len = offsetof(struct ctdb_reply_control_old, data) + datalen + errorlen;
+ r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REPLY_CONTROL, len, struct ctdb_reply_control_old);
+ if (r == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Unable to allocate transport - OOM or transport is down\n"));
+ return;
+ }
+
+ r->hdr.destnode = c->hdr.srcnode;
+ r->hdr.reqid = c->hdr.reqid;
+ r->status = status;
+ r->datalen = datalen;
+ /* always set, rather than relying on the allocator zeroing it */
+ r->errorlen = errorlen;
+ if (datalen > 0) {
+ memcpy(&r->data[0], outdata->dptr, datalen);
+ }
+ if (errorlen > 0) {
+ /* the error message follows the data */
+ memcpy(&r->data[datalen], errormsg, errorlen);
+ }
+
+ ctdb_queue_packet_opcode(ctdb, &r->hdr, c->opcode);
+
+ talloc_free(r);
+}
+
+/*
+ called when a CTDB_REQ_CONTROL packet comes in
+
+ Runs the control through ctdb_control_dispatch() and, unless the
+ handler asked to reply asynchronously, sends the reply immediately.
+ The reply data container is allocated as a child of the request.
+*/
+void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+ struct ctdb_req_control_old *c = (struct ctdb_req_control_old *)hdr;
+ TDB_DATA data, *outdata;
+ int32_t status;
+ bool async_reply = false;
+ const char *errormsg = NULL;
+
+ data.dptr = &c->data[0];
+ data.dsize = c->datalen;
+
+ outdata = talloc_zero(c, TDB_DATA);
+ if (outdata == NULL) {
+ /*
+ * The dispatcher dereferences outdata unconditionally, so
+ * fail the control here instead of crashing there.
+ */
+ DEBUG(DEBUG_ERR,(__location__ " Out of memory handling control %u\n",
+ c->opcode));
+ ctdb_request_control_reply(ctdb, c, NULL, -1, "Out of memory");
+ return;
+ }
+
+ status = ctdb_control_dispatch(ctdb, c, data, outdata, hdr->srcnode,
+ &errormsg, &async_reply);
+
+ if (!async_reply) {
+ ctdb_request_control_reply(ctdb, c, outdata, status, errormsg);
+ }
+}
+
+/*
+ called when a CTDB_REPLY_CONTROL packet comes in
+
+ Looks up the pending control by the reqid in the header and passes the
+ reply status, data and error message (if any) to its callback. Replies
+ with an unknown or mismatching reqid are logged and dropped.
+*/
+void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+ struct ctdb_reply_control_old *c = (struct ctdb_reply_control_old *)hdr;
+ TDB_DATA data;
+ struct ctdb_control_state *state;
+ const char *errormsg = NULL;
+
+ state = reqid_find(ctdb->idr, hdr->reqid, struct ctdb_control_state);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR,("pnn %u Invalid reqid %u in ctdb_reply_control\n",
+ ctdb->pnn, hdr->reqid));
+ return;
+ }
+
+ if (hdr->reqid != state->reqid) {
+ /* we found a record but it was the wrong one */
+ DEBUG(DEBUG_ERR, ("Dropped orphaned control reply with reqid:%u\n", hdr->reqid));
+ return;
+ }
+
+ /*
+ * NOTE(review): datalen and errorlen come from the packet and are
+ * not checked against the packet length in this function -
+ * presumably that is validated before we get here; confirm.
+ */
+ data.dptr = &c->data[0];
+ data.dsize = c->datalen;
+ if (c->errorlen) {
+ /* the error message follows the data and is not NUL terminated */
+ errormsg = talloc_strndup(state,
+ (char *)&c->data[c->datalen], c->errorlen);
+ }
+
+ /* make state a child of the packet, so it goes away when the packet
+ is freed. */
+ talloc_steal(hdr, state);
+
+ state->callback(ctdb, c->status, data, errormsg, state->private_data);
+}
+
+/*
+ talloc destructor for a pending control: release its reqid so that a
+ reply arriving afterwards no longer finds this state
+ */
+static int ctdb_control_destructor(struct ctdb_control_state *state)
+{
+ reqid_remove(state->ctdb->idr, state->reqid);
+ return 0;
+}
+
+/*
+ handle a timeout of a control
+
+ Counts the timeout, reports it to the control's callback with status
+ -1 and then frees the control state.
+ */
+static void ctdb_control_timeout(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
+{
+ /* abort on a wrong type instead of dereferencing NULL below */
+ struct ctdb_control_state *state = talloc_get_type_abort(private_data, struct ctdb_control_state);
+ TALLOC_CTX *tmp_ctx = talloc_new(ev);
+
+ CTDB_INCREMENT_STAT(state->ctdb, timeouts.control);
+
+ /*
+ * Detach state from its current parent so that it stays valid for
+ * the duration of the callback. If tmp_ctx could not be allocated
+ * state simply becomes parentless.
+ */
+ talloc_steal(tmp_ctx, state);
+
+ state->callback(state->ctdb, -1, tdb_null,
+ "ctdb_control timed out",
+ state->private_data);
+
+ /* free state explicitly so it is not leaked when tmp_ctx is NULL */
+ talloc_free(state);
+ TALLOC_FREE(tmp_ctx);
+}
+
+
+/*
+ send a control message to a node
+
+ destnode may be a node number or one of the CTDB_BROADCAST_* values;
+ broadcasts are only allowed with CTDB_CTRL_FLAG_NOREPLY. Unless
+ NOREPLY is set, callback is invoked with private_data once the reply
+ arrives or the control times out (tunable control_timeout).
+
+ If destnode is invalid or disconnected nothing is sent; the callback
+ (unless NOREPLY) is called straight away with status -1 and 0 is
+ returned.
+
+ Returns 0 if the control was queued or handled as above, -1 if the
+ transport is down, on an attempt to broadcast without NOREPLY, or on
+ out of memory.
+ */
+int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
+ uint64_t srvid, uint32_t opcode, uint32_t client_id,
+ uint32_t flags,
+ TDB_DATA data,
+ ctdb_control_callback_fn_t callback,
+ void *private_data)
+{
+ struct ctdb_req_control_old *c;
+ struct ctdb_control_state *state;
+ size_t len;
+
+ if (ctdb->methods == NULL) {
+ DEBUG(DEBUG_INFO,(__location__ " Failed to send control. Transport is DOWN\n"));
+ return -1;
+ }
+
+ if (((destnode == CTDB_BROADCAST_ACTIVE) ||
+ (destnode == CTDB_BROADCAST_ALL) ||
+ (destnode == CTDB_BROADCAST_CONNECTED)) &&
+ !(flags & CTDB_CTRL_FLAG_NOREPLY)) {
+ DEBUG(DEBUG_CRIT,("Attempt to broadcast control without NOREPLY\n"));
+ return -1;
+ }
+
+ if (destnode != CTDB_BROADCAST_ACTIVE &&
+ destnode != CTDB_BROADCAST_ALL &&
+ destnode != CTDB_BROADCAST_CONNECTED &&
+ (!ctdb_validate_pnn(ctdb, destnode) ||
+ (ctdb->nodes[destnode]->flags & NODE_FLAGS_DISCONNECTED))) {
+ if (!(flags & CTDB_CTRL_FLAG_NOREPLY)) {
+ callback(ctdb, -1, tdb_null, "ctdb_control to disconnected node", private_data);
+ }
+ return 0;
+ }
+
+ /* the state is made a child of private_data if possible. This means any reply
+ will be discarded if the private_data goes away */
+ state = talloc(private_data?private_data:ctdb, struct ctdb_control_state);
+ CTDB_NO_MEMORY(ctdb, state);
+
+ state->reqid = reqid_new(ctdb->idr, state);
+ state->callback = callback;
+ state->private_data = private_data;
+ state->ctdb = ctdb;
+ state->flags = flags;
+
+ /* the destructor releases the reqid again when state is freed */
+ talloc_set_destructor(state, ctdb_control_destructor);
+
+ len = offsetof(struct ctdb_req_control_old, data) + data.dsize;
+ c = ctdb_transport_allocate(ctdb, state, CTDB_REQ_CONTROL, len,
+ struct ctdb_req_control_old);
+ if (c == NULL) {
+ /* don't leak state and the reqid it holds */
+ talloc_free(state);
+ }
+ CTDB_NO_MEMORY(ctdb, c);
+ talloc_set_name_const(c, "ctdb_req_control packet");
+
+ c->hdr.destnode = destnode;
+ c->hdr.reqid = state->reqid;
+ c->opcode = opcode;
+ c->client_id = client_id;
+ c->flags = flags;
+ c->srvid = srvid;
+ c->datalen = data.dsize;
+ if (data.dsize) {
+ memcpy(&c->data[0], data.dptr, data.dsize);
+ }
+
+ ctdb_queue_packet(ctdb, &c->hdr);
+
+ if (flags & CTDB_CTRL_FLAG_NOREPLY) {
+ /* no reply expected: nothing to track; this frees c as well */
+ talloc_free(state);
+ return 0;
+ }
+
+ if (ctdb->tunable.control_timeout) {
+ /* the timer is owned by state, so it goes away with it */
+ tevent_add_timer(ctdb->ev, state,
+ timeval_current_ofs(ctdb->tunable.control_timeout, 0),
+ ctdb_control_timeout, state);
+ }
+
+ talloc_free(c);
+ return 0;
+}
diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c
new file mode 100644
index 0000000..eb9d634
--- /dev/null
+++ b/ctdb/server/ctdb_daemon.c
@@ -0,0 +1,2248 @@
+/*
+ ctdb daemon code
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+#include "system/time.h"
+
+#include <talloc.h>
+/* Allow use of deprecated function tevent_loop_allow_nesting() */
+#define TEVENT_DEPRECATED
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/blocking.h"
+#include "lib/util/become_daemon.h"
+
+#include "version.h"
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/rb_tree.h"
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+#include "common/pidfile.h"
+#include "common/sock_io.h"
+
+struct ctdb_client_pid_list { /* list node tying a connected client to the PID of its peer process */
+ struct ctdb_client_pid_list *next, *prev; /* list linkage; nodes are added to ctdb->client_pids with DLIST_ADD */
+ struct ctdb_context *ctdb; /* daemon context that owns the client_pids list */
+ pid_t pid; /* peer PID from ctdb_get_peer_pid(); remains 0 when that lookup fails */
+ struct ctdb_client *client; /* the connected client this entry describes */
+};
+
+const char *ctdbd_pidfile = NULL; /* pidfile path; not assigned in this part of the file */
+static struct pidfile_context *ctdbd_pidfile_ctx = NULL; /* pidfile handle; not assigned in this part of the file */
+
+static void daemon_incoming_packet(void *, struct ctdb_req_header *); /* forward declaration: used by the requeue helpers defined before the function itself */
+
+static pid_t __ctdbd_pid; /* compared with getpid() in print_exit_message(); presumably the main daemon's PID - TODO confirm where it is set */
+
+/*
+ * Log a shutdown notice, but only when called in the process whose PID
+ * matches __ctdbd_pid.  Other processes return without doing anything.
+ */
+static void print_exit_message(void)
+{
+	if (getpid() != __ctdbd_pid) {
+		return;
+	}
+
+	DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n"));
+
+	/* Give pending log messages a second to be flushed */
+	sleep(1);
+}
+
+#ifdef HAVE_GETRUSAGE
+
+struct cpu_check_threshold_data { /* one CPU-usage sample kept by ctdb_cpu_check_threshold() */
+ unsigned short percent; /* CPU utilisation computed for this sample, in percent */
+ struct timeval timeofday; /* timer callback time at which the sample was taken */
+ struct timeval ru_time; /* user + system CPU time reported by getrusage() for this sample */
+};
+
+/*
+ * Timer callback: sample this process's CPU usage and warn when the
+ * utilisation since the previous sample crosses a threshold.
+ *
+ * The threshold defaults to 90% and can be overridden once, on the
+ * first invocation, via the CTDB_TEST_CPU_USAGE_THRESHOLD environment
+ * variable (accepted range 1..100).  The previous sample is kept in a
+ * function-local static, so the first invocation only records a
+ * baseline.  The callback always re-arms itself to run again in 60
+ * seconds.
+ *
+ * ev, te:       unused
+ * tv:           time the timer fired; used as the sample's wall-clock time
+ * private_data: the struct ctdb_context (aborts if it is not)
+ */
+static void ctdb_cpu_check_threshold(struct tevent_context *ev,
+				     struct tevent_timer *te,
+				     struct timeval tv,
+				     void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type_abort(
+		private_data, struct ctdb_context);
+	uint32_t interval = 60;
+
+	static unsigned short threshold = 0;
+	static struct cpu_check_threshold_data prev = {
+		.percent = 0,
+		.timeofday = { .tv_sec = 0 },
+		.ru_time = { .tv_sec = 0 },
+	};
+
+	struct rusage usage;
+	struct cpu_check_threshold_data curr = {
+		.percent = 0,
+	};
+	int64_t ru_time_diff, timeofday_diff;
+	bool first;
+	int ret;
+
+	/*
+	 * Cache the threshold so that we don't waste time checking
+	 * the environment variable every time
+	 */
+	if (threshold == 0) {
+		const char *t;
+
+		threshold = 90;
+
+		t = getenv("CTDB_TEST_CPU_USAGE_THRESHOLD");
+		if (t != NULL) {
+			int th;
+
+			th = atoi(t);
+			if (th <= 0 || th > 100) {
+				DBG_WARNING("Failed to parse env var: %s\n", t);
+			} else {
+				threshold = th;
+			}
+		}
+	}
+
+	ret = getrusage(RUSAGE_SELF, &usage);
+	if (ret != 0) {
+		/*
+		 * getrusage() returns -1 on any failure, so the return
+		 * value carries no information: report errno instead.
+		 * prev is left untouched so the next successful sample
+		 * is compared against the last good one.
+		 */
+		DBG_WARNING("getrusage() failed: %s\n", strerror(errno));
+		goto next;
+	}
+
+	/* Sum the system and user CPU usage */
+	curr.ru_time = timeval_sum(&usage.ru_utime, &usage.ru_stime);
+
+	curr.timeofday = tv;
+
+	first = timeval_is_zero(&prev.timeofday);
+	if (first) {
+		/* No previous values recorded so no calculation to do */
+		goto done;
+	}
+
+	timeofday_diff = usec_time_diff(&curr.timeofday, &prev.timeofday);
+	if (timeofday_diff <= 0) {
+		/*
+		 * Time went backwards or didn't progress so no (sane)
+		 * calculation can be done
+		 */
+		goto done;
+	}
+
+	ru_time_diff = usec_time_diff(&curr.ru_time, &prev.ru_time);
+
+	/* CPU time consumed as a percentage of elapsed wall-clock time */
+	curr.percent = ru_time_diff * 100 / timeofday_diff;
+
+	if (curr.percent >= threshold) {
+		/* Log only if the utilisation changes */
+		if (curr.percent != prev.percent) {
+			D_WARNING("WARNING: CPU utilisation %hu%% >= "
+				  "threshold (%hu%%)\n",
+				  curr.percent,
+				  threshold);
+		}
+	} else {
+		/* Log if the utilisation falls below the threshold */
+		if (prev.percent >= threshold) {
+			D_WARNING("WARNING: CPU utilisation %hu%% < "
+				  "threshold (%hu%%)\n",
+				  curr.percent,
+				  threshold);
+		}
+	}
+
+done:
+	/* Remember this sample as the baseline for the next run */
+	prev = curr;
+
+next:
+	tevent_add_timer(ctdb->ev, ctdb,
+			 timeval_current_ofs(interval, 0),
+			 ctdb_cpu_check_threshold,
+			 ctdb);
+}
+
+/*
+ * Schedule the first CPU usage check to run at the current time; the
+ * callback re-arms itself from then on.
+ */
+static void ctdb_start_cpu_check_threshold(struct ctdb_context *ctdb)
+{
+	struct timeval first_run = timeval_current();
+
+	tevent_add_timer(ctdb->ev, ctdb, first_run,
+			 ctdb_cpu_check_threshold, ctdb);
+}
+#endif /* HAVE_GETRUSAGE */
+
+/*
+ * Once-per-second dummy timer callback.  It does no work other than
+ * re-arming itself, and only does that in the process whose PID
+ * matches ctdb->ctdbd_pid.
+ *
+ * ev, te, t:    unused
+ * private_data: the struct ctdb_context
+ */
+static void ctdb_time_tick(struct tevent_context *ev, struct tevent_timer *te,
+			   struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+	if (getpid() == ctdb->ctdbd_pid) {
+		struct timeval next_tick = timeval_current_ofs(1, 0);
+
+		tevent_add_timer(ctdb->ev, ctdb, next_tick,
+				 ctdb_time_tick, ctdb);
+	}
+}
+
+/*
+ * Arm the first one-second tick.  The tick is a dummy event fired once
+ * per second to make detection of hangs more reliable.
+ */
+static void ctdb_start_time_tickd(struct ctdb_context *ctdb)
+{
+	struct timeval first_tick = timeval_current_ofs(1, 0);
+
+	tevent_add_timer(ctdb->ev, ctdb, first_tick, ctdb_time_tick, ctdb);
+}
+
+static void ctdb_start_periodic_events(struct ctdb_context *ctdb)
+{ /* start each of the daemon's recurring background activities, in the order listed */
+ /* start monitoring for connected/disconnected nodes */
+ ctdb_start_keepalive(ctdb);
+
+ /* start the periodic update of the TCP tickle lists */
+ ctdb_start_tcp_tickle_update(ctdb);
+
+ /* start listening for recovery daemon pings */
+ ctdb_control_recd_ping(ctdb);
+
+ /* start the once-per-second dummy tick (see ctdb_start_time_tickd()) */
+ ctdb_start_time_tickd(ctdb);
+
+#ifdef HAVE_GETRUSAGE
+ ctdb_start_cpu_check_threshold(ctdb); /* CPU usage warnings; only built when getrusage() is available */
+#endif /* HAVE_GETRUSAGE */
+}
+
+/*
+ * Set the disposition of signal signum to "ignore".  The return value
+ * of sigaction() is not checked.
+ */
+static void ignore_signal(int signum)
+{
+	/* All members not named here start out zeroed */
+	struct sigaction ignore = { .sa_flags = 0 };
+
+	ignore.sa_handler = SIG_IGN;
+	sigemptyset(&ignore.sa_mask);
+	sigaddset(&ignore.sa_mask, signum);
+
+	sigaction(signum, &ignore, NULL);
+}
+
+
+/*
+ * Queue a packet for delivery to a local client.
+ *
+ * For CTDB_REQ_MESSAGE packets the client's queue length is checked
+ * first: if it exceeds the max_queue_depth_drop_msg tunable the client
+ * is freed and -1 is returned, so the caller must not touch client
+ * afterwards.  Otherwise the result of ctdb_queue_send() is returned.
+ */
+static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr)
+{
+	struct ctdb_context *ctdb = client->ctdb;
+	bool is_message = (hdr->operation == CTDB_REQ_MESSAGE);
+
+	CTDB_INCREMENT_STAT(ctdb, client_packets_sent);
+
+	if (is_message &&
+	    ctdb_queue_length(client->queue) > ctdb->tunable.max_queue_depth_drop_msg) {
+		DEBUG(DEBUG_ERR,("CTDB_REQ_MESSAGE queue full - killing client connection.\n"));
+		talloc_free(client);
+		return -1;
+	}
+
+	return ctdb_queue_send(client->queue, (uint8_t *)hdr, hdr->length);
+}
+
+/*
+ * Message handler used in daemon mode: wrap the message payload in a
+ * CTDB_REQ_MESSAGE packet and forward it to the client that registered
+ * for this srvid.
+ *
+ * srvid:        service id the message was sent to
+ * data:         message payload (may be empty)
+ * private_data: the struct ctdb_client to deliver to
+ */
+static void daemon_message_handler(uint64_t srvid, TDB_DATA data,
+				   void *private_data)
+{
+	struct ctdb_client *client = talloc_get_type(private_data, struct ctdb_client);
+	struct ctdb_req_message_old *r;
+	int len;
+
+	/* construct a message to send to the client containing the data */
+	len = offsetof(struct ctdb_req_message_old, data) + data.dsize;
+	r = ctdbd_allocate_pkt(client->ctdb, client->ctdb, CTDB_REQ_MESSAGE,
+			       len, struct ctdb_req_message_old);
+	CTDB_NO_MEMORY_VOID(client->ctdb, r);
+
+	talloc_set_name_const(r, "req_message packet");
+
+	r->srvid = srvid;
+	r->datalen = data.dsize;
+	/*
+	 * An empty payload may come with a NULL dptr, and passing a
+	 * null pointer to memcpy() is undefined even for length 0.
+	 */
+	if (data.dsize > 0) {
+		memcpy(&r->data[0], data.dptr, data.dsize);
+	}
+
+	/*
+	 * The result is deliberately not inspected: r is parented to
+	 * the ctdb context, not the client, so it is still valid and
+	 * must be freed even if the send freed the client.
+	 */
+	daemon_queue_send(client, &r->hdr);
+
+	talloc_free(r);
+}
+
+/*
+ * Called when the daemon receives a request from a client to register
+ * a srvid.  Messages for that srvid are then forwarded to the client
+ * through daemon_message_handler().
+ *
+ * Returns 0 on success, -1 if client_id is unknown, otherwise the
+ * error returned by srvid_register().
+ */
+int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
+{
+	struct ctdb_client *client;
+	int ret;
+
+	client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_register_message_handler\n"));
+		return -1;
+	}
+
+	ret = srvid_register(ctdb->srv, client, srvid,
+			     daemon_message_handler, client);
+	if (ret == 0) {
+		DEBUG(DEBUG_INFO,(__location__ " Registered message handler for srvid=%llu\n",
+				  (unsigned long long)srvid));
+		return ret;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " Failed to register handler %llu in daemon\n",
+			 (unsigned long long)srvid));
+	return ret;
+}
+
+/*
+ * Called when the daemon receives a request from a client to remove a
+ * srvid registration.
+ *
+ * Returns -1 if client_id is unknown, otherwise the result of
+ * srvid_deregister().
+ */
+int daemon_deregister_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
+{
+	struct ctdb_client *client;
+
+	client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	if (client != NULL) {
+		return srvid_deregister(ctdb->srv, srvid, client);
+	}
+
+	DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_deregister_message_handler\n"));
+	return -1;
+}
+
+/*
+ * Tunnel handler used in daemon mode: copy an incoming tunnel packet
+ * into a fresh CTDB_REQ_TUNNEL packet and queue it to the client
+ * registered for this tunnel.
+ *
+ * tunnel_id:    id of the tunnel the packet arrived on
+ * data:         data.dptr is interpreted as a struct ctdb_req_tunnel_old
+ * private_data: the struct ctdb_client to deliver to (aborts if not)
+ *
+ * NOTE(review): the packet's datalen is used without being checked
+ * against data.dsize - confirm that the caller validates it.
+ */
+void daemon_tunnel_handler(uint64_t tunnel_id, TDB_DATA data,
+			   void *private_data)
+{
+	struct ctdb_client *client =
+		talloc_get_type_abort(private_data, struct ctdb_client);
+	struct ctdb_req_tunnel_old *in = (struct ctdb_req_tunnel_old *)data.dptr;
+	struct ctdb_req_tunnel_old *out;
+	size_t out_len;
+
+	out_len = offsetof(struct ctdb_req_tunnel_old, data) + in->datalen;
+	out = ctdbd_allocate_pkt(client->ctdb, client->ctdb, CTDB_REQ_TUNNEL,
+				 out_len, struct ctdb_req_tunnel_old);
+	if (out == NULL) {
+		DEBUG(DEBUG_ERR, ("Memory error in daemon_tunnel_handler\n"));
+		return;
+	}
+
+	talloc_set_name_const(out, "req_tunnel packet");
+
+	/* Copy the tunnel-specific fields and the payload across */
+	out->tunnel_id = tunnel_id;
+	out->flags = in->flags;
+	out->datalen = in->datalen;
+	memcpy(out->data, in->data, in->datalen);
+
+	daemon_queue_send(client, &out->hdr);
+
+	talloc_free(out);
+}
+
+/*
+ * talloc destructor for a ctdb_client.
+ *
+ * Runs the takeover hook, releases the client id and decrements the
+ * client count.  Recovery mode is set to active if the client still
+ * had persistent updates in flight, or if its db_id resolves to an
+ * attached database; in the latter case that database's
+ * persistent_state is freed as well.
+ *
+ * Always returns 0.
+ */
+static int ctdb_client_destructor(struct ctdb_client *client)
+{
+	struct ctdb_context *ctdb;
+	struct ctdb_db_context *ctdb_db;
+
+	ctdb_takeover_client_destructor_hook(client);
+
+	ctdb = client->ctdb;
+	reqid_remove(ctdb->idr, client->client_id);
+	ctdb->num_clients--;
+
+	if (client->num_persistent_updates != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates));
+		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, client->db_id);
+	if (ctdb_db == NULL) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR, (__location__ " client exit while transaction "
+			  "commit active. Forcing recovery.\n"));
+	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+
+	/*
+	 * trans3 transaction state:
+	 *
+	 * The destructor sets the pointer to NULL.
+	 */
+	talloc_free(ctdb_db->persistent_state);
+
+	return 0;
+}
+
+
+/*
+ * Called when the daemon receives a CTDB_REQ_MESSAGE from a local
+ * client over the unix domain socket.
+ *
+ * A destination of CTDB_CURRENT_NODE is rewritten to this node's PNN.
+ * Messages addressed to this node are dispatched locally; anything
+ * else is sent on to the destination node.
+ */
+static void daemon_request_message_from_client(struct ctdb_client *client,
+					       struct ctdb_req_message_old *c)
+{
+	struct ctdb_context *ctdb = client->ctdb;
+	TDB_DATA payload;
+	int ret;
+
+	if (c->hdr.destnode == CTDB_CURRENT_NODE) {
+		c->hdr.destnode = ctdb_get_pnn(ctdb);
+	}
+
+	/* The message may be for another client on this node */
+	if (c->hdr.destnode == ctdb_get_pnn(ctdb)) {
+		ctdb_request_message(ctdb, (struct ctdb_req_header *)c);
+		return;
+	}
+
+	/* Otherwise it is for a remote node */
+	payload.dsize = c->datalen;
+	payload.dptr = &c->data[0];
+
+	ret = ctdb_daemon_send_message(ctdb, c->hdr.destnode,
+				       c->srvid, payload);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to send message to remote node %u\n",
+				 c->hdr.destnode));
+	}
+}
+
+
+struct daemon_call_state { /* per-request state for a CTDB_REQ_CALL received from a local client */
+ struct ctdb_client *client; /* client that issued the call and receives the reply */
+ uint32_t reqid; /* request id from the client's packet header, echoed in the reply */
+ struct ctdb_call *call; /* the call being executed on the client's behalf */
+ struct timeval start_time; /* when processing started; used for the call latency statistics */
+
+ /* non-zero when the client asked for a read-only record (CTDB_WANT_READONLY) */
+ uint32_t readonly_fetch;
+ uint32_t client_callid; /* call id the client originally asked for; only assigned when readonly_fetch is set */
+};
+
+/*
+ * Completion callback for a call made on behalf of a local client:
+ * collect the result and send a CTDB_REPLY_CALL back to the client.
+ *
+ * state->async.private_data is the daemon_call_state set up in
+ * daemon_request_call_from_client().  On the error paths no reply is
+ * sent; the pending_calls statistic is decremented on every path
+ * except when queueing the reply killed the client.
+ */
+static void daemon_call_from_client_callback(struct ctdb_call_state *state)
+{
+	struct daemon_call_state *dstate = talloc_get_type(state->async.private_data,
+							   struct daemon_call_state);
+	struct ctdb_client *client = dstate->client;
+	struct ctdb_db_context *ctdb_db = state->ctdb_db;
+	struct ctdb_reply_call_old *reply;
+	const uint8_t *payload;
+	size_t skip = 0;
+	uint32_t length;
+	int ret;
+
+	/* Re-parent so dstate and its call no longer hang off state */
+	talloc_steal(client, dstate);
+	talloc_steal(dstate, dstate->call);
+
+	ret = ctdb_daemon_call_recv(state, dstate->call);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " ctdbd_call_recv() returned error\n"));
+		CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
+
+		CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", call_latency, dstate->start_time);
+		return;
+	}
+
+	/*
+	 * If the client asked for readonly FETCH, it was remapped to
+	 * FETCH_WITH_HEADER when calling the daemon, so the extra
+	 * header has to be stripped off the reply data before it is
+	 * passed back to the client.
+	 */
+	if (dstate->readonly_fetch
+	    && dstate->client_callid == CTDB_FETCH_FUNC) {
+		skip = sizeof(struct ctdb_ltdb_header);
+	}
+
+	length = offsetof(struct ctdb_reply_call_old, data)
+		 + dstate->call->reply_data.dsize - skip;
+
+	reply = ctdbd_allocate_pkt(client->ctdb, dstate, CTDB_REPLY_CALL,
+				   length, struct ctdb_reply_call_old);
+	if (reply == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to allocate reply_call in ctdb daemon\n"));
+		CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
+		CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", call_latency, dstate->start_time);
+		return;
+	}
+	reply->hdr.reqid = dstate->reqid;
+	reply->status = dstate->call->status;
+
+	/* Copy the reply data, minus the header when it must be stripped */
+	payload = dstate->call->reply_data.dptr;
+	if (skip != 0) {
+		payload += skip;
+	}
+	reply->datalen = dstate->call->reply_data.dsize - skip;
+	memcpy(&reply->data[0], payload, reply->datalen);
+
+	ret = daemon_queue_send(client, &reply->hdr);
+	if (ret == -1) {
+		/* client is dead - return immediately */
+		return;
+	}
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to queue packet from daemon to client\n"));
+	}
+	CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", call_latency, dstate->start_time);
+	CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
+	talloc_free(dstate);
+}
+
+struct ctdb_daemon_packet_wrap { /* names a client by id so that a requeued packet can detect that the client has gone away */
+ struct ctdb_context *ctdb; /* daemon context whose idr is used to look the client up */
+ uint32_t client_id; /* id resolved with reqid_find() when the packet is finally processed */
+};
+
+/*
+ * Wrapper around daemon_incoming_packet() that catches disconnected
+ * clients.
+ *
+ * p is a ctdb_daemon_packet_wrap naming the client by id.  The wrap is
+ * freed here; the packet is only processed if the id still resolves
+ * to a client.
+ */
+static void daemon_incoming_packet_wrap(void *p, struct ctdb_req_header *hdr)
+{
+	struct ctdb_daemon_packet_wrap *wrap;
+	struct ctdb_client *client;
+	uint32_t client_id;
+
+	wrap = talloc_get_type(p, struct ctdb_daemon_packet_wrap);
+	if (wrap == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " Bad packet type '%s'\n", talloc_get_name(p)));
+		return;
+	}
+
+	/* Resolve the client, then drop the wrap: it is done with either way */
+	client_id = wrap->client_id;
+	client = reqid_find(wrap->ctdb->idr, client_id, struct ctdb_client);
+	talloc_free(wrap);
+
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
+				 client_id));
+		return;
+	}
+
+	/* process it */
+	daemon_incoming_packet(client, hdr);
+}
+
+struct ctdb_deferred_fetch_call { /* one client call parked while a fetch for the same key is in flight */
+ struct ctdb_deferred_fetch_call *next, *prev; /* list linkage within ctdb_deferred_fetch_queue.deferred_calls */
+ struct ctdb_req_call_old *c; /* the deferred request packet */
+ struct ctdb_daemon_packet_wrap *w; /* daemon context and client id of the requester */
+};
+
+struct ctdb_deferred_fetch_queue { /* per-key queue of deferred calls; its talloc destructor requeues them */
+ struct ctdb_deferred_fetch_call *deferred_calls; /* list head; entries are appended with DLIST_ADD_END, so it is in arrival order */
+};
+
+struct ctdb_deferred_requeue { /* private data handed to the reprocess_deferred_call() timer */
+ struct ctdb_deferred_fetch_call *dfc; /* the deferred call to replay */
+ struct ctdb_client *client; /* client the call belongs to */
+};
+
+/*
+ * Timer callback that starts reprocessing a deferred call.
+ *
+ * private_data is the ctdb_deferred_requeue created in
+ * deferred_fetch_queue_destructor().  The request packet is re-parented
+ * to the client, fed back into daemon_incoming_packet(), and the
+ * requeue structure is then freed.
+ */
+static void reprocess_deferred_call(struct tevent_context *ev,
+				    struct tevent_timer *te,
+				    struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_requeue *requeue = (struct ctdb_deferred_requeue *)private_data;
+	struct ctdb_client *client = requeue->client;
+	struct ctdb_req_call_old *call_pkt = requeue->dfc->c;
+
+	talloc_steal(client, call_pkt);
+	daemon_incoming_packet(client, (struct ctdb_req_header *)call_pkt);
+	talloc_free(requeue);
+}
+
+/*
+ * talloc destructor for a deferred fetch queue.
+ *
+ * The queue is destroyed either after a timeout or when the initial
+ * fetch-lock has finished.  At that point every queued call is pushed
+ * back onto the event loop with a zero timeout so that it is
+ * reprocessed straight away (we are dmaster at this stage, so the
+ * waiting smbd processes can pick up and acquire the record at once).
+ *
+ * The packets are requeued explicitly, rather than through ordinary
+ * child destructors, because clients must be called in the same order
+ * in which their requests were queued.  Calls from clients that have
+ * since disconnected are dropped.
+ *
+ * Always returns 0.
+ */
+static int deferred_fetch_queue_destructor(struct ctdb_deferred_fetch_queue *dfq)
+{
+	struct ctdb_deferred_fetch_call *pending;
+
+	while ((pending = dfq->deferred_calls) != NULL) {
+		struct ctdb_context *ctdb = pending->w->ctdb;
+		uint32_t client_id = pending->w->client_id;
+		struct ctdb_deferred_requeue *requeue;
+		struct ctdb_client *client;
+
+		DLIST_REMOVE(dfq->deferred_calls, pending);
+
+		client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+		if (client == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
+					 client_id));
+			continue;
+		}
+
+		/* process it by pushing it back onto the eventloop */
+		requeue = talloc(client, struct ctdb_deferred_requeue);
+		if (requeue == NULL) {
+			DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch requeue structure\n"));
+			continue;
+		}
+
+		/* The call now lives with the requeue, not with the dying queue */
+		requeue->dfc = talloc_steal(requeue, pending);
+		requeue->client = client;
+
+		tevent_add_timer(ctdb->ev, client, timeval_zero(),
+				 reprocess_deferred_call, requeue);
+	}
+
+	return 0;
+}
+
+/*
+ * rb-tree insert callback used when registering a new deferral context.
+ *
+ * parm is the new context and data is whatever the tree already holds
+ * for the key.  There should never be a pre-existing context, but if
+ * there is one, warn and free it.  Returns parm, the new context.
+ */
+static void *insert_dfq_callback(void *parm, void *data)
+{
+	if (data == NULL) {
+		return parm;
+	}
+
+	DEBUG(DEBUG_ERR,("Already have DFQ registered. Free old %p and create new %p\n", data, parm));
+	talloc_free(data);
+
+	return parm;
+}
+
+/* Timer callback: the original fetch-lock did not complete within the
+ time allowed. private_data is the deferred fetch queue registered by
+ setup_deferred_fetch_locks(); freeing it runs the queue's destructor,
+ which causes all deferred requests to be re-inserted into the event
+ system.
+*/
+static void dfq_timeout(struct tevent_context *ev, struct tevent_timer *te,
+ struct timeval t, void *private_data)
+{
+ talloc_free(private_data);
+}
+
+/*
+ * Register a KEY in a database as "being fetched".
+ *
+ * While the remote fetch is in flight, further attempts to fetch the
+ * same record are deferred (see requeue_duplicate_fetch()) until the
+ * fetch completes.  The deferral queue is a talloc child of call, and
+ * is torn down after 30 seconds if it still exists by then.
+ *
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int setup_deferred_fetch_locks(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
+{
+	struct ctdb_deferred_fetch_queue *queue;
+	uint32_t *idkey;
+
+	idkey = ctdb_key_to_idkey(call, call->key);
+	if (idkey == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
+		return -1;
+	}
+
+	queue = talloc(call, struct ctdb_deferred_fetch_queue);
+	if (queue == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch queue structure\n"));
+		talloc_free(idkey);
+		return -1;
+	}
+	queue->deferred_calls = NULL;
+
+	trbt_insertarray32_callback(ctdb_db->deferred_fetch,
+				    idkey[0], &idkey[0],
+				    insert_dfq_callback, queue);
+
+	/* Freeing the queue is what requeues the deferred calls */
+	talloc_set_destructor(queue, deferred_fetch_queue_destructor);
+
+	/* If the fetch hasn't completed in 30 seconds, just tear it all down
+	   and let it try again as the events are reissued */
+	tevent_add_timer(ctdb_db->ctdb->ev, queue, timeval_current_ofs(30, 0),
+			 dfq_timeout, queue);
+
+	talloc_free(idkey);
+	return 0;
+}
+
+/*
+ * Check whether this request duplicates a fetch that is already in
+ * flight.  If so, park it on that fetch's deferral queue so that it is
+ * reprocessed when the in-flight fetch completes.
+ *
+ * Returns 0 when the call was deferred (the packet c is then owned by
+ * the queue entry), or -1 when no fetch is in flight for the key or an
+ * allocation failed.
+ */
+static int requeue_duplicate_fetch(struct ctdb_db_context *ctdb_db, struct ctdb_client *client, TDB_DATA key, struct ctdb_req_call_old *c)
+{
+	struct ctdb_deferred_fetch_queue *queue;
+	struct ctdb_deferred_fetch_call *entry;
+	uint32_t *idkey;
+
+	idkey = ctdb_key_to_idkey(c, key);
+	if (idkey == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
+		return -1;
+	}
+
+	/* The id key is only needed for the lookup itself */
+	queue = trbt_lookuparray32(ctdb_db->deferred_fetch, idkey[0], &idkey[0]);
+	talloc_free(idkey);
+	if (queue == NULL) {
+		return -1;
+	}
+
+	entry = talloc(queue, struct ctdb_deferred_fetch_call);
+	if (entry == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to allocate deferred fetch call structure\n"));
+		return -1;
+	}
+
+	entry->w = talloc(entry, struct ctdb_daemon_packet_wrap);
+	if (entry->w == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch daemon packet wrap structure\n"));
+		talloc_free(entry);
+		return -1;
+	}
+
+	entry->c = talloc_steal(entry, c);
+	entry->w->ctdb = ctdb_db->ctdb;
+	entry->w->client_id = client->client_id;
+
+	DLIST_ADD_END(queue->deferred_calls, entry);
+
+	return 0;
+}
+
+
+/*
+ * Called when the daemon receives a CTDB_REQ_CALL from a local client
+ * over the unix domain socket.
+ *
+ * The record is locked and fetched, then the request takes one of
+ * these routes:
+ *  - the lock could not be taken immediately: the packet is requeued
+ *    and processed again later;
+ *  - a fetch for the same key is already in flight (fetch_collapse):
+ *    the call is deferred until that fetch completes;
+ *  - read-only delegations are being, or must be, revoked: the call is
+ *    deferred until the revoke has completed;
+ *  - otherwise the call is started locally (we are dmaster) or sent to
+ *    the remote dmaster, with daemon_call_from_client_callback() as
+ *    the completion callback.
+ *
+ * The pending_calls statistic is incremented on entry and decremented
+ * again on each path that returns without leaving a call in flight.
+ */
+static void daemon_request_call_from_client(struct ctdb_client *client,
+					    struct ctdb_req_call_old *c)
+{
+	struct ctdb_call_state *state;
+	struct ctdb_db_context *ctdb_db;
+	struct daemon_call_state *dstate;
+	struct ctdb_call *call;
+	struct ctdb_ltdb_header header;
+	TDB_DATA key, data;
+	int ret;
+	struct ctdb_context *ctdb = client->ctdb;
+	struct ctdb_daemon_packet_wrap *w;
+
+	CTDB_INCREMENT_STAT(ctdb, total_calls);
+	CTDB_INCREMENT_STAT(ctdb, pending_calls);
+
+	ctdb_db = find_ctdb_db(client->ctdb, c->db_id);
+	if (!ctdb_db) {
+		DEBUG(DEBUG_ERR, (__location__ " Unknown database in request. db_id==0x%08x\n",
+				  c->db_id));
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		return;
+	}
+
+	if (ctdb_db->unhealthy_reason) {
+		/*
+		 * this is just a warning, as the tdb should be empty anyway,
+		 * and only persistent databases can be unhealthy, which doesn't
+		 * use this code path
+		 */
+		DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in daemon_request_call_from_client(): %s\n",
+				     ctdb_db->db_name, ctdb_db->unhealthy_reason));
+	}
+
+	/* The key is the first keylen bytes of the packet's data area */
+	key.dptr = c->data;
+	key.dsize = c->keylen;
+
+	/*
+	 * The wrap names the client by id, so a requeued packet can
+	 * tell if the client has disconnected in the meantime.
+	 */
+	w = talloc(ctdb, struct ctdb_daemon_packet_wrap);
+	CTDB_NO_MEMORY_VOID(ctdb, w);
+
+	w->ctdb = ctdb;
+	w->client_id = client->client_id;
+
+	ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header,
+					   (struct ctdb_req_header *)c, &data,
+					   daemon_incoming_packet_wrap, w, true);
+	if (ret == -2) {
+		/* will retry later */
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		return;
+	}
+
+	/* Not requeued, so the wrap is no longer needed */
+	talloc_free(w);
+
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Unable to fetch record\n"));
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		return;
+	}
+
+
+	/* check if this fetch request is a duplicate for a
+	   request we already have in flight. If so defer it until
+	   the first request completes.
+	*/
+	if (ctdb->tunable.fetch_collapse == 1) {
+		if (requeue_duplicate_fetch(ctdb_db, client, key, c) == 0) {
+			ret = ctdb_ltdb_unlock(ctdb_db, key);
+			if (ret != 0) {
+				DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+			}
+			CTDB_DECREMENT_STAT(ctdb, pending_calls);
+			talloc_free(data.dptr);
+			return;
+		}
+	}
+
+	/* Dont do READONLY if we don't have a tracking database */
+	if ((c->flags & CTDB_WANT_READONLY) && !ctdb_db_readonly(ctdb_db)) {
+		c->flags &= ~CTDB_WANT_READONLY;
+	}
+
+	if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) {
+		header.flags &= ~CTDB_REC_RO_FLAGS;
+		CTDB_INCREMENT_STAT(ctdb, total_ro_revokes);
+		CTDB_INCREMENT_DB_STAT(ctdb_db, db_ro_revokes);
+		if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
+			ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag");
+		}
+		/* and clear out the tracking data */
+		if (tdb_delete(ctdb_db->rottdb, key) != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to clear out trackingdb record\n"));
+		}
+	}
+
+	/* if we are revoking, we must defer all other calls until the revoke
+	 * had completed.
+	 */
+	if (header.flags & CTDB_REC_RO_REVOKING_READONLY) {
+		talloc_free(data.dptr);
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			/* Report the failure as the other unlock sites do */
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+
+		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
+			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
+		}
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		return;
+	}
+
+	/*
+	 * We are dmaster, the client wants more than a read-only copy
+	 * and the record has delegations or read-only state: start a
+	 * revoke and defer this call until it has completed.
+	 */
+	if ((header.dmaster == ctdb->pnn)
+	    && (!(c->flags & CTDB_WANT_READONLY))
+	    && (header.flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY)) ) {
+		header.flags |= CTDB_REC_RO_REVOKING_READONLY;
+		if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
+			ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set");
+		}
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			/* Report the failure as the other unlock sites do */
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+
+		if (ctdb_start_revoke_ro_record(ctdb, ctdb_db, key, &header, data) != 0) {
+			ctdb_fatal(ctdb, "Failed to start record revoke");
+		}
+		talloc_free(data.dptr);
+
+		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
+			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
+		}
+
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		return;
+	}
+
+	dstate = talloc(client, struct daemon_call_state);
+	if (dstate == NULL) {
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+
+		DEBUG(DEBUG_ERR,(__location__ " Unable to allocate dstate\n"));
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		return;
+	}
+	dstate->start_time = timeval_current();
+	dstate->client = client;
+	dstate->reqid = c->hdr.reqid;
+	/* The record data now lives and dies with dstate */
+	talloc_steal(dstate, data.dptr);
+
+	call = dstate->call = talloc_zero(dstate, struct ctdb_call);
+	if (call == NULL) {
+		ret = ctdb_ltdb_unlock(ctdb_db, key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+		}
+
+		DEBUG(DEBUG_ERR,(__location__ " Unable to allocate call\n"));
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 1", call_latency, dstate->start_time);
+		return;
+	}
+
+	dstate->readonly_fetch = 0;
+	call->call_id = c->callid;
+	call->key = key;
+	/* The call data follows the key in the packet's data area */
+	call->call_data.dptr = c->data + c->keylen;
+	call->call_data.dsize = c->calldatalen;
+	call->flags = c->flags;
+
+	if (c->flags & CTDB_WANT_READONLY) {
+		/* client wants readonly record, so translate this into a
+		   fetch with header. remember what the client asked for
+		   so we can remap the reply back to the proper format for
+		   the client in the reply
+		 */
+		dstate->client_callid = call->call_id;
+		call->call_id = CTDB_FETCH_WITH_HEADER_FUNC;
+		dstate->readonly_fetch = 1;
+	}
+
+	if (header.dmaster == ctdb->pnn) {
+		state = ctdb_call_local_send(ctdb_db, call, &header, &data);
+	} else {
+		state = ctdb_daemon_call_send_remote(ctdb_db, call, &header);
+		if (ctdb->tunable.fetch_collapse == 1) {
+			/* This request triggered a remote fetch-lock.
+			   set up a deferral for this key so any additional
+			   fetch-locks are deferred until the current one
+			   finishes.
+			 */
+			setup_deferred_fetch_locks(ctdb_db, call);
+		}
+	}
+
+	ret = ctdb_ltdb_unlock(ctdb_db, key);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+	}
+
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Unable to setup call send\n"));
+		CTDB_DECREMENT_STAT(ctdb, pending_calls);
+		CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 2", call_latency, dstate->start_time);
+		return;
+	}
+	/* dstate hangs off state, and state off the client */
+	talloc_steal(state, dstate);
+	talloc_steal(client, state);
+
+	state->async.fn = daemon_call_from_client_callback;
+	state->async.private_data = dstate;
+}
+
+
+static void daemon_request_control_from_client(struct ctdb_client *client,
+ struct ctdb_req_control_old *c); /* forward declaration: called from daemon_incoming_packet() below */
+static void daemon_request_tunnel_from_client(struct ctdb_client *client,
+ struct ctdb_req_tunnel_old *c); /* forward declaration: called from daemon_incoming_packet() below */
+
+/*
+ * Process one packet received from a client.
+ *
+ * p is the struct ctdb_client and hdr the packet.  After the magic and
+ * protocol version have been checked the packet is dispatched on its
+ * operation type.  Ownership of hdr is taken here: it is freed on
+ * return unless a handler stole it.
+ */
+static void daemon_incoming_packet(void *p, struct ctdb_req_header *hdr)
+{
+	struct ctdb_client *client = talloc_get_type(p, struct ctdb_client);
+	struct ctdb_context *ctdb = client->ctdb;
+	TALLOC_CTX *scratch;
+
+	/* place the packet as a child of a scratch context. We then use
+	   talloc_free() below to free it. If any of the calls want
+	   to keep it, then they will steal it somewhere else, and the
+	   talloc_free() will be a no-op */
+	scratch = talloc_new(client);
+	talloc_steal(scratch, hdr);
+
+	if (hdr->ctdb_magic != CTDB_MAGIC) {
+		ctdb_set_error(client->ctdb, "Non CTDB packet rejected in daemon\n");
+		goto done;
+	}
+
+	if (hdr->ctdb_version != CTDB_PROTOCOL) {
+		ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
+		goto done;
+	}
+
+	if (hdr->operation == CTDB_REQ_CALL) {
+		CTDB_INCREMENT_STAT(ctdb, client.req_call);
+		daemon_request_call_from_client(client, (struct ctdb_req_call_old *)hdr);
+	} else if (hdr->operation == CTDB_REQ_MESSAGE) {
+		CTDB_INCREMENT_STAT(ctdb, client.req_message);
+		daemon_request_message_from_client(client, (struct ctdb_req_message_old *)hdr);
+	} else if (hdr->operation == CTDB_REQ_CONTROL) {
+		CTDB_INCREMENT_STAT(ctdb, client.req_control);
+		daemon_request_control_from_client(client, (struct ctdb_req_control_old *)hdr);
+	} else if (hdr->operation == CTDB_REQ_TUNNEL) {
+		CTDB_INCREMENT_STAT(ctdb, client.req_tunnel);
+		daemon_request_tunnel_from_client(client, (struct ctdb_req_tunnel_old *)hdr);
+	} else {
+		DEBUG(DEBUG_CRIT,(__location__ " daemon: unrecognized operation %u\n",
+				  hdr->operation));
+	}
+
+done:
+	talloc_free(scratch);
+}
+
+/*
+ * Queue read callback: called when the daemon gets an incoming packet
+ * from a client.
+ *
+ * data: the received packet; this callback takes ownership of it
+ * cnt:  number of bytes in data; 0 means the client is to be torn down
+ * args: the struct ctdb_client the queue belongs to
+ *
+ * A packet that passes the header checks is handed to
+ * daemon_incoming_packet(), which becomes responsible for freeing it.
+ * Every rejected packet is freed here.
+ */
+static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args)
+{
+	struct ctdb_client *client = talloc_get_type(args, struct ctdb_client);
+	struct ctdb_req_header *hdr;
+
+	if (cnt == 0) {
+		talloc_free(client);
+		return;
+	}
+
+	CTDB_INCREMENT_STAT(client->ctdb, client_packets_recv);
+
+	if (cnt < sizeof(*hdr)) {
+		ctdb_set_error(client->ctdb, "Bad packet length %u in daemon\n",
+			       (unsigned)cnt);
+		/* Too short to hold a header: free it like any other reject */
+		goto err_out;
+	}
+	hdr = (struct ctdb_req_header *)data;
+
+	if (hdr->ctdb_magic != CTDB_MAGIC) {
+		ctdb_set_error(client->ctdb, "Non CTDB packet rejected\n");
+		goto err_out;
+	}
+
+	if (hdr->ctdb_version != CTDB_PROTOCOL) {
+		ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
+		goto err_out;
+	}
+
+	DEBUG(DEBUG_DEBUG,(__location__ " client request %u of type %u length %u from "
+			   "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
+			   hdr->srcnode, hdr->destnode));
+
+	/* it is the responsibility of the incoming packet function to free 'data' */
+	daemon_incoming_packet(client, hdr);
+	return;
+
+err_out:
+	TALLOC_FREE(data);
+}
+
+
+/*
+ * talloc destructor for a client PID list entry: unlink the entry from
+ * ctdb->client_pids, unless that list is already empty.
+ *
+ * Always returns 0.
+ */
+static int ctdb_clientpid_destructor(struct ctdb_client_pid_list *client_pid)
+{
+	struct ctdb_context *ctdb = client_pid->ctdb;
+
+	if (ctdb->client_pids == NULL) {
+		return 0;
+	}
+
+	DLIST_REMOVE(ctdb->client_pids, client_pid);
+	return 0;
+}
+
+/*
+ * Allocate a non-zero id for client from idr.
+ *
+ * Some places in the code (e.g. ctdb_control_db_attach(),
+ * ctdb_control_db_detach()) assign a special meaning to client_id 0:
+ * the assumption is that a control with client_id 0 has come from
+ * another daemon.  Therefore 0 is never handed out.
+ *
+ * On success the id is stored in *out and 0 is returned.  EINVAL is
+ * returned, and *out left untouched, if no usable id is available.
+ */
+static int get_new_client_id(struct reqid_context *idr,
+			     struct ctdb_client *client,
+			     uint32_t *out)
+{
+	uint32_t id;
+
+	id = reqid_new(idr, client);
+	if (id == 0) {
+		/*
+		 * Don't leak ID 0.  This is safe because the ID keeps
+		 * increasing.  A test will be added to ensure that
+		 * this doesn't change.
+		 */
+		reqid_remove(idr, 0);
+
+		id = reqid_new(idr, client);
+	}
+
+	if (id == REQID_INVALID) {
+		return EINVAL;
+	}
+
+	if (id != 0) {
+		*out = id;
+		return 0;
+	}
+
+	/* Every other ID must have been used and we can't use 0 */
+	reqid_remove(idr, 0);
+	return EINVAL;
+}
+
+/*
+ * fd event handler for the daemon's listening unix domain socket:
+ * accept one client connection and set up its state.
+ *
+ * For each accepted connection this makes the socket non-blocking and
+ * close-on-exec, allocates a ctdb_client with a fresh non-zero client
+ * id, records the peer PID in ctdb->client_pids and creates the packet
+ * queue that feeds ctdb_daemon_read_cb().  On any failure the socket
+ * is closed and everything allocated so far is released.
+ *
+ * ev, fde, flags: unused
+ * private_data:   the struct ctdb_context
+ */
+static void ctdb_accept_client(struct tevent_context *ev,
+			       struct tevent_fd *fde, uint16_t flags,
+			       void *private_data)
+{
+	struct sockaddr_un addr;
+	socklen_t len;
+	int fd;
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	struct ctdb_client *client;
+	struct ctdb_client_pid_list *client_pid;
+	pid_t peer_pid = 0;
+	int ret;
+
+	memset(&addr, 0, sizeof(addr));
+	len = sizeof(addr);
+	fd = accept(ctdb->daemon.sd, (struct sockaddr *)&addr, &len);
+	if (fd == -1) {
+		return;
+	}
+	smb_set_close_on_exec(fd);
+
+	ret = set_blocking(fd, false);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      (__location__
+		       " failed to set socket non-blocking (%s)\n",
+		       strerror(errno)));
+		close(fd);
+		return;
+	}
+
+	set_close_on_exec(fd);
+
+	DEBUG(DEBUG_DEBUG,(__location__ " Created SOCKET FD:%d to connected child\n", fd));
+
+	client = talloc_zero(ctdb, struct ctdb_client);
+	if (client == NULL) {
+		/* Out of memory: drop the connection, don't dereference NULL */
+		DEBUG(DEBUG_ERR,(__location__ " Failed to allocate client structure\n"));
+		close(fd);
+		return;
+	}
+	/* The peer PID is informational; on failure it simply stays 0 */
+	if (ctdb_get_peer_pid(fd, &peer_pid) == 0) {
+		DEBUG(DEBUG_INFO,("Connected client with pid:%u\n", (unsigned)peer_pid));
+	}
+
+	client->ctdb = ctdb;
+	client->fd = fd;
+
+	ret = get_new_client_id(ctdb->idr, client, &client->client_id);
+	if (ret != 0) {
+		DBG_ERR("Unable to get client ID (%d)\n", ret);
+		close(fd);
+		talloc_free(client);
+		return;
+	}
+
+	client->pid = peer_pid;
+
+	client_pid = talloc(client, struct ctdb_client_pid_list);
+	if (client_pid == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate client pid structure\n"));
+		/*
+		 * The client destructor is not installed yet, so the
+		 * id has to be released by hand; otherwise the idr
+		 * would keep pointing at the freed client.
+		 */
+		reqid_remove(ctdb->idr, client->client_id);
+		close(fd);
+		talloc_free(client);
+		return;
+	}
+	client_pid->ctdb = ctdb;
+	client_pid->pid = peer_pid;
+	client_pid->client = client;
+
+	DLIST_ADD(ctdb->client_pids, client_pid);
+
+	/* NOTE(review): the result of ctdb_queue_setup() is not checked - confirm whether it can return NULL here */
+	client->queue = ctdb_queue_setup(ctdb, client, fd, CTDB_DS_ALIGNMENT,
+					 ctdb_daemon_read_cb, client,
+					 "client-%u", client->pid);
+
+	/* From here on, freeing the client releases its id and list entry */
+	talloc_set_destructor(client, ctdb_client_destructor);
+	talloc_set_destructor(client_pid, ctdb_clientpid_destructor);
+	ctdb->num_clients++;
+}
+
+
+
+/*
+ * Create a unix domain socket, bind it, secure it and listen.
+ *
+ * The socket descriptor is stored in ctdb->daemon.sd.  Returns 0 on
+ * success.  On failure returns -1; except when socket() itself fails,
+ * the descriptor is closed and ctdb->daemon.sd is reset to -1.
+ *
+ * The chown() step is skipped when test_mode_enabled is true.
+ */
+static int ux_socket_bind(struct ctdb_context *ctdb, bool test_mode_enabled)
+{
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	int ret;
+
+	ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (ctdb->daemon.sd == -1) {
+		return -1;
+	}
+
+	/*
+	 * Refuse names that do not fit in sun_path.  Silently truncating
+	 * would bind one path while chown()/chmod() below act on another.
+	 */
+	if (strlen(ctdb->daemon.name) >= sizeof(addr.sun_path)) {
+		D_ERR("ctdb socket path '%s' is too long\n",
+		      ctdb->daemon.name);
+		goto failed;
+	}
+	strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);
+
+	if (! sock_clean(ctdb->daemon.name)) {
+		/* Go through the common exit so the socket is not leaked */
+		goto failed;
+	}
+
+	set_close_on_exec(ctdb->daemon.sd);
+
+	ret = set_blocking(ctdb->daemon.sd, false);
+	if (ret != 0) {
+		DBG_ERR("Failed to set socket non-blocking (%s)\n",
+			strerror(errno));
+		goto failed;
+	}
+
+	ret = bind(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret == -1) {
+		D_ERR("Unable to bind on ctdb socket '%s'\n", ctdb->daemon.name);
+		goto failed;
+	}
+
+	if (!test_mode_enabled) {
+		ret = chown(ctdb->daemon.name, geteuid(), getegid());
+		if (ret != 0) {
+			D_ERR("Unable to secure (chown) ctdb socket '%s'\n",
+			      ctdb->daemon.name);
+			goto failed;
+		}
+	}
+
+	ret = chmod(ctdb->daemon.name, 0700);
+	if (ret != 0) {
+		D_ERR("Unable to secure (chmod) ctdb socket '%s'\n",
+		      ctdb->daemon.name);
+		goto failed;
+	}
+
+
+	ret = listen(ctdb->daemon.sd, 100);
+	if (ret != 0) {
+		D_ERR("Unable to listen on ctdb socket '%s'\n",
+		      ctdb->daemon.name);
+		goto failed;
+	}
+
+	D_NOTICE("Listening to ctdb socket %s\n", ctdb->daemon.name);
+	return 0;
+
+failed:
+	close(ctdb->daemon.sd);
+	ctdb->daemon.sd = -1;
+	return -1;
+}
+
+/*
+ * Find the node structure whose PNN matches pnn.  CTDB_CURRENT_NODE is
+ * translated to this node's own PNN before searching.
+ *
+ * Returns NULL when no entry of ctdb->nodes matches.
+ */
+struct ctdb_node *ctdb_find_node(struct ctdb_context *ctdb, uint32_t pnn)
+{
+	uint32_t wanted = (pnn == CTDB_CURRENT_NODE) ? ctdb->pnn : pnn;
+	unsigned int idx = 0;
+
+	while (idx < ctdb->num_nodes) {
+		struct ctdb_node *candidate = ctdb->nodes[idx];
+
+		if (candidate->pnn == wanted) {
+			return candidate;
+		}
+		idx++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Set the initial flags of the local node: clear DISCONNECTED and, if
+ * configured, start PERMANENTLY_DISABLED and/or STOPPED.
+ */
+static void initialise_node_flags (struct ctdb_context *ctdb)
+{
+	struct ctdb_node *self = ctdb_find_node(ctdb, CTDB_CURRENT_NODE);
+
+	/*
+	 * The lookup is expected to succeed because the PNN is set just
+	 * before this is called; the check keeps static analysers happy.
+	 */
+	if (self == NULL) {
+		DBG_ERR("Unable to find current node\n");
+		return;
+	}
+
+	self->flags &= ~NODE_FLAGS_DISCONNECTED;
+
+	/* Configured to start out DISABLED? */
+	if (ctdb->start_as_disabled != 0) {
+		D_ERR("This node is configured to start in DISABLED state\n");
+		self->flags |= NODE_FLAGS_PERMANENTLY_DISABLED;
+	}
+
+	/* Configured to start out STOPPED? */
+	if (ctdb->start_as_stopped != 0) {
+		D_ERR("This node is configured to start in STOPPED state\n");
+		self->flags |= NODE_FLAGS_STOPPED;
+	}
+}
+
+/*
+ * Completion callback for the "setup" event.  A non-zero status is
+ * handed to ctdb_die().  Otherwise the notification script is run, the
+ * recovery daemon and the periodic events are started, and the daemon
+ * waits for the first recovery.
+ */
+static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
+				      void *private_data)
+{
+	int ret;
+
+	if (status != 0) {
+		ctdb_die(ctdb, "Failed to run setup event");
+	}
+	ctdb_run_notification_script(ctdb, "setup");
+
+	/* Start the recovery daemon */
+	ret = ctdb_start_recoverd(ctdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ALERT,("Failed to start recovery daemon\n"));
+		exit(11);
+	}
+
+	ctdb_start_periodic_events(ctdb);
+	ctdb_wait_for_first_recovery(ctdb);
+}
+
+/* Times of the most recent BEFORE_WAIT / AFTER_WAIT tevent trace points */
+static struct timeval tevent_before_wait_ts;
+static struct timeval tevent_after_wait_ts;
+
+/* Seed both trace timestamps with the current time. */
+static void ctdb_tevent_trace_init(void)
+{
+	tevent_before_wait_ts = timeval_current();
+	tevent_after_wait_ts = tevent_before_wait_ts;
+}
+
+/*
+ * tevent trace callback: log when handling events, or waiting for the
+ * next one, took more than 3 seconds.  Only active in the process whose
+ * PID matches ctdb->ctdbd_pid.
+ */
+static void ctdb_tevent_trace(enum tevent_trace_point tp,
+			      void *private_data)
+{
+	struct ctdb_context *ctdb =
+		talloc_get_type(private_data, struct ctdb_context);
+	struct timeval now;
+	struct timeval elapsed;
+
+	if (getpid() != ctdb->ctdbd_pid) {
+		return;
+	}
+
+	now = timeval_current();
+
+	if (tp == TEVENT_TRACE_BEFORE_WAIT) {
+		/* Time spent handling events since the last wait returned */
+		elapsed = timeval_until(&tevent_after_wait_ts, &now);
+		if (elapsed.tv_sec > 3) {
+			DEBUG(DEBUG_ERR,
+			      ("Handling event took %ld seconds!\n",
+			       (long)elapsed.tv_sec));
+		}
+		tevent_before_wait_ts = now;
+	} else if (tp == TEVENT_TRACE_AFTER_WAIT) {
+		/* Time spent waiting with nothing to do */
+		elapsed = timeval_until(&tevent_before_wait_ts, &now);
+		if (elapsed.tv_sec > 3) {
+			DEBUG(DEBUG_ERR,
+			      ("No event for %ld seconds!\n",
+			       (long)elapsed.tv_sec));
+		}
+		tevent_after_wait_ts = now;
+	}
+	/* Any other (future) tevent trace point is ignored */
+}
+
+/*
+ * Exit handler (registered with atexit() by ctdb_create_pidfile()):
+ * free the PID file context.  Presumably freeing the context removes
+ * the PID file itself - confirm against pidfile_context_create().
+ */
+static void ctdb_remove_pidfile(void)
+{
+ TALLOC_FREE(ctdbd_pidfile_ctx);
+}
+
+/*
+ * Create the PID file named by ctdbd_pidfile, if one is configured, and
+ * arrange for ctdb_remove_pidfile() to run at exit.  Exits with status
+ * 11 if the PID file cannot be created.
+ */
+static void ctdb_create_pidfile(TALLOC_CTX *mem_ctx)
+{
+	int ret;
+
+	if (ctdbd_pidfile == NULL) {
+		/* No PID file requested */
+		return;
+	}
+
+	ret = pidfile_context_create(mem_ctx, ctdbd_pidfile,
+				     &ctdbd_pidfile_ctx);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to create PID file %s\n",
+		       ctdbd_pidfile));
+		exit(11);
+	}
+
+	DEBUG(DEBUG_NOTICE, ("Created PID file %s\n", ctdbd_pidfile));
+	atexit(ctdb_remove_pidfile);
+}
+
+/*
+ * Build the initial VNN map: one entry per node that is not flagged
+ * NODE_FLAGS_DELETED, with the generation set to INVALID_GENERATION.
+ * Each entry holds the index of the node in ctdb->nodes.
+ *
+ * Allocation failures are handled by CTDB_NO_MEMORY_FATAL().
+ */
+static void ctdb_initialise_vnn_map(struct ctdb_context *ctdb)
+{
+	unsigned int i, j, count;
+
+	/* initialize the vnn mapping table, skipping any deleted nodes */
+	ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
+	CTDB_NO_MEMORY_FATAL(ctdb, ctdb->vnn_map);
+
+	count = 0;
+	for (i = 0; i < ctdb->num_nodes; i++) {
+		if ((ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) == 0) {
+			count++;
+		}
+	}
+
+	ctdb->vnn_map->generation = INVALID_GENERATION;
+	ctdb->vnn_map->size = count;
+	ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size);
+	CTDB_NO_MEMORY_FATAL(ctdb, ctdb->vnn_map->map);
+
+	/*
+	 * Walk ALL nodes, not just the first "size" of them.  With deleted
+	 * nodes present, size < num_nodes, and stopping at size would skip
+	 * trailing live nodes and leave map slots uninitialised.  The
+	 * counting loop above guarantees that j never exceeds size.
+	 */
+	for (i = 0, j = 0; i < ctdb->num_nodes; i++) {
+		if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
+			continue;
+		}
+		ctdb->vnn_map->map[j] = i;
+		j++;
+	}
+}
+
+/*
+ * Work out this node's PNN from its configured address and store it in
+ * ctdb->pnn.  ctdb_fatal() is called when the address is unset or does
+ * not map to a known node.
+ */
+static void ctdb_set_my_pnn(struct ctdb_context *ctdb)
+{
+	if (NULL == ctdb->address) {
+		ctdb_fatal(ctdb,
+			   "Can not determine PNN - node address is not set\n");
+	}
+
+	ctdb->pnn = ctdb_ip_to_pnn(ctdb, ctdb->address);
+
+	if (CTDB_UNKNOWN_PNN == ctdb->pnn) {
+		ctdb_fatal(ctdb,
+			   "Can not determine PNN - unknown node address\n");
+	}
+
+	D_NOTICE("PNN is %u\n", ctdb->pnn);
+}
+
+/*
+ * tevent handler for stdin: bytes read are discarded.  Anything other
+ * than a successful one-byte read (EOF or error) is treated as stdin
+ * having been closed and starts the shutdown sequence with exit code
+ * EPIPE.
+ */
+static void stdin_handler(struct tevent_context *ev,
+			  struct tevent_fd *fde,
+			  uint16_t flags,
+			  void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type_abort(
+		private_data, struct ctdb_context);
+	char byte;
+	ssize_t got = read(STDIN_FILENO, &byte, 1);
+
+	if (got == 1) {
+		return;
+	}
+
+	D_ERR("stdin closed, exiting\n");
+	talloc_free(fde);
+	ctdb_shutdown_sequence(ctdb, EPIPE);
+}
+
+/*
+ * Watch stdin for closure, but only when stdin is a pipe (FIFO).
+ *
+ * Returns 0 when the handler was installed or when stdin is not usable
+ * for this purpose, ENOMEM if the fd event could not be created.
+ */
+static int setup_stdin_handler(struct ctdb_context *ctdb)
+{
+	struct stat sb;
+	struct tevent_fd *stdin_fde;
+
+	if (fstat(STDIN_FILENO, &sb) != 0) {
+		/* Problem with stdin, ignore... */
+		DBG_INFO("Can't fstat() stdin\n");
+		return 0;
+	}
+
+	if (!S_ISFIFO(sb.st_mode)) {
+		DBG_INFO("Not a pipe...\n");
+		return 0;
+	}
+
+	stdin_fde = tevent_add_fd(ctdb->ev,
+				  ctdb,
+				  STDIN_FILENO,
+				  TEVENT_FD_READ,
+				  stdin_handler,
+				  ctdb);
+	if (stdin_fde == NULL) {
+		return ENOMEM;
+	}
+
+	DBG_INFO("Set up stdin handler\n");
+	return 0;
+}
+
+/*
+ * Fork and continue in the child only: the parent exits with status 0.
+ * Exits with status 1 if fork() fails.  Unlike the become_daemon() path
+ * used by ctdb_start_daemon(), nothing else is done here.
+ */
+static void fork_only(void)
+{
+	pid_t child = fork();
+
+	if (child == -1) {
+		D_ERR("Fork failed (errno=%d)\n", errno);
+		exit(1);
+	}
+
+	if (child == 0) {
+		/* Child carries on as the daemon */
+		return;
+	}
+
+	/* Parent simply exits... */
+	exit(0);
+}
+
+/*
+ * Hook run on SIGHUP: forward the signal to the recovery daemon, if one
+ * is running, then ask the event daemon to reopen its logs.
+ */
+static void sighup_hook(void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type_abort(private_data,
+							  struct ctdb_context);
+	pid_t recoverd = ctdb->recoverd_pid;
+
+	if (recoverd > 0) {
+		kill(recoverd, SIGHUP);
+	}
+
+	ctdb_event_reopen_logs(ctdb);
+}
+
+/*
+ * Start the protocol going as a daemon.
+ *
+ * interactive:       when false the process forks into the background
+ *                    first.
+ * test_mode_enabled: keep stdin open across the fork and exit when stdin
+ *                    (a pipe) is closed; also passed to ux_socket_bind().
+ *
+ * Most failures terminate the process via exit(), ctdb_die() or
+ * ctdb_fatal().  The only ordinary return is -1 when the transport fails
+ * to initialise.  On success the function enters the event loop and is
+ * not expected to return.
+ */
+int ctdb_start_daemon(struct ctdb_context *ctdb,
+ bool interactive,
+ bool test_mode_enabled)
+{
+ bool status;
+ int ret;
+ struct tevent_fd *fde;
+
+ /* Fork if not interactive */
+ if (!interactive) {
+ if (test_mode_enabled) {
+ /* Keep stdin open */
+ fork_only();
+ } else {
+ /* Fork, close stdin, start a session */
+ become_daemon(true, false, false);
+ }
+ }
+
+ ignore_signal(SIGPIPE);
+ ignore_signal(SIGUSR1);
+
+ /* Record the PID after any fork above so it names the daemon proper */
+ ctdb->ctdbd_pid = getpid();
+ DEBUG(DEBUG_ERR, ("Starting CTDBD (Version %s) as PID: %u\n",
+ SAMBA_VERSION_STRING, ctdb->ctdbd_pid));
+ ctdb_create_pidfile(ctdb);
+
+ /* create a unix domain stream socket to listen to */
+ ret = ux_socket_bind(ctdb, test_mode_enabled);
+ if (ret != 0) {
+ D_ERR("Cannot continue. Exiting!\n");
+ exit(10);
+ }
+
+ /* Make sure we log something when the daemon terminates.
+ * This must be the first exit handler to run (so the last to
+ * be registered).
+ */
+ __ctdbd_pid = getpid();
+ atexit(print_exit_message);
+
+ if (ctdb->do_setsched) {
+ /* try to set us up as realtime */
+ if (!set_scheduler()) {
+ exit(1);
+ }
+ DEBUG(DEBUG_NOTICE, ("Set real-time scheduler priority\n"));
+ }
+
+ ctdb->ev = tevent_context_init(NULL);
+ if (ctdb->ev == NULL) {
+ DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
+ exit(1);
+ }
+ tevent_loop_allow_nesting(ctdb->ev);
+ ctdb_tevent_trace_init();
+ tevent_set_trace_callback(ctdb->ev, ctdb_tevent_trace, ctdb);
+
+ status = logging_setup_sighup_handler(ctdb->ev,
+ ctdb,
+ sighup_hook,
+ ctdb);
+ if (!status) {
+ D_ERR("Failed to set up signal handler for SIGHUP\n");
+ exit(1);
+ }
+
+ /* set up a handler to pick up sigchld */
+ if (ctdb_init_sigchld(ctdb) == NULL) {
+ DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
+ exit(1);
+ }
+
+ if (!interactive) {
+ ctdb_set_child_logging(ctdb);
+ }
+
+ /* Exit if stdin is closed */
+ if (test_mode_enabled) {
+ ret = setup_stdin_handler(ctdb);
+ if (ret != 0) {
+ DBG_ERR("Failed to setup stdin handler\n");
+ exit(1);
+ }
+ }
+
+ /* Discard any existing srvid context and start with a fresh one */
+ TALLOC_FREE(ctdb->srv);
+ if (srvid_init(ctdb, &ctdb->srv) != 0) {
+ DEBUG(DEBUG_CRIT,("Failed to setup message srvid context\n"));
+ exit(1);
+ }
+
+ /* Tunnels are tracked in a second srvid context */
+ TALLOC_FREE(ctdb->tunnels);
+ if (srvid_init(ctdb, &ctdb->tunnels) != 0) {
+ DEBUG(DEBUG_ERR, ("Failed to setup tunnels context\n"));
+ exit(1);
+ }
+
+ /* initialize statistics collection */
+ ctdb_statistics_init(ctdb);
+
+ /* force initial recovery for election */
+ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+
+ if (ctdb_start_eventd(ctdb) != 0) {
+ DEBUG(DEBUG_ERR, ("Failed to start event daemon\n"));
+ exit(1);
+ }
+
+ ctdb_set_runstate(ctdb, CTDB_RUNSTATE_INIT);
+ ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT);
+ if (ret != 0) {
+ ctdb_die(ctdb, "Failed to run init event\n");
+ }
+ ctdb_run_notification_script(ctdb, "init");
+
+ /*
+ * ret is still 0 here, so an unrecognised transport name passes the
+ * ret check below and is caught by the ctdb->methods == NULL test.
+ */
+ if (strcmp(ctdb->transport, "tcp") == 0) {
+ ret = ctdb_tcp_init(ctdb);
+ }
+#ifdef USE_INFINIBAND
+ if (strcmp(ctdb->transport, "ib") == 0) {
+ ret = ctdb_ibw_init(ctdb);
+ }
+#endif
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,("Failed to initialise transport '%s'\n", ctdb->transport));
+ return -1;
+ }
+
+ if (ctdb->methods == NULL) {
+ DEBUG(DEBUG_ALERT,(__location__ " Can not initialize transport. ctdb->methods is NULL\n"));
+ ctdb_fatal(ctdb, "transport is unavailable. can not initialize.");
+ }
+
+ /* Initialise the transport. This sets the node address if it
+ * was not set via the command-line. */
+ if (ctdb->methods->initialise(ctdb) != 0) {
+ ctdb_fatal(ctdb, "transport failed to initialise");
+ }
+
+ /* Needs the node address, so must follow transport initialisation */
+ ctdb_set_my_pnn(ctdb);
+
+ /* Needs the PNN, so must follow ctdb_set_my_pnn() */
+ initialise_node_flags(ctdb);
+
+ ret = ctdb_set_public_addresses(ctdb, true);
+ if (ret == -1) {
+ D_ERR("Unable to setup public IP addresses\n");
+ exit(1);
+ }
+
+ ctdb_initialise_vnn_map(ctdb);
+
+ /* attach to existing databases */
+ if (ctdb_attach_databases(ctdb) != 0) {
+ ctdb_fatal(ctdb, "Failed to attach to databases\n");
+ }
+
+ /* start frozen, then let the first election sort things out */
+ if (!ctdb_blocking_freeze(ctdb)) {
+ ctdb_fatal(ctdb, "Failed to get initial freeze\n");
+ }
+
+ /* now start accepting clients, only can do this once frozen */
+ fde = tevent_add_fd(ctdb->ev, ctdb, ctdb->daemon.sd, TEVENT_FD_READ,
+ ctdb_accept_client, ctdb);
+ if (fde == NULL) {
+ ctdb_fatal(ctdb, "Failed to add daemon socket to event loop");
+ }
+ tevent_fd_set_auto_close(fde);
+
+ /* Start the transport */
+ if (ctdb->methods->start(ctdb) != 0) {
+ DEBUG(DEBUG_ALERT,("transport failed to start!\n"));
+ ctdb_fatal(ctdb, "transport failed to start");
+ }
+
+ /* Recovery daemon and timed events are started from the
+ * callback, only after the setup event completes
+ * successfully.
+ */
+ ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SETUP);
+ ret = ctdb_event_script_callback(ctdb,
+ ctdb,
+ ctdb_setup_event_callback,
+ ctdb,
+ CTDB_EVENT_SETUP,
+ "%s",
+ "");
+ if (ret != 0) {
+ DEBUG(DEBUG_CRIT,("Failed to set up 'setup' event\n"));
+ exit(1);
+ }
+
+ lockdown_memory(ctdb->valgrinding);
+
+ /* go into a wait loop to allow other nodes to complete */
+ tevent_loop_wait(ctdb->ev);
+
+ DEBUG(DEBUG_CRIT,("event_loop_wait() returned. this should not happen\n"));
+ exit(1);
+}
+
+/*
+ * Allocate a packet for use in daemon<->daemon communication.
+ *
+ * The packet length is the larger of length and slength, rounded up to
+ * CTDB_DS_ALIGNMENT for the allocation.  The first slength bytes are
+ * zeroed and the common header fields are filled in.  The talloc name
+ * of the packet is set to type.
+ *
+ * Returns NULL if the transport is down (ctdb->methods == NULL) or the
+ * transport's allocate_pkt() fails.
+ */
+struct ctdb_req_header *_ctdb_transport_allocate(struct ctdb_context *ctdb,
+						 TALLOC_CTX *mem_ctx,
+						 enum ctdb_operation operation,
+						 size_t length, size_t slength,
+						 const char *type)
+{
+	struct ctdb_req_header *hdr;
+	int size;
+
+	length = MAX(length, slength);
+
+	if (ctdb->methods == NULL) {
+		DEBUG(DEBUG_INFO,(__location__ " Unable to allocate transport packet for operation %u of length %u. Transport is DOWN.\n",
+			 operation, (unsigned)length));
+		return NULL;
+	}
+
+	/* round the allocation up to the next alignment boundary */
+	size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
+
+	hdr = (struct ctdb_req_header *)ctdb->methods->allocate_pkt(mem_ctx, size);
+	if (hdr == NULL) {
+		DEBUG(DEBUG_ERR,("Unable to allocate transport packet for operation %u of length %u\n",
+			 operation, (unsigned)length));
+		return NULL;
+	}
+
+	talloc_set_name_const(hdr, type);
+	memset(hdr, 0, slength);
+
+	hdr->length       = length;
+	hdr->ctdb_magic   = CTDB_MAGIC;
+	hdr->ctdb_version = CTDB_PROTOCOL;
+	hdr->generation   = ctdb->vnn_map->generation;
+	hdr->operation    = operation;
+	hdr->srcnode      = ctdb->pnn;
+
+	return hdr;
+}
+
+/*
+ * State kept for one control request submitted by a local client, from
+ * the time it is sent until daemon_control_callback() has replied.
+ */
+struct daemon_control_state {
+ /* links in the destination node's pending_controls list */
+ struct daemon_control_state *next, *prev;
+ /* client that submitted the control */
+ struct ctdb_client *client;
+ /* the request packet; talloc-owned by this state */
+ struct ctdb_req_control_old *c;
+ /* request ID copied from the request header, echoed in the reply */
+ uint32_t reqid;
+ /* destination node, or NULL if the destination PNN did not validate */
+ struct ctdb_node *node;
+};
+
+/*
+ * Callback when a control reply comes in: build a CTDB_REPLY_CONTROL
+ * packet carrying the status, the reply data and the optional error
+ * string (appended after the data, without a terminating NUL) and queue
+ * it to the client that issued the control.
+ *
+ * private_data is the struct daemon_control_state of the request.  It
+ * is freed here unless daemon_queue_send() returns -1.
+ */
+static void daemon_control_callback(struct ctdb_context *ctdb,
+				    int32_t status, TDB_DATA data,
+				    const char *errormsg,
+				    void *private_data)
+{
+	struct daemon_control_state *state = talloc_get_type(private_data,
+							   struct daemon_control_state);
+	struct ctdb_client *client = state->client;
+	struct ctdb_reply_control_old *r;
+	size_t len = offsetof(struct ctdb_reply_control_old, data) + data.dsize;
+	int ret;
+
+	if (errormsg != NULL) {
+		len += strlen(errormsg);
+	}
+
+	/* construct a message to send to the client containing the data */
+	r = ctdbd_allocate_pkt(ctdb, state, CTDB_REPLY_CONTROL, len,
+			       struct ctdb_reply_control_old);
+	CTDB_NO_MEMORY_VOID(ctdb, r);
+
+	r->hdr.reqid = state->reqid;
+	r->status = status;
+	r->datalen = data.dsize;
+	memcpy(&r->data[0], data.dptr, data.dsize);
+
+	if (errormsg != NULL) {
+		r->errorlen = strlen(errormsg);
+		memcpy(&r->data[r->datalen], errormsg, r->errorlen);
+	} else {
+		r->errorlen = 0;
+	}
+
+	ret = daemon_queue_send(client, &r->hdr);
+	if (ret == -1) {
+		return;
+	}
+
+	talloc_free(state);
+}
+
+/*
+ * Fail all pending controls to a disconnected node: each one is taken
+ * off the node's pending list and completed with status -1 and the
+ * error "node is disconnected".
+ */
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
+{
+	for (;;) {
+		struct daemon_control_state *pending = node->pending_controls;
+
+		if (pending == NULL) {
+			break;
+		}
+
+		DLIST_REMOVE(node->pending_controls, pending);
+		daemon_control_callback(ctdb, (uint32_t)-1, tdb_null,
+					"node is disconnected", pending);
+	}
+}
+
+/*
+ * talloc destructor for a daemon_control_state: if the state is tied to
+ * a node, take it off that node's pending_controls list.
+ */
+static int daemon_control_destructor(struct daemon_control_state *state)
+{
+	if (state->node == NULL) {
+		return 0;
+	}
+
+	DLIST_REMOVE(state->node->pending_controls, state);
+	return 0;
+}
+
+/*
+ * This is called when the ctdb daemon received a ctdb request control
+ * from a local client over the unix domain socket.
+ *
+ * A daemon_control_state is created to track the request (taking
+ * ownership of c) and the control is forwarded with
+ * ctdb_daemon_send_control().  daemon_control_callback() sends the
+ * reply to the client later.  For CTDB_CTRL_FLAG_NOREPLY controls the
+ * state is parented to a temporary context, so it is released before
+ * this function returns.
+ */
+static void daemon_request_control_from_client(struct ctdb_client *client,
+					       struct ctdb_req_control_old *c)
+{
+	TDB_DATA data;
+	int res;
+	struct daemon_control_state *state;
+	TALLOC_CTX *tmp_ctx;
+
+	if (c->hdr.destnode == CTDB_CURRENT_NODE) {
+		c->hdr.destnode = client->ctdb->pnn;
+	}
+
+	/*
+	 * Allocate the state before the temporary context so that the
+	 * early return inside CTDB_NO_MEMORY_VOID() has nothing to leak.
+	 */
+	state = talloc(client, struct daemon_control_state);
+	CTDB_NO_MEMORY_VOID(client->ctdb, state);
+
+	tmp_ctx = talloc_new(client);
+	if (tmp_ctx == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		talloc_free(state);
+		return;
+	}
+
+	state->client = client;
+	state->c = talloc_steal(state, c);
+	state->reqid = c->hdr.reqid;
+	if (ctdb_validate_pnn(client->ctdb, c->hdr.destnode)) {
+		state->node = client->ctdb->nodes[c->hdr.destnode];
+		DLIST_ADD(state->node->pending_controls, state);
+	} else {
+		state->node = NULL;
+	}
+
+	talloc_set_destructor(state, daemon_control_destructor);
+
+	if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+		/* No reply expected: let the state die with tmp_ctx */
+		talloc_steal(tmp_ctx, state);
+	}
+
+	data.dptr = &c->data[0];
+	data.dsize = c->datalen;
+	res = ctdb_daemon_send_control(client->ctdb, c->hdr.destnode,
+				       c->srvid, c->opcode, client->client_id,
+				       c->flags,
+				       data, daemon_control_callback,
+				       state);
+	if (res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to send control to remote node %u\n",
+			 c->hdr.destnode));
+	}
+
+	talloc_free(tmp_ctx);
+}
+
+/*
+ * Handle a tunnel request received from a local client: check the
+ * destination PNN and that the tunnel ID is registered, then forward
+ * the payload with ctdb_daemon_send_tunnel().  Invalid requests are
+ * logged and dropped; nothing is sent back to the client from here.
+ */
+static void daemon_request_tunnel_from_client(struct ctdb_client *client,
+					      struct ctdb_req_tunnel_old *c)
+{
+	TDB_DATA data;
+	int ret;
+
+	if (! ctdb_validate_pnn(client->ctdb, c->hdr.destnode)) {
+		DEBUG(DEBUG_ERR, ("Invalid destination 0x%x\n",
+				  c->hdr.destnode));
+		return;
+	}
+
+	ret = srvid_exists(client->ctdb->tunnels, c->tunnel_id, NULL);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("tunnel id 0x%"PRIx64" not registered, dropping pkt\n",
+		       c->tunnel_id));
+		return;
+	}
+
+	data = (TDB_DATA) {
+		.dsize = c->datalen,
+		.dptr = &c->data[0],
+	};
+
+	ret = ctdb_daemon_send_tunnel(client->ctdb, c->hdr.destnode,
+				      c->tunnel_id, c->flags, data);
+	if (ret != 0) {
+		/* Message corrected: this is a failed send to a node */
+		DEBUG(DEBUG_ERR, ("Failed to send tunnel to remote node %u\n",
+				  c->hdr.destnode));
+	}
+}
+
+/*
+ * Register a call function with a database.
+ *
+ * fn is recorded under id on the database identified by db_id.
+ * Returns 0 on success, -1 if the database is not known or memory
+ * could not be allocated.
+ */
+int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
+			 ctdb_fn_t fn, int id)
+{
+	struct ctdb_registered_call *call;
+	struct ctdb_db_context *ctdb_db;
+
+	ctdb_db = find_ctdb_db(ctdb, db_id);
+	if (ctdb_db == NULL) {
+		return -1;
+	}
+
+	call = talloc(ctdb_db, struct ctdb_registered_call);
+	if (call == NULL) {
+		/* Previously dereferenced unchecked */
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		return -1;
+	}
+	call->fn = fn;
+	call->id = id;
+
+	DLIST_ADD(ctdb_db->calls, call);
+	return 0;
+}
+
+
+
+/*
+ This local messaging handler is ugly, but is needed to prevent
+ recursion in ctdb_send_message() when the destination node is the
+ same as the source node.
+
+ One instance holds a message addressed to this node until it is
+ dispatched from a zero-delay timer event.
+ */
+struct ctdb_local_message {
+ /* daemon context whose srvid handlers receive the message */
+ struct ctdb_context *ctdb;
+ /* service ID the message is addressed to */
+ uint64_t srvid;
+ /* private copy of the payload, talloc child of this structure */
+ TDB_DATA data;
+};
+
+/*
+ * Timer callback that delivers a queued local message to the srvid
+ * handlers and then frees it.
+ */
+static void ctdb_local_message_trigger(struct tevent_context *ev,
+				       struct tevent_timer *te,
+				       struct timeval t, void *private_data)
+{
+	struct ctdb_local_message *msg;
+
+	msg = talloc_get_type(private_data, struct ctdb_local_message);
+
+	srvid_dispatch(msg->ctdb->srv, msg->srvid, CTDB_SRVID_ALL, msg->data);
+	talloc_free(msg);
+}
+
+/*
+ * Queue a message addressed to this node itself.
+ *
+ * The payload is copied and delivered from a zero-delay timer rather
+ * than directly, to prevent recursion.  Returns 0 once the delivery has
+ * been scheduled, -1 on allocation failure.
+ */
+static int ctdb_local_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data)
+{
+	struct ctdb_local_message *m;
+	struct tevent_timer *te;
+
+	m = talloc(ctdb, struct ctdb_local_message);
+	CTDB_NO_MEMORY(ctdb, m);
+
+	m->ctdb = ctdb;
+	m->srvid = srvid;
+	m->data = data;
+	/* take a private copy: the caller's buffer may not outlive us */
+	m->data.dptr = talloc_memdup(m, m->data.dptr, m->data.dsize);
+	if (m->data.dptr == NULL) {
+		talloc_free(m);
+		return -1;
+	}
+
+	/* this needs to be done as an event to prevent recursion */
+	te = tevent_add_timer(ctdb->ev, m, timeval_zero(),
+			      ctdb_local_message_trigger, m);
+	if (te == NULL) {
+		/*
+		 * Without the timer the message would never be delivered
+		 * and m would never be freed, so report the failure.
+		 */
+		talloc_free(m);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Send a ctdb message to the given PNN.
+ *
+ * A message addressed to this node is queued locally.  Anything else is
+ * wrapped in a CTDB_REQ_MESSAGE packet and passed to
+ * ctdb_queue_packet().  Returns 0 on success, -1 if the transport is
+ * down or the local queueing fails.
+ */
+int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn,
+			     uint64_t srvid, TDB_DATA data)
+{
+	struct ctdb_req_message_old *r;
+	int len;
+
+	if (ctdb->methods == NULL) {
+		DEBUG(DEBUG_INFO,(__location__ " Failed to send message. Transport is DOWN\n"));
+		return -1;
+	}
+
+	if (pnn == ctdb->pnn) {
+		/* a message to ourselves never touches the transport */
+		return ctdb_local_message(ctdb, srvid, data);
+	}
+
+	len = offsetof(struct ctdb_req_message_old, data) + data.dsize;
+
+	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_MESSAGE, len,
+				    struct ctdb_req_message_old);
+	CTDB_NO_MEMORY(ctdb, r);
+
+	r->srvid = srvid;
+	r->hdr.destnode = pnn;
+	r->datalen = data.dsize;
+	memcpy(&r->data[0], data.dptr, data.dsize);
+
+	ctdb_queue_packet(ctdb, &r->hdr);
+	talloc_free(r);
+
+	return 0;
+}
+
+
+
+/*
+ * A notification registered by a client with
+ * ctdb_control_register_notify().  Entries are talloc children of the
+ * client; the destructor installed at registration broadcasts the
+ * stored message when the entry is freed.
+ */
+struct ctdb_client_notify_list {
+ /* links in the client's notify list */
+ struct ctdb_client_notify_list *next, *prev;
+ /* daemon context used to send the message */
+ struct ctdb_context *ctdb;
+ /* service ID the notification is sent to */
+ uint64_t srvid;
+ /* payload to send; the buffer is a talloc child of this entry */
+ TDB_DATA data;
+};
+
+
+/*
+ * talloc destructor for a notify list entry: broadcast the registered
+ * message to all connected nodes.  A send failure is logged only; the
+ * destructor always returns 0.
+ */
+static int ctdb_client_notify_destructor(struct ctdb_client_notify_list *nl)
+{
+	DEBUG(DEBUG_ERR,("Sending client notify message for srvid:%llu\n", (unsigned long long)nl->srvid));
+
+	if (ctdb_daemon_send_message(nl->ctdb, CTDB_BROADCAST_CONNECTED, (unsigned long long)nl->srvid, nl->data) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send client notify message\n"));
+	}
+
+	return 0;
+}
+
+/*
+ * Control handler: register a notification for a local client.
+ *
+ * indata must hold a struct ctdb_notify_data_old whose len field matches
+ * the amount of trailing notify_data.  The message is stored on the
+ * client and broadcast by ctdb_client_notify_destructor() when the entry
+ * is freed.
+ *
+ * Returns 0 on success; -1 if the data is malformed, the client is not
+ * known to this daemon, the srvid is already registered for the client,
+ * or memory allocation fails.
+ */
+int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
+{
+	struct ctdb_notify_data_old *notify = (struct ctdb_notify_data_old *)indata.dptr;
+	struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	struct ctdb_client_notify_list *nl;
+
+	/* Validate the size BEFORE reading any field of *notify */
+	if (indata.dsize < offsetof(struct ctdb_notify_data_old, notify_data)) {
+		DEBUG(DEBUG_ERR,(__location__ " Too little data in control : %d\n", (int)indata.dsize));
+		return -1;
+	}
+
+	if (indata.dsize != (notify->len + offsetof(struct ctdb_notify_data_old, notify_data))) {
+		DEBUG(DEBUG_ERR,(__location__ " Wrong amount of data in control. Got %d, expected %d\n", (int)indata.dsize, (int)(notify->len + offsetof(struct ctdb_notify_data_old, notify_data))));
+		return -1;
+	}
+
+	DEBUG(DEBUG_INFO,("Register srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
+
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
+		return -1;
+	}
+
+	for(nl=client->notify; nl; nl=nl->next) {
+		if (nl->srvid == notify->srvid) {
+			break;
+		}
+	}
+	if (nl != NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Notification for srvid:%llu already exists for this client\n", (unsigned long long)notify->srvid));
+		return -1;
+	}
+
+	nl = talloc(client, struct ctdb_client_notify_list);
+	CTDB_NO_MEMORY(ctdb, nl);
+	nl->ctdb = ctdb;
+	nl->srvid = notify->srvid;
+	nl->data.dsize = notify->len;
+	nl->data.dptr = talloc_memdup(nl, notify->notify_data,
+				      nl->data.dsize);
+	if (nl->data.dptr == NULL) {
+		/* Do not leave a half-built entry hanging off the client */
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		talloc_free(nl);
+		return -1;
+	}
+
+	DLIST_ADD(client->notify, nl);
+	talloc_set_destructor(nl, ctdb_client_notify_destructor);
+
+	return 0;
+}
+
+/*
+ * Control handler: remove a notification previously registered by a
+ * local client.  The entry is freed with its destructor cleared, so no
+ * notification message is sent.
+ *
+ * indata.dptr is read as a uint64_t srvid; this function itself does
+ * not check indata.dsize.
+ *
+ * Returns 0 on success, -1 if the client is unknown or has no
+ * notification registered for the srvid.
+ */
+int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
+{
+	uint64_t srvid = *(uint64_t *)indata.dptr;
+	struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	struct ctdb_client_notify_list *entry;
+
+	DEBUG(DEBUG_INFO,("Deregister srvid %llu for client %d\n", (unsigned long long)srvid, client_id));
+
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
+		return -1;
+	}
+
+	entry = client->notify;
+	while (entry != NULL && entry->srvid != srvid) {
+		entry = entry->next;
+	}
+
+	if (entry == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " No notification for srvid:%llu found for this client\n", (unsigned long long)srvid));
+		return -1;
+	}
+
+	DLIST_REMOVE(client->notify, entry);
+	/* clear the destructor so that freeing does not send the message */
+	talloc_set_destructor(entry, NULL);
+	talloc_free(entry);
+
+	return 0;
+}
+
+/*
+ * Return the first client in ctdb->client_pids whose recorded PID is
+ * pid, or NULL if there is none.
+ */
+struct ctdb_client *ctdb_find_client_by_pid(struct ctdb_context *ctdb, pid_t pid)
+{
+	struct ctdb_client_pid_list *entry = ctdb->client_pids;
+
+	while (entry != NULL) {
+		if (entry->pid == pid) {
+			return entry->client;
+		}
+		entry = entry->next;
+	}
+
+	return NULL;
+}
+
+
+/* This control is used by samba when probing whether a process (of a
+   samba daemon) exists on the node.
+   Samba does this when it needs/wants to check if a subrecord in one of
+   the databases is still valid, or if it is stale and can be removed.
+   If the node is in an inactive (banned or stopped) state we just kill
+   off the samba client holding this sub-record and tell the calling
+   samba that the process does not exist.
+   This allows us to forcefully recall subrecords registered by samba
+   processes on banned and stopped nodes.
+
+   Returns -1 if no client with this pid is connected or the node is
+   inactive, otherwise the result of kill(pid, 0).
+*/
+int32_t ctdb_control_process_exists(struct ctdb_context *ctdb, pid_t pid)
+{
+	struct ctdb_client *client = ctdb_find_client_by_pid(ctdb, pid);
+
+	if (client == NULL) {
+		return -1;
+	}
+
+	if ((ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) == 0) {
+		/* Active node: just probe the process */
+		return kill(pid, 0);
+	}
+
+	DEBUG(DEBUG_NOTICE,
+	      ("Killing client with pid:%d on banned/stopped node\n",
+	       (int)pid));
+	talloc_free(client);
+	return -1;
+}
+
+/*
+ * Control handler: check whether a client with the given PID has the
+ * given srvid registered.
+ *
+ * indata is read as a pid_t immediately followed by a uint64_t srvid;
+ * this function itself does not check indata.dsize.
+ *
+ * Returns 0 if some client with that PID has the srvid registered,
+ * -1 otherwise.
+ */
+int32_t ctdb_control_check_pid_srvid(struct ctdb_context *ctdb,
+				     TDB_DATA indata)
+{
+	struct ctdb_client_pid_list *entry;
+	pid_t pid = *(pid_t *)indata.dptr;
+	uint64_t srvid = *(uint64_t *)(indata.dptr + sizeof(pid_t));
+
+	for (entry = ctdb->client_pids; entry != NULL; entry = entry->next) {
+		if (entry->pid != pid) {
+			continue;
+		}
+
+		/* several clients may share a PID; try each of them */
+		if (srvid_exists(ctdb->srv, srvid, entry->client) == 0) {
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Control handler: read the nodes file and return the resulting node
+ * map in outdata.  The control takes no input data.
+ *
+ * Returns 0 on success, -1 if the nodes file could not be read.
+ */
+int ctdb_control_getnodesfile(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
+{
+	struct ctdb_node_map_old *nodes;
+
+	CHECK_CONTROL_DATA_SIZE(0);
+
+	nodes = ctdb_read_nodes_file(ctdb, ctdb->nodes_file);
+	if (nodes == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to read nodes file\n"));
+		return -1;
+	}
+
+	/* hand the talloc'ed map back as a raw blob */
+	outdata->dptr = (unsigned char *)nodes;
+	outdata->dsize = talloc_get_size(outdata->dptr);
+
+	return 0;
+}
+
+/*
+ * Shut the daemon down in an orderly fashion and exit with exit_code.
+ *
+ * If the runstate is already CTDB_RUNSTATE_SHUTDOWN the call returns
+ * without doing anything; otherwise it does not return.
+ */
+void ctdb_shutdown_sequence(struct ctdb_context *ctdb, int exit_code)
+{
+ /* Guard against re-entry while a shutdown is already in progress */
+ if (ctdb->runstate == CTDB_RUNSTATE_SHUTDOWN) {
+ DEBUG(DEBUG_NOTICE,("Already shutting down so will not proceed.\n"));
+ return;
+ }
+
+ DEBUG(DEBUG_ERR,("Shutdown sequence commencing.\n"));
+ ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SHUTDOWN);
+ ctdb_stop_recoverd(ctdb);
+ ctdb_stop_keepalive(ctdb);
+ ctdb_stop_monitoring(ctdb);
+ /* The shutdown event is run while the event daemon is still up */
+ ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
+ ctdb_stop_eventd(ctdb);
+ /* The transport may be absent or may not provide a shutdown method */
+ if (ctdb->methods != NULL && ctdb->methods->shutdown != NULL) {
+ ctdb->methods->shutdown(ctdb);
+ }
+
+ DEBUG(DEBUG_ERR,("Shutdown sequence complete, exiting.\n"));
+ exit(exit_code);
+}
+
+/* When the main daemon forks and the child process needs to connect
+ * back to the daemon as a client process, this function can be used
+ * to change the ctdb context from daemon into client mode.  The child
+ * process must be created using ctdb_fork() and not fork() -
+ * ctdb_fork() does some necessary housekeeping.
+ *
+ * Returns 0 on success, -1 if connecting to the daemon fails.  Exits
+ * if a new event context cannot be created.
+ */
+int switch_from_server_to_client(struct ctdb_context *ctdb)
+{
+	/* drop the inherited listening socket, if still open */
+	if (ctdb->daemon.sd != -1) {
+		close(ctdb->daemon.sd);
+		ctdb->daemon.sd = -1;
+	}
+
+	/* get a new event context */
+	ctdb->ev = tevent_context_init(ctdb);
+	if (ctdb->ev == NULL) {
+		DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
+		exit(1);
+	}
+	tevent_loop_allow_nesting(ctdb->ev);
+
+	/* Connect to main CTDB daemon */
+	if (ctdb_socket_connect(ctdb) != 0) {
+		DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
+		return -1;
+	}
+
+	ctdb->can_send_controls = true;
+	return 0;
+}
diff --git a/ctdb/server/ctdb_fork.c b/ctdb/server/ctdb_fork.c
new file mode 100644
index 0000000..1065423
--- /dev/null
+++ b/ctdb/server/ctdb_fork.c
@@ -0,0 +1,216 @@
+/*
+ functions to track and manage processes
+
+ Copyright (C) Ronnie Sahlberg 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/wait.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/rb_tree.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+ * Record a child process in ctdb->child_processes, keyed by its PID,
+ * with a "process:<pid>" label as the stored value.  Does nothing when
+ * called from a process other than the main daemon.
+ */
+void ctdb_track_child(struct ctdb_context *ctdb, pid_t pid)
+{
+	char *label;
+
+	/* Only CTDB main daemon should track child processes */
+	if (ctdb->ctdbd_pid != getpid()) {
+		return;
+	}
+
+	label = talloc_asprintf(ctdb->child_processes, "process:%d", (int)pid);
+	trbt_insert32(ctdb->child_processes, pid, label);
+}
+
+/*
+ * Fork a child process.
+ *
+ * In the child the event context is freed, the daemon's listening
+ * socket is closed, the transport is shut down, the realtime scheduler
+ * is dropped (if it was enabled) and sending controls is disabled.
+ *
+ * Returns 0 in the child, the child's PID in the parent, or -1 if
+ * fork() fails.  In the parent the child is recorded with
+ * ctdb_track_child().
+ */
+pid_t ctdb_fork(struct ctdb_context *ctdb)
+{
+ pid_t pid;
+ struct timeval before;
+ double delta_t;
+
+ /* Timestamp so that an unusually slow fork() can be reported */
+ before = timeval_current();
+
+ pid = fork();
+ if (pid == -1) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " fork() failed (%s)\n", strerror(errno)));
+ return -1;
+ }
+ if (pid == 0) {
+ /* Close the Unix Domain socket and the TCP socket.
+ * This ensures that none of the child processes will
+ * look like the main daemon when it is not running.
+ * tevent needs to be stopped before closing sockets.
+ */
+ if (ctdb->ev != NULL) {
+ talloc_free(ctdb->ev);
+ ctdb->ev = NULL;
+ }
+ if (ctdb->daemon.sd != -1) {
+ close(ctdb->daemon.sd);
+ ctdb->daemon.sd = -1;
+ }
+ if (ctdb->methods != NULL && ctdb->methods->shutdown != NULL) {
+ ctdb->methods->shutdown(ctdb);
+ }
+
+ /* The child does not need to be realtime */
+ if (ctdb->do_setsched) {
+ reset_scheduler();
+ }
+ ctdb->can_send_controls = false;
+
+ return 0;
+ }
+
+ /* Parent: warn if the fork itself took more than 3 seconds */
+ delta_t = timeval_elapsed(&before);
+ if (delta_t > 3.0) {
+ DEBUG(DEBUG_WARNING, ("fork() took %lf seconds\n", delta_t));
+ }
+
+ ctdb_track_child(ctdb, pid);
+ return pid;
+}
+
+/*
+ * vfork + exec
+ *
+ * Run helper as a child process.  The child's argv is built on mem_ctx
+ * as helper followed by the helper_argc entries of helper_argv.
+ *
+ * NOTE(review): no NULL terminator is appended here, so helper_argv
+ * presumably already ends with a NULL entry that is included in
+ * helper_argc - confirm against the callers.
+ *
+ * Returns the child's PID, or -1 on allocation or vfork() failure.
+ * The child is recorded with ctdb_track_child().
+ */
+pid_t ctdb_vfork_exec(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ const char *helper, int helper_argc,
+ const char **helper_argv)
+{
+ pid_t pid;
+ struct timeval before;
+ double delta_t;
+ char **argv;
+ int i;
+
+ /* Room for helper itself plus helper_argc entries */
+ argv = talloc_array(mem_ctx, char *, helper_argc + 1);
+ if (argv == NULL) {
+ DEBUG(DEBUG_ERR, ("Memory allocation error\n"));
+ return -1;
+ }
+
+ argv[0] = discard_const(helper);
+ for (i=0; i<helper_argc; i++) {
+ argv[i+1] = discard_const(helper_argv[i]);
+ }
+
+ before = timeval_current();
+
+ pid = vfork();
+ if (pid == -1) {
+ DEBUG(DEBUG_ERR, ("vfork() failed (%s)\n", strerror(errno)));
+ return -1;
+ }
+
+ if (pid == 0) {
+ /* Child: only exec or _exit after vfork() */
+ execv(helper, argv);
+ _exit(1);
+ }
+
+ /* Parent: warn if vfork() took more than 3 seconds */
+ delta_t = timeval_elapsed(&before);
+ if (delta_t > 3.0) {
+ DEBUG(DEBUG_WARNING, ("vfork() took %lf seconds\n", delta_t));
+ }
+
+ ctdb_track_child(ctdb, pid);
+ return pid;
+}
+
+/*
+ * SIGCHLD handler: reap every child that has exited, without blocking.
+ * In the main daemon the tracking record of each reaped child is looked
+ * up in ctdb->child_processes and freed; a child with no record is
+ * reported.
+ */
+static void ctdb_sigchld_handler(struct tevent_context *ev,
+	struct tevent_signal *te, int signum, int count,
+	void *dont_care,
+	void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	int status;
+	pid_t pid;
+
+	do {
+		char *process;
+
+		pid = waitpid(-1, &status, WNOHANG);
+		if (pid == -1) {
+			DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
+			return;
+		}
+		if (pid <= 0) {
+			/* nothing reaped this time round */
+			continue;
+		}
+
+		if (getpid() != ctdb->ctdbd_pid) {
+			continue;
+		}
+
+		process = trbt_lookup32(ctdb->child_processes, pid);
+		if (process == NULL) {
+			DEBUG(DEBUG_ERR,("Got SIGCHLD from pid:%d we didn not spawn with ctdb_fork\n", pid));
+		}
+
+		DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d %s\n", (int)pid, process));
+		talloc_free(process);
+	} while (pid != 0);
+}
+
+
+/*
+ * Create the tree used to track child processes and install the
+ * SIGCHLD handler.  Returns the tevent signal handle, or NULL if the
+ * handler could not be added.
+ */
+struct tevent_signal *
+ctdb_init_sigchld(struct ctdb_context *ctdb)
+{
+	ctdb->child_processes = trbt_create(ctdb, 0);
+
+	return tevent_add_signal(ctdb->ev, ctdb, SIGCHLD, 0, ctdb_sigchld_handler, ctdb);
+}
+
+/*
+ * Send a signal to a process.
+ *
+ * Signal 0 (an existence probe), and any call made outside the main
+ * daemon, goes straight to kill().  In the main daemon a real signal is
+ * only delivered to a PID found in ctdb->child_processes; for any other
+ * PID an error is logged and 0 is returned without signalling.
+ */
+int
+ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum)
+{
+	if (signum == 0 || getpid() != ctdb->ctdbd_pid) {
+		return kill(pid, signum);
+	}
+
+	if (trbt_lookup32(ctdb->child_processes, pid) == NULL) {
+		DEBUG(DEBUG_ERR,("ctdb_kill: trying to kill(%d, %d) a process that does not exist\n", pid, signum));
+		return 0;
+	}
+
+	return kill(pid, signum);
+}
diff --git a/ctdb/server/ctdb_freeze.c b/ctdb/server/ctdb_freeze.c
new file mode 100644
index 0000000..06aeacf
--- /dev/null
+++ b/ctdb/server/ctdb_freeze.c
@@ -0,0 +1,923 @@
+/*
+ ctdb freeze handling
+
+ Copyright (C) Andrew Tridgell 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/rb_tree.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/**
+ * Cancel the tdb transaction of one database.
+ *
+ * TDB_NOLOCK is set on the tdb for the duration of the cancel.  A failed
+ * cancel is only logged; the function always returns 0.
+ * private_data is unused.
+ */
+static int db_transaction_cancel_handler(struct ctdb_db_context *ctdb_db,
+					 void *private_data)
+{
+	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
+
+	tdb_add_flags(tdb, TDB_NOLOCK);
+	if (tdb_transaction_cancel(tdb) != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to cancel transaction for db %s\n",
+				  ctdb_db->db_name));
+	}
+	tdb_remove_flags(tdb, TDB_NOLOCK);
+
+	return 0;
+}
+
+/**
+ * Start a tdb transaction on one database.
+ *
+ * private_data points at a bool.  When it is true, the current
+ * transaction is cancelled first (a failure to cancel is only logged).
+ * TDB_NOLOCK is set on the tdb while the cancel/start calls run.
+ *
+ * Returns 0 on success, -1 if the transaction could not be started.
+ */
+static int db_transaction_start_handler(struct ctdb_db_context *ctdb_db,
+					void *private_data)
+{
+	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
+	bool cancel_first = *(bool *)private_data;
+	int rc;
+
+	tdb_add_flags(tdb, TDB_NOLOCK);
+
+	if (cancel_first && tdb_transaction_cancel(tdb) != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to cancel transaction for db %s\n",
+		       ctdb_db->db_name));
+	}
+
+	rc = tdb_transaction_start(tdb);
+	tdb_remove_flags(tdb, TDB_NOLOCK);
+	if (rc == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR, ("Failed to start transaction for db %s\n",
+			  ctdb_db->db_name));
+	return -1;
+}
+
+/**
+ * Commit the tdb transaction of one database and update its persistent
+ * health.
+ *
+ * private_data points at an unsigned int holding the number of healthy
+ * nodes, which is passed on to ctdb_update_persistent_health().
+ *
+ * Returns 0 on success, -1 if the commit failed, or the non-zero result
+ * of ctdb_update_persistent_health() if that failed.
+ */
+static int db_transaction_commit_handler(struct ctdb_db_context *ctdb_db,
+					 void *private_data)
+{
+	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
+	unsigned int num_healthy = *(unsigned int *)private_data;
+	int rc;
+
+	tdb_add_flags(tdb, TDB_NOLOCK);
+	rc = tdb_transaction_commit(tdb);
+	tdb_remove_flags(tdb, TDB_NOLOCK);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to commit transaction for db %s\n",
+				  ctdb_db->db_name));
+		return -1;
+	}
+
+	rc = ctdb_update_persistent_health(ctdb_db->ctdb, ctdb_db, NULL,
+					   num_healthy);
+	if (rc == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR, ("Failed to update persistent health for db %s\n",
+			  ctdb_db->db_name));
+	return rc;
+}
+
+/*
+ * A list of requests waiting for a single database to be frozen.
+ *
+ * Waiters are linked on ctdb_db_freeze_handle.waiters and are allocated
+ * as talloc children of that handle.  The talloc destructor attached to
+ * each waiter is what delivers the result.
+ */
+struct ctdb_db_freeze_waiter {
+ /* doubly linked list pointers (DLIST_ADD / DLIST_REMOVE) */
+ struct ctdb_db_freeze_waiter *next, *prev;
+ struct ctdb_context *ctdb;
+ /* opaque data interpreted by the waiter's destructor */
+ void *private_data;
+ /* initialised to -1; set to 0 before the waiter is freed on success */
+ int32_t status;
+};
+
+/*
+ * A handle to a db freeze lock child process.
+ *
+ * The handle exists while a database freeze is pending or in effect;
+ * freeing it runs ctdb_db_freeze_handle_destructor().
+ */
+struct ctdb_db_freeze_handle {
+ /* database this handle belongs to */
+ struct ctdb_db_context *ctdb_db;
+ /* lock request returned by ctdb_lock_db() */
+ struct lock_request *lreq;
+ /* requests waiting for the freeze to complete */
+ struct ctdb_db_freeze_waiter *waiters;
+};
+
+/**
+ * talloc destructor for a database freeze handle.
+ *
+ * Cancels a freeze transaction that is still open, resets the database's
+ * freeze state (mode, handle pointer, invalid_records flag) and frees the
+ * lock request.  Always returns 0.
+ */
+static int ctdb_db_freeze_handle_destructor(struct ctdb_db_freeze_handle *h)
+{
+ struct ctdb_db_context *ctdb_db = h->ctdb_db;
+
+ DEBUG(DEBUG_ERR, ("Release freeze handle for db %s\n",
+ ctdb_db->db_name));
+
+ /* Cancel any pending transactions */
+ if (ctdb_db->freeze_transaction_started) {
+ db_transaction_cancel_handler(ctdb_db, NULL);
+ ctdb_db->freeze_transaction_started = false;
+ }
+ ctdb_db->freeze_mode = CTDB_FREEZE_NONE;
+ ctdb_db->freeze_handle = NULL;
+
+ /* Clear invalid records flag */
+ ctdb_db->invalid_records = false;
+
+ /* Free the lock request explicitly, after the state has been reset */
+ talloc_free(h->lreq);
+ return 0;
+}
+
+/**
+ * Lock callback for a single database freeze.
+ *
+ * private_data is the ctdb_db_freeze_handle; locked reports whether the
+ * database lock was obtained.
+ *
+ * - Called while the database is already frozen: the freeze is dropped
+ *   and the handle freed.
+ * - Lock not obtained: the freeze is abandoned and the handle freed.
+ * - Otherwise the database is marked frozen and every waiter is completed
+ *   with status 0.
+ */
+static void ctdb_db_freeze_handler(void *private_data, bool locked)
+{
+	struct ctdb_db_freeze_handle *handle = talloc_get_type_abort(
+		private_data, struct ctdb_db_freeze_handle);
+	struct ctdb_db_context *db = handle->ctdb_db;
+	struct ctdb_db_freeze_waiter *waiter;
+
+	if (db->freeze_mode == CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR, ("Freeze db child died - unfreezing\n"));
+		db->freeze_mode = CTDB_FREEZE_NONE;
+		talloc_free(handle);
+		return;
+	}
+
+	if (!locked) {
+		DEBUG(DEBUG_ERR, ("Failed to get db lock for %s\n",
+				  db->db_name));
+		db->freeze_mode = CTDB_FREEZE_NONE;
+		talloc_free(handle);
+		return;
+	}
+
+	db->freeze_mode = CTDB_FREEZE_FROZEN;
+
+	/* Freeing a waiter runs its destructor, which delivers the status */
+	for (waiter = handle->waiters;
+	     waiter != NULL;
+	     waiter = handle->waiters) {
+		waiter->status = 0;
+		DLIST_REMOVE(handle->waiters, waiter);
+		talloc_free(waiter);
+	}
+}
+
+/**
+ * Begin freezing a single database.
+ *
+ * Does nothing when the database is already frozen or a freeze handle
+ * already exists.  Otherwise vacuuming is stopped, a freeze handle is
+ * created, a database lock is requested with ctdb_db_freeze_handler() as
+ * callback, and the database is put into CTDB_FREEZE_PENDING.
+ */
+static void ctdb_start_db_freeze(struct ctdb_db_context *ctdb_db)
+{
+	struct ctdb_db_freeze_handle *h;
+
+	if (ctdb_db->freeze_mode == CTDB_FREEZE_FROZEN ||
+	    ctdb_db->freeze_handle != NULL) {
+		return;
+	}
+
+	DEBUG(DEBUG_ERR, ("Freeze db: %s\n", ctdb_db->db_name));
+
+	ctdb_stop_vacuuming(ctdb_db->ctdb);
+
+	h = talloc_zero(ctdb_db, struct ctdb_db_freeze_handle);
+	CTDB_NO_MEMORY_FATAL(ctdb_db->ctdb, h);
+	h->ctdb_db = ctdb_db;
+
+	h->lreq = ctdb_lock_db(h, ctdb_db, false, ctdb_db_freeze_handler, h);
+	CTDB_NO_MEMORY_FATAL(ctdb_db->ctdb, h->lreq);
+
+	/* The destructor is installed only once the lock request exists */
+	talloc_set_destructor(h, ctdb_db_freeze_handle_destructor);
+
+	ctdb_db->freeze_mode = CTDB_FREEZE_PENDING;
+	ctdb_db->freeze_handle = h;
+}
+
+/**
+ * talloc destructor for a control request waiting on a database freeze:
+ * sends the control reply carrying the waiter's status.
+ */
+static int ctdb_db_freeze_waiter_destructor(struct ctdb_db_freeze_waiter *w)
+{
+	/* private_data holds the control request; it is cast directly
+	 * because the pointer is talloc_memdup()'d and so cannot go through
+	 * talloc_get_type */
+	ctdb_request_control_reply(w->ctdb,
+				   (struct ctdb_req_control_old *)w->private_data,
+				   NULL, w->status, NULL);
+	return 0;
+}
+
+/**
+ * Control handler: freeze a single database.
+ *
+ * Returns -1 for an unknown db_id and 0 immediately when the database is
+ * already frozen.  Otherwise the freeze is started (if it is not already
+ * in progress), the request is queued as a waiter on the freeze handle and
+ * *async_reply is set to true; the reply is then sent from the waiter's
+ * destructor.
+ */
+int32_t ctdb_control_db_freeze(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ uint32_t db_id,
+ bool *async_reply)
+{
+ struct ctdb_db_context *ctdb_db;
+ struct ctdb_db_freeze_waiter *w;
+
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (ctdb_db == NULL) {
+ DEBUG(DEBUG_ERR, ("Freeze db for unknown dbid 0x%08x\n", db_id));
+ return -1;
+ }
+
+ if (ctdb_db->freeze_mode == CTDB_FREEZE_FROZEN) {
+ DEBUG(DEBUG_ERR, ("Freeze db: %s frozen\n", ctdb_db->db_name));
+ return 0;
+ }
+
+ ctdb_start_db_freeze(ctdb_db);
+
+ /* add ourselves to the list of waiters */
+ w = talloc(ctdb_db->freeze_handle, struct ctdb_db_freeze_waiter);
+ CTDB_NO_MEMORY(ctdb, w);
+ w->ctdb = ctdb;
+ /* the waiter takes talloc ownership of the request */
+ w->private_data = talloc_steal(w, c);
+ /* failure status unless the freeze handler sets it to 0 */
+ w->status = -1;
+ talloc_set_destructor(w, ctdb_db_freeze_waiter_destructor);
+ DLIST_ADD(ctdb_db->freeze_handle->waiters, w);
+
+ *async_reply = true;
+ return 0;
+}
+
+/**
+ * Control handler: thaw a single database.
+ *
+ * Frees the database's freeze handle (if any) and resends calls for the
+ * database via ctdb_call_resend_db().
+ *
+ * Returns 0 on success, -1 for an unknown db_id.
+ */
+int32_t ctdb_control_db_thaw(struct ctdb_context *ctdb, uint32_t db_id)
+{
+	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);
+
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR, ("Thaw db for unknown dbid 0x%08x\n", db_id));
+		return -1;
+	}
+
+	DEBUG(DEBUG_ERR, ("Thaw db: %s generation %u\n", ctdb_db->db_name,
+			  ctdb_db->generation));
+
+	TALLOC_FREE(ctdb_db->freeze_handle);
+	ctdb_call_resend_db(ctdb_db);
+
+	return 0;
+}
+
+
+/*
+ a list of control requests waiting for a freeze lock child to get
+ the database locks
+
+ Waiters are linked on ctdb_freeze_handle.waiters; the control reply is
+ sent from the waiter's talloc destructor.
+ */
+struct ctdb_freeze_waiter {
+ /* doubly linked list pointers (DLIST_ADD / DLIST_REMOVE) */
+ struct ctdb_freeze_waiter *next, *prev;
+ struct ctdb_context *ctdb;
+ /* the control request to reply to */
+ struct ctdb_req_control_old *c;
+ /* initialised to -1; set to 0 before the waiter is freed on success */
+ int32_t status;
+};
+
+/*
+ * A handle to a freeze lock child process.
+ *
+ * Tracks a freeze of all databases: how many databases are involved and
+ * how many per-database freezes have succeeded or failed so far.
+ */
+struct ctdb_freeze_handle {
+ struct ctdb_context *ctdb;
+ /* number of databases / databases frozen / databases that failed */
+ unsigned int num_total, num_locked, num_failed;
+ /* control requests waiting for the freeze to complete */
+ struct ctdb_freeze_waiter *waiters;
+};
+
+/*
+  Database iterator callback: release the freeze handle of one database.
+  private_data is unused.  Always returns 0.
+ */
+static int db_thaw(struct ctdb_db_context *ctdb_db, void *private_data)
+{
+	struct ctdb_db_freeze_handle *handle = ctdb_db->freeze_handle;
+
+	talloc_free(handle);
+	return 0;
+}
+
+/*
+ destroy a freeze handle
+
+ talloc destructor for the freeze-all handle: cancels open freeze
+ transactions on all databases, releases every per-database freeze handle
+ and resets the global freeze state.  Always returns 0.
+ */
+static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
+{
+ struct ctdb_context *ctdb = h->ctdb;
+
+ DEBUG(DEBUG_ERR,("Release freeze handle\n"));
+
+ /* cancel any pending transactions */
+ if (ctdb->freeze_transaction_started) {
+ ctdb_db_iterator(ctdb, db_transaction_cancel_handler, NULL);
+ ctdb->freeze_transaction_started = false;
+ }
+
+ /* free the per-database freeze handles */
+ ctdb_db_iterator(ctdb, db_thaw, NULL);
+
+ ctdb->freeze_mode = CTDB_FREEZE_NONE;
+ ctdb->freeze_handle = NULL;
+
+ return 0;
+}
+
+/*
+ called when the child writes its status to us
+
+ private_data is the ctdb_freeze_handle; locked tells whether all
+ databases were locked.  If the node is already frozen, or the locks were
+ not obtained, the handle is freed.  Otherwise the node is marked frozen
+ and all waiters are completed with status 0.
+ */
+static void ctdb_freeze_lock_handler(void *private_data, bool locked)
+{
+ struct ctdb_freeze_handle *h = talloc_get_type_abort(private_data,
+ struct ctdb_freeze_handle);
+ struct ctdb_freeze_waiter *w;
+
+ if (h->ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
+ DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
+ talloc_free(h);
+ return;
+ }
+
+ if (!locked) {
+ DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
+ /* we didn't get the locks - destroy the handle */
+ talloc_free(h);
+ return;
+ }
+
+ h->ctdb->freeze_mode = CTDB_FREEZE_FROZEN;
+
+ /* notify the waiters */
+ if (h != h->ctdb->freeze_handle) {
+ DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
+ }
+ /* freeing a waiter runs its destructor, which sends the reply */
+ while ((w = h->waiters)) {
+ w->status = 0;
+ DLIST_REMOVE(h->waiters, w);
+ talloc_free(w);
+ }
+}
+
+/**
+ * talloc destructor for a per-database waiter created on behalf of a
+ * freeze of all databases.
+ *
+ * Records the outcome for this database in the ctdb_freeze_handle kept in
+ * private_data.  Once an outcome is known for every database,
+ * ctdb_freeze_lock_handler() is invoked, with locked == true only if all
+ * databases were frozen.
+ */
+static int db_freeze_waiter_destructor(struct ctdb_db_freeze_waiter *w)
+{
+	struct ctdb_freeze_handle *h = talloc_get_type_abort(
+		w->private_data, struct ctdb_freeze_handle);
+
+	if (w->status != 0) {
+		h->num_failed += 1;
+	} else {
+		h->num_locked += 1;
+	}
+
+	/* Wait until the status of all databases is known */
+	if (h->num_locked + h->num_failed != h->num_total) {
+		return 0;
+	}
+
+	ctdb_freeze_lock_handler(h, h->num_locked == h->num_total);
+	return 0;
+}
+
+/**
+ * Database iterator callback: flag the records of a volatile database as
+ * invalid.  Other databases are left untouched.  private_data is unused.
+ * Always returns 0.
+ */
+static int db_invalidate(struct ctdb_db_context *ctdb_db, void *private_data)
+{
+	if (!ctdb_db_volatile(ctdb_db)) {
+		return 0;
+	}
+
+	ctdb_db->invalid_records = true;
+	return 0;
+}
+
+/**
+ * Database iterator callback: count databases.
+ *
+ * private_data points at an unsigned int counter that is incremented once
+ * per call.  Always returns 0.
+ */
+static int db_count(struct ctdb_db_context *ctdb_db, void *private_data)
+{
+	unsigned int *num_dbs = (unsigned int *)private_data;
+
+	(*num_dbs)++;
+	return 0;
+}
+
+/**
+ * Freeze a single database
+ *
+ * Database iterator callback used when freezing all databases.
+ * private_data is the ctdb_freeze_handle.  A waiter is attached to the
+ * database's freeze handle so that db_freeze_waiter_destructor() can
+ * account for the outcome of this database.
+ */
+static int db_freeze(struct ctdb_db_context *ctdb_db, void *private_data)
+{
+ struct ctdb_freeze_handle *h = talloc_get_type_abort(
+ private_data, struct ctdb_freeze_handle);
+ struct ctdb_db_freeze_waiter *w;
+
+ ctdb_start_db_freeze(ctdb_db);
+
+ w = talloc(ctdb_db->freeze_handle, struct ctdb_db_freeze_waiter);
+ CTDB_NO_MEMORY(h->ctdb, w);
+ w->ctdb = h->ctdb;
+ w->private_data = h;
+ w->status = -1;
+ talloc_set_destructor(w, db_freeze_waiter_destructor);
+
+ if (ctdb_db->freeze_mode == CTDB_FREEZE_FROZEN) {
+ /* Early return if already frozen */
+ /* freeing the waiter runs the destructor, which counts this
+ * database as locked */
+ w->status = 0;
+ talloc_free(w);
+ return 0;
+ }
+
+ DLIST_ADD(ctdb_db->freeze_handle->waiters, w);
+
+ return 0;
+}
+
+/*
+ start the freeze process for all databases
+ This is only called from ctdb_control_freeze(), which is called
+ only on node becoming INACTIVE. So mark the records invalid.
+
+ Three cases are handled:
+ - already frozen: re-check that every database is covered and, if the
+ number of databases changed, freeze again for all databases;
+ - a freeze is already in progress: nothing to do;
+ - otherwise: create the freeze handle and start freezing each database.
+ */
+static void ctdb_start_freeze(struct ctdb_context *ctdb)
+{
+ struct ctdb_freeze_handle *h;
+ int ret;
+
+ ctdb_db_iterator(ctdb, db_invalidate, NULL);
+
+ if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
+ unsigned int count = 0;
+
+ /*
+ * Check if all the databases are frozen
+ *
+ * It's possible that the databases can get attached after
+ * initial freeze. This typically happens during startup as
+ * CTDB will only attach persistent databases and go in to
+ * startup freeze. The recovery master during recovery will
+ * attach all the missing databases.
+ */
+
+ h = ctdb->freeze_handle;
+ if (h == NULL) {
+ ctdb->freeze_mode = CTDB_FREEZE_NONE;
+ return;
+ }
+
+ ret = ctdb_db_iterator(ctdb, db_count, &count);
+ if (ret != 0) {
+ TALLOC_FREE(ctdb->freeze_handle);
+ ctdb->freeze_mode = CTDB_FREEZE_NONE;
+ return;
+ }
+
+ if (count != h->num_total) {
+ DEBUG(DEBUG_ERR, ("Freeze all: incremental\n"));
+
+ /* restart the accounting with the new database count */
+ h->num_total = count;
+ h->num_locked = 0;
+ h->num_failed = 0;
+
+ ctdb->freeze_mode = CTDB_FREEZE_PENDING;
+
+ ret = ctdb_db_iterator(ctdb, db_freeze, h);
+ if (ret != 0) {
+ TALLOC_FREE(ctdb->freeze_handle);
+ ctdb->freeze_mode = CTDB_FREEZE_NONE;
+ }
+ }
+ return;
+ }
+
+ if (ctdb->freeze_handle != NULL) {
+ /* already trying to freeze */
+ return;
+ }
+
+ DEBUG(DEBUG_ERR, ("Freeze all\n"));
+
+ /* Stop any vacuuming going on: we don't want to wait. */
+ ctdb_stop_vacuuming(ctdb);
+
+ /* create freeze lock children for each database */
+ h = talloc_zero(ctdb, struct ctdb_freeze_handle);
+ CTDB_NO_MEMORY_FATAL(ctdb, h);
+ h->ctdb = ctdb;
+ talloc_set_destructor(h, ctdb_freeze_handle_destructor);
+ ctdb->freeze_handle = h;
+
+ ret = ctdb_db_iterator(ctdb, db_count, &h->num_total);
+ if (ret != 0) {
+ /* the destructor resets ctdb->freeze_handle and freeze_mode */
+ talloc_free(h);
+ return;
+ }
+
+ ctdb->freeze_mode = CTDB_FREEZE_PENDING;
+
+ ret = ctdb_db_iterator(ctdb, db_freeze, h);
+ if (ret != 0) {
+ talloc_free(h);
+ return;
+ }
+
+ /* with no databases there is nothing to wait for */
+ if (h->num_total == 0) {
+ ctdb->freeze_mode = CTDB_FREEZE_FROZEN;
+ }
+}
+
+/*
+  talloc destructor for a control request waiting on a freeze of all
+  databases: sends the control reply carrying the waiter's status.
+ */
+static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
+{
+	struct ctdb_req_control_old *request = w->c;
+
+	ctdb_request_control_reply(w->ctdb, request, NULL, w->status, NULL);
+
+	return 0;
+}
+
+/*
+ freeze all the databases
+ This control is only used when freezing database on node becoming INACTIVE.
+ So mark the records invalid in ctdb_start_freeze().
+
+ Returns 0 immediately when the node is already frozen or there are no
+ databases, and -1 when no freeze handle exists after starting the freeze.
+ Otherwise the request is queued as a waiter, *async_reply is set to true
+ and the reply is sent from the waiter's destructor.
+ */
+int32_t ctdb_control_freeze(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c, bool *async_reply)
+{
+ struct ctdb_freeze_waiter *w;
+
+ ctdb_start_freeze(ctdb);
+
+ if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
+ DEBUG(DEBUG_ERR, ("Freeze all: frozen\n"));
+ /* we're already frozen */
+ return 0;
+ }
+
+ if (ctdb->freeze_handle == NULL) {
+ DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
+ return -1;
+ }
+
+ /* If there are no databases, we are done. */
+ if (ctdb->freeze_handle->num_total == 0) {
+ return 0;
+ }
+
+ /* add ourselves to list of waiters */
+ w = talloc(ctdb->freeze_handle, struct ctdb_freeze_waiter);
+ CTDB_NO_MEMORY(ctdb, w);
+ w->ctdb = ctdb;
+ /* the waiter takes talloc ownership of the request */
+ w->c = talloc_steal(w, c);
+ /* failure status unless the lock handler sets it to 0 */
+ w->status = -1;
+ talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
+ DLIST_ADD(ctdb->freeze_handle->waiters, w);
+
+ /* we won't reply till later */
+ *async_reply = true;
+ return 0;
+}
+
+
+/*
+  Database iterator callback: freeze one database and run the event loop
+  until the freeze is no longer pending.
+
+  private_data is the tevent context to run.  Returns 0 if the database
+  ended up frozen, -1 otherwise.
+ */
+static int db_freeze_block(struct ctdb_db_context *ctdb_db, void *private_data)
+{
+	struct tevent_context *ev = (struct tevent_context *)private_data;
+
+	ctdb_start_db_freeze(ctdb_db);
+
+	while (ctdb_db->freeze_mode == CTDB_FREEZE_PENDING) {
+		tevent_loop_once(ev);
+	}
+
+	return (ctdb_db->freeze_mode == CTDB_FREEZE_FROZEN) ? 0 : -1;
+}
+
+/*
+  Freeze every database, blocking until each one is frozen; used during
+  daemon startup.
+
+  Returns true when the iteration over all databases succeeded, false
+  otherwise.
+ */
+bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
+{
+	return ctdb_db_iterator(ctdb, db_freeze_block, ctdb->ev) == 0;
+}
+
+/*
+ thaw the databases
+
+ When check_recmode is true and recovery is active the request is
+ refused with -1.  Otherwise open freeze transactions are cancelled, all
+ per-database freeze handles and the global freeze handle are released,
+ and calls are resent via ctdb_call_resend_all().  Returns 0 on success.
+ */
+int32_t ctdb_control_thaw(struct ctdb_context *ctdb, bool check_recmode)
+{
+ if (check_recmode && ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE) {
+ DEBUG(DEBUG_ERR, ("Failing to thaw databases while "
+ "recovery is active\n"));
+ return -1;
+ }
+
+ DEBUG(DEBUG_ERR,("Thawing all\n"));
+
+ /* cancel any pending transactions */
+ if (ctdb->freeze_transaction_started) {
+ ctdb_db_iterator(ctdb, db_transaction_cancel_handler, NULL);
+ ctdb->freeze_transaction_started = false;
+ }
+
+ /* release the per-database handles, then the global one */
+ ctdb_db_iterator(ctdb, db_thaw, NULL);
+ TALLOC_FREE(ctdb->freeze_handle);
+
+ ctdb_call_resend_all(ctdb);
+ return 0;
+}
+
+/**
+ * Database transaction wrappers
+ *
+ * These functions are wrappers around transaction start/cancel/commit handlers.
+ */
+
+/* Arguments handed to db_start_transaction() through private_data */
+struct db_start_transaction_state {
+ /* id recorded on the database once the transaction has started */
+ uint32_t transaction_id;
+ /* combined with the database's own freeze_transaction_started flag to
+ * decide whether an existing transaction is cancelled first */
+ bool transaction_started;
+};
+
+/*
+  Start a freeze transaction on a frozen database.
+
+  private_data points at a struct db_start_transaction_state.  An
+  existing transaction is cancelled first only if both the caller's
+  transaction_started flag and the database's own
+  freeze_transaction_started flag are set.
+
+  On success the database is marked as having a freeze transaction and
+  the transaction id is recorded.
+
+  Returns 0 on success, -1 if the database is not frozen or the
+  transaction could not be started.
+ */
+static int db_start_transaction(struct ctdb_db_context *ctdb_db,
+				void *private_data)
+{
+	struct db_start_transaction_state *state =
+		(struct db_start_transaction_state *)private_data;
+	int ret;
+	bool transaction_started;
+
+	if (ctdb_db->freeze_mode != CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR,
+		      ("Database %s not frozen, cannot start transaction\n",
+		       ctdb_db->db_name));
+		return -1;
+	}
+
+	/* Logical AND: both operands are booleans, not bit masks */
+	transaction_started = state->transaction_started &&
+			      ctdb_db->freeze_transaction_started;
+
+	ret = db_transaction_start_handler(ctdb_db,
+					   &transaction_started);
+	if (ret != 0) {
+		return -1;
+	}
+
+	ctdb_db->freeze_transaction_started = true;
+	ctdb_db->freeze_transaction_id = state->transaction_id;
+
+	return 0;
+}
+
+/*
+  Cancel the freeze transaction on a database and, if the cancel handler
+  reports success, clear the database's freeze_transaction_started flag.
+
+  Returns the result of db_transaction_cancel_handler().
+ */
+static int db_cancel_transaction(struct ctdb_db_context *ctdb_db,
+				 void *private_data)
+{
+	int rc = db_transaction_cancel_handler(ctdb_db, private_data);
+
+	if (rc == 0) {
+		ctdb_db->freeze_transaction_started = false;
+	}
+
+	return rc;
+}
+
+/* Arguments handed to db_commit_transaction() through private_data */
+struct db_commit_transaction_state {
+ /* must match the id recorded when the transaction was started */
+ uint32_t transaction_id;
+ /* passed on to the commit handler for the persistent health update */
+ unsigned int healthy_nodes;
+};
+
+/*
+  Commit the freeze transaction on a frozen database.
+
+  private_data points at a struct db_commit_transaction_state.  The commit
+  is refused unless the database is frozen, a freeze transaction has been
+  started and its id matches the requested one.  After a successful commit
+  the transaction state is cleared and the transaction id becomes the
+  database generation.
+
+  Returns 0 on success, -1 on any failure.
+ */
+static int db_commit_transaction(struct ctdb_db_context *ctdb_db,
+				 void *private_data)
+{
+	struct db_commit_transaction_state *commit =
+		(struct db_commit_transaction_state *)private_data;
+
+	if (ctdb_db->freeze_mode != CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR,
+		      ("Database %s not frozen, cannot commit transaction\n",
+		       ctdb_db->db_name));
+		return -1;
+	}
+
+	if (!ctdb_db->freeze_transaction_started) {
+		DEBUG(DEBUG_ERR, ("Transaction not started on %s\n",
+				  ctdb_db->db_name));
+		return -1;
+	}
+
+	if (ctdb_db->freeze_transaction_id != commit->transaction_id) {
+		DEBUG(DEBUG_ERR,
+		      ("Incorrect transaction commit id 0x%08x for %s\n",
+		       commit->transaction_id, ctdb_db->db_name));
+		return -1;
+	}
+
+	if (db_transaction_commit_handler(ctdb_db,
+					  &commit->healthy_nodes) != 0) {
+		return -1;
+	}
+
+	ctdb_db->generation = commit->transaction_id;
+	ctdb_db->freeze_transaction_id = 0;
+	ctdb_db->freeze_transaction_started = false;
+
+	return 0;
+}
+
+/**
+ * Control handler: start a transaction on a database - used for db
+ * recovery.
+ *
+ * indata.dptr is read as a struct ctdb_transdb giving the database id and
+ * the transaction id.  Returns 0 on success, -1 for an unknown database
+ * or when the transaction cannot be started.
+ */
+int32_t ctdb_control_db_transaction_start(struct ctdb_context *ctdb,
+					  TDB_DATA indata)
+{
+	struct ctdb_transdb *w = (struct ctdb_transdb *)indata.dptr;
+	struct db_start_transaction_state state;
+	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, w->db_id);
+
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,
+		      ("Transaction start for unknown dbid 0x%08x\n",
+		       w->db_id));
+		return -1;
+	}
+
+	state.transaction_started = true;
+	state.transaction_id = w->tid;
+
+	return db_start_transaction(ctdb_db, &state);
+}
+
+/**
+ * Control handler: cancel a transaction on a database - used for db
+ * recovery.
+ *
+ * indata.dptr is read as the uint32_t database id.  Returns -1 for an
+ * unknown database, otherwise the result of db_cancel_transaction().
+ */
+int32_t ctdb_control_db_transaction_cancel(struct ctdb_context *ctdb,
+					   TDB_DATA indata)
+{
+	uint32_t db_id = *(uint32_t *)indata.dptr;
+	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);
+
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,
+		      ("Transaction cancel for unknown dbid 0x%08x\n", db_id));
+		return -1;
+	}
+
+	DEBUG(DEBUG_ERR, ("Recovery db transaction cancelled for %s\n",
+			  ctdb_db->db_name));
+
+	return db_cancel_transaction(ctdb_db, NULL);
+}
+
+/**
+ * Control handler: commit a transaction on a database - used for db
+ * recovery.
+ *
+ * indata.dptr is read as a struct ctdb_transdb giving the database id and
+ * the transaction id.  The number of nodes whose flags are 0 is counted
+ * and passed along as the healthy node count.
+ *
+ * Returns 0 on success, -1 for an unknown database or a failed commit.
+ */
+int32_t ctdb_control_db_transaction_commit(struct ctdb_context *ctdb,
+					   TDB_DATA indata)
+{
+	struct ctdb_transdb *w = (struct ctdb_transdb *)indata.dptr;
+	struct db_commit_transaction_state state;
+	struct ctdb_db_context *ctdb_db;
+	unsigned int idx;
+
+	ctdb_db = find_ctdb_db(ctdb, w->db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,
+		      ("Transaction commit for unknown dbid 0x%08x\n",
+		       w->db_id));
+		return -1;
+	}
+
+	state.transaction_id = w->tid;
+	state.healthy_nodes = 0;
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		if (ctdb->nodes[idx]->flags != 0) {
+			continue;
+		}
+		state.healthy_nodes += 1;
+	}
+
+	return db_commit_transaction(ctdb_db, &state);
+}
+
+/*
+  wipe a database - only possible when in a frozen transaction
+
+  indata.dptr is read as a struct ctdb_transdb giving the database id and
+  the transaction id.  The wipe is refused unless the database is frozen,
+  a freeze transaction has been started and the transaction id matches.
+  For volatile databases the delete and fetch queues are re-created.
+
+  Returns 0 on success, -1 on any failure.
+ */
+int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_transdb w = *(struct ctdb_transdb *)indata.dptr;
+	struct ctdb_db_context *ctdb_db;
+
+	ctdb_db = find_ctdb_db(ctdb, w.db_id);
+	if (!ctdb_db) {
+		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
+		return -1;
+	}
+
+	/* NOTE(review): this message and the "in commit" one below mention
+	 * other operations although this is the wipe control; the text is
+	 * left as-is. */
+	if (ctdb_db->freeze_mode != CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
+		return -1;
+	}
+
+	if (!ctdb_db->freeze_transaction_started) {
+		DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
+		return -1;
+	}
+
+	if (w.tid != ctdb_db->freeze_transaction_id) {
+		DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.tid));
+		return -1;
+	}
+
+	if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
+				 ctdb_db->db_name));
+		return -1;
+	}
+
+	if (ctdb_db_volatile(ctdb_db)) {
+		/* TALLOC_FREE() resets the pointers as well, so that an
+		 * early return below cannot leave a queue pointer
+		 * referring to freed memory */
+		TALLOC_FREE(ctdb_db->delete_queue);
+		TALLOC_FREE(ctdb_db->fetch_queue);
+
+		ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
+		if (ctdb_db->delete_queue == NULL) {
+			DEBUG(DEBUG_ERR, (__location__ " Failed to re-create "
+					  "the delete queue.\n"));
+			return -1;
+		}
+		ctdb_db->fetch_queue = trbt_create(ctdb_db, 0);
+		if (ctdb_db->fetch_queue == NULL) {
+			DEBUG(DEBUG_ERR, (__location__ " Failed to re-create "
+					  "the fetch queue.\n"));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Report whether a single database is in CTDB_FREEZE_FROZEN mode */
+bool ctdb_db_frozen(struct ctdb_db_context *ctdb_db)
+{
+	return ctdb_db->freeze_mode == CTDB_FREEZE_FROZEN;
+}
+
+/* Report whether the node-wide freeze mode is CTDB_FREEZE_FROZEN */
+bool ctdb_db_all_frozen(struct ctdb_context *ctdb)
+{
+	return ctdb->freeze_mode == CTDB_FREEZE_FROZEN;
+}
+
+/*
+  Decide whether a database may be accessed.
+
+  Access is allowed when the database is not frozen at all.  While a
+  freeze is pending or in effect, access is allowed only if the freeze
+  transaction has been started, which is required during recovery.  If a
+  node is inactive the transaction is not started, so access is denied.
+ */
+bool ctdb_db_allow_access(struct ctdb_db_context *ctdb_db)
+{
+	if (ctdb_db->freeze_mode == CTDB_FREEZE_NONE) {
+		return true;
+	}
+
+	return ctdb_db->freeze_transaction_started ? true : false;
+}
diff --git a/ctdb/server/ctdb_keepalive.c b/ctdb/server/ctdb_keepalive.c
new file mode 100644
index 0000000..9155ade
--- /dev/null
+++ b/ctdb/server/ctdb_keepalive.c
@@ -0,0 +1,234 @@
+/*
+ monitoring links to all other nodes to detect dead nodes
+
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/time.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+#include "version.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+
+/*
+  Return the version number sent in keepalive packets.
+
+  The value is computed once and cached: the Samba major version in the
+  upper 16 bits combined with the minor version.  If the environment
+  variable CTDB_TEST_SAMBA_VERSION parses to a positive integer it
+  replaces the built-in value; otherwise a warning is logged.
+ */
+static uint32_t keepalive_version(void)
+{
+	static uint32_t version = 0;
+	const char *override;
+	int parsed;
+
+	if (version != 0) {
+		return version;
+	}
+
+	version = (SAMBA_VERSION_MAJOR << 16) | SAMBA_VERSION_MINOR;
+
+	override = getenv("CTDB_TEST_SAMBA_VERSION");
+	if (override == NULL) {
+		return version;
+	}
+
+	parsed = atoi(override);
+	if (parsed > 0) {
+		version = parsed;
+	} else {
+		DBG_WARNING("Failed to parse env var: %s\n", override);
+	}
+
+	return version;
+}
+
+/* Seconds elapsed between ctdb->ctdbd_start_time and the current time */
+static uint32_t keepalive_uptime(struct ctdb_context *ctdb)
+{
+	struct timeval now = tevent_timeval_current();
+
+	return now.tv_sec - ctdb->ctdbd_start_time.tv_sec;
+}
+
+/*
+  Send a keepalive packet, carrying our version and uptime, to destnode.
+
+  Nothing is sent when the transport is down (ctdb->methods is NULL).
+*/
+static void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
+{
+	struct ctdb_req_keepalive_old *r;
+
+	if (ctdb->methods == NULL) {
+		DEBUG(DEBUG_INFO,
+		      ("Failed to send keepalive. Transport is DOWN\n"));
+		return;
+	}
+
+	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE,
+				    sizeof(struct ctdb_req_keepalive_old),
+				    struct ctdb_req_keepalive_old);
+	CTDB_NO_MEMORY_FATAL(ctdb, r);
+
+	/* header */
+	r->hdr.reqid = 0;
+	r->hdr.destnode = destnode;
+
+	/* payload */
+	r->version = keepalive_version();
+	r->uptime = keepalive_uptime(ctdb);
+
+	CTDB_INCREMENT_STAT(ctdb, keepalive_packets_sent);
+
+	ctdb_queue_packet(ctdb, &r->hdr);
+
+	/* the packet is released here, right after it has been queued */
+	talloc_free(r);
+}
+
+/*
+  Periodic timer handler: see if any nodes are dead.
+
+  For every node other than ourselves that is neither deleted nor
+  disconnected, the count of consecutive intervals without received
+  traffic is updated.  When that count reaches the keepalive_limit tunable
+  the node is declared dead; in either case a keepalive is sent to it.
+  Disconnected nodes that have received traffic again are marked connected.
+
+  The handler re-arms itself after keepalive_interval seconds.
+  private_data is the struct ctdb_context; ev, te and t are unused.
+ */
+static void ctdb_check_for_dead_nodes(struct tevent_context *ev,
+				      struct tevent_timer *te,
+				      struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	struct tevent_timer *next;
+	unsigned int i;
+
+	/* send a keepalive to all other nodes, unless they are deleted or
+	 * disconnected */
+	for (i=0;i<ctdb->num_nodes;i++) {
+		struct ctdb_node *node = ctdb->nodes[i];
+
+		if (node->flags & NODE_FLAGS_DELETED) {
+			continue;
+		}
+
+		if (node->pnn == ctdb->pnn) {
+			continue;
+		}
+
+		if (node->flags & NODE_FLAGS_DISCONNECTED) {
+			/* it might have come alive again */
+			if (node->rx_cnt != 0) {
+				ctdb_node_connected(node);
+			}
+			continue;
+		}
+
+
+		/* count consecutive intervals without received packets */
+		if (node->rx_cnt == 0) {
+			node->dead_count++;
+		} else {
+			node->dead_count = 0;
+		}
+
+		node->rx_cnt = 0;
+
+		if (node->dead_count >= ctdb->tunable.keepalive_limit) {
+			DEBUG(DEBUG_NOTICE,("dead count reached for node %u\n", node->pnn));
+			ctdb_node_dead(node);
+			ctdb_send_keepalive(ctdb, node->pnn);
+			/* maybe tell the transport layer to kill the
+			   sockets as well?
+			*/
+			continue;
+		}
+
+		DEBUG(DEBUG_DEBUG,("sending keepalive to %u\n", node->pnn));
+		ctdb_send_keepalive(ctdb, node->pnn);
+
+		node->tx_cnt = 0;
+	}
+
+	next = tevent_add_timer(ctdb->ev, ctdb->keepalive_ctx,
+				timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
+				ctdb_check_for_dead_nodes, ctdb);
+	if (next == NULL) {
+		/* Without a new timer this handler never runs again, so at
+		 * least make the failure visible in the log */
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to re-arm keepalive timer\n"));
+	}
+}
+
+
+/*
+  Start keepalive monitoring: create the keepalive talloc context and arm
+  the first ctdb_check_for_dead_nodes() timer, keepalive_interval seconds
+  from now.  A warning is logged when the allow_mixed_versions tunable is
+  set to 1.
+ */
+void ctdb_start_keepalive(struct ctdb_context *ctdb)
+{
+	struct tevent_timer *te;
+
+	ctdb->keepalive_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY_FATAL(ctdb, ctdb->keepalive_ctx);
+
+	te = tevent_add_timer(ctdb->ev, ctdb->keepalive_ctx,
+			      timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
+			      ctdb_check_for_dead_nodes, ctdb);
+	CTDB_NO_MEMORY_FATAL(ctdb, te);
+
+	DEBUG(DEBUG_NOTICE,("Keepalive monitoring has been started\n"));
+
+	if (ctdb->tunable.allow_mixed_versions != 1) {
+		return;
+	}
+
+	DEBUG(DEBUG_WARNING,
+	      ("CTDB cluster with mixed versions configured\n"));
+}
+
+/*
+  Stop keepalive monitoring by freeing the keepalive talloc context, which
+  is the context the keepalive timer was allocated on.
+ */
+void ctdb_stop_keepalive(struct ctdb_context *ctdb)
+{
+	TALLOC_FREE(ctdb->keepalive_ctx);
+}
+
+/*
+ * Handle a received keepalive packet and enforce version agreement.
+ *
+ * Nothing is checked when the allow_mixed_versions tunable is 1.
+ * Otherwise this node starts its shutdown sequence when:
+ * - the packet is only as long as the request header, i.e. it carries no
+ * version (old keepalive), or
+ * - the sender's version differs from ours and the sender has been up
+ * longer than we have, or has the same uptime and a higher version.
+ */
+void ctdb_request_keepalive(struct ctdb_context *ctdb,
+ struct ctdb_req_header *hdr)
+{
+ struct ctdb_req_keepalive_old *c =
+ (struct ctdb_req_keepalive_old *)hdr;
+ uint32_t my_version = keepalive_version();
+ uint32_t my_uptime = keepalive_uptime(ctdb);
+
+ /* Don't check anything if mixed versions are allowed */
+ if (ctdb->tunable.allow_mixed_versions == 1) {
+ return;
+ }
+
+ if (hdr->length == sizeof(struct ctdb_req_header)) {
+ /* Old keepalive */
+ goto fail1;
+ }
+
+ if (c->version != my_version) {
+ /* on a mismatch, uptime decides which side gives way */
+ if (c->uptime > my_uptime) {
+ goto fail2;
+ } else if (c->uptime == my_uptime) {
+ /* equal uptime: the higher version wins */
+ if (c->version > my_version) {
+ goto fail2;
+ }
+ }
+ }
+
+ return;
+
+fail1:
+ DEBUG(DEBUG_ERR,
+ ("Keepalive version missing from node %u\n", hdr->srcnode));
+ goto shutdown;
+
+fail2:
+ DEBUG(DEBUG_ERR,
+ ("Keepalive version mismatch 0x%08x != 0x%08x from node %u\n",
+ my_version, c->version, hdr->srcnode));
+ goto shutdown;
+
+shutdown:
+ DEBUG(DEBUG_ERR,
+ ("CTDB Cluster with mixed versions, cannot continue\n"));
+ ctdb_shutdown_sequence(ctdb, 0);
+}
diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c
new file mode 100644
index 0000000..063ebfa
--- /dev/null
+++ b/ctdb/server/ctdb_lock.c
@@ -0,0 +1,996 @@
+/*
+ ctdb lock handling
+ provide API to do non-blocking locks for single or all databases
+
+ Copyright (C) Amitay Isaacs 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+ * Non-blocking Locking API
+ *
+ * 1. Create a child process to do blocking locks.
+ * 2. Once the locks are obtained, signal parent process via fd.
+ * 3. Invoke registered callback routine with locking status.
+ * 4. If the child process cannot get locks within certain time,
+ * execute an external script to debug.
+ *
+ * ctdb_lock_record() - get a lock on a record
+ * ctdb_lock_db() - get a lock on a DB
+ *
+ * auto_mark - whether to mark/unmark DBs in before/after callback
+ * = false is used for freezing databases for
+ * recovery since the recovery cannot start till
+ * databases are locked on all the nodes.
+ * = true is used for record locks.
+ */
+
+/*
+ * Kind of lock a request asks for.  The values are also used as index
+ * into lock_type_str[], so the two must be kept in the same order.
+ */
+enum lock_type {
+ LOCK_RECORD,
+ LOCK_DB,
+};
+
+/*
+ * Operation names used when reporting lock latency, indexed by
+ * enum lock_type.
+ */
+static const char * const lock_type_str[] = {
+ "lock_record",
+ "lock_db",
+};
+
+struct lock_request;
+
+/*
+ * lock_context is the common part for a lock request.
+ *
+ * It is allocated by ctdb_lock_internal(), sits on a pending list until a
+ * lock helper is started for it and on a current list while that helper
+ * is running.
+ */
+struct lock_context {
+ /* Linkage for the pending/current lists (used via the DLIST_* macros) */
+ struct lock_context *next, *prev;
+ /* LOCK_RECORD or LOCK_DB */
+ enum lock_type type;
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db;
+ /* Private copy of the record key; dptr is NULL when dsize is 0 */
+ TDB_DATA key;
+ /* Stored from the caller; both public entry points in this file pass 0 */
+ uint32_t priority;
+ /* Whether process_callbacks() marks/unmarks the lock around the callback */
+ bool auto_mark;
+ /* Client part of the request; NULL once detached */
+ struct lock_request *request;
+ /* PID of the lock helper, -1 while no helper is running */
+ pid_t child;
+ /* Pipe to the helper: fd[0] is read by ctdbd, fd[1] is given to the helper */
+ int fd[2];
+ /* Read event on fd[0], handled by ctdb_lock_handler() */
+ struct tevent_fd *tfd;
+ /* Timer for ctdb_lock_timeout_handler() */
+ struct tevent_timer *ttimer;
+ /* Time the request was queued; basis for latency and timeout messages */
+ struct timeval start_time;
+ /* ctdb_hash() of the key, set only for non-empty keys */
+ uint32_t key_hash;
+ /* Not referenced anywhere else in this file */
+ bool can_schedule;
+};
+
+/*
+ * lock_request is the client specific part for a lock request.
+ *
+ * This is the handle returned to callers of ctdb_lock_record() and
+ * ctdb_lock_db().
+ */
+struct lock_request {
+ /* Associated lock context; NULL once the two have been detached */
+ struct lock_context *lctx;
+ /* Called with private_data and true (lock obtained) or false (failed) */
+ void (*callback)(void *, bool);
+ void *private_data;
+};
+
+
+/*
+ * Call handler(db, private_data) for every database attached to ctdb,
+ * in list order.
+ *
+ * Stops at the first handler that returns non-zero and returns -1 in
+ * that case; returns 0 if every handler returned 0.
+ */
+int ctdb_db_iterator(struct ctdb_context *ctdb, ctdb_db_handler_t handler,
+		     void *private_data)
+{
+	struct ctdb_db_context *db = ctdb->db_list;
+
+	while (db != NULL) {
+		if (handler(db, private_data) != 0) {
+			return -1;
+		}
+		db = db->next;
+	}
+
+	return 0;
+}
+
+/*
+ * Mark one database as locked (mark only, no lock is taken here).
+ *
+ * Marks the transaction write lock first and then the all-record lock.
+ * Returns 0 on success, -1 as soon as either step fails.  private_data
+ * is unused.
+ */
+static int db_lock_mark_handler(struct ctdb_db_context *ctdb_db,
+				void *private_data)
+{
+	int tdb_transaction_write_lock_mark(struct tdb_context *);
+	int ret;
+
+	DEBUG(DEBUG_INFO, ("marking locked database %s\n", ctdb_db->db_name));
+
+	ret = tdb_transaction_write_lock_mark(ctdb_db->ltdb->tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to mark (transaction lock) database %s\n",
+				  ctdb_db->db_name));
+		return -1;
+	}
+
+	ret = tdb_lockall_mark(ctdb_db->ltdb->tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to mark (all lock) database %s\n",
+				  ctdb_db->db_name));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Mark a frozen database as locked.
+ *
+ * Returns -1 without marking anything if the database is not frozen,
+ * otherwise the result of db_lock_mark_handler().
+ */
+int ctdb_lockdb_mark(struct ctdb_db_context *ctdb_db)
+{
+	if (ctdb_db_frozen(ctdb_db)) {
+		return db_lock_mark_handler(ctdb_db, NULL);
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Attempt to mark database locked when not frozen\n"));
+	return -1;
+}
+
+/*
+ * Remove the "locked" marks from one database (unmark only, no lock is
+ * released here).
+ *
+ * Unmarks the transaction write lock first and then the all-record lock.
+ * Returns 0 on success, -1 as soon as either step fails.  private_data
+ * is unused.
+ */
+static int db_lock_unmark_handler(struct ctdb_db_context *ctdb_db,
+				  void *private_data)
+{
+	int tdb_transaction_write_lock_unmark(struct tdb_context *);
+	int ret;
+
+	DEBUG(DEBUG_INFO, ("unmarking locked database %s\n", ctdb_db->db_name));
+
+	ret = tdb_transaction_write_lock_unmark(ctdb_db->ltdb->tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to unmark (transaction lock) database %s\n",
+				  ctdb_db->db_name));
+		return -1;
+	}
+
+	ret = tdb_lockall_unmark(ctdb_db->ltdb->tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to unmark (all lock) database %s\n",
+				  ctdb_db->db_name));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove the "locked" marks from a frozen database.
+ *
+ * Returns -1 without touching anything if the database is not frozen,
+ * otherwise the result of db_lock_unmark_handler().
+ */
+int ctdb_lockdb_unmark(struct ctdb_db_context *ctdb_db)
+{
+	if (ctdb_db_frozen(ctdb_db)) {
+		return db_lock_unmark_handler(ctdb_db, NULL);
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Attempt to unmark database locked when not frozen\n"));
+	return -1;
+}
+
+static void ctdb_lock_schedule(struct ctdb_context *ctdb);
+
+/*
+ * Destructor to kill the child locking process
+ *
+ * Detaches the context from its request, sends SIGTERM to the lock helper
+ * if one is running, removes the context from the list it is on ("current"
+ * when a helper is running, "pending" otherwise), adjusts the matching
+ * counters and then tries to schedule another pending lock.
+ */
+static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
+{
+ /* Make sure the request destructor will not free this context again */
+ if (lock_ctx->request) {
+ lock_ctx->request->lctx = NULL;
+ }
+ if (lock_ctx->child > 0) {
+ /* A helper is running: the context is on a "current" list */
+ ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM);
+ if (lock_ctx->type == LOCK_RECORD) {
+ DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx);
+ } else {
+ DLIST_REMOVE(lock_ctx->ctdb->lock_current, lock_ctx);
+ }
+ /* The per-database helper count must include this helper */
+ if (lock_ctx->ctdb_db->lock_num_current == 0) {
+ ctdb_fatal(NULL, "Lock count is 0 before decrement\n");
+ }
+ lock_ctx->ctdb_db->lock_num_current--;
+ CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_current);
+ CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
+ } else {
+ /* No helper was started: the context is still on a "pending" list */
+ if (lock_ctx->type == LOCK_RECORD) {
+ DLIST_REMOVE(lock_ctx->ctdb_db->lock_pending, lock_ctx);
+ } else {
+ DLIST_REMOVE(lock_ctx->ctdb->lock_pending, lock_ctx);
+ }
+ CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
+ CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
+ }
+
+ /* A helper slot may have become free, try to start the next request */
+ ctdb_lock_schedule(lock_ctx->ctdb);
+
+ return 0;
+}
+
+
+/*
+ * Destructor to remove lock request
+ *
+ * If the request still owns a lock context, break the back-reference from
+ * the context and free the context as well.  Always returns 0 so the
+ * request itself is freed.
+ */
+static int ctdb_lock_request_destructor(struct lock_request *lock_request)
+{
+	if (lock_request->lctx != NULL) {
+		lock_request->lctx->request = NULL;
+		TALLOC_FREE(lock_request->lctx);
+	}
+
+	return 0;
+}
+
+/*
+ * Process all the callbacks waiting for lock
+ *
+ * If lock has failed, callback is executed with locked=false
+ *
+ * With auto_mark set, the lock obtained by the helper is marked as held
+ * in this process for the duration of the callback and the lock context
+ * is freed afterwards.  Without auto_mark the context is left alone here;
+ * it is freed together with the request (see the request destructor).
+ */
+static void process_callbacks(struct lock_context *lock_ctx, bool locked)
+{
+ struct lock_request *request;
+ /* Cached, as it is needed after the callback has run */
+ bool auto_mark = lock_ctx->auto_mark;
+
+ if (auto_mark && locked) {
+ switch (lock_ctx->type) {
+ case LOCK_RECORD:
+ tdb_chainlock_mark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key);
+ break;
+
+ case LOCK_DB:
+ (void)ctdb_lockdb_mark(lock_ctx->ctdb_db);
+ break;
+ }
+ }
+
+ request = lock_ctx->request;
+ if (auto_mark) {
+ /* Since request may be freed in the callback, unset the lock
+ * context, so request destructor will not free lock context.
+ */
+ request->lctx = NULL;
+ }
+
+ /* Since request may be freed in the callback, unset the request */
+ lock_ctx->request = NULL;
+
+ request->callback(request->private_data, locked);
+
+ /* Without auto_mark there is nothing to unmark or free here */
+ if (!auto_mark) {
+ return;
+ }
+
+ if (locked) {
+ switch (lock_ctx->type) {
+ case LOCK_RECORD:
+ tdb_chainlock_unmark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key);
+ break;
+
+ case LOCK_DB:
+ ctdb_lockdb_unmark(lock_ctx->ctdb_db);
+ break;
+ }
+ }
+
+ /* Runs the context destructor, which terminates the lock helper */
+ talloc_free(lock_ctx);
+}
+
+
+/*
+ * Map a lock latency in seconds to a histogram bucket index.
+ *
+ * Buckets 0..9 cover latencies below 1ms, 10ms, 100ms, 1s, 2s, 4s, 8s,
+ * 16s, 32s and 64s respectively; everything else lands in bucket 10.
+ */
+static int lock_bucket_id(double t)
+{
+	const double ms = 1.e-3, s = 1;
+	const double upper[] = {
+		1*ms, 10*ms, 100*ms,
+		1*s, 2*s, 4*s, 8*s, 16*s, 32*s, 64*s,
+	};
+	const int num_upper = (int)(sizeof(upper) / sizeof(upper[0]));
+	int bucket;
+
+	for (bucket = 0; bucket < num_upper; bucket++) {
+		if (t < upper[bucket]) {
+			return bucket;
+		}
+	}
+
+	return num_upper;
+}
+
+/*
+ * Callback routine when the lock helper reports its result.
+ * Called from parent context
+ *
+ * Reads the one byte status written by the helper (0 means the lock was
+ * obtained; anything else, or a failed read, means failure), updates the
+ * lock statistics and hands the result to process_callbacks().
+ */
+static void ctdb_lock_handler(struct tevent_context *ev,
+			      struct tevent_fd *tfd,
+			      uint16_t flags,
+			      void *private_data)
+{
+	struct lock_context *lock_ctx = talloc_get_type_abort(
+		private_data, struct lock_context);
+	bool locked = false;
+	char status;
+	double t;
+	int id;
+
+	/* The helper has answered, the timeout is no longer needed */
+	TALLOC_FREE(lock_ctx->ttimer);
+
+	t = timeval_elapsed(&lock_ctx->start_time);
+	id = lock_bucket_id(t);
+
+	/* Read the status from the child process */
+	if (sys_read(lock_ctx->fd[0], &status, 1) == 1) {
+		if (status == 0) {
+			locked = true;
+		}
+	}
+
+	/* Update statistics */
+	CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_calls);
+	CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_calls);
+
+	if (!locked) {
+		CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_failed);
+		CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_failed);
+	} else {
+		CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.buckets[id]);
+		CTDB_UPDATE_LATENCY(lock_ctx->ctdb, lock_ctx->ctdb_db,
+				    lock_type_str[lock_ctx->type], locks.latency,
+				    lock_ctx->start_time);
+
+		CTDB_UPDATE_DB_LATENCY(lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, t);
+		CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.buckets[id]);
+	}
+
+	process_callbacks(lock_ctx, locked);
+}
+
+/*
+ * Per-key record of the last "unable to get RECORD lock" message, used by
+ * lock_log_skip() to avoid logging the same key over and over.
+ */
+struct lock_log_entry {
+ /* Hash the entry is stored in (as a pointer), needed for cleanup */
+ struct db_hash_context *lock_log;
+ /* Private copy of the record key */
+ TDB_DATA key;
+ /* Elapsed seconds that were reported when this key was last logged */
+ unsigned long log_sec;
+ /* Expiry timer, fires lock_log_cleanup() */
+ struct tevent_timer *timer;
+};
+
+/*
+ * db_hash_fetch() parser for the lock log.
+ *
+ * The stored value is a pointer to a struct lock_log_entry.  It is type
+ * checked and returned through private_data, which must point to a
+ * struct lock_log_entry pointer.  Returns EINVAL if the stored value does
+ * not have the size of a pointer, 0 otherwise.
+ */
+static int lock_log_fetch_parser(uint8_t *keybuf, size_t keylen,
+				 uint8_t *databuf, size_t datalen,
+				 void *private_data)
+{
+	struct lock_log_entry **result = private_data;
+	void *stored;
+
+	if (datalen != sizeof(struct lock_log_entry *)) {
+		return EINVAL;
+	}
+
+	stored = *(void **)databuf;
+	*result = talloc_get_type_abort(stored, struct lock_log_entry);
+
+	return 0;
+}
+
+/*
+ * Timer callback that expires a lock log entry.
+ *
+ * The entry is removed from its hash and, only if that succeeded, freed.
+ */
+static void lock_log_cleanup(struct tevent_context *ev,
+			     struct tevent_timer *ttimer,
+			     struct timeval current_time,
+			     void *private_data)
+{
+	struct lock_log_entry *entry = talloc_get_type_abort(
+		private_data, struct lock_log_entry);
+
+	/* This timer has fired, forget about it */
+	entry->timer = NULL;
+
+	if (db_hash_delete(entry->lock_log, entry->key.dptr,
+			   entry->key.dsize) == 0) {
+		talloc_free(entry);
+	}
+}
+
+/*
+ * Decide whether the "unable to get RECORD lock" message for key should
+ * be suppressed.
+ *
+ * Returns true only if the key already has a lock log entry and
+ * elapsed_sec is not greater than the value recorded there.  In every
+ * other case, including all errors, false is returned (i.e. log).
+ *
+ * A key seen for the first time gets a new entry; a key logged again with
+ * a larger elapsed_sec has its entry updated.  In both cases the entry is
+ * given a 30 second timer that removes it via lock_log_cleanup().
+ */
+static bool lock_log_skip(struct tevent_context *ev,
+ struct db_hash_context *lock_log,
+ TDB_DATA key, unsigned long elapsed_sec)
+{
+ struct lock_log_entry *entry = NULL;
+ int ret;
+
+ ret = db_hash_fetch(lock_log, key.dptr, key.dsize,
+ lock_log_fetch_parser, &entry);
+ if (ret == ENOENT) {
+
+ /* First message for this key: create and store an entry */
+ entry = talloc_zero(lock_log, struct lock_log_entry);
+ if (entry == NULL) {
+ goto fail;
+ }
+
+ entry->lock_log = lock_log;
+
+ entry->key.dptr = talloc_memdup(entry, key.dptr, key.dsize);
+ if (entry->key.dptr == NULL) {
+ talloc_free(entry);
+ goto fail;
+ }
+ entry->key.dsize = key.dsize;
+
+ entry->log_sec = elapsed_sec;
+ entry->timer = tevent_add_timer(ev, entry,
+ timeval_current_ofs(30, 0),
+ lock_log_cleanup, entry);
+ if (entry->timer == NULL) {
+ talloc_free(entry);
+ goto fail;
+ }
+
+ /* The hash stores the pointer to the entry, not the entry itself */
+ ret = db_hash_add(lock_log, key.dptr, key.dsize,
+ (uint8_t *)&entry,
+ sizeof(struct lock_log_entry *));
+ if (ret != 0) {
+ talloc_free(entry);
+ goto fail;
+ }
+
+ return false;
+
+ } else if (ret == EINVAL) {
+
+ /* EINVAL is what lock_log_fetch_parser() returns for a stored
+ * value of unexpected size: drop the record
+ */
+ ret = db_hash_delete(lock_log, key.dptr, key.dsize);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ return false;
+
+ } else if (ret == 0) {
+
+ /* Already logged for this (or a longer) elapsed time */
+ if (elapsed_sec <= entry->log_sec) {
+ return true;
+ }
+
+ entry->log_sec = elapsed_sec;
+
+ /* Restart the expiry timer */
+ TALLOC_FREE(entry->timer);
+ entry->timer = tevent_add_timer(ev, entry,
+ timeval_current_ofs(30, 0),
+ lock_log_cleanup, entry);
+ if (entry->timer == NULL) {
+ /* Without a timer the entry would never expire, remove it now */
+ ret = db_hash_delete(lock_log, key.dptr, key.dsize);
+ if (ret != 0) {
+ goto fail;
+ }
+ talloc_free(entry);
+ }
+
+ return false;
+ }
+
+
+fail:
+ /* On any failure err on the side of logging */
+ return false;
+
+}
+
+/*
+ * Build the NULL terminated argument vector for the lock debugging
+ * helper:
+ *
+ *   debug_locks <lock helper PID> RECORD|DB <tdb path> MUTEX|FCNTL
+ *
+ * The vector and its strings are allocated under mem_ctx.  Returns NULL
+ * if any allocation fails.
+ */
+static const char **debug_locks_args(TALLOC_CTX *mem_ctx, struct lock_context *lock_ctx)
+{
+	/* Program, lock helper PID, db|record, tdb path, fcntl|mutex, NULL */
+	const int nargs = 6;
+	const char **argv;
+	const char *lock_kind;
+	const char *lock_impl;
+	int idx;
+
+	argv = talloc_array(mem_ctx, const char *, nargs);
+	if (argv == NULL) {
+		return NULL;
+	}
+
+	lock_kind = (lock_ctx->type == LOCK_RECORD) ? "RECORD" : "DB";
+
+	argv[0] = talloc_strdup(argv, "debug_locks");
+	argv[1] = talloc_asprintf(argv, "%d", lock_ctx->child);
+	argv[2] = talloc_strdup(argv, lock_kind);
+	argv[3] = talloc_strdup(argv, lock_ctx->ctdb_db->db_path);
+
+	if (tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb) & TDB_MUTEX_LOCKING) {
+		lock_impl = "MUTEX";
+	} else {
+		lock_impl = "FCNTL";
+	}
+	argv[4] = talloc_strdup(argv, lock_impl);
+
+	argv[5] = NULL;
+
+	/* Any failed string allocation invalidates the whole vector */
+	for (idx = 0; idx < nargs - 1; idx++) {
+		if (argv[idx] == NULL) {
+			talloc_free(argv);
+			return NULL;
+		}
+	}
+
+	return argv;
+}
+
+/*
+ * Callback routine when required locks are not obtained within timeout
+ * Called from parent context
+ *
+ * Logs a warning (always for database locks; for record locks only if
+ * lock_log_skip() does not suppress it), optionally runs the external
+ * lock debugging helper and re-arms itself to fire again in 10 seconds.
+ * The helper is not run for record locks while this node is inactive.
+ */
+static void ctdb_lock_timeout_handler(struct tevent_context *ev,
+				      struct tevent_timer *ttimer,
+				      struct timeval current_time,
+				      void *private_data)
+{
+	/* Helper path, filled in by ctdb_set_helper() and kept across calls */
+	static char debug_locks[PATH_MAX+1] = "";
+	struct lock_context *lock_ctx;
+	struct ctdb_context *ctdb;
+	pid_t pid;
+	double elapsed_time;
+	bool skip;
+	char *keystr;
+	const char **args;
+
+	lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
+	ctdb = lock_ctx->ctdb;
+
+	elapsed_time = timeval_elapsed(&lock_ctx->start_time);
+
+	/* For database locks, always log */
+	if (lock_ctx->type == LOCK_DB) {
+		DEBUG(DEBUG_WARNING,
+		      ("Unable to get DB lock on database %s for "
+		       "%.0lf seconds\n",
+		       lock_ctx->ctdb_db->db_name, elapsed_time));
+		goto lock_debug;
+	}
+
+	/* For record locks, check if we have already logged */
+	skip = lock_log_skip(ev, lock_ctx->ctdb_db->lock_log,
+			     lock_ctx->key, (unsigned long)elapsed_time);
+	if (skip) {
+		goto skip_lock_debug;
+	}
+
+	keystr = hex_encode_talloc(lock_ctx, lock_ctx->key.dptr,
+				   lock_ctx->key.dsize);
+	DEBUG(DEBUG_WARNING,
+	      ("Unable to get RECORD lock on database %s for %.0lf seconds"
+	       " (key %s)\n",
+	       lock_ctx->ctdb_db->db_name, elapsed_time,
+	       keystr ? keystr : ""));
+	TALLOC_FREE(keystr);
+
+	/* If a node stopped/banned, don't spam the logs */
+	if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
+		goto skip_lock_debug;
+	}
+
+lock_debug:
+
+	if (ctdb_set_helper("lock debugging helper",
+			    debug_locks, sizeof(debug_locks),
+			    "CTDB_DEBUG_LOCKS",
+			    getenv("CTDB_BASE"), "debug_locks.sh")) {
+		args = debug_locks_args(lock_ctx, lock_ctx);
+		if (args != NULL) {
+			pid = vfork();
+			if (pid == 0) {
+				execvp(debug_locks, discard_const(args));
+				_exit(0);
+			}
+			talloc_free(args);
+			if (pid == -1) {
+				/* Nothing was started, so there is no
+				 * child to keep track of
+				 */
+				D_WARNING("Failed to fork lock debugging "
+					  "helper\n");
+			} else {
+				ctdb_track_child(ctdb, pid);
+			}
+		} else {
+			D_WARNING("No memory for debug locks args\n");
+		}
+	} else {
+		DEBUG(DEBUG_WARNING,
+		      (__location__
+		       " Unable to setup lock debugging\n"));
+	}
+
+skip_lock_debug:
+
+	/*
+	 * Reset the timeout timer.  The timer that just fired is not freed
+	 * here; only the pointer is replaced by the new timer.
+	 */
+	lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
+					    lock_ctx,
+					    timeval_current_ofs(10, 0),
+					    ctdb_lock_timeout_handler,
+					    (void *)lock_ctx);
+	if (lock_ctx->ttimer == NULL) {
+		/* The lock request stays active, it is just no longer
+		 * monitored for timeouts
+		 */
+		D_WARNING("Failed to re-arm lock timeout timer\n");
+	}
+}
+
+/*
+ * Build the NULL terminated argument list for the lock helper:
+ *
+ *   <ctdbd pid> <fd> RECORD <db path> <tdb flags> <hex key>|NULL
+ *   <ctdbd pid> <fd> DB <db path> <tdb flags>
+ *
+ * The list is allocated under mem_ctx.  On success returns true and
+ * stores the number of entries (including the terminating NULL) in *argc
+ * and the list in *argv.  Returns false if any allocation fails.
+ */
+static bool lock_helper_args(TALLOC_CTX *mem_ctx,
+			     struct lock_context *lock_ctx, int fd,
+			     int *argc, const char ***argv)
+{
+	const char **list = NULL;
+	int count = 0;
+	int idx;
+
+	switch (lock_ctx->type) {
+	case LOCK_RECORD:
+		count = 6;
+		break;
+
+	case LOCK_DB:
+		count = 5;
+		break;
+	}
+
+	/* Add extra argument for null termination */
+	count += 1;
+
+	list = talloc_array(mem_ctx, const char *, count);
+	if (list == NULL) {
+		return false;
+	}
+
+	list[0] = talloc_asprintf(list, "%d", getpid());
+	list[1] = talloc_asprintf(list, "%d", fd);
+
+	switch (lock_ctx->type) {
+	case LOCK_RECORD:
+		list[2] = talloc_strdup(list, "RECORD");
+		list[3] = talloc_strdup(list, lock_ctx->ctdb_db->db_path);
+		list[4] = talloc_asprintf(list, "0x%x",
+				tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
+		if (lock_ctx->key.dsize != 0) {
+			list[5] = hex_encode_talloc(list, lock_ctx->key.dptr, lock_ctx->key.dsize);
+		} else {
+			/* An empty key is passed as the literal word NULL */
+			list[5] = talloc_strdup(list, "NULL");
+		}
+		break;
+
+	case LOCK_DB:
+		list[2] = talloc_strdup(list, "DB");
+		list[3] = talloc_strdup(list, lock_ctx->ctdb_db->db_path);
+		list[4] = talloc_asprintf(list, "0x%x",
+				tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
+		break;
+	}
+
+	/* Make sure last argument is NULL */
+	list[count - 1] = NULL;
+
+	for (idx = 0; idx < count - 1; idx++) {
+		if (list[idx] == NULL) {
+			talloc_free(list);
+			return false;
+		}
+	}
+
+	*argc = count;
+	*argv = list;
+	return true;
+}
+
+/*
+ * Find a lock request that can be scheduled
+ *
+ * Database lock requests (the global pending list) are considered before
+ * record lock requests (the per-database pending lists).  Databases that
+ * already have tunable lock_processes_per_db helpers running are skipped.
+ * Pending contexts that no longer have a request attached are removed and
+ * freed along the way.  Returns NULL if nothing can be scheduled.
+ */
+static struct lock_context *ctdb_find_lock_context(struct ctdb_context *ctdb)
+{
+ struct lock_context *lock_ctx, *next_ctx;
+ struct ctdb_db_context *ctdb_db;
+
+ /* First check if there are database lock requests */
+
+ for (lock_ctx = ctdb->lock_pending; lock_ctx != NULL;
+ lock_ctx = next_ctx) {
+
+ if (lock_ctx->request != NULL) {
+ /* Found a lock context with a request */
+ return lock_ctx;
+ }
+
+ /* Remember the successor, lock_ctx is about to be freed */
+ next_ctx = lock_ctx->next;
+
+ DEBUG(DEBUG_INFO, ("Removing lock context without lock "
+ "request\n"));
+ DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
+ CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
+ CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
+ /* NOTE(review): talloc_free() also runs
+ * ctdb_lock_context_destructor(), which repeats the list removal
+ * and the num_pending decrements - confirm this is intended
+ */
+ talloc_free(lock_ctx);
+ }
+
+ /* Next check database queues */
+ for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
+ /* This database already runs its full share of lock helpers */
+ if (ctdb_db->lock_num_current ==
+ ctdb->tunable.lock_processes_per_db) {
+ continue;
+ }
+
+ for (lock_ctx = ctdb_db->lock_pending; lock_ctx != NULL;
+ lock_ctx = next_ctx) {
+
+ next_ctx = lock_ctx->next;
+
+ if (lock_ctx->request != NULL) {
+ return lock_ctx;
+ }
+
+ DEBUG(DEBUG_INFO, ("Removing lock context without "
+ "lock request\n"));
+ DLIST_REMOVE(ctdb_db->lock_pending, lock_ctx);
+ CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
+ CTDB_DECREMENT_DB_STAT(ctdb_db, locks.num_pending);
+ talloc_free(lock_ctx);
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Schedule a new lock child process
+ * Set up callback handler and timeout handler
+ *
+ * Picks the next schedulable lock context, starts a lock helper for it
+ * with a pipe for reporting the result, installs a 10 second timeout
+ * timer and a read handler on the pipe, and finally moves the context
+ * from its pending list to the matching current list.
+ *
+ * On any failure the function just returns; the context stays on its
+ * pending list with child set to -1.
+ */
+static void ctdb_lock_schedule(struct ctdb_context *ctdb)
+{
+ struct lock_context *lock_ctx;
+ int ret, argc;
+ TALLOC_CTX *tmp_ctx;
+ /* Helper path, filled in by ctdb_set_helper() and kept across calls */
+ static char prog[PATH_MAX+1] = "";
+ const char **args;
+
+ if (!ctdb_set_helper("lock helper",
+ prog, sizeof(prog),
+ "CTDB_LOCK_HELPER",
+ CTDB_HELPER_BINDIR, "ctdb_lock_helper")) {
+ ctdb_die(ctdb, __location__
+ " Unable to set lock helper\n");
+ }
+
+ /* Find a lock context with requests */
+ lock_ctx = ctdb_find_lock_context(ctdb);
+ if (lock_ctx == NULL) {
+ return;
+ }
+
+ lock_ctx->child = -1;
+ ret = pipe(lock_ctx->fd);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Failed to create pipe in ctdb_lock_schedule\n"));
+ return;
+ }
+
+ /* Only the write end, fd[1], is meant to be used by the helper */
+ set_close_on_exec(lock_ctx->fd[0]);
+
+ /* Create data for child process */
+ tmp_ctx = talloc_new(lock_ctx);
+ if (tmp_ctx == NULL) {
+ DEBUG(DEBUG_ERR, ("Failed to allocate memory for helper args\n"));
+ close(lock_ctx->fd[0]);
+ close(lock_ctx->fd[1]);
+ return;
+ }
+
+ /* The helper checks this variable before asking for real-time priority */
+ if (! ctdb->do_setsched) {
+ ret = setenv("CTDB_NOSETSCHED", "1", 1);
+ if (ret != 0) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to set CTDB_NOSETSCHED variable\n"));
+ }
+ }
+
+ /* Create arguments for lock helper */
+ if (!lock_helper_args(tmp_ctx, lock_ctx, lock_ctx->fd[1],
+ &argc, &args)) {
+ DEBUG(DEBUG_ERR, ("Failed to create lock helper args\n"));
+ close(lock_ctx->fd[0]);
+ close(lock_ctx->fd[1]);
+ talloc_free(tmp_ctx);
+ return;
+ }
+
+ lock_ctx->child = ctdb_vfork_exec(lock_ctx, ctdb, prog, argc,
+ (const char **)args);
+ if (lock_ctx->child == -1) {
+ DEBUG(DEBUG_ERR, ("Failed to create a child in ctdb_lock_schedule\n"));
+ close(lock_ctx->fd[0]);
+ close(lock_ctx->fd[1]);
+ talloc_free(tmp_ctx);
+ return;
+ }
+
+ /* Parent process */
+ /* The write end belongs to the helper now */
+ close(lock_ctx->fd[1]);
+
+ talloc_free(tmp_ctx);
+
+ /* Set up timeout handler */
+ lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
+ lock_ctx,
+ timeval_current_ofs(10, 0),
+ ctdb_lock_timeout_handler,
+ (void *)lock_ctx);
+ if (lock_ctx->ttimer == NULL) {
+ /* Resetting child to -1 keeps the context accounted as pending */
+ ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
+ lock_ctx->child = -1;
+ close(lock_ctx->fd[0]);
+ return;
+ }
+
+ /* Set up callback */
+ lock_ctx->tfd = tevent_add_fd(ctdb->ev,
+ lock_ctx,
+ lock_ctx->fd[0],
+ TEVENT_FD_READ,
+ ctdb_lock_handler,
+ (void *)lock_ctx);
+ if (lock_ctx->tfd == NULL) {
+ TALLOC_FREE(lock_ctx->ttimer);
+ ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
+ lock_ctx->child = -1;
+ close(lock_ctx->fd[0]);
+ return;
+ }
+ /* From here on fd[0] is closed together with the fd event */
+ tevent_fd_set_auto_close(lock_ctx->tfd);
+
+ /* Move the context from pending to current */
+ if (lock_ctx->type == LOCK_RECORD) {
+ DLIST_REMOVE(lock_ctx->ctdb_db->lock_pending, lock_ctx);
+ DLIST_ADD_END(lock_ctx->ctdb_db->lock_current, lock_ctx);
+ } else {
+ DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
+ DLIST_ADD_END(ctdb->lock_current, lock_ctx);
+ }
+ CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
+ CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
+ lock_ctx->ctdb_db->lock_num_current++;
+ CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
+ CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
+}
+
+
+/*
+ * Lock record / db depending on type
+ *
+ * Creates a lock context (owned by ctdb) and a lock request (owned by
+ * mem_ctx), queues the context and tries to schedule a lock helper.
+ * callback is later invoked with private_data and the lock result.
+ *
+ * Returns the request, which the caller frees to cancel or release the
+ * lock, or NULL if callback is NULL or memory allocation fails.
+ */
+static struct lock_request *ctdb_lock_internal(TALLOC_CTX *mem_ctx,
+					       struct ctdb_context *ctdb,
+					       struct ctdb_db_context *ctdb_db,
+					       TDB_DATA key,
+					       uint32_t priority,
+					       void (*callback)(void *, bool),
+					       void *private_data,
+					       enum lock_type type,
+					       bool auto_mark)
+{
+	struct lock_context *lock_ctx = NULL;
+	struct lock_request *request;
+
+	if (callback == NULL) {
+		DEBUG(DEBUG_WARNING, ("No callback function specified, not locking\n"));
+		return NULL;
+	}
+
+	lock_ctx = talloc_zero(ctdb, struct lock_context);
+	if (lock_ctx == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to create a new lock context\n"));
+		return NULL;
+	}
+
+	request = talloc_zero(mem_ctx, struct lock_request);
+	if (request == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to create a new lock request\n"));
+		talloc_free(lock_ctx);
+		return NULL;
+	}
+
+	lock_ctx->type = type;
+	lock_ctx->ctdb = ctdb;
+	lock_ctx->ctdb_db = ctdb_db;
+	lock_ctx->key.dsize = key.dsize;
+	if (key.dsize > 0) {
+		/* Keep a private copy, the caller's key may not outlive us */
+		lock_ctx->key.dptr = talloc_memdup(lock_ctx, key.dptr, key.dsize);
+		if (lock_ctx->key.dptr == NULL) {
+			DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n"));
+			talloc_free(lock_ctx);
+			talloc_free(request);
+			return NULL;
+		}
+		lock_ctx->key_hash = ctdb_hash(&key);
+	} else {
+		lock_ctx->key.dptr = NULL;
+	}
+	lock_ctx->priority = priority;
+	lock_ctx->auto_mark = auto_mark;
+
+	lock_ctx->request = request;
+	lock_ctx->child = -1;
+
+	/* Record locks are queued per database, all other locks on the
+	 * global pending list.  ctdb_find_lock_context() looks at the
+	 * global list first, so the non-record locks required by recovery
+	 * are scheduled ahead of record locks.
+	 */
+	if (lock_ctx->type == LOCK_RECORD) {
+		DLIST_ADD_END(ctdb_db->lock_pending, lock_ctx);
+	} else {
+		DLIST_ADD_END(ctdb->lock_pending, lock_ctx);
+	}
+	CTDB_INCREMENT_STAT(ctdb, locks.num_pending);
+	if (ctdb_db) {
+		CTDB_INCREMENT_DB_STAT(ctdb_db, locks.num_pending);
+	}
+
+	/* Latency is measured from the time the request is queued */
+	lock_ctx->start_time = timeval_current();
+
+	request->lctx = lock_ctx;
+	request->callback = callback;
+	request->private_data = private_data;
+
+	/* Only install the destructors once both objects are fully set up */
+	talloc_set_destructor(request, ctdb_lock_request_destructor);
+	talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor);
+
+	ctdb_lock_schedule(ctdb);
+
+	return request;
+}
+
+
+/*
+ * obtain a lock on a record in a database
+ *
+ * Thin wrapper around ctdb_lock_internal() for LOCK_RECORD requests with
+ * priority 0.  Returns the lock request, or NULL on failure.
+ */
+struct lock_request *ctdb_lock_record(TALLOC_CTX *mem_ctx,
+				      struct ctdb_db_context *ctdb_db,
+				      TDB_DATA key,
+				      bool auto_mark,
+				      void (*callback)(void *, bool),
+				      void *private_data)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+
+	return ctdb_lock_internal(mem_ctx, ctdb, ctdb_db, key, 0,
+				  callback, private_data,
+				  LOCK_RECORD, auto_mark);
+}
+
+
+/*
+ * obtain a lock on a database
+ *
+ * Thin wrapper around ctdb_lock_internal() for LOCK_DB requests with an
+ * empty key and priority 0.  Returns the lock request, or NULL on
+ * failure.
+ */
+struct lock_request *ctdb_lock_db(TALLOC_CTX *mem_ctx,
+				  struct ctdb_db_context *ctdb_db,
+				  bool auto_mark,
+				  void (*callback)(void *, bool),
+				  void *private_data)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+
+	return ctdb_lock_internal(mem_ctx, ctdb, ctdb_db, tdb_null, 0,
+				  callback, private_data,
+				  LOCK_DB, auto_mark);
+}
diff --git a/ctdb/server/ctdb_lock_helper.c b/ctdb/server/ctdb_lock_helper.c
new file mode 100644
index 0000000..51d2992
--- /dev/null
+++ b/ctdb/server/ctdb_lock_helper.c
@@ -0,0 +1,350 @@
+/*
+ ctdb lock helper
+
+ Copyright (C) Amitay Isaacs 2013
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/sys_rw.h"
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol.h"
+
+#include "common/system.h"
+
+static bool realtime = true;
+
+/*
+ * What the helper has opened/locked, so that cleanup() can undo it.
+ */
+struct lock_state {
+ /* Database handle; NULL until tdb_open() has succeeded */
+ struct tdb_context *tdb;
+ /* Record key; dsize is 0 for database locks and for the "NULL" key */
+ TDB_DATA key;
+};
+
+/*
+ * Switch to the real-time scheduler before taking a lock, unless that has
+ * been disabled through the CTDB_NOSETSCHED environment variable or an
+ * earlier attempt has failed.  The outcome is remembered in "realtime".
+ */
+static void set_priority(void)
+{
+	if (getenv("CTDB_NOSETSCHED") != NULL) {
+		realtime = false;
+	}
+
+	if (realtime) {
+		realtime = set_scheduler();
+		if (! realtime) {
+			fprintf(stderr,
+				"locking: Unable to set real-time scheduler priority\n");
+		}
+	}
+}
+
+/*
+ * Undo set_priority(); does nothing if real-time scheduling is not in
+ * effect.
+ */
+static void reset_priority(void)
+{
+	if (! realtime) {
+		return;
+	}
+
+	reset_scheduler();
+}
+
+/*
+ * Report the locking result to the parent as a single byte on fd.
+ *
+ * A result of 1 (failure) terminates the helper with exit status 1 right
+ * after it has been written; for any other value the function returns.
+ */
+static void send_result(int fd, char result)
+{
+	sys_write(fd, &result, 1);
+
+	if (result != 1) {
+		return;
+	}
+
+	exit(1);
+}
+
+
+/*
+ * Print the two supported command line forms (RECORD and DB) to stderr.
+ */
+static void usage(const char *progname)
+{
+	fputs("\n", stderr);
+	fprintf(stderr,
+		"Usage: %s <ctdbd-pid> <output-fd> RECORD <db-path> <db-flags> <db-key>\n",
+		progname);
+	fprintf(stderr,
+		" %s <ctdbd-pid> <output-fd> DB <db-path> <db-flags>\n",
+		progname);
+}
+
+/*
+ * Decode a string of hex digit pairs into a newly allocated byte array.
+ *
+ * On success returns the buffer (a talloc child of mem_ctx) and stores
+ * the number of decoded bytes in *len.  A trailing unpaired character is
+ * ignored.  Returns NULL, with *len set to 0, if memory cannot be
+ * allocated or if a pair cannot be parsed as a hex number.
+ */
+static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx,
+				  const char *hex_in, size_t *len)
+{
+	size_t nbytes = strlen(hex_in) / 2;
+	size_t i;
+	uint8_t *buffer;
+
+	*len = 0;
+
+	buffer = talloc_array(mem_ctx, unsigned char, nbytes);
+	if (buffer == NULL) {
+		return NULL;
+	}
+
+	for (i=0; i<nbytes; i++) {
+		/* %X stores through an unsigned int pointer */
+		unsigned int num;
+
+		if (sscanf(&hex_in[i*2], "%02X", &num) != 1) {
+			talloc_free(buffer);
+			return NULL;
+		}
+		buffer[i] = (uint8_t)num;
+	}
+
+	*len = nbytes;
+	return buffer;
+}
+
+/*
+ * Open the database and take a blocking chain lock on one record.
+ *
+ * dbflags holds the tdb flags as a number in any base strtol() accepts
+ * with base 0; dbkey is the hex encoded record key, or the literal
+ * "NULL" for an empty key.  The open database and the decoded key are
+ * stored in state so that cleanup() can release the lock later.
+ *
+ * Returns 0 once the lock is held, 1 on any error.
+ */
+static int lock_record(const char *dbpath, const char *dbflags,
+		       const char *dbkey, struct lock_state *state)
+{
+	int tdb_flags;
+
+	/* No error checking since CTDB always passes sane values */
+	tdb_flags = strtol(dbflags, NULL, 0);
+
+	/* Convert hex key to key */
+	if (strcmp(dbkey, "NULL") == 0) {
+		state->key.dptr = NULL;
+		state->key.dsize = 0;
+	} else {
+		state->key.dptr = hex_decode_talloc(NULL, dbkey,
+						    &state->key.dsize);
+		if (state->key.dptr == NULL) {
+			/* Never lock with a key that was not decoded */
+			fprintf(stderr,
+				"locking: Error decoding record key\n");
+			return 1;
+		}
+	}
+
+	state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
+	if (state->tdb == NULL) {
+		fprintf(stderr, "locking: Error opening database %s\n", dbpath);
+		return 1;
+	}
+
+	/* Use real-time priority only while waiting for the lock */
+	set_priority();
+
+	if (tdb_chainlock(state->tdb, state->key) < 0) {
+		fprintf(stderr, "locking: Error getting record lock (%s)\n",
+			tdb_errorstr(state->tdb));
+		return 1;
+	}
+
+	reset_priority();
+
+	return 0;
+
+}
+
+/*
+ * Open the database and take a blocking lock on the whole database.
+ *
+ * dbflags holds the tdb flags as a number in any base strtol() accepts
+ * with base 0.  The open database is stored in state->tdb.
+ *
+ * Returns 0 once the lock is held, 1 on any error.
+ */
+static int lock_db(const char *dbpath, const char *dbflags,
+		   struct lock_state *state)
+{
+	/* No error checking since CTDB always passes sane values */
+	int flags = strtol(dbflags, NULL, 0);
+	struct tdb_context *db;
+
+	db = tdb_open(dbpath, 0, flags, O_RDWR, 0600);
+	state->tdb = db;
+	if (db == NULL) {
+		fprintf(stderr, "locking: Error opening database %s\n", dbpath);
+		return 1;
+	}
+
+	/* Use real-time priority only while waiting for the lock */
+	set_priority();
+
+	if (tdb_lockall(db) < 0) {
+		fprintf(stderr, "locking: Error getting db lock (%s)\n",
+			tdb_errorstr(db));
+		return 1;
+	}
+
+	reset_priority();
+
+	return 0;
+}
+
+/*
+ * State of the wait_for_parent_send/recv request.
+ */
+struct wait_for_parent_state {
+ /* Event context used to schedule the periodic checks */
+ struct tevent_context *ev;
+ /* PID that is probed with kill(ppid, 0) */
+ pid_t ppid;
+};
+
+static void wait_for_parent_check(struct tevent_req *subreq);
+
+/*
+ * Start an async request that completes once process ppid no longer
+ * exists.  The first check happens after 5 seconds, see
+ * wait_for_parent_check().
+ *
+ * Returns NULL if the request cannot be created.
+ */
+static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ pid_t ppid)
+{
+ struct tevent_req *req, *subreq;
+ struct wait_for_parent_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->ppid = ppid;
+
+ /* PID 1 is never waited for - presumably it means the original
+ * parent is already gone; TODO confirm
+ */
+ if (ppid == 1) {
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ subreq = tevent_wakeup_send(state, ev,
+ tevent_timeval_current_ofs(5,0));
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, wait_for_parent_check, req);
+
+ return req;
+}
+
+/*
+ * Periodic check for wait_for_parent_send(): finish the request if the
+ * parent process no longer exists, otherwise check again in 5 seconds.
+ */
+static void wait_for_parent_check(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct wait_for_parent_state *state = tevent_req_data(
+ req, struct wait_for_parent_state);
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ /* Ignore error */
+ fprintf(stderr, "locking: tevent_wakeup_recv() failed\n");
+ }
+
+ /* Signal 0 only probes for existence; ESRCH means no such process */
+ if (kill(state->ppid, 0) == -1 && errno == ESRCH) {
+ tevent_req_done(req);
+ return;
+ }
+
+ /* Parent still there, schedule the next check */
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(5,0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, wait_for_parent_check, req);
+}
+
+/*
+ * Collect the result of wait_for_parent_send().
+ *
+ * Returns true if the request finished without error; otherwise returns
+ * false with the error stored in *perr by tevent_req_is_unix_error().
+ */
+static bool wait_for_parent_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * Release the lock and close the database, if one was opened.
+ *
+ * An empty key selects tdb_unlockall(), a non-empty key
+ * tdb_chainunlock() on that key.
+ */
+static void cleanup(struct lock_state *state)
+{
+	struct tdb_context *tdb = state->tdb;
+
+	if (tdb == NULL) {
+		return;
+	}
+
+	if (state->key.dsize != 0) {
+		tdb_chainunlock(tdb, state->key);
+	} else {
+		tdb_unlockall(tdb);
+	}
+
+	tdb_close(tdb);
+}
+
+/*
+ * tevent signal handler (registered for SIGTERM in main()): release the
+ * lock described by private_data, a struct lock_state, and exit with
+ * status 0.
+ */
+static void signal_handler(struct tevent_context *ev,
+			   struct tevent_signal *se,
+			   int signum, int count, void *siginfo,
+			   void *private_data)
+{
+	cleanup((struct lock_state *)private_data);
+	exit(0);
+}
+
+/*
+ * ctdb lock helper.
+ *
+ *   argv[1]  PID of the ctdbd that started the helper
+ *   argv[2]  file descriptor to write the one byte result to
+ *   argv[3]  "RECORD" or "DB"
+ *   argv[4]  database path
+ *   argv[5]  tdb flags
+ *   argv[6]  hex encoded record key or "NULL" (RECORD only)
+ *
+ * The helper takes the requested lock, reports 0 on the result fd and
+ * then keeps holding the lock until it receives SIGTERM or notices that
+ * the parent process is gone.  If locking fails it reports 1 and exits
+ * with status 1.
+ */
+int main(int argc, char *argv[])
+{
+ struct tevent_context *ev;
+ struct tevent_signal *se;
+ struct tevent_req *req;
+ struct lock_state state = { 0 };
+ int write_fd;
+ char result = 0;
+ int ppid;
+ const char *lock_type;
+ bool status;
+ int err;
+
+ /* Start without real-time priority, set_priority() may re-enable it */
+ reset_scheduler();
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(1);
+ }
+
+ ppid = atoi(argv[1]);
+ write_fd = atoi(argv[2]);
+ lock_type = argv[3];
+
+ ev = tevent_context_init(NULL);
+ if (ev == NULL) {
+ fprintf(stderr, "locking: tevent_context_init() failed\n");
+ exit(1);
+ }
+
+ /* SIGTERM is how ctdbd asks the helper to drop the lock */
+ se = tevent_add_signal(ev, ev, SIGTERM, 0,
+ signal_handler, &state);
+ if (se == NULL) {
+ fprintf(stderr, "locking: tevent_add_signal() failed\n");
+ talloc_free(ev);
+ exit(1);
+ }
+
+ if (strcmp(lock_type, "RECORD") == 0) {
+ if (argc != 7) {
+ fprintf(stderr,
+ "locking: Invalid number of arguments (%d)\n",
+ argc);
+ usage(argv[0]);
+ exit(1);
+ }
+ result = lock_record(argv[4], argv[5], argv[6], &state);
+
+ } else if (strcmp(lock_type, "DB") == 0) {
+ if (argc != 6) {
+ fprintf(stderr,
+ "locking: Invalid number of arguments (%d)\n",
+ argc);
+ usage(argv[0]);
+ exit(1);
+ }
+ result = lock_db(argv[4], argv[5], &state);
+
+ } else {
+ fprintf(stderr, "locking: Invalid lock-type '%s'\n", lock_type);
+ usage(argv[0]);
+ exit(1);
+ }
+
+ /* Does not return if result is 1 (locking failed) */
+ send_result(write_fd, result);
+
+ /* Hold the lock until the parent goes away (or SIGTERM arrives) */
+ req = wait_for_parent_send(ev, ev, ppid);
+ if (req == NULL) {
+ fprintf(stderr, "locking: wait_for_parent_send() failed\n");
+ cleanup(&state);
+ exit(1);
+ }
+
+ tevent_req_poll(req, ev);
+
+ status = wait_for_parent_recv(req, &err);
+ if (! status) {
+ fprintf(stderr,
+ "locking: wait_for_parent_recv() failed (%d)\n",
+ err);
+ }
+
+ talloc_free(ev);
+ cleanup(&state);
+ return 0;
+}
diff --git a/ctdb/server/ctdb_logging.c b/ctdb/server/ctdb_logging.c
new file mode 100644
index 0000000..1da26b5
--- /dev/null
+++ b/ctdb/server/ctdb_logging.c
@@ -0,0 +1,174 @@
+/*
+ ctdb logging code
+
+ Copyright (C) Andrew Tridgell 2008
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/time.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/time.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+ * State used to collect output arriving on the child logging pipe
+ * and forward it, line by line, to the log.
+ */
+struct ctdb_log_state {
+ /* pfd is the pipe descriptor ctdb_child_log_handler() reads from;
+    fd is not referenced by the code in this file section */
+ int fd, pfd;
+ /* bytes read from pfd that have not been written to the log yet */
+ char buf[1024];
+ /* number of valid bytes currently held in buf */
+ uint16_t buf_used;
+};
+
+/* Used by ctdb_set_child_logging() */
+static struct ctdb_log_state *log_state;
+
+/*
+  Initialise logging.
+
+  Allocates the file-level child logging state on mem_ctx and then
+  hands the logging specification and debug level to logging_init()
+  under the application name "ctdbd".
+
+  Returns true on success, false if the allocation or logging_init()
+  fails.
+ */
+bool ctdb_logging_init(TALLOC_CTX *mem_ctx, const char *logging,
+		       const char *debug_level)
+{
+	log_state = talloc_zero(mem_ctx, struct ctdb_log_state);
+	if (log_state == NULL) {
+		return false;
+	}
+
+	return (logging_init(mem_ctx, logging, debug_level, "ctdbd") == 0);
+}
+
+/*
+  Write len bytes of buf to the log as a single line at
+  script_log_level.  buf does not need to be NUL terminated: the
+  precision limits how many bytes are printed.
+ */
+static void write_to_log(const char *buf, unsigned int len)
+{
+	/* the '*' width and precision arguments of printf-style formats
+	   must be of type int */
+	int n = (int)len;
+
+	DEBUG(script_log_level, ("%*.*s\n", n, n, buf));
+}
+
+/*
+  called when log data comes in from a child process
+
+  Appends whatever can be read from the pipe to the state buffer and
+  writes every complete line to the log.  A trailing partial line is
+  kept for the next call, unless the buffer is completely full, in
+  which case it is written out as is.  On end-of-file the state is
+  freed unless it is the file-level log_state.
+ */
+static void ctdb_child_log_handler(struct tevent_context *ev,
+				   struct tevent_fd *fde,
+				   uint16_t flags, void *private)
+{
+	struct ctdb_log_state *log = talloc_get_type(private, struct ctdb_log_state);
+	int nread;
+
+	if ((flags & TEVENT_FD_READ) == 0) {
+		return;
+	}
+
+	nread = sys_read(log->pfd, log->buf + log->buf_used,
+			 sizeof(log->buf) - log->buf_used);
+	if (nread == 0) {
+		/* end of file */
+		if (log != log_state) {
+			talloc_free(log);
+		}
+		return;
+	}
+	if (nread > 0) {
+		log->buf_used += nread;
+	}
+
+	/* emit one log entry per complete line in the buffer */
+	while (log->buf_used > 0) {
+		char *nl = memchr(log->buf, '\n', log->buf_used);
+		int consumed;
+		int line_len;
+
+		if (nl == NULL) {
+			break;
+		}
+
+		consumed = (nl - log->buf) + 1;
+		line_len = consumed - 1;
+		/* swallow \r from child processes */
+		if (line_len > 0 && log->buf[line_len - 1] == '\r') {
+			line_len--;
+		}
+		write_to_log(log->buf, line_len);
+		memmove(log->buf, nl + 1, sizeof(log->buf) - consumed);
+		log->buf_used -= consumed;
+	}
+
+	/* the buffer could have completely filled - unfortunately we have
+	   no choice but to dump it out straight away */
+	if (log->buf_used == sizeof(log->buf)) {
+		write_to_log(log->buf, log->buf_used);
+		log->buf_used = 0;
+	}
+}
+
+/*
+  setup for logging of child process stdout
+
+  Creates a pipe, points this process's stdout and stderr at its write
+  end and registers ctdb_child_log_handler() on the read end, so that
+  anything written to those descriptors ends up in the log.
+
+  ctdb_logging_init() must have been called first, because the
+  file-level log_state is used as handler state.
+
+  Returns 0 on success, -1 on failure.  On failure stdout and stderr
+  are left (or put back) as they were and no descriptors are leaked.
+*/
+int ctdb_set_child_logging(struct ctdb_context *ctdb)
+{
+	int p[2];
+	int old_stdout, old_stderr;
+	struct tevent_fd *fde;
+
+	if (log_state == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Child logging state not initialised\n"));
+		return -1;
+	}
+
+	/* setup a pipe to catch IO from subprocesses */
+	if (pipe(p) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to setup for child logging pipe\n"));
+		return -1;
+	}
+
+	/* We'll fail if stderr/stdout not already open; it's simpler. */
+	old_stdout = dup(STDOUT_FILENO);
+	if (old_stdout < 0) {
+		DEBUG(DEBUG_ERR, ("Failed to dup stdout for child logging\n"));
+		close(p[0]);
+		close(p[1]);
+		return -1;
+	}
+	old_stderr = dup(STDERR_FILENO);
+	if (old_stderr < 0) {
+		DEBUG(DEBUG_ERR, ("Failed to dup stderr for child logging\n"));
+		close(old_stdout);
+		close(p[0]);
+		close(p[1]);
+		return -1;
+	}
+	if (dup2(p[1], STDOUT_FILENO) < 0 || dup2(p[1], STDERR_FILENO) < 0) {
+		int saved_errno = errno;
+		dup2(old_stdout, STDOUT_FILENO);
+		dup2(old_stderr, STDERR_FILENO);
+		close(old_stdout);
+		close(old_stderr);
+		close(p[0]);
+		close(p[1]);
+		errno = saved_errno;
+
+		printf(__location__ " dup2 failed: %s\n",
+			strerror(errno));
+		return -1;
+	}
+	close(p[1]);
+
+	fde = tevent_add_fd(ctdb->ev, log_state, p[0], TEVENT_FD_READ,
+			    ctdb_child_log_handler, log_state);
+	if (fde == NULL) {
+		/* Nothing would ever drain the pipe: put the original
+		   descriptors back before reporting the failure */
+		dup2(old_stdout, STDOUT_FILENO);
+		dup2(old_stderr, STDERR_FILENO);
+		close(old_stdout);
+		close(old_stderr);
+		close(p[0]);
+		DEBUG(DEBUG_ERR,(__location__ " Failed to add fd event for child logging\n"));
+		return -1;
+	}
+	tevent_fd_set_auto_close(fde);
+
+	/* the saved descriptors were only needed for rollback */
+	close(old_stdout);
+	close(old_stderr);
+
+	log_state->pfd = p[0];
+
+	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for logging\n", p[0]));
+
+	return 0;
+}
diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c
new file mode 100644
index 0000000..e2cb916
--- /dev/null
+++ b/ctdb/server/ctdb_ltdb_server.c
@@ -0,0 +1,1663 @@
+/*
+ ctdb ltdb code - server side
+
+ Copyright (C) Andrew Tridgell 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+#include "system/time.h"
+#include "system/locale.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/rb_tree.h"
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "server/ctdb_config.h"
+
+#define PERSISTENT_HEALTH_TDB "persistent_health.tdb"
+
+/**
+ * write a record to a normal database
+ *
+ * This is the server-variant of the ctdb_ltdb_store function.
+ * It contains logic to determine whether a record should be
+ * stored or deleted. It also sends SCHEDULE_FOR_DELETION
+ * controls to the local ctdb daemon if appropriate.
+ *
+ * ctdb_db: database whose local tdb is written
+ * key:     key of the record
+ * header:  record header; modified in place (the rsn may be
+ *          incremented, the VACUUM_MIGRATED and AUTOMATIC flags
+ *          are always cleared)
+ * data:    record payload, stored after the header
+ *
+ * Returns the result of tdb_storev() or tdb_delete(): 0 on success,
+ * non-zero on failure.
+ */
+static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA data)
+{
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+ TDB_DATA rec[2];
+ uint32_t hsize = sizeof(struct ctdb_ltdb_header);
+ int ret;
+ bool keep = false;
+ bool schedule_for_deletion = false;
+ bool remove_from_delete_queue = false;
+ uint32_t lmaster;
+
+ /*
+ * Torture mode only: complain if the copy already in the tdb
+ * carries a higher RSN than the header about to be written.
+ */
+ if (ctdb->flags & CTDB_FLAG_TORTURE) {
+ TDB_DATA old;
+ struct ctdb_ltdb_header *h2;
+
+ old = tdb_fetch(ctdb_db->ltdb->tdb, key);
+ h2 = (struct ctdb_ltdb_header *)old.dptr;
+ if (old.dptr != NULL &&
+ old.dsize >= hsize &&
+ h2->rsn > header->rsn) {
+ DEBUG(DEBUG_ERR,
+ ("RSN regression! %"PRIu64" %"PRIu64"\n",
+ h2->rsn, header->rsn));
+ }
+ if (old.dptr) {
+ free(old.dptr);
+ }
+ }
+
+ if (ctdb->vnn_map == NULL) {
+ /*
+ * Called from a client: always store the record
+ * Also don't call ctdb_lmaster since it uses the vnn_map!
+ */
+ keep = true;
+ goto store;
+ }
+
+ lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);
+
+ /*
+ * If we migrate an empty record off to another node
+ * and the record has not been migrated with data,
+ * delete the record instead of storing the empty record.
+ */
+ if (data.dsize != 0) {
+ keep = true;
+ } else if (header->flags & CTDB_REC_RO_FLAGS) {
+ keep = true;
+ } else if (header->flags & CTDB_REC_FLAG_AUTOMATIC) {
+ /*
+ * The record is not created by the client but
+ * automatically by the ctdb_ltdb_fetch logic that
+ * creates a record with an initial header in the
+ * ltdb before trying to migrate the record from
+ * the current lmaster. Keep it instead of trying
+ * to delete the non-existing record...
+ */
+ keep = true;
+ schedule_for_deletion = true;
+ } else if (header->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) {
+ keep = true;
+ } else if (ctdb_db->ctdb->pnn == lmaster) {
+ /*
+ * If we are lmaster, then we usually keep the record.
+ * But if we retrieve the dmaster role by a VACUUM_MIGRATE
+ * and the record is empty and has never been migrated
+ * with data, then we should delete it instead of storing it.
+ * This is part of the vacuuming process.
+ *
+ * The reason that we usually need to store even empty records
+ * on the lmaster is that a client operating directly on the
+ * lmaster (== dmaster) expects the local copy of the record to
+ * exist after successful ctdb migrate call. If the record does
+ * not exist, the client goes into a migrate loop and eventually
+ * fails. So storing the empty record makes sure that we do not
+ * need to change the client code.
+ */
+ if (!(header->flags & CTDB_REC_FLAG_VACUUM_MIGRATED)) {
+ keep = true;
+ } else if (ctdb_db->ctdb->pnn != header->dmaster) {
+ keep = true;
+ }
+ } else if (ctdb_db->ctdb->pnn == header->dmaster) {
+ keep = true;
+ }
+
+ /*
+ * For a kept record of a volatile database, where this node is
+ * the dmaster and no read-only flags are set, bump the RSN; if
+ * such a record is empty it is also queued for deletion. A kept
+ * record that is not queued for deletion is taken off the
+ * delete queue.
+ */
+ if (keep) {
+ if (ctdb_db_volatile(ctdb_db) &&
+ (ctdb_db->ctdb->pnn == header->dmaster) &&
+ !(header->flags & CTDB_REC_RO_FLAGS))
+ {
+ header->rsn++;
+
+ if (data.dsize == 0) {
+ schedule_for_deletion = true;
+ }
+ }
+ remove_from_delete_queue = !schedule_for_deletion;
+ }
+
+store:
+ /*
+ * The VACUUM_MIGRATED flag is only set temporarily for
+ * the above logic when the record was retrieved by a
+ * VACUUM_MIGRATE call and should not be stored in the
+ * database.
+ *
+ * The VACUUM_MIGRATE call is triggered by a vacuum fetch,
+ * and there are two cases in which the corresponding record
+ * is stored in the local database:
+ * 1. The record has been migrated with data in the past
+ * (the MIGRATED_WITH_DATA record flag is set).
+ * 2. The record has been filled with data again since it
+ * had been submitted in the VACUUM_FETCH message to the
+ * lmaster.
+ * For such records it is important to not store the
+ * VACUUM_MIGRATED flag in the database.
+ */
+ header->flags &= ~CTDB_REC_FLAG_VACUUM_MIGRATED;
+
+ /*
+ * Similarly, clear the AUTOMATIC flag which should not enter
+ * the local database copy since this would require client
+ * modifications to clear the flag when the client stores
+ * the record.
+ */
+ header->flags &= ~CTDB_REC_FLAG_AUTOMATIC;
+
+ /* the stored value is the header immediately followed by the data */
+ rec[0].dsize = hsize;
+ rec[0].dptr = (uint8_t *)header;
+
+ rec[1].dsize = data.dsize;
+ rec[1].dptr = data.dptr;
+
+ DEBUG(DEBUG_DEBUG, (__location__ " db[%s]: %s record: hash[0x%08x]\n",
+ ctdb_db->db_name,
+ keep?"storing":"deleting",
+ ctdb_hash(&key)));
+
+ if (keep) {
+ ret = tdb_storev(ctdb_db->ltdb->tdb, key, rec, 2, TDB_REPLACE);
+ } else {
+ ret = tdb_delete(ctdb_db->ltdb->tdb, key);
+ }
+
+ if (ret != 0) {
+ int lvl = DEBUG_ERR;
+
+ /* failing to delete a record that does not exist is only
+ reported at debug level */
+ if (keep == false &&
+ tdb_error(ctdb_db->ltdb->tdb) == TDB_ERR_NOEXIST)
+ {
+ lvl = DEBUG_DEBUG;
+ }
+
+ DEBUG(lvl, (__location__ " db[%s]: Failed to %s record: "
+ "%d - %s\n",
+ ctdb_db->db_name,
+ keep?"store":"delete", ret,
+ tdb_errorstr(ctdb_db->ltdb->tdb)));
+
+ /* the tdb operation failed, so leave the delete queue alone */
+ schedule_for_deletion = false;
+ remove_from_delete_queue = false;
+ }
+
+ if (schedule_for_deletion) {
+ int ret2;
+ ret2 = ctdb_local_schedule_for_deletion(ctdb_db, header, key);
+ if (ret2 != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " ctdb_local_schedule_for_deletion failed.\n"));
+ }
+ }
+
+ if (remove_from_delete_queue) {
+ ctdb_local_remove_from_delete_queue(ctdb_db, header, key);
+ }
+
+ return ret;
+}
+
+/*
+ * State kept by ctdb_ltdb_lock_requeue() while a request waits for
+ * its record lock; consumed by lock_fetch_callback().
+ */
+struct lock_fetch_state {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db;
+ /* function the deferred packet is handed back to */
+ void (*recv_pkt)(void *, struct ctdb_req_header *);
+ /* first argument passed to recv_pkt */
+ void *recv_context;
+ /* the deferred request packet */
+ struct ctdb_req_header *hdr;
+ /* ctdb_db->generation at the time the request was deferred */
+ uint32_t generation;
+ /* if true, resubmit the packet even if the generation changed */
+ bool ignore_generation;
+};
+
+/*
+  called when we should retry the operation
+
+  p is the lock_fetch_state set up by ctdb_ltdb_lock_requeue().  The
+  deferred packet is handed back to its receive function, unless the
+  database generation changed while it was waiting and the request
+  did not ask for the generation to be ignored, in which case the
+  packet is freed.  The locked argument is not examined.
+ */
+static void lock_fetch_callback(void *p, bool locked)
+{
+	struct lock_fetch_state *state = talloc_get_type(p, struct lock_fetch_state);
+	bool stale;
+
+	stale = (state->generation != state->ctdb_db->generation);
+
+	if (stale && !state->ignore_generation) {
+		DEBUG(DEBUG_NOTICE,("Discarding previous generation lockwait packet\n"));
+		talloc_free(state->hdr);
+	} else {
+		state->recv_pkt(state->recv_context, state->hdr);
+		DEBUG(DEBUG_INFO,(__location__ " PACKET REQUEUED\n"));
+	}
+}
+
+
+/*
+  do a non-blocking ltdb_lock, deferring this ctdb request until we
+  have the chainlock
+
+  It does the following:
+
+   1) tries to get the chainlock. If it succeeds, then it returns 0
+
+   2) if it fails to get a chainlock immediately then it sets up a
+   non-blocking chainlock via ctdb_lock_record, and when it gets the
+   chainlock it re-submits this ctdb request to the main packet
+   receive function.
+
+  This effectively queues all ctdb requests that cannot be
+  immediately satisfied until it can get the lock. This means that
+  the main ctdb daemon will not block waiting for a chainlock held by
+  a client
+
+  There are 3 possible return values:
+
+       0:    means that it got the lock immediately.
+      -1:    means that it failed to get the lock, and won't retry
+      -2:    means that it failed to get the lock immediately, but will retry
+
+  ctdb_db:           database the record lives in
+  key:               key of the record to lock
+  hdr:               request packet to defer if the lock is contended
+  recv_pkt:          function the packet is resubmitted to
+  recv_context:      first argument passed to recv_pkt
+  ignore_generation: resubmit even if the db generation has changed
+ */
+int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db,
+			   TDB_DATA key, struct ctdb_req_header *hdr,
+			   void (*recv_pkt)(void *, struct ctdb_req_header *),
+			   void *recv_context, bool ignore_generation)
+{
+	int ret;
+	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
+	struct lock_request *lreq;
+	struct lock_fetch_state *state;
+
+	ret = tdb_chainlock_nonblock(tdb, key);
+
+	if (ret != 0 &&
+	    !(errno == EACCES || errno == EAGAIN || errno == EDEADLK)) {
+		/* a hard failure - don't try again */
+		return -1;
+	}
+
+	/* when torturing, ensure we test the contended path; only give
+	   the lock back if we actually hold it */
+	if (ret == 0 &&
+	    (ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
+	    random() % 5 == 0) {
+		ret = -1;
+		tdb_chainunlock(tdb, key);
+	}
+
+	/* first the non-contended path */
+	if (ret == 0) {
+		return 0;
+	}
+
+	state = talloc(hdr, struct lock_fetch_state);
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to allocate lock fetch state\n"));
+		return -1;
+	}
+	state->ctdb = ctdb_db->ctdb;
+	state->ctdb_db = ctdb_db;
+	state->hdr = hdr;
+	state->recv_pkt = recv_pkt;
+	state->recv_context = recv_context;
+	state->generation = ctdb_db->generation;
+	state->ignore_generation = ignore_generation;
+
+	/* now the contended path */
+	lreq = ctdb_lock_record(state, ctdb_db, key, true, lock_fetch_callback, state);
+	if (lreq == NULL) {
+		/* nothing will ever use the state, don't leave it
+		   hanging off the packet */
+		talloc_free(state);
+		return -1;
+	}
+
+	/* we need to move the packet off the temporary context in ctdb_input_pkt(),
+	   so it won't be freed yet */
+	talloc_steal(state, hdr);
+
+	/* now tell the caller that we will retry asynchronously */
+	return -2;
+}
+
+/*
+  a variant of ctdb_ltdb_lock_requeue that also fetches the record
+
+  Returns whatever ctdb_ltdb_lock_requeue() returns when the lock is
+  not obtained immediately.  Otherwise returns the result of
+  ctdb_ltdb_fetch(); if that fetch fails the record is unlocked again
+  before returning.
+ */
+int ctdb_ltdb_lock_fetch_requeue(struct ctdb_db_context *ctdb_db,
+				 TDB_DATA key, struct ctdb_ltdb_header *header,
+				 struct ctdb_req_header *hdr, TDB_DATA *data,
+				 void (*recv_pkt)(void *, struct ctdb_req_header *),
+				 void *recv_context, bool ignore_generation)
+{
+	int lock_ret;
+	int fetch_ret;
+	int unlock_ret;
+
+	lock_ret = ctdb_ltdb_lock_requeue(ctdb_db, key, hdr, recv_pkt,
+					  recv_context, ignore_generation);
+	if (lock_ret != 0) {
+		return lock_ret;
+	}
+
+	fetch_ret = ctdb_ltdb_fetch(ctdb_db, key, header, hdr, data);
+	if (fetch_ret == 0) {
+		return 0;
+	}
+
+	/* the fetch failed: do not keep holding the lock */
+	unlock_ret = ctdb_ltdb_unlock(ctdb_db, key);
+	if (unlock_ret != 0) {
+		DBG_ERR("ctdb_ltdb_unlock() failed with error %d\n",
+			unlock_ret);
+	}
+
+	return fetch_ret;
+}
+
+
+/*
+  paranoid check to see if the db is empty
+
+  Traverses the local tdb and calls ctdb_fatal() if the traverse
+  returns anything other than 0.
+ */
+static void ctdb_check_db_empty(struct ctdb_db_context *ctdb_db)
+{
+	int count = tdb_traverse_read(ctdb_db->ltdb->tdb, NULL, NULL);
+
+	if (count == 0) {
+		return;
+	}
+
+	DEBUG(DEBUG_ALERT,(__location__ " tdb '%s' not empty on attach! aborting\n",
+		 ctdb_db->db_path));
+	ctdb_fatal(ctdb_db->ctdb, "database not empty on attach");
+}
+
+/*
+  Reload ctdb_db->unhealthy_reason from the persistent health tdb.
+
+  The record is looked up by database name.  A non-empty record
+  becomes the new unhealthy reason (allocated on ctdb_db); a missing
+  or empty record means the reason becomes NULL.  The previous reason
+  string is freed on success and left in place on failure.
+
+  Returns 0 on success, -1 if the reason could not be duplicated.
+ */
+int ctdb_load_persistent_health(struct ctdb_context *ctdb,
+				struct ctdb_db_context *ctdb_db)
+{
+	struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
+	char *previous = ctdb_db->unhealthy_reason;
+	char *reason = NULL;
+	bool failed = false;
+	TDB_DATA key;
+	TDB_DATA val;
+
+	key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+	key.dsize = strlen(ctdb_db->db_name);
+
+	ctdb_db->unhealthy_reason = NULL;
+
+	val = tdb_fetch(tdb, key);
+	if (val.dsize > 0) {
+		reason = talloc_strndup(ctdb_db,
+					(const char *)val.dptr,
+					val.dsize);
+		if (reason == NULL) {
+			DEBUG(DEBUG_ALERT,(__location__ " talloc_strndup(%d) failed\n",
+					   (int)val.dsize));
+			ctdb_db->unhealthy_reason = previous;
+			failed = true;
+		}
+	}
+
+	/* tdb_fetch() hands back malloc'ed data; free(NULL) is a no-op */
+	free(val.dptr);
+
+	if (failed) {
+		return -1;
+	}
+
+	talloc_free(previous);
+	ctdb_db->unhealthy_reason = reason;
+	return 0;
+}
+
+/*
+  Update the health record of a database in the persistent health tdb
+  and in ctdb_db->unhealthy_reason.
+
+  given_reason:      new unhealthy reason, NULL means healthy
+  num_healthy_nodes: if given_reason is NULL but an old reason exists
+                     and this is 0, the old reason is kept, prefixed
+                     with "NO-HEALTHY-NODES - " unless it already
+                     starts with that prefix
+
+  The update runs inside a tdb transaction.  Every failure after the
+  transaction has been started cancels it, except a failed commit,
+  which is only reported.
+
+  Returns 0 on success, -1 on failure.
+ */
+int ctdb_update_persistent_health(struct ctdb_context *ctdb,
+				  struct ctdb_db_context *ctdb_db,
+				  const char *given_reason,/* NULL means healthy */
+				  unsigned int num_healthy_nodes)
+{
+	static const char no_healthy_prefix[] = "NO-HEALTHY-NODES - ";
+	struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
+	int ret;
+	TDB_DATA key;
+	TDB_DATA val;
+	char *new_reason = NULL;
+	char *old_reason = NULL;
+
+	ret = tdb_transaction_start(tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_start('%s') failed: %d - %s\n",
+				   tdb_name(tdb), ret, tdb_errorstr(tdb)));
+		return -1;
+	}
+
+	ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+	if (ret != 0) {
+		DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+				   ctdb_db->db_name, ret));
+		goto fail;
+	}
+	old_reason = ctdb_db->unhealthy_reason;
+
+	key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+	key.dsize = strlen(ctdb_db->db_name);
+
+	if (given_reason) {
+		new_reason = talloc_strdup(ctdb_db, given_reason);
+		if (new_reason == NULL) {
+			DEBUG(DEBUG_ALERT,(__location__ " talloc_strdup(%s) failed\n",
+					  given_reason));
+			goto fail;
+		}
+	} else if (old_reason && num_healthy_nodes == 0) {
+		/*
+		 * If the reason indicates ok, but there were no healthy nodes
+		 * available, it means that we have not recovered valid content
+		 * of the db. So if there's an old reason, prefix it with
+		 * "NO-HEALTHY-NODES - "
+		 */
+		const char *prefix;
+
+		ret = strncmp(no_healthy_prefix, old_reason,
+			      strlen(no_healthy_prefix));
+		prefix = (ret != 0) ? no_healthy_prefix : "";
+
+		new_reason = talloc_asprintf(ctdb_db, "%s%s",
+					 prefix, old_reason);
+		if (new_reason == NULL) {
+			DEBUG(DEBUG_ALERT,(__location__ " talloc_asprintf(%s%s) failed\n",
+					  prefix, old_reason));
+			goto fail;
+		}
+	}
+
+	if (new_reason) {
+		val.dptr = discard_const_p(uint8_t, new_reason);
+		val.dsize = strlen(new_reason);
+
+		ret = tdb_store(tdb, key, val, TDB_REPLACE);
+		if (ret != 0) {
+			/* log before cancelling so that tdb_errorstr()
+			   still describes the store failure */
+			DEBUG(DEBUG_ALERT,(__location__ " tdb_store('%s', %s, %s) failed: %d - %s\n",
+					   tdb_name(tdb), ctdb_db->db_name, new_reason,
+					   ret, tdb_errorstr(tdb)));
+			goto fail;
+		}
+		DEBUG(DEBUG_ALERT,("Updated db health for db(%s) to: %s\n",
+				   ctdb_db->db_name, new_reason));
+	} else if (old_reason) {
+		ret = tdb_delete(tdb, key);
+		if (ret != 0) {
+			DEBUG(DEBUG_ALERT,(__location__ " tdb_delete('%s', %s) failed: %d - %s\n",
+					   tdb_name(tdb), ctdb_db->db_name,
+					   ret, tdb_errorstr(tdb)));
+			goto fail;
+		}
+		DEBUG(DEBUG_NOTICE,("Updated db health for db(%s): OK\n",
+				   ctdb_db->db_name));
+	}
+
+	ret = tdb_transaction_commit(tdb);
+	if (ret != TDB_SUCCESS) {
+		DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_commit('%s') failed: %d - %s\n",
+				   tdb_name(tdb), ret, tdb_errorstr(tdb)));
+		talloc_free(new_reason);
+		return -1;
+	}
+
+	talloc_free(old_reason);
+	ctdb_db->unhealthy_reason = new_reason;
+
+	return 0;
+
+fail:
+	/* never leave the transaction open behind the caller's back */
+	tdb_transaction_cancel(tdb);
+	talloc_free(new_reason);
+	return -1;
+}
+
+/*
+  Move a corrupted database file out of the way.
+
+  The database is first marked unhealthy in the persistent health tdb,
+  with a reason naming the backup file, and then ctdb_db->db_path is
+  renamed to "<db_path>.corrupted.<UTC timestamp>.0Z".
+
+  Returns 0 on success, -1 on failure.  Temporary strings are freed on
+  every path.
+ */
+static int ctdb_backup_corrupted_tdb(struct ctdb_context *ctdb,
+				     struct ctdb_db_context *ctdb_db)
+{
+	time_t now = time(NULL);
+	char *new_path;
+	char *new_reason;
+	int ret;
+	struct tm *tm;
+
+	tm = gmtime(&now);
+	if (tm == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " gmtime() failed\n"));
+		return -1;
+	}
+
+	/* formatted like: foo.tdb.0.corrupted.20091204160825.0Z */
+	new_path = talloc_asprintf(ctdb_db, "%s.corrupted."
+				   "%04u%02u%02u%02u%02u%02u.0Z",
+				   ctdb_db->db_path,
+				   tm->tm_year+1900, tm->tm_mon+1,
+				   tm->tm_mday, tm->tm_hour, tm->tm_min,
+				   tm->tm_sec);
+	if (new_path == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+		return -1;
+	}
+
+	new_reason = talloc_asprintf(ctdb_db,
+				     "ERROR - Backup of corrupted TDB in '%s'",
+				     new_path);
+	if (new_reason == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+		talloc_free(new_path);
+		return -1;
+	}
+	ret = ctdb_update_persistent_health(ctdb, ctdb_db, new_reason, 0);
+	talloc_free(new_reason);
+	if (ret != 0) {
+		DEBUG(DEBUG_CRIT,(__location__
+				 ": ctdb_backup_corrupted_tdb(%s) failed to update persistent health\n",
+				 ctdb_db->db_path));
+		talloc_free(new_path);
+		return -1;
+	}
+
+	ret = rename(ctdb_db->db_path, new_path);
+	if (ret != 0) {
+		DEBUG(DEBUG_CRIT,(__location__
+				  ": ctdb_backup_corrupted_tdb(%s) rename to %s failed: %d - %s\n",
+				  ctdb_db->db_path, new_path,
+				  errno, strerror(errno)));
+		talloc_free(new_path);
+		return -1;
+	}
+
+	DEBUG(DEBUG_CRIT,(__location__
+			 ": ctdb_backup_corrupted_tdb(%s) renamed to %s\n",
+			 ctdb_db->db_path, new_path));
+	talloc_free(new_path);
+	return 0;
+}
+
+/*
+  Reload and report the health of every persistent database.
+
+  Returns 0 if all persistent databases are healthy, -1 if at least
+  one is unhealthy or its health record could not be loaded.
+ */
+int ctdb_recheck_persistent_health(struct ctdb_context *ctdb)
+{
+	struct ctdb_db_context *db;
+	int healthy = 0;
+	int unhealthy = 0;
+
+	for (db = ctdb->db_list; db != NULL; db = db->next) {
+		if (!ctdb_db_persistent(db)) {
+			continue;
+		}
+
+		if (ctdb_load_persistent_health(ctdb, db) != 0) {
+			DEBUG(DEBUG_ALERT,(__location__
+					   " load persistent health for '%s' failed\n",
+					   db->db_path));
+			return -1;
+		}
+
+		if (db->unhealthy_reason != NULL) {
+			unhealthy++;
+			DEBUG(DEBUG_ALERT,(__location__
+					   " persistent db '%s' unhealthy: %s\n",
+					   db->db_path,
+					   db->unhealthy_reason));
+		} else {
+			healthy++;
+			DEBUG(DEBUG_INFO,(__location__
+					   " persistent db '%s' healthy\n",
+					   db->db_path));
+		}
+	}
+
+	DEBUG(DEBUG_NOTICE,
+	      ("ctdb_recheck_persistent_health: OK[%d] FAIL[%d]\n",
+	       healthy, unhealthy));
+
+	return (unhealthy == 0) ? 0 : -1;
+}
+
+
+/*
+  mark a database as healthy
+
+  indata carries the 32-bit database id.  If the database was marked
+  unhealthy beforehand and the daemon is still in the STARTUP
+  runstate, the recovery mode is set to active.
+
+  Returns 0 on success, -1 if the database is unknown or the health
+  record could not be updated.
+ */
+int32_t ctdb_control_db_set_healthy(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	uint32_t db_id = *(uint32_t *)indata.dptr;
+	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);
+	bool was_unhealthy;
+
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
+		return -1;
+	}
+
+	/* must be sampled before the update clears the reason */
+	was_unhealthy = (ctdb_db->unhealthy_reason != NULL);
+
+	if (ctdb_update_persistent_health(ctdb, ctdb_db, NULL, 1) != 0) {
+		DEBUG(DEBUG_ERR,(__location__
+				 " ctdb_update_persistent_health(%s) failed\n",
+				 ctdb_db->db_name));
+		return -1;
+	}
+
+	if (was_unhealthy && ctdb->runstate == CTDB_RUNSTATE_STARTUP) {
+		DEBUG(DEBUG_ERR, (__location__ " db %s become healthy  - force recovery for startup\n",
+				  ctdb_db->db_name));
+		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+	}
+
+	return 0;
+}
+
+/*
+  Report the health of a database.
+
+  indata carries the 32-bit database id.  On success outdata is empty
+  for a healthy database; otherwise it references the unhealthy reason
+  string owned by the database context, including its terminating NUL.
+
+  Returns 0 on success, -1 if the database is unknown or its health
+  record could not be loaded.
+ */
+int32_t ctdb_control_db_get_health(struct ctdb_context *ctdb,
+				   TDB_DATA indata,
+				   TDB_DATA *outdata)
+{
+	uint32_t db_id = *(uint32_t *)indata.dptr;
+	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);
+	const char *reason;
+
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
+		return -1;
+	}
+
+	if (ctdb_load_persistent_health(ctdb, ctdb_db) != 0) {
+		DEBUG(DEBUG_ERR,(__location__
+				 " ctdb_load_persistent_health(%s) failed\n",
+				 ctdb_db->db_name));
+		return -1;
+	}
+
+	reason = ctdb_db->unhealthy_reason;
+
+	*outdata = tdb_null;
+	if (reason != NULL) {
+		outdata->dptr = (uint8_t *)ctdb_db->unhealthy_reason;
+		outdata->dsize = strlen(reason)+1;
+	}
+
+	return 0;
+}
+
+
+/*
+  Enable the readonly property on a volatile database.
+
+  Opens (creating if necessary) the tracking tdb "<db_path>.RO" and
+  then sets the readonly flag on the database.  Nothing is done if the
+  flag is already set.
+
+  Returns 0 on success, -1 if the database is not volatile or the
+  tracking database cannot be set up.
+ */
+int ctdb_set_db_readonly(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db)
+{
+	char *ropath;
+	int result = -1;
+
+	if (ctdb_db_readonly(ctdb_db)) {
+		return 0;
+	}
+
+	if (! ctdb_db_volatile(ctdb_db)) {
+		DEBUG(DEBUG_ERR,
+		      ("Non-volatile databases do not support readonly flag\n"));
+		return -1;
+	}
+
+	ropath = talloc_asprintf(ctdb_db, "%s.RO", ctdb_db->db_path);
+	if (ropath == NULL) {
+		DEBUG(DEBUG_CRIT,("Failed to asprintf the tracking database\n"));
+		return -1;
+	}
+
+	ctdb_db->rottdb = tdb_open(ropath,
+				   ctdb->tunable.database_hash_size,
+				   TDB_NOLOCK|TDB_CLEAR_IF_FIRST|TDB_NOSYNC,
+				   O_CREAT|O_RDWR, 0600);
+	if (ctdb_db->rottdb != NULL) {
+		DEBUG(DEBUG_NOTICE,("OPENED tracking database : '%s'\n", ropath));
+
+		ctdb_db_set_readonly(ctdb_db);
+
+		DEBUG(DEBUG_NOTICE, ("Readonly property set on DB %s\n", ctdb_db->db_name));
+
+		result = 0;
+	} else {
+		DEBUG(DEBUG_CRIT,("Failed to open/create the tracking database '%s'\n", ropath));
+	}
+
+	talloc_free(ropath);
+	return result;
+}
+
+/*
+  attach to a database, handling both persistent and non-persistent databases
+  return 0 on success, -1 on failure
+
+  ctdb:             daemon context the database is attached to
+  db_name:          database name; its hash becomes the database id
+  db_flags:         database type flags
+  unhealthy_reason: if not NULL and the database is persistent, it is
+                    recorded as the database's health before attaching
+
+  A persistent database that cannot be opened, or that fails
+  tdb_check(), may be moved aside with ctdb_backup_corrupted_tdb() and
+  opened afresh, but only while remaining_tries is non-zero.
+
+  On every failure the partially set up database context is freed.
+  Once it has been linked into ctdb->db_list it is unlinked first, so
+  the list never holds a freed entry (no talloc destructor that would
+  do this is installed here).
+ */
+static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name,
+			     uint8_t db_flags, const char *unhealthy_reason)
+{
+	struct ctdb_db_context *ctdb_db, *tmp_db;
+	int ret;
+	struct TDB_DATA key;
+	int tdb_flags;
+	int mode = 0600;
+	int remaining_tries = 0;
+
+	ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
+	CTDB_NO_MEMORY(ctdb, ctdb_db);
+
+	ctdb_db->ctdb = ctdb;
+	ctdb_db->db_name = talloc_strdup(ctdb_db, db_name);
+	CTDB_NO_MEMORY(ctdb, ctdb_db->db_name);
+
+	key.dsize = strlen(db_name)+1;
+	key.dptr  = discard_const(db_name);
+	ctdb_db->db_id = ctdb_hash(&key);
+	ctdb_db->db_flags = db_flags;
+
+	if (ctdb_db_volatile(ctdb_db)) {
+		ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
+		if (ctdb_db->delete_queue == NULL) {
+			CTDB_NO_MEMORY(ctdb, ctdb_db->delete_queue);
+		}
+
+		ctdb_db->fetch_queue = trbt_create(ctdb_db, 0);
+		if (ctdb_db->fetch_queue == NULL) {
+			CTDB_NO_MEMORY(ctdb, ctdb_db->fetch_queue);
+		}
+
+		ctdb_db->ctdb_ltdb_store_fn = ctdb_ltdb_store_server;
+	}
+
+	/* check for hash collisions */
+	for (tmp_db=ctdb->db_list;tmp_db;tmp_db=tmp_db->next) {
+		if (tmp_db->db_id == ctdb_db->db_id) {
+			DEBUG(DEBUG_CRIT,("db_id 0x%x hash collision. name1='%s' name2='%s'\n",
+				 tmp_db->db_id, db_name, tmp_db->db_name));
+			goto fail;
+		}
+	}
+
+	if (ctdb_db_persistent(ctdb_db)) {
+		if (unhealthy_reason) {
+			ret = ctdb_update_persistent_health(ctdb, ctdb_db,
+							    unhealthy_reason, 0);
+			if (ret != 0) {
+				DEBUG(DEBUG_ALERT,(__location__ " ctdb_update_persistent_health('%s','%s') failed: %d\n",
+						   ctdb_db->db_name, unhealthy_reason, ret));
+				goto fail;
+			}
+		}
+
+		/* one repair attempt is allowed, but never once the
+		   daemon has reached the RUNNING runstate */
+		if (ctdb->max_persistent_check_errors > 0) {
+			remaining_tries = 1;
+		}
+		if (ctdb->runstate == CTDB_RUNSTATE_RUNNING) {
+			remaining_tries = 0;
+		}
+
+		ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+		if (ret != 0) {
+			DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+				   ctdb_db->db_name, ret));
+			goto fail;
+		}
+	}
+
+	if (ctdb_db->unhealthy_reason && remaining_tries == 0) {
+		DEBUG(DEBUG_ALERT,(__location__ "ERROR: tdb %s is marked as unhealthy: %s\n",
+				   ctdb_db->db_name, ctdb_db->unhealthy_reason));
+		goto fail;
+	}
+
+	if (ctdb_db->unhealthy_reason) {
+		/* this is just a warning, but we want that in the log file! */
+		DEBUG(DEBUG_ALERT,(__location__ "Warning: tdb %s is marked as unhealthy: %s\n",
+				   ctdb_db->db_name, ctdb_db->unhealthy_reason));
+	}
+
+	/* open the database */
+	ctdb_db->db_path = talloc_asprintf(ctdb_db, "%s/%s.%u",
+					   ctdb_db_persistent(ctdb_db) ?
+						ctdb->db_directory_persistent :
+						ctdb->db_directory,
+					   db_name, ctdb->pnn);
+	if (ctdb_db->db_path == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+		goto fail;
+	}
+
+	tdb_flags = ctdb_db_tdb_flags(db_flags,
+				      ctdb->valgrinding,
+				      ctdb_config.tdb_mutexes);
+
+again:
+	ctdb_db->ltdb = tdb_wrap_open(ctdb_db, ctdb_db->db_path,
+				      ctdb->tunable.database_hash_size,
+				      tdb_flags,
+				      O_CREAT|O_RDWR, mode);
+	if (ctdb_db->ltdb == NULL) {
+		struct stat st;
+		int saved_errno = errno;
+
+		if (! ctdb_db_persistent(ctdb_db)) {
+			DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
+					  ctdb_db->db_path,
+					  saved_errno,
+					  strerror(saved_errno)));
+			goto fail;
+		}
+
+		if (remaining_tries == 0) {
+			DEBUG(DEBUG_CRIT,(__location__
+					  "Failed to open persistent tdb '%s': %d - %s\n",
+					  ctdb_db->db_path,
+					  saved_errno,
+					  strerror(saved_errno)));
+			goto fail;
+		}
+
+		ret = stat(ctdb_db->db_path, &st);
+		if (ret != 0) {
+			DEBUG(DEBUG_CRIT,(__location__
+					  "Failed to open persistent tdb '%s': %d - %s\n",
+					  ctdb_db->db_path,
+					  saved_errno,
+					  strerror(saved_errno)));
+			goto fail;
+		}
+
+		ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+		if (ret != 0) {
+			DEBUG(DEBUG_CRIT,(__location__
+					  "Failed to open persistent tdb '%s': %d - %s\n",
+					  ctdb_db->db_path,
+					  saved_errno,
+					  strerror(saved_errno)));
+			goto fail;
+		}
+
+		/* retry with a fresh file that has the old file's mode */
+		remaining_tries--;
+		mode = st.st_mode;
+		goto again;
+	}
+
+	if (!ctdb_db_persistent(ctdb_db)) {
+		ctdb_check_db_empty(ctdb_db);
+	} else {
+		ret = tdb_check(ctdb_db->ltdb->tdb, NULL, NULL);
+		if (ret != 0) {
+			int fd;
+			struct stat st;
+
+			DEBUG(DEBUG_CRIT,("tdb_check(%s) failed: %d - %s\n",
+					  ctdb_db->db_path, ret,
+					  tdb_errorstr(ctdb_db->ltdb->tdb)));
+			if (remaining_tries == 0) {
+				goto fail;
+			}
+
+			fd = tdb_fd(ctdb_db->ltdb->tdb);
+			ret = fstat(fd, &st);
+			if (ret != 0) {
+				DEBUG(DEBUG_CRIT,(__location__
+						  "Failed to fstat() persistent tdb '%s': %d - %s\n",
+						  ctdb_db->db_path,
+						  errno,
+						  strerror(errno)));
+				goto fail;
+			}
+
+			/* close the TDB */
+			talloc_free(ctdb_db->ltdb);
+			ctdb_db->ltdb = NULL;
+
+			ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+			if (ret != 0) {
+				DEBUG(DEBUG_CRIT,("Failed to backup corrupted tdb '%s'\n",
+						  ctdb_db->db_path));
+				goto fail;
+			}
+
+			remaining_tries--;
+			mode = st.st_mode;
+			goto again;
+		}
+	}
+
+	/* remember the flags the client has specified */
+	tdb_add_flags(ctdb_db->ltdb->tdb, tdb_flags);
+
+
+	/* set up a rb tree we can use to track which records we have a
+	   fetch-lock in-flight for so we can defer any additional calls
+	   for the same record.
+	 */
+	ctdb_db->deferred_fetch = trbt_create(ctdb_db, 0);
+	if (ctdb_db->deferred_fetch == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to create deferred fetch rb tree for ctdb database\n"));
+		goto fail;
+	}
+
+	ctdb_db->defer_dmaster = trbt_create(ctdb_db, 0);
+	if (ctdb_db->defer_dmaster == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to create defer dmaster rb tree for %s\n",
+				  ctdb_db->db_name));
+		goto fail;
+	}
+
+	/* from here on every failure must unlink the context again */
+	DLIST_ADD(ctdb->db_list, ctdb_db);
+
+	/* setting this can help some high churn databases */
+	tdb_set_max_dead(ctdb_db->ltdb->tdb, ctdb->tunable.database_max_dead);
+
+	/*
+	   all databases support the "null" function. we need this in
+	   order to do forced migration of records
+	*/
+	ret = ctdb_daemon_set_call(ctdb, ctdb_db->db_id, ctdb_null_func, CTDB_NULL_FUNC);
+	if (ret != 0) {
+		DEBUG(DEBUG_CRIT,("Failed to setup null function for '%s'\n", ctdb_db->db_name));
+		goto fail_unlink;
+	}
+
+	/*
+	   all databases support the "fetch" function. we need this
+	   for efficient Samba3 ctdb fetch
+	*/
+	ret = ctdb_daemon_set_call(ctdb, ctdb_db->db_id, ctdb_fetch_func, CTDB_FETCH_FUNC);
+	if (ret != 0) {
+		DEBUG(DEBUG_CRIT,("Failed to setup fetch function for '%s'\n", ctdb_db->db_name));
+		goto fail_unlink;
+	}
+
+	/*
+	   all databases support the "fetch_with_header" function. we need this
+	   for efficient readonly record fetches
+	*/
+	ret = ctdb_daemon_set_call(ctdb, ctdb_db->db_id, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC);
+	if (ret != 0) {
+		DEBUG(DEBUG_CRIT,("Failed to setup fetch function for '%s'\n", ctdb_db->db_name));
+		goto fail_unlink;
+	}
+
+	ret = ctdb_vacuum_init(ctdb_db);
+	if (ret != 0) {
+		DEBUG(DEBUG_CRIT,("Failed to setup vacuuming for "
+				  "database '%s'\n", ctdb_db->db_name));
+		goto fail_unlink;
+	}
+
+	ret = ctdb_migration_init(ctdb_db);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to setup migration tracking for db '%s'\n",
+		       ctdb_db->db_name));
+		goto fail_unlink;
+	}
+
+	ret = db_hash_init(ctdb_db, "lock_log", 2048, DB_HASH_COMPLEX,
+			   &ctdb_db->lock_log);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to setup lock logging for db '%s'\n",
+		       ctdb_db->db_name));
+		goto fail_unlink;
+	}
+
+	ctdb_db->generation = ctdb->vnn_map->generation;
+
+	DEBUG(DEBUG_NOTICE,("Attached to database '%s' with flags 0x%x\n",
+			    ctdb_db->db_path, tdb_flags));
+
+	/* success */
+	return 0;
+
+fail_unlink:
+	DLIST_REMOVE(ctdb->db_list, ctdb_db);
+fail:
+	talloc_free(ctdb_db);
+	return -1;
+}
+
+
+/*
+ * State kept for one DB attach control that has been put on hold.
+ * Instances are linked into ctdb->deferred_attach.
+ */
+struct ctdb_deferred_attach_context {
+ /* list linkage used by DLIST_ADD()/DLIST_REMOVE() on ctdb->deferred_attach */
+ struct ctdb_deferred_attach_context *next, *prev;
+ /* daemon context whose deferred_attach list this entry lives on */
+ struct ctdb_context *ctdb;
+ /* the held-back control request (talloc-stolen onto this context) */
+ struct ctdb_req_control_old *c;
+};
+
+
+/*
+ * talloc destructor for a deferred attach context: unlink it from the
+ * daemon's deferred_attach list before the memory is released.
+ * Always returns 0 so that the free proceeds.
+ */
+static int ctdb_deferred_attach_destructor(struct ctdb_deferred_attach_context *da_ctx)
+{
+	struct ctdb_context *ctdb = da_ctx->ctdb;
+
+	DLIST_REMOVE(ctdb->deferred_attach, da_ctx);
+	return 0;
+}
+
+/*
+ * Timer handler armed when an attach is deferred: the request has
+ * waited too long, so answer the saved control with status -1 and
+ * drop the deferral context.
+ */
+static void ctdb_deferred_attach_timeout(struct tevent_context *ev,
+					 struct tevent_timer *te,
+					 struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_attach_context *expired;
+
+	expired = talloc_get_type(private_data,
+				  struct ctdb_deferred_attach_context);
+
+	ctdb_request_control_reply(expired->ctdb, expired->c, NULL, -1, NULL);
+	talloc_free(expired);
+}
+
+/*
+ * Timer handler used to replay a deferred attach: feed the saved
+ * control back into the normal packet input path, then drop the
+ * deferral context.
+ */
+static void ctdb_deferred_attach_callback(struct tevent_context *ev,
+					  struct tevent_timer *te,
+					  struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_attach_context *deferred;
+	struct ctdb_req_header *pkt;
+
+	deferred = talloc_get_type(private_data,
+				   struct ctdb_deferred_attach_context);
+	pkt = (struct ctdb_req_header *)deferred->c;
+
+	/*
+	 * ctdb_input_pkt() talloc-steals the packet, so freeing the
+	 * context afterwards does not take the request with it.
+	 */
+	ctdb_input_pkt(deferred->ctdb, pkt);
+	talloc_free(deferred);
+}
+
+/*
+ * Release every deferred DB attach request.
+ *
+ * Each pending context is unlinked from ctdb->deferred_attach and a
+ * timer is armed, one second from now, that replays the saved request
+ * from the main event loop via ctdb_deferred_attach_callback().
+ *
+ * Always returns 0.
+ */
+int ctdb_process_deferred_attach(struct ctdb_context *ctdb)
+{
+	struct ctdb_deferred_attach_context *pending;
+
+	for (pending = ctdb->deferred_attach;
+	     pending != NULL;
+	     pending = ctdb->deferred_attach) {
+		DLIST_REMOVE(ctdb->deferred_attach, pending);
+		tevent_add_timer(ctdb->ev, pending,
+				 timeval_current_ofs(1,0),
+				 ctdb_deferred_attach_callback, pending);
+	}
+
+	return 0;
+}
+
+/*
+ * A client (or another daemon) has asked to attach a database.
+ *
+ * indata      - the database name as a NUL-terminated string
+ * outdata     - on success points at the db_id of the attached database
+ * db_flags    - CTDB_DB_FLAGS_* requested for the database
+ * srcnode     - node the control came from
+ * client_id   - reqid of the local client, 0 if not from a local client
+ * c           - the control request; stolen if the attach is deferred
+ * async_reply - set to true when the reply is deferred
+ *
+ * Returns 0 on success (or when the request was deferred), -1 on error.
+ */
+int32_t ctdb_control_db_attach(struct ctdb_context *ctdb,
+			       TDB_DATA indata,
+			       TDB_DATA *outdata,
+			       uint8_t db_flags,
+			       uint32_t srcnode,
+			       uint32_t client_id,
+			       struct ctdb_req_control_old *c,
+			       bool *async_reply)
+{
+	const char *db_name = (const char *)indata.dptr;
+	struct ctdb_db_context *db;
+	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
+	struct ctdb_client *client = NULL;
+	uint32_t opcode;
+
+	/*
+	 * The name arrives in a control payload and is used as a C
+	 * string below (logging, lookup, attach), so refuse anything
+	 * that is missing or not NUL-terminated within the buffer.
+	 */
+	if (indata.dptr == NULL || indata.dsize == 0 ||
+	    indata.dptr[indata.dsize - 1] != '\0') {
+		DEBUG(DEBUG_ERR, ("DB Attach refused: database name is "
+				  "missing or not NUL-terminated\n"));
+		return -1;
+	}
+
+	if (ctdb->tunable.allow_client_db_attach == 0) {
+		DEBUG(DEBUG_ERR, ("DB Attach to database %s denied by tunable "
+				  "AllowClientDBAttach == 0\n", db_name));
+		return -1;
+	}
+
+	/* don't allow any local clients to attach while we are in recovery mode
+	 * except for the recovery daemon.
+	 * allow all attach from the network since these are always from remote
+	 * recovery daemons.
+	 */
+	if (srcnode == ctdb->pnn && client_id != 0) {
+		client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	}
+	if (client != NULL) {
+		/* If the node is inactive it is not part of the cluster
+		   and we should not allow clients to attach to any
+		   databases
+		*/
+		if (node->flags & NODE_FLAGS_INACTIVE) {
+			DEBUG(DEBUG_ERR,("DB Attach to database %s refused since node is inactive (flags=0x%x)\n", db_name, node->flags));
+			return -1;
+		}
+
+		/* an attach flagged as "from recovery" is only valid during recovery */
+		if ((c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) &&
+		    ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) {
+			DBG_ERR("Attach from recovery refused because "
+				"recovery is not active\n");
+			return -1;
+		}
+
+		/*
+		 * An ordinary attach during recovery, or before the
+		 * startup runstate, is parked until
+		 * ctdb_process_deferred_attach() replays it or the
+		 * deferral timer fails it.
+		 */
+		if (!(c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) &&
+		    (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE ||
+		     ctdb->runstate < CTDB_RUNSTATE_STARTUP)) {
+			struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context);
+
+			if (da_ctx == NULL) {
+				DEBUG(DEBUG_ERR,("DB Attach to database %s deferral for client with pid:%d failed due to OOM.\n", db_name, client->pid));
+				return -1;
+			}
+
+			da_ctx->ctdb = ctdb;
+			da_ctx->c = talloc_steal(da_ctx, c);
+			talloc_set_destructor(da_ctx, ctdb_deferred_attach_destructor);
+			DLIST_ADD(ctdb->deferred_attach, da_ctx);
+
+			tevent_add_timer(ctdb->ev, da_ctx,
+					 timeval_current_ofs(ctdb->tunable.deferred_attach_timeout, 0),
+					 ctdb_deferred_attach_timeout, da_ctx);
+
+			DEBUG(DEBUG_ERR,("DB Attach to database %s deferred for client with pid:%d since node is in recovery mode.\n", db_name, client->pid));
+			*async_reply = true;
+			return 0;
+		}
+	}
+
+	/* see if we already have this name */
+	db = ctdb_db_handle(ctdb, db_name);
+	if (db) {
+		/* a re-attach must not ask for flags the database lacks */
+		if ((db->db_flags & db_flags) != db_flags) {
+			DEBUG(DEBUG_ERR,
+			      ("Error: Failed to re-attach with 0x%x flags,"
+			       " database has 0x%x flags\n", db_flags,
+			       db->db_flags));
+			return -1;
+		}
+		outdata->dptr = (uint8_t *)&db->db_id;
+		outdata->dsize = sizeof(db->db_id);
+		return 0;
+	}
+
+	if (ctdb_local_attach(ctdb, db_name, db_flags, NULL) != 0) {
+		return -1;
+	}
+
+	db = ctdb_db_handle(ctdb, db_name);
+	if (!db) {
+		DEBUG(DEBUG_ERR,("Failed to find db handle for name '%s'\n", db_name));
+		return -1;
+	}
+
+	outdata->dptr = (uint8_t *)&db->db_id;
+	outdata->dsize = sizeof(db->db_id);
+
+	/* Try to ensure it's locked in mem */
+	lockdown_memory(ctdb->valgrinding);
+
+	if (ctdb_db_persistent(db)) {
+		opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
+	} else if (ctdb_db_replicated(db)) {
+		opcode = CTDB_CONTROL_DB_ATTACH_REPLICATED;
+	} else {
+		opcode = CTDB_CONTROL_DB_ATTACH;
+	}
+
+	/* tell all the other nodes about this database */
+	ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, opcode,
+				 0, CTDB_CTRL_FLAG_NOREPLY,
+				 indata, NULL, NULL);
+
+	/* success */
+	return 0;
+}
+
+/*
+ * A client (or another daemon) has asked to detach from a database.
+ *
+ * indata    - the 32-bit db_id of the database
+ * client_id - reqid of the requesting local client, 0 when the control
+ *             comes from another daemon
+ *
+ * A request from a client is only broadcast to all connected nodes;
+ * the actual detach is done when the control arrives from a daemon.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int32_t ctdb_control_db_detach(struct ctdb_context *ctdb, TDB_DATA indata,
+			       uint32_t client_id)
+{
+	uint32_t db_id;
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_client *client = NULL;
+
+	/* never read a db_id from a short or missing payload */
+	if (indata.dptr == NULL || indata.dsize < sizeof(db_id)) {
+		DEBUG(DEBUG_ERR, ("Invalid data in DB detach\n"));
+		return -1;
+	}
+	/* memcpy avoids an unaligned access through a cast pointer */
+	memcpy(&db_id, indata.dptr, sizeof(db_id));
+
+	ctdb_db = find_ctdb_db(ctdb, db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR, ("Invalid dbid 0x%08x in DB detach\n",
+				  db_id));
+		return -1;
+	}
+
+	if (ctdb->tunable.allow_client_db_attach == 1) {
+		DEBUG(DEBUG_ERR, ("DB detach from database %s denied. "
+				  "Clients are allowed access to databases "
+				  "(AllowClientDBAttach == 1)\n",
+				  ctdb_db->db_name));
+		return -1;
+	}
+
+	if (! ctdb_db_volatile(ctdb_db)) {
+		DEBUG(DEBUG_ERR,
+		      ("Detaching non-volatile database %s denied\n",
+		       ctdb_db->db_name));
+		return -1;
+	}
+
+	/* Cannot detach from database when in recovery */
+	if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE) {
+		DEBUG(DEBUG_ERR, ("DB detach denied while in recovery\n"));
+		return -1;
+	}
+
+	/* If a control comes from a client, then broadcast it to all nodes.
+	 * Do the actual detach only if the control comes from other daemons.
+	 */
+	if (client_id != 0) {
+		client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+		if (client != NULL) {
+			/* forward the control to all the nodes */
+			ctdb_daemon_send_control(ctdb,
+						 CTDB_BROADCAST_CONNECTED, 0,
+						 CTDB_CONTROL_DB_DETACH, 0,
+						 CTDB_CTRL_FLAG_NOREPLY,
+						 indata, NULL, NULL);
+			return 0;
+		}
+		DEBUG(DEBUG_ERR, ("Client has gone away. Failing DB detach "
+				  "for database '%s'\n", ctdb_db->db_name));
+		return -1;
+	}
+
+	/* Disable vacuuming and drop all vacuuming data */
+	talloc_free(ctdb_db->vacuum_handle);
+	talloc_free(ctdb_db->delete_queue);
+	talloc_free(ctdb_db->fetch_queue);
+
+	/* Terminate any deferred fetch */
+	talloc_free(ctdb_db->deferred_fetch);
+
+	/* Terminate any traverses */
+	while (ctdb_db->traverse) {
+		talloc_free(ctdb_db->traverse);
+	}
+
+	/* Terminate any revokes */
+	while (ctdb_db->revokechild_active) {
+		talloc_free(ctdb_db->revokechild_active);
+	}
+
+	/* Free readonly tracking database */
+	if (ctdb_db_readonly(ctdb_db)) {
+		talloc_free(ctdb_db->rottdb);
+	}
+
+	DLIST_REMOVE(ctdb->db_list, ctdb_db);
+
+	DEBUG(DEBUG_NOTICE, ("Detached from database '%s'\n",
+			     ctdb_db->db_name));
+	talloc_free(ctdb_db);
+
+	return 0;
+}
+
+/*
+ * Attach to all existing persistent databases.
+ *
+ * Scans ctdb->db_directory_persistent for files named
+ * "<name>.tdb.<pnn>" where <pnn> is this node's number, and attaches
+ * each one as "<name>.tdb" with CTDB_DB_FLAGS_PERSISTENT.
+ *
+ * unhealthy_reason is passed through to ctdb_local_attach().
+ *
+ * Returns 0 on success (including when the directory cannot be
+ * opened), -1 on failure.
+ */
+static int ctdb_attach_persistent(struct ctdb_context *ctdb,
+				  const char *unhealthy_reason)
+{
+	DIR *d;
+	struct dirent *de;
+
+	/* open the persistent db directory and scan it for files */
+	d = opendir(ctdb->db_directory_persistent);
+	if (d == NULL) {
+		return 0;
+	}
+
+	while ((de=readdir(d))) {
+		char *p, *s, *q;
+		size_t len = strlen(de->d_name);
+		uint32_t node;
+		int invalid_name = 0;
+
+		s = talloc_strdup(ctdb, de->d_name);
+		if (s == NULL) {
+			closedir(d);
+			CTDB_NO_MEMORY(ctdb, s);
+		}
+
+		/* only consider names containing ".tdb." */
+		p = strstr(s, ".tdb.");
+		if (len < 7 || p == NULL) {
+			talloc_free(s);
+			continue;
+		}
+
+		/*
+		 * Only accept names where ".tdb." is followed purely by
+		 * digits.  isdigit() must be given an unsigned char
+		 * value: a plain char may be negative for non-ASCII
+		 * file names, which is undefined behaviour.
+		 */
+		for (q = p+5; *q != 0; q++) {
+			if (!isdigit((unsigned char)*q)) {
+				invalid_name = 1;
+				break;
+			}
+		}
+		if (invalid_name == 1 || sscanf(p+5, "%u", &node) != 1 || node != ctdb->pnn) {
+			DEBUG(DEBUG_ERR,("Ignoring persistent database '%s'\n", de->d_name));
+			talloc_free(s);
+			continue;
+		}
+		/* cut off the ".<pnn>" suffix, leaving "<name>.tdb" */
+		p[4] = 0;
+
+		if (ctdb_local_attach(ctdb, s, CTDB_DB_FLAGS_PERSISTENT, unhealthy_reason) != 0) {
+			DEBUG(DEBUG_ERR,("Failed to attach to persistent database '%s'\n", de->d_name));
+			closedir(d);
+			talloc_free(s);
+			return -1;
+		}
+
+		DEBUG(DEBUG_INFO,("Attached to persistent database %s\n", s));
+
+		talloc_free(s);
+	}
+	closedir(d);
+	return 0;
+}
+
+/*
+ * Wipe the persistent health database at 'path' by opening it once
+ * with TDB_CLEAR_IF_FIRST, and record in *unhealthy_reason (allocated
+ * on ctdb) that this was done.
+ *
+ * Returns 0 on success, -1 on failure (already logged).  On failure
+ * *unhealthy_reason may have been set and is left for the caller to
+ * free.
+ */
+static int ctdb_clear_persistent_health(struct ctdb_context *ctdb,
+					const char *path,
+					char **unhealthy_reason)
+{
+	struct tdb_wrap *tdb;
+
+	*unhealthy_reason = talloc_asprintf(ctdb, "WARNING - '%s' %s - %s",
+					    path,
+					    "was cleared after a failure",
+					    "manual verification needed");
+	if (*unhealthy_reason == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+		return -1;
+	}
+
+	tdb = tdb_wrap_open(ctdb, path,
+			    0, TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING,
+			    O_CREAT | O_RDWR, 0600);
+	if (tdb == NULL) {
+		DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s\n",
+				  path,
+				  errno,
+				  strerror(errno)));
+		return -1;
+	}
+
+	/* the open was only needed for its clearing side effect */
+	talloc_free(tdb);
+	return 0;
+}
+
+/*
+ * Open the persistent health database and attach to all persistent
+ * databases found on disk.
+ *
+ * If the health database cannot be opened or fails tdb_check(), it is
+ * cleared once and the open is retried; the databases are then
+ * attached with an "unhealthy" reason so the wipe is visible.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int ctdb_attach_databases(struct ctdb_context *ctdb)
+{
+	int ret;
+	char *persistent_health_path = NULL;
+	char *unhealthy_reason = NULL;
+	bool first_try = true;
+
+	persistent_health_path = talloc_asprintf(ctdb, "%s/%s.%u",
+						 ctdb->db_directory_state,
+						 PERSISTENT_HEALTH_TDB,
+						 ctdb->pnn);
+	if (persistent_health_path == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+		return -1;
+	}
+
+again:
+
+	ctdb->db_persistent_health = tdb_wrap_open(ctdb, persistent_health_path,
+						   0, TDB_DISALLOW_NESTING,
+						   O_CREAT | O_RDWR, 0600);
+	if (ctdb->db_persistent_health == NULL) {
+		if (!first_try) {
+			DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
+					  persistent_health_path,
+					  errno,
+					  strerror(errno)));
+			goto fail;
+		}
+		first_try = false;
+
+		DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - retrying after CLEAR_IF_FIRST\n",
+				  persistent_health_path));
+		ret = ctdb_clear_persistent_health(ctdb,
+						   persistent_health_path,
+						   &unhealthy_reason);
+		if (ret != 0) {
+			goto fail;
+		}
+		goto again;
+	}
+
+	ret = tdb_check(ctdb->db_persistent_health->tdb, NULL, NULL);
+	if (ret != 0) {
+		/* drop the corrupt handle before clearing and reopening */
+		talloc_free(ctdb->db_persistent_health);
+		ctdb->db_persistent_health = NULL;
+
+		if (!first_try) {
+			DEBUG(DEBUG_CRIT,("tdb_check('%s') failed\n",
+					  persistent_health_path));
+			goto fail;
+		}
+		first_try = false;
+
+		DEBUG(DEBUG_CRIT,("tdb_check('%s') failed - retrying after CLEAR_IF_FIRST\n",
+				  persistent_health_path));
+		ret = ctdb_clear_persistent_health(ctdb,
+						   persistent_health_path,
+						   &unhealthy_reason);
+		if (ret != 0) {
+			goto fail;
+		}
+		goto again;
+	}
+	talloc_free(persistent_health_path);
+
+	ret = ctdb_attach_persistent(ctdb, unhealthy_reason);
+	talloc_free(unhealthy_reason);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return 0;
+
+fail:
+	talloc_free(persistent_health_path);
+	talloc_free(unhealthy_reason);
+	return -1;
+}
+
+/*
+ * Handle a broadcast sequence number update for database db_id sent
+ * by node srcnode.
+ *
+ * Updates that originate from this node are ignored.  Otherwise the
+ * local tdb sequence number is bumped and cached in ctdb_db->seqnum.
+ *
+ * Returns 0 on success, -1 if the database is unknown or unhealthy.
+ */
+int32_t ctdb_ltdb_update_seqnum(struct ctdb_context *ctdb, uint32_t db_id, uint32_t srcnode)
+{
+	struct ctdb_db_context *ctdb_db = NULL;
+
+	/* our own broadcast coming back: nothing to do */
+	if (ctdb->pnn == srcnode) {
+		return 0;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,("Unknown db_id 0x%x in ctdb_ltdb_update_seqnum\n", db_id));
+		return -1;
+	}
+
+	if (ctdb_db->unhealthy_reason != NULL) {
+		DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_ltdb_update_seqnum: %s\n",
+				 ctdb_db->db_name, ctdb_db->unhealthy_reason));
+		return -1;
+	}
+
+	tdb_increment_seqnum_nonblock(ctdb_db->ltdb->tdb);
+	ctdb_db->seqnum = tdb_get_seqnum(ctdb_db->ltdb->tdb);
+
+	return 0;
+}
+
+/*
+ * Periodic timer: compare the tdb's sequence number with the cached
+ * one and, if it moved, broadcast CTDB_CONTROL_UPDATE_SEQNUM to the
+ * active nodes.  The timer then re-arms itself after
+ * tunable.seqnum_interval milliseconds.
+ */
+static void ctdb_ltdb_seqnum_check(struct tevent_context *ev,
+				   struct tevent_timer *te,
+				   struct timeval t, void *p)
+{
+	struct ctdb_db_context *ctdb_db = talloc_get_type(p, struct ctdb_db_context);
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	uint32_t current = tdb_get_seqnum(ctdb_db->ltdb->tdb);
+	struct timeval next_check;
+
+	if (ctdb_db->seqnum != current) {
+		/* something has changed - tell the other nodes which db */
+		TDB_DATA payload;
+
+		payload.dptr = (uint8_t *)&ctdb_db->db_id;
+		payload.dsize = sizeof(uint32_t);
+		ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ACTIVE, 0,
+					 CTDB_CONTROL_UPDATE_SEQNUM, 0,
+					 CTDB_CTRL_FLAG_NOREPLY,
+					 payload, NULL, NULL);
+	}
+	ctdb_db->seqnum = current;
+
+	/* schedule the next check: split the interval into sec + usec */
+	next_check = timeval_current_ofs(ctdb->tunable.seqnum_interval/1000,
+					 (ctdb->tunable.seqnum_interval%1000)*1000);
+	ctdb_db->seqnum_update =
+		tevent_add_timer(ctdb->ev, ctdb_db, next_check,
+				 ctdb_ltdb_seqnum_check, ctdb_db);
+}
+
+/*
+ * Turn on sequence number handling for database db_id.
+ *
+ * Starts the periodic ctdb_ltdb_seqnum_check() timer unless it is
+ * already running, enables seqnum tracking in the tdb and caches the
+ * current value.
+ *
+ * Returns 0 on success, -1 if the database is unknown.
+ */
+int32_t ctdb_ltdb_enable_seqnum(struct ctdb_context *ctdb, uint32_t db_id)
+{
+	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);
+
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,("Unknown db_id 0x%x in ctdb_ltdb_enable_seqnum\n", db_id));
+		return -1;
+	}
+
+	if (ctdb_db->seqnum_update == NULL) {
+		/* interval is in milliseconds: split into sec + usec */
+		struct timeval first_check =
+			timeval_current_ofs(ctdb->tunable.seqnum_interval/1000,
+					    (ctdb->tunable.seqnum_interval%1000)*1000);
+
+		ctdb_db->seqnum_update =
+			tevent_add_timer(ctdb->ev, ctdb_db, first_check,
+					 ctdb_ltdb_seqnum_check, ctdb_db);
+	}
+
+	tdb_enable_seqnum(ctdb_db->ltdb->tdb);
+	ctdb_db->seqnum = tdb_get_seqnum(ctdb_db->ltdb->tdb);
+
+	return 0;
+}
+
+/*
+ * Mark a database as sticky.
+ *
+ * Does nothing if the database is already sticky.  Only volatile
+ * databases can be made sticky.  The tree used to track sticky
+ * records is allocated before the flag is set, so a database is never
+ * flagged sticky without its tracking tree.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int ctdb_set_db_sticky(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db)
+{
+	if (ctdb_db_sticky(ctdb_db)) {
+		return 0;
+	}
+
+	if (! ctdb_db_volatile(ctdb_db)) {
+		DEBUG(DEBUG_ERR,
+		      ("Non-volatile databases do not support sticky flag\n"));
+		return -1;
+	}
+
+	ctdb_db->sticky_records = trbt_create(ctdb_db, 0);
+	if (ctdb_db->sticky_records == NULL) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to create sticky records tree for %s\n",
+		       ctdb_db->db_name));
+		return -1;
+	}
+
+	ctdb_db_set_sticky(ctdb_db);
+
+	DEBUG(DEBUG_NOTICE,("set db sticky %s\n", ctdb_db->db_name));
+
+	return 0;
+}
+
+/*
+ * Reset the per-database statistics: release every recorded hot key,
+ * zero the hot key counters and clear the statistics structure.
+ */
+void ctdb_db_statistics_reset(struct ctdb_db_context *ctdb_db)
+{
+	unsigned int slot = 0;
+
+	while (slot < MAX_HOT_KEYS) {
+		if (ctdb_db->hot_keys[slot].key.dsize > 0) {
+			/* TALLOC_FREE() also resets dptr */
+			TALLOC_FREE(ctdb_db->hot_keys[slot].key.dptr);
+			ctdb_db->hot_keys[slot].key.dsize = 0;
+		}
+		ctdb_db->hot_keys[slot].last_logged_count = 0;
+		ctdb_db->hot_keys[slot].count = 0;
+		slot++;
+	}
+
+	ZERO_STRUCT(ctdb_db->statistics);
+}
+
+/*
+ * Marshal the statistics of database db_id into outdata.
+ *
+ * The reply is the fixed part of struct ctdb_db_statistics_old (up to
+ * hot_keys_wire) followed by the bytes of all hot keys, packed back to
+ * back in slot order.  The buffer is allocated on outdata.
+ *
+ * Returns 0 on success, -1 if the database is unknown or on OOM.
+ */
+int32_t ctdb_control_get_db_statistics(struct ctdb_context *ctdb,
+				uint32_t db_id,
+				TDB_DATA *outdata)
+{
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_db_statistics_old *stats;
+	unsigned int i;
+	size_t len;
+	char *ptr;
+
+	ctdb_db = find_ctdb_db(ctdb, db_id);
+	if (!ctdb_db) {
+		DEBUG(DEBUG_ERR,("Unknown db_id 0x%x in get_db_statistics\n", db_id));
+		return -1;
+	}
+
+	/*
+	 * Stage the current hot keys in the statistics structure (it
+	 * is copied wholesale below) and add up the space needed for
+	 * the key bytes.
+	 */
+	len = offsetof(struct ctdb_db_statistics_old, hot_keys_wire);
+	for (i = 0; i < MAX_HOT_KEYS; i++) {
+		struct ctdb_db_statistics_old *s = &ctdb_db->statistics;
+
+		s->hot_keys[i].key.dsize = ctdb_db->hot_keys[i].key.dsize;
+		s->hot_keys[i].key.dptr = ctdb_db->hot_keys[i].key.dptr;
+		s->hot_keys[i].count = ctdb_db->hot_keys[i].count;
+
+		len += s->hot_keys[i].key.dsize;
+	}
+
+	stats = talloc_size(outdata, len);
+	if (stats == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate db statistics structure\n"));
+		return -1;
+	}
+
+	memcpy(stats, &ctdb_db->statistics,
+	       offsetof(struct ctdb_db_statistics_old, hot_keys_wire));
+
+	stats->num_hot_keys = MAX_HOT_KEYS;
+
+	ptr = &stats->hot_keys_wire[0];
+	for (i = 0; i < MAX_HOT_KEYS; i++) {
+		size_t key_len = ctdb_db->statistics.hot_keys[i].key.dsize;
+
+		/*
+		 * Unused slots have no key data (dptr is NULL after a
+		 * reset); memcpy() must not be given a NULL pointer
+		 * even for a zero length, so skip them.
+		 */
+		if (key_len == 0) {
+			continue;
+		}
+		memcpy(ptr, ctdb_db->statistics.hot_keys[i].key.dptr,
+		       key_len);
+		ptr += key_len;
+	}
+
+	outdata->dptr = (uint8_t *)stats;
+	outdata->dsize = len;
+
+	return 0;
+}
diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c
new file mode 100644
index 0000000..ab58ec4
--- /dev/null
+++ b/ctdb/server/ctdb_monitor.c
@@ -0,0 +1,509 @@
+/*
+ monitoring links to all other nodes to detect dead nodes
+
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/* State of the node health monitoring, hung off ctdb->monitor */
+struct ctdb_monitor_state {
+ /* talloc parent of all monitoring timers and event script runs;
+  * freed by ctdb_stop_monitoring() */
+ TALLOC_CTX *monitor_context;
+ /* seconds until the next health check; doubled after each check,
+  * capped at tunable.monitor_interval */
+ uint32_t next_interval;
+ /* number of monitor events in a row that ended with ETIMEDOUT */
+ uint32_t event_script_timeouts;
+};
+
+/* forward declaration: the health check timer handler is re-armed from
+ * callbacks defined before it */
+static void ctdb_check_health(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data);
+
+/*
+ * Run the notification script for 'event' and wait for it to finish.
+ * Intended to be called in a forked child, since system() blocks.
+ *
+ * Returns the script's exit status (0 on success), or -1 if the
+ * script is missing, not executable, could not be run, or did not
+ * terminate normally (e.g. was killed by a signal).
+ */
+static int ctdb_run_notification_script_child(struct ctdb_context *ctdb, const char *event)
+{
+	struct stat st;
+	int ret;
+	char *cmd;
+
+	if (stat(ctdb->notification_script, &st) != 0) {
+		DEBUG(DEBUG_ERR,("Could not stat notification script %s. Can not send notifications.\n", ctdb->notification_script));
+		return -1;
+	}
+	if (!(st.st_mode & S_IXUSR)) {
+		DEBUG(DEBUG_ERR,("Notification script %s is not executable.\n", ctdb->notification_script));
+		return -1;
+	}
+
+	cmd = talloc_asprintf(ctdb, "%s %s\n", ctdb->notification_script, event);
+	CTDB_NO_MEMORY(ctdb, cmd);
+
+	ret = system(cmd);
+	/* if the system() call was successful, translate ret into the
+	   return code from the command
+	*/
+	if (ret != -1) {
+		if (WIFEXITED(ret)) {
+			ret = WEXITSTATUS(ret);
+		} else {
+			/*
+			 * WEXITSTATUS() is only meaningful after a
+			 * normal exit; a script killed by a signal
+			 * must not be reported as success.
+			 */
+			ret = -1;
+		}
+	}
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,("Notification script \"%s\" failed with error %d\n", cmd, ret));
+	}
+
+	talloc_free(cmd);
+	return ret;
+}
+
+/*
+ * Fire the notification script for 'event' without blocking the
+ * daemon: a child is forked, runs the script and exits.  Does nothing
+ * when no notification script is configured.
+ */
+void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
+{
+	pid_t child;
+
+	if (ctdb->notification_script == NULL) {
+		return;
+	}
+
+	child = ctdb_fork(ctdb);
+	if (child == (pid_t)-1) {
+		DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
+		return;
+	}
+	if (child != 0) {
+		/* parent: nothing more to do */
+		return;
+	}
+
+	/* child: run the script, then leave with status 0 regardless */
+	prctl_set_comment("ctdb_notification");
+	if (ctdb_run_notification_script_child(ctdb, event) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Notification script failed\n"));
+	}
+	_exit(0);
+}
+
+/*
+ called when a health monitoring event script finishes
+
+ status is 0 on success, ECANCELED if the event was cancelled,
+ ETIMEDOUT if it timed out, anything else counts as a failure.
+
+ Updates NODE_FLAGS_UNHEALTHY on this node, schedules the next health
+ check and, if the flags changed, announces the change and requests
+ a takeover run.
+ */
+static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+ struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
+ TDB_DATA data;
+ struct ctdb_node_flag_change c;
+ uint32_t next_interval;
+ int ret;
+ TDB_DATA rddata;
+ struct ctdb_srvid_message rd;
+ const char *state_str = NULL;
+
+ /* remember the flags before this callback possibly changes them */
+ c.pnn = ctdb->pnn;
+ c.old_flags = node->flags;
+
+ /* payload for the takeover run request sent at the end */
+ ZERO_STRUCT(rd);
+ rd.pnn = ctdb->pnn;
+ rd.srvid = 0;
+
+ rddata.dptr = (uint8_t *)&rd;
+ rddata.dsize = sizeof(rd);
+
+ if (status == ECANCELED) {
+ DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n"));
+ goto after_change_status;
+ }
+
+ if (status == ETIMEDOUT) {
+ ctdb->monitor->event_script_timeouts++;
+
+ /* only a run of timeouts reaching monitor_timeout_count is
+  * treated as a failure */
+ if (ctdb->monitor->event_script_timeouts >=
+ ctdb->tunable.monitor_timeout_count) {
+ DEBUG(DEBUG_ERR,
+ ("Maximum monitor timeout count %u reached."
+ " Making node unhealthy\n",
+ ctdb->tunable.monitor_timeout_count));
+ } else {
+ /* We pretend this is OK. */
+ goto after_change_status;
+ }
+ } else {
+ /* any non-timeout result ends the run of timeouts */
+ ctdb->monitor->event_script_timeouts = 0;
+ }
+
+ /* on a health transition, check again after 5 seconds */
+ if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
+ DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
+ node->flags |= NODE_FLAGS_UNHEALTHY;
+ ctdb->monitor->next_interval = 5;
+
+ ctdb_run_notification_script(ctdb, "unhealthy");
+ } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
+ DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
+ node->flags &= ~NODE_FLAGS_UNHEALTHY;
+ ctdb->monitor->next_interval = 5;
+
+ ctdb_run_notification_script(ctdb, "healthy");
+ }
+
+after_change_status:
+ /* use the current interval for the next check, then back off:
+  * double it, capped at tunable.monitor_interval */
+ next_interval = ctdb->monitor->next_interval;
+
+ ctdb->monitor->next_interval *= 2;
+ if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) {
+ ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
+ }
+
+ tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+ timeval_current_ofs(next_interval, 0),
+ ctdb_check_health, ctdb);
+
+ /* nothing to announce if the flags did not change */
+ if (c.old_flags == node->flags) {
+ return;
+ }
+
+ c.new_flags = node->flags;
+
+ data.dptr = (uint8_t *)&c;
+ data.dsize = sizeof(c);
+
+ /* ask the recovery daemon to push these changes out to all nodes */
+ ctdb_daemon_send_message(ctdb, ctdb->pnn,
+ CTDB_SRVID_PUSH_NODE_FLAGS, data);
+
+ if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
+ state_str = "UNHEALTHY";
+ } else {
+ state_str = "HEALTHY";
+ }
+
+ /* ask the recmaster to reallocate all addresses */
+ DEBUG(DEBUG_ERR,
+ ("Node became %s. Ask recovery master to reallocate IPs\n",
+ state_str));
+ ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_TAKEOVER_RUN, rddata);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " Failed to send IP takeover run request\n"));
+ }
+}
+
+
+static void ctdb_run_startup(struct tevent_context *ev,
+			     struct tevent_timer *te,
+			     struct timeval t, void *private_data);
+
+/*
+ * Completion handler for the "startup" event script.
+ *
+ * On failure the startup event is retried after 5 seconds.  On
+ * success the node enters the RUNNING runstate, the "startup"
+ * notification is fired, the other nodes are told that this node has
+ * started, and the first health check is scheduled.
+ */
+static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+	if (status == 0) {
+		DEBUG(DEBUG_NOTICE,("startup event OK - enabling monitoring\n"));
+		ctdb_set_runstate(ctdb, CTDB_RUNSTATE_RUNNING);
+		ctdb->monitor->next_interval = 2;
+		ctdb_run_notification_script(ctdb, "startup");
+
+		/* tell all other nodes we've just started up */
+		ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED,
+					 0, CTDB_CONTROL_STARTUP, 0,
+					 CTDB_CTRL_FLAG_NOREPLY,
+					 tdb_null, NULL, NULL);
+
+		tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+				 timeval_current_ofs(ctdb->monitor->next_interval, 0),
+				 ctdb_check_health, ctdb);
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,("startup event failed\n"));
+	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+			 timeval_current_ofs(5, 0),
+			 ctdb_run_startup, ctdb);
+}
+
+/*
+ * Timer handler that launches the "startup" event script.
+ *
+ * While the node has not reached the STARTUP runstate the handler
+ * re-arms itself every second.  If the script cannot be launched, it
+ * is retried after 5 seconds.
+ */
+static void ctdb_run_startup(struct tevent_context *ev,
+			     struct tevent_timer *te,
+			     struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data,
+						    struct ctdb_context);
+
+	/* This is necessary to avoid the "startup" event colliding
+	 * with the "ipreallocated" event from the takeover run
+	 * following the first recovery.  We might as well serialise
+	 * these things if we can.
+	 */
+	if (ctdb->runstate < CTDB_RUNSTATE_STARTUP) {
+		DEBUG(DEBUG_NOTICE,
+		      ("Not yet in startup runstate. Wait one more second\n"));
+		tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+				 timeval_current_ofs(1, 0),
+				 ctdb_run_startup, ctdb);
+		return;
+	}
+
+	DEBUG(DEBUG_NOTICE,("Running the \"startup\" event.\n"));
+	if (ctdb_event_script_callback(ctdb,
+				       ctdb->monitor->monitor_context,
+				       ctdb_startup_callback,
+				       ctdb, CTDB_EVENT_STARTUP, "%s", "") == 0) {
+		/* launched; ctdb_startup_callback() takes over */
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,("Unable to launch startup event script\n"));
+	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+			 timeval_current_ofs(5, 0),
+			 ctdb_run_startup, ctdb);
+}
+
+static void ctdb_wait_until_recovered(struct tevent_context *ev,
+				      struct tevent_timer *te,
+				      struct timeval t, void *private_data);
+
+/* Re-arm ctdb_wait_until_recovered() to run again in one second */
+static void ctdb_wait_until_recovered_retry(struct ctdb_context *ctdb)
+{
+	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+			 timeval_current_ofs(1, 0),
+			 ctdb_wait_until_recovered, ctdb);
+}
+
+/*
+ * Wait until we have finished initial recoveries before we start the
+ * monitoring events.
+ *
+ * Timer handler that re-arms itself every second until
+ *  - the vnn map has a valid generation,
+ *  - recovery mode is back to normal,
+ *  - (unless fast_start) the rerecovery timeout plus 3 seconds has
+ *    passed since the last recovery finished, and
+ *  - the persistent databases pass ctdb_recheck_persistent_health()
+ *    for the current generation.
+ * Then the "startup" event is scheduled.  If the health recheck keeps
+ * failing until max_persistent_check_errors is reached, the daemon is
+ * shut down.
+ */
+static void ctdb_wait_until_recovered(struct tevent_context *ev,
+				      struct tevent_timer *te,
+				      struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	int ret;
+	static int count = 0;
+
+	count++;
+
+	/* log every second for the first minute, then every 600th call */
+	if (count < 60 || count%600 == 0) {
+		DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
+		if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_STOPPED) {
+			DEBUG(DEBUG_NOTICE,("Node is STOPPED. Node will NOT recover.\n"));
+		}
+	}
+
+	if (ctdb->vnn_map->generation == INVALID_GENERATION) {
+		ctdb->db_persistent_startup_generation = INVALID_GENERATION;
+
+		ctdb_wait_until_recovered_retry(ctdb);
+		return;
+	}
+
+	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+		ctdb->db_persistent_startup_generation = INVALID_GENERATION;
+
+		DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
+		ctdb_wait_until_recovered_retry(ctdb);
+		return;
+	}
+
+
+	if (!fast_start && timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
+		ctdb->db_persistent_startup_generation = INVALID_GENERATION;
+
+		DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
+
+		ctdb_wait_until_recovered_retry(ctdb);
+		return;
+	}
+
+	/* the health recheck is done at most once per generation */
+	if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
+		DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
+				  "until the next recovery\n"));
+		ctdb_wait_until_recovered_retry(ctdb);
+		return;
+	}
+
+	ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
+	ret = ctdb_recheck_persistent_health(ctdb);
+	if (ret != 0) {
+		ctdb->db_persistent_check_errors++;
+		if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
+			DEBUG(DEBUG_ERR,
+			      (__location__ " ctdb_recheck_persistent_health() "
+			       "failed (%llu of %llu times) - retry later\n",
+			       (unsigned long long)ctdb->db_persistent_check_errors,
+			       (unsigned long long)ctdb->max_persistent_check_errors));
+			ctdb_wait_until_recovered_retry(ctdb);
+			return;
+		}
+		DEBUG(DEBUG_ALERT,(__location__
+				   " ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
+				   (unsigned long long)ctdb->db_persistent_check_errors));
+		ctdb_shutdown_sequence(ctdb, 11);
+		/* In case above returns due to duplicate shutdown */
+		return;
+	}
+	ctdb->db_persistent_check_errors = 0;
+
+	/* all clear: run the "startup" event right away */
+	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+			 timeval_current(), ctdb_run_startup, ctdb);
+}
+
+
+/*
+ * Timer handler: ask the event scripts whether this node is healthy.
+ *
+ * The "monitor" event is skipped, and the check simply rescheduled
+ * after next_interval seconds, while the node is in recovery,
+ * inactive, not in the RUNNING runstate, or while all databases are
+ * frozen.  If the event cannot be launched the check is retried after
+ * 5 seconds.
+ */
+static void ctdb_check_health(struct tevent_context *ev,
+			      struct tevent_timer *te,
+			      struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	bool skip_monitoring;
+
+	skip_monitoring =
+		(ctdb->recovery_mode != CTDB_RECOVERY_NORMAL ||
+		 ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE ||
+		 ctdb->runstate != CTDB_RUNSTATE_RUNNING);
+
+	/* the frozen check is only made when nothing else rules out monitoring */
+	if (!skip_monitoring && ctdb_db_all_frozen(ctdb)) {
+		DEBUG(DEBUG_ERR,
+		      ("Skip monitoring since databases are frozen\n"));
+		skip_monitoring = true;
+	}
+
+	if (skip_monitoring) {
+		tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+				 timeval_current_ofs(ctdb->monitor->next_interval, 0),
+				 ctdb_check_health, ctdb);
+		return;
+	}
+
+	if (ctdb_event_script_callback(ctdb,
+				       ctdb->monitor->monitor_context,
+				       ctdb_health_callback,
+				       ctdb, CTDB_EVENT_MONITOR, "%s", "") == 0) {
+		/* launched; ctdb_health_callback() schedules the next check */
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,("Unable to launch monitor event script\n"));
+	ctdb->monitor->next_interval = 5;
+	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+			 timeval_current_ofs(5, 0),
+			 ctdb_check_health, ctdb);
+}
+
+/*
+ * Stop any monitoring.
+ * This should only be done when shutting down the daemon.
+ *
+ * Freeing the monitor context cancels everything allocated on it;
+ * the check interval is reset to 5 seconds.  A no-op (apart from a
+ * log message) if monitoring was never initialised.
+ */
+void ctdb_stop_monitoring(struct ctdb_context *ctdb)
+{
+	if (ctdb->monitor != NULL) {
+		TALLOC_FREE(ctdb->monitor->monitor_context);
+
+		ctdb->monitor->next_interval = 5;
+		DEBUG(DEBUG_NOTICE,("Monitoring has been stopped\n"));
+		return;
+	}
+
+	D_NOTICE("Monitoring not yet initialised\n");
+}
+
+/*
+ * Enter the FIRST_RECOVERY runstate, set up the monitoring state and
+ * start polling (once a second) for the first recovery to complete.
+ *
+ * The monitor state is zero-initialised: event_script_timeouts is
+ * incremented by the health callback before anything else assigns it
+ * if the very first monitor event times out, so it must start at 0.
+ */
+void ctdb_wait_for_first_recovery(struct ctdb_context *ctdb)
+{
+	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_FIRST_RECOVERY);
+
+	ctdb->monitor = talloc_zero(ctdb, struct ctdb_monitor_state);
+	CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor);
+
+	ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
+	CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);
+
+	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+			 timeval_current_ofs(1, 0),
+			 ctdb_wait_until_recovered, ctdb);
+}
+
+
+/*
+ * Apply a node flag change described by the struct
+ * ctdb_node_flag_change in indata.
+ *
+ * Changes aimed at this node or at a disconnected node are ignored.
+ * The DISCONNECTED bit is always taken from local state, never from
+ * the request.
+ *
+ * Returns 0 on success (including "nothing to do"), -1 if the node
+ * is not known.
+ */
+int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_node_flag_change *change =
+		(struct ctdb_node_flag_change *)indata.dptr;
+	struct ctdb_node *node;
+	uint32_t old_flags;
+
+	/*
+	 * Don't let other nodes override the current node's flags.
+	 * The recovery master fetches flags from this node so there's
+	 * no need to push them back.  Doing so is racy.
+	 */
+	if (ctdb->pnn == change->pnn) {
+		DBG_DEBUG("Ignoring flag changes for current node\n");
+		return 0;
+	}
+
+	node = ctdb_find_node(ctdb, change->pnn);
+	if (node == NULL) {
+		DBG_ERR("Node %u is invalid\n", change->pnn);
+		return -1;
+	}
+
+	if (node->flags & NODE_FLAGS_DISCONNECTED) {
+		DBG_DEBUG("Ignoring flag changes for disconnected node\n");
+		return 0;
+	}
+
+	/*
+	 * Only the locally recorded old flags matter; what the sender
+	 * believed they were is irrelevant.
+	 */
+	old_flags = node->flags;
+
+	/*
+	 * Connectivity is tracked by this node itself, so keep our own
+	 * DISCONNECTED bit and take every other bit from the request.
+	 */
+	node->flags =
+		(change->new_flags & ~NODE_FLAGS_DISCONNECTED) |
+		(old_flags & NODE_FLAGS_DISCONNECTED);
+
+	if (old_flags == node->flags) {
+		return 0;
+	}
+
+	D_NOTICE("Node %u has changed flags - 0x%x -> 0x%x\n",
+		 change->pnn,
+		 old_flags,
+		 node->flags);
+
+	if (node->flags == 0 && ctdb->runstate <= CTDB_RUNSTATE_STARTUP) {
+		DBG_ERR("Node %u became healthy - force recovery for startup\n",
+			change->pnn);
+		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+	}
+
+	return 0;
+}
diff --git a/ctdb/server/ctdb_mutex_fcntl_helper.c b/ctdb/server/ctdb_mutex_fcntl_helper.c
new file mode 100644
index 0000000..aac98ea
--- /dev/null
+++ b/ctdb/server/ctdb_mutex_fcntl_helper.c
@@ -0,0 +1,795 @@
+/*
+ CTDB mutex fcntl lock file helper
+
+ Copyright (C) Martin Schwenke 2015
+
+ wait_for_parent() code from ctdb_lock_helper.c:
+
+ Copyright (C) Amitay Isaacs 2013
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+#include "system/dir.h"
+
+#include <tevent.h>
+
+#include "lib/util/sys_rw.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/util.h"
+#include "lib/util/smb_strtox.h"
+
+/* protocol.h is just needed for ctdb_sock_addr, which is used in system.h */
+#include "protocol/protocol.h"
+#include "common/system.h"
+#include "common/tmon.h"
+
+static char progpath[PATH_MAX];
+static char *progname = NULL;
+
+/*
+ * Take an exclusive (write) fcntl() lock on the single byte at offset
+ * "start" of fd.  When "block" is true the call waits for the lock
+ * (F_SETLKW), otherwise it fails straight away if the lock is held
+ * elsewhere (F_SETLK).
+ *
+ * Returns 0 on success, otherwise the errno value left by fcntl().
+ */
+static int fcntl_lock_fd(int fd, bool block, off_t start)
+{
+	struct flock byte_lock = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = start,
+		.l_len = 1,
+		.l_pid = 0,
+	};
+	int ret;
+
+	ret = fcntl(fd, block ? F_SETLKW : F_SETLK, &byte_lock);
+	if (ret != 0) {
+		return errno;
+	}
+
+	return 0;
+}
+
+/*
+ * Open (creating if necessary) the lock file and try, without
+ * blocking, to lock byte 0 of it.
+ *
+ * Returns a single status character:
+ *   '0' - lock taken; the open descriptor is stored in *outfd
+ *   '1' - lock is held elsewhere (EACCES/EAGAIN), nothing is logged
+ *   '3' - the file could not be opened or locking failed otherwise
+ * *outfd is only written in the '0' case.
+ */
+static char fcntl_lock(const char *file, int *outfd)
+{
+	int lock_err;
+	int fd = open(file, O_RDWR|O_CREAT, 0600);
+
+	if (fd == -1) {
+		fprintf(stderr, "%s: Unable to open %s - (%s)\n",
+			progname, file, strerror(errno));
+		return '3';
+	}
+
+	lock_err = fcntl_lock_fd(fd, false, 0);
+	if (lock_err == 0) {
+		*outfd = fd;
+		return '0';
+	}
+
+	/* Locking failed one way or another, the descriptor is useless */
+	close(fd);
+
+	if (lock_err == EAGAIN || lock_err == EACCES) {
+		/* Lock contention, fail silently */
+		return '1';
+	}
+
+	/* Log an error for any other failure */
+	fprintf(stderr,
+		"%s: Failed to get lock on '%s' - (%s)\n",
+		progname,
+		file,
+		strerror(lock_err));
+	return '3';
+}
+
+/*
+ * Wait and see if the parent exits
+ */
+
+/* State for the wait_for_parent_send()/wait_for_parent_check() request */
+struct wait_for_parent_state {
+	/* Event context used to schedule the periodic re-check */
+	struct tevent_context *ev;
+	/* PID that is probed with kill(ppid, 0) on every wakeup */
+	pid_t ppid;
+};
+
+static void wait_for_parent_check(struct tevent_req *subreq);
+
+/*
+ * Start a request that completes once the process with PID ppid can no
+ * longer be found.  The check is scheduled 5 seconds ahead and repeated
+ * by wait_for_parent_check().  If ppid is already 1 the request
+ * completes immediately.
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx,
+					       struct tevent_context *ev,
+					       pid_t ppid)
+{
+	struct wait_for_parent_state *state;
+	struct tevent_req *req;
+	struct tevent_req *timer;
+
+	req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ppid = ppid;
+	state->ev = ev;
+
+	if (ppid != 1) {
+		timer = tevent_wakeup_send(state, ev,
+					   tevent_timeval_current_ofs(5,0));
+		if (tevent_req_nomem(timer, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(timer, wait_for_parent_check, req);
+
+		return req;
+	}
+
+	/* Nothing to wait for, finish straight away */
+	fprintf(stderr, "parent == 1\n");
+	tevent_req_done(req);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * Wakeup callback: probe the watched PID with signal 0.  If it no
+ * longer exists (ESRCH) the request is completed, otherwise another
+ * wakeup is scheduled 5 seconds ahead.
+ */
+static void wait_for_parent_check(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct wait_for_parent_state *state = tevent_req_data(
+		req, struct wait_for_parent_state);
+	struct tevent_req *timer;
+	bool woke;
+	int rc;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! woke) {
+		/* Ignore error */
+		fprintf(stderr, "%s: tevent_wakeup_recv() failed\n", progname);
+	}
+
+	rc = kill(state->ppid, 0);
+	if (rc == -1 && errno == ESRCH) {
+		fprintf(stderr, "parent gone\n");
+		tevent_req_done(req);
+		return;
+	}
+
+	/* Still there (or not provably gone), look again later */
+	timer = tevent_wakeup_send(state, state->ev,
+				   tevent_timeval_current_ofs(5,0));
+	if (tevent_req_nomem(timer, req)) {
+		return;
+	}
+	tevent_req_set_callback(timer, wait_for_parent_check, req);
+}
+
+/*
+ * Collect the result of a wait_for_parent request.  Returns true on
+ * success; on failure returns false with the error stored in *perr.
+ */
+static bool wait_for_parent_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * Perform I/O on lock in a loop - complete when file removed or replaced
+ */
+
+/* State for the periodic lock file I/O check request */
+struct lock_io_check_state {
+	/* Event context used to schedule the next check */
+	struct tevent_context *ev;
+	/* Path of the lock file that is re-opened on every check */
+	const char *lock_file;
+	/* Inode the lock file is expected to keep */
+	ino_t inode;
+	/* Seconds between two checks */
+	unsigned long recheck_interval;
+};
+
+static void lock_io_check_loop(struct tevent_req *subreq);
+
+/*
+ * Start a request that re-checks the lock file every recheck_interval
+ * seconds (see lock_io_check_loop()).  The first check is scheduled
+ * recheck_interval seconds ahead.  lock_file is stored by pointer, not
+ * copied.
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *lock_io_check_send(TALLOC_CTX *mem_ctx,
+					     struct tevent_context *ev,
+					     const char *lock_file,
+					     ino_t inode,
+					     unsigned long recheck_interval)
+{
+	struct lock_io_check_state *state;
+	struct tevent_req *req;
+	struct tevent_req *timer;
+
+	req = tevent_req_create(mem_ctx, &state, struct lock_io_check_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->recheck_interval = recheck_interval;
+	state->inode = inode;
+	state->lock_file = lock_file;
+	state->ev = ev;
+
+	timer = tevent_wakeup_send(
+		state,
+		ev,
+		tevent_timeval_current_ofs(state->recheck_interval, 0));
+	if (tevent_req_nomem(timer, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(timer, lock_io_check_loop, req);
+
+	return req;
+}
+
+/*
+ * Wakeup callback: verify that the lock file still exists, still has
+ * the expected inode and that a lock can be taken on byte 1 of it.
+ *
+ * If every step succeeds another check is scheduled.  If any step
+ * fails a message is printed and the request is completed (with
+ * tevent_req_done(), not with an error).
+ */
+static void lock_io_check_loop(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct lock_io_check_state *state = tevent_req_data(
+		req, struct lock_io_check_state);
+	struct tevent_req *timer;
+	struct stat st;
+	bool woke;
+	int probe_fd;
+	int rc;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! woke) {
+		/* Ignore error */
+		fprintf(stderr, "%s: tevent_wakeup_recv() failed\n", progname);
+	}
+
+	probe_fd = open(state->lock_file, O_RDWR);
+	if (probe_fd == -1) {
+		fprintf(stderr,
+			"%s: "
+			"lock lost - lock file \"%s\" open failed (ret=%d)\n",
+			progname,
+			state->lock_file,
+			errno);
+		/* No descriptor to clean up in this case */
+		tevent_req_done(req);
+		return;
+	}
+
+	rc = fstat(probe_fd, &st);
+	if (rc != 0) {
+		fprintf(stderr,
+			"%s: "
+			"lock lost - lock file \"%s\" check failed (ret=%d)\n",
+			progname,
+			state->lock_file,
+			errno);
+		goto finished;
+	}
+
+	if (st.st_ino != state->inode) {
+		fprintf(stderr,
+			"%s: lock lost - lock file \"%s\" inode changed\n",
+			progname,
+			state->lock_file);
+		goto finished;
+	}
+
+	/*
+	 * Attempt to lock a 2nd byte range.  Using a blocking lock
+	 * encourages ping timeouts if the cluster filesystem is in a
+	 * bad state.  It also makes testing easier.
+	 */
+	rc = fcntl_lock_fd(probe_fd, true, 1);
+	if (rc != 0) {
+		fprintf(stderr,
+			"%s: "
+			"lock fail - lock file \"%s\" test lock error (%d)\n",
+			progname,
+			state->lock_file,
+			rc);
+		goto finished;
+	}
+
+	/* Closing the descriptor also drops the test lock */
+	close(probe_fd);
+
+	timer = tevent_wakeup_send(
+		state,
+		state->ev,
+		tevent_timeval_current_ofs(state->recheck_interval, 0));
+	if (tevent_req_nomem(timer, req)) {
+		return;
+	}
+	tevent_req_set_callback(timer, lock_io_check_loop, req);
+
+	return;
+
+finished:
+	close(probe_fd);
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a lock_io_check request.  Returns true on
+ * success; on failure returns false with the error stored in *perr.
+ */
+static bool lock_io_check_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * State for the lock_test_child request.  It has no members; the
+ * state object only serves as the talloc parent of the 2 subrequests.
+ */
+struct lock_test_child_state {
+};
+
+static void lock_test_child_ping_done(struct tevent_req *subreq);
+static void lock_test_child_io_check_done(struct tevent_req *subreq);
+
+/*
+ * Start the request run by the lock test child.  It combines 2
+ * subrequests and completes as soon as either of them does:
+ *  - a tmon ping request on fd, whose last argument is 1 when
+ *    send_pings is true and 0 otherwise (presumably the ping interval
+ *    in seconds - confirm against tmon_ping_send())
+ *  - the periodic lock file check from lock_io_check_send()
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *lock_test_child_send(TALLOC_CTX *mem_ctx,
+					       struct tevent_context *ev,
+					       const char *lock_file,
+					       int fd,
+					       ino_t inode,
+					       unsigned long recheck_interval,
+					       bool send_pings)
+{
+	struct lock_test_child_state *state;
+	struct tevent_req *req;
+	struct tevent_req *ping_req;
+	struct tevent_req *io_req;
+	unsigned int interval = 0;
+
+	if (send_pings) {
+		interval = 1;
+	}
+
+	req = tevent_req_create(mem_ctx, &state, struct lock_test_child_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	ping_req = tmon_ping_send(state, ev, fd, TMON_FD_BOTH, 0, interval);
+	if (tevent_req_nomem(ping_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(ping_req, lock_test_child_ping_done, req);
+
+	io_req = lock_io_check_send(state,
+				    ev,
+				    lock_file,
+				    inode,
+				    recheck_interval);
+	if (tevent_req_nomem(io_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(io_req, lock_test_child_io_check_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the child's tmon ping subrequest: pass its
+ * outcome (success or error code) on to the parent request.
+ */
+static void lock_test_child_ping_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int ping_err;
+	bool ok;
+
+	ok = tmon_ping_recv(subreq, &ping_err);
+	TALLOC_FREE(subreq);
+
+	if (ok) {
+		tevent_req_done(req);
+	} else {
+		tevent_req_error(req, ping_err);
+	}
+}
+
+/*
+ * Completion callback for the child's lock file check subrequest:
+ * pass its outcome (success or error code) on to the parent request.
+ */
+static void lock_test_child_io_check_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int check_err;
+	bool ok;
+
+	ok = lock_io_check_recv(subreq, &check_err);
+	TALLOC_FREE(subreq);
+
+	if (ok) {
+		tevent_req_done(req);
+	} else {
+		tevent_req_error(req, check_err);
+	}
+}
+
+/*
+ * Collect the result of the lock_test_child request.  An EPIPE error
+ * is treated as success because the parent exiting is expected; *perr
+ * is still set to EPIPE in that case.  Any other error yields false
+ * with the error stored in *perr.
+ */
+static bool lock_test_child_recv(struct tevent_req *req, int *perr)
+{
+	if (!tevent_req_is_unix_error(req, perr)) {
+		return true;
+	}
+
+	/* Parent exit is expected */
+	return (*perr == EPIPE);
+}
+
+/*
+ * Body of the forked lock test child.  Never returns: the process
+ * leaves via _exit(), with status 0 when the child request finished
+ * successfully and 1 on any failure.
+ *
+ * The inode of the lock file is remembered from lock_fd, which is then
+ * closed; later checks re-open the file by name.
+ */
+static void lock_test_child(const char *lock_file,
+			    int lock_fd,
+			    int pipe_fd,
+			    unsigned long recheck_interval,
+			    bool send_pings)
+{
+	struct tevent_context *child_ev;
+	struct tevent_req *child_req;
+	struct stat st;
+	ino_t expected_inode;
+	bool ok;
+	int err;
+
+	if (fstat(lock_fd, &st) != 0) {
+		fprintf(stderr,
+			"%s: lock lost - "
+			"lock file \"%s\" stat failed (ret=%d)\n",
+			progname,
+			lock_file,
+			errno);
+		_exit(1);
+	}
+	expected_inode = st.st_ino;
+	close(lock_fd);
+
+	child_ev = tevent_context_init(NULL);
+	if (child_ev == NULL) {
+		fprintf(stderr, "%s: tevent_context_init() failed\n", progname);
+		_exit(1);
+	}
+
+	child_req = lock_test_child_send(child_ev,
+					 child_ev,
+					 lock_file,
+					 pipe_fd,
+					 expected_inode,
+					 recheck_interval,
+					 send_pings);
+	if (child_req == NULL) {
+		fprintf(stderr,
+			"%s: lock_test_child_send() failed\n",
+			progname);
+		_exit(1);
+	}
+
+	/* Run the event loop until the request finishes */
+	tevent_req_poll(child_req, child_ev);
+
+	ok = lock_test_child_recv(child_req, &err);
+	if (ok) {
+		_exit(0);
+	}
+
+	fprintf(stderr,
+		"%s: lock_test_child_recv() failed (%d)\n",
+		progname,
+		err);
+	_exit(1);
+}
+
+/* State for the parent side of the lock test (see lock_test_send()) */
+struct lock_test_state {
+	/*
+	 * Caller-owned descriptor holding the lock; closed and set to
+	 * -1 through this pointer when the ping request ends
+	 */
+	int *lock_fdp;
+	/* Parent's end of the socketpair shared with the child */
+	int pipe_fd;
+	/* PID of the forked lock test child */
+	pid_t child_pid;
+};
+
+static void lock_test_ping_done(struct tevent_req *subreq);
+
+/*
+ * Fork a child that keeps testing the lock file and start monitoring
+ * that child over a socketpair.
+ *
+ * The child runs lock_test_child() and never returns from it; it is
+ * asked to send pings only when ping_timeout is non-zero.  The parent
+ * starts a tmon ping request on its end of the socketpair, passing
+ * ping_timeout to it; lock_test_ping_done() runs when that request
+ * ends and releases the lock held in *fdp.
+ *
+ * Returns NULL if the request cannot be allocated.  If socketpair() or
+ * fork() fails the request is completed with the corresponding errno
+ * value.
+ */
+static struct tevent_req *lock_test_send(TALLOC_CTX *mem_ctx,
+					 struct tevent_context *ev,
+					 const char *lock_file,
+					 int *fdp,
+					 unsigned long recheck_interval,
+					 unsigned long ping_timeout)
+{
+	struct tevent_req *req, *subreq;
+	struct lock_test_state *state;
+	pid_t pid;
+	int sv[2];
+	int ret;
+	int err;
+
+	req = tevent_req_create(mem_ctx, &state, struct lock_test_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
+	if (ret != 0) {
+		/* Save errno first: fprintf() is allowed to change it */
+		err = errno;
+		fprintf(stderr,
+			"%s: socketpair() failed (errno=%d)\n",
+			progname,
+			err);
+		tevent_req_error(req, err);
+		return tevent_req_post(req, ev);
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		/* Save errno first: fprintf()/close() may change it */
+		err = errno;
+		fprintf(stderr, "%s: fork() failed (errno=%d)\n", progname, err);
+		close(sv[0]);
+		close(sv[1]);
+		tevent_req_error(req, err);
+		return tevent_req_post(req, ev);
+	}
+	if (pid == 0) {
+		/* Child */
+		close(sv[0]);
+		TALLOC_FREE(ev);
+
+		lock_test_child(lock_file,
+				*fdp,
+				sv[1],
+				recheck_interval,
+				ping_timeout != 0);
+		/* Above does not return */
+	}
+
+	/* Parent */
+	close(sv[1]);
+
+	state->lock_fdp = fdp;
+	state->pipe_fd = sv[0];
+	state->child_pid = pid;
+
+	subreq = tmon_ping_send(state, ev, sv[0], TMON_FD_BOTH, ping_timeout, 0);
+	if (tevent_req_nomem(subreq, req)) {
+		close(sv[0]);
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, lock_test_ping_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the parent's tmon ping request.  However the
+ * request ended, the lock is released, the child is killed and reaped,
+ * and the lock test request is completed successfully.
+ */
+static void lock_test_ping_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct lock_test_state *state = tevent_req_data(
+		req, struct lock_test_state);
+	int wstatus;
+	int ping_err;
+	bool ok;
+
+	ok = tmon_ping_recv(subreq, &ping_err);
+	TALLOC_FREE(subreq);
+
+	/*
+	 * Errors are only reported, never propagated.  EPIPE means the
+	 * child exited and has already printed its own message.
+	 */
+	if (!ok && ping_err != EPIPE) {
+		if (ping_err == ETIMEDOUT) {
+			fprintf(stderr,
+				"%s: ping timeout from lock test child\n",
+				progname);
+		} else {
+			fprintf(stderr,
+				"%s: tmon_ping_recv() failed (%d)\n",
+				progname,
+				ping_err);
+		}
+	}
+
+	/*
+	 * Lock checking child is gone or not sending pings.  Release
+	 * the lock, close this end of pipe, send SIGKILL to the child
+	 * process and wait for the child to exit.
+	 */
+	close(*state->lock_fdp);
+	*state->lock_fdp = -1;
+	close(state->pipe_fd);
+	kill(state->child_pid, SIGKILL);
+	waitpid(state->child_pid, &wstatus, 0);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a lock_test request.  Returns true on
+ * success; on failure returns false with the error stored in *perr.
+ */
+static bool lock_test_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * Wait for a reason to exit, indicating that parent has exited or I/O
+ * on lock failed
+ */
+
+/*
+ * State for the wait_for_exit request.  It has no members; the state
+ * object only serves as the talloc parent of the subrequests.
+ */
+struct wait_for_exit_state {
+};
+
+static void wait_for_exit_parent_done(struct tevent_req *subreq);
+static void wait_for_exit_lock_test_done(struct tevent_req *subreq);
+
+/*
+ * Start the top-level request.  It completes when the process ppid is
+ * gone or, if recheck_interval is non-zero, when the lock test ends,
+ * whichever happens first.  With recheck_interval 0 no lock test is
+ * started at all.
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *wait_for_exit_send(TALLOC_CTX *mem_ctx,
+					     struct tevent_context *ev,
+					     pid_t ppid,
+					     const char *lock_file,
+					     int *fdp,
+					     unsigned long recheck_interval,
+					     unsigned long ping_timeout)
+{
+	struct wait_for_exit_state *state;
+	struct tevent_req *req;
+	struct tevent_req *parent_req;
+	struct tevent_req *test_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct wait_for_exit_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	parent_req = wait_for_parent_send(state, ev, ppid);
+	if (tevent_req_nomem(parent_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(parent_req, wait_for_exit_parent_done, req);
+
+	if (recheck_interval == 0) {
+		/* Lock testing is switched off */
+		return req;
+	}
+
+	test_req = lock_test_send(state,
+				  ev,
+				  lock_file,
+				  fdp,
+				  recheck_interval,
+				  ping_timeout);
+	if (tevent_req_nomem(test_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(test_req,
+				wait_for_exit_lock_test_done,
+				req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the parent-watch subrequest.  A failure is
+ * logged but otherwise ignored; the wait_for_exit request is completed
+ * successfully either way.
+ */
+static void wait_for_exit_parent_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int wait_err;
+	bool ok;
+
+	ok = wait_for_parent_recv(subreq, &wait_err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		/* Ignore error */
+		fprintf(stderr,
+			"%s: "
+			"wait_for_parent_recv() failed (%d)\n",
+			progname,
+			wait_err);
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Completion callback for the lock test subrequest.  A failure is
+ * logged but otherwise ignored; the wait_for_exit request is completed
+ * successfully either way.
+ */
+static void wait_for_exit_lock_test_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int test_err;
+	bool ok;
+
+	ok = lock_test_recv(subreq, &test_err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		/* Ignore error, fall through to done */
+		fprintf(stderr,
+			"%s: "
+			"lock_test_recv() failed (%d)\n",
+			progname,
+			test_err);
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a wait_for_exit request.  Returns true on
+ * success; on failure returns false with the error stored in *perr.
+ */
+static bool wait_for_exit_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/* Print the command line synopsis to stderr */
+static void usage(void)
+{
+	FILE *out = stderr;
+
+	fprintf(out,
+		"Usage: %s <file> [recheck_interval [ping_timeout]]\n",
+		progname);
+}
+
+/*
+ * Usage: <prog> <file> [recheck_interval [ping_timeout]]
+ *
+ * Try to lock <file> and report the outcome as a single character on
+ * stdout: the value returned by fcntl_lock() ('0' when the lock was
+ * taken).  If the lock was not taken the program ends right there.
+ * Otherwise it keeps holding the lock until wait_for_exit_send()'s
+ * request completes, i.e. until the parent process is gone or the
+ * lock test ends.
+ *
+ * recheck_interval defaults to 5 and ping_timeout to 0.
+ *
+ * Exits with status 1 on bad arguments or setup failures, otherwise
+ * returns 0.
+ */
+int main(int argc, char *argv[])
+{
+	struct tevent_context *ev;
+	char result;
+	/* pid_t matches both getppid() and wait_for_exit_send() */
+	pid_t ppid;
+	const char *file = NULL;
+	unsigned long recheck_interval;
+	unsigned long ping_timeout;
+	int ret;
+	int fd = -1;
+	struct tevent_req *req;
+	bool status;
+
+	strlcpy(progpath, argv[0], sizeof(progpath));
+	progname = basename(progpath);
+
+	if (argc < 2 || argc > 4) {
+		usage();
+		exit(1);
+	}
+
+	ev = tevent_context_init(NULL);
+	if (ev == NULL) {
+		fprintf(stderr, "locking: tevent_context_init() failed\n");
+		exit(1);
+	}
+
+	/* Remember the parent now, it is the process watched later on */
+	ppid = getppid();
+
+	file = argv[1];
+
+	recheck_interval = 5;
+	ping_timeout = 0;
+	if (argc >= 3) {
+		recheck_interval = smb_strtoul(argv[2],
+					       NULL,
+					       10,
+					       &ret,
+					       SMB_STR_STANDARD);
+		if (ret != 0) {
+			usage();
+			exit(1);
+		}
+	}
+	if (argc >= 4) {
+		ping_timeout = smb_strtoul(argv[3],
+					   NULL,
+					   10,
+					   &ret,
+					   SMB_STR_STANDARD);
+		if (ret != 0) {
+			usage();
+			exit(1);
+		}
+	}
+
+	/* Report the locking outcome to whoever reads our stdout */
+	result = fcntl_lock(file, &fd);
+	sys_write(STDOUT_FILENO, &result, 1);
+
+	if (result != '0') {
+		return 0;
+	}
+
+	req = wait_for_exit_send(ev,
+				 ev,
+				 ppid,
+				 file,
+				 &fd,
+				 recheck_interval,
+				 ping_timeout);
+	if (req == NULL) {
+		fprintf(stderr,
+			"%s: wait_for_exit_send() failed\n",
+			progname);
+		exit(1);
+	}
+
+	tevent_req_poll(req, ev);
+
+	status = wait_for_exit_recv(req, &ret);
+	if (! status) {
+		fprintf(stderr,
+			"%s: wait_for_exit_recv() failed (%d)\n",
+			progname,
+			ret);
+	}
+
+	/* fd is -1 here if the lock test already released the lock */
+	if (fd != -1) {
+		close(fd);
+	}
+
+	return 0;
+}
diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c
new file mode 100644
index 0000000..2671744
--- /dev/null
+++ b/ctdb/server/ctdb_persistent.c
@@ -0,0 +1,397 @@
+/*
+ persistent store logic
+
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/time.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+
+#include "common/reqid.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/* Book-keeping for one in-flight trans3_commit control */
+struct ctdb_persistent_state {
+	struct ctdb_context *ctdb;
+	struct ctdb_db_context *ctdb_db; /* used by trans3_commit */
+	struct ctdb_client *client; /* used by trans3_commit */
+	/* The control request that is eventually replied to */
+	struct ctdb_req_control_old *c;
+	/* Error message of the most recent failed update_record reply */
+	const char *errormsg;
+	/* Number of update_record controls still awaiting a good reply */
+	uint32_t num_pending;
+	/* Status of the most recent failed update_record reply */
+	int32_t status;
+	/* Counters of failed replies and of controls sent */
+	uint32_t num_failed, num_sent;
+};
+
+/*
+ 1) all nodes fail, and all nodes reply
+ 2) some nodes fail, all nodes reply
+ 3) some nodes timeout
+ 4) all nodes succeed
+ */
+
+/*
+  called when a node has acknowledged a ctdb_control_update_record call
+
+  Replies arriving while a recovery is active are ignored.  A failed
+  reply is recorded and switches this node into recovery mode.  Once
+  the last outstanding successful reply has arrived the client is
+  answered and the state is freed.
+ */
+static void ctdb_persistent_callback(struct ctdb_context *ctdb,
+				     int32_t status, TDB_DATA data,
+				     const char *errormsg,
+				     void *private_data)
+{
+	struct ctdb_persistent_state *state;
+
+	state = talloc_get_type(private_data, struct ctdb_persistent_state);
+
+	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+		DEBUG(DEBUG_INFO, ("ctdb_persistent_callback: ignoring reply "
+				   "during recovery\n"));
+		return;
+	}
+
+	if (status == 0) {
+		state->num_pending--;
+		if (state->num_pending == 0) {
+			/* That was the last one: answer the client */
+			ctdb_request_control_reply(state->ctdb, state->c,
+						   NULL, 0, state->errormsg);
+			talloc_free(state);
+		}
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,("ctdb_persistent_callback failed with status %d (%s)\n",
+			 status, errormsg?errormsg:"no error message given"));
+	state->num_failed++;
+	state->errormsg = errormsg;
+	state->status = status;
+
+	/*
+	 * If a node failed to complete the update_record control,
+	 * then either a recovery is already running or something
+	 * bad is going on. So trigger a recovery and let the
+	 * recovery finish the transaction, sending back the reply
+	 * for the trans3_commit control to the client.
+	 */
+	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+}
+
+/*
+  called if persistent store times out
+
+  Outside of recovery the client is sent a reply with status 1 and the
+  state is freed.  During recovery the timeout is ignored.
+ */
+static void ctdb_persistent_store_timeout(struct tevent_context *ev,
+					  struct tevent_timer *te,
+					  struct timeval t, void *private_data)
+{
+	struct ctdb_persistent_state *state;
+
+	state = talloc_get_type(private_data, struct ctdb_persistent_state);
+
+	if (state->ctdb->recovery_mode == CTDB_RECOVERY_NORMAL) {
+		ctdb_request_control_reply(state->ctdb, state->c, NULL, 1,
+					   "timeout in ctdb_persistent_state");
+		talloc_free(state);
+		return;
+	}
+
+	DEBUG(DEBUG_INFO, ("ctdb_persistent_store_timeout: ignoring "
+			   "timeout during recovery\n"));
+}
+
+/**
+ * Finish pending trans3 commit controls, i.e. send
+ * reply to the client. This is called by the end-recovery
+ * control to fix the situation when a recovery interrupts
+ * the usual progress of a transaction.
+ *
+ * Every database with a pending commit state gets a reply with
+ * status 2, after which that state is freed.  Nothing is done while
+ * recovery mode is still active.
+ */
+void ctdb_persistent_finish_trans3_commits(struct ctdb_context *ctdb)
+{
+	struct ctdb_db_context *db;
+
+	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+		DEBUG(DEBUG_INFO, ("ctdb_persistent_finish_trans3_commits: "
+				   "skipping execution when recovery is "
+				   "active\n"));
+		return;
+	}
+
+	for (db = ctdb->db_list; db != NULL; db = db->next) {
+		struct ctdb_persistent_state *pending = db->persistent_state;
+
+		if (pending != NULL) {
+			ctdb_request_control_reply(ctdb, pending->c, NULL, 2,
+						   "trans3 commit ended by recovery");
+
+			/* The destructor sets db->persistent_state to NULL. */
+			talloc_free(pending);
+		}
+	}
+}
+
+/*
+ * talloc destructor for the commit state: clear the back references
+ * held by the client (db_id) and by the database (persistent_state).
+ * Always returns 0.
+ */
+static int ctdb_persistent_state_destructor(struct ctdb_persistent_state *state)
+{
+	struct ctdb_db_context *db = state->ctdb_db;
+	struct ctdb_client *owner = state->client;
+
+	if (owner != NULL) {
+		owner->db_id = 0;
+	}
+
+	if (db != NULL) {
+		db->persistent_state = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Store a set of persistent records.
+ * This is used to roll out a transaction to all nodes.
+ *
+ * recdata is interpreted as a struct ctdb_marshall_buffer and is
+ * forwarded unchanged, as a CTDB_CONTROL_UPDATE_RECORD control, to
+ * every node in the VNN map that is not flagged inactive.
+ *
+ * Returns -1 if the request is rejected or a control cannot be sent,
+ * 0 otherwise.  When at least one control was sent, *async_reply is
+ * set to true and the reply to the client is sent later, from the
+ * callback, the timeout handler or the end of a recovery.
+ */
+int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
+				   struct ctdb_req_control_old *c,
+				   TDB_DATA recdata, bool *async_reply)
+{
+	struct ctdb_client *client;
+	struct ctdb_persistent_state *state;
+	unsigned int i;
+	struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
+	struct ctdb_db_context *ctdb_db;
+
+	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+		DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans3_commit when recovery active\n"));
+		return -1;
+	}
+
+	/*
+	 * m->db_id is read below, so make sure the buffer really
+	 * contains that field before touching it.
+	 */
+	if (recdata.dptr == NULL ||
+	    recdata.dsize < offsetof(struct ctdb_marshall_buffer, db_id) +
+			    sizeof(m->db_id)) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans3_commit: "
+				 "Invalid data size %zu\n",
+				 (size_t)recdata.dsize));
+		return -1;
+	}
+
+	client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store "
+				 "to a client. Returning error\n"));
+		return -1;
+	}
+
+	/* A non-zero db_id marks a commit already running for the client */
+	if (client->db_id != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ERROR: trans3_commit: "
+				 "client-db_id[0x%08x] != 0 "
+				 "(client_id[0x%08x]): trans3_commit active?\n",
+				 client->db_id, client->client_id));
+		return -1;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, m->db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans3_commit: "
+				 "Unknown database db_id[0x%08x]\n", m->db_id));
+		return -1;
+	}
+
+	/* Only one commit per database may be in flight */
+	if (ctdb_db->persistent_state != NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Error: "
+				  "ctdb_control_trans3_commit "
+				  "called while a transaction commit is "
+				  "active. db_id[0x%08x]\n", m->db_id));
+		return -1;
+	}
+
+	ctdb_db->persistent_state = talloc_zero(ctdb_db,
+						struct ctdb_persistent_state);
+	CTDB_NO_MEMORY(ctdb, ctdb_db->persistent_state);
+
+	client->db_id = m->db_id;
+
+	state = ctdb_db->persistent_state;
+	state->ctdb = ctdb;
+	state->ctdb_db = ctdb_db;
+	state->c = c;
+	state->client = client;
+
+	/* The destructor undoes the client/database marks set above */
+	talloc_set_destructor(state, ctdb_persistent_state_destructor);
+
+	for (i = 0; i < ctdb->vnn_map->size; i++) {
+		struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
+		int ret;
+
+		/* only send to active nodes */
+		if (node->flags & NODE_FLAGS_INACTIVE) {
+			continue;
+		}
+
+		ret = ctdb_daemon_send_control(ctdb, node->pnn, 0,
+					       CTDB_CONTROL_UPDATE_RECORD,
+					       c->client_id, 0, recdata,
+					       ctdb_persistent_callback,
+					       state);
+		if (ret == -1) {
+			DEBUG(DEBUG_ERR,("Unable to send "
+					 "CTDB_CONTROL_UPDATE_RECORD "
+					 "to pnn %u\n", node->pnn));
+			/*
+			 * NOTE(review): controls sent in earlier
+			 * iterations were given "state" as callback
+			 * data - confirm nothing calls back with it
+			 * after this free.
+			 */
+			talloc_free(state);
+			return -1;
+		}
+
+		state->num_pending++;
+		state->num_sent++;
+	}
+
+	/* No active node to send to: nothing to wait for */
+	if (state->num_pending == 0) {
+		talloc_free(state);
+		return 0;
+	}
+
+	/* we need to wait for the replies */
+	*async_reply = true;
+
+	/* need to keep the control structure around */
+	talloc_steal(state, c);
+
+	/* but we won't wait forever */
+	tevent_add_timer(ctdb->ev, state,
+			 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
+			 ctdb_persistent_store_timeout, state);
+
+	return 0;
+}
+
+
+/*
+  backwards compatibility:
+
+  start a persistent store operation. passing both the key, header and
+  data to the daemon. If the client disconnects before it has issued
+  a persistent_update call to the daemon we trigger a full recovery
+  to ensure the databases are brought back in sync.
+  for now we ignore the recdata that the client has passed to us.
+
+  Here this only increments the client's count of persistent updates.
+  Returns 0 on success, -1 if the client cannot be found.
+ */
+int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
+					     struct ctdb_req_control_old *c,
+					     TDB_DATA recdata)
+{
+	struct ctdb_client *client;
+
+	client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
+	if (client != NULL) {
+		client->num_persistent_updates++;
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " can not match start_persistent_update to a client. Returning error\n"));
+	return -1;
+}
+
+/*
+  backwards compatibility:
+
+  called to tell ctdbd that it is no longer doing a persistent update
+
+  Decrements the client's count of persistent updates, never going
+  below zero.  Returns 0 on success, -1 if the client cannot be found.
+*/
+int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
+					      struct ctdb_req_control_old *c,
+					      TDB_DATA recdata)
+{
+	struct ctdb_client *client;
+
+	client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " can not match cancel_persistent_update to a client. Returning error\n"));
+		return -1;
+	}
+
+	if (client->num_persistent_updates <= 0) {
+		/* Counter is already at its floor */
+		return 0;
+	}
+
+	client->num_persistent_updates--;
+	return 0;
+}
+
+/*
+ * Read the sequence number record (key CTDB_DB_SEQNUM_KEY, including
+ * its terminating NUL) of the database db_id from the local tdb.
+ *
+ * On success 0 is returned and *seqnum holds the stored value, or 0
+ * if the record does not have the size of a uint64_t.  A non-zero
+ * value is returned if memory cannot be allocated, the database is
+ * unknown, access to it is not allowed, or the fetch fails; *seqnum is
+ * left untouched in those cases.
+ */
+static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
+				  uint32_t db_id,
+				  uint64_t *seqnum)
+{
+	int32_t ret;
+	struct ctdb_db_context *ctdb_db;
+	const char *keyname = CTDB_DB_SEQNUM_KEY;
+	TDB_DATA key;
+	TDB_DATA data;
+	TALLOC_CTX *mem_ctx;
+	struct ctdb_ltdb_header header;
+
+	/*
+	 * Without a valid temporary context the fetched record would
+	 * be allocated under a NULL parent and never be freed.
+	 */
+	mem_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, mem_ctx);
+
+	ctdb_db = find_ctdb_db(ctdb, db_id);
+	if (!ctdb_db) {
+		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
+		ret = -1;
+		goto done;
+	}
+
+	if (! ctdb_db_allow_access(ctdb_db)) {
+		ret = -1;
+		goto done;
+	}
+
+	key.dptr = (uint8_t *)discard_const(keyname);
+	key.dsize = strlen(keyname) + 1;
+
+	ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, &header, mem_ctx, &data);
+	if (ret != 0) {
+		goto done;
+	}
+
+	/* A record of unexpected size is reported as sequence number 0 */
+	if (data.dsize != sizeof(uint64_t)) {
+		*seqnum = 0;
+		goto done;
+	}
+
+	*seqnum = *(uint64_t *)data.dptr;
+
+done:
+	talloc_free(mem_ctx);
+	return ret;
+}
+
+/**
+ * Get the sequence number of a persistent database.
+ *
+ * indata.dptr is read as the uint32_t database id.  On success 0 is
+ * returned and outdata holds a talloc'ed copy of the 64-bit sequence
+ * number.  On failure a non-zero value is returned; if the failure is
+ * the allocation of that copy, outdata is left empty (NULL, size 0).
+ */
+int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
+				   TDB_DATA indata,
+				   TDB_DATA *outdata)
+{
+	uint32_t db_id;
+	int32_t ret;
+	uint64_t seqnum;
+
+	db_id = *(uint32_t *)indata.dptr;
+	ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
+	if (ret != 0) {
+		goto done;
+	}
+
+	outdata->dptr = talloc_memdup(outdata, &seqnum, sizeof(uint64_t));
+	if (outdata->dptr == NULL) {
+		/* Never pair a NULL pointer with a non-zero size */
+		outdata->dsize = 0;
+		ret = -1;
+		goto done;
+	}
+	outdata->dsize = sizeof(uint64_t);
+
+done:
+	return ret;
+}
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
new file mode 100644
index 0000000..7b30d11
--- /dev/null
+++ b/ctdb/server/ctdb_recover.c
@@ -0,0 +1,1243 @@
+/*
+ ctdb recovery code
+
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "system/time.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "ctdb_cluster_mutex.h"
+
+/*
+ * Return this node's VNN map in wire format.
+ *
+ * outdata receives a talloc'ed struct ctdb_vnn_map_wire holding the
+ * generation, the size and a copy of the size uint32_t map entries.
+ * Returns 0 on success.
+ *
+ * NOTE(review): CHECK_CONTROL_DATA_SIZE and CTDB_NO_MEMORY are macros
+ * defined elsewhere; presumably they return an error from this
+ * function on unexpected input size / allocation failure - confirm.
+ */
+int
+ctdb_control_getvnnmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
+{
+ struct ctdb_vnn_map_wire *map;
+ size_t len;
+
+ CHECK_CONTROL_DATA_SIZE(0);
+
+ /* fixed header followed by one uint32_t per map entry */
+ len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*ctdb->vnn_map->size;
+ map = talloc_size(outdata, len);
+ CTDB_NO_MEMORY(ctdb, map);
+
+ map->generation = ctdb->vnn_map->generation;
+ map->size = ctdb->vnn_map->size;
+ memcpy(map->map, ctdb->vnn_map->map, sizeof(uint32_t)*map->size);
+
+ outdata->dsize = len;
+ outdata->dptr = (uint8_t *)map;
+
+ return 0;
+}
+
+/*
+ * Replace this node's VNN map with the one supplied in indata (a
+ * struct ctdb_vnn_map_wire: generation, size, then size uint32_t
+ * entries).  Only permitted while the recovery mode is
+ * CTDB_RECOVERY_ACTIVE.
+ *
+ * The payload length is validated against the advertised entry count
+ * and the replacement map is fully built before the current one is
+ * released, so a malformed request or an allocation failure leaves
+ * ctdb->vnn_map untouched instead of NULL or half-initialised.
+ *
+ * Returns 0 on success, -1 on error.  opcode and outdata are unused.
+ */
+int
+ctdb_control_setvnnmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
+{
+	struct ctdb_vnn_map_wire *map = (struct ctdb_vnn_map_wire *)indata.dptr;
+	const size_t hdr_len = offsetof(struct ctdb_vnn_map_wire, map);
+	struct ctdb_vnn_map *new_map;
+
+	if (ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) {
+		DEBUG(DEBUG_ERR, ("Attempt to set vnnmap when not in recovery\n"));
+		return -1;
+	}
+
+	/*
+	 * map->size comes from the wire: make sure the header is present
+	 * before reading it, and compare by division so a huge size can
+	 * not overflow the length computation.
+	 */
+	if (indata.dptr == NULL || indata.dsize < hdr_len ||
+	    (indata.dsize - hdr_len) / sizeof(uint32_t) < map->size) {
+		DEBUG(DEBUG_ERR, (__location__ " Invalid data in setvnnmap\n"));
+		return -1;
+	}
+
+	new_map = talloc(ctdb, struct ctdb_vnn_map);
+	CTDB_NO_MEMORY(ctdb, new_map);
+
+	new_map->generation = map->generation;
+	new_map->size = map->size;
+	new_map->map = talloc_array(new_map, uint32_t, map->size);
+	if (new_map->map == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+		talloc_free(new_map);
+		return -1;
+	}
+
+	memcpy(new_map->map, map->map, sizeof(uint32_t)*map->size);
+
+	/* Only now drop the old map and publish the new one */
+	talloc_free(ctdb->vnn_map);
+	ctdb->vnn_map = new_map;
+
+	return 0;
+}
+
+/*
+ * Return the list of attached databases.
+ *
+ * outdata receives a talloc'ed struct ctdb_dbid_map_old with one entry
+ * (db_id and flags) per element of ctdb->db_list.  Returns 0; an
+ * allocation failure terminates the process via exit(1).
+ */
+int
+ctdb_control_getdbmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
+{
+ uint32_t i, len;
+ struct ctdb_db_context *ctdb_db;
+ struct ctdb_dbid_map_old *dbid_map;
+
+ CHECK_CONTROL_DATA_SIZE(0);
+
+ /* first pass: count the databases */
+ len = 0;
+ for(ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next){
+ len++;
+ }
+
+
+ outdata->dsize = offsetof(struct ctdb_dbid_map_old, dbs) + sizeof(dbid_map->dbs[0])*len;
+ outdata->dptr = (unsigned char *)talloc_zero_size(outdata, outdata->dsize);
+ if (!outdata->dptr) {
+ DEBUG(DEBUG_ALERT, (__location__ " Failed to allocate dbmap array\n"));
+ exit(1);
+ }
+
+ /* second pass: fill in id and flags for each database */
+ dbid_map = (struct ctdb_dbid_map_old *)outdata->dptr;
+ dbid_map->num = len;
+ for (i=0,ctdb_db=ctdb->db_list;ctdb_db;i++,ctdb_db=ctdb_db->next){
+ dbid_map->dbs[i].db_id = ctdb_db->db_id;
+ dbid_map->dbs[i].flags = ctdb_db->db_flags;
+ }
+
+ return 0;
+}
+
+/*
+ * Return the node list as a node map blob.
+ *
+ * outdata->dptr is set to the result of ctdb_node_list_to_map()
+ * (allocated on outdata) and outdata->dsize to its talloc size.
+ * Returns 0 on success, -1 if the map could not be built.
+ */
+int
+ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
+{
+	unsigned char *blob;
+
+	CHECK_CONTROL_DATA_SIZE(0);
+
+	blob = (unsigned char *)ctdb_node_list_to_map(ctdb->nodes,
+						      ctdb->num_nodes,
+						      outdata);
+	/* dptr is updated even on failure, where it becomes NULL */
+	outdata->dptr = blob;
+	if (blob == NULL) {
+		return -1;
+	}
+
+	outdata->dsize = talloc_get_size(blob);
+	return 0;
+}
+
+/*
+ reload the nodes file
+
+ The current node array is parked on a temporary talloc context, the
+ nodes file is loaded again, and for every index whose address is
+ unchanged the old node object is moved back in place of the freshly
+ loaded one.  New or changed nodes that are not flagged deleted are
+ added and connected through ctdb->methods; a failure there is fatal.
+ Finally the recovery daemon is told to reload as well.  Always
+ returns 0.
+*/
+int
+ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode)
+{
+ unsigned int i, num_nodes;
+ TALLOC_CTX *tmp_ctx;
+ struct ctdb_node **nodes;
+
+ tmp_ctx = talloc_new(ctdb);
+
+ /* steal the old nodes file for a while */
+ talloc_steal(tmp_ctx, ctdb->nodes);
+ nodes = ctdb->nodes;
+ ctdb->nodes = NULL;
+ num_nodes = ctdb->num_nodes;
+ ctdb->num_nodes = 0;
+
+ /* load the new nodes file */
+ /* presumably repopulates ctdb->nodes and ctdb->num_nodes - the loop below relies on it */
+ ctdb_load_nodes_file(ctdb);
+
+ for (i=0; i<ctdb->num_nodes; i++) {
+ /* keep any identical pre-existing nodes and connections */
+ if ((i < num_nodes) && ctdb_same_address(&ctdb->nodes[i]->address, &nodes[i]->address)) {
+ talloc_free(ctdb->nodes[i]);
+ ctdb->nodes[i] = talloc_steal(ctdb->nodes, nodes[i]);
+ continue;
+ }
+
+ if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
+ continue;
+ }
+
+ /* any new or different nodes must be added */
+ if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) {
+ DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i));
+ ctdb_fatal(ctdb, "failed to add node. shutting down\n");
+ }
+ if (ctdb->methods->connect_node(ctdb->nodes[i]) != 0) {
+ DEBUG(DEBUG_CRIT, (__location__ " methods->add_connect failed at %d\n", i));
+ ctdb_fatal(ctdb, "failed to connect to node. shutting down\n");
+ }
+ }
+
+ /* tell the recovery daemon to reload the nodes file too */
+ ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELOAD_NODES, tdb_null);
+
+ /* releases the old node array and every old node that was not moved back */
+ talloc_free(tmp_ctx);
+
+ return 0;
+}
+
+/* State shared between ctdb_control_db_pull() and its tdb traverse callback */
+struct db_pull_state {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db; /* database being traversed */
+ struct ctdb_marshall_buffer *recs; /* batch under construction, NULL when empty */
+ uint32_t pnn; /* node the batches are sent to */
+ uint64_t srvid; /* srvid the batches are sent on */
+ uint32_t num_records; /* number of records sent so far */
+};
+
+/*
+ * tdb traverse callback for ctdb_control_db_pull().
+ *
+ * Appends the record to the current marshall buffer.  Once the buffer
+ * has reached tunable.rec_buffer_size_limit it is sent as a message to
+ * state->pnn / state->srvid, counted in state->num_records and
+ * released.  Returns 0 to continue the traverse, -1 (with state->recs
+ * freed) on marshalling or send failure.
+ */
+static int traverse_db_pull(struct tdb_context *tdb, TDB_DATA key,
+ TDB_DATA data, void *private_data)
+{
+ struct db_pull_state *state = (struct db_pull_state *)private_data;
+ struct ctdb_marshall_buffer *recs;
+
+ recs = ctdb_marshall_add(state->ctdb, state->recs,
+ state->ctdb_db->db_id, 0, key, NULL, data);
+ if (recs == NULL) {
+ TALLOC_FREE(state->recs);
+ return -1;
+ }
+ state->recs = recs;
+
+ /* flush the batch once it has grown to the configured limit */
+ if (talloc_get_size(state->recs) >=
+ state->ctdb->tunable.rec_buffer_size_limit) {
+ TDB_DATA buffer;
+ int ret;
+
+ buffer = ctdb_marshall_finish(state->recs);
+ ret = ctdb_daemon_send_message(state->ctdb, state->pnn,
+ state->srvid, buffer);
+ if (ret != 0) {
+ TALLOC_FREE(state->recs);
+ return -1;
+ }
+
+ state->num_records += state->recs->count;
+ TALLOC_FREE(state->recs);
+ }
+
+ return 0;
+}
+
+/*
+ * Send all records of a frozen database to the node that issued the
+ * control.
+ *
+ * indata is a struct ctdb_pulldb_ext naming the database and the srvid
+ * to send to.  Records are sent in batches as messages to
+ * c->hdr.srcnode; outdata receives the uint32_t number of records
+ * sent.  A database with invalid_records set sends nothing and
+ * reports 0.
+ *
+ * Returns 0 on success, -1 if the database is unknown, not frozen,
+ * cannot be lock-marked, or on traverse / send / allocation failure.
+ */
+int32_t ctdb_control_db_pull(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata, TDB_DATA *outdata)
+{
+ struct ctdb_pulldb_ext *pulldb_ext;
+ struct ctdb_db_context *ctdb_db;
+ struct db_pull_state state;
+ int ret;
+
+ pulldb_ext = (struct ctdb_pulldb_ext *)indata.dptr;
+
+ ctdb_db = find_ctdb_db(ctdb, pulldb_ext->db_id);
+ if (ctdb_db == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n",
+ pulldb_ext->db_id));
+ return -1;
+ }
+
+ if (!ctdb_db_frozen(ctdb_db)) {
+ DEBUG(DEBUG_ERR,
+ ("rejecting ctdb_control_pull_db when not frozen\n"));
+ return -1;
+ }
+
+ if (ctdb_db->unhealthy_reason) {
+ /* this is just a warning, as the tdb should be empty anyway */
+ DEBUG(DEBUG_WARNING,
+ ("db(%s) unhealty in ctdb_control_db_pull: %s\n",
+ ctdb_db->db_name, ctdb_db->unhealthy_reason));
+ }
+
+ state.ctdb = ctdb;
+ state.ctdb_db = ctdb_db;
+ state.recs = NULL;
+ state.pnn = c->hdr.srcnode;
+ state.srvid = pulldb_ext->srvid;
+ state.num_records = 0;
+
+ /* If the records are invalid, we are done */
+ if (ctdb_db->invalid_records) {
+ goto done;
+ }
+
+ if (ctdb_lockdb_mark(ctdb_db) != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Failed to get lock on entire db - failing\n"));
+ return -1;
+ }
+
+ ret = tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_db_pull, &state);
+ if (ret == -1) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Failed to get traverse db '%s'\n",
+ ctdb_db->db_name));
+ ctdb_lockdb_unmark(ctdb_db);
+ return -1;
+ }
+
+ /* Last few records */
+ /* (a partially filled batch the traverse callback has not flushed yet) */
+ if (state.recs != NULL) {
+ TDB_DATA buffer;
+
+ buffer = ctdb_marshall_finish(state.recs);
+ ret = ctdb_daemon_send_message(state.ctdb, state.pnn,
+ state.srvid, buffer);
+ if (ret != 0) {
+ TALLOC_FREE(state.recs);
+ ctdb_lockdb_unmark(ctdb_db);
+ return -1;
+ }
+
+ state.num_records += state.recs->count;
+ TALLOC_FREE(state.recs);
+ }
+
+ ctdb_lockdb_unmark(ctdb_db);
+
+done:
+ /* reply with the number of records that were sent */
+ outdata->dptr = talloc_size(outdata, sizeof(uint32_t));
+ if (outdata->dptr == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n"));
+ return -1;
+ }
+
+ memcpy(outdata->dptr, (uint8_t *)&state.num_records, sizeof(uint32_t));
+ outdata->dsize = sizeof(uint32_t);
+
+ return 0;
+}
+
+/* Per-database state of a push in progress (push_start .. push_confirm) */
+struct db_push_state {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db; /* database the records are stored into */
+ uint64_t srvid; /* srvid the message handler is registered on */
+ uint32_t num_records; /* records stored so far */
+ bool failed; /* set on the first error; later messages are ignored */
+};
+
+/*
+ * Message handler registered by ctdb_control_db_push_start().
+ *
+ * indata is a marshall buffer of records; each one is stored into the
+ * local database with its read-only flags cleared.  On success the
+ * record count is added to state->num_records.  The first malformed
+ * record or failed store sets state->failed, after which all further
+ * messages are dropped.
+ *
+ * NOTE(review): record offsets and lengths are taken from the message
+ * without being checked against indata.dsize here - confirm that the
+ * sender is trusted or that validation happens earlier.
+ */
+static void db_push_msg_handler(uint64_t srvid, TDB_DATA indata,
+ void *private_data)
+{
+ struct db_push_state *state = talloc_get_type(
+ private_data, struct db_push_state);
+ struct ctdb_marshall_buffer *recs;
+ struct ctdb_rec_data_old *rec;
+ unsigned int i;
+ int ret;
+
+ if (state->failed) {
+ return;
+ }
+
+ recs = (struct ctdb_marshall_buffer *)indata.dptr;
+ rec = (struct ctdb_rec_data_old *)&recs->data[0];
+
+ DEBUG(DEBUG_INFO, ("starting push of %u records for dbid 0x%x\n",
+ recs->count, recs->db_id));
+
+ for (i=0; i<recs->count; i++) {
+ TDB_DATA key, data;
+ struct ctdb_ltdb_header *hdr;
+
+ /* record layout: key bytes followed by data bytes */
+ key.dptr = &rec->data[0];
+ key.dsize = rec->keylen;
+ data.dptr = &rec->data[key.dsize];
+ data.dsize = rec->datalen;
+
+ if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+ DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n"));
+ goto failed;
+ }
+
+ hdr = (struct ctdb_ltdb_header *)data.dptr;
+ /* Strip off any read only record flags.
+ * All readonly records are revoked implicitly by a recovery.
+ */
+ hdr->flags &= ~CTDB_REC_RO_FLAGS;
+
+ /* the data starts with the ltdb header; pass header and payload separately */
+ data.dptr += sizeof(*hdr);
+ data.dsize -= sizeof(*hdr);
+
+ ret = ctdb_ltdb_store(state->ctdb_db, key, hdr, data);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Unable to store record\n"));
+ goto failed;
+ }
+
+ /* advance to the next record using the length stored in this one */
+ rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec);
+ }
+
+ DEBUG(DEBUG_DEBUG, ("finished push of %u records for dbid 0x%x\n",
+ recs->count, recs->db_id));
+
+ state->num_records += recs->count;
+ return;
+
+failed:
+ state->failed = true;
+}
+
+/*
+ * Prepare a frozen database for receiving pushed records.
+ *
+ * indata is a struct ctdb_pulldb_ext naming the database and the srvid
+ * on which record batches will arrive.  A db_push_state is allocated
+ * on the database, db_push_msg_handler() is registered for the srvid
+ * and the database is lock-marked.  If a push was already started its
+ * old state is deregistered and freed first.
+ *
+ * Returns 0 on success, -1 if the database is unknown or not frozen,
+ * or on allocation / registration / lock-mark failure.
+ */
+int32_t ctdb_control_db_push_start(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+ struct ctdb_pulldb_ext *pulldb_ext;
+ struct ctdb_db_context *ctdb_db;
+ struct db_push_state *state;
+ int ret;
+
+ pulldb_ext = (struct ctdb_pulldb_ext *)indata.dptr;
+
+ ctdb_db = find_ctdb_db(ctdb, pulldb_ext->db_id);
+ if (ctdb_db == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Unknown db 0x%08x\n", pulldb_ext->db_id));
+ return -1;
+ }
+
+ if (!ctdb_db_frozen(ctdb_db)) {
+ DEBUG(DEBUG_ERR,
+ ("rejecting ctdb_control_db_push_start when not frozen\n"));
+ return -1;
+ }
+
+ if (ctdb_db->push_started) {
+ DEBUG(DEBUG_WARNING,
+ (__location__ " DB push already started for %s\n",
+ ctdb_db->db_name));
+
+ /* De-register old state */
+ state = (struct db_push_state *)ctdb_db->push_state;
+ if (state != NULL) {
+ srvid_deregister(ctdb->srv, state->srvid, state);
+ talloc_free(state);
+ ctdb_db->push_state = NULL;
+ }
+ }
+
+ state = talloc_zero(ctdb_db, struct db_push_state);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n"));
+ return -1;
+ }
+
+ state->ctdb = ctdb;
+ state->ctdb_db = ctdb_db;
+ state->srvid = pulldb_ext->srvid;
+ state->failed = false;
+
+ ret = srvid_register(ctdb->srv, state, state->srvid,
+ db_push_msg_handler, state);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Failed to register srvid for db push\n"));
+ talloc_free(state);
+ return -1;
+ }
+
+ if (ctdb_lockdb_mark(ctdb_db) != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Failed to get lock on entire db - failing\n"));
+ srvid_deregister(ctdb->srv, state->srvid, state);
+ talloc_free(state);
+ return -1;
+ }
+
+ /* the mark taken above stays in place until ctdb_control_db_push_confirm() */
+ ctdb_db->push_started = true;
+ ctdb_db->push_state = state;
+
+ return 0;
+}
+
+/*
+ * Finish a database push started by ctdb_control_db_push_start().
+ *
+ * indata carries the uint32_t database id.  For databases with
+ * read-only support the tracking database is wiped (read-only support
+ * is dropped if that fails) and active revoke children are freed.  The
+ * database lock mark is removed, the push message handler is
+ * deregistered and outdata receives the uint32_t number of records
+ * stored during the push.
+ *
+ * Returns 0 on success, -1 if the database is unknown, not frozen, no
+ * push was started, the push state is missing, or on allocation
+ * failure.
+ */
+int32_t ctdb_control_db_push_confirm(struct ctdb_context *ctdb,
+ TDB_DATA indata, TDB_DATA *outdata)
+{
+ uint32_t db_id;
+ struct ctdb_db_context *ctdb_db;
+ struct db_push_state *state;
+
+ db_id = *(uint32_t *)indata.dptr;
+
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (ctdb_db == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
+ return -1;
+ }
+
+ if (!ctdb_db_frozen(ctdb_db)) {
+ DEBUG(DEBUG_ERR,
+ ("rejecting ctdb_control_db_push_confirm when not frozen\n"));
+ return -1;
+ }
+
+ if (!ctdb_db->push_started) {
+ DEBUG(DEBUG_ERR, (__location__ " DB push not started\n"));
+ return -1;
+ }
+
+ if (ctdb_db_readonly(ctdb_db)) {
+ DEBUG(DEBUG_ERR,
+ ("Clearing the tracking database for dbid 0x%x\n",
+ ctdb_db->db_id));
+ if (tdb_wipe_all(ctdb_db->rottdb) != 0) {
+ DEBUG(DEBUG_ERR,
+ ("Failed to wipe tracking database for 0x%x."
+ " Dropping read-only delegation support\n",
+ ctdb_db->db_id));
+ tdb_close(ctdb_db->rottdb);
+ ctdb_db->rottdb = NULL;
+ ctdb_db_reset_readonly(ctdb_db);
+ }
+
+ /* presumably freeing an entry unlinks it from the list (destructor) - confirm, otherwise this loop would not terminate */
+ while (ctdb_db->revokechild_active != NULL) {
+ talloc_free(ctdb_db->revokechild_active);
+ }
+ }
+
+ /* the mark is dropped before the push state is checked, so it is released even on the error return below */
+ ctdb_lockdb_unmark(ctdb_db);
+
+ state = (struct db_push_state *)ctdb_db->push_state;
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Missing push db state\n"));
+ return -1;
+ }
+
+ srvid_deregister(ctdb->srv, state->srvid, state);
+
+ outdata->dptr = talloc_size(outdata, sizeof(uint32_t));
+ if (outdata->dptr == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n"));
+ talloc_free(state);
+ ctdb_db->push_state = NULL;
+ return -1;
+ }
+
+ memcpy(outdata->dptr, (uint8_t *)&state->num_records, sizeof(uint32_t));
+ outdata->dsize = sizeof(uint32_t);
+
+ talloc_free(state);
+ ctdb_db->push_started = false;
+ ctdb_db->push_state = NULL;
+
+ return 0;
+}
+
+/* Context kept while the recovery lock is probed on leaving recovery */
+struct set_recmode_state {
+ struct ctdb_context *ctdb;
+ struct ctdb_req_control_old *c; /* deferred control, answered by set_recmode_handler() */
+};
+
+/*
+ * Completion callback for the recovery lock probe started by
+ * ctdb_control_set_recmode() when leaving recovery.
+ *
+ * status is the single-character result of the lock attempt:
+ *   '0'  - the lock could be taken: treated as an error, reply -1
+ *   '1'  - contention: the expected outcome, switch to
+ *          CTDB_RECOVERY_NORMAL and record the latency
+ *   '2'  - timeout: tolerated, switch to CTDB_RECOVERY_NORMAL
+ *   else - unexpected error, reply -1
+ *
+ * In every case the deferred control in state->c is answered and
+ * state is freed.
+ */
+static void set_recmode_handler(char status,
+				double latency,
+				void *private_data)
+{
+	struct set_recmode_state *state = talloc_get_type_abort(
+		private_data, struct set_recmode_state);
+	int s = 0;
+	const char *err = NULL;
+
+	switch (status) {
+	case '0':
+		/* Mutex taken */
+		DEBUG(DEBUG_ERR,
+		      ("ERROR: Daemon able to take recovery lock on \"%s\" during recovery\n",
+		       state->ctdb->recovery_lock));
+		s = -1;
+		err = "Took recovery lock from daemon during recovery - probably a cluster filesystem lock coherence problem";
+		break;
+
+	case '1':
+		/* Contention */
+		DEBUG(DEBUG_DEBUG, (__location__ " Recovery lock check OK\n"));
+		state->ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
+		ctdb_process_deferred_attach(state->ctdb);
+
+		s = 0;
+
+		CTDB_UPDATE_RECLOCK_LATENCY(state->ctdb, "daemon reclock",
+					    reclock.ctdbd, latency);
+		break;
+
+	case '2':
+		/* Timeout.  Consider this a success, not a failure,
+		 * as we failed to set the recovery lock which is what
+		 * we wanted.  This can be caused by the cluster
+		 * filesystem being very slow to arbitrate locks
+		 * immediately after a node failure. */
+		/* Leading space keeps the location and the text apart */
+		DEBUG(DEBUG_WARNING,
+		      (__location__
+		       " Time out getting recovery lock, allowing recmode set anyway\n"));
+		state->ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
+		ctdb_process_deferred_attach(state->ctdb);
+
+		s = 0;
+		break;
+
+	default:
+		DEBUG(DEBUG_ERR,
+		      ("Unexpected error when testing recovery lock\n"));
+		s = -1;
+		err = "Unexpected error when testing recovery lock";
+		break;
+	}
+
+	ctdb_request_control_reply(state->ctdb, state->c, NULL, s, err);
+	talloc_free(state);
+}
+
+/*
+ * Timer callback armed by ctdb_deferred_drop_all_ips(): the node has
+ * stayed in recovery for too long, so discard the timer context and
+ * release every public IP address.
+ */
+static void
+ctdb_drop_all_ips_event(struct tevent_context *ev, struct tevent_timer *te,
+			struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb;
+
+	ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+	DEBUG(DEBUG_ERR,(__location__ " Been in recovery mode for too long. Dropping all IPS\n"));
+
+	/* Free the context and reset the pointer in one step */
+	TALLOC_FREE(ctdb->release_ips_ctx);
+
+	ctdb_release_all_ips(ctdb);
+}
+
+/*
+ * Set up an event to drop all public ips if we remain in recovery for too
+ * long
+ *
+ * Any previously armed timer is discarded together with its talloc
+ * context.  A new timer firing after tunable.recovery_drop_all_ips
+ * seconds is created on a fresh ctdb->release_ips_ctx.
+ *
+ * Returns 0 on success, -1 if the context or the timer cannot be
+ * allocated; in the latter case release_ips_ctx is reset to NULL.
+ */
+int ctdb_deferred_drop_all_ips(struct ctdb_context *ctdb)
+{
+	struct tevent_timer *te;
+
+	if (ctdb->release_ips_ctx != NULL) {
+		talloc_free(ctdb->release_ips_ctx);
+	}
+	ctdb->release_ips_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, ctdb->release_ips_ctx);
+
+	te = tevent_add_timer(ctdb->ev, ctdb->release_ips_ctx,
+			      timeval_current_ofs(ctdb->tunable.recovery_drop_all_ips, 0),
+			      ctdb_drop_all_ips_event, ctdb);
+	if (te == NULL) {
+		/* Without the timer the IPs would never be dropped - report it */
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to add deferred drop all ips timer\n"));
+		TALLOC_FREE(ctdb->release_ips_ctx);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ set the recovery mode
+
+ indata carries the requested uint32_t mode.  Setting the mode that is
+ already active is a no-op.  Entering CTDB_RECOVERY_ACTIVE arms the
+ deferred "drop all ips" timer and takes effect immediately.
+
+ Going back to CTDB_RECOVERY_NORMAL cancels that timer, requires every
+ database generation to match the vnn map generation, thaws the
+ databases if all are frozen and - when a recovery lock is configured -
+ first verifies that this daemon can NOT take the lock.  That check is
+ asynchronous: *async_reply is set and set_recmode_handler() answers
+ the control.
+
+ Returns 0 on success (or when the reply is deferred), -1 on error.
+ errormsg is not written by this function.
+ */
+int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata, bool *async_reply,
+ const char **errormsg)
+{
+ uint32_t recmode = *(uint32_t *)indata.dptr;
+ struct ctdb_db_context *ctdb_db;
+ struct set_recmode_state *state;
+ struct ctdb_cluster_mutex_handle *h;
+
+ if (recmode == ctdb->recovery_mode) {
+ D_INFO("Recovery mode already set to %s\n",
+ recmode == CTDB_RECOVERY_NORMAL ? "NORMAL" : "ACTIVE");
+ return 0;
+ }
+
+ D_NOTICE("Recovery mode set to %s\n",
+ recmode == CTDB_RECOVERY_NORMAL ? "NORMAL" : "ACTIVE");
+
+ /* if we enter recovery but stay in recovery for too long
+ we will eventually drop all our ip addresses
+ */
+ if (recmode == CTDB_RECOVERY_ACTIVE) {
+ /* a failure to arm the timer is only logged; the mode change still happens */
+ if (ctdb_deferred_drop_all_ips(ctdb) != 0) {
+ D_ERR("Failed to set up deferred drop all ips\n");
+ }
+
+ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+ return 0;
+ }
+
+ /* From this point: recmode == CTDB_RECOVERY_NORMAL
+ *
+ * Therefore, what follows is special handling when setting
+ * recovery mode back to normal */
+
+ TALLOC_FREE(ctdb->release_ips_ctx);
+
+ for (ctdb_db = ctdb->db_list; ctdb_db != NULL; ctdb_db = ctdb_db->next) {
+ if (ctdb_db->generation != ctdb->vnn_map->generation) {
+ DEBUG(DEBUG_ERR,
+ ("Inconsistent DB generation %u for %s\n",
+ ctdb_db->generation, ctdb_db->db_name));
+ /* recovery_mode is not modified here; the node simply stays in its current (active) mode */
+ DEBUG(DEBUG_ERR, ("Recovery mode set to ACTIVE\n"));
+ return -1;
+ }
+ }
+
+ /* force the databases to thaw */
+ if (ctdb_db_all_frozen(ctdb)) {
+ ctdb_control_thaw(ctdb, false);
+ }
+
+ if (ctdb->recovery_lock == NULL) {
+ /* Not using recovery lock file */
+ ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
+ ctdb_process_deferred_attach(ctdb);
+ return 0;
+ }
+
+ state = talloc_zero(ctdb, struct set_recmode_state);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+ return -1;
+ }
+ state->ctdb = ctdb;
+ /* c is only attached once the lock attempt has been started, so freeing state on failure does not free c */
+ state->c = NULL;
+
+ /* NOTE(review): the literal 5 is presumably a timeout in seconds - confirm against ctdb_cluster_mutex() */
+ h = ctdb_cluster_mutex(state, ctdb, ctdb->recovery_lock, 5,
+ set_recmode_handler, state, NULL, NULL);
+ if (h == NULL) {
+ talloc_free(state);
+ return -1;
+ }
+
+ state->c = talloc_steal(state, c);
+ *async_reply = true;
+
+ return 0;
+}
+
+
+/*
+  delete a record as part of the vacuum process
+  only delete if we are not lmaster or dmaster, and our rsn is <= the provided rsn
+  use non-blocking locks
+
+  rec carries the key and, as its data, exactly one ctdb_ltdb_header
+  describing the copy the caller wants removed.
+
+  return 0 if the record was successfully deleted (i.e. it does not exist
+  when the function returns)
+  or !0 if the record still exists in the tdb after returning.
+
+  Exception: a local record shorter than an ltdb header is considered
+  corrupt; a best-effort delete is attempted and 0 is returned whether
+  or not that delete succeeded.
+ */
+static int delete_tdb_record(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, struct ctdb_rec_data_old *rec)
+{
+	TDB_DATA key, data, data2;
+	struct ctdb_ltdb_header *hdr, *hdr2;
+
+	/* these are really internal tdb functions - but we need them here for
+	   non-blocking lock of the freelist */
+	int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
+	int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
+
+
+	key.dsize = rec->keylen;
+	key.dptr  = &rec->data[0];
+	data.dsize = rec->datalen;
+	data.dptr = &rec->data[rec->keylen];
+
+	if (ctdb_lmaster(ctdb, &key) == ctdb->pnn) {
+		DBG_INFO("Called delete on record where we are lmaster\n");
+		return -1;
+	}
+
+	if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
+		DBG_ERR("Bad record size\n");
+		return -1;
+	}
+
+	hdr = (struct ctdb_ltdb_header *)data.dptr;
+
+	/* use a non-blocking lock */
+	if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, key) != 0) {
+		DBG_INFO("Failed to get non-blocking chain lock\n");
+		return -1;
+	}
+
+	data2 = tdb_fetch(ctdb_db->ltdb->tdb, key);
+	if (data2.dptr == NULL) {
+		/* already gone locally */
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		return 0;
+	}
+
+	if (data2.dsize < sizeof(struct ctdb_ltdb_header)) {
+		/* list -1 is the freelist, see the declaration comment above */
+		if (tdb_lock_nonblock(ctdb_db->ltdb->tdb, -1, F_WRLCK) == 0) {
+			/* Only claim the delete when it actually happened */
+			if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) {
+				DBG_ERR("Failed to delete corrupt record\n");
+			} else {
+				DBG_ERR("Deleted corrupt record\n");
+			}
+			tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
+		}
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		free(data2.dptr);
+		return 0;
+	}
+
+	hdr2 = (struct ctdb_ltdb_header *)data2.dptr;
+
+	/* the local copy is newer than the one the caller knows about */
+	if (hdr2->rsn > hdr->rsn) {
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		DBG_INFO("Skipping record with rsn=%llu - called with rsn=%llu\n",
+			 (unsigned long long)hdr2->rsn,
+			 (unsigned long long)hdr->rsn);
+		free(data2.dptr);
+		return -1;
+	}
+
+	/* do not allow deleting record that have readonly flags set. */
+	if (hdr->flags & CTDB_REC_RO_FLAGS) {
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		DBG_INFO("Skipping record with readonly flags set\n");
+		free(data2.dptr);
+		return -1;
+	}
+	if (hdr2->flags & CTDB_REC_RO_FLAGS) {
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		DBG_INFO("Skipping record with readonly flags set locally\n");
+		free(data2.dptr);
+		return -1;
+	}
+
+	if (hdr2->dmaster == ctdb->pnn) {
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		DBG_INFO("Attempted delete record where we are the dmaster\n");
+		free(data2.dptr);
+		return -1;
+	}
+
+	if (tdb_lock_nonblock(ctdb_db->ltdb->tdb, -1, F_WRLCK) != 0) {
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		DBG_INFO("Failed to get non-blocking freelist lock\n");
+		free(data2.dptr);
+		return -1;
+	}
+
+	if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) {
+		tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
+		tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+		DBG_INFO("Failed to delete record\n");
+		free(data2.dptr);
+		return -1;
+	}
+
+	tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
+	tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+	free(data2.dptr);
+	return 0;
+}
+
+
+
+/* Carries the deferred control through the start/end recovery event callbacks */
+struct recovery_callback_state {
+ struct ctdb_req_control_old *c; /* control to answer once the event has finished */
+};
+
+
+/*
+ called when the 'recovered' event script has finished
+
+ Counts the recovery, bans this node if the script timed out, replies
+ to the deferred control with the script status, records the finish
+ time and moves the runstate from FIRST_RECOVERY to STARTUP.
+ */
+static void ctdb_end_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+ struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state);
+
+ CTDB_INCREMENT_STAT(ctdb, num_recoveries);
+
+ if (status != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " recovered event script failed (status %d)\n", status));
+ if (status == -ETIMEDOUT) {
+ ctdb_ban_self(ctdb);
+ }
+ }
+
+ /* the script status is passed through unchanged as the control result */
+ ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+ talloc_free(state);
+
+ gettimeofday(&ctdb->last_recovery_finished, NULL);
+
+ if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) {
+ ctdb_set_runstate(ctdb, CTDB_RUNSTATE_STARTUP);
+ }
+}
+
+/*
+ recovery has finished
+
+ Finishes pending persistent transaction commits and starts the
+ "recovered" event.  On success the control is answered
+ asynchronously by ctdb_end_recovery_callback() and 0 is returned with
+ *async_reply set; -1 is returned if the event could not be started.
+ */
+int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ bool *async_reply)
+{
+ int ret;
+ struct recovery_callback_state *state;
+
+ DEBUG(DEBUG_ERR,("Recovery has finished\n"));
+
+ ctdb_persistent_finish_trans3_commits(ctdb);
+
+ state = talloc(ctdb, struct recovery_callback_state);
+ CTDB_NO_MEMORY(ctdb, state);
+
+ state->c = c;
+
+ ret = ctdb_event_script_callback(ctdb, state,
+ ctdb_end_recovery_callback,
+ state,
+ CTDB_EVENT_RECOVERED, "%s", "");
+
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to end recovery\n"));
+ /* c has not been stolen yet, so only state is released here */
+ talloc_free(state);
+ return -1;
+ }
+
+ /* tell the control that we will reply asynchronously */
+ state->c = talloc_steal(state, c);
+ *async_reply = true;
+ return 0;
+}
+
+/*
+  Completion callback of the 'startrecovery' event script: log a
+  failure, answer the deferred control with the script status and
+  release the callback state.
+ */
+static void ctdb_start_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+	struct recovery_callback_state *cb_state;
+
+	cb_state = talloc_get_type(p, struct recovery_callback_state);
+
+	if (status != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " startrecovery event script failed (status %d)\n", status));
+	}
+
+	/* The script status becomes the control's result */
+	ctdb_request_control_reply(ctdb, cb_state->c, NULL, status, NULL);
+	talloc_free(cb_state);
+}
+
+/*
+ * Start the "startrecovery" event with ctdb_start_recovery_callback()
+ * as completion handler.  If the event cannot be started the deferred
+ * control in state->c is answered with -1 and state is freed, so the
+ * caller must not touch state afterwards.
+ */
+static void run_start_recovery_event(struct ctdb_context *ctdb,
+				     struct recovery_callback_state *state)
+{
+	int rc = ctdb_event_script_callback(ctdb, state,
+					    ctdb_start_recovery_callback,
+					    state,
+					    CTDB_EVENT_START_RECOVERY,
+					    "%s", "");
+
+	if (rc == 0) {
+		/* Event is running; the callback takes over from here */
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,("Unable to run startrecovery event\n"));
+	ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
+	talloc_free(state);
+}
+
+/*
+ * Compare two recovery lock settings, either of which may be NULL
+ * (not configured).  Two NULLs are equal, NULL never equals a string,
+ * and two strings are compared with strcmp().
+ */
+static bool reclock_strings_equal(const char *a, const char *b)
+{
+	if (a == NULL || b == NULL) {
+		/* Equal only when both are unset */
+		return a == b;
+	}
+
+	return strcmp(a, b) == 0;
+}
+
+/*
+ * Reply handler for the GET_RECLOCK_FILE control sent to the recovery
+ * master by ctdb_control_start_recovery().
+ *
+ * A failed control is passed on as the reply to the deferred
+ * start-recovery control.  Otherwise the recovery master's setting
+ * (data, forced to be NUL-terminated; empty means none) is compared
+ * with the local ctdb->recovery_lock.  On mismatch the control is
+ * answered with -1 and the shutdown sequence is started; on match the
+ * "startrecovery" event is run.
+ */
+static void start_recovery_reclock_callback(struct ctdb_context *ctdb,
+					    int32_t status,
+					    TDB_DATA data,
+					    const char *errormsg,
+					    void *private_data)
+{
+	struct recovery_callback_state *state = talloc_get_type_abort(
+		private_data, struct recovery_callback_state);
+	const char *local = ctdb->recovery_lock;
+	const char *remote = NULL;
+
+	if (status != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " GET_RECLOCK failed\n"));
+		ctdb_request_control_reply(ctdb, state->c, NULL,
+					   status, errormsg);
+		talloc_free(state);
+		return;
+	}
+
+	/* Check reclock consistency */
+	if (data.dsize > 0) {
+		/* Ensure NUL-termination */
+		data.dptr[data.dsize-1] = '\0';
+		remote = (const char *)data.dptr;
+	}
+	if (! reclock_strings_equal(local, remote)) {
+		/* Inconsistent */
+		ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
+		DEBUG(DEBUG_ERR,
+		      ("Recovery lock configuration inconsistent: "
+		       "recmaster has %s, this node has %s, shutting down\n",
+		       remote == NULL ? "NULL" : remote,
+		       local == NULL ? "NULL" : local));
+		talloc_free(state);
+		ctdb_shutdown_sequence(ctdb, 1);
+		/*
+		 * state has been freed and the control answered: never
+		 * fall through to the success path, even if the
+		 * shutdown sequence were to return.
+		 */
+		return;
+	}
+	DEBUG(DEBUG_INFO,
+	      ("Recovery lock consistency check successful\n"));
+
+	run_start_recovery_event(ctdb, state);
+}
+
+/* Check recovery lock consistency and run eventscripts for the
+ * "startrecovery" event
+ *
+ * The sender of the control (c->hdr.srcnode) is taken to be the
+ * recovery master.  If this node still considers it disconnected the
+ * event is run straight away; otherwise the recovery master is first
+ * asked for its recovery lock setting and the event is run from
+ * start_recovery_reclock_callback().
+ *
+ * On success 0 is returned with *async_reply set and the control is
+ * answered later.  -1 is returned, with the control still owned by the
+ * caller, if the sender is not a known node, on allocation failure or
+ * if the GET_RECLOCK_FILE control cannot be sent.
+ */
+int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
+				    struct ctdb_req_control_old *c,
+				    bool *async_reply)
+{
+	int ret;
+	struct recovery_callback_state *state;
+	uint32_t recmaster = c->hdr.srcnode;
+
+	DEBUG(DEBUG_ERR, ("Recovery has started\n"));
+	gettimeofday(&ctdb->last_recovery_started, NULL);
+
+	/* recmaster indexes ctdb->nodes below - reject out-of-range senders */
+	if (recmaster >= ctdb->num_nodes) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Invalid recovery master node %u\n",
+		       recmaster));
+		return -1;
+	}
+
+	state = talloc(ctdb, struct recovery_callback_state);
+	CTDB_NO_MEMORY(ctdb, state);
+
+	state->c = c;
+
+	/* Although the recovery master sent this node a start
+	 * recovery control, this node might still think the recovery
+	 * master is disconnected.  In this case defer the recovery
+	 * lock consistency check. */
+	if (ctdb->nodes[recmaster]->flags & NODE_FLAGS_DISCONNECTED) {
+		/*
+		 * run_start_recovery_event() answers the control and
+		 * frees state when the event cannot be started, so
+		 * hand over ownership of c first and do not touch
+		 * state afterwards.
+		 */
+		state->c = talloc_steal(state, c);
+		*async_reply = true;
+		run_start_recovery_event(ctdb, state);
+		return 0;
+	}
+
+	/* Ask the recovery master about its reclock setting */
+	ret = ctdb_daemon_send_control(ctdb,
+				       recmaster,
+				       0,
+				       CTDB_CONTROL_GET_RECLOCK_FILE,
+				       0, 0,
+				       tdb_null,
+				       start_recovery_reclock_callback,
+				       state);
+
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " GET_RECLOCK failed\n"));
+		/* c has not been stolen, so the caller still replies to it */
+		talloc_free(state);
+		return -1;
+	}
+
+	/* tell the control that we will reply asynchronously */
+	state->c = talloc_steal(state, c);
+	*async_reply = true;
+
+	return 0;
+}
+
+/*
+ try to delete all these records as part of the vacuuming process
+ and return the records we failed to delete
+
+ indata is a marshall buffer of records for one database.  Each record
+ is handed to delete_tdb_record(); records that could not be deleted
+ are copied verbatim into the reply buffer returned in outdata.
+ Returns 0 on success, -1 on malformed input, unknown database or
+ allocation failure.
+
+ NOTE(review): apart from the buffer header, record offsets and lengths
+ are not checked against indata.dsize here - confirm the sender is
+ trusted.
+*/
+int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
+{
+ struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr;
+ struct ctdb_db_context *ctdb_db;
+ unsigned int i;
+ struct ctdb_rec_data_old *rec;
+ struct ctdb_marshall_buffer *records;
+
+ if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) {
+ DEBUG(DEBUG_ERR,(__location__ " invalid data in try_delete_records\n"));
+ return -1;
+ }
+
+ ctdb_db = find_ctdb_db(ctdb, reply->db_id);
+ if (!ctdb_db) {
+ DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", reply->db_id));
+ return -1;
+ }
+
+
+ DEBUG(DEBUG_DEBUG,("starting try_delete_records of %u records for dbid 0x%x\n",
+ reply->count, reply->db_id));
+
+
+ /* create a blob to send back the records we couldn't delete */
+ records = (struct ctdb_marshall_buffer *)
+ talloc_zero_size(outdata,
+ offsetof(struct ctdb_marshall_buffer, data));
+ if (records == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+ return -1;
+ }
+ records->db_id = ctdb_db->db_id;
+
+
+ rec = (struct ctdb_rec_data_old *)&reply->data[0];
+ for (i=0;i<reply->count;i++) {
+ TDB_DATA key, data;
+
+ key.dptr = &rec->data[0];
+ key.dsize = rec->keylen;
+ data.dptr = &rec->data[key.dsize];
+ data.dsize = rec->datalen;
+
+ if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+ DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record in indata\n"));
+ talloc_free(records);
+ return -1;
+ }
+
+ /* If we can't delete the record we must add it to the reply
+ so the lmaster knows it may not purge this record
+ */
+ if (delete_tdb_record(ctdb, ctdb_db, rec) != 0) {
+ size_t old_size;
+ struct ctdb_ltdb_header *hdr;
+
+ /* hdr and the adjusted data are not used further in this block */
+ hdr = (struct ctdb_ltdb_header *)data.dptr;
+ data.dptr += sizeof(*hdr);
+ data.dsize -= sizeof(*hdr);
+
+ DEBUG(DEBUG_INFO, (__location__ " Failed to vacuum delete record with hash 0x%08x\n", ctdb_hash(&key)));
+
+ /* grow the reply by one record and append the raw record bytes */
+ old_size = talloc_get_size(records);
+ records = talloc_realloc_size(outdata, records, old_size + rec->length);
+ if (records == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to expand\n"));
+ return -1;
+ }
+ records->count++;
+ memcpy(old_size+(uint8_t *)records, rec, rec->length);
+ }
+
+ rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec);
+ }
+
+
+ *outdata = ctdb_marshall_finish(records);
+
+ return 0;
+}
+
+/*
+  Report this node's capabilities: outdata receives a talloc'ed
+  uint32_t copy of ctdb->capabilities.  Returns 0 on success.
+ */
+int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata)
+{
+	uint32_t *capabilities = talloc(outdata, uint32_t);
+
+	CTDB_NO_MEMORY(ctdb, capabilities);
+
+	*capabilities = ctdb->capabilities;
+
+	/* Hand the freshly allocated word to the caller */
+	outdata->dptr = (uint8_t *)capabilities;
+	outdata->dsize = sizeof(uint32_t);
+
+	return 0;
+}
+
+/* The recovery daemon will ping us at regular intervals.
+ If we haven't been pinged for a while we assume the recovery
+ daemon is inoperable and we restart.
+
+ Timer callback: each expiry increments the counter stored in
+ ctdb->recd_ping_count and re-arms the timer until
+ tunable.recd_ping_failcount is reached; after that the recovery
+ daemon is stopped and started again.
+*/
+static void ctdb_recd_ping_timeout(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *p)
+{
+ struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
+ uint32_t *count = talloc_get_type(ctdb->recd_ping_count, uint32_t);
+
+ DEBUG(DEBUG_ERR, ("Recovery daemon ping timeout. Count : %u\n", *count));
+
+ if (*count < ctdb->tunable.recd_ping_failcount) {
+ (*count)++;
+ /* the counter doubles as the timer's talloc parent */
+ tevent_add_timer(ctdb->ev, ctdb->recd_ping_count,
+ timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
+ ctdb_recd_ping_timeout, ctdb);
+ return;
+ }
+
+ DEBUG(DEBUG_ERR, ("Final timeout for recovery daemon ping. Restarting recovery daemon. (This can be caused if the cluster filesystem has hung)\n"));
+
+ ctdb_stop_recoverd(ctdb);
+ ctdb_start_recoverd(ctdb);
+}
+
+/*
+ * Handle a ping from the recovery daemon: the timeout counter is
+ * replaced by a fresh zeroed one and, unless tunable.recd_ping_timeout
+ * is 0, the ping timeout timer is armed on it.  Returns 0 on success.
+ */
+int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
+{
+ /* the timer is allocated on the counter, so this also discards a pending timer */
+ talloc_free(ctdb->recd_ping_count);
+
+ ctdb->recd_ping_count = talloc_zero(ctdb, uint32_t);
+ CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_count);
+
+ if (ctdb->tunable.recd_ping_timeout != 0) {
+ tevent_add_timer(ctdb->ev, ctdb->recd_ping_count,
+ timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
+ ctdb_recd_ping_timeout, ctdb);
+ }
+
+ return 0;
+}
+
+/*
+ * Take this node out of service: invalidate the generation of the vnn
+ * map and of every database, switch to CTDB_RECOVERY_ACTIVE, send
+ * this node a no-reply FREEZE control and release all public IP
+ * addresses.
+ */
+void ctdb_node_become_inactive(struct ctdb_context *ctdb)
+{
+ struct ctdb_db_context *ctdb_db;
+
+ D_WARNING("Making node INACTIVE\n");
+
+ /*
+ * Do not service database calls - reset generation to invalid
+ * so this node ignores any REQ/REPLY CALL/DMASTER
+ */
+ ctdb->vnn_map->generation = INVALID_GENERATION;
+ for (ctdb_db = ctdb->db_list; ctdb_db != NULL; ctdb_db = ctdb_db->next) {
+ ctdb_db->generation = INVALID_GENERATION;
+ }
+
+ /*
+ * Although this bypasses the control, the only thing missing
+ * is the deferred drop of all public IPs, which isn't
+ * necessary because they are dropped below
+ */
+ if (ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) {
+ D_NOTICE("Recovery mode set to ACTIVE\n");
+ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+ }
+
+ /*
+ * Initiate database freeze - this will be scheduled for
+ * immediate execution and will be in progress long before the
+ * calling control returns
+ */
+ /* sent to ourselves (ctdb->pnn) with NOREPLY; the return value is not checked */
+ ctdb_daemon_send_control(ctdb,
+ ctdb->pnn,
+ 0,
+ CTDB_CONTROL_FREEZE,
+ 0,
+ CTDB_CTRL_FLAG_NOREPLY,
+ tdb_null,
+ NULL,
+ NULL);
+
+ D_NOTICE("Dropping all public IP addresses\n");
+ ctdb_release_all_ips(ctdb);
+}
+
+/*
+ * Stop this node: flag the local node entry as NODE_FLAGS_STOPPED and
+ * make the node inactive.  Always returns 0.
+ */
+int32_t ctdb_control_stop_node(struct ctdb_context *ctdb)
+{
+	struct ctdb_node *self;
+
+	DEBUG(DEBUG_ERR, ("Stopping node\n"));
+
+	self = ctdb->nodes[ctdb->pnn];
+	self->flags |= NODE_FLAGS_STOPPED;
+
+	ctdb_node_become_inactive(ctdb);
+
+	return 0;
+}
+
+/*
+ * Continue a stopped node: clear NODE_FLAGS_STOPPED on the local node
+ * entry.  Nothing else is changed here.  Always returns 0.
+ */
+int32_t ctdb_control_continue_node(struct ctdb_context *ctdb)
+{
+	struct ctdb_node *self;
+
+	DEBUG(DEBUG_ERR, ("Continue node\n"));
+
+	self = ctdb->nodes[ctdb->pnn];
+	self->flags &= ~NODE_FLAGS_STOPPED;
+
+	return 0;
+}
+
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
new file mode 100644
index 0000000..84e2081
--- /dev/null
+++ b/ctdb/server/ctdb_recoverd.c
@@ -0,0 +1,3286 @@
+/*
+ ctdb recovery daemon
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/time.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "protocol/protocol_basic.h"
+
+#include "common/system_socket.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "server/ctdb_config.h"
+
+#include "ctdb_cluster_mutex.h"
+
+/* List of SRVID requests that need to be processed */
+struct srvid_list {
+ /* Doubly-linked list pointers, maintained with DLIST_ADD() */
+ struct srvid_list *next, *prev;
+ /* Queued request; talloc-stolen onto this list element when added */
+ struct ctdb_srvid_message *request;
+};
+
+/*
+ * Container for queued SRVID requests.  It is the talloc parent of
+ * every list element, so freeing it frees the whole queue.
+ */
+struct srvid_requests {
+ /* Head of the list of queued requests */
+ struct srvid_list *requests;
+};
+
+/*
+ * Send result to the node/srvid named in request, unless the request
+ * carries srvid 0.  The request is always freed, whether or not a
+ * reply was sent or sending succeeded.
+ */
+static void srvid_request_reply(struct ctdb_context *ctdb,
+				struct ctdb_srvid_message *request,
+				TDB_DATA result)
+{
+	int rc;
+
+	/* srvid 0 means the sender does not want a reply */
+	if (request->srvid != 0) {
+		rc = ctdb_client_send_message(ctdb,
+					      request->pnn,
+					      request->srvid,
+					      result);
+		if (rc != 0) {
+			DEBUG(DEBUG_ERR,("Failed to send SRVID reply to %u:%llu\n",
+					 (unsigned)request->pnn,
+					 (unsigned long long)request->srvid));
+		} else {
+			DEBUG(DEBUG_INFO,("Sent SRVID reply to %u:%llu\n",
+					  (unsigned)request->pnn,
+					  (unsigned long long)request->srvid));
+		}
+	}
+
+	talloc_free(request);
+}
+
+/*
+ * Send result to every request queued in *requests, then free the
+ * queue and reset *requests to NULL.  Does nothing if *requests is
+ * already NULL.
+ */
+static void srvid_requests_reply(struct ctdb_context *ctdb,
+				 struct srvid_requests **requests,
+				 TDB_DATA result)
+{
+	struct srvid_list *item;
+
+	if (*requests != NULL) {
+		item = (*requests)->requests;
+		while (item != NULL) {
+			/* Frees item->request but not the list element */
+			srvid_request_reply(ctdb, item->request, result);
+			item = item->next;
+		}
+
+		/* Free the list structure... */
+		TALLOC_FREE(*requests);
+	}
+}
+
+/*
+ * Queue request on *requests, creating the queue on first use.  The
+ * request is talloc-stolen onto its list element.  If memory cannot
+ * be allocated, the request is answered immediately with -ENOMEM and
+ * freed instead of being queued.
+ */
+static void srvid_request_add(struct ctdb_context *ctdb,
+			      struct srvid_requests **requests,
+			      struct ctdb_srvid_message *request)
+{
+	struct srvid_list *entry = NULL;
+	int32_t err;
+	TDB_DATA reply;
+
+	if (*requests == NULL) {
+		*requests = talloc_zero(ctdb, struct srvid_requests);
+	}
+
+	if (*requests != NULL) {
+		entry = talloc_zero(*requests, struct srvid_list);
+		if (entry == NULL && (*requests)->requests == NULL) {
+			/* The queue holds nothing, so drop it again */
+			TALLOC_FREE(*requests);
+		}
+	}
+
+	if (entry != NULL) {
+		entry->request =
+			(struct ctdb_srvid_message *)talloc_steal(entry,
+								  request);
+		DLIST_ADD((*requests)->requests, entry);
+		return;
+	}
+
+	/* Could not queue the request, so send a failure reply now */
+	DEBUG(DEBUG_ERR, (__location__
+			  " Out of memory, failed to queue SRVID request\n"));
+	err = -ENOMEM;
+	reply.dptr = (uint8_t *)&err;
+	reply.dsize = sizeof(err);
+	srvid_request_reply(ctdb, request, reply);
+}
+
+/* An abstraction to allow an operation (takeover runs, recoveries,
+ * ...) to be disabled for a given timeout */
+struct ctdb_op_state {
+ /* Pending re-enable timer; non-NULL means the operation is disabled */
+ struct tevent_timer *timer;
+ /* Set by ctdb_op_begin(), cleared by ctdb_op_end() */
+ bool in_progress;
+ /* Name used in log messages; the pointer is stored, not copied */
+ const char *name;
+};
+
+/*
+ * Allocate operation state on mem_ctx.  The new state is enabled (no
+ * timer) and not in progress.  name is stored by pointer.  Returns
+ * NULL if the allocation fails.
+ */
+static struct ctdb_op_state *ctdb_op_init(TALLOC_CTX *mem_ctx, const char *name)
+{
+	struct ctdb_op_state *op;
+
+	op = talloc_zero(mem_ctx, struct ctdb_op_state);
+	if (op == NULL) {
+		return NULL;
+	}
+
+	op->name = name;
+	op->in_progress = false;
+
+	return op;
+}
+
+/* An operation is disabled for as long as its re-enable timer exists */
+static bool ctdb_op_is_disabled(struct ctdb_op_state *state)
+{
+	if (state->timer == NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Mark the operation as in progress.  Returns false, leaving the
+ * state untouched, if the operation is currently disabled.
+ */
+static bool ctdb_op_begin(struct ctdb_op_state *state)
+{
+	bool allowed = !ctdb_op_is_disabled(state);
+
+	if (allowed) {
+		state->in_progress = true;
+	} else {
+		DEBUG(DEBUG_NOTICE,
+		      ("Unable to begin - %s are disabled\n", state->name));
+	}
+
+	return allowed;
+}
+
+/* Mark the operation as no longer in progress.  Always returns false. */
+static bool ctdb_op_end(struct ctdb_op_state *state)
+{
+	state->in_progress = false;
+
+	return false;
+}
+
+/* Report whether ctdb_op_begin() has been called without a matching end */
+static bool ctdb_op_is_in_progress(struct ctdb_op_state *state)
+{
+	bool busy = state->in_progress;
+
+	return busy;
+}
+
+/* Re-enable the operation by cancelling any pending re-enable timer */
+static void ctdb_op_enable(struct ctdb_op_state *state)
+{
+	if (state->timer != NULL) {
+		talloc_free(state->timer);
+		state->timer = NULL;
+	}
+}
+
+/*
+ * Timer callback installed by ctdb_op_disable(): the disable period
+ * has expired, so re-enable the operation.  p is the ctdb_op_state.
+ */
+static void ctdb_op_timeout_handler(struct tevent_context *ev,
+				    struct tevent_timer *te,
+				    struct timeval yt, void *p)
+{
+	struct ctdb_op_state *op = talloc_get_type(p, struct ctdb_op_state);
+
+	DEBUG(DEBUG_NOTICE,("Reenabling %s after timeout\n", op->name));
+
+	/* Dropping the timer is what re-enables the operation */
+	TALLOC_FREE(op->timer);
+}
+
+/*
+ * Disable the operation for timeout seconds, or re-enable it
+ * immediately when timeout is 0.
+ *
+ * Returns 0 on success, -EAGAIN if the operation is in progress and
+ * so can not be disabled, -ENOMEM if the timer can not be created.
+ */
+static int ctdb_op_disable(struct ctdb_op_state *state,
+			   struct tevent_context *ev,
+			   uint32_t timeout)
+{
+	struct tevent_timer *te;
+
+	if (timeout == 0) {
+		DEBUG(DEBUG_NOTICE,("Reenabling %s\n", state->name));
+		TALLOC_FREE(state->timer);
+		return 0;
+	}
+
+	if (state->in_progress) {
+		DEBUG(DEBUG_ERR,
+		      ("Unable to disable %s - in progress\n", state->name));
+		return -EAGAIN;
+	}
+
+	DEBUG(DEBUG_NOTICE,("Disabling %s for %u seconds\n",
+			    state->name, timeout));
+
+	/* Drop any timer left over from an earlier disable */
+	ctdb_op_enable(state);
+
+	/* The operation is re-enabled when this timer fires */
+	te = tevent_add_timer(ev, state,
+			      timeval_current_ofs(timeout, 0),
+			      ctdb_op_timeout_handler, state);
+	state->timer = te;
+	if (te == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Unable to setup timer\n"));
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Ban credits accumulated against one node */
+struct ctdb_banning_state {
+ /* PNN of the node the credits are held against */
+ uint32_t pnn;
+ /* Accumulated credits; compared against 2 * number of nodes before banning */
+ uint32_t count;
+ /* When credits were last added; used to forgive old credits */
+ struct timeval last_reported_time;
+};
+
+struct ctdb_cluster_lock_handle;
+
+/*
+ private state of recovery daemon
+ */
+struct ctdb_recoverd {
+ struct ctdb_context *ctdb;
+ /* PNN of the current leader; set to CTDB_UNKNOWN_PNN when this node gives up leadership */
+ uint32_t leader;
+ /* Timer that re-arms the leader broadcast loop every second */
+ struct tevent_timer *leader_broadcast_te;
+ struct tevent_timer *leader_broadcast_timeout_te;
+ /* PNN of this node */
+ uint32_t pnn;
+ /* Node most recently recorded by ctdb_set_culprit_count() */
+ uint32_t last_culprit_node;
+ /* talloc array of per-node ban credits, grown on demand; NULL when empty */
+ struct ctdb_banning_state *banning_state;
+ struct ctdb_node_map_old *nodemap;
+ struct timeval priority_time;
+ /* Set when a takeover run fails, cleared when one succeeds */
+ bool need_takeover_run;
+ /* Set when do_recovery() starts; cleared on success or on abort after a leader change */
+ bool need_recovery;
+ /* Flags of this node, tested against NODE_FLAGS_INACTIVE */
+ uint32_t node_flags;
+ struct tevent_timer *send_election_te;
+ /* True while an election is running; cleared by ctdb_election_timeout() */
+ bool election_in_progress;
+ struct tevent_timer *election_timeout;
+ struct srvid_requests *reallocate_requests;
+ /* Enable/disable and in-progress state for takeover runs */
+ struct ctdb_op_state *takeover_run;
+ /* Enable/disable and in-progress state for recoveries */
+ struct ctdb_op_state *recovery;
+ struct ctdb_iface_list_old *ifaces;
+ /* talloc array of PNNs passed to the takeover helper; freed after a successful run */
+ uint32_t *force_rebalance_nodes;
+ /* Capabilities last fetched by update_capabilities() */
+ struct ctdb_node_capabilities *caps;
+ bool frozen_on_inactive;
+ /* Non-NULL while the cluster lock is held or being taken */
+ struct ctdb_cluster_lock_handle *cluster_lock_handle;
+ /* PID of the running helper process, -1 when none is running */
+ pid_t helper_pid;
+};
+
+/*
+ * Absolute timeouts built from tunables.  Both macros expand to an
+ * expression that reads a variable named `ctdb`, so one must be in
+ * scope wherever they are used.
+ */
+#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
+#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
+
+static void ctdb_restart_recd(struct tevent_context *ev,
+ struct tevent_timer *te, struct timeval t,
+ void *private_data);
+
+/* True when the recorded leader PNN is this node's own PNN */
+static bool this_node_is_leader(struct ctdb_recoverd *rec)
+{
+	if (rec->leader != rec->pnn) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * This node can be leader only if none of its NODE_FLAGS_INACTIVE
+ * bits are set and it has the CTDB_CAP_RECMASTER capability.
+ */
+static bool this_node_can_be_leader(struct ctdb_recoverd *rec)
+{
+	if ((rec->node_flags & NODE_FLAGS_INACTIVE) != 0) {
+		return false;
+	}
+
+	if ((rec->ctdb->capabilities & CTDB_CAP_RECMASTER) == 0) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Look up pnn in the recovery daemon's nodemap.  Returns true if the
+ * node is present, and then stores its flags in *flags when flags is
+ * not NULL.  Returns false, leaving *flags untouched, otherwise.
+ */
+static bool node_flags(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t *flags)
+{
+	size_t idx = 0;
+
+	while (idx < rec->nodemap->num) {
+		if (rec->nodemap->nodes[idx].pnn != pnn) {
+			idx++;
+			continue;
+		}
+
+		if (flags != NULL) {
+			*flags = rec->nodemap->nodes[idx].flags;
+		}
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Ban node pnn for the recovery_ban_period tunable number of seconds.
+ * The ban control is sent to the node being banned.  An invalid PNN
+ * or a failed control is logged; nothing is reported to the caller.
+ */
+static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+	uint32_t period = ctdb->tunable.recovery_ban_period;
+	struct ctdb_ban_state ban;
+	int rc;
+
+	if (!ctdb_validate_pnn(ctdb, pnn)) {
+		DEBUG(DEBUG_ERR,("Bad pnn %u in ctdb_ban_node\n", pnn));
+		return;
+	}
+
+	ban.pnn = pnn;
+	ban.time = period;
+
+	DEBUG(DEBUG_NOTICE,("Banning node %u for %u seconds\n", pnn, period));
+
+	rc = ctdb_ctrl_set_ban(ctdb, CONTROL_TIMEOUT(), pnn, &ban);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to ban node %d\n", pnn));
+	}
+}
+
+/* NOTE(review): not used in this part of the file; presumably the outcome of a monitoring check - confirm */
+enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
+
+
+/*
+ * Add count ban credits to node culprit and remember it as the last
+ * culprit.
+ *
+ * Nothing is recorded if culprit is not in the nodemap, if this node
+ * is itself inactive, or if the credit array can not be grown.
+ * Credits older than the recovery_grace_period tunable are forgiven
+ * before the new ones are added.
+ */
+static void ctdb_set_culprit_count(struct ctdb_recoverd *rec,
+				   uint32_t culprit,
+				   uint32_t count)
+{
+	struct ctdb_context *ctdb = talloc_get_type_abort(
+		rec->ctdb, struct ctdb_context);
+	struct ctdb_banning_state *entry = NULL;
+	size_t n = 0;
+	size_t idx;
+
+	if (!node_flags(rec, culprit, NULL)) {
+		DBG_WARNING("Unknown culprit node %"PRIu32"\n", culprit);
+		return;
+	}
+
+	/* A banned or stopped node does not get to blame other nodes */
+	if (rec->node_flags & NODE_FLAGS_INACTIVE) {
+		D_WARNING("This node is INACTIVE, cannot set culprit node %d\n",
+			  culprit);
+		return;
+	}
+
+	/* Search for an existing entry for this node */
+	if (rec->banning_state != NULL) {
+		n = talloc_array_length(rec->banning_state);
+	}
+	for (idx = 0; idx < n; idx++) {
+		if (rec->banning_state[idx].pnn == culprit) {
+			entry = &rec->banning_state[idx];
+			break;
+		}
+	}
+
+	if (entry != NULL) {
+		/* Forgive old transgressions beyond the tunable time-limit */
+		if (entry->count > 0 &&
+		    timeval_elapsed(&entry->last_reported_time) >
+		    ctdb->tunable.recovery_grace_period) {
+			entry->count = 0;
+		}
+	} else {
+		struct ctdb_banning_state *grown;
+
+		/*
+		 * No entry yet: grow the array by one.  talloc_realloc()
+		 * also copes with rec->banning_state being NULL.
+		 */
+		grown = talloc_realloc(rec,
+				       rec->banning_state,
+				       struct ctdb_banning_state,
+				       n + 1);
+		if (grown == NULL) {
+			DBG_WARNING("Memory allocation error\n");
+			return;
+		}
+		rec->banning_state = grown;
+
+		/* The new element is the last one - initialise it */
+		entry = &grown[n];
+		*entry = (struct ctdb_banning_state) {
+			.pnn = culprit,
+			.count = 0,
+		};
+	}
+
+	entry->count += count;
+	entry->last_reported_time = timeval_current();
+	rec->last_culprit_node = culprit;
+}
+
+/* Forget all accumulated ban credits by freeing the credit array */
+static void ban_counts_reset(struct ctdb_recoverd *rec)
+{
+	D_NOTICE("Resetting ban count to 0 for all nodes\n");
+
+	if (rec->banning_state != NULL) {
+		talloc_free(rec->banning_state);
+		rec->banning_state = NULL;
+	}
+}
+
+/*
+ * Record a single ban credit against node culprit
+ */
+static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit)
+{
+	const uint32_t one_credit = 1;
+
+	ctdb_set_culprit_count(rec, culprit, one_credit);
+}
+
+/*
+ * Fetch the capabilities of the nodes in nodemap.
+ *
+ * On success this node's own capabilities are copied into
+ * ctdb->capabilities and the full set replaces rec->caps.  Returns 0
+ * on success, -1 if the capabilities can not be fetched or do not
+ * include this node; rec->caps is left untouched in that case.
+ */
+static int update_capabilities(struct ctdb_recoverd *rec,
+			       struct ctdb_node_map_old *nodemap)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+	struct ctdb_node_capabilities *caps;
+	TALLOC_CTX *tmp_ctx;
+	uint32_t *capp;
+	int status = -1;
+
+	tmp_ctx = talloc_new(rec);
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+	caps = ctdb_get_capabilities(ctdb, tmp_ctx,
+				     CONTROL_TIMEOUT(), nodemap);
+	if (caps == NULL) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to get node capabilities\n"));
+	} else {
+		capp = ctdb_get_node_capabilities(caps, rec->pnn);
+		if (capp == NULL) {
+			DEBUG(DEBUG_ERR,
+			      (__location__
+			       " Capabilities don't include current node.\n"));
+		} else {
+			ctdb->capabilities = *capp;
+
+			/* Move the new set off tmp_ctx so it survives */
+			TALLOC_FREE(rec->caps);
+			rec->caps = talloc_steal(rec, caps);
+
+			status = 0;
+		}
+	}
+
+	talloc_free(tmp_ctx);
+	return status;
+}
+
+/*
+ * Send CTDB_CONTROL_SET_RECMODE with rec_mode to the nodes returned
+ * by list_of_active_nodes() for nodemap.  rec is not used.
+ *
+ * Returns 0 on success, -1 on allocation failure or if the control
+ * fails.
+ */
+static int set_recovery_mode(struct ctdb_context *ctdb,
+			     struct ctdb_recoverd *rec,
+			     struct ctdb_node_map_old *nodemap,
+			     uint32_t rec_mode)
+{
+	TALLOC_CTX *tmp_ctx;
+	TDB_DATA payload;
+	uint32_t *targets;
+	int status = 0;
+	int rc;
+
+	tmp_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+	targets = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
+
+	payload.dptr = (unsigned char *)&rec_mode;
+	payload.dsize = sizeof(uint32_t);
+
+	rc = ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMODE,
+				       targets, 0,
+				       CONTROL_TIMEOUT(),
+				       false, payload,
+				       NULL, NULL,
+				       NULL);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode. Recovery failed.\n"));
+		status = -1;
+	}
+
+	talloc_free(tmp_ctx);
+	return status;
+}
+
+/*
+ * Update flags on all connected nodes
+ *
+ * Sends CTDB_CONTROL_MODIFY_FLAGS for node pnn to every connected
+ * node in rec->nodemap.  The flags currently held in rec->nodemap
+ * are sent as the old flags and the flags argument as the new ones.
+ *
+ * Returns 0 on success, -1 if pnn is not in the nodemap, if memory
+ * can not be allocated or if the control fails.
+ */
+static int update_flags_on_all_nodes(struct ctdb_recoverd *rec,
+				     uint32_t pnn,
+				     uint32_t flags)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+	struct timeval timeout = CONTROL_TIMEOUT();
+	struct ctdb_node_map_old *nodemap = rec->nodemap;
+	struct ctdb_node_flag_change c;
+	TALLOC_CTX *tmp_ctx;
+	TDB_DATA data;
+	uint32_t *nodes;
+	uint32_t i;
+	int ret;
+
+	/* Validate pnn before allocating anything */
+	for (i = 0; i < nodemap->num; i++) {
+		if (pnn == nodemap->nodes[i].pnn) {
+			break;
+		}
+	}
+	if (i >= nodemap->num) {
+		DBG_ERR("Nodemap does not contain node %d\n", pnn);
+		return -1;
+	}
+
+	/*
+	 * Without this check a failed allocation would leave the node
+	 * list below parented to NULL, where talloc_free(tmp_ctx)
+	 * could never release it
+	 */
+	tmp_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+	c.pnn = pnn;
+	c.old_flags = nodemap->nodes[i].flags;
+	c.new_flags = flags;
+
+	data.dsize = sizeof(c);
+	data.dptr = (unsigned char *)&c;
+
+	/* send the flags update to all connected nodes */
+	nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+
+	ret = ctdb_client_async_control(ctdb,
+					CTDB_CONTROL_MODIFY_FLAGS,
+					nodes,
+					0,
+					timeout,
+					false,
+					data,
+					NULL,
+					NULL,
+					NULL);
+	if (ret != 0) {
+		DBG_ERR("Unable to update flags on remote nodes\n");
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	talloc_free(tmp_ctx);
+	return 0;
+}
+
+static bool _cluster_lock_lock(struct ctdb_recoverd *rec);
+static bool cluster_lock_held(struct ctdb_recoverd *rec);
+
+/* The cluster lock is in use whenever a recovery lock is configured */
+static bool cluster_lock_enabled(struct ctdb_recoverd *rec)
+{
+	if (rec->ctdb->recovery_lock == NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Take the cluster lock.  Returns true if the lock is not configured,
+ * is already held or was taken now; false if taking it failed.
+ */
+static bool cluster_lock_take(struct ctdb_recoverd *rec)
+{
+	bool taken;
+
+	if (!cluster_lock_enabled(rec)) {
+		return true;
+	}
+
+	if (cluster_lock_held(rec)) {
+		D_NOTICE("Already holding cluster lock\n");
+		return true;
+	}
+
+	D_NOTICE("Attempting to take cluster lock (%s)\n",
+		 rec->ctdb->recovery_lock);
+
+	taken = _cluster_lock_lock(rec);
+	if (taken) {
+		D_NOTICE("Cluster lock taken successfully\n");
+	}
+
+	return taken;
+}
+
+/*
+ * Timer callback for ctdb_wait_timeout(): p points at the caller's
+ * uint32_t flag, which is set to 1 to end the wait
+ */
+static void ctdb_wait_handler(struct tevent_context *ev,
+			      struct tevent_timer *te,
+			      struct timeval yt, void *p)
+{
+	uint32_t *expired = p;
+
+	*expired = 1;
+}
+
+/*
+ * Wait for a given number of seconds, running the event loop while
+ * waiting.  secs may be fractional.
+ *
+ * If the timer can not be created the function logs an error and
+ * returns immediately without waiting.
+ */
+static void ctdb_wait_timeout(struct ctdb_context *ctdb, double secs)
+{
+	uint32_t timed_out = 0;
+	uint32_t usecs = (secs - (uint32_t)secs) * 1000000;
+	struct tevent_timer *te;
+
+	te = tevent_add_timer(ctdb->ev, ctdb, timeval_current_ofs(secs, usecs),
+			      ctdb_wait_handler, &timed_out);
+	if (te == NULL) {
+		/*
+		 * Without a timer nothing would ever set timed_out, so
+		 * the loop below would never finish
+		 */
+		D_ERR("Unable to set up wait timer\n");
+		return;
+	}
+
+	while (!timed_out) {
+		tevent_loop_once(ctdb->ev);
+	}
+}
+
+/*
+ * Broadcast cluster leader
+ *
+ * Sends pnn as a CTDB_SRVID_LEADER message to all connected nodes and
+ * returns the result of sending the message.
+ */
+
+static int leader_broadcast_send(struct ctdb_recoverd *rec, uint32_t pnn)
+{
+	TDB_DATA payload = {
+		.dptr = (uint8_t *)&pnn,
+		.dsize = sizeof(pnn),
+	};
+
+	return ctdb_client_send_message(rec->ctdb,
+					CTDB_BROADCAST_CONNECTED,
+					CTDB_SRVID_LEADER,
+					payload);
+}
+
+static int leader_broadcast_loop(struct ctdb_recoverd *rec);
+static void cluster_lock_release(struct ctdb_recoverd *rec);
+
+/*
+ * Timer callback for the leader broadcast loop.  It runs continuously
+ * but only sends the broadcast when this node is leader and no
+ * election is in progress.  A node that can no longer be leader gives
+ * up leadership and releases the cluster lock if it holds it.  The
+ * timer is re-armed on every path.
+ */
+static void leader_broadcast_loop_handler(struct tevent_context *ev,
+					  struct tevent_timer *te,
+					  struct timeval current_time,
+					  void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type_abort(
+		private_data, struct ctdb_recoverd);
+	int rc;
+
+	if (!this_node_can_be_leader(rec)) {
+		if (this_node_is_leader(rec)) {
+			rec->leader = CTDB_UNKNOWN_PNN;
+		}
+		if (cluster_lock_enabled(rec) && cluster_lock_held(rec)) {
+			cluster_lock_release(rec);
+		}
+	} else if (this_node_is_leader(rec) && !rec->election_in_progress) {
+		rc = leader_broadcast_send(rec, rec->leader);
+		if (rc != 0) {
+			DBG_WARNING("Failed to send leader broadcast\n");
+		}
+	}
+
+	/* Always schedule the next iteration */
+	rc = leader_broadcast_loop(rec);
+	if (rc != 0) {
+		D_WARNING("Failed to set up leader broadcast\n");
+	}
+}
+
+/*
+ * (Re)arm the leader broadcast timer to fire in one second, replacing
+ * any existing timer.  Returns 0 on success or ENOMEM (positive) if
+ * the timer can not be created.
+ */
+static int leader_broadcast_loop(struct ctdb_recoverd *rec)
+{
+	struct tevent_timer *te;
+
+	TALLOC_FREE(rec->leader_broadcast_te);
+
+	te = tevent_add_timer(rec->ctdb->ev,
+			      rec,
+			      timeval_current_ofs(1, 0),
+			      leader_broadcast_loop_handler,
+			      rec);
+	rec->leader_broadcast_te = te;
+
+	return (te == NULL) ? ENOMEM : 0;
+}
+
+/* The broadcast loop is active while its timer exists */
+static bool leader_broadcast_loop_active(struct ctdb_recoverd *rec)
+{
+	if (rec->leader_broadcast_te == NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Timer callback run when an election times out (ends).
+ *
+ * Clears the election state and fast_start.  If this node is now the
+ * leader it takes the cluster lock, and bans itself if it can not.
+ */
+static void ctdb_election_timeout(struct tevent_context *ev,
+				  struct tevent_timer *te,
+				  struct timeval t, void *p)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
+
+	fast_start = false;
+	rec->election_timeout = NULL;
+	rec->election_in_progress = false;
+
+	D_WARNING("Election period ended, leader=%u\n", rec->leader);
+
+	/* Only the leader tries for the lock */
+	if (this_node_is_leader(rec) && !cluster_lock_take(rec)) {
+		D_ERR("Unable to get cluster lock, banning node\n");
+		ctdb_ban_node(rec, rec->pnn);
+	}
+}
+
+
+/*
+ * Run the event loop until no election is in progress.  Returns
+ * immediately if none is in progress on entry.
+ */
+static void ctdb_wait_election(struct ctdb_recoverd *rec)
+{
+	struct tevent_context *ev = rec->ctdb->ev;
+
+	for (;;) {
+		if (!rec->election_in_progress) {
+			break;
+		}
+		tevent_loop_once(ev);
+	}
+}
+
+/*
+ * Update local flags from all remote connected nodes and push out
+ * flags changes to all nodes.  This is only run by the leader.
+ *
+ * nodemap is the local nodemap.  remote_nodemaps is indexed like
+ * nodemap->nodes; entry i is the nodemap fetched from node i.  The
+ * entry for this node and entries for disconnected nodes are never
+ * read.
+ * NOTE(review): entries for connected remote nodes are dereferenced
+ * without a NULL check - the caller must guarantee they are set.
+ *
+ * For every connected node j:
+ *  - a remote node's own view of its flags wins over the local copy,
+ *    which is updated and then pushed to all nodes;
+ *  - otherwise the flags are pushed if any other connected remote
+ *    node disagrees with the local copy.
+ *
+ * Returns 0 on success, -1 if pushing flags fails.
+ */
+static int update_flags(struct ctdb_recoverd *rec,
+			struct ctdb_node_map_old *nodemap,
+			struct ctdb_node_map_old **remote_nodemaps)
+{
+	unsigned int j;
+
+	/* Check flags from remote nodes */
+	for (j=0; j<nodemap->num; j++) {
+		struct ctdb_node_map_old *remote_nodemap=NULL;
+		uint32_t local_flags = nodemap->nodes[j].flags;
+		uint32_t remote_pnn = nodemap->nodes[j].pnn;
+		uint32_t remote_flags;
+		unsigned int i;
+		int ret;
+
+		if (local_flags & NODE_FLAGS_DISCONNECTED) {
+			continue;
+		}
+		if (remote_pnn == rec->pnn) {
+			/*
+			 * No remote nodemap for this node since this
+			 * is the local nodemap.  However, still need
+			 * to check this against the remote nodes and
+			 * push it if they are out-of-date.
+			 */
+			goto compare_remotes;
+		}
+
+		remote_nodemap = remote_nodemaps[j];
+		remote_flags = remote_nodemap->nodes[j].flags;
+
+		if (local_flags != remote_flags) {
+			/*
+			 * Update the local copy of the flags in the
+			 * recovery daemon.
+			 */
+			D_NOTICE("Remote node %u had flags 0x%x, "
+				 "local had 0x%x - updating local\n",
+				 remote_pnn,
+				 remote_flags,
+				 local_flags);
+			nodemap->nodes[j].flags = remote_flags;
+			local_flags = remote_flags;
+			goto push;
+		}
+
+compare_remotes:
+		/* Does any other connected remote node disagree? */
+		for (i = 0; i < nodemap->num; i++) {
+			if (i == j) {
+				continue;
+			}
+			if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
+				continue;
+			}
+			if (nodemap->nodes[i].pnn == rec->pnn) {
+				continue;
+			}
+
+			remote_nodemap = remote_nodemaps[i];
+			remote_flags = remote_nodemap->nodes[j].flags;
+
+			if (local_flags != remote_flags) {
+				goto push;
+			}
+		}
+
+		continue;
+
+push:
+		D_NOTICE("Pushing updated flags for node %u (0x%x)\n",
+			 remote_pnn,
+			 local_flags);
+		ret = update_flags_on_all_nodes(rec, remote_pnn, local_flags);
+		if (ret != 0) {
+			DBG_ERR("Unable to update flags on remote nodes\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ * Create a new random generation id.  Values are drawn from random()
+ * until one differs from INVALID_GENERATION, which is never returned.
+ */
+static uint32_t new_generation(void)
+{
+	uint32_t candidate;
+
+	do {
+		candidate = random();
+	} while (candidate == INVALID_GENERATION);
+
+	return candidate;
+}
+
+/*
+ * True while a cluster lock handle exists.  This is also the case
+ * while the lock is still being taken.
+ */
+static bool cluster_lock_held(struct ctdb_recoverd *rec)
+{
+	if (rec->cluster_lock_handle == NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+/* State for taking and holding the cluster lock */
+struct ctdb_cluster_lock_handle {
+ /* Set once the attempt to take the lock has finished or been cancelled */
+ bool done;
+ /* True if the lock was taken (helper reported status '0') */
+ bool locked;
+ /* Latency reported by the helper when the lock was taken */
+ double latency;
+ /* Underlying cluster mutex handle; freed when taking the lock fails or is cancelled */
+ struct ctdb_cluster_mutex_handle *h;
+ struct ctdb_recoverd *rec;
+};
+
+/*
+ * Cluster mutex callback reporting the outcome of a lock attempt.
+ * private_data is the ctdb_cluster_lock_handle.  Status '0' means the
+ * lock was taken, '1' contention, '2' timeout; anything else is an
+ * unknown error.  The handle is always marked done.
+ */
+static void take_cluster_lock_handler(char status,
+				      double latency,
+				      void *private_data)
+{
+	struct ctdb_cluster_lock_handle *handle = private_data;
+	bool got_lock = (status == '0');
+
+	handle->locked = got_lock;
+
+	if (got_lock) {
+		handle->latency = latency;
+	} else {
+		/*
+		 * Ensure the process has exited and that the file
+		 * descriptor event handler has been cancelled
+		 */
+		TALLOC_FREE(handle->h);
+
+		if (status == '1') {
+			D_ERR("Unable to take cluster lock - contention\n");
+		} else if (status == '2') {
+			D_ERR("Unable to take cluster lock - timeout\n");
+		} else {
+			D_ERR("Unable to take cluster lock - unknown error\n");
+		}
+	}
+
+	handle->done = true;
+}
+
+static void force_election(struct ctdb_recoverd *rec);
+
+/*
+ * Cluster mutex callback run when the lock helper terminates.  The
+ * lock handle is dropped and, if this node can still be leader, an
+ * election is forced.  private_data is the ctdb_recoverd.
+ */
+static void lost_cluster_lock_handler(void *private_data)
+{
+	struct ctdb_recoverd *rec;
+
+	rec = talloc_get_type_abort(private_data, struct ctdb_recoverd);
+
+	D_ERR("Cluster lock helper terminated\n");
+	TALLOC_FREE(rec->cluster_lock_handle);
+
+	if (!this_node_can_be_leader(rec)) {
+		return;
+	}
+
+	force_election(rec);
+}
+
+/*
+ * Take the cluster lock, blocking in a nested event loop until the
+ * attempt has finished.
+ *
+ * On success rec->cluster_lock_handle is left set, the latency is
+ * reported to the daemon and true is returned.  On any failure the
+ * handle is freed and false is returned.
+ */
+static bool _cluster_lock_lock(struct ctdb_recoverd *rec)
+{
+ struct ctdb_context *ctdb = rec->ctdb;
+ struct ctdb_cluster_mutex_handle *h;
+ struct ctdb_cluster_lock_handle *s;
+
+ s = talloc_zero(rec, struct ctdb_cluster_lock_handle);
+ if (s == NULL) {
+ DBG_ERR("Memory allocation error\n");
+ return false;
+ };
+
+ s->rec = rec;
+
+ /*
+ * The mutex handle is parented to s, so freeing s releases the lock.
+ * NOTE(review): 120 is presumably a timeout in seconds - confirm
+ * against ctdb_cluster_mutex().
+ */
+ h = ctdb_cluster_mutex(s,
+ ctdb,
+ ctdb->recovery_lock,
+ 120,
+ take_cluster_lock_handler,
+ s,
+ lost_cluster_lock_handler,
+ rec);
+ if (h == NULL) {
+ talloc_free(s);
+ return false;
+ }
+
+ /* Publish the handle before looping so cluster_lock_release() can cancel the attempt */
+ rec->cluster_lock_handle = s;
+ s->h = h;
+
+ /*
+ * s->done is set by take_cluster_lock_handler() or by a cancel in
+ * cluster_lock_release().
+ * NOTE(review): lost_cluster_lock_handler() frees
+ * rec->cluster_lock_handle; if it can run inside this loop then s
+ * would be read after being freed - confirm it can not.
+ */
+ while (! s->done) {
+ tevent_loop_once(ctdb->ev);
+ }
+
+ if (! s->locked) {
+ TALLOC_FREE(rec->cluster_lock_handle);
+ return false;
+ }
+
+ ctdb_ctrl_report_recd_lock_latency(ctdb,
+ CONTROL_TIMEOUT(),
+ s->latency);
+
+ return true;
+}
+
+/*
+ * Release the cluster lock, or cancel an attempt to take it that is
+ * still in progress.  Does nothing if there is no lock handle.
+ */
+static void cluster_lock_release(struct ctdb_recoverd *rec)
+{
+	struct ctdb_cluster_lock_handle *handle = rec->cluster_lock_handle;
+
+	if (handle == NULL) {
+		return;
+	}
+
+	if (handle->done) {
+		D_NOTICE("Releasing cluster lock\n");
+		TALLOC_FREE(rec->cluster_lock_handle);
+		return;
+	}
+
+	/*
+	 * Taking of the cluster lock is still in progress.  Free the
+	 * cluster mutex handle to release it, but leave the cluster
+	 * lock handle in place and marked as a finished, failed
+	 * attempt so that taking the lock fails.
+	 */
+	D_NOTICE("Cancelling cluster lock\n");
+	TALLOC_FREE(handle->h);
+	handle->done = true;
+	handle->locked = false;
+}
+
+/*
+ * Ban every node whose ban credits have reached twice the number of
+ * nodes in the nodemap, resetting its credits to 0.  *self_ban is set
+ * to true if this node was one of them, false otherwise.
+ */
+static void ban_misbehaving_nodes(struct ctdb_recoverd *rec, bool *self_ban)
+{
+	size_t total = talloc_array_length(rec->banning_state);
+	size_t idx;
+
+	*self_ban = false;
+
+	for (idx = 0; idx < total; idx++) {
+		struct ctdb_banning_state *entry = &rec->banning_state[idx];
+
+		if (entry->count >= 2 * rec->nodemap->num) {
+			D_NOTICE("Node %u reached %u banning credits\n",
+				 entry->pnn,
+				 entry->count);
+			ctdb_ban_node(rec, entry->pnn);
+			entry->count = 0;
+
+			/* Banning ourself? */
+			if (entry->pnn == rec->pnn) {
+				*self_ban = true;
+			}
+		}
+	}
+}
+
+/* State for one run of an external helper program */
+struct helper_state {
+ /* Pipe to the helper: fd[0] is read by this process, fd[1] is passed to the helper as an argument */
+ int fd[2];
+ /* PID of the helper process, -1 if it was never started */
+ pid_t pid;
+ /* Result read from the pipe; 0 is treated as success */
+ int result;
+ /* Set once a result has been read, or reading it failed */
+ bool done;
+};
+
+/*
+ * fd event callback for the helper's pipe: read the helper's integer
+ * result.  A short or failed read is recorded as EPIPE.  Either way
+ * the helper run is marked done.
+ */
+static void helper_handler(struct tevent_context *ev,
+			   struct tevent_fd *fde,
+			   uint16_t flags, void *private_data)
+{
+	struct helper_state *hs = talloc_get_type_abort(
+		private_data, struct helper_state);
+	int nread;
+
+	nread = sys_read(hs->fd[0], &hs->result, sizeof(hs->result));
+	if (nread != sizeof(hs->result)) {
+		hs->result = EPIPE;
+	}
+
+	hs->done = true;
+}
+
+/*
+ * Run helper program prog and wait for its result.
+ *
+ * The helper is started with the write end of a pipe, the daemon
+ * socket name and, if not NULL, arg as its arguments.  This function
+ * then runs the event loop until the helper writes an integer result
+ * to the pipe, or until this node stops being the leader, in which
+ * case the run is abandoned.  type is only used in log messages.
+ *
+ * The helper process is sent SIGKILL before returning, on both the
+ * success and the failure path.
+ *
+ * Returns 0 if the helper reported result 0, -1 otherwise.
+ */
+static int helper_run(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx,
+		      const char *prog, const char *arg, const char *type)
+{
+	struct helper_state *state;
+	struct tevent_fd *fde;
+	const char **args;
+	int nargs, ret;
+
+	state = talloc_zero(mem_ctx, struct helper_state);
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
+		return -1;
+	}
+
+	state->pid = -1;
+	/*
+	 * talloc_zero() leaves both descriptors as 0.  Mark them as
+	 * unopened so that the failure path can not close fd 0 when
+	 * pipe() fails.
+	 */
+	state->fd[0] = -1;
+	state->fd[1] = -1;
+
+	ret = pipe(state->fd);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to create pipe for %s helper\n", type));
+		goto fail;
+	}
+
+	/* Only the write end is meant to be inherited by the helper */
+	set_close_on_exec(state->fd[0]);
+
+	nargs = 4;
+	args = talloc_array(state, const char *, nargs);
+	if (args == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
+		goto fail;
+	}
+
+	args[0] = talloc_asprintf(args, "%d", state->fd[1]);
+	if (args[0] == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
+		goto fail;
+	}
+	args[1] = rec->ctdb->daemon.name;
+	args[2] = arg;
+	args[3] = NULL;
+
+	/* Without an extra argument the list is one entry shorter */
+	if (args[2] == NULL) {
+		nargs = 3;
+	}
+
+	state->pid = ctdb_vfork_exec(state, rec->ctdb, prog, nargs, args);
+	if (state->pid == -1) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to create child for %s helper\n", type));
+		goto fail;
+	}
+
+	close(state->fd[1]);
+	state->fd[1] = -1;
+
+	rec->helper_pid = state->pid;
+	state->done = false;
+
+	fde = tevent_add_fd(rec->ctdb->ev, state, state->fd[0],
+			    TEVENT_FD_READ, helper_handler, state);
+	if (fde == NULL) {
+		goto fail;
+	}
+	tevent_fd_set_auto_close(fde);
+
+	while (!state->done) {
+		tevent_loop_once(rec->ctdb->ev);
+
+		if (!this_node_is_leader(rec)) {
+			D_ERR("Leader changed to %u, aborting %s\n",
+			      rec->leader,
+			      type);
+			state->result = 1;
+			break;
+		}
+	}
+
+	/*
+	 * The fd event owns fd[0] through auto-close, so freeing the
+	 * event closes the descriptor exactly once.  Closing it by
+	 * hand as well would close it a second time when state is
+	 * freed.
+	 */
+	TALLOC_FREE(fde);
+	state->fd[0] = -1;
+
+	if (state->result != 0) {
+		goto fail;
+	}
+
+	rec->helper_pid = -1;
+	ctdb_kill(rec->ctdb, state->pid, SIGKILL);
+	talloc_free(state);
+	return 0;
+
+fail:
+	if (state->fd[0] != -1) {
+		close(state->fd[0]);
+	}
+	if (state->fd[1] != -1) {
+		close(state->fd[1]);
+	}
+	rec->helper_pid = -1;
+	if (state->pid != -1) {
+		ctdb_kill(rec->ctdb, state->pid, SIGKILL);
+	}
+	talloc_free(state);
+	return -1;
+}
+
+
+/*
+ * Run the takeover helper.
+ *
+ * force_rebalance_nodes is a talloc array of PNNs (may be NULL) that
+ * is passed to the helper as a single comma-separated argument.  If
+ * IP failover is disabled in the configuration,
+ * CTDB_DISABLE_IP_FAILOVER=1 is exported for the helper.
+ *
+ * Returns 0 on success, -1 on memory error, if the environment can
+ * not be set up or if the helper fails.
+ */
+static int ctdb_takeover(struct ctdb_recoverd *rec,
+			 uint32_t *force_rebalance_nodes)
+{
+	static char prog[PATH_MAX+1] = "";
+	char *arg;
+	unsigned int i;
+	int ret;
+
+	if (!ctdb_set_helper("takeover_helper", prog, sizeof(prog),
+			     "CTDB_TAKEOVER_HELPER", CTDB_HELPER_BINDIR,
+			     "ctdb_takeover_helper")) {
+		ctdb_die(rec->ctdb, "Unable to set takeover helper\n");
+	}
+
+	arg = NULL;
+	for (i = 0; i < talloc_array_length(force_rebalance_nodes); i++) {
+		uint32_t pnn = force_rebalance_nodes[i];
+		char *t;
+
+		if (arg == NULL) {
+			t = talloc_asprintf(rec, "%u", pnn);
+		} else {
+			t = talloc_asprintf_append(arg, ",%u", pnn);
+		}
+		if (t == NULL) {
+			DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
+			/* Do not leave the partial string on rec */
+			TALLOC_FREE(arg);
+			return -1;
+		}
+		arg = t;
+	}
+
+	if (ctdb_config.failover_disabled) {
+		ret = setenv("CTDB_DISABLE_IP_FAILOVER", "1", 1);
+		if (ret != 0) {
+			D_ERR("Failed to set CTDB_DISABLE_IP_FAILOVER variable\n");
+			TALLOC_FREE(arg);
+			return -1;
+		}
+	}
+
+	ret = helper_run(rec, rec, prog, arg, "takeover");
+
+	/*
+	 * arg is parented to the long-lived rec, so it has to be freed
+	 * here or it accumulates with every takeover run
+	 */
+	TALLOC_FREE(arg);
+
+	return ret;
+}
+
+/*
+ * Perform a takeover run: disable takeover runs on the other
+ * connected nodes, run the takeover helper, then re-enable takeover
+ * runs on those nodes.
+ *
+ * rec->need_takeover_run is set to the inverse of the result, so a
+ * failed run is flagged as still needed.  Returns true if the run
+ * completed successfully.
+ */
+static bool do_takeover_run(struct ctdb_recoverd *rec,
+ struct ctdb_node_map_old *nodemap)
+{
+ uint32_t *nodes = NULL;
+ struct ctdb_disable_message dtr;
+ TDB_DATA data;
+ size_t i;
+ /* Snapshot, used after the run to detect a change of rebalance targets */
+ uint32_t *rebalance_nodes = rec->force_rebalance_nodes;
+ int ret;
+ bool ok;
+
+ DEBUG(DEBUG_NOTICE, ("Takeover run starting\n"));
+
+ /*
+ * NOTE(review): the `done` path below calls ctdb_op_end() even
+ * though this call did not begin the operation, which clears
+ * in_progress for the run detected here - confirm this is intended.
+ */
+ if (ctdb_op_is_in_progress(rec->takeover_run)) {
+ DEBUG(DEBUG_ERR, (__location__
+ " takeover run already in progress \n"));
+ ok = false;
+ goto done;
+ }
+
+ /* Fails if takeover runs are currently disabled */
+ if (!ctdb_op_begin(rec->takeover_run)) {
+ ok = false;
+ goto done;
+ }
+
+ /* Disable IP checks (takeover runs, really) on other nodes
+ * while doing this takeover run. This will stop those other
+ * nodes from triggering takeover runs when think they should
+ * be hosting an IP but it isn't yet on an interface. Don't
+ * wait for replies since a failure here might cause some
+ * noise in the logs but will not actually cause a problem.
+ */
+ ZERO_STRUCT(dtr);
+ dtr.srvid = 0; /* No reply */
+ dtr.pnn = -1;
+
+ /* data points at dtr, so later changes to dtr.timeout are what gets sent */
+ data.dptr = (uint8_t*)&dtr;
+ data.dsize = sizeof(dtr);
+
+ nodes = list_of_connected_nodes(rec->ctdb, nodemap, rec, false);
+
+ /* Disable for 60 seconds. This can be a tunable later if
+ * necessary.
+ */
+ dtr.timeout = 60;
+ for (i = 0; i < talloc_array_length(nodes); i++) {
+ if (ctdb_client_send_message(rec->ctdb, nodes[i],
+ CTDB_SRVID_DISABLE_TAKEOVER_RUNS,
+ data) != 0) {
+ DEBUG(DEBUG_INFO,("Failed to disable takeover runs\n"));
+ }
+ }
+
+ ret = ctdb_takeover(rec, rec->force_rebalance_nodes);
+
+ /* Re-enable takeover runs and IP checks on other nodes */
+ dtr.timeout = 0;
+ for (i = 0; i < talloc_array_length(nodes); i++) {
+ if (ctdb_client_send_message(rec->ctdb, nodes[i],
+ CTDB_SRVID_DISABLE_TAKEOVER_RUNS,
+ data) != 0) {
+ DEBUG(DEBUG_INFO,("Failed to re-enable takeover runs\n"));
+ }
+ }
+
+ /* The message names ctdb_takeover_run(), but the call that failed is ctdb_takeover() */
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("ctdb_takeover_run() failed\n"));
+ ok = false;
+ goto done;
+ }
+
+ ok = true;
+ /* Takeover run was successful so clear force rebalance targets */
+ if (rebalance_nodes == rec->force_rebalance_nodes) {
+ TALLOC_FREE(rec->force_rebalance_nodes);
+ } else {
+ DEBUG(DEBUG_WARNING,
+ ("Rebalance target nodes changed during takeover run - not clearing\n"));
+ }
+done:
+ rec->need_takeover_run = !ok;
+ talloc_free(nodes);
+ ctdb_op_end(rec->takeover_run);
+
+ DEBUG(DEBUG_NOTICE, ("Takeover run %s\n", ok ? "completed successfully" : "unsuccessful"));
+ return ok;
+}
+
+/*
+ * Run the recovery helper with a freshly generated generation id as
+ * its argument.  CTDB_DBDIR_STATE is exported for the helper.  The
+ * argument string is allocated on mem_ctx and is left for the caller
+ * to free with it.
+ *
+ * Returns 0 on success, -1 on memory error, if the environment can
+ * not be set up or if the helper fails.
+ */
+static int db_recovery_parallel(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx)
+{
+	static char prog[PATH_MAX+1] = "";
+	const char *arg;
+	int ret;
+
+	if (!ctdb_set_helper("recovery_helper", prog, sizeof(prog),
+			     "CTDB_RECOVERY_HELPER", CTDB_HELPER_BINDIR,
+			     "ctdb_recovery_helper")) {
+		ctdb_die(rec->ctdb, "Unable to set recovery helper\n");
+	}
+
+	arg = talloc_asprintf(mem_ctx, "%u", new_generation());
+	if (arg == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
+		return -1;
+	}
+
+	/*
+	 * Do not start the helper without the variable it is given,
+	 * matching how ctdb_takeover() treats its environment setup
+	 */
+	ret = setenv("CTDB_DBDIR_STATE", rec->ctdb->db_directory_state, 1);
+	if (ret != 0) {
+		D_ERR("Failed to set CTDB_DBDIR_STATE variable\n");
+		return -1;
+	}
+
+	return helper_run(rec, mem_ctx, prog, arg, "recovery");
+}
+
+/*
+ * Main recovery function, only run by leader
+ *
+ * Steps: ban misbehaving nodes, check that the cluster lock is held,
+ * refresh node capabilities, push this node's view of all flags to
+ * the connected nodes, run the recovery helper, do a takeover run
+ * and finally broadcast a reconfigure message.
+ *
+ * rec->need_recovery is set on entry and only cleared when recovery
+ * completes, or when it is abandoned because the leader changed
+ * while the cluster lock was not held.  After a successful recovery
+ * the ban credits are reset and further recoveries are disabled for
+ * the rerecovery_timeout tunable.
+ *
+ * Once ctdb_op_begin() has succeeded every exit goes through
+ * ctdb_op_end(), so the recovery operation is never left marked as
+ * in progress.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int do_recovery(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+	struct ctdb_node_map_old *nodemap = rec->nodemap;
+	unsigned int i;
+	int ret;
+	bool self_ban;
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n"));
+
+	/* Check if the current node is still the leader.  It's possible that
+	 * re-election has changed the leader.
+	 */
+	if (!this_node_is_leader(rec)) {
+		D_NOTICE("Leader changed to %u, aborting recovery\n",
+			 rec->leader);
+		return -1;
+	}
+
+	/* if recovery fails, force it again */
+	rec->need_recovery = true;
+
+	if (!ctdb_op_begin(rec->recovery)) {
+		return -1;
+	}
+
+	if (rec->election_in_progress) {
+		/* an election is in progress */
+		DEBUG(DEBUG_ERR, ("do_recovery called while election in progress - try again later\n"));
+		goto fail;
+	}
+
+	ban_misbehaving_nodes(rec, &self_ban);
+	if (self_ban) {
+		DEBUG(DEBUG_NOTICE, ("This node was banned, aborting recovery\n"));
+		goto fail;
+	}
+
+	if (cluster_lock_enabled(rec) && !cluster_lock_held(rec)) {
+		/* Leader can change in ban_misbehaving_nodes() */
+		if (!this_node_is_leader(rec)) {
+			D_NOTICE("Leader changed to %u, aborting recovery\n",
+				 rec->leader);
+			rec->need_recovery = false;
+			goto fail;
+		}
+
+		D_ERR("Cluster lock not held - abort recovery, ban node\n");
+		ctdb_ban_node(rec, rec->pnn);
+		goto fail;
+	}
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", rec->last_culprit_node));
+
+	/* Retrieve capabilities from all connected nodes */
+	ret = update_capabilities(rec, nodemap);
+	if (ret!=0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to update node capabilities.\n"));
+		/* Must end the operation begun above */
+		goto fail;
+	}
+
+	/*
+	  update all nodes to have the same flags that we have
+	 */
+	for (i=0;i<nodemap->num;i++) {
+		if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
+			continue;
+		}
+
+		ret = update_flags_on_all_nodes(rec,
+						nodemap->nodes[i].pnn,
+						nodemap->nodes[i].flags);
+		if (ret != 0) {
+			/* A failure for an inactive node is tolerated */
+			if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
+				DEBUG(DEBUG_WARNING, (__location__ "Unable to update flags on inactive node %d\n", i));
+			} else {
+				DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes for node %d\n", i));
+				/* Must end the operation begun above */
+				goto fail;
+			}
+		}
+	}
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated flags\n"));
+
+	ret = db_recovery_parallel(rec, mem_ctx);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	/* The result is not checked here; a failed run leaves rec->need_takeover_run set */
+	do_takeover_run(rec, nodemap);
+
+	/* send a message to all clients telling them that the cluster
+	   has been reconfigured */
+	ret = ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
+				       CTDB_SRVID_RECONFIGURE, tdb_null);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to send reconfigure message\n"));
+		goto fail;
+	}
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Recovery complete\n"));
+
+	rec->need_recovery = false;
+	ctdb_op_end(rec->recovery);
+
+	/*
+	 * Completed a full recovery so forgive any past transgressions
+	 */
+	ban_counts_reset(rec);
+
+	/* We just finished a recovery successfully.
+	   We now wait for rerecovery_timeout before we allow
+	   another recovery to take place.
+	*/
+	DEBUG(DEBUG_NOTICE, ("Just finished a recovery. New recoveries will now be suppressed for the rerecovery timeout (%d seconds)\n", ctdb->tunable.rerecovery_timeout));
+	ctdb_op_disable(rec->recovery, ctdb->ev,
+			ctdb->tunable.rerecovery_timeout);
+	return 0;
+
+fail:
+	ctdb_op_end(rec->recovery);
+	return -1;
+}
+
+
+/*
+ elections are won by first checking the number of connected nodes, then
+ the priority time, then the pnn
+
+ This is the payload of a CTDB_SRVID_ELECTION message. It is filled in
+ by ctdb_election_data() and judged by ctdb_election_win().
+
+ NOTE(review): ctdb_election_win() as written compares node_flags,
+ priority_time and pnn only; num_connected is sent but is not
+ consulted there.
+ */
+struct election_message {
+ uint32_t num_connected; /* nodes without NODE_FLAGS_DISCONNECTED; 0 if sender can't be leader */
+ struct timeval priority_time; /* sender's rec->priority_time; "now" if sender can't be leader */
+ uint32_t pnn; /* PNN of the sending node */
+ uint32_t node_flags; /* sender's own node flags */
+};
+
+/*
+  form this nodes election data
+
+  *em is zeroed and then filled with this node's PNN, priority time,
+  node flags and the number of nodes in the local nodemap that are not
+  flagged NODE_FLAGS_DISCONNECTED. rec->node_flags is refreshed as a
+  side effect.
+
+  If this node can not be leader, num_connected is reset to 0 and the
+  priority time is set to the current time so that the node tries to
+  lose the election.
+
+  On failure to fetch the nodemap or the node flags the function
+  returns early, leaving *em only partially filled (pnn and
+  priority_time set, everything else zero).
+ */
+static void ctdb_election_data(struct ctdb_recoverd *rec, struct election_message *em)
+{
+	unsigned int i;
+	int ret;
+	struct ctdb_node_map_old *nodemap;
+	struct ctdb_context *ctdb = rec->ctdb;
+	bool ok;
+
+	ZERO_STRUCTP(em);
+
+	em->pnn = rec->pnn;
+	em->priority_time = rec->priority_time;
+
+	/* The nodemap is allocated under rec and must be freed on every path */
+	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, rec, &nodemap);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " unable to get node map\n"));
+		return;
+	}
+
+	ok = node_flags(rec, rec->pnn, &rec->node_flags);
+	if (!ok) {
+		DBG_ERR("Unable to get node flags for this node\n");
+		/* Don't leak the nodemap on this error path */
+		talloc_free(nodemap);
+		return;
+	}
+	em->node_flags = rec->node_flags;
+
+	for (i = 0; i < nodemap->num; i++) {
+		if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
+			em->num_connected++;
+		}
+	}
+
+	if (!this_node_can_be_leader(rec)) {
+		/* Try to lose... */
+		em->num_connected = 0;
+		em->priority_time = timeval_current();
+	}
+
+	talloc_free(nodemap);
+}
+
+/*
+  Decide whether this node beats the sender of election data *em.
+
+  Returns true when this node should win:
+   - never, if this node can not be leader;
+   - always, if the other node is inactive (banned or stopped);
+   - otherwise by comparing priority times ("longest running node"),
+     with the PNNs as the tie-break.
+ */
+static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message *em)
+{
+	struct election_message mine;
+	int order;
+
+	/* Build our own election data to compare against */
+	ctdb_election_data(rec, &mine);
+
+	if (!this_node_can_be_leader(rec)) {
+		return false;
+	}
+
+	/* Automatically win if other node is banned or stopped */
+	if (em->node_flags & NODE_FLAGS_INACTIVE) {
+		return true;
+	}
+
+	/* then the longest running node */
+	order = timeval_compare(&em->priority_time, &mine.priority_time);
+	if (order != 0) {
+		return order > 0;
+	}
+
+	/* Equal priority times: fall back to comparing PNNs */
+	order = (int)mine.pnn - (int)em->pnn;
+
+	return order > 0;
+}
+
+/*
+  Broadcast this node's election data as a CTDB_SRVID_ELECTION message.
+
+  rec->leader is optimistically set to this node before sending.
+  Returns the result of ctdb_client_send_message().
+ */
+static int send_election_request(struct ctdb_recoverd *rec)
+{
+	struct election_message emsg;
+	TDB_DATA payload;
+
+	ctdb_election_data(rec, &emsg);
+
+	/* The message is sent straight from the stack copy */
+	payload.dptr = (unsigned char *)&emsg;
+	payload.dsize = sizeof(struct election_message);
+
+	/* Assume this node will win the election, set leader accordingly */
+	rec->leader = rec->pnn;
+
+	/* send an election message to all active nodes */
+	DEBUG(DEBUG_INFO,(__location__ " Send election request to all active nodes\n"));
+	return ctdb_client_send_message(rec->ctdb,
+					CTDB_BROADCAST_ALL,
+					CTDB_SRVID_ELECTION,
+					payload);
+}
+
+/*
+  Timer callback: we think we are winning the election, so send a
+  broadcast election request. The timer handle stored in
+  rec->send_election_te is released afterwards, whether or not the
+  send succeeded.
+ */
+static void election_send_request(struct tevent_context *ev,
+				  struct tevent_timer *te,
+				  struct timeval t, void *p)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
+
+	if (send_election_request(rec) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send election request!\n"));
+	}
+
+	TALLOC_FREE(rec->send_election_te);
+}
+
+/*
+  handler for memory dumps
+
+  The message payload must be a struct ctdb_srvid_message naming the
+  node and SRVID to reply to. The dump produced by ctdb_dump_memory()
+  is sent back to that address. All temporary allocations hang off a
+  single context that is released on every exit.
+*/
+static void mem_dump_handler(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(
+		private_data, struct ctdb_recoverd);
+	struct ctdb_context *ctdb = rec->ctdb;
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+	struct ctdb_srvid_message *reply_to;
+	TDB_DATA *dump;
+
+	if (data.dsize != sizeof(struct ctdb_srvid_message)) {
+		DEBUG(DEBUG_ERR, (__location__ " Wrong size of return address.\n"));
+		goto done;
+	}
+	reply_to = (struct ctdb_srvid_message *)data.dptr;
+
+	dump = talloc_zero(tmp_ctx, TDB_DATA);
+	if (dump == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to allocate memory for memdump\n"));
+		goto done;
+	}
+
+	if (ctdb_dump_memory(ctdb, dump) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " ctdb_dump_memory() failed\n"));
+		goto done;
+	}
+
+	DBG_ERR("recovery daemon memory dump\n");
+
+	if (ctdb_client_send_message(ctdb,
+				     reply_to->pnn,
+				     reply_to->srvid,
+				     *dump) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send rd memdump reply message\n"));
+	}
+
+done:
+	talloc_free(tmp_ctx);
+}
+
+/*
+  handler for reload_nodes: re-read the nodes file in the recovery
+  daemon. The message payload is not used.
+*/
+static void reload_nodes_handler(uint64_t srvid, TDB_DATA data,
+				 void *private_data)
+{
+	struct ctdb_recoverd *rec;
+
+	rec = talloc_get_type(private_data, struct ctdb_recoverd);
+
+	DEBUG(DEBUG_ERR, (__location__ " Reload nodes file from recovery daemon\n"));
+
+	ctdb_load_nodes_file(rec->ctdb);
+}
+
+
+/*
+ * Handler for node rebalance requests: append the PNN carried in the
+ * message (a single uint32_t) to rec->force_rebalance_nodes.
+ *
+ * Only the leader acts on the request; other nodes ignore it.
+ */
+static void recd_node_rebalance_handler(uint64_t srvid, TDB_DATA data,
+					void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(
+		private_data, struct ctdb_recoverd);
+	struct ctdb_context *ctdb = rec->ctdb;
+	uint32_t *grown;
+	uint32_t pnn;
+	int len = 0;
+
+	if (!this_node_is_leader(rec)) {
+		return;
+	}
+
+	if (data.dsize != sizeof(uint32_t)) {
+		DEBUG(DEBUG_ERR,(__location__ " Incorrect size of node rebalance message. Was %zd but expected %zd bytes\n", data.dsize, sizeof(uint32_t)));
+		return;
+	}
+
+	pnn = *(uint32_t *)data.dptr;
+
+	DEBUG(DEBUG_NOTICE,("Setting up rebalance of IPs to node %u\n", pnn));
+
+	/* Copy any existing list of nodes. There's probably some
+	 * sort of realloc variant that will do this but we need to
+	 * make sure that freeing the old array also cancels the timer
+	 * event for the timeout... not sure if realloc will do that.
+	 */
+	if (rec->force_rebalance_nodes != NULL) {
+		len = talloc_array_length(rec->force_rebalance_nodes);
+	}
+
+	/* Duplicates may be added; they are harmless. Arguably a
+	 * duplicate PNN means the timeout should be reset, so simply
+	 * rebuilding the list is the simplest solution.
+	 */
+	grown = talloc_zero_array(rec, uint32_t, len+1);
+	CTDB_NO_MEMORY_VOID(ctdb, grown);
+	if (len > 0) {
+		memcpy(grown, rec->force_rebalance_nodes, sizeof(uint32_t) * len);
+	}
+	grown[len] = pnn;
+
+	/* Replace the old list with the extended copy */
+	talloc_free(rec->force_rebalance_nodes);
+	rec->force_rebalance_nodes = grown;
+}
+
+
+
+/*
+ * Common implementation for the "disable takeover runs" and "disable
+ * recoveries" SRVID handlers.
+ *
+ * The payload must be a struct ctdb_disable_message. The operation
+ * described by op_state is disabled for the requested timeout and an
+ * int32_t is sent back via srvid_request_reply(): this node's PNN on
+ * success, otherwise the non-zero value returned by ctdb_op_disable().
+ * Malformed requests are logged and dropped without a reply.
+ */
+static void srvid_disable_and_reply(struct ctdb_recoverd *rec,
+				    TDB_DATA data,
+				    struct ctdb_op_state *op_state)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+	struct ctdb_disable_message *r;
+	uint32_t timeout;
+	TDB_DATA result;
+	int32_t ret = 0;
+
+	/* Validate input data */
+	if (data.dsize != sizeof(struct ctdb_disable_message)) {
+		/* Report the size that is actually checked against */
+		DEBUG(DEBUG_ERR,(__location__ " Wrong size for data :%lu "
+				 "expecting %lu\n", (long unsigned)data.dsize,
+				 (long unsigned)sizeof(struct ctdb_disable_message)));
+		return;
+	}
+	if (data.dptr == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " No data received\n"));
+		return;
+	}
+
+	r = (struct ctdb_disable_message *)data.dptr;
+	timeout = r->timeout;
+
+	ret = ctdb_op_disable(op_state, ctdb->ev, timeout);
+	if (ret != 0) {
+		goto done;
+	}
+
+	/* Returning our PNN tells the caller that we succeeded */
+	ret = rec->pnn;
+done:
+	result.dsize = sizeof(int32_t);
+	result.dptr = (uint8_t *)&ret;
+	/* NOTE(review): the cast presumably relies on the disable message
+	 * starting with the same reply-address fields as struct
+	 * ctdb_srvid_message - confirm against the struct definitions.
+	 */
+	srvid_request_reply(ctdb, (struct ctdb_srvid_message *)r, result);
+}
+
+/* SRVID handler: disable takeover runs for a while and reply to the sender */
+static void disable_takeover_runs_handler(uint64_t srvid, TDB_DATA data,
+					  void *private_data)
+{
+	struct ctdb_recoverd *rec;
+
+	rec = talloc_get_type(private_data, struct ctdb_recoverd);
+	srvid_disable_and_reply(rec, data, rec->takeover_run);
+}
+
+/*
+ * Backward compatibility for this SRVID
+ *
+ * The payload is a bare uint32_t timeout. Takeover runs are disabled
+ * for that long; no reply is sent and the result of ctdb_op_disable()
+ * is not checked.
+ */
+static void disable_ip_check_handler(uint64_t srvid, TDB_DATA data,
+				     void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(
+		private_data, struct ctdb_recoverd);
+	uint32_t *timeout_p;
+
+	if (data.dsize != sizeof(uint32_t)) {
+		DEBUG(DEBUG_ERR,(__location__ " Wrong size for data :%lu "
+				 "expecting %lu\n", (long unsigned)data.dsize,
+				 (long unsigned)sizeof(uint32_t)));
+		return;
+	}
+
+	timeout_p = (uint32_t *)data.dptr;
+	if (timeout_p == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " No data received\n"));
+		return;
+	}
+
+	ctdb_op_disable(rec->takeover_run, rec->ctdb->ev, *timeout_p);
+}
+
+/* SRVID handler: disable recoveries for a while and reply to the sender */
+static void disable_recoveries_handler(uint64_t srvid, TDB_DATA data,
+				       void *private_data)
+{
+	struct ctdb_recoverd *rec;
+
+	rec = talloc_get_type(private_data, struct ctdb_recoverd);
+	srvid_disable_and_reply(rec, data, rec->recovery);
+}
+
+/*
+  handler for ip reallocate: only queue the request here. The queue is
+  handled later in the monitor_cluster loop so that we do not recurse
+  with other requests to takeover_run().
+*/
+static void ip_reallocate_handler(uint64_t srvid, TDB_DATA data,
+				  void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(
+		private_data, struct ctdb_recoverd);
+
+	/* The payload is the address to reply to */
+	if (data.dsize != sizeof(struct ctdb_srvid_message)) {
+		DEBUG(DEBUG_ERR, (__location__ " Wrong size of return address.\n"));
+		return;
+	}
+
+	srvid_request_add(rec->ctdb,
+			  &rec->reallocate_requests,
+			  (struct ctdb_srvid_message *)data.dptr);
+}
+
+/*
+ * Do a takeover run on behalf of all queued IP reallocate requests and
+ * reply to each of them with an int32_t: this node's PNN if the
+ * takeover run succeeded, -1 otherwise.
+ */
+static void process_ipreallocate_requests(struct ctdb_context *ctdb,
+					  struct ctdb_recoverd *rec)
+{
+	struct srvid_requests *pending;
+	TDB_DATA reply;
+	int32_t status = -1;
+
+	/* Detach the queue first: only requests that are pending now
+	 * are answered. More might come in while the takeover run is
+	 * in progress and they will need to be processed later since
+	 * they might be in response to flag changes.
+	 */
+	pending = rec->reallocate_requests;
+	rec->reallocate_requests = NULL;
+
+	if (do_takeover_run(rec, rec->nodemap)) {
+		status = rec->pnn;
+	}
+
+	reply.dptr = (uint8_t *)&status;
+	reply.dsize = sizeof(int32_t);
+
+	srvid_requests_reply(ctdb, &pending, reply);
+}
+
+/*
+ * handler for assigning banning credits
+ *
+ * The payload is the PNN (uint32_t) of the node to penalise. Its
+ * culprit count is set to the number of nodes in the current nodemap.
+ * Only the leader acts on the message.
+ */
+static void banning_handler(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(
+		private_data, struct ctdb_recoverd);
+	uint32_t ban_pnn;
+
+	/* Ignore if we are not leader */
+	if (!this_node_is_leader(rec)) {
+		return;
+	}
+
+	if (data.dsize != sizeof(uint32_t)) {
+		/* Leading space keeps the text apart from the location prefix */
+		DEBUG(DEBUG_ERR, (__location__ " invalid data size %zu\n",
+				  data.dsize));
+		return;
+	}
+
+	ban_pnn = *(uint32_t *)data.dptr;
+
+	ctdb_set_culprit_count(rec, ban_pnn, rec->nodemap->num);
+}
+
+/*
+ * Handler for leader elections
+ *
+ * The payload is a struct election_message from another node. Messages
+ * that are too short to contain one are logged and ignored, as are
+ * messages from this node itself.
+ *
+ * For a valid message the election timeout is restarted. If this node
+ * would win against the sender, a delayed election request of our own
+ * is scheduled (unless one is already pending). Otherwise any pending
+ * request is cancelled, the cluster lock is released if held, and the
+ * sender is recorded as leader.
+ */
+static void election_handler(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(
+		private_data, struct ctdb_recoverd);
+	struct ctdb_context *ctdb = rec->ctdb;
+	struct election_message *em;
+
+	/* Never read election data from a missing or truncated payload */
+	if (data.dptr == NULL ||
+	    data.dsize < sizeof(struct election_message)) {
+		DBG_WARNING("Ignoring election message with invalid size %zu\n",
+			    data.dsize);
+		return;
+	}
+	em = (struct election_message *)data.dptr;
+
+	/* Ignore election packets from ourself */
+	if (rec->pnn == em->pnn) {
+		return;
+	}
+
+	/* we got an election packet - update the timeout for the election */
+	talloc_free(rec->election_timeout);
+	rec->election_in_progress = true;
+	rec->election_timeout = tevent_add_timer(
+			ctdb->ev, ctdb,
+			fast_start ?
+				timeval_current_ofs(0, 500000) :
+				timeval_current_ofs(ctdb->tunable.election_timeout, 0),
+			ctdb_election_timeout, rec);
+
+	/* someone called an election. check their election data
+	   and if we disagree and we would rather be the elected node,
+	   send a new election message to all other nodes
+	 */
+	if (ctdb_election_win(rec, em)) {
+		if (!rec->send_election_te) {
+			rec->send_election_te = tevent_add_timer(
+					ctdb->ev, rec,
+					timeval_current_ofs(0, 500000),
+					election_send_request, rec);
+		}
+		return;
+	}
+
+	/* we didn't win */
+	TALLOC_FREE(rec->send_election_te);
+
+	/* Release the cluster lock file */
+	if (cluster_lock_held(rec)) {
+		cluster_lock_release(rec);
+	}
+
+	/* Set leader to the winner of this round */
+	rec->leader = em->pnn;
+
+	return;
+}
+
+/*
+ * Run an election that is decided by the cluster lock.
+ *
+ * - A node that can not be leader just drops the lock if it holds it.
+ * - A node that already holds the lock keeps it (and its leader
+ *   setting) untouched.
+ * - Otherwise the leader is reset to unknown and the node tries to
+ *   take the lock; on success it records itself as leader.
+ *
+ * In all cases rec->election_in_progress is cleared on return.
+ */
+static void cluster_lock_election(struct ctdb_recoverd *rec)
+{
+	if (!this_node_can_be_leader(rec)) {
+		if (cluster_lock_held(rec)) {
+			cluster_lock_release(rec);
+		}
+	} else if (!cluster_lock_held(rec)) {
+		/*
+		 * Only contend for the lock when it is not already held:
+		 * there is no need to unconditionally release it and
+		 * then attempt to retake it. This provides stability.
+		 */
+		rec->leader = CTDB_UNKNOWN_PNN;
+
+		if (cluster_lock_take(rec)) {
+			rec->leader = rec->pnn;
+			D_WARNING("Took cluster lock, leader=%"PRIu32"\n", rec->leader);
+		}
+	}
+
+	rec->election_in_progress = false;
+}
+
+/*
+ force the start of the election process
+
+ Puts the cluster into recovery mode, marks an election as in progress
+ and broadcasts an "unknown leader" message. With a cluster lock
+ configured the election is then decided by cluster_lock_election();
+ otherwise the election timeout timer is (re)started, an election
+ request is broadcast and ctdb_wait_election() is called.
+
+ Returns nothing; failures are logged and abort the attempt.
+ */
+static void force_election(struct ctdb_recoverd *rec)
+{
+ int ret;
+ struct ctdb_context *ctdb = rec->ctdb;
+
+ D_ERR("Start election\n");
+
+ /* set all nodes to recovery mode to stop all internode traffic */
+ ret = set_recovery_mode(ctdb, rec, rec->nodemap, CTDB_RECOVERY_ACTIVE);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode to active on cluster\n"));
+ return; /* election_in_progress has not been set yet */
+ }
+
+ rec->election_in_progress = true;
+ /* Let other nodes know that an election is underway */
+ leader_broadcast_send(rec, CTDB_UNKNOWN_PNN);
+
+ if (cluster_lock_enabled(rec)) {
+ cluster_lock_election(rec); /* clears election_in_progress itself */
+ return;
+ }
+
+ talloc_free(rec->election_timeout); /* drop any previous timeout timer */
+ rec->election_timeout = tevent_add_timer(
+ ctdb->ev, ctdb,
+ fast_start ?
+ timeval_current_ofs(0, 500000) : /* 0.5s when fast_start is set */
+ timeval_current_ofs(ctdb->tunable.election_timeout, 0),
+ ctdb_election_timeout, rec);
+
+ ret = send_election_request(rec);
+ if (ret!=0) {
+ DBG_ERR("Failed to initiate leader election\n");
+ return; /* NOTE(review): election_in_progress stays set; presumably cleared by the timeout timer - confirm */
+ }
+
+ /* wait for a few seconds to collect all responses */
+ ctdb_wait_election(rec);
+}
+
+
+/*
+ * Handler for SRVIDs that are no longer implemented: just log a
+ * warning naming the SRVID. The payload is ignored.
+ */
+static void srvid_not_implemented(uint64_t srvid,
+				  TDB_DATA data,
+				  void *private_data)
+{
+	const char *name = "UNKNOWN";
+
+	if (srvid == CTDB_SRVID_SET_NODE_FLAGS) {
+		name = "CTDB_SRVID_SET_NODE_FLAGS";
+	}
+
+	D_WARNING("SRVID %s (0x%" PRIx64 ") is obsolete\n", name, srvid);
+}
+
+/*
+  handler for when we need to push out flag changes to all other nodes
+
+  The payload is a struct ctdb_node_flag_change. The nodemap is read
+  from the current leader; if it contains the node named in the
+  message, the unmodified payload is sent to all connected nodes with
+  CTDB_CONTROL_MODIFY_FLAGS. Payloads too short to hold a flag change
+  are logged and ignored.
+*/
+static void push_flags_handler(uint64_t srvid, TDB_DATA data,
+			       void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(
+		private_data, struct ctdb_recoverd);
+	struct ctdb_context *ctdb = rec->ctdb;
+	int ret;
+	struct ctdb_node_flag_change *c;
+	struct ctdb_node_map_old *nodemap=NULL;
+	TALLOC_CTX *tmp_ctx;
+	uint32_t *nodes;
+
+	/* Never read the flag change from a missing or truncated payload */
+	if (data.dptr == NULL ||
+	    data.dsize < sizeof(struct ctdb_node_flag_change)) {
+		DBG_ERR("Invalid flag change message size %zu\n", data.dsize);
+		return;
+	}
+	c = (struct ctdb_node_flag_change *)data.dptr;
+
+	/* Allocated after validation so the early return needs no cleanup */
+	tmp_ctx = talloc_new(ctdb);
+
+	/* read the node flags from the leader */
+	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), rec->leader,
+				   tmp_ctx, &nodemap);
+	if (ret != 0) {
+		/* The node that was asked is the leader, not c->pnn */
+		DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", rec->leader));
+		talloc_free(tmp_ctx);
+		return;
+	}
+	if (c->pnn >= nodemap->num) {
+		DBG_ERR("Nodemap from leader does not contain node %d\n",
+			c->pnn);
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	/* send the flags update to all connected nodes */
+	nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
+				      nodes, 0, CONTROL_TIMEOUT(),
+				      false, data,
+				      NULL, NULL,
+				      NULL) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " ctdb_control to modify node flags failed\n"));
+
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	talloc_free(tmp_ctx);
+}
+
+/*
+ * Timer callback: no leader broadcast arrived before the timeout, so
+ * forget the timer handle and force an election.
+ */
+static void leader_broadcast_timeout_handler(struct tevent_context *ev,
+					     struct tevent_timer *te,
+					     struct timeval current_time,
+					     void *private_data)
+{
+	struct ctdb_recoverd *rec;
+
+	rec = talloc_get_type_abort(private_data, struct ctdb_recoverd);
+
+	/* Mark the timeout as no longer active before anything else */
+	rec->leader_broadcast_timeout_te = NULL;
+
+	D_NOTICE("Leader broadcast timeout\n");
+
+	force_election(rec);
+}
+
+static void leader_broadcast_timeout_cancel(struct ctdb_recoverd *rec)
+{ /* Cancel the leader broadcast timeout, if one is pending. */
+ TALLOC_FREE(rec->leader_broadcast_timeout_te); /* presumably also resets the handle to NULL, which leader_broadcast_timeout_active() tests - confirm against talloc.h */
+}
+
+/*
+ * (Re)start the leader broadcast timeout. When it fires,
+ * leader_broadcast_timeout_handler() runs after
+ * ctdb_config.leader_timeout seconds.
+ *
+ * Returns 0 on success, ENOMEM if the timer could not be created.
+ */
+static int leader_broadcast_timeout_start(struct ctdb_recoverd *rec)
+{
+	struct tevent_timer *te;
+
+	/*
+	 * Cancelling first should not be necessary. However, there
+	 * will be interactions with election code here. It will want
+	 * to cancel and restart the timer around potentially long
+	 * elections.
+	 */
+	leader_broadcast_timeout_cancel(rec);
+
+	te = tevent_add_timer(rec->ctdb->ev,
+			      rec,
+			      timeval_current_ofs(ctdb_config.leader_timeout, 0),
+			      leader_broadcast_timeout_handler,
+			      rec);
+	rec->leader_broadcast_timeout_te = te;
+	if (te == NULL) {
+		D_ERR("Unable to start leader broadcast timeout\n");
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+static bool leader_broadcast_timeout_active(struct ctdb_recoverd *rec)
+{ /* True while a leader broadcast timeout timer handle is stored in rec. */
+ return rec->leader_broadcast_timeout_te != NULL;
+}
+
+/*
+ * Handler for leader broadcasts. The payload is the PNN of the node
+ * that the sender considers leader.
+ *
+ * - Unparseable payloads are logged and ignored.
+ * - A broadcast naming the leader we already know just restarts the
+ *   leader broadcast timeout.
+ * - CTDB_UNKNOWN_PNN (when that differs from our current leader)
+ *   announces an election: the timeout stays cancelled and, with a
+ *   cluster lock configured, a cluster lock election is run unless an
+ *   election was already in progress.
+ * - Any other PNN becomes the new leader and the timeout is restarted.
+ */
+static void leader_handler(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type_abort(
+		private_data, struct ctdb_recoverd);
+	uint32_t announced;
+	size_t consumed;
+	int ret;
+
+	ret = ctdb_uint32_pull(data.dptr, data.dsize, &announced, &consumed);
+	if (ret != 0) {
+		DBG_WARNING("Unable to parse leader broadcast, ret=%d\n", ret);
+		return;
+	}
+
+	leader_broadcast_timeout_cancel(rec);
+
+	if (announced != rec->leader) {
+		if (announced == CTDB_UNKNOWN_PNN) {
+			bool already_electing = rec->election_in_progress;
+
+			/*
+			 * Leader broadcast timeout was cancelled above -
+			 * stop main loop from restarting it until
+			 * election is complete
+			 */
+			rec->election_in_progress = true;
+
+			/*
+			 * This is the only notification for a cluster
+			 * lock election, so handle it here...
+			 */
+			if (cluster_lock_enabled(rec) && !already_electing) {
+				cluster_lock_election(rec);
+			}
+
+			return;
+		}
+
+		D_NOTICE("Received leader broadcast, leader=%"PRIu32"\n", announced);
+		rec->leader = announced;
+	}
+
+	/* The return value is deliberately not checked, as before */
+	leader_broadcast_timeout_start(rec);
+}
+
+struct verify_recmode_normal_data { /* shared state for the async getrecmode controls sent by verify_recmode() */
+ uint32_t count; /* controls sent that have not yet called back */
+ enum monitor_result status; /* starts as MONITOR_OK; downgraded by the callback */
+};
+
+/*
+ * Completion callback for one async getrecmode control.
+ *
+ * Decrements the outstanding-reply counter and folds the outcome into
+ * the shared status: a failed control turns MONITOR_OK into
+ * MONITOR_FAILED, and a node that is not in normal recovery mode sets
+ * MONITOR_RECOVERY_NEEDED.
+ */
+static void verify_recmode_normal_callback(struct ctdb_client_control_state *state)
+{
+	struct verify_recmode_normal_data *rmdata = talloc_get_type(state->async.private_data, struct verify_recmode_normal_data);
+
+	/* one more node has responded with recmode data*/
+	rmdata->count--;
+
+	if (state->state != CTDB_CONTROL_DONE) {
+		/* We failed to get the recmode: report an error and let
+		   the main loop try again, but do not overwrite a
+		   status that has already been changed.
+		 */
+		if (rmdata->status == MONITOR_OK) {
+			rmdata->status = MONITOR_FAILED;
+		}
+	} else if (state->status != CTDB_RECOVERY_NORMAL) {
+		/* For a completed control the recmode is in the status field */
+		DEBUG(DEBUG_NOTICE, ("Node:%u was in recovery mode. Start recovery process\n", state->c->hdr.destnode));
+		rmdata->status = MONITOR_RECOVERY_NEEDED;
+	}
+}
+
+
+/*
+ * verify that all nodes are in normal recovery mode
+ *
+ * An async getrecmode control is sent to every node in nodemap that is
+ * not flagged NODE_FLAGS_INACTIVE, and the event loop is run until all
+ * of them have called back.
+ *
+ * Returns MONITOR_OK, MONITOR_FAILED (a control could not be sent or
+ * did not complete) or MONITOR_RECOVERY_NEEDED (some node was not in
+ * normal mode).
+ */
+static enum monitor_result verify_recmode(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap)
+{
+	TALLOC_CTX *mem_ctx = talloc_new(ctdb);
+	struct verify_recmode_normal_data *rmdata;
+	enum monitor_result result;
+	unsigned int idx;
+
+	rmdata = talloc(mem_ctx, struct verify_recmode_normal_data);
+	CTDB_NO_MEMORY_FATAL(ctdb, rmdata);
+	rmdata->status = MONITOR_OK;
+	rmdata->count = 0;
+
+	/* loop over all active nodes and send an async getrecmode call to
+	   them*/
+	for (idx = 0; idx < nodemap->num; idx++) {
+		struct ctdb_client_control_state *state;
+
+		if (nodemap->nodes[idx].flags & NODE_FLAGS_INACTIVE) {
+			continue;
+		}
+
+		state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx,
+						  CONTROL_TIMEOUT(),
+						  nodemap->nodes[idx].pnn);
+		if (state == NULL) {
+			/* we failed to send the control, treat this as
+			   an error and try again next iteration
+			 */
+			DEBUG(DEBUG_ERR,("Failed to call ctdb_ctrl_getrecmode_send during monitoring\n"));
+			talloc_free(mem_ctx);
+			return MONITOR_FAILED;
+		}
+
+		/* set up the callback functions */
+		state->async.private_data = rmdata;
+		state->async.fn = verify_recmode_normal_callback;
+
+		/* one more control to wait for to complete */
+		rmdata->count++;
+	}
+
+	/* Wait until every control sent above has called back.
+	 * NOTE(review): this loop has no timeout of its own; presumably
+	 * CONTROL_TIMEOUT() makes each control call back eventually -
+	 * confirm.
+	 */
+	while (rmdata->count > 0) {
+		tevent_loop_once(ctdb->ev);
+	}
+
+	/* Copy the result out before the context holding it is freed */
+	result = rmdata->status;
+	talloc_free(mem_ctx);
+	return result;
+}
+
+
+/*
+ * Fetch the interface list from the local node and compare it with the
+ * copy cached in rec->ifaces.
+ *
+ * Returns true if there is no cached list yet, if the number of
+ * interfaces differs, or if any slot differs in name or link state.
+ * The cache is replaced with the freshly fetched list in all of those
+ * cases and also when nothing changed.
+ *
+ * If the list can not be fetched, true is returned as well (just in
+ * case) and the cache is left untouched.
+ */
+static bool interfaces_have_changed(struct ctdb_context *ctdb,
+				    struct ctdb_recoverd *rec)
+{
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct ctdb_iface_list_old *fresh = NULL;
+	bool changed = false;
+	unsigned int slot;
+
+	/* Read the interfaces from the local node */
+	if (ctdb_ctrl_get_ifaces(ctdb, CONTROL_TIMEOUT(),
+				 CTDB_CURRENT_NODE, mem_ctx, &fresh) != 0) {
+		D_ERR("Unable to get interfaces from local node %u\n", rec->pnn);
+		/* We could return an error. However, this will be
+		 * rare so we'll decide that the interfaces have
+		 * actually changed, just in case.
+		 */
+		talloc_free(mem_ctx);
+		return true;
+	}
+
+	if (rec->ifaces == NULL) {
+		/* We haven't been here before so things have changed */
+		DEBUG(DEBUG_NOTICE, ("Initial interface fetched\n"));
+		changed = true;
+		goto update_cache;
+	}
+
+	if (rec->ifaces->num != fresh->num) {
+		/* Number of interfaces has changed */
+		DEBUG(DEBUG_NOTICE, ("Interface count changed from %d to %d\n",
+				     rec->ifaces->num, fresh->num));
+		changed = true;
+		goto update_cache;
+	}
+
+	/* See if interface names or link states have changed */
+	for (slot = 0; slot < rec->ifaces->num; slot++) {
+		struct ctdb_iface *cached = &rec->ifaces->ifaces[slot];
+
+		if (strcmp(cached->name, fresh->ifaces[slot].name) != 0) {
+			DEBUG(DEBUG_NOTICE,
+			      ("Interface in slot %d changed: %s => %s\n",
+			       slot, cached->name, fresh->ifaces[slot].name));
+			changed = true;
+			break;
+		}
+		if (cached->link_state != fresh->ifaces[slot].link_state) {
+			DEBUG(DEBUG_NOTICE,
+			      ("Interface %s changed state: %d => %d\n",
+			       cached->name, cached->link_state,
+			       fresh->ifaces[slot].link_state));
+			changed = true;
+			break;
+		}
+	}
+
+update_cache:
+	/* Keep the new list on rec; it survives freeing mem_ctx below */
+	talloc_free(rec->ifaces);
+	rec->ifaces = talloc_steal(rec, fresh);
+
+	talloc_free(mem_ctx);
+	return changed;
+}
+
+/* Check that the local allocation of public IP addresses is correct
+ * and do some house-keeping
+ *
+ * A takeover run is requested (by broadcasting CTDB_SRVID_TAKEOVER_RUN
+ * to connected nodes) if the local interfaces have changed, if an
+ * available public IP is unassigned while this node's flags are 0, or -
+ * when ctdb->do_checkpublicip is set - if the IPs actually present on
+ * this node's interfaces disagree with the assignment known to CTDB.
+ *
+ * Returns 0 on success or when checking is disabled, -1 if the public
+ * IP lists can not be retrieved from the local node. */
+static int verify_local_ip_allocation(struct ctdb_recoverd *rec)
+{
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ struct ctdb_context *ctdb = rec->ctdb;
+ unsigned int j;
+ int ret;
+ bool need_takeover_run = false;
+ struct ctdb_public_ip_list_old *ips = NULL;
+
+ /* If we are not the leader then do some housekeeping */
+ if (!this_node_is_leader(rec)) {
+ /* Ignore any IP reallocate requests - only leader
+ * processes them
+ */
+ TALLOC_FREE(rec->reallocate_requests);
+ /* Clear any nodes that should be force rebalanced in
+ * the next takeover run. If the leader has changed
+ * then we don't want to process these some time in
+ * the future.
+ */
+ TALLOC_FREE(rec->force_rebalance_nodes);
+ }
+
+ /* Return early if disabled... */
+ if (ctdb_config.failover_disabled ||
+ ctdb_op_is_disabled(rec->takeover_run)) {
+ talloc_free(mem_ctx);
+ return 0;
+ }
+
+ if (interfaces_have_changed(ctdb, rec)) { /* also refreshes rec->ifaces */
+ need_takeover_run = true;
+ }
+
+ /* If there are unhosted IPs but this node can host them then
+ * trigger an IP reallocation */
+
+ /* Read *available* IPs from local node */
+ ret = ctdb_ctrl_get_public_ips_flags(
+ ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx,
+ CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE, &ips);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to retrieve available public IPs\n"));
+ talloc_free(mem_ctx);
+ return -1;
+ }
+
+ for (j=0; j<ips->num; j++) {
+ if (ips->ips[j].pnn == CTDB_UNKNOWN_PNN &&
+ rec->nodemap->nodes[rec->pnn].flags == 0) { /* nodemap is indexed by this node's PNN here */
+ DEBUG(DEBUG_WARNING,
+ ("Unassigned IP %s can be served by this node\n",
+ ctdb_addr_to_str(&ips->ips[j].addr)));
+ need_takeover_run = true;
+ }
+ }
+
+ talloc_free(ips); /* the pointer is reused for the "known" list below */
+
+ if (!ctdb->do_checkpublicip) {
+ goto done;
+ }
+
+ /* Validate the IP addresses that this node has on network
+ * interfaces. If there is an inconsistency between reality
+ * and the state expected by CTDB then try to fix it by
+ * triggering an IP reallocation or releasing extraneous IP
+ * addresses. */
+
+ /* Read *known* IPs from local node */
+ ret = ctdb_ctrl_get_public_ips_flags(
+ ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, 0, &ips);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to retrieve known public IPs\n"));
+ talloc_free(mem_ctx);
+ return -1;
+ }
+
+ for (j=0; j<ips->num; j++) {
+ if (ips->ips[j].pnn == rec->pnn) { /* IP is assigned to this node */
+ if (!ctdb_sys_have_ip(&ips->ips[j].addr)) {
+ DEBUG(DEBUG_ERR,
+ ("Assigned IP %s not on an interface\n",
+ ctdb_addr_to_str(&ips->ips[j].addr)));
+ need_takeover_run = true;
+ }
+ } else { /* IP is assigned elsewhere or unassigned */
+ if (ctdb_sys_have_ip(&ips->ips[j].addr)) {
+ DEBUG(DEBUG_ERR,
+ ("IP %s incorrectly on an interface\n",
+ ctdb_addr_to_str(&ips->ips[j].addr)));
+ need_takeover_run = true;
+ }
+ }
+ }
+
+done:
+ if (need_takeover_run) {
+ struct ctdb_srvid_message rd;
+ TDB_DATA data;
+
+ DEBUG(DEBUG_NOTICE,("Trigger takeoverrun\n"));
+
+ ZERO_STRUCT(rd);
+ rd.pnn = rec->pnn;
+ rd.srvid = 0; /* NOTE(review): presumably 0 means no reply is wanted - confirm */
+ data.dptr = (uint8_t *)&rd;
+ data.dsize = sizeof(rd);
+
+ ret = ctdb_client_send_message(ctdb,
+ CTDB_BROADCAST_CONNECTED,
+ CTDB_SRVID_TAKEOVER_RUN,
+ data);
+ if (ret != 0) {
+ D_ERR("Failed to send takeover run request\n"); /* logged only; the function still returns 0 */
+ }
+ }
+ talloc_free(mem_ctx);
+ return 0;
+}
+
+
+struct remote_nodemaps_state { /* callback data for the async GET_NODEMAP controls sent by get_remote_nodemaps() */
+ struct ctdb_node_map_old **remote_nodemaps; /* one slot per entry of rec->nodemap, filled by the callback */
+ struct ctdb_recoverd *rec; /* gives the callbacks access to rec->nodemap and culprit tracking */
+};
+
+/*
+ * Success callback for an async GET_NODEMAP control: store the nodemap
+ * returned by node_pnn in the slot of the remote nodemap array that
+ * matches that node's position in rec->nodemap. The returned buffer is
+ * re-parented onto the array. Replies from a PNN that is not in the
+ * local nodemap are logged and dropped.
+ */
+static void async_getnodemap_callback(struct ctdb_context *ctdb,
+				      uint32_t node_pnn,
+				      int32_t res,
+				      TDB_DATA outdata,
+				      void *callback_data)
+{
+	struct remote_nodemaps_state *state =
+		(struct remote_nodemaps_state *)callback_data;
+	struct ctdb_node_map_old *local = state->rec->nodemap;
+	struct ctdb_node_map_old **slots = state->remote_nodemaps;
+	size_t idx;
+
+	for (idx = 0; idx < local->num; idx++) {
+		if (local->nodes[idx].pnn != node_pnn) {
+			continue;
+		}
+
+		slots[idx] = (struct ctdb_node_map_old *)talloc_steal(
+			slots, outdata.dptr);
+		return;
+	}
+
+	DBG_ERR("Invalid PNN %"PRIu32"\n", node_pnn);
+}
+
+/*
+ * Failure callback for an async GET_NODEMAP control: log the failure
+ * and mark the node that did not deliver its nodemap as a culprit.
+ */
+static void async_getnodemap_error(struct ctdb_context *ctdb,
+				   uint32_t node_pnn,
+				   int32_t res,
+				   TDB_DATA outdata,
+				   void *callback_data)
+{
+	struct remote_nodemaps_state *state;
+
+	state = (struct remote_nodemaps_state *)callback_data;
+
+	DBG_ERR("Failed to retrieve nodemap from node %u\n", node_pnn);
+	ctdb_set_culprit(state->rec, node_pnn);
+}
+
+/*
+ * Fetch the nodemaps of the connected nodes.
+ *
+ * On success *remote_nodemaps is set to an array, allocated from
+ * mem_ctx, with one slot per entry of rec->nodemap. Slots are filled
+ * by async_getnodemap_callback(); slots for nodes that did not reply
+ * stay NULL.
+ *
+ * Returns 0 on success, -1 on allocation failure, otherwise the
+ * non-zero result of ctdb_client_async_control() (in which case the
+ * array is freed and *remote_nodemaps is not touched).
+ */
+static int get_remote_nodemaps(struct ctdb_recoverd *rec,
+			       TALLOC_CTX *mem_ctx,
+			       struct ctdb_node_map_old ***remote_nodemaps)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+	struct remote_nodemaps_state state;
+	struct ctdb_node_map_old **maps;
+	uint32_t *targets;
+	int ret;
+
+	maps = talloc_zero_array(mem_ctx,
+				 struct ctdb_node_map_old *,
+				 rec->nodemap->num);
+	if (maps == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		return -1;
+	}
+
+	targets = list_of_connected_nodes(ctdb, rec->nodemap, mem_ctx, false);
+
+	state.rec = rec;
+	state.remote_nodemaps = maps;
+
+	ret = ctdb_client_async_control(ctdb,
+					CTDB_CONTROL_GET_NODEMAP,
+					targets,
+					0,
+					CONTROL_TIMEOUT(),
+					false,
+					tdb_null,
+					async_getnodemap_callback,
+					async_getnodemap_error,
+					&state);
+	/* The target list is only needed while the controls are sent */
+	talloc_free(targets);
+
+	if (ret == 0) {
+		*remote_nodemaps = maps;
+		return 0;
+	}
+
+	talloc_free(maps);
+	return ret;
+}
+
+static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
+ TALLOC_CTX *mem_ctx)
+{
+ struct ctdb_node_map_old *nodemap=NULL;
+ struct ctdb_node_map_old **remote_nodemaps=NULL;
+ struct ctdb_vnn_map *vnnmap=NULL;
+ struct ctdb_vnn_map *remote_vnnmap=NULL;
+ uint32_t num_lmasters;
+ int32_t debug_level;
+ unsigned int i, j;
+ int ret;
+ bool self_ban;
+
+
+ /* verify that the main daemon is still running */
+ if (ctdb_kill(ctdb, ctdb->ctdbd_pid, 0) != 0) {
+ DEBUG(DEBUG_CRIT,("CTDB daemon is no longer available. Shutting down recovery daemon\n"));
+ exit(-1);
+ }
+
+ /* ping the local daemon to tell it we are alive */
+ ctdb_ctrl_recd_ping(ctdb);
+
+ if (rec->election_in_progress) {
+ /* an election is in progress */
+ return;
+ }
+
+ /*
+ * Start leader broadcasts if they are not active (1st time
+ * through main loop? Memory allocation error?)
+ */
+ if (!leader_broadcast_loop_active(rec)) {
+ ret = leader_broadcast_loop(rec);
+ if (ret != 0) {
+ D_ERR("Failed to set up leader broadcast\n");
+ ctdb_set_culprit(rec, rec->pnn);
+ }
+ }
+ /*
+ * Similar for leader broadcast timeouts. These can also have
+ * been stopped by another node receiving a leader broadcast
+ * timeout and transmitting an "unknown leader broadcast".
+ * Note that this should never be done during an election - at
+ * the moment there is nothing between here and the above
+ * election-in-progress check that can process an election
+ * result (i.e. no event loop).
+ */
+ if (!leader_broadcast_timeout_active(rec)) {
+ ret = leader_broadcast_timeout_start(rec);
+ if (ret != 0) {
+ ctdb_set_culprit(rec, rec->pnn);
+ }
+ }
+
+
+ /* read the debug level from the parent and update locally */
+ ret = ctdb_ctrl_get_debuglevel(ctdb, CTDB_CURRENT_NODE, &debug_level);
+ if (ret !=0) {
+ DEBUG(DEBUG_ERR, (__location__ " Failed to read debuglevel from parent\n"));
+ return;
+ }
+ debuglevel_set(debug_level);
+
+ /* get relevant tunables */
+ ret = ctdb_ctrl_get_all_tunables(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->tunable);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,("Failed to get tunables - retrying\n"));
+ return;
+ }
+
+ /* get runstate */
+ ret = ctdb_ctrl_get_runstate(ctdb, CONTROL_TIMEOUT(),
+ CTDB_CURRENT_NODE, &ctdb->runstate);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Failed to get runstate - retrying\n"));
+ return;
+ }
+
+ /* get nodemap */
+ ret = ctdb_ctrl_getnodemap(ctdb,
+ CONTROL_TIMEOUT(),
+ rec->pnn,
+ rec,
+ &nodemap);
+ if (ret != 0) {
+ DBG_ERR("Unable to get nodemap from node %"PRIu32"\n", rec->pnn);
+ return;
+ }
+ talloc_free(rec->nodemap);
+ rec->nodemap = nodemap;
+
+ /* remember our own node flags */
+ rec->node_flags = nodemap->nodes[rec->pnn].flags;
+
+ ban_misbehaving_nodes(rec, &self_ban);
+ if (self_ban) {
+ DEBUG(DEBUG_NOTICE, ("This node was banned, restart main_loop\n"));
+ return;
+ }
+
+ ret = ctdb_ctrl_getrecmode(ctdb, mem_ctx, CONTROL_TIMEOUT(),
+ CTDB_CURRENT_NODE, &ctdb->recovery_mode);
+ if (ret != 0) {
+ D_ERR("Failed to read recmode from local node\n");
+ return;
+ }
+
+ /* if the local daemon is STOPPED or BANNED, we verify that the databases are
+ also frozen and that the recmode is set to active.
+ */
+ if (rec->node_flags & NODE_FLAGS_INACTIVE) {
+ /* If this node has become inactive then we want to
+ * reduce the chances of it taking over the leader
+ * role when it becomes active again. This
+ * helps to stabilise the leader role so that
+ * it stays on the most stable node.
+ */
+ rec->priority_time = timeval_current();
+
+ if (ctdb->recovery_mode == CTDB_RECOVERY_NORMAL) {
+ DEBUG(DEBUG_ERR,("Node is stopped or banned but recovery mode is not active. Activate recovery mode and lock databases\n"));
+
+ ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to activate recovery mode in STOPPED or BANNED state\n"));
+
+ return;
+ }
+ }
+ if (! rec->frozen_on_inactive) {
+ ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(),
+ CTDB_CURRENT_NODE);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Failed to freeze node "
+ "in STOPPED or BANNED state\n"));
+ return;
+ }
+
+ rec->frozen_on_inactive = true;
+ }
+
+ /* If this node is stopped or banned then it is not the recovery
+ * master, so don't do anything. This prevents stopped or banned
+ * node from starting election and sending unnecessary controls.
+ */
+ return;
+ }
+
+ rec->frozen_on_inactive = false;
+
+ /* Retrieve capabilities from all connected nodes */
+ ret = update_capabilities(rec, nodemap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update node capabilities.\n"));
+ return;
+ }
+
+ if (ctdb->recovery_mode == CTDB_RECOVERY_NORMAL) {
+ /* Check if an IP takeover run is needed and trigger one if
+ * necessary */
+ verify_local_ip_allocation(rec);
+ }
+
+ /* If this node is not the leader then skip recovery checks */
+ if (!this_node_is_leader(rec)) {
+ return;
+ }
+
+
+ /* Get the nodemaps for all connected remote nodes */
+ ret = get_remote_nodemaps(rec, mem_ctx, &remote_nodemaps);
+ if (ret != 0) {
+ DBG_ERR("Failed to read remote nodemaps\n");
+ return;
+ }
+
+ /* Ensure our local and remote flags are correct */
+ ret = update_flags(rec, nodemap, remote_nodemaps);
+ if (ret != 0) {
+ D_ERR("Unable to update flags\n");
+ return;
+ }
+
+ if (ctdb->num_nodes != nodemap->num) {
+ DEBUG(DEBUG_ERR, (__location__ " ctdb->num_nodes (%d) != nodemap->num (%d) reloading nodes file\n", ctdb->num_nodes, nodemap->num));
+ ctdb_load_nodes_file(ctdb);
+ return;
+ }
+
+ /* get the vnnmap */
+ ret = ctdb_ctrl_getvnnmap(ctdb,
+ CONTROL_TIMEOUT(),
+ rec->pnn,
+ mem_ctx,
+ &vnnmap);
+ if (ret != 0) {
+ DBG_ERR("Unable to get vnnmap from node %u\n", rec->pnn);
+ return;
+ }
+
+ if (rec->need_recovery) {
+ /* a previous recovery didn't finish */
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+
+ /* verify that all active nodes are in normal mode
+ and not in recovery mode
+ */
+ switch (verify_recmode(ctdb, nodemap)) {
+ case MONITOR_RECOVERY_NEEDED:
+ do_recovery(rec, mem_ctx);
+ return;
+ case MONITOR_FAILED:
+ return;
+ case MONITOR_ELECTION_NEEDED:
+ /* can not happen */
+ case MONITOR_OK:
+ break;
+ }
+
+ if (cluster_lock_enabled(rec)) {
+ /* We must already hold the cluster lock */
+ if (!cluster_lock_held(rec)) {
+ D_ERR("Failed cluster lock sanity check\n");
+ ctdb_set_culprit(rec, rec->pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+ }
+
+
+ /* If recoveries are disabled then there is no use doing any
+ * nodemap or flags checks. Recoveries might be disabled due
+ * to "reloadnodes", so doing these checks might cause an
+ * unnecessary recovery. */
+ if (ctdb_op_is_disabled(rec->recovery)) {
+ goto takeover_run_checks;
+ }
+
+ /* verify that all other nodes have the same nodemap as we have
+ */
+ for (j=0; j<nodemap->num; j++) {
+ if (nodemap->nodes[j].pnn == rec->pnn) {
+ continue;
+ }
+ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
+ continue;
+ }
+
+ /* if the nodes disagree on how many nodes there are
+ then this is a good reason to try recovery
+ */
+ if (remote_nodemaps[j]->num != nodemap->num) {
+ DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",
+ nodemap->nodes[j].pnn, remote_nodemaps[j]->num, nodemap->num));
+ ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+
+ /* if the nodes disagree on which nodes exist and are
+ active, then that is also a good reason to do recovery
+ */
+ for (i=0;i<nodemap->num;i++) {
+ if (remote_nodemaps[j]->nodes[i].pnn != nodemap->nodes[i].pnn) {
+ DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u).\n",
+ nodemap->nodes[j].pnn, i,
+ remote_nodemaps[j]->nodes[i].pnn, nodemap->nodes[i].pnn));
+ ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+ }
+ }
+
+ /* count how many active nodes there are */
+ num_lmasters = 0;
+ for (i=0; i<nodemap->num; i++) {
+ if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
+ if (ctdb_node_has_capabilities(rec->caps,
+ ctdb->nodes[i]->pnn,
+ CTDB_CAP_LMASTER)) {
+ num_lmasters++;
+ }
+ }
+ }
+
+
+ /* There must be the same number of lmasters in the vnn map as
+ * there are active nodes with the lmaster capability... or
+ * do a recovery.
+ */
+ if (vnnmap->size != num_lmasters) {
+ DEBUG(DEBUG_ERR, (__location__ " The vnnmap count is different from the number of active lmaster nodes: %u vs %u\n",
+ vnnmap->size, num_lmasters));
+ ctdb_set_culprit(rec, rec->pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+
+ /*
+ * Verify that all active lmaster nodes in the nodemap also
+ * exist in the vnnmap
+ */
+ for (j=0; j<nodemap->num; j++) {
+ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
+ continue;
+ }
+ if (! ctdb_node_has_capabilities(rec->caps,
+ nodemap->nodes[j].pnn,
+ CTDB_CAP_LMASTER)) {
+ continue;
+ }
+ if (nodemap->nodes[j].pnn == rec->pnn) {
+ continue;
+ }
+
+ for (i=0; i<vnnmap->size; i++) {
+ if (vnnmap->map[i] == nodemap->nodes[j].pnn) {
+ break;
+ }
+ }
+ if (i == vnnmap->size) {
+ D_ERR("Active LMASTER node %u is not in the vnnmap\n",
+ nodemap->nodes[j].pnn);
+ ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+ }
+
+
+ /* verify that all other nodes have the same vnnmap
+ and are from the same generation
+ */
+ for (j=0; j<nodemap->num; j++) {
+ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
+ continue;
+ }
+ if (nodemap->nodes[j].pnn == rec->pnn) {
+ continue;
+ }
+
+ ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
+ mem_ctx, &remote_vnnmap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get vnnmap from remote node %u\n",
+ nodemap->nodes[j].pnn));
+ return;
+ }
+
+ /* verify the vnnmap generation is the same */
+ if (vnnmap->generation != remote_vnnmap->generation) {
+ DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n",
+ nodemap->nodes[j].pnn, remote_vnnmap->generation, vnnmap->generation));
+ ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+
+ /* verify the vnnmap size is the same */
+ if (vnnmap->size != remote_vnnmap->size) {
+ DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n",
+ nodemap->nodes[j].pnn, remote_vnnmap->size, vnnmap->size));
+ ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+
+ /* verify the vnnmap is the same */
+ for (i=0;i<vnnmap->size;i++) {
+ if (remote_vnnmap->map[i] != vnnmap->map[i]) {
+ DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different vnnmap.\n",
+ nodemap->nodes[j].pnn));
+ ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
+ do_recovery(rec, mem_ctx);
+ return;
+ }
+ }
+ }
+
+ /* FIXME: Add remote public IP checking to ensure that nodes
+ * have the IP addresses that are allocated to them. */
+
+takeover_run_checks:
+
+ /* If there are IP takeover runs requested or the previous one
+ * failed then perform one and notify the waiters */
+ if (!ctdb_op_is_disabled(rec->takeover_run) &&
+ (rec->reallocate_requests || rec->need_takeover_run)) {
+ process_ipreallocate_requests(ctdb, rec);
+ }
+}
+
+/*
+ * SIGTERM handler for the recovery daemon.
+ *
+ * Logs the signal, releases the cluster lock via cluster_lock_release()
+ * and terminates the process with exit status 0.  private_data must be
+ * the struct ctdb_recoverd registered with the handler.
+ */
+static void recd_sig_term_handler(struct tevent_context *ev,
+				  struct tevent_signal *se,
+				  int signum,
+				  int count,
+				  void *dont_care,
+				  void *private_data)
+{
+	struct ctdb_recoverd *recd;
+
+	recd = talloc_get_type_abort(private_data, struct ctdb_recoverd);
+
+	DEBUG(DEBUG_ERR, ("Received SIGTERM, exiting\n"));
+
+	cluster_lock_release(recd);
+	exit(0);
+}
+
+/*
+ * Periodically log elements of the cluster state
+ *
+ * This can be used to confirm a split brain has occurred.
+ *
+ * Timer callback that re-arms itself every 60 seconds.  Only the leader
+ * with a known nodemap evaluates the state: the cluster is "complete"
+ * when no other non-deleted node is flagged as disconnected.  While the
+ * cluster is incomplete a warning is logged every check for the first
+ * 10 minutes, then every 10 minutes, then every hour.
+ */
+static void maybe_log_cluster_state(struct tevent_context *ev,
+				    struct tevent_timer *te,
+				    struct timeval current_time,
+				    void *private_data)
+{
+	/* Zero means "cluster was complete at the previous check" */
+	static struct timeval start_incomplete = {
+		.tv_sec = 0,
+	};
+
+	struct ctdb_recoverd *rec = talloc_get_type_abort(
+		private_data, struct ctdb_recoverd);
+	struct tevent_timer *timer;
+	unsigned int connected = 0;
+	unsigned int elapsed_min = 0;
+	unsigned int idx;
+	bool all_connected = true;
+	bool previously_complete;
+
+	if (this_node_is_leader(rec) && rec->nodemap != NULL) {
+		/* Survey every other node that has not been deleted */
+		for (idx = 0; idx < rec->nodemap->num; idx++) {
+			struct ctdb_node_and_flags *node =
+				&rec->nodemap->nodes[idx];
+
+			if (node->pnn == rec->pnn ||
+			    (node->flags & NODE_FLAGS_DELETED) != 0) {
+				continue;
+			}
+
+			if ((node->flags & NODE_FLAGS_DISCONNECTED) != 0) {
+				all_connected = false;
+			} else {
+				connected++;
+			}
+		}
+
+		previously_complete = timeval_is_zero(&start_incomplete);
+
+		if (all_connected) {
+			/* Only report the transition back to complete */
+			if (!previously_complete) {
+				D_WARNING("Cluster complete with leader=%u\n",
+					  rec->leader);
+				start_incomplete = timeval_zero();
+			}
+		} else {
+			bool emit = true;
+
+			if (previously_complete) {
+				/* Newly incomplete: start the clock, log now */
+				start_incomplete = current_time;
+			} else {
+				double secs = timeval_elapsed2(
+					&start_incomplete, &current_time);
+
+				elapsed_min = (unsigned int)secs / 60;
+				if (elapsed_min >= 60) {
+					/* Over an hour, log every hour */
+					emit = (elapsed_min % 60 == 0);
+				} else if (elapsed_min >= 10) {
+					/* Over 10 minutes, log every 10 */
+					emit = (elapsed_min % 10 == 0);
+				}
+			}
+
+			if (emit) {
+				D_WARNING("Cluster incomplete with leader=%u, elapsed=%u minutes, "
+					  "connected=%u\n",
+					  rec->leader,
+					  elapsed_min,
+					  connected);
+			}
+		}
+	}
+
+	/* Always schedule the next check, whatever happened above */
+	timer = tevent_add_timer(rec->ctdb->ev,
+				 rec,
+				 timeval_current_ofs(60, 0),
+				 maybe_log_cluster_state,
+				 rec);
+	if (timer == NULL) {
+		DBG_WARNING("Failed to set up cluster state timer\n");
+	}
+}
+
+/*
+ * Hook passed to logging_setup_sighup_handler() by monitor_cluster().
+ *
+ * Forwards SIGHUP to the helper process recorded in rec->helper_pid,
+ * if there is one (helper_pid > 0).
+ */
+static void recd_sighup_hook(void *private_data)
+{
+	struct ctdb_recoverd *recd;
+
+	recd = talloc_get_type_abort(private_data, struct ctdb_recoverd);
+
+	if (recd->helper_pid <= 0) {
+		/* No helper process recorded, nothing to signal */
+		return;
+	}
+
+	kill(recd->helper_pid, SIGHUP);
+}
+
+/*
+ the main monitoring loop
+
+ Allocates and initialises the recovery daemon state, installs the
+ SIGHUP and SIGTERM handlers, registers all message handlers and then
+ calls main_loop() forever.  This function never returns; the process
+ only leaves it via exit().
+ */
+static void monitor_cluster(struct ctdb_context *ctdb)
+{
+ struct tevent_signal *se;
+ struct ctdb_recoverd *rec;
+ bool status;
+
+ DEBUG(DEBUG_NOTICE,("monitor_cluster starting\n"));
+
+ rec = talloc_zero(ctdb, struct ctdb_recoverd);
+ CTDB_NO_MEMORY_FATAL(ctdb, rec);
+
+ rec->ctdb = ctdb;
+ rec->leader = CTDB_UNKNOWN_PNN;
+ rec->pnn = ctdb_get_pnn(ctdb);
+ rec->cluster_lock_handle = NULL;
+ /* no helper process yet; recd_sighup_hook() only signals pids > 0 */
+ rec->helper_pid = -1;
+
+ rec->takeover_run = ctdb_op_init(rec, "takeover runs");
+ CTDB_NO_MEMORY_FATAL(ctdb, rec->takeover_run);
+
+ rec->recovery = ctdb_op_init(rec, "recoveries");
+ CTDB_NO_MEMORY_FATAL(ctdb, rec->recovery);
+
+ rec->priority_time = timeval_current();
+ rec->frozen_on_inactive = false;
+
+ /* SIGHUP: recd_sighup_hook() forwards the signal to the helper */
+ status = logging_setup_sighup_handler(rec->ctdb->ev,
+ rec,
+ recd_sighup_hook,
+ rec);
+ if (!status) {
+ D_ERR("Failed to install SIGHUP handler\n");
+ exit(1);
+ }
+
+ /* SIGTERM: release the cluster lock and exit */
+ se = tevent_add_signal(ctdb->ev, ctdb, SIGTERM, 0,
+ recd_sig_term_handler, rec);
+ if (se == NULL) {
+ DEBUG(DEBUG_ERR, ("Failed to install SIGTERM handler\n"));
+ exit(1);
+ }
+
+ /* Without a cluster lock, periodically log the cluster state;
+ * failing to arm the timer is only a warning */
+ if (!cluster_lock_enabled(rec)) {
+ struct tevent_timer *tt;
+
+ tt = tevent_add_timer(ctdb->ev,
+ rec,
+ timeval_current_ofs(60, 0),
+ maybe_log_cluster_state,
+ rec);
+ if (tt == NULL) {
+ DBG_WARNING("Failed to set up cluster state timer\n");
+ }
+ }
+
+ /* register a message port for sending memory dumps */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
+
+ /* when a node is assigned banning credits */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_BANNING,
+ banning_handler, rec);
+
+ /* register a message port for recovery elections */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_ELECTION, election_handler, rec);
+
+ /* SET_NODE_FLAGS is routed to the "not implemented" handler */
+ ctdb_client_set_message_handler(ctdb,
+ CTDB_SRVID_SET_NODE_FLAGS,
+ srvid_not_implemented,
+ rec);
+
+ /* when we are asked to push out a flag change */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_PUSH_NODE_FLAGS, push_flags_handler, rec);
+
+ /* register a message port for reloadnodes */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_RELOAD_NODES, reload_nodes_handler, rec);
+
+ /* register a message port for performing a takeover run */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_TAKEOVER_RUN, ip_reallocate_handler, rec);
+
+ /* register a message port for disabling the ip check for a short while */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_DISABLE_IP_CHECK, disable_ip_check_handler, rec);
+
+ /* register a message port for forcing a rebalance of a node next
+ reallocation */
+ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_REBALANCE_NODE, recd_node_rebalance_handler, rec);
+
+ /* Register a message port for disabling takeover runs */
+ ctdb_client_set_message_handler(ctdb,
+ CTDB_SRVID_DISABLE_TAKEOVER_RUNS,
+ disable_takeover_runs_handler, rec);
+
+ /* Register a message port for disabling recoveries */
+ ctdb_client_set_message_handler(ctdb,
+ CTDB_SRVID_DISABLE_RECOVERIES,
+ disable_recoveries_handler, rec);
+
+ /* Register a message port for leader messages */
+ ctdb_client_set_message_handler(ctdb,
+ CTDB_SRVID_LEADER,
+ leader_handler,
+ rec);
+
+ /* Run main_loop() forever, each pass with its own temporary
+ * talloc context that is freed as soon as the pass returns */
+ for (;;) {
+ TALLOC_CTX *mem_ctx = talloc_new(ctdb);
+ struct timeval start;
+ double elapsed;
+
+ if (!mem_ctx) {
+ DEBUG(DEBUG_CRIT,(__location__
+ " Failed to create temp context\n"));
+ exit(-1);
+ }
+
+ start = timeval_current();
+ main_loop(ctdb, rec, mem_ctx);
+ talloc_free(mem_ctx);
+
+ /* we only check for recovery once every recover_interval
+ * seconds (tunable): wait out whatever is left of the
+ * interval after this pass */
+ elapsed = timeval_elapsed(&start);
+ if (elapsed < ctdb->tunable.recover_interval) {
+ ctdb_wait_timeout(ctdb, ctdb->tunable.recover_interval
+ - elapsed);
+ }
+ }
+}
+
+/*
+ event handler for when the main ctdbd dies
+
+ Registered by ctdb_start_recoverd() on the read end of the pipe that
+ is shared with the parent; presumably the fd only becomes readable
+ (EOF) once the parent has gone away - confirm.  Logs an alert and
+ terminates the recovery daemon immediately with _exit(1).
+ */
+static void ctdb_recoverd_parent(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ DEBUG(DEBUG_ALERT,("recovery daemon parent died - exiting\n"));
+ /* _exit() rather than exit(): leave without running exit handlers */
+ _exit(1);
+}
+
+/*
+  called regularly to verify that the recovery daemon is still running
+
+  Timer callback armed by ctdb_start_recoverd() in the parent.  The
+  recovery daemon is probed with signal 0: if the probe fails a restart
+  is scheduled immediately via ctdb_restart_recd(), otherwise the next
+  check is armed 30 seconds from now.  Failure to arm either timer is
+  logged, since it would otherwise silently stop the monitoring or the
+  restart of the recovery daemon.
+ */
+static void ctdb_check_recd(struct tevent_context *ev,
+			    struct tevent_timer *te,
+			    struct timeval yt, void *p)
+{
+	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
+	struct tevent_timer *tt;
+
+	if (ctdb_kill(ctdb, ctdb->recoverd_pid, 0) != 0) {
+		DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Trying to restart recovery daemon.\n", (int)ctdb->recoverd_pid));
+
+		/*
+		 * The restart timer hangs off ctdb rather than
+		 * ctdb->recd_ctx because ctdb_stop_recoverd() frees
+		 * recd_ctx as part of the restart.
+		 */
+		tt = tevent_add_timer(ctdb->ev, ctdb, timeval_zero(),
+				      ctdb_restart_recd, ctdb);
+		if (tt == NULL) {
+			DEBUG(DEBUG_ERR,
+			      (__location__ " Failed to schedule restart "
+			       "of recovery daemon\n"));
+		}
+
+		return;
+	}
+
+	tt = tevent_add_timer(ctdb->ev, ctdb->recd_ctx,
+			      timeval_current_ofs(30, 0),
+			      ctdb_check_recd, ctdb);
+	if (tt == NULL) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to schedule next check "
+		       "of recovery daemon\n"));
+	}
+}
+
+/*
+ * SIGCHLD handler for the recovery daemon.
+ *
+ * Reaps every child that has already exited without blocking.  Stops
+ * when waitpid() reports that no more exited children are pending (0)
+ * or fails; a failure other than ECHILD is logged.
+ */
+static void recd_sig_child_handler(struct tevent_context *ev,
+				   struct tevent_signal *se, int signum,
+				   int count, void *dont_care,
+				   void *private_data)
+{
+	int status;
+
+	for (;;) {
+		pid_t child = waitpid(-1, &status, WNOHANG);
+
+		if (child == 0) {
+			/* Children exist, but none has exited yet */
+			return;
+		}
+
+		if (child == -1) {
+			/* ECHILD just means there is nothing left to reap */
+			if (errno != ECHILD) {
+				DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%s(%d)\n", strerror(errno),errno));
+			}
+			return;
+		}
+
+		if (child > 0) {
+			DEBUG(DEBUG_DEBUG, ("RECD SIGCHLD from %d\n", (int)child));
+		}
+	}
+}
+
+/*
+  startup the recovery daemon as a child of the main ctdb daemon
+
+  A pipe is created before forking.  The parent keeps the write end
+  and the child watches the read end with ctdb_recoverd_parent(), which
+  makes the child exit when the parent goes away.
+
+  In the parent: (re)creates ctdb->recd_ctx, arms the periodic
+  ctdb_check_recd() timer and returns 0, or -1 on failure.
+
+  In the child: sets up logging, switches to client mode, installs the
+  pipe and SIGCHLD handlers and enters monitor_cluster(), which is not
+  expected to return.
+
+  NOTE(review): on logging_init() failure, and if monitor_cluster()
+  ever returned, the forked child returns -1 into the caller's code
+  path instead of exiting - confirm this is intended.
+ */
+int ctdb_start_recoverd(struct ctdb_context *ctdb)
+{
+	int fd[2];
+	struct tevent_signal *se;
+	struct tevent_fd *fde;
+	int ret;
+
+	if (pipe(fd) != 0) {
+		return -1;
+	}
+
+	ctdb->recoverd_pid = ctdb_fork(ctdb);
+	if (ctdb->recoverd_pid == -1) {
+		/* Don't leak the pipe; keep the fork error in errno */
+		int saved_errno = errno;
+
+		close(fd[0]);
+		close(fd[1]);
+		errno = saved_errno;
+		return -1;
+	}
+
+	if (ctdb->recoverd_pid != 0) {
+		/*
+		 * Parent.  Close the read end first so that it is not
+		 * leaked if the allocation below fails.  The write end
+		 * stays open for as long as this process lives.
+		 */
+		close(fd[0]);
+
+		talloc_free(ctdb->recd_ctx);
+		ctdb->recd_ctx = talloc_new(ctdb);
+		CTDB_NO_MEMORY(ctdb, ctdb->recd_ctx);
+
+		tevent_add_timer(ctdb->ev, ctdb->recd_ctx,
+				 timeval_current_ofs(30, 0),
+				 ctdb_check_recd, ctdb);
+		return 0;
+	}
+
+	/* Child: only the read end of the pipe is needed */
+	close(fd[1]);
+
+	srandom(getpid() ^ time(NULL));
+
+	ret = logging_init(ctdb, NULL, NULL, "ctdb-recoverd");
+	if (ret != 0) {
+		return -1;
+	}
+
+	prctl_set_comment("ctdb_recoverd");
+	if (switch_from_server_to_client(ctdb) != 0) {
+		DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch recovery daemon into client mode. shutting down.\n"));
+		exit(1);
+	}
+
+	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to recovery daemon\n", fd[0]));
+
+	/*
+	 * Watch the pipe so that the death of the parent is noticed.
+	 * The handler ignores its private data, so passing the address
+	 * of a local is harmless here.
+	 */
+	fde = tevent_add_fd(ctdb->ev, ctdb, fd[0], TEVENT_FD_READ,
+			    ctdb_recoverd_parent, &fd[0]);
+	if (fde == NULL) {
+		/* Without this handler the child would outlive its parent */
+		DEBUG(DEBUG_CRIT,("Failed to set up parent pipe handler in recovery daemon\n"));
+		exit(1);
+	}
+	tevent_fd_set_auto_close(fde);
+
+	/* set up a handler to pick up sigchld */
+	se = tevent_add_signal(ctdb->ev, ctdb, SIGCHLD, 0,
+			       recd_sig_child_handler, ctdb);
+	if (se == NULL) {
+		DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD in recovery daemon\n"));
+		exit(1);
+	}
+
+	monitor_cluster(ctdb);
+
+	DEBUG(DEBUG_ALERT,("ERROR: ctdb_recoverd finished!?\n"));
+	return -1;
+}
+
+/*
+  shutdown the recovery daemon
+
+  Does nothing when no recovery daemon pid is recorded (recoverd_pid
+  is 0).  Otherwise sends SIGTERM to the daemon and frees the talloc
+  contexts ctdb->recd_ctx and ctdb->recd_ping_count.
+ */
+void ctdb_stop_recoverd(struct ctdb_context *ctdb)
+{
+	if (ctdb->recoverd_pid != 0) {
+		DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
+		ctdb_kill(ctdb, ctdb->recoverd_pid, SIGTERM);
+
+		/* Freeing recd_ctx also drops the timers allocated on it */
+		TALLOC_FREE(ctdb->recd_ctx);
+		TALLOC_FREE(ctdb->recd_ping_count);
+	}
+}
+
+/*
+ * Timer callback that restarts the recovery daemon: stop whatever is
+ * left of the old one, then start a new one.
+ *
+ * A failure to start is logged.  No timer is armed in that case, so
+ * without the log message the missing recovery daemon would go
+ * unnoticed.
+ */
+static void ctdb_restart_recd(struct tevent_context *ev,
+			      struct tevent_timer *te,
+			      struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	int ret;
+
+	DEBUG(DEBUG_ERR,("Restarting recovery daemon\n"));
+	ctdb_stop_recoverd(ctdb);
+
+	ret = ctdb_start_recoverd(ctdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to restart recovery daemon\n"));
+	}
+}
diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c
new file mode 100644
index 0000000..4df4841
--- /dev/null
+++ b/ctdb/server/ctdb_recovery_helper.c
@@ -0,0 +1,3200 @@
+/*
+ ctdb parallel database recovery
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+#include <libgen.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/time.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/util.h"
+#include "lib/util/smb_strtox.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "client/client.h"
+
+#include "common/logging.h"
+
+/* Timeout in seconds used by TIMEOUT(); defaults to 30 */
+static int recover_timeout = 30;
+
+/* NOTE(review): presumably the retry count for recovery steps; its use
+ * is outside this part of the file - confirm */
+#define NUM_RETRIES 3
+
+/* Absolute deadline recover_timeout seconds from now */
+#define TIMEOUT() timeval_current_ofs(recover_timeout, 0)
+
+/*
+ * Utility functions
+ */
+
+/*
+ * Common "recv" half for requests that produce no result other than
+ * success or failure.
+ *
+ * Returns true if the request did not finish with a unix error.
+ * Otherwise returns false and, if perr is not NULL, stores the error
+ * code in *perr.
+ */
+static bool generic_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+	bool failed;
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (!failed) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return false;
+}
+
+/* Last srvid handed out by srvid_next(); starts at CTDB_SRVID_RECOVERY */
+static uint64_t rec_srvid = CTDB_SRVID_RECOVERY;
+
+/*
+ * Return a fresh srvid: the counter is incremented first, so the first
+ * value returned is CTDB_SRVID_RECOVERY + 1.
+ */
+static uint64_t srvid_next(void)
+{
+	return ++rec_srvid;
+}
+
+/*
+ * Node related functions
+ */
+
+/*
+ * Fixed-capacity list of nodes with per-node capabilities and ban
+ * credits.  The three arrays are parallel: index i in each of them
+ * refers to the same node.  Created by node_list_init().
+ */
+struct node_list {
+ /* PNN of each node; unused slots hold CTDB_UNKNOWN_PNN */
+ uint32_t *pnn_list;
+ /* capability bits of each node (tested against CTDB_CAP_LMASTER) */
+ uint32_t *caps;
+ /* ban credits of each node, incremented by node_list_ban_credits() */
+ uint32_t *ban_credits;
+ /* number of slots allocated in each array */
+ unsigned int size;
+ /* number of slots in use; never exceeds size */
+ unsigned int count;
+};
+
+/*
+ * Allocate an empty node list with room for "size" nodes.
+ *
+ * Every PNN slot starts out as CTDB_UNKNOWN_PNN, capabilities and ban
+ * credits start out as zero and count is 0.  Returns NULL if any
+ * allocation fails.
+ */
+static struct node_list *node_list_init(TALLOC_CTX *mem_ctx, unsigned int size)
+{
+	struct node_list *list;
+	unsigned int slot;
+	bool alloc_ok;
+
+	list = talloc_zero(mem_ctx, struct node_list);
+	if (list == NULL) {
+		return NULL;
+	}
+
+	/* The arrays are children of the list and are freed with it */
+	list->pnn_list = talloc_array(list, uint32_t, size);
+	list->caps = talloc_zero_array(list, uint32_t, size);
+	list->ban_credits = talloc_zero_array(list, uint32_t, size);
+
+	alloc_ok = (list->pnn_list != NULL) &&
+		   (list->caps != NULL) &&
+		   (list->ban_credits != NULL);
+	if (!alloc_ok) {
+		talloc_free(list);
+		return NULL;
+	}
+
+	list->size = size;
+	for (slot = 0; slot < size; slot++) {
+		list->pnn_list[slot] = CTDB_UNKNOWN_PNN;
+	}
+
+	return list;
+}
+
+/*
+ * Append a node to the list.
+ *
+ * Returns false, leaving the list untouched, if the list is already
+ * full or the PNN is already present.  Returns true once the PNN has
+ * been added.
+ */
+static bool node_list_add(struct node_list *nlist, uint32_t pnn)
+{
+	unsigned int idx = 0;
+
+	if (nlist->count == nlist->size) {
+		return false;
+	}
+
+	/* Refuse duplicates */
+	while (idx < nlist->count) {
+		if (nlist->pnn_list[idx] == pnn) {
+			return false;
+		}
+		idx++;
+	}
+
+	/* idx now equals count, i.e. the first free slot */
+	nlist->pnn_list[idx] = pnn;
+	nlist->count = idx + 1;
+
+	return true;
+}
+
+/*
+ * Build an array of the PNNs of all listed nodes that have the
+ * CTDB_CAP_LMASTER capability.
+ *
+ * The array is allocated on mem_ctx with room for nlist->count entries
+ * and the number of entries actually filled in is stored in
+ * *pnn_count.  Returns NULL, without touching *pnn_count, if the
+ * allocation fails.
+ */
+static uint32_t *node_list_lmaster(struct node_list *nlist,
+				   TALLOC_CTX *mem_ctx,
+				   unsigned int *pnn_count)
+{
+	uint32_t *lmasters;
+	unsigned int found = 0;
+	unsigned int idx;
+
+	lmasters = talloc_zero_array(mem_ctx, uint32_t, nlist->count);
+	if (lmasters == NULL) {
+		return NULL;
+	}
+
+	for (idx = 0; idx < nlist->count; idx++) {
+		if ((nlist->caps[idx] & CTDB_CAP_LMASTER) != 0) {
+			lmasters[found] = nlist->pnn_list[idx];
+			found++;
+		}
+	}
+
+	*pnn_count = found;
+	return lmasters;
+}
+
+/*
+ * Give one more ban credit to the node with the given PNN.  PNNs that
+ * are not in the list are silently ignored.
+ */
+static void node_list_ban_credits(struct node_list *nlist, uint32_t pnn)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < nlist->count; idx++) {
+		if (nlist->pnn_list[idx] != pnn) {
+			continue;
+		}
+
+		/* Only the first match is credited */
+		nlist->ban_credits[idx]++;
+		return;
+	}
+}
+
+/*
+ * Database list functions
+ *
+ * Simple, naive implementation that could be updated to a db_hash or similar
+ */
+
+/*
+ * One database known to a db_list, together with the nodes that were
+ * recorded as having it.
+ */
+struct db {
+ /* doubly-linked list linkage (see DLIST_ADD_END in db_list_add()) */
+ struct db *prev, *next;
+
+ uint32_t db_id;
+ /* flags as stored by db_list_add(); db_list_check_and_add() masks
+ * out READONLY and STICKY before storing or comparing */
+ uint32_t db_flags;
+ /* nodes recorded for this database; allocated with the owning
+ * db_list's num_nodes entries */
+ uint32_t *pnn_list;
+ /* number of entries of pnn_list in use */
+ unsigned int num_nodes;
+};
+
+/* List of databases, each with the set of nodes recorded for it */
+struct db_list {
+ /* number of entries in the db list */
+ unsigned int num_dbs;
+ /* head of the list of databases */
+ struct db *db;
+ /* capacity of every entry's pnn_list, set by db_list_init() */
+ unsigned int num_nodes;
+};
+
+/*
+ * Allocate an empty database list on mem_ctx.
+ *
+ * num_nodes is the maximum number of nodes that can be recorded for
+ * each database added to the list.  Returns NULL on allocation
+ * failure; the other db_list functions accept a NULL list and report
+ * it as EINVAL or "not found".
+ */
+static struct db_list *db_list_init(TALLOC_CTX *mem_ctx, unsigned int num_nodes)
+{
+	struct db_list *l;
+
+	l = talloc_zero(mem_ctx, struct db_list);
+	if (l == NULL) {
+		/* Previously dereferenced unconditionally */
+		return NULL;
+	}
+
+	l->num_nodes = num_nodes;
+
+	return l;
+}
+
+/*
+ * Look up a database by id.  Returns the matching entry, or NULL if
+ * the list is NULL or contains no database with that id.
+ */
+static struct db *db_list_find(struct db_list *dblist, uint32_t db_id)
+{
+	struct db *cur;
+
+	if (dblist == NULL) {
+		return NULL;
+	}
+
+	for (cur = dblist->db; cur != NULL; cur = cur->next) {
+		if (cur->db_id == db_id) {
+			return cur;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Append a new database to the list, recording "node" as the first
+ * node that has it.  No check is made for an existing entry with the
+ * same id.
+ *
+ * Returns 0 on success, EINVAL if dblist is NULL and ENOMEM if an
+ * allocation fails.
+ */
+static int db_list_add(struct db_list *dblist,
+		       uint32_t db_id,
+		       uint32_t db_flags,
+		       uint32_t node)
+{
+	struct db *entry;
+
+	if (dblist == NULL) {
+		return EINVAL;
+	}
+
+	entry = talloc_zero(dblist, struct db);
+	if (entry == NULL) {
+		return ENOMEM;
+	}
+
+	/* Room for every node of the list, owned by the entry */
+	entry->pnn_list = talloc_zero_array(entry, uint32_t,
+					    dblist->num_nodes);
+	if (entry->pnn_list == NULL) {
+		talloc_free(entry);
+		return ENOMEM;
+	}
+
+	entry->db_id = db_id;
+	entry->db_flags = db_flags;
+	entry->pnn_list[0] = node;
+	entry->num_nodes = 1;
+
+	DLIST_ADD_END(dblist->db, entry);
+	dblist->num_dbs += 1;
+
+	return 0;
+}
+
+/*
+ * Record that "node" has database db_id.
+ *
+ * An unknown database is added to the list.  For a known database the
+ * flags must match those already recorded and there must still be
+ * room in its node array, otherwise EINVAL is returned.  Returns 0 on
+ * success, EINVAL if dblist is NULL, or the error from db_list_add().
+ */
+static int db_list_check_and_add(struct db_list *dblist,
+				 uint32_t db_id,
+				 uint32_t db_flags,
+				 uint32_t node)
+{
+	/*
+	 * These flags are masked out because they are only set on a
+	 * node when a client attaches to that node, so they might not
+	 * be set yet. They can't be passed as part of the attach, so
+	 * they're no use here.
+	 */
+	const uint32_t ignored = CTDB_DB_FLAGS_READONLY | CTDB_DB_FLAGS_STICKY;
+	uint32_t flags = db_flags & ~ignored;
+	struct db *known;
+
+	if (dblist == NULL) {
+		return EINVAL;
+	}
+
+	known = db_list_find(dblist, db_id);
+	if (known == NULL) {
+		/* First sighting of this database */
+		return db_list_add(dblist, db_id, flags, node);
+	}
+
+	if (known->db_flags != flags) {
+		D_ERR("Incompatible database flags for 0x%"PRIx32" "
+		      "(0x%"PRIx32" != 0x%"PRIx32")\n",
+		      db_id,
+		      flags,
+		      known->db_flags);
+		return EINVAL;
+	}
+
+	if (known->num_nodes >= dblist->num_nodes) {
+		/* Node array is already full */
+		return EINVAL;
+	}
+
+	known->pnn_list[known->num_nodes] = node;
+	known->num_nodes += 1;
+
+	return 0;
+}
+
+/*
+ * Create database on nodes where it is missing
+ */
+
+/* State of a db_create_missing_send() request */
+struct db_create_missing_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+
+ /* nodes taking part; receives ban credits for a failing node */
+ struct node_list *nlist;
+
+ /* name of the database to attach on the missing nodes */
+ const char *db_name;
+ /* PNNs from nlist that are not in the database's node list */
+ uint32_t *missing_pnn_list;
+ /* number of entries in missing_pnn_list */
+ int missing_num_nodes;
+};
+
+static void db_create_missing_done(struct tevent_req *subreq);
+
+/*
+ * Start creating a database on the nodes where it is missing.
+ *
+ * Every node in nlist that is not in db->pnn_list is sent a DB_ATTACH
+ * control (the persistent, replicated or plain variant, chosen from
+ * db->db_flags) with CTDB_CTRL_FLAG_ATTACH_RECOVERY set.  If the node
+ * counts of nlist and db are equal the request completes immediately.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ * Collect the result with db_create_missing_recv().
+ */
+static struct tevent_req *db_create_missing_send(
+					TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_client_context *client,
+					struct node_list *nlist,
+					const char *db_name,
+					struct db *db)
+{
+	struct tevent_req *req, *subreq;
+	struct db_create_missing_state *state;
+	struct ctdb_req_control request;
+	unsigned int i, j;
+
+	req = tevent_req_create(mem_ctx,
+				&state,
+				struct db_create_missing_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->nlist = nlist;
+	state->db_name = db_name;
+
+	if (nlist->count == db->num_nodes) {
+		/* Same number of nodes: treated as nothing missing */
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	/*
+	 * Allocate on the request state rather than on the caller's
+	 * mem_ctx, so the array is released together with the request
+	 * instead of lingering until mem_ctx is freed.
+	 */
+	state->missing_pnn_list = talloc_array(state, uint32_t, nlist->count);
+	if (tevent_req_nomem(state->missing_pnn_list, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/* Collect the nodes in nlist that don't have the database */
+	for (i = 0; i < nlist->count; i++) {
+		uint32_t pnn = nlist->pnn_list[i];
+
+		for (j = 0; j < db->num_nodes; j++) {
+			if (pnn == db->pnn_list[j]) {
+				break;
+			}
+		}
+
+		if (j < db->num_nodes) {
+			/* Found: this node already has the database */
+			continue;
+		}
+
+		DBG_INFO("Create database %s on node %u\n",
+			 state->db_name,
+			 pnn);
+		state->missing_pnn_list[state->missing_num_nodes] = pnn;
+		state->missing_num_nodes++;
+	}
+
+	if (db->db_flags & CTDB_DB_FLAGS_PERSISTENT) {
+		ctdb_req_control_db_attach_persistent(&request, db_name);
+	} else if (db->db_flags & CTDB_DB_FLAGS_REPLICATED) {
+		ctdb_req_control_db_attach_replicated(&request, db_name);
+	} else {
+		ctdb_req_control_db_attach(&request, db_name);
+	}
+	request.flags = CTDB_CTRL_FLAG_ATTACH_RECOVERY;
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->missing_pnn_list,
+						state->missing_num_nodes,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, db_create_missing_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the DB_ATTACH controls sent by
+ * db_create_missing_send().
+ *
+ * On failure the error is logged and the request fails with the
+ * overall error code.  If a specific failing node can be identified it
+ * is also given a ban credit.
+ */
+static void db_create_missing_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct db_create_missing_state *state = tevent_req_data(
+		req, struct db_create_missing_state);
+	int *node_errs;
+	uint32_t failed_pnn;
+	int rc;
+	int node_rc;
+	bool ok;
+
+	ok = ctdb_client_control_multi_recv(subreq,
+					    &rc,
+					    NULL,
+					    &node_errs,
+					    NULL);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		tevent_req_done(req);
+		return;
+	}
+
+	/* Work out which node, if any, reported the failure */
+	node_rc = ctdb_client_control_multi_error(state->missing_pnn_list,
+						  state->missing_num_nodes,
+						  node_errs,
+						  &failed_pnn);
+	if (node_rc == 0) {
+		D_ERR("control DB_ATTACH failed for db %s, ret=%d\n",
+		      state->db_name,
+		      rc);
+	} else {
+		D_ERR("control DB_ATTACH failed for db %s"
+		      " on node %u, ret=%d\n",
+		      state->db_name,
+		      failed_pnn,
+		      node_rc);
+		node_list_ban_credits(state->nlist, failed_pnn);
+	}
+
+	tevent_req_error(req, rc);
+}
+
+/*
+ * Collect the result of db_create_missing_send().  Returns true on
+ * success; on failure returns false and, if perr is not NULL, stores
+ * the error code in *perr (see generic_recv()).
+ */
+static bool db_create_missing_recv(struct tevent_req *req, int *perr)
+{
+ return generic_recv(req, perr);
+}
+
+/*
+ * Recovery database functions
+ */
+
+/* A temporary recovery database, created by recdb_create() */
+struct recdb_context {
+ /* id of the database being recovered */
+ uint32_t db_id;
+ /* name of the database being recovered (pointer supplied by caller) */
+ const char *db_name;
+ /* path of the recovery tdb file, "<dir>/recdb.<db_name>" */
+ const char *db_path;
+ /* open handle on the recovery tdb */
+ struct tdb_wrap *db;
+ /* whether the database was declared persistent at creation */
+ bool persistent;
+};
+
+/*
+ * Create a fresh, empty recovery database for db_name.
+ *
+ * The tdb file is called "recdb.<db_name>" and lives in the directory
+ * named by the CTDB_DBDIR_STATE environment variable or, if that is
+ * not set, in the directory part of db_path.  Any existing file of
+ * that name is removed first.
+ *
+ * Returns the new context allocated on mem_ctx, or NULL on failure.
+ */
+static struct recdb_context *recdb_create(TALLOC_CTX *mem_ctx, uint32_t db_id,
+					  const char *db_name,
+					  const char *db_path,
+					  uint32_t hash_size, bool persistent)
+{
+	/* Looked up once and remembered for subsequent calls */
+	static char *db_dir_state = NULL;
+	struct recdb_context *recdb;
+	unsigned int tdb_flags;
+	char *path_copy = NULL;
+	const char *dir;
+
+	recdb = talloc(mem_ctx, struct recdb_context);
+	if (recdb == NULL) {
+		return NULL;
+	}
+
+	if (db_dir_state == NULL) {
+		db_dir_state = getenv("CTDB_DBDIR_STATE");
+	}
+
+	if (db_dir_state != NULL) {
+		dir = db_dir_state;
+	} else {
+		/*
+		 * dirname() may modify the string it is given, so run
+		 * it on a private copy instead of the caller's
+		 * db_path.  A NULL path is passed through unchanged,
+		 * as before.
+		 */
+		if (db_path != NULL) {
+			path_copy = talloc_strdup(recdb, db_path);
+			if (path_copy == NULL) {
+				talloc_free(recdb);
+				return NULL;
+			}
+		}
+		dir = dirname(path_copy);
+	}
+
+	recdb->db_name = db_name;
+	recdb->db_id = db_id;
+	recdb->db_path = talloc_asprintf(recdb, "%s/recdb.%s", dir, db_name);
+	/* dir may point into path_copy, so only free it now */
+	TALLOC_FREE(path_copy);
+	if (recdb->db_path == NULL) {
+		talloc_free(recdb);
+		return NULL;
+	}
+	unlink(recdb->db_path);
+
+	tdb_flags = TDB_NOLOCK | TDB_INCOMPATIBLE_HASH | TDB_DISALLOW_NESTING;
+	recdb->db = tdb_wrap_open(mem_ctx, recdb->db_path, hash_size,
+				  tdb_flags, O_RDWR|O_CREAT|O_EXCL, 0600);
+	if (recdb->db == NULL) {
+		/*
+		 * Log before freeing: db_path is a talloc child of
+		 * recdb and is gone once recdb has been freed.
+		 */
+		D_ERR("failed to create recovery db %s\n", recdb->db_path);
+		talloc_free(recdb);
+		return NULL;
+	}
+
+	recdb->persistent = persistent;
+
+	return recdb;
+}
+
+/* Return the id of the database this recovery database was created for */
+static uint32_t recdb_id(struct recdb_context *recdb)
+{
+ return recdb->db_id;
+}
+
+/* Return the database name given to recdb_create() (not a copy) */
+static const char *recdb_name(struct recdb_context *recdb)
+{
+ return recdb->db_name;
+}
+
+/* Return the path of the recovery tdb file; owned by recdb */
+static const char *recdb_path(struct recdb_context *recdb)
+{
+ return recdb->db_path;
+}
+
+/* Return the raw tdb handle behind the recovery database's tdb_wrap */
+static struct tdb_context *recdb_tdb(struct recdb_context *recdb)
+{
+ return recdb->db->tdb;
+}
+
+/* Return the persistent flag given to recdb_create() */
+static bool recdb_persistent(struct recdb_context *recdb)
+{
+ return recdb->persistent;
+}
+
+/* Context handed to recdb_add_traverse() by recdb_add() */
+struct recdb_add_traverse_state {
+ /* recovery database the records are merged into */
+ struct recdb_context *recdb;
+ /* PNN passed to recdb_add(); compared against the stored dmaster */
+ uint32_t mypnn;
+};
+
+/*
+ * Record buffer traverse callback: merge one record into the recovery
+ * database.
+ *
+ * The record already stored under the same key is kept if it has a
+ * higher RSN, or the same RSN and a dmaster other than state->mypnn.
+ * In every other case the incoming record replaces it.
+ *
+ * Returns 0 on success (stored or skipped) and -1 if the record is too
+ * short to contain a header or the store fails.
+ */
+static int recdb_add_traverse(uint32_t reqid, struct ctdb_ltdb_header *header,
+			      TDB_DATA key, TDB_DATA data,
+			      void *private_data)
+{
+	struct recdb_add_traverse_state *state = private_data;
+	struct tdb_context *tdb = recdb_tdb(state->recdb);
+	struct ctdb_ltdb_header *incoming;
+	TDB_DATA existing;
+
+	/* header is not marshalled separately in the pulldb control */
+	if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+		return -1;
+	}
+	incoming = (struct ctdb_ltdb_header *)data.dptr;
+
+	existing = tdb_fetch(tdb, key);
+	if (existing.dptr != NULL) {
+		struct ctdb_ltdb_header stored;
+		bool keep_existing;
+
+		/* Copy the header out so the fetched data can be freed */
+		stored = *(struct ctdb_ltdb_header *)existing.dptr;
+		free(existing.dptr);
+
+		keep_existing = (incoming->rsn < stored.rsn) ||
+				(incoming->rsn == stored.rsn &&
+				 stored.dmaster != state->mypnn);
+		if (keep_existing) {
+			return 0;
+		}
+	}
+
+	if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) {
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Merge all records of a record buffer into the recovery database,
+ * using recdb_add_traverse() for each record.  Returns true if the
+ * whole buffer was traversed without error.
+ */
+static bool recdb_add(struct recdb_context *recdb, int mypnn,
+		      struct ctdb_rec_buffer *recbuf)
+{
+	struct recdb_add_traverse_state state = {
+		.recdb = recdb,
+		.mypnn = mypnn,
+	};
+
+	return ctdb_rec_buffer_traverse(recbuf,
+					recdb_add_traverse,
+					&state) == 0;
+}
+
+/*
+ * This function decides which records from recdb are retained
+ *
+ * Records whose data is no bigger than the ltdb header (empty
+ * records) are skipped.  For non-persistent databases the header in
+ * the record data is rewritten in place: dmaster is set to the given
+ * dmaster and CTDB_REC_FLAG_MIGRATED_WITH_DATA is set.
+ *
+ * Returns 0 if the record was skipped or added, otherwise the error
+ * from ctdb_rec_buffer_add().
+ */
+static int recbuf_filter_add(struct ctdb_rec_buffer *recbuf, bool persistent,
+			     uint32_t reqid, uint32_t dmaster,
+			     TDB_DATA key, TDB_DATA data)
+{
+	if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
+		/* Header only, no payload */
+		return 0;
+	}
+
+	if (!persistent) {
+		struct ctdb_ltdb_header *hdr =
+			(struct ctdb_ltdb_header *)data.dptr;
+
+		/* update the dmaster field to point to us */
+		hdr->dmaster = dmaster;
+		hdr->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA;
+	}
+
+	return ctdb_rec_buffer_add(recbuf, recbuf, reqid, NULL, key, data);
+}
+
+/* Context handed to recdb_file_traverse() by recdb_file() */
+struct recdb_file_traverse_state {
+ /* buffer currently being filled; replaced after each write to fd */
+ struct ctdb_rec_buffer *recbuf;
+ struct recdb_context *recdb;
+ /* talloc context new record buffers are allocated on */
+ TALLOC_CTX *mem_ctx;
+ /* dmaster written into the headers of non-persistent records */
+ uint32_t dmaster;
+ /* reqid passed along with every record (recdb_file() uses 0) */
+ uint32_t reqid;
+ bool persistent;
+ /* set when the traverse callback hit an error */
+ bool failed;
+ /* file descriptor the record buffers are written to */
+ int fd;
+ /* buffer length above which the buffer is written out */
+ size_t max_size;
+ /* number of buffers written to fd so far */
+ unsigned int num_buffers;
+};
+
+/*
+ * tdb traverse callback used by recdb_file().
+ *
+ * Adds the record to the current buffer (subject to the filtering in
+ * recbuf_filter_add()).  Once the buffer has grown beyond max_size it
+ * is written to the file descriptor and replaced by a fresh, empty
+ * buffer.
+ *
+ * Returns 0 to continue the traverse.  On error, sets state->failed
+ * and returns a non-zero value.
+ */
+static int recdb_file_traverse(struct tdb_context *tdb,
+			       TDB_DATA key, TDB_DATA data,
+			       void *private_data)
+{
+	struct recdb_file_traverse_state *st = private_data;
+	int rc;
+
+	rc = recbuf_filter_add(st->recbuf, st->persistent,
+			       st->reqid, st->dmaster, key, data);
+	if (rc != 0) {
+		st->failed = true;
+		return rc;
+	}
+
+	if (ctdb_rec_buffer_len(st->recbuf) <= st->max_size) {
+		/* Still room in the current buffer */
+		return 0;
+	}
+
+	/* Buffer is over the limit: flush it and start a new one */
+	rc = ctdb_rec_buffer_write(st->recbuf, st->fd);
+	if (rc != 0) {
+		D_ERR("Failed to collect recovery records for %s\n",
+		      recdb_name(st->recdb));
+		st->failed = true;
+		return rc;
+	}
+	st->num_buffers += 1;
+
+	TALLOC_FREE(st->recbuf);
+	st->recbuf = ctdb_rec_buffer_init(st->mem_ctx, recdb_id(st->recdb));
+	if (st->recbuf == NULL) {
+		st->failed = true;
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the retained records of recdb to fd as a sequence of record
+ * buffers.
+ *
+ * Records are filtered and collected by recdb_file_traverse(); a buffer
+ * is written whenever it grows beyond max_size, and the remaining
+ * (possibly empty) buffer is written at the end.
+ *
+ * mem_ctx is the talloc parent for the temporary record buffers, all of
+ * which are released before returning.
+ *
+ * Returns the number of buffers written, or -1 on failure.
+ */
+static int recdb_file(struct recdb_context *recdb, TALLOC_CTX *mem_ctx,
+		      uint32_t dmaster, int fd, int max_size)
+{
+	struct recdb_file_traverse_state state;
+	int ret;
+
+	state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb_id(recdb));
+	if (state.recbuf == NULL) {
+		return -1;
+	}
+	state.recdb = recdb;
+	state.mem_ctx = mem_ctx;
+	state.dmaster = dmaster;
+	state.reqid = 0;
+	state.persistent = recdb_persistent(recdb);
+	state.failed = false;
+	state.fd = fd;
+	state.max_size = max_size;
+	state.num_buffers = 0;
+
+	ret = tdb_traverse_read(recdb_tdb(recdb), recdb_file_traverse, &state);
+	if (ret == -1 || state.failed) {
+		TALLOC_FREE(state.recbuf);
+		return -1;
+	}
+
+	ret = ctdb_rec_buffer_write(state.recbuf, fd);
+	/*
+	 * The last buffer is not needed any more, whether or not the
+	 * write worked.  Freeing it here avoids keeping it attached to
+	 * mem_ctx for as long as the caller lives.
+	 */
+	TALLOC_FREE(state.recbuf);
+	if (ret != 0) {
+		D_ERR("Failed to collect recovery records for %s\n",
+		      recdb_name(recdb));
+		return -1;
+	}
+	state.num_buffers += 1;
+
+	D_DEBUG("Wrote %d buffers of recovery records for %s\n",
+		state.num_buffers, recdb_name(recdb));
+
+	return state.num_buffers;
+}
+
+/*
+ * Pull database from a single node
+ */
+
+/* Per-request state for pull_database_send() */
+struct pull_database_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Recovery database the pulled records are added to */
+ struct recdb_context *recdb;
+ /* Node the database is pulled from */
+ uint32_t pnn;
+ /* Message srvid on which the DB_PULL record buffers arrive */
+ uint64_t srvid;
+ /* Records received through the message handler so far */
+ unsigned int num_records;
+ /* Error reported once the message handler has been removed */
+ int result;
+};
+
+/* Callbacks of the pull_database request, in the order they are chained */
+static void pull_database_handler(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+static void pull_database_register_done(struct tevent_req *subreq);
+static void pull_database_unregister_done(struct tevent_req *subreq);
+static void pull_database_done(struct tevent_req *subreq);
+
+/*
+ * Start pulling the database behind recdb from node pnn.
+ *
+ * A fresh srvid is allocated and a message handler registered for it;
+ * the DB_PULL control itself is sent from
+ * pull_database_register_done().
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *pull_database_send(
+			TALLOC_CTX *mem_ctx,
+			struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			uint32_t pnn,
+			struct recdb_context *recdb)
+{
+	struct pull_database_state *state;
+	struct tevent_req *req;
+	struct tevent_req *handler_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct pull_database_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->srvid = srvid_next();
+	state->pnn = pnn;
+	state->recdb = recdb;
+	state->client = client;
+	state->ev = ev;
+
+	/* Records arrive as messages on srvid, so listen before asking */
+	handler_req = ctdb_client_set_message_handler_send(
+					state, state->ev, state->client,
+					state->srvid, pull_database_handler,
+					req);
+	if (tevent_req_nomem(handler_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(handler_req, pull_database_register_done, req);
+
+	return req;
+}
+
+/*
+ * Message handler for record buffers sent in response to DB_PULL.
+ *
+ * Messages for a different srvid are ignored.  The payload is
+ * unmarshalled into a record buffer, checked against the database id
+ * being pulled and added to the recovery database.  Failures are only
+ * logged here; no records are counted for a failed buffer, so
+ * pull_database_done() sees a mismatch against the count reported by
+ * the remote node.
+ */
+static void pull_database_handler(uint64_t srvid, TDB_DATA data,
+				  void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct pull_database_state *state = tevent_req_data(
+		req, struct pull_database_state);
+	struct ctdb_rec_buffer *recbuf;
+	size_t np;
+	int ret;
+	bool status;
+
+	if (srvid != state->srvid) {
+		return;
+	}
+
+	ret = ctdb_rec_buffer_pull(data.dptr, data.dsize, state, &recbuf, &np);
+	if (ret != 0) {
+		D_ERR("Invalid data received for DB_PULL messages\n");
+		return;
+	}
+
+	if (recbuf->db_id != recdb_id(state->recdb)) {
+		/* Log before freeing: the message reads recbuf->db_id */
+		D_ERR("Invalid dbid:%08x for DB_PULL messages for %s\n",
+		      recbuf->db_id, recdb_name(state->recdb));
+		talloc_free(recbuf);
+		return;
+	}
+
+	status = recdb_add(state->recdb, ctdb_client_pnn(state->client),
+			   recbuf);
+	if (! status) {
+		talloc_free(recbuf);
+		D_ERR("Failed to add records to recdb for %s\n",
+		      recdb_name(state->recdb));
+		return;
+	}
+
+	state->num_records += recbuf->count;
+	talloc_free(recbuf);
+}
+
+/*
+ * The message handler is registered; send the DB_PULL control to the
+ * source node, asking it to stream records to our srvid.
+ */
+static void pull_database_register_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct pull_database_state *state = tevent_req_data(
+		req, struct pull_database_state);
+	struct ctdb_pulldb_ext pulldb_ext;
+	struct ctdb_req_control request;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_set_message_handler_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		D_ERR("Failed to set message handler for DB_PULL for %s\n",
+		      recdb_name(state->recdb));
+		tevent_req_error(req, err);
+		return;
+	}
+
+	pulldb_ext.srvid = state->srvid;
+	pulldb_ext.lmaster = CTDB_LMASTER_ANY;
+	pulldb_ext.db_id = recdb_id(state->recdb);
+	ctdb_req_control_db_pull(&request, &pulldb_ext);
+
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->pnn, TIMEOUT(), &request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, pull_database_done, req);
+}
+
+/*
+ * The DB_PULL control has completed.
+ *
+ * Checks that the control succeeded and that the number of records the
+ * remote node reports matches the number received through the message
+ * handler.  Any error is remembered in state->result and reported only
+ * after the message handler has been removed.
+ */
+static void pull_database_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct pull_database_state *state = tevent_req_data(
+		req, struct pull_database_state);
+	struct ctdb_reply_control *reply;
+	uint32_t num_records = 0;
+	int ret;
+	bool status;
+
+	status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("control DB_PULL failed for %s on node %u, ret=%d\n",
+		      recdb_name(state->recdb), state->pnn, ret);
+		state->result = ret;
+		goto unregister;
+	}
+
+	ret = ctdb_reply_control_db_pull(reply, &num_records);
+	talloc_free(reply);
+	if (ret != 0) {
+		/*
+		 * Without a valid reply num_records is meaningless, so
+		 * do not fall through to the comparison below.
+		 */
+		D_ERR("control DB_PULL failed for %s on node %u, ret=%d\n",
+		      recdb_name(state->recdb), state->pnn, ret);
+		state->result = EPROTO;
+		goto unregister;
+	}
+
+	if (num_records != state->num_records) {
+		D_ERR("mismatch (%u != %u) in DB_PULL records for db %s\n",
+		      num_records, state->num_records,
+		      recdb_name(state->recdb));
+		state->result = EIO;
+		goto unregister;
+	}
+
+	D_INFO("Pulled %d records for db %s from node %d\n",
+	       state->num_records, recdb_name(state->recdb), state->pnn);
+
+unregister:
+
+	/* The handler is removed on success and on failure alike */
+	subreq = ctdb_client_remove_message_handler_send(
+					state, state->ev, state->client,
+					state->srvid, req);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, pull_database_unregister_done, req);
+}
+
+/*
+ * The message handler has been removed; finish the request with the
+ * result recorded by pull_database_done().
+ */
+static void pull_database_unregister_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct pull_database_state *state = tevent_req_data(
+		req, struct pull_database_state);
+	bool ok;
+	int err;
+
+	ok = ctdb_client_remove_message_handler_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		D_ERR("failed to remove message handler for DB_PULL for db %s\n",
+		      recdb_name(state->recdb));
+		tevent_req_error(req, err);
+		return;
+	}
+
+	if (state->result == 0) {
+		tevent_req_done(req);
+		return;
+	}
+
+	/* Report the failure deferred from the DB_PULL control */
+	tevent_req_error(req, state->result);
+}
+
+/*
+ * Collect the result of a request started with pull_database_send().
+ *
+ * Delegates to generic_recv(), which is defined outside this section;
+ * callers here treat a false return as failure and use *perr as the
+ * error code -- confirm details against generic_recv().
+ */
+static bool pull_database_recv(struct tevent_req *req, int *perr)
+{
+ return generic_recv(req, perr);
+}
+
+/*
+ * Push database to specified nodes (new style)
+ */
+
+/* Per-request state for push_database_send() */
+struct push_database_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Recovery database whose records are pushed */
+ struct recdb_context *recdb;
+ /* Target nodes and their number */
+ uint32_t *pnn_list;
+ unsigned int count;
+ /* Message srvid the record buffers are sent on */
+ uint64_t srvid;
+ /* PNN of this client, passed to recdb_file() as dmaster */
+ uint32_t dmaster;
+ /* Temporary file holding the marshalled record buffers */
+ int fd;
+ /* Buffers stored in the file, and how many have been sent */
+ int num_buffers;
+ int num_buffers_sent;
+ /* Total records sent, compared with each node's confirmation */
+ unsigned int num_records;
+};
+
+/* Callbacks of the push_database request */
+static void push_database_started(struct tevent_req *subreq);
+static void push_database_send_msg(struct tevent_req *req);
+static void push_database_send_done(struct tevent_req *subreq);
+static void push_database_confirmed(struct tevent_req *subreq);
+
+/* talloc destructor: close the temporary record file owned by the state */
+static int push_database_state_destructor(struct push_database_state *state)
+{
+	if (state->fd != -1) {
+		close(state->fd);
+		state->fd = -1;
+	}
+	return 0;
+}
+
+/*
+ * Start pushing the records of recdb to the nodes in pnn_list.
+ *
+ * The records are first written to an unlinked temporary file
+ * ("<recdb path>.dat") as buffers limited by max_size.  DB_PUSH_START
+ * is then sent to all nodes; the buffers are sent as messages from
+ * push_database_send_msg() once that control has completed.
+ *
+ * The temporary file descriptor is closed when the request state is
+ * freed.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *push_database_send(
+			TALLOC_CTX *mem_ctx,
+			struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			uint32_t *pnn_list,
+			unsigned int count,
+			struct recdb_context *recdb,
+			int max_size)
+{
+	struct tevent_req *req, *subreq;
+	struct push_database_state *state;
+	struct ctdb_req_control request;
+	struct ctdb_pulldb_ext pulldb_ext;
+	char *filename;
+	off_t offset;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct push_database_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Mark the fd invalid before the destructor can ever run */
+	state->fd = -1;
+	talloc_set_destructor(state, push_database_state_destructor);
+
+	state->ev = ev;
+	state->client = client;
+	state->recdb = recdb;
+	state->pnn_list = pnn_list;
+	state->count = count;
+
+	state->srvid = srvid_next();
+	state->dmaster = ctdb_client_pnn(client);
+	state->num_buffers_sent = 0;
+	state->num_records = 0;
+
+	filename = talloc_asprintf(state, "%s.dat", recdb_path(recdb));
+	if (tevent_req_nomem(filename, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	state->fd = open(filename, O_RDWR|O_CREAT, 0644);
+	if (state->fd == -1) {
+		tevent_req_error(req, errno);
+		return tevent_req_post(req, ev);
+	}
+	/* The file is only reachable through the fd from here on */
+	unlink(filename);
+	talloc_free(filename);
+
+	state->num_buffers = recdb_file(recdb, state, state->dmaster,
+					state->fd, max_size);
+	if (state->num_buffers == -1) {
+		tevent_req_error(req, ENOMEM);
+		return tevent_req_post(req, ev);
+	}
+
+	/* Rewind so the buffers can be read back in order */
+	offset = lseek(state->fd, 0, SEEK_SET);
+	if (offset != 0) {
+		tevent_req_error(req, EIO);
+		return tevent_req_post(req, ev);
+	}
+
+	pulldb_ext.db_id = recdb_id(recdb);
+	/* Give every field a defined value before it is handed over */
+	pulldb_ext.lmaster = CTDB_LMASTER_ANY;
+	pulldb_ext.srvid = state->srvid;
+
+	ctdb_req_control_db_push_start(&request, &pulldb_ext);
+	subreq = ctdb_client_control_multi_send(state, ev, client,
+						pnn_list, count,
+						TIMEOUT(), &request);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, push_database_started, req);
+
+	return req;
+}
+
+/*
+ * DB_PUSH_START has completed on all nodes; begin sending the record
+ * buffers.
+ */
+static void push_database_started(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct push_database_state *state = tevent_req_data(
+		req, struct push_database_state);
+	int *err_list;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_multi_recv(subreq, &err, state,
+					    &err_list, NULL);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		push_database_send_msg(req);
+		return;
+	}
+
+	{
+		uint32_t bad_pnn;
+		int node_err;
+
+		/* Name the failing node in the log when it is known */
+		node_err = ctdb_client_control_multi_error(state->pnn_list,
+							   state->count,
+							   err_list, &bad_pnn);
+		if (node_err == 0) {
+			D_ERR("control DB_PUSH_START failed for db %s,"
+			      " ret=%d\n",
+			      recdb_name(state->recdb), err);
+		} else {
+			D_ERR("control DB_PUSH_START failed for db %s"
+			      " on node %u, ret=%d\n",
+			      recdb_name(state->recdb), bad_pnn, node_err);
+		}
+		talloc_free(err_list);
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Send the next record buffer from the temporary file to all nodes, or
+ * send DB_PUSH_CONFIRM once every buffer has been sent.
+ *
+ * Called once after DB_PUSH_START and again from
+ * push_database_send_done() after each buffer.  The buffer read from
+ * the file and its marshalled copy are released before returning, on
+ * failure as well as on success.
+ */
+static void push_database_send_msg(struct tevent_req *req)
+{
+	struct push_database_state *state = tevent_req_data(
+		req, struct push_database_state);
+	struct tevent_req *subreq;
+	struct ctdb_rec_buffer *recbuf;
+	struct ctdb_req_message message;
+	TDB_DATA data;
+	size_t np;
+	int ret;
+
+	if (state->num_buffers_sent == state->num_buffers) {
+		struct ctdb_req_control request;
+
+		ctdb_req_control_db_push_confirm(&request,
+						 recdb_id(state->recdb));
+		subreq = ctdb_client_control_multi_send(state, state->ev,
+							state->client,
+							state->pnn_list,
+							state->count,
+							TIMEOUT(), &request);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(subreq, push_database_confirmed, req);
+		return;
+	}
+
+	ret = ctdb_rec_buffer_read(state->fd, state, &recbuf);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	data.dsize = ctdb_rec_buffer_len(recbuf);
+	data.dptr = talloc_size(state, data.dsize);
+	if (tevent_req_nomem(data.dptr, req)) {
+		talloc_free(recbuf);
+		return;
+	}
+
+	ctdb_rec_buffer_push(recbuf, data.dptr, &np);
+
+	message.srvid = state->srvid;
+	message.data.data = data;
+
+	D_DEBUG("Pushing buffer %d with %d records for db %s\n",
+		state->num_buffers_sent, recbuf->count,
+		recdb_name(state->recdb));
+
+	subreq = ctdb_client_message_multi_send(state, state->ev,
+						state->client,
+						state->pnn_list, state->count,
+						&message);
+	if (tevent_req_nomem(subreq, req)) {
+		/* Do not leave the temporaries hanging on the state */
+		talloc_free(data.dptr);
+		talloc_free(recbuf);
+		return;
+	}
+	tevent_req_set_callback(subreq, push_database_send_done, req);
+
+	state->num_records += recbuf->count;
+
+	talloc_free(data.dptr);
+	talloc_free(recbuf);
+}
+
+/*
+ * One record buffer has been sent to all nodes; count it and carry on
+ * with the next step.
+ */
+static void push_database_send_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct push_database_state *state = tevent_req_data(
+		req, struct push_database_state);
+	int err;
+	bool ok;
+
+	ok = ctdb_client_message_multi_recv(subreq, &err, NULL, NULL);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		state->num_buffers_sent += 1;
+		push_database_send_msg(req);
+		return;
+	}
+
+	D_ERR("Sending recovery records failed for %s\n",
+	      recdb_name(state->recdb));
+	tevent_req_error(req, err);
+}
+
+/*
+ * DB_PUSH_CONFIRM has completed on all nodes.
+ *
+ * Every node reports how many records it received; the request
+ * succeeds only if each count matches the number of records sent.
+ * The error list and replies are released on every path.
+ */
+static void push_database_confirmed(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct push_database_state *state = tevent_req_data(
+		req, struct push_database_state);
+	struct ctdb_reply_control **reply = NULL;
+	int *err_list = NULL;
+	bool status;
+	unsigned int i;
+	int ret;
+	uint32_t num_records;
+
+	status = ctdb_client_control_multi_recv(subreq, &ret, state,
+						&err_list, &reply);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		int ret2;
+		uint32_t pnn;
+
+		ret2 = ctdb_client_control_multi_error(state->pnn_list,
+						       state->count, err_list,
+						       &pnn);
+		if (ret2 != 0) {
+			D_ERR("control DB_PUSH_CONFIRM failed for db %s"
+			      " on node %u, ret=%d\n",
+			      recdb_name(state->recdb), pnn, ret2);
+		} else {
+			D_ERR("control DB_PUSH_CONFIRM failed for db %s,"
+			      " ret=%d\n",
+			      recdb_name(state->recdb), ret);
+		}
+		talloc_free(err_list);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* Only consulted on failure */
+	talloc_free(err_list);
+
+	for (i=0; i<state->count; i++) {
+		ret = ctdb_reply_control_db_push_confirm(reply[i],
+							 &num_records);
+		if (ret != 0) {
+			D_ERR("control DB_PUSH_CONFIRM failed for db %s"
+			      " on node %u, ret=%d\n",
+			      recdb_name(state->recdb),
+			      state->pnn_list[i], ret);
+			talloc_free(reply);
+			tevent_req_error(req, EPROTO);
+			return;
+		}
+
+		if (num_records != state->num_records) {
+			D_ERR("Node %u received %d of %d records for %s\n",
+			      state->pnn_list[i], num_records,
+			      state->num_records, recdb_name(state->recdb));
+			talloc_free(reply);
+			tevent_req_error(req, EPROTO);
+			return;
+		}
+	}
+
+	talloc_free(reply);
+
+	D_INFO("Pushed %d records for db %s\n",
+	       state->num_records, recdb_name(state->recdb));
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a request started with push_database_send().
+ *
+ * Delegates to generic_recv(), which is defined outside this section;
+ * callers here treat a false return as failure and use *perr as the
+ * error code -- confirm details against generic_recv().
+ */
+static bool push_database_recv(struct tevent_req *req, int *perr)
+{
+ return generic_recv(req, perr);
+}
+
+/*
+ * Collect databases using highest sequence number
+ */
+
+/* Per-request state for collect_highseqnum_db_send() */
+struct collect_highseqnum_db_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Nodes queried for their database sequence number */
+ struct node_list *nlist;
+ uint32_t db_id;
+ /* Recovery database the chosen node's records are pulled into */
+ struct recdb_context *recdb;
+
+ /* Node with the highest sequence number seen in the replies */
+ uint32_t max_pnn;
+};
+
+/* Callbacks of the collect_highseqnum_db request */
+static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq);
+static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq);
+
+/*
+ * Start collecting a database by pulling it from the node that holds
+ * the highest sequence number.
+ *
+ * GET_DB_SEQNUM is sent to every node in nlist; the pull itself is
+ * started from collect_highseqnum_db_seqnum_done().
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *collect_highseqnum_db_send(
+			TALLOC_CTX *mem_ctx,
+			struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			struct node_list *nlist,
+			uint32_t db_id,
+			struct recdb_context *recdb)
+{
+	struct tevent_req *req, *subreq;
+	struct collect_highseqnum_db_state *state;
+	struct ctdb_req_control request;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct collect_highseqnum_db_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->nlist = nlist;
+	state->db_id = db_id;
+	state->recdb = recdb;
+
+	ctdb_req_control_get_db_seqnum(&request, db_id);
+	/*
+	 * Parent the subrequest to our state, as the other senders in
+	 * this file do, so it cannot outlive this request.
+	 */
+	subreq = ctdb_client_control_multi_send(state,
+						ev,
+						client,
+						nlist->pnn_list,
+						nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, collect_highseqnum_db_seqnum_done,
+				req);
+
+	return req;
+}
+
+/*
+ * GET_DB_SEQNUM replies have arrived.
+ *
+ * Picks the node with the highest sequence number (the first node in
+ * the list when none exceeds zero) and starts pulling the database
+ * from it.
+ */
+static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct collect_highseqnum_db_state *state = tevent_req_data(
+		req, struct collect_highseqnum_db_state);
+	struct ctdb_reply_control **reply;
+	uint64_t best_seqnum = 0;
+	uint64_t seqnum;
+	unsigned int idx;
+	int *err_list;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_multi_recv(subreq, &err, state,
+					    &err_list, &reply);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		uint32_t bad_pnn;
+		int node_err;
+
+		node_err = ctdb_client_control_multi_error(
+						state->nlist->pnn_list,
+						state->nlist->count,
+						err_list,
+						&bad_pnn);
+		if (node_err == 0) {
+			D_ERR("control GET_DB_SEQNUM failed for db %s,"
+			      " ret=%d\n",
+			      recdb_name(state->recdb), err);
+		} else {
+			D_ERR("control GET_DB_SEQNUM failed for db %s"
+			      " on node %u, ret=%d\n",
+			      recdb_name(state->recdb), bad_pnn, node_err);
+		}
+		tevent_req_error(req, err);
+		return;
+	}
+
+	/* Default to the first node; only a larger seqnum replaces it */
+	state->max_pnn = state->nlist->pnn_list[0];
+	for (idx = 0; idx < state->nlist->count; idx++) {
+		err = ctdb_reply_control_get_db_seqnum(reply[idx], &seqnum);
+		if (err != 0) {
+			tevent_req_error(req, EPROTO);
+			return;
+		}
+
+		if (seqnum > best_seqnum) {
+			best_seqnum = seqnum;
+			state->max_pnn = state->nlist->pnn_list[idx];
+		}
+	}
+
+	talloc_free(reply);
+
+	D_INFO("Pull persistent db %s from node %d with seqnum 0x%"PRIx64"\n",
+	       recdb_name(state->recdb), state->max_pnn, best_seqnum);
+
+	subreq = pull_database_send(state,
+				    state->ev,
+				    state->client,
+				    state->max_pnn,
+				    state->recdb);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, collect_highseqnum_db_pulldb_done,
+				req);
+}
+
+/*
+ * The pull from the chosen node has finished.  A failed pull earns that
+ * node ban credits and fails the request.
+ */
+static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct collect_highseqnum_db_state *state = tevent_req_data(
+		req, struct collect_highseqnum_db_state);
+	int err;
+	bool ok;
+
+	ok = pull_database_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		tevent_req_done(req);
+		return;
+	}
+
+	node_list_ban_credits(state->nlist, state->max_pnn);
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of a request started with
+ * collect_highseqnum_db_send().
+ *
+ * Delegates to generic_recv(), which is defined outside this section;
+ * callers here treat a false return as failure and use *perr as the
+ * error code -- confirm details against generic_recv().
+ */
+static bool collect_highseqnum_db_recv(struct tevent_req *req, int *perr)
+{
+ return generic_recv(req, perr);
+}
+
+/*
+ * Collect all databases
+ */
+
+/* Per-request state for collect_all_db_send() */
+struct collect_all_db_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Nodes the database is pulled from, one after the other */
+ struct node_list *nlist;
+ uint32_t db_id;
+ /* Recovery database the pulled records are added to */
+ struct recdb_context *recdb;
+
+ /* NOTE(review): not referenced by the collect_all_db functions */
+ struct ctdb_pulldb pulldb;
+ /* Index into nlist->pnn_list of the node currently being pulled */
+ unsigned int index;
+};
+
+static void collect_all_db_pulldb_done(struct tevent_req *subreq);
+
+/*
+ * Start collecting a database by pulling it from every node in nlist in
+ * turn, beginning with the first.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *collect_all_db_send(
+			TALLOC_CTX *mem_ctx,
+			struct tevent_context *ev,
+			struct ctdb_client_context *client,
+			struct node_list *nlist,
+			uint32_t db_id,
+			struct recdb_context *recdb)
+{
+	struct collect_all_db_state *state;
+	struct tevent_req *req;
+	struct tevent_req *pull_req;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct collect_all_db_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->index = 0;
+	state->recdb = recdb;
+	state->db_id = db_id;
+	state->nlist = nlist;
+	state->client = client;
+	state->ev = ev;
+
+	/* Remaining nodes are handled by collect_all_db_pulldb_done() */
+	pull_req = pull_database_send(state,
+				      ev,
+				      client,
+				      nlist->pnn_list[state->index],
+				      recdb);
+	if (tevent_req_nomem(pull_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(pull_req, collect_all_db_pulldb_done, req);
+
+	return req;
+}
+
+/*
+ * A pull from one node has finished.  On failure that node earns ban
+ * credits and the request fails; otherwise the next node is pulled, or
+ * the request completes when all nodes have been handled.
+ */
+static void collect_all_db_pulldb_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct collect_all_db_state *state = tevent_req_data(
+		req, struct collect_all_db_state);
+	int err;
+	bool ok;
+
+	ok = pull_database_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		uint32_t failed_pnn = state->nlist->pnn_list[state->index];
+
+		node_list_ban_credits(state->nlist, failed_pnn);
+		tevent_req_error(req, err);
+		return;
+	}
+
+	state->index += 1;
+	if (state->index == state->nlist->count) {
+		/* Every node has been pulled */
+		tevent_req_done(req);
+		return;
+	}
+
+	subreq = pull_database_send(state,
+				    state->ev,
+				    state->client,
+				    state->nlist->pnn_list[state->index],
+				    state->recdb);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, collect_all_db_pulldb_done, req);
+}
+
+/*
+ * Collect the result of a request started with collect_all_db_send().
+ *
+ * Delegates to generic_recv(), which is defined outside this section;
+ * callers here treat a false return as failure and use *perr as the
+ * error code -- confirm details against generic_recv().
+ */
+static bool collect_all_db_recv(struct tevent_req *req, int *perr)
+{
+ return generic_recv(req, perr);
+}
+
+
+/**
+ * For each database do the following:
+ * - Get DB name from all nodes
+ * - Attach database on missing nodes
+ * - Get DB path
+ * - Freeze database on all nodes
+ * - Start transaction on all nodes
+ * - Collect database from all nodes
+ * - Wipe database on all nodes
+ * - Push database to all nodes
+ * - Commit transaction on all nodes
+ * - Thaw database on all nodes
+ */
+
+/* Per-request state for recover_db_send() */
+struct recover_db_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Source of database_hash_size and rec_buffer_size_limit */
+ struct ctdb_tunable_list *tun_list;
+ /* Nodes taking part in the recovery */
+ struct node_list *nlist;
+ /* Database being recovered (id, flags, nodes that have it) */
+ struct db *db;
+
+ /* PNN of this client; the node asked for the database path */
+ uint32_t destnode;
+ /* db_id plus generation, used for transaction and wipe controls */
+ struct ctdb_transdb transdb;
+
+ /* Name and path obtained via GET_DBNAME and GETDBPATH */
+ const char *db_name, *db_path;
+ /* Temporary recovery database; freed once the push has finished */
+ struct recdb_context *recdb;
+};
+
+/* Callbacks of the recover_db request, in the order they are chained */
+static void recover_db_name_done(struct tevent_req *subreq);
+static void recover_db_create_missing_done(struct tevent_req *subreq);
+static void recover_db_path_done(struct tevent_req *subreq);
+static void recover_db_freeze_done(struct tevent_req *subreq);
+static void recover_db_transaction_started(struct tevent_req *subreq);
+static void recover_db_collect_done(struct tevent_req *subreq);
+static void recover_db_wipedb_done(struct tevent_req *subreq);
+static void recover_db_pushdb_done(struct tevent_req *subreq);
+static void recover_db_transaction_committed(struct tevent_req *subreq);
+static void recover_db_thaw_done(struct tevent_req *subreq);
+
+/*
+ * Start recovering a single database.
+ *
+ * The first step asks every node that has the database (db->pnn_list)
+ * for its name; the remaining steps are chained through the
+ * recover_db_*() callbacks.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct ctdb_client_context *client,
+					  struct ctdb_tunable_list *tun_list,
+					  struct node_list *nlist,
+					  uint32_t generation,
+					  struct db *db)
+{
+	struct recover_db_state *state;
+	struct ctdb_req_control request;
+	struct tevent_req *req;
+	struct tevent_req *name_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct recover_db_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->db = db;
+	state->nlist = nlist;
+	state->tun_list = tun_list;
+	state->client = client;
+	state->ev = ev;
+
+	/* The generation doubles as the transaction id */
+	state->transdb.tid = generation;
+	state->transdb.db_id = db->db_id;
+	state->destnode = ctdb_client_pnn(client);
+
+	ctdb_req_control_get_dbname(&request, db->db_id);
+	name_req = ctdb_client_control_multi_send(state,
+						  ev,
+						  client,
+						  state->db->pnn_list,
+						  state->db->num_nodes,
+						  TIMEOUT(),
+						  &request);
+	if (tevent_req_nomem(name_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(name_req, recover_db_name_done, req);
+
+	return req;
+}
+
+/*
+ * GET_DBNAME replies have arrived from the nodes that have the
+ * database.
+ *
+ * All nodes must report the same name.  A node reporting a different
+ * name earns ban credits and the request fails with EINVAL.  On success
+ * the database is created on the nodes that lack it.
+ */
+static void recover_db_name_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	struct ctdb_reply_control **reply;
+	int *err_list;
+	unsigned int i;
+	int ret;
+	bool status;
+
+	status = ctdb_client_control_multi_recv(subreq,
+						&ret,
+						state,
+						&err_list,
+						&reply);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		int ret2;
+		uint32_t pnn;
+
+		ret2 = ctdb_client_control_multi_error(state->db->pnn_list,
+						       state->db->num_nodes,
+						       err_list,
+						       &pnn);
+		if (ret2 != 0) {
+			D_ERR("control GET_DBNAME failed on node %u,"
+			      " ret=%d\n",
+			      pnn,
+			      ret2);
+		} else {
+			D_ERR("control GET_DBNAME failed, ret=%d\n",
+			      ret);
+		}
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	for (i = 0; i < state->db->num_nodes; i++) {
+		const char *db_name;
+		uint32_t pnn;
+
+		/*
+		 * The control went to db->pnn_list, so reply[i] belongs
+		 * to the i-th entry of that list.
+		 */
+		pnn = state->db->pnn_list[i];
+
+		ret = ctdb_reply_control_get_dbname(reply[i],
+						    state,
+						    &db_name);
+		if (ret != 0) {
+			D_ERR("control GET_DBNAME failed on node %u "
+			      "for db=0x%x, ret=%d\n",
+			      pnn,
+			      state->db->db_id,
+			      ret);
+			tevent_req_error(req, EPROTO);
+			return;
+		}
+
+		if (state->db_name == NULL) {
+			state->db_name = db_name;
+			continue;
+		}
+
+		if (strcmp(state->db_name, db_name) != 0) {
+			D_ERR("Incompatible database name for 0x%"PRIx32" "
+			      "(%s != %s) on node %"PRIu32"\n",
+			      state->db->db_id,
+			      db_name,
+			      state->db_name,
+			      pnn);
+			node_list_ban_credits(state->nlist, pnn);
+			/*
+			 * ret is 0 here; tevent_req_error() ignores a
+			 * zero error and would leave the request
+			 * pending forever.
+			 */
+			tevent_req_error(req, EINVAL);
+			return;
+		}
+	}
+
+	talloc_free(reply);
+
+	subreq = db_create_missing_send(state,
+					state->ev,
+					state->client,
+					state->nlist,
+					state->db_name,
+					state->db);
+
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_create_missing_done, req);
+}
+
+/*
+ * The database now exists on all nodes; ask the local node (destnode)
+ * for the database path.
+ */
+static void recover_db_create_missing_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	struct ctdb_req_control request;
+	bool ok;
+	int err;
+
+	/* Could sanity check the db_id here */
+	ok = db_create_missing_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	ctdb_req_control_getdbpath(&request, state->db->db_id);
+
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->destnode, TIMEOUT(),
+					  &request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_path_done, req);
+}
+
+/*
+ * GETDBPATH has completed; remember the path and freeze the database on
+ * all nodes.
+ */
+static void recover_db_path_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	struct ctdb_req_control request;
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_recv(subreq, &err, state, &reply);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		D_ERR("control GETDBPATH failed for db %s, ret=%d\n",
+		      state->db_name, err);
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = ctdb_reply_control_getdbpath(reply, state, &state->db_path);
+	if (err != 0) {
+		D_ERR("control GETDBPATH failed for db %s, ret=%d\n",
+		      state->db_name, err);
+		tevent_req_error(req, EPROTO);
+		return;
+	}
+
+	talloc_free(reply);
+
+	/* Next step: freeze the database everywhere */
+	ctdb_req_control_db_freeze(&request, state->db->db_id);
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->nlist->pnn_list,
+						state->nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_freeze_done, req);
+}
+
+/*
+ * The freeze control has completed on all nodes; start the recovery
+ * transaction.  A node identified as having failed to freeze earns ban
+ * credits.
+ */
+static void recover_db_freeze_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	struct ctdb_req_control request;
+	int *err_list;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_multi_recv(subreq, &err, NULL, &err_list,
+					    NULL);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		uint32_t bad_pnn;
+		int node_err;
+
+		node_err = ctdb_client_control_multi_error(
+						state->nlist->pnn_list,
+						state->nlist->count,
+						err_list,
+						&bad_pnn);
+		if (node_err == 0) {
+			D_ERR("control FREEZE_DB failed for db %s, ret=%d\n",
+			      state->db_name, err);
+		} else {
+			D_ERR("control FREEZE_DB failed for db %s"
+			      " on node %u, ret=%d\n",
+			      state->db_name, bad_pnn, node_err);
+
+			node_list_ban_credits(state->nlist, bad_pnn);
+		}
+		tevent_req_error(req, err);
+		return;
+	}
+
+	ctdb_req_control_db_transaction_start(&request, &state->transdb);
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->nlist->pnn_list,
+						state->nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_transaction_started, req);
+}
+
+/*
+ * The transaction has been started on all nodes.
+ *
+ * Creates the temporary recovery database and starts collecting
+ * records: persistent and replicated databases are taken from the node
+ * with the highest sequence number, all others are pulled from every
+ * node.
+ */
+static void recover_db_transaction_started(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	uint32_t db_flags;
+	int *err_list;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_multi_recv(subreq, &err, NULL, &err_list,
+					    NULL);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		uint32_t bad_pnn;
+		int node_err;
+
+		node_err = ctdb_client_control_multi_error(
+						state->nlist->pnn_list,
+						state->nlist->count,
+						err_list,
+						&bad_pnn);
+		if (node_err == 0) {
+			D_ERR("control TRANSACTION_DB failed for db=%s,"
+			      " ret=%d\n", state->db_name, err);
+		} else {
+			D_ERR("control TRANSACTION_DB failed for db=%s"
+			      " on node %u, ret=%d\n",
+			      state->db_name, bad_pnn, node_err);
+		}
+		tevent_req_error(req, err);
+		return;
+	}
+
+	db_flags = state->db->db_flags;
+	state->recdb = recdb_create(state,
+				    state->db->db_id,
+				    state->db_name,
+				    state->db_path,
+				    state->tun_list->database_hash_size,
+				    db_flags & CTDB_DB_FLAGS_PERSISTENT);
+	if (tevent_req_nomem(state->recdb, req)) {
+		return;
+	}
+
+	if (db_flags &
+	    (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) {
+		subreq = collect_highseqnum_db_send(state,
+						    state->ev,
+						    state->client,
+						    state->nlist,
+						    state->db->db_id,
+						    state->recdb);
+	} else {
+		subreq = collect_all_db_send(state,
+					     state->ev,
+					     state->client,
+					     state->nlist,
+					     state->db->db_id,
+					     state->recdb);
+	}
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_collect_done, req);
+}
+
+/*
+ * Record collection has finished; wipe the database on all nodes before
+ * the merged records are pushed back.
+ */
+static void recover_db_collect_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	struct ctdb_req_control request;
+	bool ok;
+	int err;
+
+	/* Use the recv function matching the collector that was started */
+	if (state->db->db_flags &
+	    (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) {
+		ok = collect_highseqnum_db_recv(subreq, &err);
+	} else {
+		ok = collect_all_db_recv(subreq, &err);
+	}
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	ctdb_req_control_wipe_database(&request, &state->transdb);
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->nlist->pnn_list,
+						state->nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_wipedb_done, req);
+}
+
+/*
+ * The database has been wiped on all nodes; push the collected records
+ * to every node, using the rec_buffer_size_limit tunable as the buffer
+ * size limit.
+ */
+static void recover_db_wipedb_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	int *err_list;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_multi_recv(subreq, &err, NULL, &err_list,
+					    NULL);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		uint32_t bad_pnn;
+		int node_err;
+
+		node_err = ctdb_client_control_multi_error(
+						state->nlist->pnn_list,
+						state->nlist->count,
+						err_list,
+						&bad_pnn);
+		if (node_err == 0) {
+			D_ERR("control WIPEDB failed for db %s, ret=%d\n",
+			      state->db_name, err);
+		} else {
+			D_ERR("control WIPEDB failed for db %s on node %u,"
+			      " ret=%d\n", state->db_name, bad_pnn, node_err);
+		}
+		tevent_req_error(req, err);
+		return;
+	}
+
+	subreq = push_database_send(state,
+				    state->ev,
+				    state->client,
+				    state->nlist->pnn_list,
+				    state->nlist->count,
+				    state->recdb,
+				    state->tun_list->rec_buffer_size_limit);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_pushdb_done, req);
+}
+
+/*
+ * The records have been pushed to all nodes; drop the temporary
+ * recovery database and commit the transaction everywhere.
+ */
+static void recover_db_pushdb_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	struct ctdb_req_control request;
+	bool ok;
+	int err;
+
+	ok = push_database_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	/* The recovery database has served its purpose */
+	TALLOC_FREE(state->recdb);
+
+	ctdb_req_control_db_transaction_commit(&request, &state->transdb);
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->nlist->pnn_list,
+						state->nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_transaction_committed, req);
+}
+
+/*
+ * The transaction has been committed on all nodes; thaw the database
+ * everywhere.
+ */
+static void recover_db_transaction_committed(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_db_state *state = tevent_req_data(
+		req, struct recover_db_state);
+	struct ctdb_req_control request;
+	int *err_list;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_multi_recv(subreq, &err, NULL, &err_list,
+					    NULL);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		uint32_t bad_pnn;
+		int node_err;
+
+		node_err = ctdb_client_control_multi_error(
+						state->nlist->pnn_list,
+						state->nlist->count,
+						err_list,
+						&bad_pnn);
+		if (node_err == 0) {
+			D_ERR("control DB_TRANSACTION_COMMIT failed for db %s,"
+			      " ret=%d\n", state->db_name, err);
+		} else {
+			D_ERR("control DB_TRANSACTION_COMMIT failed for db %s"
+			      " on node %u, ret=%d\n",
+			      state->db_name, bad_pnn, node_err);
+		}
+		tevent_req_error(req, err);
+		return;
+	}
+
+	ctdb_req_control_db_thaw(&request, state->db->db_id);
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->nlist->pnn_list,
+						state->nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recover_db_thaw_done, req);
+}
+
+/*
+ * Callback for the DB_THAW multi-node control.
+ *
+ * On failure an error is logged and the request is finished with ret;
+ * otherwise the recover_db request is marked as done.
+ */
+static void recover_db_thaw_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recover_db_state *state = tevent_req_data(
+ req, struct recover_db_state);
+ int *err_list;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
+ NULL);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ /* Look for a per-node error code to make the log more specific */
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("control DB_THAW failed for db %s on node %u,"
+ " ret=%d\n", state->db_name, pnn, ret2);
+ } else {
+ D_ERR("control DB_THAW failed for db %s, ret=%d\n",
+ state->db_name, ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a recover_db request.
+ *
+ * Returns the status reported by generic_recv(); no error code is
+ * passed back to the caller.
+ */
+static bool recover_db_recv(struct tevent_req *req)
+{
+	bool ok = generic_recv(req, NULL);
+
+	return ok;
+}
+
+
+/*
+ * Start database recovery for each database
+ *
+ * Try to recover each database 5 times before failing recovery.
+ */
+
+/* State of the request that recovers all databases in parallel */
+struct db_recovery_state {
+ /* Event context used to (re)start the per-database requests */
+ struct tevent_context *ev;
+ /* Databases to recover; num_dbs is the completion target */
+ struct db_list *dblist;
+ /* Number of databases finished, whether recovered or given up on */
+ unsigned int num_replies;
+ /* Number of databases that could not be recovered */
+ unsigned int num_failed;
+};
+
+/*
+ * Per-database callback data for db_recovery_one_done().  It keeps the
+ * arguments needed to call recover_db_send() again on a retry.
+ */
+struct db_recovery_one_state {
+ /* The parent db_recovery request */
+ struct tevent_req *req;
+ struct ctdb_client_context *client;
+ struct db_list *dblist;
+ struct ctdb_tunable_list *tun_list;
+ struct node_list *nlist;
+ uint32_t generation;
+ /* The database this state belongs to */
+ struct db *db;
+ /* Number of failed recovery attempts for this database so far */
+ int num_fails;
+};
+
+static void db_recovery_one_done(struct tevent_req *subreq);
+
+/*
+ * Start recovery of every database in dblist.
+ *
+ * One recover_db_send() sub-request is started per database, each with
+ * its own db_recovery_one_state as callback data.  If dblist is empty the
+ * request completes immediately.  Returns NULL if the request itself
+ * cannot be allocated.
+ */
+static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct db_list *dblist,
+ struct ctdb_tunable_list *tun_list,
+ struct node_list *nlist,
+ uint32_t generation)
+{
+ struct tevent_req *req, *subreq;
+ struct db_recovery_state *state;
+ struct db *db;
+
+ req = tevent_req_create(mem_ctx, &state, struct db_recovery_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->dblist = dblist;
+ state->num_replies = 0;
+ state->num_failed = 0;
+
+ /* Nothing to recover */
+ if (dblist->num_dbs == 0) {
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ for (db = dblist->db; db != NULL; db = db->next) {
+ struct db_recovery_one_state *substate;
+
+ substate = talloc_zero(state, struct db_recovery_one_state);
+ if (tevent_req_nomem(substate, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ /* Remember everything needed to retry this database */
+ substate->req = req;
+ substate->client = client;
+ substate->dblist = dblist;
+ substate->tun_list = tun_list;
+ substate->nlist = nlist;
+ substate->generation = generation;
+ substate->db = db;
+
+ subreq = recover_db_send(state,
+ ev,
+ client,
+ tun_list,
+ nlist,
+ generation,
+ substate->db);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, db_recovery_one_done,
+ substate);
+ D_NOTICE("recover database 0x%08x\n", substate->db->db_id);
+ }
+
+ return req;
+}
+
+/*
+ * Callback for one recover_db_send() sub-request.
+ *
+ * On success the per-database state is freed and the database is counted
+ * as replied.  On failure the recovery of this database is retried until
+ * it has failed NUM_RETRIES times, after which it is counted as failed.
+ * Once every database in state->dblist has replied, the parent request
+ * is marked as done; db_recovery_recv() reports whether any failed.
+ */
+static void db_recovery_one_done(struct tevent_req *subreq)
+{
+	struct db_recovery_one_state *substate = tevent_req_callback_data(
+		subreq, struct db_recovery_one_state);
+	struct tevent_req *req = substate->req;
+	struct db_recovery_state *state = tevent_req_data(
+		req, struct db_recovery_state);
+	bool status;
+
+	status = recover_db_recv(subreq);
+	TALLOC_FREE(subreq);
+
+	if (status) {
+		talloc_free(substate);
+		goto done;
+	}
+
+	substate->num_fails += 1;
+	if (substate->num_fails < NUM_RETRIES) {
+		subreq = recover_db_send(state,
+					 state->ev,
+					 substate->client,
+					 substate->tun_list,
+					 substate->nlist,
+					 substate->generation,
+					 substate->db);
+		if (tevent_req_nomem(subreq, req)) {
+			/*
+			 * tevent_req_nomem() has already finished req and
+			 * notified its callback, which may have freed req
+			 * and state.  Neither may be touched any more and
+			 * the request must not be finished a second time.
+			 */
+			return;
+		}
+		tevent_req_set_callback(subreq, db_recovery_one_done, substate);
+		D_NOTICE("recover database 0x%08x, attempt %d\n",
+			 substate->db->db_id, substate->num_fails+1);
+		return;
+	}
+
+	/* All attempts for this database have been used up */
+	state->num_failed += 1;
+
+done:
+	state->num_replies += 1;
+
+	if (state->num_replies == state->dblist->num_dbs) {
+		tevent_req_done(req);
+	}
+}
+
+/*
+ * Collect the result of a db_recovery request.
+ *
+ * *count is set to the number of databases that replied without being
+ * counted as failed, or to 0 if the request itself finished with an
+ * error.  Returns true only if the request succeeded and no database
+ * failed.
+ */
+static bool db_recovery_recv(struct tevent_req *req, unsigned int *count)
+{
+	struct db_recovery_state *st = tevent_req_data(
+		req, struct db_recovery_state);
+	int unix_err;
+
+	if (tevent_req_is_unix_error(req, &unix_err)) {
+		*count = 0;
+		return false;
+	}
+
+	*count = st->num_replies - st->num_failed;
+
+	return (st->num_failed == 0);
+}
+
+/* State of the request that bans the node with the most ban credits */
+struct ban_node_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Tunables; enable_bans and recovery_ban_period are read from here */
+ struct ctdb_tunable_list *tun_list;
+ /* Node list holding the per-node ban_credits */
+ struct node_list *nlist;
+ /* PNN of the local client, from ctdb_client_pnn() */
+ uint32_t destnode;
+
+ /* PNN of the node currently selected as having the most ban credits */
+ uint32_t max_pnn;
+};
+
+static bool ban_node_check(struct tevent_req *req);
+static void ban_node_check_done(struct tevent_req *subreq);
+static void ban_node_done(struct tevent_req *subreq);
+
+/*
+ * Start a request that bans the node with the most ban credits.
+ *
+ * If bans are disabled via the enable_bans tunable the request completes
+ * successfully right away.  Otherwise ban_node_check() selects the
+ * culprit and starts the first control; when it reports that nothing is
+ * pending the already finished request is posted.
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *ban_node_send(TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_client_context *client,
+					struct ctdb_tunable_list *tun_list,
+					struct node_list *nlist)
+{
+	struct ban_node_state *state;
+	struct tevent_req *req;
+
+	req = tevent_req_create(mem_ctx, &state, struct ban_node_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->tun_list = tun_list;
+	state->nlist = nlist;
+	state->destnode = ctdb_client_pnn(client);
+
+	if (state->tun_list->enable_bans == 0) {
+		/* Bans are not enabled */
+		D_ERR("Bans are not enabled\n");
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	if (!ban_node_check(req)) {
+		/* The request has already been finished by the check */
+		return tevent_req_post(req, ev);
+	}
+
+	return req;
+}
+
+/*
+ * Select the node with the highest ban credits and fetch its nodemap.
+ *
+ * Returns true if a GET_NODEMAP control to the selected node was started.
+ * Returns false if the request was finished here: either successfully,
+ * because no node has reached NUM_RETRIES credits, or with an
+ * out-of-memory error.
+ */
+static bool ban_node_check(struct tevent_req *req)
+{
+ struct tevent_req *subreq;
+ struct ban_node_state *state = tevent_req_data(
+ req, struct ban_node_state);
+ struct ctdb_req_control request;
+ unsigned max_credits = 0, i;
+
+ /* The first node with the strictly highest credit count wins */
+ for (i=0; i<state->nlist->count; i++) {
+ if (state->nlist->ban_credits[i] > max_credits) {
+ state->max_pnn = state->nlist->pnn_list[i];
+ max_credits = state->nlist->ban_credits[i];
+ }
+ }
+
+ /* Nobody has accumulated enough credits to be banned */
+ if (max_credits < NUM_RETRIES) {
+ tevent_req_done(req);
+ return false;
+ }
+
+ ctdb_req_control_get_nodemap(&request);
+ subreq = ctdb_client_control_send(state,
+ state->ev,
+ state->client,
+ state->max_pnn,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return false;
+ }
+ tevent_req_set_callback(subreq, ban_node_check_done, req);
+
+ return true;
+}
+
+/*
+ * Callback for the GET_NODEMAP control sent by ban_node_check().
+ *
+ * If the selected node is flagged inactive in its own nodemap, its ban
+ * credits are reset and ban_node_check() is run again to pick the next
+ * candidate.  Otherwise a SET_BAN_STATE control is sent to the selected
+ * node, banning it for the recovery_ban_period tunable.
+ */
+static void ban_node_check_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct ban_node_state *state = tevent_req_data(
+ req, struct ban_node_state);
+ struct ctdb_reply_control *reply;
+ struct ctdb_node_map *nodemap;
+ struct ctdb_req_control request;
+ struct ctdb_ban_state ban;
+ unsigned int i;
+ int ret;
+ bool ok;
+
+ ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
+ TALLOC_FREE(subreq);
+ if (!ok) {
+ D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
+ state->max_pnn, ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
+ if (ret != 0) {
+ D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ for (i=0; i<nodemap->num; i++) {
+ if (nodemap->node[i].pnn != state->max_pnn) {
+ continue;
+ }
+
+ /* If the node became inactive, reset ban_credits */
+ if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) {
+ unsigned int j;
+
+ for (j=0; j<state->nlist->count; j++) {
+ if (state->nlist->pnn_list[j] ==
+ state->max_pnn) {
+ state->nlist->ban_credits[j] = 0;
+ break;
+ }
+ }
+ /* Marks the candidate as rejected for the check below */
+ state->max_pnn = CTDB_UNKNOWN_PNN;
+ }
+ }
+
+ talloc_free(nodemap);
+ talloc_free(reply);
+
+ /* If node becomes inactive during recovery, pick next */
+ if (state->max_pnn == CTDB_UNKNOWN_PNN) {
+ /* Return value ignored: on false the request is already finished */
+ (void) ban_node_check(req);
+ return;
+ }
+
+ ban = (struct ctdb_ban_state) {
+ .pnn = state->max_pnn,
+ .time = state->tun_list->recovery_ban_period,
+ };
+
+ D_ERR("Banning node %u for %u seconds\n", ban.pnn, ban.time);
+
+ ctdb_req_control_set_ban_state(&request, &ban);
+ subreq = ctdb_client_control_send(state,
+ state->ev,
+ state->client,
+ ban.pnn,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, ban_node_done, req);
+}
+
+/*
+ * Callback for the SET_BAN_STATE control.
+ *
+ * Finishes the ban_node request with the transport error or the error
+ * decoded from the reply, or marks it as done on success.
+ */
+static void ban_node_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct ban_node_state *state = tevent_req_data(
+ req, struct ban_node_state);
+ struct ctdb_reply_control *reply;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ret = ctdb_reply_control_set_ban_state(reply);
+ if (ret != 0) {
+ D_ERR("control SET_BAN_STATE failed, ret=%d\n", ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ talloc_free(reply);
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a ban_node request.
+ *
+ * Returns false if the request finished with an error, in which case
+ * tevent_req_is_unix_error() reports the error through perr.  Returns
+ * true otherwise.
+ */
+static bool ban_node_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * Run the parallel database recovery
+ *
+ * - Get tunables
+ * - Get nodemap from all nodes
+ * - Get capabilities from all nodes
+ * - Get dbmap
+ * - Set RECOVERY_ACTIVE
+ * - Send START_RECOVERY
+ * - Update vnnmap on all nodes
+ * - Run database recovery
+ * - Set RECOVERY_NORMAL
+ * - Send END_RECOVERY
+ */
+
+/* State of the top-level recovery request */
+struct recovery_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Generation passed to recovery_send(); stored in the new VNNMAP */
+ uint32_t generation;
+ /* PNN of the local client, from ctdb_client_pnn() */
+ uint32_t destnode;
+ /* Nodes taking part in the recovery */
+ struct node_list *nlist;
+ /* Tunables fetched with GET_ALL_TUNABLES */
+ struct ctdb_tunable_list *tun_list;
+ /* New VNNMAP built in recovery_active_done() */
+ struct ctdb_vnn_map *vnnmap;
+ /* Databases collected from the GET_DBMAP replies */
+ struct db_list *dblist;
+};
+
+static void recovery_tunables_done(struct tevent_req *subreq);
+static void recovery_nodemap_done(struct tevent_req *subreq);
+static void recovery_nodemap_verify(struct tevent_req *subreq);
+static void recovery_capabilities_done(struct tevent_req *subreq);
+static void recovery_dbmap_done(struct tevent_req *subreq);
+static void recovery_active_done(struct tevent_req *subreq);
+static void recovery_start_recovery_done(struct tevent_req *subreq);
+static void recovery_vnnmap_update_done(struct tevent_req *subreq);
+static void recovery_db_recovery_done(struct tevent_req *subreq);
+static void recovery_failed_done(struct tevent_req *subreq);
+static void recovery_normal_done(struct tevent_req *subreq);
+static void recovery_end_recovery_done(struct tevent_req *subreq);
+
+/*
+ * Start the recovery request.
+ *
+ * The first step sends GET_ALL_TUNABLES to the local node; the remaining
+ * steps are chained from the callbacks.  Returns NULL if the request
+ * cannot be allocated.
+ */
+static struct tevent_req *recovery_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t generation)
+{
+ struct tevent_req *req, *subreq;
+ struct recovery_state *state;
+ struct ctdb_req_control request;
+
+ req = tevent_req_create(mem_ctx, &state, struct recovery_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->client = client;
+ state->generation = generation;
+ state->destnode = ctdb_client_pnn(client);
+
+ ctdb_req_control_get_all_tunables(&request);
+ subreq = ctdb_client_control_send(state, state->ev, state->client,
+ state->destnode, TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, recovery_tunables_done, req);
+
+ return req;
+}
+
+/*
+ * Callback for GET_ALL_TUNABLES.
+ *
+ * Stores the tunables in state->tun_list, updates recover_timeout from
+ * them and requests the nodemap from the local node.
+ */
+static void recovery_tunables_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ struct ctdb_reply_control *reply;
+ struct ctdb_req_control request;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ret = ctdb_reply_control_get_all_tunables(reply, state,
+ &state->tun_list);
+ if (ret != 0) {
+ D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
+ tevent_req_error(req, EPROTO);
+ return;
+ }
+
+ talloc_free(reply);
+
+ /*
+ * recover_timeout is not a local; it is declared outside this
+ * function.  Presumably it is what TIMEOUT() is based on - verify.
+ */
+ recover_timeout = state->tun_list->recover_timeout;
+
+ ctdb_req_control_get_nodemap(&request);
+ subreq = ctdb_client_control_send(state, state->ev, state->client,
+ state->destnode, TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_nodemap_done, req);
+}
+
+/*
+ * Callback for GET_NODEMAP on the local node.
+ *
+ * Builds state->nlist from all nodes not flagged as disconnected and
+ * then sends GET_NODEMAP to each of them so that their flags can be
+ * verified in recovery_nodemap_verify().
+ */
+static void recovery_nodemap_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ struct ctdb_reply_control *reply;
+ struct ctdb_req_control request;
+ struct ctdb_node_map *nodemap;
+ unsigned int i;
+ bool status;
+ int ret;
+
+ status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
+ state->destnode, ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
+ if (ret != 0) {
+ D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ state->nlist = node_list_init(state, nodemap->num);
+ if (tevent_req_nomem(state->nlist, req)) {
+ return;
+ }
+
+ for (i=0; i<nodemap->num; i++) {
+ bool ok;
+
+ /* Disconnected nodes do not take part in the recovery */
+ if (nodemap->node[i].flags & NODE_FLAGS_DISCONNECTED) {
+ continue;
+ }
+
+ ok = node_list_add(state->nlist, nodemap->node[i].pnn);
+ if (!ok) {
+ tevent_req_error(req, EINVAL);
+ return;
+ }
+ }
+
+ talloc_free(nodemap);
+ talloc_free(reply);
+
+ /* Verify flags by getting local node information from each node */
+ ctdb_req_control_get_nodemap(&request);
+ subreq = ctdb_client_control_multi_send(state,
+ state->ev,
+ state->client,
+ state->nlist->pnn_list,
+ state->nlist->count,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_nodemap_verify, req);
+}
+
+/*
+ * Callback for GET_NODEMAP sent to every node in state->nlist.
+ *
+ * Each node's own entry in its nodemap is looked up.  Nodes whose own
+ * flags mark them inactive, or that do not list themselves at all, are
+ * dropped.  state->nlist is replaced with the filtered list and
+ * GET_CAPABILITIES is sent to the remaining nodes.
+ */
+static void recovery_nodemap_verify(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ struct ctdb_req_control request;
+ struct ctdb_reply_control **reply;
+ struct node_list *nlist;
+ unsigned int i;
+ int *err_list;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq,
+ &ret,
+ state,
+ &err_list,
+ &reply);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("control GET_NODEMAP failed on node %u,"
+ " ret=%d\n", pnn, ret2);
+ } else {
+ D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* New list with the same capacity as the one being filtered */
+ nlist = node_list_init(state, state->nlist->size);
+ if (tevent_req_nomem(nlist, req)) {
+ return;
+ }
+
+ for (i=0; i<state->nlist->count; i++) {
+ struct ctdb_node_map *nodemap = NULL;
+ uint32_t pnn, flags;
+ unsigned int j;
+ bool ok;
+
+ pnn = state->nlist->pnn_list[i];
+ ret = ctdb_reply_control_get_nodemap(reply[i],
+ state,
+ &nodemap);
+ if (ret != 0) {
+ D_ERR("control GET_NODEMAP failed on node %u\n", pnn);
+ tevent_req_error(req, EPROTO);
+ return;
+ }
+
+ /* Default used when the node does not appear in its own nodemap */
+ flags = NODE_FLAGS_DISCONNECTED;
+ for (j=0; j<nodemap->num; j++) {
+ if (nodemap->node[j].pnn == pnn) {
+ flags = nodemap->node[j].flags;
+ break;
+ }
+ }
+
+ TALLOC_FREE(nodemap);
+
+ if (flags & NODE_FLAGS_INACTIVE) {
+ continue;
+ }
+
+ ok = node_list_add(nlist, pnn);
+ if (!ok) {
+ tevent_req_error(req, EINVAL);
+ return;
+ }
+ }
+
+ talloc_free(reply);
+
+ /* Switch over to the verified node list */
+ talloc_free(state->nlist);
+ state->nlist = nlist;
+
+ ctdb_req_control_get_capabilities(&request);
+ subreq = ctdb_client_control_multi_send(state,
+ state->ev,
+ state->client,
+ state->nlist->pnn_list,
+ state->nlist->count,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_capabilities_done, req);
+}
+
+/*
+ * Callback for GET_CAPABILITIES sent to every node in state->nlist.
+ *
+ * Records each node's capabilities in state->nlist->caps[] and then
+ * sends GET_DBMAP to all nodes.
+ */
+static void recovery_capabilities_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ struct ctdb_reply_control **reply;
+ struct ctdb_req_control request;
+ int *err_list;
+ unsigned int i;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
+ &reply);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("control GET_CAPABILITIES failed on node %u,"
+ " ret=%d\n", pnn, ret2);
+ } else {
+ D_ERR("control GET_CAPABILITIES failed, ret=%d\n",
+ ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* reply[i] is used as the reply of the node at pnn_list[i] */
+ for (i=0; i<state->nlist->count; i++) {
+ uint32_t caps;
+
+ ret = ctdb_reply_control_get_capabilities(reply[i], &caps);
+ if (ret != 0) {
+ D_ERR("control GET_CAPABILITIES failed on node %u\n",
+ state->nlist->pnn_list[i]);
+ tevent_req_error(req, EPROTO);
+ return;
+ }
+
+ state->nlist->caps[i] = caps;
+ }
+
+ talloc_free(reply);
+
+ ctdb_req_control_get_dbmap(&request);
+ subreq = ctdb_client_control_multi_send(state,
+ state->ev,
+ state->client,
+ state->nlist->pnn_list,
+ state->nlist->count,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_dbmap_done, req);
+}
+
+/*
+ * Callback for GET_DBMAP sent to every node in state->nlist.
+ *
+ * Every database reported by any node is added to state->dblist together
+ * with its flags and the reporting node.  Then recovery mode ACTIVE is
+ * set on all nodes.
+ *
+ * On failure the request is finished with the control error, EPROTO for
+ * an undecodable reply, or the error from db_list_check_and_add().
+ */
+static void recovery_dbmap_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recovery_state *state = tevent_req_data(
+		req, struct recovery_state);
+	struct ctdb_reply_control **reply;
+	struct ctdb_req_control request;
+	int *err_list;
+	unsigned int i, j;
+	int ret;
+	bool status;
+
+	status = ctdb_client_control_multi_recv(subreq,
+						&ret,
+						state,
+						&err_list,
+						&reply);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		int ret2;
+		uint32_t pnn;
+
+		ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+						       state->nlist->count,
+						       err_list,
+						       &pnn);
+		if (ret2 != 0) {
+			D_ERR("control GET_DBMAP failed on node %u,"
+			      " ret=%d\n", pnn, ret2);
+		} else {
+			D_ERR("control GET_DBMAP failed, ret=%d\n",
+			      ret);
+		}
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	state->dblist = db_list_init(state, state->nlist->count);
+	if (tevent_req_nomem(state->dblist, req)) {
+		D_ERR("memory allocation error\n");
+		return;
+	}
+
+	/* reply[i] is used as the reply of the node at pnn_list[i] */
+	for (i = 0; i < state->nlist->count; i++) {
+		struct ctdb_dbid_map *dbmap = NULL;
+		uint32_t pnn;
+
+		pnn = state->nlist->pnn_list[i];
+
+		ret = ctdb_reply_control_get_dbmap(reply[i], state, &dbmap);
+		if (ret != 0) {
+			D_ERR("control GET_DBMAP failed on node %u\n",
+			      pnn);
+			tevent_req_error(req, EPROTO);
+			return;
+		}
+
+		for (j = 0; j < dbmap->num; j++) {
+			ret = db_list_check_and_add(state->dblist,
+						    dbmap->dbs[j].db_id,
+						    dbmap->dbs[j].flags,
+						    pnn);
+			if (ret != 0) {
+				D_ERR("failed to add database list entry, "
+				      "ret=%d\n",
+				      ret);
+				tevent_req_error(req, ret);
+				return;
+			}
+		}
+
+		TALLOC_FREE(dbmap);
+	}
+
+	/*
+	 * The replies are not needed any more.  Release them now, as the
+	 * other multi-reply callbacks do, instead of keeping them on state
+	 * until the whole recovery request is freed.
+	 */
+	talloc_free(reply);
+
+	ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE);
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->nlist->pnn_list,
+						state->nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recovery_active_done, req);
+}
+
+/*
+ * Callback for setting recovery mode ACTIVE on all nodes.
+ *
+ * Builds the new VNNMAP from the lmaster nodes in state->nlist, falling
+ * back to the local node when there are none, stamps it with
+ * state->generation and sends START_RECOVERY to all nodes.
+ */
+static void recovery_active_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ struct ctdb_req_control request;
+ struct ctdb_vnn_map *vnnmap;
+ int *err_list;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
+ NULL);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("failed to set recovery mode ACTIVE on node %u,"
+ " ret=%d\n", pnn, ret2);
+ } else {
+ D_ERR("failed to set recovery mode ACTIVE, ret=%d\n",
+ ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ D_ERR("Set recovery mode to ACTIVE\n");
+
+ /* Calculate new VNNMAP */
+ vnnmap = talloc_zero(state, struct ctdb_vnn_map);
+ if (tevent_req_nomem(vnnmap, req)) {
+ return;
+ }
+
+ vnnmap->map = node_list_lmaster(state->nlist, vnnmap, &vnnmap->size);
+ if (tevent_req_nomem(vnnmap->map, req)) {
+ return;
+ }
+
+ /*
+ * NOTE(review): writing map[0] when size is 0 assumes that
+ * node_list_lmaster() returned a buffer with room for at least
+ * one entry - confirm against its implementation.
+ */
+ if (vnnmap->size == 0) {
+ D_WARNING("No active lmasters found. Adding recmaster anyway\n");
+ vnnmap->map[0] = state->destnode;
+ vnnmap->size = 1;
+ }
+
+ vnnmap->generation = state->generation;
+
+ state->vnnmap = vnnmap;
+
+ ctdb_req_control_start_recovery(&request);
+ subreq = ctdb_client_control_multi_send(state,
+ state->ev,
+ state->client,
+ state->nlist->pnn_list,
+ state->nlist->count,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_start_recovery_done, req);
+}
+
+/*
+ * Callback for the START_RECOVERY control.
+ *
+ * On success the new VNNMAP in state->vnnmap is pushed to all nodes.
+ */
+static void recovery_start_recovery_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ struct ctdb_req_control request;
+ int *err_list;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
+ NULL);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("failed to run start_recovery event on node %u,"
+ " ret=%d\n", pnn, ret2);
+ } else {
+ D_ERR("failed to run start_recovery event, ret=%d\n",
+ ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ D_ERR("start_recovery event finished\n");
+
+ ctdb_req_control_setvnnmap(&request, state->vnnmap);
+ subreq = ctdb_client_control_multi_send(state,
+ state->ev,
+ state->client,
+ state->nlist->pnn_list,
+ state->nlist->count,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_vnnmap_update_done, req);
+}
+
+/*
+ * Callback for the SETVNNMAP control.
+ *
+ * On success the parallel recovery of all databases in state->dblist is
+ * started, using the generation of the new VNNMAP.
+ */
+static void recovery_vnnmap_update_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ int *err_list;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
+ NULL);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("failed to update VNNMAP on node %u, ret=%d\n",
+ pnn, ret2);
+ } else {
+ D_ERR("failed to update VNNMAP, ret=%d\n", ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ D_NOTICE("updated VNNMAP\n");
+
+ subreq = db_recovery_send(state,
+ state->ev,
+ state->client,
+ state->dblist,
+ state->tun_list,
+ state->nlist,
+ state->vnnmap->generation);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_db_recovery_done, req);
+}
+
+/*
+ * Callback for db_recovery_send().
+ *
+ * Logs how many databases were recovered.  If any database failed, a
+ * ban_node request is started and the recovery ends as failed in
+ * recovery_failed_done().  Otherwise recovery mode NORMAL is set on all
+ * nodes.
+ */
+static void recovery_db_recovery_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recovery_state *state = tevent_req_data(
+		req, struct recovery_state);
+	struct ctdb_req_control request;
+	bool status;
+	unsigned int count;
+
+	status = db_recovery_recv(subreq, &count);
+	TALLOC_FREE(subreq);
+
+	/*
+	 * count is unsigned, so it is printed with %u.  num_dbs is cast
+	 * because its declaration is not visible from this function.
+	 */
+	D_ERR("%u of %u databases recovered\n",
+	      count, (unsigned int)state->dblist->num_dbs);
+
+	if (! status) {
+		subreq = ban_node_send(state,
+				       state->ev,
+				       state->client,
+				       state->tun_list,
+				       state->nlist);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(subreq, recovery_failed_done, req);
+		return;
+	}
+
+	ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_NORMAL);
+	subreq = ctdb_client_control_multi_send(state,
+						state->ev,
+						state->client,
+						state->nlist->pnn_list,
+						state->nlist->count,
+						TIMEOUT(),
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, recovery_normal_done, req);
+}
+
+/*
+ * Callback for ban_node_send(), reached only after database recovery
+ * has failed.
+ *
+ * A failure to ban is only logged; the recovery request is finished
+ * with EIO in either case.
+ */
+static void recovery_failed_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int ban_err;
+	bool banned;
+
+	banned = ban_node_recv(subreq, &ban_err);
+	TALLOC_FREE(subreq);
+
+	if (!banned) {
+		D_ERR("failed to ban node, ret=%d\n", ban_err);
+	}
+
+	/* The recovery itself failed, whatever the ban result was */
+	tevent_req_error(req, EIO);
+}
+
+/*
+ * Callback for setting recovery mode NORMAL on all nodes.
+ *
+ * On success END_RECOVERY is sent to all nodes.
+ */
+static void recovery_normal_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ struct ctdb_req_control request;
+ int *err_list;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
+ NULL);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("failed to set recovery mode NORMAL on node %u,"
+ " ret=%d\n", pnn, ret2);
+ } else {
+ D_ERR("failed to set recovery mode NORMAL, ret=%d\n",
+ ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ D_ERR("Set recovery mode to NORMAL\n");
+
+ ctdb_req_control_end_recovery(&request);
+ subreq = ctdb_client_control_multi_send(state,
+ state->ev,
+ state->client,
+ state->nlist->pnn_list,
+ state->nlist->count,
+ TIMEOUT(),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, recovery_end_recovery_done, req);
+}
+
+/*
+ * Callback for the END_RECOVERY control, the last step of the recovery.
+ *
+ * Finishes the recovery request with the control error, or marks it as
+ * done on success.
+ */
+static void recovery_end_recovery_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct recovery_state *state = tevent_req_data(
+ req, struct recovery_state);
+ int *err_list;
+ int ret;
+ bool status;
+
+ status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
+ NULL);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ int ret2;
+ uint32_t pnn;
+
+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list,
+ state->nlist->count,
+ err_list,
+ &pnn);
+ if (ret2 != 0) {
+ D_ERR("failed to run recovered event on node %u,"
+ " ret=%d\n", pnn, ret2);
+ } else {
+ D_ERR("failed to run recovered event, ret=%d\n", ret);
+ }
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ D_ERR("recovered event finished\n");
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of the recovery request.
+ *
+ * The outcome is passed to generic_recv() to report through perr; its
+ * return value is not used.
+ */
+static void recovery_recv(struct tevent_req *req, int *perr)
+{
+	(void) generic_recv(req, perr);
+}
+
+/* Print the command line synopsis for progname to stderr */
+static void usage(const char *progname)
+{
+	fprintf(stderr,
+		"\nUsage: %s <output-fd> <ctdb-socket-path> <generation>\n",
+		progname);
+}
+
+
+/*
+ * Arguments - write fd, socket path, generation
+ *
+ * Connects to the ctdb daemon on the given socket and runs the recovery
+ * with the given generation.  On success the result (0) is written to
+ * the write fd and the exit status is 0.  On any failure nothing is
+ * written and the exit status is 1.
+ */
+int main(int argc, char *argv[])
+{
+	int write_fd;
+	const char *sockpath;
+	TALLOC_CTX *mem_ctx = NULL;
+	struct tevent_context *ev;
+	struct ctdb_client_context *client;
+	bool status;
+	int ret = 0;
+	struct tevent_req *req;
+	uint32_t generation;
+
+	if (argc != 4) {
+		usage(argv[0]);
+		exit(1);
+	}
+
+	write_fd = atoi(argv[1]);
+	sockpath = argv[2];
+	generation = (uint32_t)smb_strtoul(argv[3],
+					   NULL,
+					   0,
+					   &ret,
+					   SMB_STR_STANDARD);
+	if (ret != 0) {
+		fprintf(stderr, "recovery: unable to initialize generation\n");
+		goto failed;
+	}
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "recovery: talloc_new() failed\n");
+		goto failed;
+	}
+
+	ret = logging_init(mem_ctx, NULL, NULL, "ctdb-recovery");
+	if (ret != 0) {
+		fprintf(stderr, "recovery: Unable to initialize logging\n");
+		goto failed;
+	}
+
+	ev = tevent_context_init(mem_ctx);
+	if (ev == NULL) {
+		D_ERR("tevent_context_init() failed\n");
+		goto failed;
+	}
+
+	status = logging_setup_sighup_handler(ev, mem_ctx, NULL, NULL);
+	if (!status) {
+		D_ERR("logging_setup_sighup_handler() failed\n");
+		goto failed;
+	}
+
+	ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
+	if (ret != 0) {
+		D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
+		goto failed;
+	}
+
+	req = recovery_send(mem_ctx, ev, client, generation);
+	if (req == NULL) {
+		/* Name the function that was actually called */
+		D_ERR("recovery_send() failed\n");
+		goto failed;
+	}
+
+	if (! tevent_req_poll(req, ev)) {
+		D_ERR("tevent_req_poll() failed\n");
+		goto failed;
+	}
+
+	recovery_recv(req, &ret);
+	TALLOC_FREE(req);
+	if (ret != 0) {
+		D_ERR("database recovery failed, ret=%d\n", ret);
+		goto failed;
+	}
+
+	/* Report success (ret is 0 here) on the write fd */
+	sys_write(write_fd, &ret, sizeof(ret));
+	return 0;
+
+failed:
+	/* mem_ctx may still be NULL here; TALLOC_FREE is used on it as-is */
+	TALLOC_FREE(mem_ctx);
+	return 1;
+}
diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c
new file mode 100644
index 0000000..b602cee
--- /dev/null
+++ b/ctdb/server/ctdb_server.c
@@ -0,0 +1,608 @@
+/*
+ ctdb main protocol code
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+  record the name of the transport to use
+
+  The string is duplicated onto ctdb.  Returns 0 on success;
+  CTDB_NO_MEMORY handles a failed allocation.
+*/
+int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
+{
+	char *copy = talloc_strdup(ctdb, transport);
+
+	ctdb->transport = copy;
+	CTDB_NO_MEMORY(ctdb, copy);
+
+	return 0;
+}
+
+/* Find the non-deleted node whose address equals nodeip; NULL if none */
+struct ctdb_node *ctdb_ip_to_node(struct ctdb_context *ctdb,
+				  const ctdb_sock_addr *nodeip)
+{
+	unsigned int idx = 0;
+
+	while (idx < ctdb->num_nodes) {
+		struct ctdb_node *candidate = ctdb->nodes[idx];
+
+		/* Deleted slots never match, whatever their address */
+		if (!(candidate->flags & NODE_FLAGS_DELETED) &&
+		    ctdb_same_ip(&candidate->address, nodeip)) {
+			return candidate;
+		}
+		idx++;
+	}
+
+	return NULL;
+}
+
+/* Map nodeip to its PNN; CTDB_UNKNOWN_PNN when no node matches */
+uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb,
+			const ctdb_sock_addr *nodeip)
+{
+	const struct ctdb_node *match = ctdb_ip_to_node(ctdb, nodeip);
+
+	return (match != NULL) ? match->pnn : CTDB_UNKNOWN_PNN;
+}
+
+/*
+ * Convert a node map into a freshly allocated array of node structures
+ *
+ * On success *nodes (allocated on mem_ctx) holds node_map->num entries
+ * and *num_nodes is set to that count.  Each node takes its address
+ * from the map, gets a "<ip>:<port>" name, uses its index as PNN and
+ * starts out flagged DISCONNECTED.  Nodes not marked DELETED have
+ * their flags reset to UNHEALTHY first.
+ *
+ * Returns 0 on success.  Allocation failures are handled by
+ * CTDB_NO_MEMORY, which makes the function return early; the caller
+ * in this file compares the result against -1.
+ */
+static int convert_node_map_to_list(struct ctdb_context *ctdb,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_node_map_old *node_map,
+				    struct ctdb_node ***nodes,
+				    uint32_t *num_nodes)
+{
+	unsigned int i;
+
+	*nodes = talloc_zero_array(mem_ctx,
+				   struct ctdb_node *, node_map->num);
+	CTDB_NO_MEMORY(ctdb, *nodes);
+	*num_nodes = node_map->num;
+
+	for (i = 0; i < node_map->num; i++) {
+		struct ctdb_node *node;
+
+		node = talloc_zero(*nodes, struct ctdb_node);
+		CTDB_NO_MEMORY(ctdb, node);
+		(*nodes)[i] = node;
+
+		node->address = node_map->nodes[i].addr;
+		node->name = talloc_asprintf(node, "%s:%u",
+					     ctdb_addr_to_str(&node->address),
+					     ctdb_addr_to_port(&node->address));
+		/* The name is printed with %s elsewhere; never leave it NULL */
+		CTDB_NO_MEMORY(ctdb, node->name);
+
+		node->flags = node_map->nodes[i].flags;
+		if (!(node->flags & NODE_FLAGS_DELETED)) {
+			node->flags = NODE_FLAGS_UNHEALTHY;
+		}
+		node->flags |= NODE_FLAGS_DISCONNECTED;
+
+		node->pnn = i;
+		node->ctdb = ctdb;
+		node->dead_count = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Replace ctdb->nodes with the contents of ctdb->nodes_file
+ *
+ * Any failure to read or convert the file is logged and terminates
+ * the process with exit(1).
+ */
+void ctdb_load_nodes_file(struct ctdb_context *ctdb)
+{
+	struct ctdb_node_map_old *node_map;
+	int ret = -1;
+
+	node_map = ctdb_read_nodes_file(ctdb, ctdb->nodes_file);
+	if (node_map != NULL) {
+		/* Only drop the old list once a new map has been read */
+		TALLOC_FREE(ctdb->nodes);
+		ret = convert_node_map_to_list(ctdb, ctdb, node_map,
+					       &ctdb->nodes,
+					       &ctdb->num_nodes);
+	}
+
+	if (ret == -1) {
+		DEBUG(DEBUG_ERR, ("Failed to load nodes file \"%s\"\n",
+				  ctdb->nodes_file));
+		talloc_free(node_map);
+		exit(1);
+	}
+
+	talloc_free(node_map);
+}
+
+/*
+  setup the local node address
+
+  Parses address into a newly allocated ctdb->address and derives
+  ctdb->name ("<ip>:<port>") from it.  Returns 0 on success and -1 if
+  the address cannot be parsed; allocation failures are handled by
+  CTDB_NO_MEMORY.
+*/
+int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
+{
+	ctdb->address = talloc(ctdb, ctdb_sock_addr);
+	CTDB_NO_MEMORY(ctdb, ctdb->address);
+
+	if (ctdb_parse_address(ctdb, address, ctdb->address) != 0) {
+		/* Do not leave an unparsed address attached to ctdb */
+		TALLOC_FREE(ctdb->address);
+		return -1;
+	}
+
+	ctdb->name = talloc_asprintf(ctdb, "%s:%u",
+				     ctdb_addr_to_str(ctdb->address),
+				     ctdb_addr_to_port(ctdb->address));
+	/* The name is used in log messages; treat a failed copy as an error */
+	CTDB_NO_MEMORY(ctdb, ctdb->name);
+
+	return 0;
+}
+
+
+/*
+  count the nodes that have none of the NODE_FLAGS_INACTIVE bits set
+*/
+uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
+{
+	uint32_t active = 0;
+	unsigned int idx = 0;
+
+	while (idx < ctdb->num_nodes) {
+		if ((ctdb->nodes[idx]->flags & NODE_FLAGS_INACTIVE) == 0) {
+			active++;
+		}
+		idx++;
+	}
+
+	return active;
+}
+
+
+/*
+ called when we need to process a packet. This can be a requeued packet
+ after a lockwait, or a real packet from another node
+
+ Call and dmaster requests/replies are dropped (with a debug message)
+ when this node is banned or when the packet's generation differs from
+ ours. Everything that passes is dispatched on hdr->operation, bumping
+ the matching statistics counter first.
+*/
+void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+ TALLOC_CTX *tmp_ctx;
+
+ /* place the packet as a child of the tmp_ctx. We then use
+ talloc_free() below to free it. If any of the calls want
+ to keep it, then they will steal it somewhere else, and the
+ talloc_free() will only free the tmp_ctx */
+ tmp_ctx = talloc_new(ctdb);
+ talloc_steal(tmp_ctx, hdr);
+
+ DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
+ "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
+ hdr->srcnode, hdr->destnode));
+
+ /* First pass: filtering only. Operations not listed here have no
+ case label and go straight on to the dispatch switch below */
+ switch (hdr->operation) {
+ case CTDB_REQ_CALL:
+ case CTDB_REPLY_CALL:
+ case CTDB_REQ_DMASTER:
+ case CTDB_REPLY_DMASTER:
+ /* we don't allow these calls when banned */
+ if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
+ DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
+ " request %u"
+ " length %u from node %u to %u while node"
+ " is banned\n",
+ hdr->operation, hdr->reqid,
+ hdr->length,
+ hdr->srcnode, hdr->destnode));
+ goto done;
+ }
+
+ /* for ctdb_call inter-node operations verify that the
+ remote node that sent us the call is running in the
+ same generation instance as this node
+ */
+ if (ctdb->vnn_map->generation != hdr->generation) {
+ DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
+ " request %u"
+ " length %u from node %u to %u had an"
+ " invalid generation id:%u while our"
+ " generation id is:%u\n",
+ hdr->operation, hdr->reqid,
+ hdr->length,
+ hdr->srcnode, hdr->destnode,
+ hdr->generation, ctdb->vnn_map->generation));
+ goto done;
+ }
+ }
+
+ /* Second pass: count the packet and hand it to its handler */
+ switch (hdr->operation) {
+ case CTDB_REQ_CALL:
+ CTDB_INCREMENT_STAT(ctdb, node.req_call);
+ ctdb_request_call(ctdb, hdr);
+ break;
+
+ case CTDB_REPLY_CALL:
+ CTDB_INCREMENT_STAT(ctdb, node.reply_call);
+ ctdb_reply_call(ctdb, hdr);
+ break;
+
+ case CTDB_REPLY_ERROR:
+ CTDB_INCREMENT_STAT(ctdb, node.reply_error);
+ ctdb_reply_error(ctdb, hdr);
+ break;
+
+ case CTDB_REQ_DMASTER:
+ CTDB_INCREMENT_STAT(ctdb, node.req_dmaster);
+ ctdb_request_dmaster(ctdb, hdr);
+ break;
+
+ case CTDB_REPLY_DMASTER:
+ CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster);
+ ctdb_reply_dmaster(ctdb, hdr);
+ break;
+
+ case CTDB_REQ_MESSAGE:
+ CTDB_INCREMENT_STAT(ctdb, node.req_message);
+ ctdb_request_message(ctdb, hdr);
+ break;
+
+ case CTDB_REQ_CONTROL:
+ CTDB_INCREMENT_STAT(ctdb, node.req_control);
+ ctdb_request_control(ctdb, hdr);
+ break;
+
+ case CTDB_REPLY_CONTROL:
+ CTDB_INCREMENT_STAT(ctdb, node.reply_control);
+ ctdb_reply_control(ctdb, hdr);
+ break;
+
+ case CTDB_REQ_KEEPALIVE:
+ CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv);
+ ctdb_request_keepalive(ctdb, hdr);
+ break;
+
+ case CTDB_REQ_TUNNEL:
+ CTDB_INCREMENT_STAT(ctdb, node.req_tunnel);
+ ctdb_request_tunnel(ctdb, hdr);
+ break;
+
+ default:
+ /* Unknown operations are logged and otherwise ignored */
+ DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
+ __location__, hdr->operation));
+ break;
+ }
+
+done:
+ /* Frees hdr too, unless a handler moved it to another parent */
+ talloc_free(tmp_ctx);
+}
+
+
+/*
+  transport-layer notification that a node has died
+
+  The transport for the node is always restarted (unless we are
+  shutting down and have no transport methods).  Bookkeeping is done
+  only the first time the node is seen as disconnected.
+*/
+void ctdb_node_dead(struct ctdb_node *node)
+{
+	struct ctdb_context *ctdb = node->ctdb;
+
+	if (ctdb->methods == NULL) {
+		DBG_ERR("Can not restart transport while shutting down\n");
+		return;
+	}
+	ctdb->methods->restart(node);
+
+	if ((node->flags & NODE_FLAGS_DISCONNECTED) != 0) {
+		DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
+				  ctdb->name, node->name,
+				  ctdb->num_connected));
+		return;
+	}
+
+	/* Newly dead: flag it and reset its per-connection counters */
+	node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
+	node->rx_cnt = 0;
+	node->dead_count = 0;
+	ctdb->num_connected--;
+
+	DEBUG(DEBUG_ERR,("%s: node %s is dead: %u connected\n",
+			 ctdb->name, node->name, ctdb->num_connected));
+	ctdb_daemon_cancel_controls(ctdb, node);
+}
+
+/*
+  transport-layer notification that a node is connected
+
+  A node that is not flagged disconnected is only logged; otherwise the
+  flag is cleared and the connected count is bumped.
+*/
+void ctdb_node_connected(struct ctdb_node *node)
+{
+	struct ctdb_context *ctdb = node->ctdb;
+
+	if ((node->flags & NODE_FLAGS_DISCONNECTED) == 0) {
+		DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
+				  ctdb->name, node->name,
+				  ctdb->num_connected));
+		return;
+	}
+
+	node->flags &= ~NODE_FLAGS_DISCONNECTED;
+	node->dead_count = 0;
+	ctdb->num_connected++;
+
+	DEBUG(DEBUG_ERR,
+	      ("%s: connected to %s - %u connected\n",
+	       ctdb->name, node->name, ctdb->num_connected));
+}
+
+/* A packet addressed to ourselves, waiting to be processed from a timer */
+struct queue_next {
+ /* daemon context the packet is fed back into */
+ struct ctdb_context *ctdb;
+ /* private copy of the packet, allocated as a child of this struct */
+ struct ctdb_req_header *hdr;
+};
+
+
+/*
+  timer callback: feed a deferred packet into the input path, then
+  release the deferral record
+ */
+static void queue_next_trigger(struct tevent_context *ev,
+			       struct tevent_timer *te,
+			       struct timeval t, void *private_data)
+{
+	struct queue_next *deferred =
+		talloc_get_type(private_data, struct queue_next);
+	struct ctdb_context *ctdb = deferred->ctdb;
+	struct ctdb_req_header *pkt = deferred->hdr;
+
+	ctdb_input_pkt(ctdb, pkt);
+	talloc_free(deferred);
+}
+
+/*
+  defer a packet, so it is processed on the next event loop
+  this is used for sending packets to ourselves
+
+  The packet is copied, so the caller keeps ownership of hdr.  Failures
+  (allocation or timer setup) are logged and the packet is dropped.
+ */
+static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	struct queue_next *q;
+
+	q = talloc(ctdb, struct queue_next);
+	if (q == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
+		return;
+	}
+	q->ctdb = ctdb;
+	q->hdr = talloc_memdup(q, hdr, hdr->length);
+	if (q->hdr == NULL) {
+		talloc_free(q);
+		DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
+		return;
+	}
+#if 0
+	/* use this to put packets directly into our recv function */
+	ctdb_input_pkt(q->ctdb, q->hdr);
+#else
+	/*
+	 * Without a timer nothing would ever process or free q, so
+	 * release it here instead of leaving it attached to ctdb.
+	 */
+	if (tevent_add_timer(ctdb->ev, q, timeval_zero(),
+			     queue_next_trigger, q) == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to schedule deferred packet\n"));
+		talloc_free(q);
+	}
+#endif
+}
+
+
+/*
+  send a copy of the packet to every node that is not deleted
+*/
+static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
+				      struct ctdb_req_header *hdr)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		const struct ctdb_node *target = ctdb->nodes[idx];
+
+		if (!(target->flags & NODE_FLAGS_DELETED)) {
+			/* Retarget the shared header at this node and queue it */
+			hdr->destnode = target->pnn;
+			ctdb_queue_packet(ctdb, hdr);
+		}
+	}
+}
+
+/*
+  send a copy of the packet to every node with no NODE_FLAGS_INACTIVE
+  bit set
+*/
+static void ctdb_broadcast_packet_active(struct ctdb_context *ctdb,
+					 struct ctdb_req_header *hdr)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		const struct ctdb_node *target = ctdb->nodes[idx];
+
+		if (!(target->flags & NODE_FLAGS_INACTIVE)) {
+			hdr->destnode = target->pnn;
+			ctdb_queue_packet(ctdb, hdr);
+		}
+	}
+}
+
+/*
+  send a copy of the packet to every node that is neither deleted nor
+  disconnected
+*/
+static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
+					    struct ctdb_req_header *hdr)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		const struct ctdb_node *target = ctdb->nodes[idx];
+
+		if (target->flags &
+		    (NODE_FLAGS_DELETED | NODE_FLAGS_DISCONNECTED)) {
+			continue;
+		}
+
+		hdr->destnode = target->pnn;
+		ctdb_queue_packet(ctdb, hdr);
+	}
+}
+
+/*
+  queue a packet or die
+
+  Broadcast destinations are fanned out to the matching helper.
+  Packets for this node are deferred to the event loop; packets for
+  other nodes are handed to the transport, and a transport queueing
+  failure is fatal.  Invalid or deleted destinations and a missing
+  transport are logged and the packet is dropped.
+*/
+void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	struct ctdb_node *target;
+
+	if (hdr->destnode == CTDB_BROADCAST_ALL) {
+		ctdb_broadcast_packet_all(ctdb, hdr);
+		return;
+	} else if (hdr->destnode == CTDB_BROADCAST_ACTIVE) {
+		ctdb_broadcast_packet_active(ctdb, hdr);
+		return;
+	} else if (hdr->destnode == CTDB_BROADCAST_CONNECTED) {
+		ctdb_broadcast_packet_connected(ctdb, hdr);
+		return;
+	}
+
+	/* Counted per unicast packet, before any validation */
+	CTDB_INCREMENT_STAT(ctdb, node_packets_sent);
+
+	if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
+		DEBUG(DEBUG_CRIT,(__location__ " can't send to node %u that does not exist\n",
+				  hdr->destnode));
+		return;
+	}
+
+	target = ctdb->nodes[hdr->destnode];
+
+	if ((target->flags & NODE_FLAGS_DELETED) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode));
+		return;
+	}
+
+	/* Packets to ourselves never touch the transport */
+	if (target->pnn == ctdb->pnn) {
+		ctdb_defer_packet(ctdb, hdr);
+		return;
+	}
+
+	if (ctdb->methods == NULL) {
+		DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. "
+				    "Transport is DOWN\n"));
+		return;
+	}
+
+	target->tx_cnt++;
+	if (ctdb->methods->queue_pkt(target, (uint8_t *)hdr, hdr->length) != 0) {
+		ctdb_fatal(ctdb, "Unable to queue packet\n");
+	}
+}
+
+
+
+
+/*
+ a valgrind hack to allow us to get opcode specific backtraces
+ very ugly, and relies on no compiler optimisation!
+
+ Every case, and the default, makes the same ctdb_queue_packet() call;
+ the opcode only selects which call site is used. The DO_OP macro
+ defined here is not undefined afterwards.
+*/
+void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
+{
+ switch (opcode) {
+#define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
+ DO_OP(1);
+ DO_OP(2);
+ DO_OP(3);
+ DO_OP(4);
+ DO_OP(5);
+ DO_OP(6);
+ DO_OP(7);
+ DO_OP(8);
+ DO_OP(9);
+ DO_OP(10);
+ DO_OP(11);
+ DO_OP(12);
+ DO_OP(13);
+ DO_OP(14);
+ DO_OP(15);
+ DO_OP(16);
+ DO_OP(17);
+ DO_OP(18);
+ DO_OP(19);
+ DO_OP(20);
+ DO_OP(21);
+ DO_OP(22);
+ DO_OP(23);
+ DO_OP(24);
+ DO_OP(25);
+ DO_OP(26);
+ DO_OP(27);
+ DO_OP(28);
+ DO_OP(29);
+ DO_OP(30);
+ DO_OP(31);
+ DO_OP(32);
+ DO_OP(33);
+ DO_OP(34);
+ DO_OP(35);
+ DO_OP(36);
+ DO_OP(37);
+ DO_OP(38);
+ DO_OP(39);
+ DO_OP(40);
+ DO_OP(41);
+ DO_OP(42);
+ DO_OP(43);
+ DO_OP(44);
+ DO_OP(45);
+ DO_OP(46);
+ DO_OP(47);
+ DO_OP(48);
+ DO_OP(49);
+ DO_OP(50);
+ DO_OP(51);
+ DO_OP(52);
+ DO_OP(53);
+ DO_OP(54);
+ DO_OP(55);
+ DO_OP(56);
+ DO_OP(57);
+ DO_OP(58);
+ DO_OP(59);
+ DO_OP(60);
+ DO_OP(61);
+ DO_OP(62);
+ DO_OP(63);
+ DO_OP(64);
+ DO_OP(65);
+ DO_OP(66);
+ DO_OP(67);
+ DO_OP(68);
+ DO_OP(69);
+ DO_OP(70);
+ DO_OP(71);
+ DO_OP(72);
+ DO_OP(73);
+ DO_OP(74);
+ DO_OP(75);
+ DO_OP(76);
+ DO_OP(77);
+ DO_OP(78);
+ DO_OP(79);
+ DO_OP(80);
+ DO_OP(81);
+ DO_OP(82);
+ DO_OP(83);
+ DO_OP(84);
+ DO_OP(85);
+ DO_OP(86);
+ DO_OP(87);
+ DO_OP(88);
+ DO_OP(89);
+ DO_OP(90);
+ DO_OP(91);
+ DO_OP(92);
+ DO_OP(93);
+ DO_OP(94);
+ DO_OP(95);
+ DO_OP(96);
+ DO_OP(97);
+ DO_OP(98);
+ DO_OP(99);
+ DO_OP(100);
+ default:
+ /* opcodes outside 1..100 share this single call site */
+ ctdb_queue_packet(ctdb, hdr);
+ break;
+ }
+}
diff --git a/ctdb/server/ctdb_statistics.c b/ctdb/server/ctdb_statistics.c
new file mode 100644
index 0000000..4cf8f9e
--- /dev/null
+++ b/ctdb/server/ctdb_statistics.c
@@ -0,0 +1,93 @@
+/*
+ ctdb statistics code
+
+ Copyright (C) Ronnie Sahlberg 2010
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/time.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+
+#include "common/logging.h"
+
+/*
+ * Timer callback: push the current statistics sample onto the front of
+ * the history (dropping the oldest entry), start a fresh sample and
+ * re-arm the timer for the next interval.
+ */
+static void ctdb_statistics_update(struct tevent_context *ev,
+				   struct tevent_timer *te,
+				   struct timeval t, void *p)
+{
+	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
+	struct ctdb_statistics *history = ctdb->statistics_history;
+	struct ctdb_statistics *current = &ctdb->statistics_current;
+
+	/* Shift entries 0..N-2 up by one slot, then store the new sample */
+	memmove(history + 1, history,
+		(MAX_STAT_HISTORY-1)*sizeof(struct ctdb_statistics));
+	memcpy(history, current, sizeof(struct ctdb_statistics));
+	history[0].statistics_current_time = timeval_current();
+
+	/* Begin a new, empty sampling period */
+	memset(current, 0, sizeof(struct ctdb_statistics));
+	current->statistics_start_time = timeval_current();
+
+	tevent_add_timer(ctdb->ev, ctdb,
+			 timeval_current_ofs(ctdb->tunable.stat_history_interval, 0),
+			 ctdb_statistics_update, ctdb);
+}
+
+/*
+ * Zero all statistics (totals, current sample and history), stamp the
+ * start times and schedule the first history update.  Always returns 0.
+ */
+int ctdb_statistics_init(struct ctdb_context *ctdb)
+{
+	struct ctdb_statistics *totals = &ctdb->statistics;
+	struct ctdb_statistics *current = &ctdb->statistics_current;
+
+	memset(totals, 0, sizeof(struct ctdb_statistics));
+	totals->statistics_start_time = timeval_current();
+
+	memset(current, 0, sizeof(struct ctdb_statistics));
+	current->statistics_start_time = timeval_current();
+
+	memset(ctdb->statistics_history, 0, sizeof(ctdb->statistics_history));
+
+	tevent_add_timer(ctdb->ev, ctdb,
+			 timeval_current_ofs(ctdb->tunable.stat_history_interval, 0),
+			 ctdb_statistics_update, ctdb);
+
+	return 0;
+}
+
+
+/*
+ * Control handler: return the whole statistics history in outdata as a
+ * ctdb_statistics_list_old with MAX_STAT_HISTORY entries, allocated as
+ * a child of outdata.  Returns 0 on success, -1 if allocation fails.
+ */
+int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
+				      struct ctdb_req_control_old *c,
+				      TDB_DATA *outdata)
+{
+	struct ctdb_statistics_list_old *list;
+	int size = offsetof(struct ctdb_statistics_list_old, stats) +
+		MAX_STAT_HISTORY*sizeof(struct ctdb_statistics);
+
+	list = talloc_size(outdata, size);
+	if (list == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to allocate statistics history structure\n"));
+		return -1;
+	}
+
+	list->num = MAX_STAT_HISTORY;
+	memcpy(list->stats, ctdb->statistics_history,
+	       sizeof(ctdb->statistics_history));
+
+	outdata->dptr = (uint8_t *)list;
+	outdata->dsize = size;
+
+	return 0;
+}
diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c
new file mode 100644
index 0000000..b622faf
--- /dev/null
+++ b/ctdb/server/ctdb_takeover.c
@@ -0,0 +1,2751 @@
+/*
+ ctdb ip takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/time.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/util_process.h"
+
+#include "protocol/protocol_util.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/system_socket.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "server/ctdb_config.h"
+
+#include "server/ipalloc.h"
+
+#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
+
+#define CTDB_ARP_INTERVAL 1
+#define CTDB_ARP_REPEAT 3
+
+/* A local network interface that public addresses can be hosted on */
+struct ctdb_interface {
+ /* list links; entries are kept on the ctdb->ifaces list */
+ struct ctdb_interface *prev, *next;
+ /* interface name, a copy owned by this structure */
+ const char *name;
+ /* link state; set to true when the entry is created */
+ bool link_up;
+ /* number of VNNs currently assigned to this interface */
+ uint32_t references;
+};
+
+/* One entry in a VNN's list of candidate interfaces */
+struct vnn_interface {
+ /* list links within ctdb_vnn.ifaces */
+ struct vnn_interface *prev, *next;
+ /* the interface this entry refers to */
+ struct ctdb_interface *iface;
+};
+
+/* state associated with a public ip address */
+struct ctdb_vnn {
+ struct ctdb_vnn *prev, *next;
+
+ /* interface the address is currently assigned to, NULL if none */
+ struct ctdb_interface *iface;
+ /* all interfaces this address may be assigned to */
+ struct vnn_interface *ifaces;
+ ctdb_sock_addr public_address;
+ uint8_t public_netmask_bits;
+
+ /*
+ * The node number that is serving this public address - set
+ * to CTDB_UNKNOWN_PNN when no node is known to be serving it
+ */
+ uint32_t pnn;
+
+ /* List of clients to tickle for this public address */
+ struct ctdb_tcp_array *tcp_array;
+
+ /* whether we need to update the other nodes with changes to our list
+ of connected clients */
+ bool tcp_update_needed;
+
+ /* a context to hang sending gratuitous arp events off */
+ TALLOC_CTX *takeover_ctx;
+
+ /* Set to true any time an update to this VNN is in flight.
+ This helps to avoid races. */
+ bool update_in_flight;
+
+ /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
+ * address then this flag is set. It will be deleted in the
+ * release IP callback. */
+ bool delete_pending;
+};
+
+/* Printable name for an interface; "__none__" stands in for NULL */
+static const char *iface_string(const struct ctdb_interface *iface)
+{
+	if (iface == NULL) {
+		return "__none__";
+	}
+
+	return iface->name;
+}
+
+/* Printable name of the interface a VNN is currently assigned to */
+static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
+{
+	const struct ctdb_interface *assigned = vnn->iface;
+
+	return iface_string(assigned);
+}
+
+static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
+ const char *iface);
+
+/*
+ * Return the interface entry named iface, creating it (link up, added
+ * to ctdb->ifaces) if it does not exist yet.  Returns NULL if the name
+ * is longer than CTDB_IFACE_SIZE or memory runs out.
+ */
+static struct ctdb_interface *
+ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
+{
+	struct ctdb_interface *entry;
+
+	if (strlen(iface) > CTDB_IFACE_SIZE) {
+		DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
+		return NULL;
+	}
+
+	/* Reuse an existing entry for this interface name */
+	entry = ctdb_find_iface(ctdb, iface);
+	if (entry != NULL) {
+		return entry;
+	}
+
+	/* Otherwise build a new one, owned by ctdb */
+	entry = talloc_zero(ctdb, struct ctdb_interface);
+	if (entry == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+		return NULL;
+	}
+
+	entry->name = talloc_strdup(entry, iface);
+	if (entry->name == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+		talloc_free(entry);
+		return NULL;
+	}
+	entry->link_up = true;
+
+	DLIST_ADD(ctdb->ifaces, entry);
+
+	return entry;
+}
+
+/* True if iface is one of the candidate interfaces listed for vnn */
+static bool vnn_has_interface(struct ctdb_vnn *vnn,
+			      const struct ctdb_interface *iface)
+{
+	const struct vnn_interface *entry = vnn->ifaces;
+
+	while (entry != NULL) {
+		if (entry->iface == iface) {
+			return true;
+		}
+		entry = entry->next;
+	}
+
+	return false;
+}
+
+/* If any interfaces now have no possible IPs then delete them. This
+ * implementation is naive (i.e. simple) rather than clever
+ * (i.e. complex). Given that this is run on delip and that operation
+ * is rare, this doesn't need to be efficient - it needs to be
+ * foolproof. One alternative is reference counting, where the logic
+ * is distributed and can, therefore, be broken in multiple places.
+ * Another alternative is to build a red-black tree of interfaces that
+ * can have addresses (by walking ctdb->vnn once) and then walking
+ * ctdb->ifaces once and deleting those not in the tree. Let's go to
+ * one of those if the naive implementation causes problems... :-)
+ */
+static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
+					struct ctdb_vnn *vnn)
+{
+	struct ctdb_interface *cur = ctdb->ifaces;
+
+	while (cur != NULL) {
+		/* Remember the successor: cur may be freed below */
+		struct ctdb_interface *following = cur->next;
+
+		/* Interfaces not named in the given VNN are left alone */
+		if (vnn_has_interface(vnn, cur)) {
+			struct ctdb_vnn *other;
+			bool in_use = false;
+
+			/* Does any VNN on ctdb->vnn still list it? */
+			for (other = ctdb->vnn;
+			     other != NULL && !in_use;
+			     other = other->next) {
+				in_use = vnn_has_interface(other, cur);
+			}
+
+			if (!in_use) {
+				/* Orphaned: unlink and release it */
+				DLIST_REMOVE(ctdb->ifaces, cur);
+				talloc_free(cur);
+			}
+		}
+
+		cur = following;
+	}
+}
+
+
+/* Look up an interface entry by exact name; NULL if there is none */
+static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
+					      const char *iface)
+{
+	struct ctdb_interface *entry = ctdb->ifaces;
+
+	while (entry != NULL) {
+		if (strcmp(entry->name, iface) == 0) {
+			return entry;
+		}
+		entry = entry->next;
+	}
+
+	return NULL;
+}
+
+/*
+ * Pick the candidate interface of vnn that has its link up and the
+ * fewest references; the earliest such entry wins ties.  Returns NULL
+ * when no candidate has its link up.
+ */
+static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
+						  struct ctdb_vnn *vnn)
+{
+	struct ctdb_interface *best = NULL;
+	struct vnn_interface *entry;
+
+	for (entry = vnn->ifaces; entry != NULL; entry = entry->next) {
+		struct ctdb_interface *candidate = entry->iface;
+
+		if (!candidate->link_up) {
+			continue;
+		}
+
+		if (best == NULL ||
+		    candidate->references < best->references) {
+			best = candidate;
+		}
+	}
+
+	return best;
+}
+
+/*
+ * Assign vnn to its best available interface and mark this node as the
+ * one serving it.  A VNN that already has an interface is left as is.
+ * Returns 0 on success, -1 if no interface is usable.
+ */
+static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
+				     struct ctdb_vnn *vnn)
+{
+	struct ctdb_interface *chosen;
+
+	if (vnn->iface != NULL) {
+		DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
+				   "still assigned to iface '%s'\n",
+				   ctdb_addr_to_str(&vnn->public_address),
+				   ctdb_vnn_iface_string(vnn)));
+		return 0;
+	}
+
+	chosen = ctdb_vnn_best_iface(ctdb, vnn);
+	if (chosen == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
+				  "cannot assign to iface any iface\n",
+				  ctdb_addr_to_str(&vnn->public_address)));
+		return -1;
+	}
+
+	/* Take a reference on the interface and record the new owner */
+	chosen->references++;
+	vnn->iface = chosen;
+	vnn->pnn = ctdb->pnn;
+
+	DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
+			   "now assigned to iface '%s' refs[%d]\n",
+			   ctdb_addr_to_str(&vnn->public_address),
+			   ctdb_vnn_iface_string(vnn),
+			   chosen->references));
+	return 0;
+}
+
+/*
+ * Detach vnn from its current interface (dropping that interface's
+ * reference) and, if this node was recorded as serving the address,
+ * reset the owner to CTDB_UNKNOWN_PNN.
+ */
+static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
+				    struct ctdb_vnn *vnn)
+{
+	struct ctdb_interface *previous = vnn->iface;
+
+	/* Log before changing anything so the old state is reported */
+	DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
+			   "now unassigned (old iface '%s' refs[%d])\n",
+			   ctdb_addr_to_str(&vnn->public_address),
+			   ctdb_vnn_iface_string(vnn),
+			   previous?previous->references:0));
+
+	if (previous != NULL) {
+		previous->references--;
+	}
+	vnn->iface = NULL;
+
+	if (vnn->pnn == ctdb->pnn) {
+		vnn->pnn = CTDB_UNKNOWN_PNN;
+	}
+}
+
+/*
+ * Can this node host the given public address right now?  Requires the
+ * daemon to be in RUNNING state, the local node to be neither inactive
+ * nor disabled, no pending delete on the VNN, and at least one of its
+ * interfaces (assigned or candidate) to have its link up.
+ */
+static bool ctdb_vnn_available(struct ctdb_context *ctdb,
+			       struct ctdb_vnn *vnn)
+{
+	const struct vnn_interface *entry;
+
+	/* Nodes that are not RUNNING can not host IPs */
+	if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
+		return false;
+	}
+
+	if (ctdb->nodes[ctdb->pnn]->flags &
+	    (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) {
+		return false;
+	}
+
+	if (vnn->delete_pending) {
+		return false;
+	}
+
+	/* The currently assigned interface is checked first */
+	if (vnn->iface != NULL && vnn->iface->link_up) {
+		return true;
+	}
+
+	entry = vnn->ifaces;
+	while (entry != NULL) {
+		if (entry->iface->link_up) {
+			return true;
+		}
+		entry = entry->next;
+	}
+
+	return false;
+}
+
+/* State for one run of repeated gratuitous ARPs / TCP tickles */
+struct ctdb_takeover_arp {
+ struct ctdb_context *ctdb;
+ /* number of announcements sent so far */
+ uint32_t count;
+ /* the public address being announced */
+ ctdb_sock_addr addr;
+ /* connections to send tickle ACKs for; may be NULL */
+ struct ctdb_tcp_array *tcparray;
+ /* the VNN the address belongs to */
+ struct ctdb_vnn *vnn;
+};
+
+
+/*
+ lists of tcp endpoints
+ */
+struct ctdb_tcp_list {
+ /* list links */
+ struct ctdb_tcp_list *prev, *next;
+ /* NOTE(review): presumably the client that registered the connection - confirm */
+ struct ctdb_client *client;
+ /* the connection's endpoints */
+ struct ctdb_connection connection;
+};
+
+/*
+ send a gratuitous arp
+
+ Timer callback. Each run sends one ARP for the address on the VNN's
+ current interface plus a TCP tickle ACK for every connection in the
+ attached array, then re-arms itself until CTDB_ARP_REPEAT runs have
+ been made. The state is freed when the runs are done or when the
+ address turns out to have been released.
+ */
+static void ctdb_control_send_arp(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
+ struct ctdb_takeover_arp);
+ int ret;
+ struct ctdb_tcp_array *tcparray;
+ const char *iface;
+
+ /* IP address might have been released between sends */
+ if (arp->vnn->iface == NULL) {
+ DBG_INFO("Cancelling ARP send for released IP %s\n",
+ ctdb_addr_to_str(&arp->vnn->public_address));
+ talloc_free(arp);
+ return;
+ }
+
+ /* A failed ARP send is only logged; the tickles below still go out */
+ iface = ctdb_vnn_iface_string(arp->vnn);
+ ret = ctdb_sys_send_arp(&arp->addr, iface);
+ if (ret != 0) {
+ DBG_ERR("Failed to send ARP on interface %s: %s\n",
+ iface, strerror(ret));
+ }
+
+ tcparray = arp->tcparray;
+ if (tcparray) {
+ unsigned int i;
+
+ for (i=0;i<tcparray->num;i++) {
+ struct ctdb_connection *tcon;
+ char buf[128];
+
+ tcon = &tcparray->connections[i];
+ /* buf is only used for the log messages below */
+ ret = ctdb_connection_to_buf(buf,
+ sizeof(buf),
+ tcon,
+ false,
+ " -> ");
+ if (ret != 0) {
+ strlcpy(buf, "UNKNOWN", sizeof(buf));
+ }
+ D_INFO("Send TCP tickle ACK: %s\n", buf);
+ ret = ctdb_sys_send_tcp(
+ &tcon->src,
+ &tcon->dst,
+ 0, 0, 0);
+ if (ret != 0) {
+ DBG_ERR("Failed to send TCP tickle ACK: %s\n",
+ buf);
+ }
+ }
+ }
+
+ arp->count++;
+
+ /* Stop, and release the state, once enough rounds have been sent */
+ if (arp->count == CTDB_ARP_REPEAT) {
+ talloc_free(arp);
+ return;
+ }
+
+ /* Otherwise schedule the next round on the VNN's takeover context */
+ tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
+ timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
+ ctdb_control_send_arp, arp);
+}
+
+/*
+ * Start announcing a public address: schedule ctdb_control_send_arp()
+ * to run from the event loop with a fresh ctdb_takeover_arp state.
+ *
+ * The VNN's tcp_array, if any, is handed over to that state so tickle
+ * ACKs are sent for those connections, and tcp_update_needed is set.
+ * Nothing is taken from the VNN unless the timer was scheduled
+ * successfully.
+ *
+ * Returns 0 on success, -1 on allocation or scheduling failure.
+ */
+static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
+				       struct ctdb_vnn *vnn)
+{
+	struct ctdb_takeover_arp *arp;
+	struct tevent_timer *te;
+
+	if (vnn->takeover_ctx == NULL) {
+		vnn->takeover_ctx = talloc_new(vnn);
+		if (vnn->takeover_ctx == NULL) {
+			return -1;
+		}
+	}
+
+	arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
+	if (arp == NULL) {
+		return -1;
+	}
+
+	arp->ctdb = ctdb;
+	arp->addr = vnn->public_address;
+	arp->vnn = vnn;
+
+	/*
+	 * Schedule first: the callback only runs from the event loop, so
+	 * it is safe to finish filling in arp afterwards, and on failure
+	 * the VNN's connection list is still untouched.
+	 */
+	te = tevent_add_timer(ctdb->ev, vnn->takeover_ctx,
+			      timeval_zero(), ctdb_control_send_arp, arp);
+	if (te == NULL) {
+		DBG_ERR("Failed to schedule ARP send for IP %s\n",
+			ctdb_addr_to_str(&vnn->public_address));
+		talloc_free(arp);
+		return -1;
+	}
+
+	if (vnn->tcp_array != NULL) {
+		/* add all of the known tcp connections for this IP to the
+		   list of tcp connections to send tickle acks for */
+		arp->tcparray = talloc_steal(arp, vnn->tcp_array);
+
+		vnn->tcp_array = NULL;
+		vnn->tcp_update_needed = true;
+	}
+
+	return 0;
+}
+
+/* State carried from ctdb_do_takeip() to its event script callback */
+struct ctdb_do_takeip_state {
+ /* control request to reply to; NULL until the event has been started */
+ struct ctdb_req_control_old *c;
+ /* the public address being taken over */
+ struct ctdb_vnn *vnn;
+};
+
+/*
+  completion handler for the takeip event
+
+  On failure the control is answered with the event status (banning
+  this node first if the event timed out).  On success the address is
+  optionally announced, a TAKE_IP message is sent to the local node and
+  the control is answered with 0.  The state is freed in every case.
+ */
+static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
+				    void *private_data)
+{
+	struct ctdb_do_takeip_state *state =
+		talloc_get_type(private_data, struct ctdb_do_takeip_state);
+	struct ctdb_vnn *vnn = state->vnn;
+	TDB_DATA msg;
+
+	if (status != 0) {
+		if (status == -ETIMEDOUT) {
+			ctdb_ban_self(ctdb);
+		}
+		DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
+				 ctdb_addr_to_str(&vnn->public_address),
+				 ctdb_vnn_iface_string(vnn)));
+		ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+
+		talloc_free(state);
+		return;
+	}
+
+	/* Announcing is only attempted when do_checkpublicip is set */
+	if (ctdb->do_checkpublicip &&
+	    ctdb_announce_vnn_iface(ctdb, vnn) != 0) {
+		ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
+		talloc_free(state);
+		return;
+	}
+
+	/* The message payload is the address string including its NUL */
+	msg.dptr = (uint8_t *)ctdb_addr_to_str(&vnn->public_address);
+	msg.dsize = strlen((char *)msg.dptr) + 1;
+	DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", msg.dptr));
+
+	ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, msg);
+
+	/* the control succeeded */
+	ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
+	talloc_free(state);
+}
+
+/* Destructor for the takeip state: the VNN's update is no longer in flight */
+static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
+{
+	struct ctdb_vnn *vnn = state->vnn;
+
+	vnn->update_in_flight = false;
+
+	return 0;
+}
+
+/*
+ take over an ip address
+
+ Assigns the VNN to an interface and starts the CTDB_EVENT_TAKE_IP
+ event script; the control is answered later from
+ ctdb_do_takeip_callback(). Returns 0 once the event has been started
+ and -1 if an update is already in flight, no interface is usable or
+ the event could not be started.
+ */
+static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ struct ctdb_vnn *vnn)
+{
+ int ret;
+ struct ctdb_do_takeip_state *state;
+
+ if (vnn->update_in_flight) {
+ DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
+ "update for this IP already in flight\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits));
+ return -1;
+ }
+
+ ret = ctdb_vnn_assign_iface(ctdb, vnn);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
+ "assign a usable interface\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits));
+ return -1;
+ }
+
+ state = talloc(vnn, struct ctdb_do_takeip_state);
+ CTDB_NO_MEMORY(ctdb, state);
+
+ /* c is only attached once the event script has been started */
+ state->c = NULL;
+ state->vnn = vnn;
+
+ /* The destructor clears the flag again whenever state is freed */
+ vnn->update_in_flight = true;
+ talloc_set_destructor(state, ctdb_takeip_destructor);
+
+ DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ ctdb_vnn_iface_string(vnn)));
+
+ /* Event arguments: interface name, address, netmask bits */
+ ret = ctdb_event_script_callback(ctdb,
+ state,
+ ctdb_do_takeip_callback,
+ state,
+ CTDB_EVENT_TAKE_IP,
+ "%s %s %u",
+ ctdb_vnn_iface_string(vnn),
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits);
+
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ ctdb_vnn_iface_string(vnn)));
+ talloc_free(state);
+ return -1;
+ }
+
+ /* Reparent the request onto ctdb so the callback can answer it later */
+ state->c = talloc_steal(ctdb, c);
+ return 0;
+}
+
+/* State carried from ctdb_do_updateip() to its event script callback */
+struct ctdb_do_updateip_state {
+ /* control request to reply to */
+ struct ctdb_req_control_old *c;
+ /* interface the address was on before the update, restored on failure */
+ struct ctdb_interface *old;
+ /* the public address being moved */
+ struct ctdb_vnn *vnn;
+};
+
+/*
+  called when updateip event finishes
+
+  On failure the VNN is put back on its previous interface (banning
+  this node first if the event timed out) and the control is answered
+  with the event status.  On success the control is answered with 0.
+  The state is freed in both cases.
+ */
+static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
+				      void *private_data)
+{
+	struct ctdb_do_updateip_state *state =
+		talloc_get_type(private_data, struct ctdb_do_updateip_state);
+
+	if (status != 0) {
+		if (status == -ETIMEDOUT) {
+			ctdb_ban_self(ctdb);
+		}
+		DEBUG(DEBUG_ERR,
+		      ("Failed update of IP %s from interface %s to %s\n",
+		       ctdb_addr_to_str(&state->vnn->public_address),
+		       iface_string(state->old),
+		       ctdb_vnn_iface_string(state->vnn)));
+
+		/*
+		 * All we can do is reset the old interface
+		 * and let the next run fix it
+		 */
+		ctdb_vnn_unassign_iface(ctdb, state->vnn);
+		state->vnn->iface = state->old;
+		/*
+		 * The log message above already allows for there being
+		 * no old interface, so do not dereference it blindly.
+		 */
+		if (state->old != NULL) {
+			state->old->references++;
+		}
+
+		ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+		talloc_free(state);
+		return;
+	}
+
+	/* the control succeeded */
+	ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
+	talloc_free(state);
+}
+
+/*
+ * talloc destructor for the updateip state: once the state goes away
+ * the address no longer has an update in flight.
+ * Always returns 0.
+ */
+static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
+{
+	struct ctdb_vnn *vnn = state->vnn;
+
+	vnn->update_in_flight = false;
+
+	return 0;
+}
+
+/*
+ update (move) an ip address
+
+ Re-selects the interface for vnn and, when it differs from the one the
+ address was on, runs the "updateip" event to move the address.
+
+ ctdb: daemon context
+ c:    control being serviced; answered right here for a no-op move,
+       otherwise by ctdb_do_updateip_callback() when the event finishes
+ vnn:  public address to move
+
+ Returns 0 when the control was answered or the event was queued, -1
+ when an update is already in flight, no usable interface could be
+ assigned or the event could not be started (allocation failure is
+ handled by CTDB_NO_MEMORY - presumably also -1, confirm in the macro).
+ */
+static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ struct ctdb_vnn *vnn)
+{
+ int ret;
+ struct ctdb_do_updateip_state *state;
+ struct ctdb_interface *old = vnn->iface; /* remembered so a failed event can restore it */
+ const char *old_name = iface_string(old);
+ const char *new_name;
+
+ if (vnn->update_in_flight) { /* only one take/update/release per address at a time */
+ DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
+ "update for this IP already in flight\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits));
+ return -1;
+ }
+
+ ctdb_vnn_unassign_iface(ctdb, vnn); /* drop the current assignment ... */
+ ret = ctdb_vnn_assign_iface(ctdb, vnn); /* ... and pick an interface afresh */
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
+ "assign a usable interface (old iface '%s')\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ old_name));
+ return -1;
+ }
+
+ if (old == vnn->iface) {
+ /* A benign update from one interface onto itself.
+ * No need to run the eventscripts in this case, just reply
+ * to the control with success and return.
+ */
+ ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
+ return 0;
+ }
+
+ state = talloc(vnn, struct ctdb_do_updateip_state);
+ CTDB_NO_MEMORY(ctdb, state);
+
+ state->c = NULL; /* the control is only attached once the event is queued */
+ state->old = old;
+ state->vnn = vnn;
+
+ vnn->update_in_flight = true;
+ talloc_set_destructor(state, ctdb_updateip_destructor); /* destructor clears update_in_flight again */
+
+ new_name = ctdb_vnn_iface_string(vnn);
+ DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
+ "interface %s to %s\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ old_name,
+ new_name));
+
+ ret = ctdb_event_script_callback(ctdb,
+ state,
+ ctdb_do_updateip_callback,
+ state,
+ CTDB_EVENT_UPDATE_IP,
+ "%s %s %s %u",
+ old_name,
+ new_name,
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ ("Failed update IP %s from interface %s to %s\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ old_name, new_name));
+ talloc_free(state); /* runs the destructor, so the in-flight marker is cleared */
+ return -1;
+ }
+
+ state->c = talloc_steal(ctdb, c); /* keep the control alive until the callback replies */
+ return 0;
+}
+
+/*
+ * Look up the VNN (public address entry) on this node whose address
+ * matches addr according to ctdb_same_ip().
+ *
+ * Returns the matching entry from ctdb->vnn, or NULL if addr is not
+ * known as a public address.
+ */
+static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
+{
+	struct ctdb_vnn *cur = ctdb->vnn;
+
+	while (cur != NULL) {
+		if (ctdb_same_ip(&cur->public_address, addr)) {
+			break;
+		}
+		cur = cur->next;
+	}
+
+	/* either the match or NULL after walking off the end */
+	return cur;
+}
+
+/*
+ * Take over an IP address (TAKEOVER_IP control).
+ *
+ * ctdb:        daemon context
+ * c:           the control being serviced
+ * indata:      carries a struct ctdb_public_ip naming the address and
+ *              the node that should host it
+ * async_reply: set to true when the reply will be sent later from an
+ *              event callback
+ *
+ * Depending on whether the kernel already has the address and which
+ * interface the VNN is assigned to, this starts a "takeip" event, an
+ * "updateip" event (move between interfaces), or does nothing.
+ *
+ * Returns 0 on success (including "not a public address" and
+ * redundant takeovers), -1 on failure.
+ */
+int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
+		struct ctdb_req_control_old *c,
+		TDB_DATA indata,
+		bool *async_reply)
+{
+	int ret;
+	struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
+	struct ctdb_vnn *vnn;
+	bool have_ip = false;
+	bool do_updateip = false;
+	bool do_takeip = false;
+	struct ctdb_interface *best_iface = NULL;
+
+	if (pip->pnn != ctdb->pnn) {
+		DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
+			"with pnn %d, but we're node %d\n",
+			ctdb_addr_to_str(&pip->addr),
+			pip->pnn, ctdb->pnn));
+		return -1;
+	}
+
+	/* update our vnn list */
+	vnn = find_public_ip_vnn(ctdb, &pip->addr);
+	if (vnn == NULL) {
+		DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
+			ctdb_addr_to_str(&pip->addr)));
+		return 0;
+	}
+
+	/* only ask the kernel when failover and address checking are enabled */
+	if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
+		have_ip = ctdb_sys_have_ip(&pip->addr);
+	}
+	best_iface = ctdb_vnn_best_iface(ctdb, vnn);
+	if (best_iface == NULL) {
+		/* note the trailing space: the literals are concatenated */
+		DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find "
+			"a usable interface (old %s, have_ip %d)\n",
+			ctdb_addr_to_str(&vnn->public_address),
+			vnn->public_netmask_bits,
+			ctdb_vnn_iface_string(vnn),
+			have_ip));
+		return -1;
+	}
+
+	if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) {
+		DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
+			"and we have it on iface[%s], but it was assigned to node %d "
+			"and we are node %d, banning ourself\n",
+			ctdb_addr_to_str(&vnn->public_address),
+			ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
+		ctdb_ban_self(ctdb);
+		return -1;
+	}
+
+	if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) {
+		/* This will cause connections to be reset and
+		 * reestablished.  However, this is a very unusual
+		 * situation and doing this will completely repair the
+		 * inconsistency in the VNN.
+		 */
+		DEBUG(DEBUG_WARNING,
+			(__location__
+			" Doing updateip for IP %s already on an interface\n",
+			ctdb_addr_to_str(&vnn->public_address)));
+		do_updateip = true;
+	}
+
+	if (vnn->iface) {
+		if (vnn->iface != best_iface) {
+			if (!vnn->iface->link_up) {
+				do_updateip = true;
+			} else if (vnn->iface->references > (best_iface->references + 1)) {
+				/* only move when the rebalance gains something */
+				do_updateip = true;
+			}
+		}
+	}
+
+	if (!have_ip) {
+		/*
+		 * The kernel does not have the address, so there is
+		 * nothing to move: forget the stale assignment and do
+		 * a full takeip instead.
+		 */
+		if (do_updateip) {
+			ctdb_vnn_unassign_iface(ctdb, vnn);
+			do_updateip = false;
+		}
+		do_takeip = true;
+	}
+
+	if (do_takeip) {
+		ret = ctdb_do_takeip(ctdb, c, vnn);
+		if (ret != 0) {
+			return -1;
+		}
+	} else if (do_updateip) {
+		ret = ctdb_do_updateip(ctdb, c, vnn);
+		if (ret != 0) {
+			return -1;
+		}
+	} else {
+		/*
+		 * The interface is up and the kernel knows the ip
+		 * => do nothing
+		 */
+		DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
+			ctdb_addr_to_str(&pip->addr),
+			vnn->public_netmask_bits,
+			ctdb_vnn_iface_string(vnn)));
+		return 0;
+	}
+
+	/* tell ctdb_control.c that we will be replying asynchronously */
+	*async_reply = true;
+
+	return 0;
+}
+
+static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
+{ /* Permanently drop a public address entry (VNN) from this node; vnn is freed and must not be used afterwards. */
+ DLIST_REMOVE(ctdb->vnn, vnn); /* unlink it from ctdb's list of public addresses */
+ ctdb_vnn_unassign_iface(ctdb, vnn); /* give up the interface it is assigned to */
+ ctdb_remove_orphaned_ifaces(ctdb, vnn); /* presumably removes interfaces only this VNN used - confirm in helper */
+ talloc_free(vnn);
+}
+
+/*
+ * Bookkeeping after an address has been released: notify local
+ * clients, drop the interface assignment and, if the address was
+ * marked for deletion, delete it.
+ *
+ * Returns vnn, or NULL when the VNN has been deleted (and freed).
+ */
+static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
+		struct ctdb_vnn *vnn,
+		ctdb_sock_addr *addr)
+{
+	TDB_DATA msg;
+
+	/*
+	 * Tell every client of this node that the cluster has been
+	 * reconfigured so that they close their connections on this
+	 * IP address.  The payload is the address as a string,
+	 * including its terminating NUL.
+	 */
+	msg.dptr = (uint8_t *)ctdb_addr_to_str(addr);
+	msg.dsize = strlen((char *)msg.dptr)+1;
+	DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", msg.dptr));
+	ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, msg);
+
+	ctdb_vnn_unassign_iface(ctdb, vnn);
+
+	if (!vnn->delete_pending) {
+		return vnn;
+	}
+
+	/* The IP was marked for deletion: finish that now */
+	do_delete_ip(ctdb, vnn);
+	return NULL;
+}
+
+struct release_ip_callback_state { /* context handed from ctdb_control_release_ip() to release_ip_callback() */
+ struct ctdb_req_control_old *c; /* control to answer; stays NULL until the event has been queued */
+ ctdb_sock_addr *addr; /* copy of the address being released, allocated under this state */
+ struct ctdb_vnn *vnn; /* VNN being released; set to NULL by the callback if the VNN got deleted */
+ uint32_t target_pnn; /* node the address is moving to, stored into vnn->pnn on success */
+};
+
+/*
+ * Completion callback for the "releaseip" event.
+ *
+ * A timeout makes this node ban itself.  When address checking is
+ * enabled and the kernel still has the address, the control fails with
+ * -1.  Otherwise the VNN is pointed at the target node, the release
+ * bookkeeping is done and the control is answered with success.
+ */
+static void release_ip_callback(struct ctdb_context *ctdb, int status,
+		void *private_data)
+{
+	struct release_ip_callback_state *state =
+		talloc_get_type(private_data, struct release_ip_callback_state);
+	int32_t result = 0;
+
+	if (status == -ETIMEDOUT) {
+		ctdb_ban_self(ctdb);
+	}
+
+	if (ctdb_config.failover_disabled == 0 &&
+	    ctdb->do_checkpublicip &&
+	    ctdb_sys_have_ip(state->addr)) {
+		DEBUG(DEBUG_ERR,
+			("IP %s still hosted during release IP callback, failing\n",
+			ctdb_addr_to_str(state->addr)));
+		result = -1;
+	} else {
+		state->vnn->pnn = state->target_pnn;
+		/* may return NULL if the VNN was deleted */
+		state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
+	}
+
+	ctdb_request_control_reply(ctdb, state->c, NULL, result, NULL);
+	talloc_free(state);
+}
+
+/*
+ * talloc destructor for the releaseip state: clear the in-flight
+ * marker unless the VNN is already gone (state->vnn == NULL).
+ * Always returns 0.
+ */
+static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
+{
+	struct ctdb_vnn *vnn = state->vnn;
+
+	if (vnn == NULL) {
+		return 0;
+	}
+
+	vnn->update_in_flight = false;
+	return 0;
+}
+
+/*
+ release an ip address
+
+ ctdb:        daemon context
+ c:           the control being serviced
+ indata:      carries a struct ctdb_public_ip: the address to release and
+              the node it is moving to
+ async_reply: set to true when the reply will be sent later from
+              release_ip_callback()
+
+ Returns 0 when there was nothing to do or the "releaseip" event was
+ queued, -1 on failure (update already in flight, out of memory, or the
+ event could not be started).
+ */
+int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata,
+ bool *async_reply)
+{
+ int ret;
+ struct release_ip_callback_state *state;
+ struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
+ struct ctdb_vnn *vnn;
+ const char *iface;
+
+ /* look the address up in our vnn list */
+ vnn = find_public_ip_vnn(ctdb, &pip->addr);
+ if (vnn == NULL) {
+ DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
+ ctdb_addr_to_str(&pip->addr)));
+ return 0;
+ }
+
+ /* stop any previous arps */
+ talloc_free(vnn->takeover_ctx);
+ vnn->takeover_ctx = NULL;
+
+ /* RELEASE_IP controls are sent to all nodes that should not
+ * be hosting a particular IP. This serves 2 purposes. The
+ * first is to help resolve any inconsistencies. If a node
+ * does unexpectedly host an IP then it will be released. The
+ * 2nd is to use a "redundant release" to tell non-takeover
+ * nodes where an IP is moving to. This is how "ctdb ip" can
+ * report the (likely) location of an IP by only asking the
+ * local node. Redundant releases need to update the PNN but
+ * are otherwise ignored.
+ */
+ if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
+ if (!ctdb_sys_have_ip(&pip->addr)) {
+ DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
+ ctdb_addr_to_str(&pip->addr),
+ vnn->public_netmask_bits,
+ ctdb_vnn_iface_string(vnn)));
+ vnn->pnn = pip->pnn;
+ ctdb_vnn_unassign_iface(ctdb, vnn);
+ return 0;
+ }
+ } else {
+ if (vnn->iface == NULL) { /* without the kernel check, "no interface assigned" stands in for "ip not held" */
+ DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
+ ctdb_addr_to_str(&pip->addr),
+ vnn->public_netmask_bits));
+ vnn->pnn = pip->pnn;
+ return 0;
+ }
+ }
+
+ /* There is a potential race between take_ip and us because we
+ * update the VNN via a callback that runs when the
+ * eventscripts have been run. Avoid the race by allowing one
+ * update to be in flight at a time.
+ */
+ if (vnn->update_in_flight) {
+ DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
+ "update for this IP already in flight\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits));
+ return -1;
+ }
+
+ iface = ctdb_vnn_iface_string(vnn);
+
+ DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
+ ctdb_addr_to_str(&pip->addr),
+ vnn->public_netmask_bits,
+ iface,
+ pip->pnn));
+
+ state = talloc(ctdb, struct release_ip_callback_state);
+ if (state == NULL) {
+ ctdb_set_error(ctdb, "Out of memory at %s:%d",
+ __FILE__, __LINE__);
+ return -1;
+ }
+
+ state->c = NULL; /* the control is only attached once the event is queued */
+ state->addr = talloc(state, ctdb_sock_addr);
+ if (state->addr == NULL) {
+ ctdb_set_error(ctdb, "Out of memory at %s:%d",
+ __FILE__, __LINE__);
+ talloc_free(state); /* no destructor installed yet, so update_in_flight is untouched */
+ return -1;
+ }
+ *state->addr = pip->addr; /* private copy: indata is not kept for the callback */
+ state->target_pnn = pip->pnn;
+ state->vnn = vnn;
+
+ vnn->update_in_flight = true;
+ talloc_set_destructor(state, ctdb_releaseip_destructor); /* destructor clears update_in_flight again */
+
+ ret = ctdb_event_script_callback(ctdb,
+ state, release_ip_callback, state,
+ CTDB_EVENT_RELEASE_IP,
+ "%s %s %u",
+ iface,
+ ctdb_addr_to_str(&pip->addr),
+ vnn->public_netmask_bits);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
+ ctdb_addr_to_str(&pip->addr),
+ ctdb_vnn_iface_string(vnn)));
+ talloc_free(state);
+ return -1;
+ }
+
+ /* tell the control that we will be replying asynchronously */
+ *async_reply = true;
+ state->c = talloc_steal(state, c);
+ return 0;
+}
+
+/*
+ * Register one public address together with the interfaces it may be
+ * hosted on.
+ *
+ * ctdb:          daemon context
+ * addr:          the public address
+ * mask:          netmask length in bits
+ * ifaces:        comma separated list of interface names
+ * check_address: not used by this function
+ *
+ * Returns 0 on success, -1 if the address is a duplicate, an interface
+ * is unknown or cannot be added, or memory runs out.  On failure
+ * nothing is added to ctdb->vnn.
+ */
+static int ctdb_add_public_address(struct ctdb_context *ctdb,
+		ctdb_sock_addr *addr,
+		unsigned mask, const char *ifaces,
+		bool check_address)
+{
+	struct ctdb_vnn *vnn;
+	char *tmp;
+	char *saveptr = NULL;
+	const char *iface;
+
+	/* Verify that we don't have an entry for this IP yet */
+	for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
+		if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
+			D_ERR("Duplicate public IP address '%s'\n",
+				ctdb_addr_to_str(addr));
+			return -1;
+		}
+	}
+
+	/* Create a new VNN structure for this IP address */
+	vnn = talloc_zero(ctdb, struct ctdb_vnn);
+	if (vnn == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		return -1;
+	}
+
+	/* strtok_r() modifies its input, so work on a private copy */
+	tmp = talloc_strdup(vnn, ifaces);
+	if (tmp == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		goto fail;
+	}
+
+	/*
+	 * Use the reentrant tokenizer: the caller is itself in the
+	 * middle of tokenizing a line with strtok(), and the helpers
+	 * called below must not be able to disturb this loop either.
+	 */
+	for (iface = strtok_r(tmp, ",", &saveptr);
+	     iface != NULL;
+	     iface = strtok_r(NULL, ",", &saveptr)) {
+		struct vnn_interface *vnn_iface;
+		struct ctdb_interface *i;
+
+		if (!ctdb_sys_check_iface_exists(iface)) {
+			D_ERR("Unknown interface %s for public address %s\n",
+				iface,
+				ctdb_addr_to_str(addr));
+			goto fail;
+		}
+
+		i = ctdb_add_local_iface(ctdb, iface);
+		if (i == NULL) {
+			D_ERR("Failed to add interface '%s' "
+				"for public address %s\n",
+				iface,
+				ctdb_addr_to_str(addr));
+			goto fail;
+		}
+
+		vnn_iface = talloc_zero(vnn, struct vnn_interface);
+		if (vnn_iface == NULL) {
+			DBG_ERR("Memory allocation error\n");
+			goto fail;
+		}
+
+		vnn_iface->iface = i;
+		DLIST_ADD_END(vnn->ifaces, vnn_iface);
+	}
+	talloc_free(tmp);
+
+	vnn->public_address = *addr;
+	vnn->public_netmask_bits = mask;
+	/* not assigned to any node yet */
+	vnn->pnn = CTDB_UNKNOWN_PNN;
+
+	DLIST_ADD(ctdb->vnn, vnn);
+
+	return 0;
+
+fail:
+	/* tmp and the interface entries are children of vnn */
+	talloc_free(vnn);
+	return -1;
+}
+
+/*
+ * Load the public address list from ctdb->public_addresses_file, or
+ * from $CTDB_BASE/public_addresses when no file has been configured.
+ *
+ * Each non-empty, non-comment ('#') line holds an address/mask
+ * followed by a comma separated interface list.
+ *
+ * Returns 0 on success or when the file does not exist, -1 on any
+ * error.
+ */
+int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
+{
+	char **lines;
+	int nlines;
+	int i;
+
+	/* Fall back to the default location if no file was given */
+	if (ctdb->public_addresses_file == NULL) {
+		const char *base = getenv("CTDB_BASE");
+
+		if (base == NULL) {
+			DBG_ERR("CTDB_BASE not set\n");
+			return -1;
+		}
+		ctdb->public_addresses_file = talloc_asprintf(
+			ctdb, "%s/%s", base, "public_addresses");
+		if (ctdb->public_addresses_file == NULL) {
+			DBG_ERR("Out of memory\n");
+			return -1;
+		}
+	}
+
+	/* A missing file is only worth a warning */
+	if (!file_exist(ctdb->public_addresses_file)) {
+		D_WARNING("Not loading public addresses, no file %s\n",
+			ctdb->public_addresses_file);
+		return 0;
+	}
+
+	lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
+	if (lines == NULL) {
+		ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
+		return -1;
+	}
+
+	/* ignore trailing empty lines */
+	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
+		nlines--;
+	}
+
+	for (i = 0; i < nlines; i++) {
+		unsigned mask;
+		ctdb_sock_addr addr;
+		const char *addrstr;
+		const char *ifaces;
+		char *line = lines[i];
+		int ret;
+
+		/* skip leading blanks, then comments and empty lines */
+		line += strspn(line, " \t");
+		if (*line == '#' || *line == '\0') {
+			continue;
+		}
+
+		addrstr = strtok(line, " \t");
+		ifaces = strtok(NULL, " \t");
+		if (ifaces == NULL) {
+			D_ERR("No interface specified at line %u "
+				"of public addresses file\n", i+1);
+			goto fail;
+		}
+
+		if (addrstr == NULL) {
+			D_ERR("Badly formed line %u in public address list\n",
+				i+1);
+			goto fail;
+		}
+
+		ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
+		if (ret != 0) {
+			D_ERR("Badly formed line %u in public address list\n",
+				i+1);
+			goto fail;
+		}
+
+		ret = ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses);
+		if (ret != 0) {
+			DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
+			goto fail;
+		}
+	}
+
+	D_NOTICE("Loaded public addresses from %s\n",
+		ctdb->public_addresses_file);
+
+	talloc_free(lines);
+	return 0;
+
+fail:
+	talloc_free(lines);
+	return -1;
+}
+
+/*
+ * talloc destructor for a ctdb_tcp_list entry: log the connection and
+ * unlink the entry from its client's tcp_list.
+ * Always returns 0.
+ */
+static int ctdb_tcp_list_destructor(struct ctdb_tcp_list *tcp)
+{
+	char conn_str[132] = { 0, };
+	struct ctdb_client *owner = tcp->client;
+
+	if (ctdb_connection_to_buf(conn_str,
+			sizeof(conn_str),
+			&tcp->connection,
+			false,
+			" -> ") != 0) {
+		strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
+	}
+
+	D_DEBUG("removing client TCP connection %s "
+		"(client_id %u pid %d)\n",
+		conn_str, owner->client_id, owner->pid);
+
+	DLIST_REMOVE(owner->tcp_list, tcp);
+
+	/*
+	 * ctdb_remove_connection(vnn, conn) is deliberately not called
+	 * here: the caller decides whether it is invoked directly
+	 * (local only) or indirectly through a
+	 * CTDB_CONTROL_TCP_REMOVE broadcast.
+	 */
+
+	return 0;
+}
+
+/*
+ * Called by a client to inform us of a TCP connection that it is
+ * managing and that should be tickled with an ACK when IP takeover is
+ * done.
+ *
+ * ctdb:      daemon context
+ * client_id: id of the registering client
+ * indata:    carries a struct ctdb_connection (canonicalized in place)
+ *
+ * Returns 0 on success or when there is nothing to do (no public
+ * addresses, destination is not a public address), -1 when the client
+ * is unknown, the address is not held by this node, memory runs out or
+ * the TCP_ADD broadcast cannot be sent.
+ */
+int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
+		TDB_DATA indata)
+{
+	struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	struct ctdb_connection *tcp_sock = NULL;
+	struct ctdb_tcp_list *tcp;
+	struct ctdb_connection t;
+	int ret;
+	TDB_DATA data;
+	struct ctdb_vnn *vnn;
+	char conn_str[132] = { 0, };
+
+	/* If we don't have public IPs, tickles are useless */
+	if (ctdb->vnn == NULL) {
+		return 0;
+	}
+
+	/*
+	 * Everything below dereferences the client (logging, list
+	 * ownership), so an id that does not resolve must be rejected.
+	 */
+	if (client == NULL) {
+		DBG_ERR("Unknown client_id %u\n", client_id);
+		return -1;
+	}
+
+	tcp_sock = (struct ctdb_connection *)indata.dptr;
+
+	ctdb_canonicalize_ip_inplace(&tcp_sock->src);
+	ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
+
+	ret = ctdb_connection_to_buf(conn_str,
+		sizeof(conn_str),
+		tcp_sock,
+		false,
+		" -> ");
+	if (ret != 0) {
+		strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
+	}
+
+	vnn = find_public_ip_vnn(ctdb, &tcp_sock->dst);
+	if (vnn == NULL) {
+		D_ERR("Could not register TCP connection %s - "
+			"not a public address (client_id %u pid %u)\n",
+			conn_str, client_id, client->pid);
+		return 0;
+	}
+
+	if (vnn->pnn != ctdb->pnn) {
+		D_ERR("Attempt to register tcp client for IP %s we don't hold - "
+			"failing (client_id %u pid %u)\n",
+			ctdb_addr_to_str(&tcp_sock->dst),
+			client_id, client->pid);
+		/* failing this call will tell smbd to die */
+		return -1;
+	}
+
+	/* owned by the client, so it goes away with the client */
+	tcp = talloc(client, struct ctdb_tcp_list);
+	CTDB_NO_MEMORY(ctdb, tcp);
+	tcp->client = client;
+
+	tcp->connection.src = tcp_sock->src;
+	tcp->connection.dst = tcp_sock->dst;
+
+	DLIST_ADD(client->tcp_list, tcp);
+	talloc_set_destructor(tcp, ctdb_tcp_list_destructor);
+
+	t.src = tcp_sock->src;
+	t.dst = tcp_sock->dst;
+
+	data.dptr = (uint8_t *)&t;
+	data.dsize = sizeof(t);
+
+	D_INFO("Registered TCP connection %s (client_id %u pid %u)\n",
+		conn_str, client_id, client->pid);
+
+	/* tell all nodes about this tcp connection */
+	ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
+		CTDB_CONTROL_TCP_ADD,
+		0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Free every entry on client->tcp_list that matches conn according to
+ * ctdb_connection_same().
+ *
+ * Returns true if at least one entry was removed.
+ */
+static bool ctdb_client_remove_tcp(struct ctdb_client *client,
+		const struct ctdb_connection *conn)
+{
+	struct ctdb_tcp_list *cur = client->tcp_list;
+	bool removed = false;
+
+	while (cur != NULL) {
+		/* fetch the successor first: cur may be freed below */
+		struct ctdb_tcp_list *following = cur->next;
+
+		if (ctdb_connection_same(conn, &cur->connection)) {
+			TALLOC_FREE(cur);
+			removed = true;
+		}
+
+		cur = following;
+	}
+
+	return removed;
+}
+
+/*
+ * Called by a client to inform us of a TCP connection that was
+ * disconnected.
+ *
+ * ctdb:      daemon context
+ * client_id: id of the client that owned the connection
+ * indata:    carries a struct ctdb_connection (canonicalized in place)
+ *
+ * Returns 0 on success or when the connection was not registered for
+ * this client, -1 when the client is unknown or the TCP_REMOVE
+ * broadcast cannot be sent.
+ */
+int32_t ctdb_control_tcp_client_disconnected(struct ctdb_context *ctdb,
+		uint32_t client_id,
+		TDB_DATA indata)
+{
+	struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	struct ctdb_connection *tcp_sock = NULL;
+	int ret;
+	TDB_DATA data;
+	char conn_str[132] = { 0, };
+	bool found = false;
+
+	/* the client is dereferenced below, so it has to resolve */
+	if (client == NULL) {
+		DBG_ERR("Unknown client_id %u\n", client_id);
+		return -1;
+	}
+
+	tcp_sock = (struct ctdb_connection *)indata.dptr;
+
+	ctdb_canonicalize_ip_inplace(&tcp_sock->src);
+	ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
+
+	ret = ctdb_connection_to_buf(conn_str,
+		sizeof(conn_str),
+		tcp_sock,
+		false,
+		" -> ");
+	if (ret != 0) {
+		strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
+	}
+
+	found = ctdb_client_remove_tcp(client, tcp_sock);
+	if (!found) {
+		DBG_DEBUG("TCP connection %s not found "
+			"(client_id %u pid %u).\n",
+			conn_str, client_id, client->pid);
+		return 0;
+	}
+
+	D_INFO("deregistered TCP connection %s "
+		"(client_id %u pid %u)\n",
+		conn_str, client_id, client->pid);
+
+	data.dptr = (uint8_t *)tcp_sock;
+	data.dsize = sizeof(*tcp_sock);
+
+	/* tell all nodes that this tcp connection is gone */
+	ret = ctdb_daemon_send_control(ctdb,
+		CTDB_BROADCAST_CONNECTED,
+		0,
+		CTDB_CONTROL_TCP_REMOVE,
+		0,
+		CTDB_CTRL_FLAG_NOREPLY,
+		data,
+		NULL,
+		NULL);
+	if (ret != 0) {
+		DBG_ERR("Failed to send CTDB_CONTROL_TCP_REMOVE: %s\n",
+			conn_str);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Called by a client to inform us that a TCP connection was passed to
+ * a different "client" (typically with multichannel to another smbd
+ * process).
+ *
+ * ctdb:      daemon context
+ * client_id: id of the client that gave the connection away
+ * indata:    carries a struct ctdb_connection (canonicalized in place)
+ *
+ * Only the per-client registration is dropped here.  Returns 0 on
+ * success or when the connection was not registered for this client,
+ * -1 when the client is unknown.
+ */
+int32_t ctdb_control_tcp_client_passed(struct ctdb_context *ctdb,
+		uint32_t client_id,
+		TDB_DATA indata)
+{
+	struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	struct ctdb_connection *tcp_sock = NULL;
+	int ret;
+	char conn_str[132] = { 0, };
+	bool found = false;
+
+	/* the client is dereferenced below, so it has to resolve */
+	if (client == NULL) {
+		DBG_ERR("Unknown client_id %u\n", client_id);
+		return -1;
+	}
+
+	tcp_sock = (struct ctdb_connection *)indata.dptr;
+
+	ctdb_canonicalize_ip_inplace(&tcp_sock->src);
+	ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
+
+	ret = ctdb_connection_to_buf(conn_str,
+		sizeof(conn_str),
+		tcp_sock,
+		false,
+		" -> ");
+	if (ret != 0) {
+		strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
+	}
+
+	found = ctdb_client_remove_tcp(client, tcp_sock);
+	if (!found) {
+		DBG_DEBUG("TCP connection from %s not found "
+			"(client_id %u pid %u).\n",
+			conn_str, client_id, client->pid);
+		return 0;
+	}
+
+	D_INFO("TCP connection from %s "
+		"(client_id %u pid %u) passed to another client\n",
+		conn_str, client_id, client->pid);
+
+	/*
+	 * We don't call CTDB_CONTROL_TCP_REMOVE
+	 * nor ctdb_remove_connection() as the connection
+	 * is still alive, but handled by another client
+	 */
+
+	return 0;
+}
+
+/*
+ * Search a connection array for an entry whose source and destination
+ * both match tcp according to ctdb_same_sockaddr().
+ *
+ * Returns a pointer into array->connections, or NULL when array is
+ * NULL or holds no match.
+ */
+static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
+		struct ctdb_connection *tcp)
+{
+	unsigned int idx = 0;
+
+	if (array == NULL) {
+		return NULL;
+	}
+
+	while (idx < array->num) {
+		struct ctdb_connection *candidate = &array->connections[idx];
+
+		if (ctdb_same_sockaddr(&candidate->src, &tcp->src) &&
+		    ctdb_same_sockaddr(&candidate->dst, &tcp->dst)) {
+			return candidate;
+		}
+		idx++;
+	}
+
+	return NULL;
+}
+
+
+
+/*
+ * Called by a daemon to inform us of a TCP connection that one of its
+ * clients is managing and that should be tickled with an ACK when IP
+ * takeover is done.
+ *
+ * ctdb:              daemon context
+ * indata:            carries a struct ctdb_connection
+ * tcp_update_needed: when true, flag the VNN as needing a tickle update
+ *
+ * Returns 0 on success (including "no public addresses" and "already
+ * known"), -1 when the destination is not a public address or memory
+ * runs out.  On allocation failure the existing tickle array is left
+ * untouched.
+ */
+int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
+{
+	struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
+	struct ctdb_tcp_array *tcparray;
+	struct ctdb_connection *conns;
+	struct ctdb_connection tcp;
+	struct ctdb_vnn *vnn;
+
+	/* If we don't have public IPs, tickles are useless */
+	if (ctdb->vnn == NULL) {
+		return 0;
+	}
+
+	vnn = find_public_ip_vnn(ctdb, &p->dst);
+	if (vnn == NULL) {
+		DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
+			ctdb_addr_to_str(&p->dst)));
+
+		return -1;
+	}
+
+	tcparray = vnn->tcp_array;
+
+	/* If this is the first tickle */
+	if (tcparray == NULL) {
+		tcparray = talloc(vnn, struct ctdb_tcp_array);
+		CTDB_NO_MEMORY(ctdb, tcparray);
+
+		conns = talloc_size(tcparray, sizeof(struct ctdb_connection));
+		if (conns == NULL) {
+			/* don't keep a half-built array around */
+			talloc_free(tcparray);
+		}
+		CTDB_NO_MEMORY(ctdb, conns);
+
+		conns[0].src = p->src;
+		conns[0].dst = p->dst;
+		tcparray->connections = conns;
+		tcparray->num = 1;
+
+		/* attach only once the array is fully built */
+		vnn->tcp_array = tcparray;
+
+		if (tcp_update_needed) {
+			vnn->tcp_update_needed = true;
+		}
+		return 0;
+	}
+
+	/* Do we already have this tickle ?*/
+	tcp.src = p->src;
+	tcp.dst = p->dst;
+	if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
+		DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
+			ctdb_addr_to_str(&tcp.dst),
+			ntohs(tcp.dst.ip.sin_port),
+			vnn->pnn));
+		return 0;
+	}
+
+	/*
+	 * A new tickle, we must add it to the array.  Grow through a
+	 * temporary: if the reallocation fails the old array must stay
+	 * reachable, otherwise connections would be NULL while num is
+	 * still non-zero.
+	 */
+	conns = talloc_realloc(tcparray, tcparray->connections,
+		struct ctdb_connection,
+		tcparray->num+1);
+	CTDB_NO_MEMORY(ctdb, conns);
+	tcparray->connections = conns;
+
+	tcparray->connections[tcparray->num].src = p->src;
+	tcparray->connections[tcparray->num].dst = p->dst;
+	tcparray->num++;
+
+	DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
+		ctdb_addr_to_str(&tcp.dst),
+		ntohs(tcp.dst.ip.sin_port),
+		vnn->pnn));
+
+	if (tcp_update_needed) {
+		vnn->tcp_update_needed = true;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Remove conn from the tickle array of vnn, if it is there.
+ *
+ * Does nothing when vnn is NULL, when the VNN has no tickle array or
+ * when the connection is not in it.  Otherwise the entry is dropped,
+ * the array is freed once it becomes empty and the VNN is flagged as
+ * needing a tickle update.
+ */
+static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
+{
+	struct ctdb_tcp_array *arr;
+	struct ctdb_connection *slot;
+
+	if (vnn == NULL) {
+		return;
+	}
+
+	/* no array at all: nothing to remove */
+	arr = vnn->tcp_array;
+	if (arr == NULL) {
+		DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
+			ctdb_addr_to_str(&conn->dst),
+			ntohs(conn->dst.ip.sin_port)));
+		return;
+	}
+
+	/* unknown connection: nothing to remove either */
+	slot = ctdb_tcp_find(arr, conn);
+	if (slot == NULL) {
+		DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
+			ctdb_addr_to_str(&conn->dst),
+			ntohs(conn->dst.ip.sin_port)));
+		return;
+	}
+
+	/*
+	 * Rather than building a new array, overwrite the doomed entry
+	 * with the last one and shrink the count by one.
+	 */
+	arr->num--;
+	*slot = arr->connections[arr->num];
+
+	/* the last entry went away: drop the whole array */
+	if (arr->num == 0) {
+		talloc_free(arr);
+		vnn->tcp_array = NULL;
+	}
+
+	vnn->tcp_update_needed = true;
+
+	DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
+		ctdb_addr_to_str(&conn->src),
+		ntohs(conn->src.ip.sin_port)));
+}
+
+
+/*
+ * Called by a daemon to inform us that a TCP connection used by one of
+ * its clients is no longer needed in the tickle database.
+ *
+ * indata carries a struct ctdb_connection.  Always returns 0; an
+ * unknown destination address is only logged.
+ */
+int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
+	struct ctdb_vnn *vnn;
+
+	/* If we don't have public IPs, tickles are useless */
+	if (ctdb->vnn == NULL) {
+		return 0;
+	}
+
+	vnn = find_public_ip_vnn(ctdb, &conn->dst);
+	if (vnn != NULL) {
+		ctdb_remove_connection(vnn, conn);
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,
+		(__location__ " unable to find public address %s\n",
+		ctdb_addr_to_str(&conn->dst)));
+	return 0;
+}
+
+
+static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
+ bool force);
+
+/*
+ Called when another daemon starts (pnn is the node that sent the
+ startup control) - the tickles for all public addresses we are
+ serving need to reach the new node. Rather than waiting for the next
+ periodic check, the tickles are sent immediately (forced). In case
+ there is an error, the periodic timer will send the updates on timer
+ event. This is simple and doesn't require careful error handling.
+
+ Always returns 0.
+ */
+int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
+{
+ DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
+ (unsigned long) pnn));
+
+ ctdb_send_set_tcp_tickles_for_all(ctdb, true); /* force == true */
+ return 0;
+}
+
+
+/*
+ * Called when a client structure goes away: empties the client's
+ * tcp_list and, for addresses hosted on this node, also removes the
+ * connections from the tickle arrays.
+ */
+void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
+{
+	struct ctdb_tcp_list *tcp;
+
+	while ((tcp = client->tcp_list) != NULL) {
+		struct ctdb_connection *conn = &tcp->connection;
+		struct ctdb_vnn *vnn = find_public_ip_vnn(client->ctdb,
+			&conn->dst);
+
+		/*
+		 * Only remove the connection when the IP address is
+		 * hosted on this node.  Otherwise we got here because
+		 * the server IP address has been released to another
+		 * node and the client has exited; the connection
+		 * information must then be kept, as the takeover node
+		 * processes connections too.
+		 */
+		if (vnn != NULL && vnn->pnn == client->ctdb->pnn) {
+			ctdb_remove_connection(vnn, conn);
+		}
+
+		/* the destructor unlinks tcp from client->tcp_list */
+		TALLOC_FREE(tcp);
+	}
+}
+
+
+void ctdb_release_all_ips(struct ctdb_context *ctdb)
+{ /*
+ * Release every public address the kernel currently has on this node
+ * by running the "releaseip" event for each of them in turn, then log
+ * how many were released. Does nothing when failover is disabled.
+ */
+ struct ctdb_vnn *vnn, *next;
+ int count = 0;
+
+ if (ctdb_config.failover_disabled == 1) {
+ return;
+ }
+
+ for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
+ /* vnn can be freed below in release_ip_post() */
+ next = vnn->next;
+
+ if (!ctdb_sys_have_ip(&vnn->public_address)) { /* address not held: just drop the interface assignment */
+ ctdb_vnn_unassign_iface(ctdb, vnn);
+ continue;
+ }
+
+ /* Don't allow multiple releases at once. Some code,
+ * particularly ctdb_tickle_sentenced_connections() is
+ * not re-entrant */
+ if (vnn->update_in_flight) {
+ DEBUG(DEBUG_WARNING,
+ (__location__
+ " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ ctdb_vnn_iface_string(vnn)));
+ continue;
+ }
+ vnn->update_in_flight = true;
+
+ DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ ctdb_vnn_iface_string(vnn)));
+
+ ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
+ ctdb_vnn_iface_string(vnn),
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits);
+ /* releaseip timeouts are converted to success, so to
+ * detect failures just check if the IP address is
+ * still there...
+ */
+ if (ctdb_sys_have_ip(&vnn->public_address)) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " IP address %s not released\n",
+ ctdb_addr_to_str(&vnn->public_address)));
+ vnn->update_in_flight = false;
+ continue;
+ }
+
+ vnn = release_ip_post(ctdb, vnn, &vnn->public_address); /* returns NULL when the VNN was deleted */
+ if (vnn != NULL) {
+ vnn->update_in_flight = false;
+ }
+ count++;
+ }
+
+ DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
+}
+
+
+/*
+ * Build the list of public IPs known to this node.
+ *
+ * When the control carries CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE only
+ * addresses for which ctdb_vnn_available() holds are reported.  The
+ * result is allocated under outdata and sized to the number of
+ * addresses actually reported.
+ *
+ * Returns 0 on success.
+ */
+int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
+		struct ctdb_req_control_old *c, TDB_DATA *outdata)
+{
+	int filled, total, len;
+	struct ctdb_public_ip_list_old *ips;
+	struct ctdb_vnn *vnn;
+	bool only_available;
+
+	only_available = (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) ? true : false;
+
+	/* worst case: every public address gets reported */
+	total = 0;
+	vnn = ctdb->vnn;
+	while (vnn != NULL) {
+		total++;
+		vnn = vnn->next;
+	}
+
+	len = offsetof(struct ctdb_public_ip_list_old, ips) +
+		total*sizeof(struct ctdb_public_ip);
+	ips = talloc_zero_size(outdata, len);
+	CTDB_NO_MEMORY(ctdb, ips);
+
+	filled = 0;
+	for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
+		if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
+			continue;
+		}
+		ips->ips[filled].pnn = vnn->pnn;
+		ips->ips[filled].addr = vnn->public_address;
+		filled++;
+	}
+	ips->num = filled;
+
+	/* report only the part of the buffer that was filled */
+	len = offsetof(struct ctdb_public_ip_list_old, ips) +
+		filled*sizeof(struct ctdb_public_ip);
+
+	outdata->dptr = (uint8_t *)ips;
+	outdata->dsize = len;
+
+	return 0;
+}
+
+
+/*
+ * Report details about one public address: its owner node and every
+ * interface it may be hosted on, with link state and reference count.
+ *
+ * indata carries the ctdb_sock_addr to look up.  active_idx in the
+ * result is the index of the interface the address is currently
+ * assigned to, or 0xFFFFFFFF if there is none.  The result is
+ * allocated under outdata.
+ *
+ * Returns 0 on success, -1 if the address is not a public address.
+ */
+int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
+		struct ctdb_req_control_old *c,
+		TDB_DATA indata,
+		TDB_DATA *outdata)
+{
+	int idx, total, len;
+	ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
+	struct ctdb_public_ip_info_old *info;
+	struct ctdb_vnn *vnn;
+	struct vnn_interface *iface;
+
+	vnn = find_public_ip_vnn(ctdb, addr);
+	if (vnn == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
+			"'%s'not a public address\n",
+			ctdb_addr_to_str(addr)));
+		return -1;
+	}
+
+	/* count the interfaces configured for this address */
+	total = 0;
+	iface = vnn->ifaces;
+	while (iface != NULL) {
+		total++;
+		iface = iface->next;
+	}
+
+	len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
+		total*sizeof(struct ctdb_iface);
+	info = talloc_zero_size(outdata, len);
+	CTDB_NO_MEMORY(ctdb, info);
+
+	info->ip.addr = vnn->public_address;
+	info->ip.pnn = vnn->pnn;
+	info->active_idx = 0xFFFFFFFF;
+
+	idx = 0;
+	for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
+		struct ctdb_interface *cur = iface->iface;
+		struct ctdb_iface *out = &info->ifaces[idx];
+
+		if (vnn->iface == cur) {
+			info->active_idx = idx;
+		}
+		/* bounded copy, always NUL terminated */
+		strncpy(out->name, cur->name, sizeof(out->name));
+		out->name[sizeof(out->name)-1] = '\0';
+		out->link_state = cur->link_up;
+		out->references = cur->references;
+
+		idx++;
+	}
+	info->num = idx;
+	len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
+		idx*sizeof(struct ctdb_iface);
+
+	outdata->dptr = (uint8_t *)info;
+	outdata->dsize = len;
+
+	return 0;
+}
+
+/*
+ * Control handler: return every interface known to this daemon.
+ *
+ * outdata - on success, a talloc'ed ctdb_iface_list_old with one
+ *           entry (name, link state, reference count) per element
+ *           of ctdb->ifaces
+ *
+ * Returns 0 on success, -1 on allocation failure.
+ */
+int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
+				struct ctdb_req_control_old *c,
+				TDB_DATA *outdata)
+{
+	struct ctdb_iface_list_old *ifaces;
+	struct ctdb_interface *ci;
+	int total = 0;
+	int idx = 0;
+	int len;
+
+	/* Count the interfaces so the reply can be sized up front */
+	for (ci = ctdb->ifaces; ci != NULL; ci = ci->next) {
+		total++;
+	}
+
+	len = offsetof(struct ctdb_iface_list_old, ifaces) +
+		total*sizeof(struct ctdb_iface);
+	ifaces = talloc_zero_size(outdata, len);
+	CTDB_NO_MEMORY(ctdb, ifaces);
+
+	for (ci = ctdb->ifaces; ci != NULL; ci = ci->next, idx++) {
+		/* strncpy() may leave the name unterminated, so force it */
+		strncpy(ifaces->ifaces[idx].name, ci->name,
+			sizeof(ifaces->ifaces[idx].name));
+		ifaces->ifaces[idx].name[sizeof(ifaces->ifaces[idx].name)-1] = '\0';
+		ifaces->ifaces[idx].link_state = ci->link_up;
+		ifaces->ifaces[idx].references = ci->references;
+	}
+	ifaces->num = idx;
+
+	len = offsetof(struct ctdb_iface_list_old, ifaces) +
+		idx*sizeof(struct ctdb_iface);
+	outdata->dsize = len;
+	outdata->dptr = (uint8_t *)ifaces;
+
+	return 0;
+}
+
+/*
+ * Control handler: record a new link state for a named interface.
+ *
+ * indata carries a struct ctdb_iface.  The request is rejected (-1)
+ * if the name is not terminated at index CTDB_IFACE_SIZE, if
+ * link_state is neither 0 nor 1, if references is non-zero, or if no
+ * interface of that name is known.  Otherwise 0 is returned and the
+ * interface's link_up flag is updated (a change is logged).
+ */
+int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
+				    struct ctdb_req_control_old *c,
+				    TDB_DATA indata)
+{
+	struct ctdb_iface *info = (struct ctdb_iface *)indata.dptr;
+	struct ctdb_interface *iface;
+	bool link_up;
+
+	if (info->name[CTDB_IFACE_SIZE] != '\0') {
+		/* Bound the printed name since it is not terminated */
+		int len = strnlen(info->name, CTDB_IFACE_SIZE);
+		DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
+				  len, len, info->name));
+		return -1;
+	}
+
+	if (info->link_state != 0 && info->link_state != 1) {
+		DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
+				  (unsigned int)info->link_state));
+		return -1;
+	}
+	link_up = (info->link_state == 1);
+
+	if (info->references != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
+				  (unsigned int)info->references));
+		return -1;
+	}
+
+	iface = ctdb_find_iface(ctdb, info->name);
+	if (iface == NULL) {
+		return -1;
+	}
+
+	/* Only log and store when the state really changes */
+	if (iface->link_up != link_up) {
+		DEBUG(DEBUG_ERR,
+		      ("iface[%s] has changed it's link status %s => %s\n",
+		       iface->name,
+		       iface->link_up?"up":"down",
+		       link_up?"up":"down"));
+
+		iface->link_up = link_up;
+	}
+
+	return 0;
+}
+
+
+/*
+  called by a daemon to inform us of the entire list of TCP tickles for
+  a particular public address.
+  this control should only be sent by the node that is currently serving
+  that public address.
+
+  indata holds a ctdb_tickle_list_old: the public address, a connection
+  count and that many ctdb_connection entries.
+
+  Returns 0 on success (or when the update is ignored because this
+  node hosts the address), 1 if the address is not a public address,
+  -1 on malformed input or allocation failure.
+ */
+int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
+	struct ctdb_tcp_array *tcparray;
+	struct ctdb_connection *connections;
+	struct ctdb_vnn *vnn;
+
+	/* We must at least have tickles.num or else we can't verify the size
+	   of the received data blob
+	 */
+	if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
+		DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
+		return -1;
+	}
+
+	/* verify that the size of data matches what we expect */
+	if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
+			 + sizeof(struct ctdb_connection) * list->num) {
+		DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
+		return -1;
+	}
+
+	DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
+			   ctdb_addr_to_str(&list->addr)));
+
+	vnn = find_public_ip_vnn(ctdb, &list->addr);
+	if (vnn == NULL) {
+		DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
+				  ctdb_addr_to_str(&list->addr)));
+
+		return 1;
+	}
+
+	if (vnn->pnn == ctdb->pnn) {
+		DEBUG(DEBUG_INFO,
+		      ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
+		       ctdb_addr_to_str(&list->addr)));
+		return 0;
+	}
+
+	/* remove any old ticklelist we might have */
+	talloc_free(vnn->tcp_array);
+	vnn->tcp_array = NULL;
+
+	tcparray = talloc(vnn, struct ctdb_tcp_array);
+	CTDB_NO_MEMORY(ctdb, tcparray);
+
+	connections = talloc_array(tcparray, struct ctdb_connection, list->num);
+	if (connections == NULL) {
+		/* tcparray is parented to vnn but not yet referenced from
+		 * it; release it here so it is not left orphaned until
+		 * the vnn itself goes away */
+		talloc_free(tcparray);
+	}
+	CTDB_NO_MEMORY(ctdb, connections);
+
+	tcparray->num = list->num;
+	tcparray->connections = connections;
+
+	memcpy(tcparray->connections, &list->connections[0],
+	       sizeof(struct ctdb_connection)*tcparray->num);
+
+	/* We now have a new fresh tickle list array for this vnn */
+	vnn->tcp_array = tcparray;
+
+	return 0;
+}
+
+/*
+  return the list of tickles recorded for the public address given in
+  indata.
+
+  If the port of the supplied address is 0 every recorded connection
+  is returned; otherwise only connections whose destination port
+  equals that port are returned.
+
+  Returns 0 on success, 1 if the address is not a public address and
+  -1 on allocation failure.
+ */
+int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
+{
+	ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
+	struct ctdb_tcp_array *tcparray;
+	struct ctdb_tickle_list_old *list;
+	struct ctdb_vnn *vnn;
+	unsigned int matched = 0;
+	unsigned int filled = 0;
+	unsigned int idx;
+	unsigned port;
+
+	vnn = find_public_ip_vnn(ctdb, addr);
+	if (vnn == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
+				 ctdb_addr_to_str(addr)));
+
+		return 1;
+	}
+
+	port = ctdb_addr_to_port(addr);
+	tcparray = vnn->tcp_array;
+
+	/* Work out how many connections the reply will carry */
+	if (tcparray != NULL && port == 0) {
+		matched = tcparray->num;
+	} else if (tcparray != NULL) {
+		for (idx = 0; idx < tcparray->num; idx++) {
+			if (ctdb_addr_to_port(&tcparray->connections[idx].dst) == port) {
+				matched++;
+			}
+		}
+	}
+
+	outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
+			 + sizeof(struct ctdb_connection) * matched;
+
+	outdata->dptr = talloc_size(outdata, outdata->dsize);
+	CTDB_NO_MEMORY(ctdb, outdata->dptr);
+	list = (struct ctdb_tickle_list_old *)outdata->dptr;
+
+	list->addr = *addr;
+	list->num = matched;
+
+	/* matched != 0 implies tcparray != NULL, so the guard in the loop
+	 * condition also protects the dereference */
+	for (idx = 0; matched != 0 && idx < tcparray->num; idx++) {
+		if (port != 0 &&
+		    port != ctdb_addr_to_port(&tcparray->connections[idx].dst)) {
+			continue;
+		}
+		list->connections[filled] = tcparray->connections[idx];
+		filled++;
+	}
+
+	return 0;
+}
+
+
+/*
+  set the list of all tcp tickles for a public address
+
+  Builds a ctdb_tickle_list_old for addr from tcparray (which may be
+  NULL, meaning "no connections") and broadcasts it to all nodes as a
+  no-reply SET_TCP_TICKLE_LIST control.
+
+  Returns 0 on success, -1 on allocation or send failure.
+ */
+static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
+					     ctdb_sock_addr *addr,
+					     struct ctdb_tcp_array *tcparray)
+{
+	int ret, num;
+	TDB_DATA data;
+	struct ctdb_tickle_list_old *list;
+
+	if (tcparray) {
+		num = tcparray->num;
+	} else {
+		num = 0;
+	}
+
+	data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
+			sizeof(struct ctdb_connection) * num;
+	data.dptr = talloc_size(ctdb, data.dsize);
+	CTDB_NO_MEMORY(ctdb, data.dptr);
+
+	list = (struct ctdb_tickle_list_old *)data.dptr;
+	list->addr = *addr;
+	list->num = num;
+	if (tcparray) {
+		memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
+	}
+
+	ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
+				       CTDB_CONTROL_SET_TCP_TICKLE_LIST,
+				       0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
+
+	/* The buffer hangs off the long-lived ctdb context, so it must be
+	 * released on the failure path too, not only after a successful
+	 * send */
+	talloc_free(data.dptr);
+
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Broadcast the tickle list of every public address hosted by this
+ * node.  Addresses are skipped unless force is set or the address is
+ * flagged tcp_update_needed.  The flag is cleared after a successful
+ * send and set after a failed one so that it is retried.
+ */
+static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
+					      bool force)
+{
+	struct ctdb_vnn *vnn;
+
+	for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
+		bool hosted_here = (vnn->pnn == ctdb->pnn);
+		bool wanted = force || vnn->tcp_update_needed;
+		bool failed;
+
+		/* Only addresses we have taken over, and only if needed */
+		if (!hosted_here || !wanted) {
+			continue;
+		}
+
+		failed = (ctdb_send_set_tcp_tickles_for_ip(
+				  ctdb,
+				  &vnn->public_address,
+				  vnn->tcp_array) != 0);
+		if (failed) {
+			D_ERR("Failed to send the tickle update for ip %s\n",
+			      ctdb_addr_to_str(&vnn->public_address));
+		} else {
+			D_INFO("Sent tickle update for ip %s\n",
+			       ctdb_addr_to_str(&vnn->public_address));
+		}
+		vnn->tcp_update_needed = failed;
+	}
+}
+
+/*
+ timer callback: perform tickle updates if required
+
+ private_data is the ctdb_context.  Sends updates only for addresses
+ that need one (force == false) and then schedules itself again after
+ tunable.tickle_update_interval seconds.
+ */
+static void ctdb_update_tcp_tickles(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_context *ctdb = talloc_get_type(
+ private_data, struct ctdb_context);
+
+ ctdb_send_set_tcp_tickles_for_all(ctdb, false);
+
+ /* re-arm: the timer is parented to tickle_update_context */
+ tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
+ timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
+ ctdb_update_tcp_tickles, ctdb);
+}
+
+/*
+ start periodic update of tcp tickles
+
+ Creates a fresh talloc context (ctdb->tickle_update_context) to own
+ the timer and schedules the first ctdb_update_tcp_tickles() call
+ tunable.tickle_update_interval seconds from now.
+ */
+void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
+{
+ ctdb->tickle_update_context = talloc_new(ctdb);
+
+ tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
+ timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
+ ctdb_update_tcp_tickles, ctdb);
+}
+
+
+
+
+/* State for one series of repeated gratuitous ARPs (see send_gratious_arp) */
+struct control_gratious_arp {
+ struct ctdb_context *ctdb; /* daemon context; supplies the event loop */
+ ctdb_sock_addr addr; /* address passed to ctdb_sys_send_arp() */
+ const char *iface; /* interface name passed to ctdb_sys_send_arp() */
+ int count; /* number of send attempts made so far */
+};
+
+/*
+  timer callback: send one gratuitous ARP and re-arm
+
+  private_data is a control_gratious_arp.  A send failure is only
+  logged.  After CTDB_ARP_REPEAT attempts the state is freed;
+  otherwise the next attempt is scheduled CTDB_ARP_INTERVAL seconds
+  later.
+ */
+static void send_gratious_arp(struct tevent_context *ev,
+			      struct tevent_timer *te,
+			      struct timeval t, void *private_data)
+{
+	struct control_gratious_arp *arp = talloc_get_type(
+		private_data, struct control_gratious_arp);
+	int err;
+
+	err = ctdb_sys_send_arp(&arp->addr, arp->iface);
+	if (err != 0) {
+		DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
+			arp->iface, strerror(err));
+	}
+
+	arp->count++;
+	if (arp->count != CTDB_ARP_REPEAT) {
+		/* More repeats to go: the timer lives on arp itself */
+		tevent_add_timer(arp->ctdb->ev, arp,
+				 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
+				 send_gratious_arp, arp);
+		return;
+	}
+
+	talloc_free(arp);
+}
+
+
+/*
+  send a gratuitous arp
+
+  indata holds a ctdb_addr_info_old: the address plus an interface
+  name of gratious_arp->len bytes.  The request is validated, copied
+  and a series of ARPs is started from an immediate timer.
+
+  Returns 0 on success, -1 on malformed input or allocation failure.
+ */
+int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
+	struct control_gratious_arp *arp;
+	char *iface;
+
+	/* verify the size of indata */
+	if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
+		DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
+				 (unsigned)indata.dsize,
+				 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
+		return -1;
+	}
+	if (indata.dsize !=
+		( offsetof(struct ctdb_addr_info_old, iface)
+		+ gratious_arp->len ) ){
+
+		DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
+			"but should be %u bytes\n",
+			 (unsigned)indata.dsize,
+			 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
+		return -1;
+	}
+
+
+	arp = talloc(ctdb, struct control_gratious_arp);
+	CTDB_NO_MEMORY(ctdb, arp);
+
+	/* The size check above only proves that len bytes are present,
+	 * not that they are NUL-terminated.  Bound the copy by len so a
+	 * malformed request cannot make us read past indata. */
+	iface = talloc_strndup(arp, gratious_arp->iface, gratious_arp->len);
+	if (iface == NULL) {
+		/* arp hangs off the long-lived ctdb context: don't leak it */
+		talloc_free(arp);
+	}
+	CTDB_NO_MEMORY(ctdb, iface);
+
+	arp->ctdb = ctdb;
+	arp->addr = gratious_arp->addr;
+	arp->iface = iface;
+	arp->count = 0;
+
+	/* Fire the first ARP as soon as the event loop runs */
+	tevent_add_timer(arp->ctdb->ev, arp,
+			 timeval_zero(), send_gratious_arp, arp);
+
+	return 0;
+}
+
+/*
+ * Control handler: add a public address at run time.
+ *
+ * indata holds a ctdb_addr_info_old: address, netmask bits and an
+ * interface string of pub->len bytes, which must include its
+ * terminating NUL.
+ *
+ * Returns 0 on success, -1 on malformed input or if the address could
+ * not be added.
+ */
+int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
+	int ret;
+
+	/* verify the size of indata */
+	if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
+		DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
+		return -1;
+	}
+	if (indata.dsize !=
+		( offsetof(struct ctdb_addr_info_old, iface)
+		+ pub->len ) ){
+
+		DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
+			"but should be %u bytes\n",
+			 (unsigned)indata.dsize,
+			 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
+		return -1;
+	}
+
+	/* The interface list is handed on as a C string, so it must be
+	 * terminated inside the received buffer; otherwise the callee
+	 * would read past the end of indata. */
+	if (pub->len == 0 || pub->iface[pub->len - 1] != '\0') {
+		DEBUG(DEBUG_ERR,(__location__ " Interface string is not NUL-terminated\n"));
+		return -1;
+	}
+
+	DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
+
+	ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
+
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Control handler: delete a public address at run time.
+ *
+ * indata holds a ctdb_addr_info_old naming the address.  If this node
+ * currently hosts the address it is only marked delete_pending;
+ * otherwise it is deleted immediately.
+ *
+ * Returns 0 on success, -1 on malformed input or unknown address.
+ */
+int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
+	struct ctdb_vnn *vnn;
+
+	/* verify the size of indata */
+	if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
+		DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
+		return -1;
+	}
+	if (indata.dsize !=
+		( offsetof(struct ctdb_addr_info_old, iface)
+		+ pub->len ) ){
+
+		DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
+			"but should be %u bytes\n",
+			 (unsigned)indata.dsize,
+			 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
+		return -1;
+	}
+
+	DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
+
+	/* Locate the first public address matching the request */
+	for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
+		if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
+			break;
+		}
+	}
+
+	if (vnn == NULL) {
+		DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
+				 ctdb_addr_to_str(&pub->addr)));
+		return -1;
+	}
+
+	if (vnn->pnn != ctdb->pnn) {
+		/* Not hosted on the current node, so it can go right away */
+		do_delete_ip(ctdb, vnn);
+		return 0;
+	}
+
+	/* This IP is currently being hosted.  Defer the deletion until
+	 * the next takeover run.  "ctdb reloadips" will always cause a
+	 * takeover run.  "ctdb delip" will now need an explicit
+	 * "ctdb ipreallocated" afterwards. */
+	vnn->delete_pending = true;
+
+	return 0;
+}
+
+
+/* Carries the pending control request until the "ipreallocated" event finishes */
+struct ipreallocated_callback_state {
+ struct ctdb_req_control_old *c; /* request to reply to from the callback */
+};
+
+/*
+ * Completion callback for the "ipreallocated" event script.
+ *
+ * Logs a failure (banning this node if the script timed out), sends
+ * an IPREALLOCATED message to the local node, replies to the pending
+ * control with the script status and frees the callback state.
+ */
+static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
+					 int status, void *p)
+{
+	TDB_DATA data = { .dsize = 0, };
+	struct ipreallocated_callback_state *cb_state;
+
+	cb_state = talloc_get_type(p, struct ipreallocated_callback_state);
+
+	if (status != 0) {
+		DEBUG(DEBUG_ERR,
+		      (" \"ipreallocated\" event script failed (status %d)\n",
+		       status));
+	}
+	if (status == -ETIMEDOUT) {
+		ctdb_ban_self(ctdb);
+	}
+
+	D_INFO("Sending IPREALLOCATED message\n");
+	ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_IPREALLOCATED, data);
+
+	ctdb_request_control_reply(ctdb, cb_state->c, NULL, status, NULL);
+	talloc_free(cb_state);
+}
+
+/*
+ * A control to run the ipreallocated event.
+ *
+ * Starts the "ipreallocated" event script; the reply is sent later
+ * from ctdb_ipreallocated_callback().  On success *async_reply is set
+ * and the request is taken over by the callback state.
+ *
+ * Returns 0 if the event was started, -1 otherwise.
+ */
+int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
+				   struct ctdb_req_control_old *c,
+				   bool *async_reply)
+{
+	struct ipreallocated_callback_state *state;
+	int rc;
+
+	state = talloc(ctdb, struct ipreallocated_callback_state);
+	CTDB_NO_MEMORY(ctdb, state);
+
+	DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
+
+	rc = ctdb_event_script_callback(ctdb, state,
+					ctdb_ipreallocated_callback, state,
+					CTDB_EVENT_IPREALLOCATED,
+					"%s", "");
+	if (rc == 0) {
+		/* tell the control that we will be reply asynchronously */
+		state->c = talloc_steal(state, c);
+		*async_reply = true;
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
+	talloc_free(state);
+	return -1;
+}
+
+
+/* Tracks one in-flight "reload public IPs" request and its child process */
+struct ctdb_reloadips_handle {
+ struct ctdb_context *ctdb; /* owning daemon context */
+ struct ctdb_req_control_old *c; /* pending request; replied to by the destructor */
+ int status; /* status sent in the reply (starts at -1) */
+ int fd[2]; /* pipe: child writes its result to fd[1], parent reads fd[0] */
+ pid_t child; /* pid of the forked child; killed by the destructor */
+ struct tevent_fd *fde; /* read event on fd[0] */
+};
+
+/*
+ * talloc destructor for a reloadips handle.
+ *
+ * Clears ctdb->reload_ips if it points at this handle, sends the
+ * deferred control reply (with h->status) if one is still pending and
+ * kills the child process.  Always returns 0 so the free proceeds.
+ */
+static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
+{
+ if (h == h->ctdb->reload_ips) {
+ h->ctdb->reload_ips = NULL;
+ }
+ if (h->c != NULL) {
+ ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
+ h->c = NULL;
+ }
+ ctdb_kill(h->ctdb, h->child, SIGKILL);
+ return 0;
+}
+
+/*
+ * Timer callback: the reloadips child took too long.  Freeing the
+ * handle runs ctdb_reloadips_destructor(), which replies with the
+ * handle's current status and kills the child.
+ */
+static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
+
+ talloc_free(h);
+}
+
+/*
+ * fd event callback: the reloadips child has written its result (or
+ * the pipe was closed).
+ *
+ * A single status byte is read from the pipe.  Anything other than a
+ * successfully read 0 is treated as failure and recorded as status 1.
+ * The handle is then freed, which sends the reply via its destructor.
+ */
+static void ctdb_reloadips_child_handler(struct tevent_context *ev,
+					 struct tevent_fd *fde,
+					 uint16_t flags, void *private_data)
+{
+	struct ctdb_reloadips_handle *h = talloc_get_type(
+		private_data, struct ctdb_reloadips_handle);
+	char result;
+	int nread;
+
+	nread = sys_read(h->fd[0], &result, 1);
+	/* result is only examined when a byte was actually read */
+	if (nread < 1 || result != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
+		result = 1;
+	}
+
+	h->status = result;
+	talloc_free(h);
+}
+
+/*
+ * Body of the reloadips child process.
+ *
+ * Fetches the public IPs currently known to the local daemon,
+ * re-reads the public addresses configuration into this process's
+ * copy of ctdb->vnn, and then sends the local daemon:
+ *   - a DEL_PUBLIC_IP control for every address the daemon knows that
+ *     is no longer configured, and
+ *   - an ADD_PUBLIC_IP control for every configured address the daemon
+ *     does not know yet (preceded, once, by a REBALANCE_NODE message).
+ * It then waits for all controls to complete.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int ctdb_reloadips_child(struct ctdb_context *ctdb)
+{
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct ctdb_public_ip_list_old *ips;
+	struct ctdb_vnn *vnn;
+	struct client_async_data *async_data;
+	struct timeval timeout;
+	TDB_DATA data;
+	struct ctdb_client_control_state *state;
+	bool first_add;
+	unsigned int i;
+	int ret;
+
+	CTDB_NO_MEMORY(ctdb, mem_ctx);
+
+	/* Read IPs from local node */
+	ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
+				       CTDB_CURRENT_NODE, mem_ctx, &ips);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Unable to fetch public IPs from local node\n"));
+		talloc_free(mem_ctx);
+		return -1;
+	}
+
+	/* Read IPs file - this is safe since this is a child process */
+	ctdb->vnn = NULL;
+	if (ctdb_set_public_addresses(ctdb, false) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
+		talloc_free(mem_ctx);
+		return -1;
+	}
+
+	async_data = talloc_zero(mem_ctx, struct client_async_data);
+	CTDB_NO_MEMORY(ctdb, async_data);
+
+	/* Compare IPs between node and file for IPs to be deleted */
+	for (i = 0; i < ips->num; i++) {
+		/* Look for the node's IP in the freshly read configuration */
+		for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
+			if (ctdb_same_ip(&vnn->public_address,
+					 &ips->ips[i].addr)) {
+				/* IP is still in file */
+				break;
+			}
+		}
+
+		if (vnn == NULL) {
+			/* Delete IP ips->ips[i] */
+			struct ctdb_addr_info_old *pub;
+
+			DEBUG(DEBUG_NOTICE,
+			      ("IP %s no longer configured, deleting it\n",
+			       ctdb_addr_to_str(&ips->ips[i].addr)));
+
+			pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
+			CTDB_NO_MEMORY(ctdb, pub);
+
+			pub->addr = ips->ips[i].addr;
+			pub->mask = 0;
+			pub->len = 0;
+
+			timeout = TAKEOVER_TIMEOUT();
+
+			data.dsize = offsetof(struct ctdb_addr_info_old,
+					      iface) + pub->len;
+			data.dptr = (uint8_t *)pub;
+
+			state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
+						  CTDB_CONTROL_DEL_PUBLIC_IP,
+						  0, data, async_data,
+						  &timeout, NULL);
+			if (state == NULL) {
+				DEBUG(DEBUG_ERR,
+				      (__location__
+				       " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
+				goto failed;
+			}
+
+			ctdb_client_async_add(async_data, state);
+		}
+	}
+
+	/* Compare IPs between node and file for IPs to be added */
+	first_add = true;
+	for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
+		for (i = 0; i < ips->num; i++) {
+			if (ctdb_same_ip(&vnn->public_address,
+					 &ips->ips[i].addr)) {
+				/* IP already on node */
+				break;
+			}
+		}
+		if (i == ips->num) {
+			/* Add the configured IP held in vnn */
+			struct ctdb_addr_info_old *pub;
+			const char *ifaces = NULL;
+			uint32_t len;
+			struct vnn_interface *iface = NULL;
+
+			DEBUG(DEBUG_NOTICE,
+			      ("New IP %s configured, adding it\n",
+			       ctdb_addr_to_str(&vnn->public_address)));
+			if (first_add) {
+				uint32_t pnn = ctdb_get_pnn(ctdb);
+
+				data.dsize = sizeof(pnn);
+				data.dptr  = (uint8_t *)&pnn;
+
+				ret = ctdb_client_send_message(
+					ctdb,
+					CTDB_BROADCAST_CONNECTED,
+					CTDB_SRVID_REBALANCE_NODE,
+					data);
+				if (ret != 0) {
+					DEBUG(DEBUG_WARNING,
+					      ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
+				}
+
+				first_add = false;
+			}
+
+			/* Build a comma-separated list of interface names.
+			 * Intermediate strings go on mem_ctx so they are
+			 * released with everything else, and every
+			 * allocation is checked so strlen() below never
+			 * sees NULL. */
+			ifaces = vnn->ifaces->iface->name;
+			iface = vnn->ifaces->next;
+			while (iface != NULL) {
+				ifaces = talloc_asprintf(mem_ctx, "%s,%s", ifaces,
+							 iface->iface->name);
+				if (ifaces == NULL) {
+					DEBUG(DEBUG_ERR,
+					      (__location__
+					       " out of memory building interface list\n"));
+					goto failed;
+				}
+				iface = iface->next;
+			}
+
+			len   = strlen(ifaces) + 1;
+			pub = talloc_zero_size(mem_ctx,
+					       offsetof(struct ctdb_addr_info_old, iface) + len);
+			CTDB_NO_MEMORY(ctdb, pub);
+
+			pub->addr  = vnn->public_address;
+			pub->mask  = vnn->public_netmask_bits;
+			pub->len   = len;
+			/* len includes the terminating NUL */
+			memcpy(&pub->iface[0], ifaces, pub->len);
+
+			timeout = TAKEOVER_TIMEOUT();
+
+			data.dsize = offsetof(struct ctdb_addr_info_old,
+					      iface) + pub->len;
+			data.dptr = (uint8_t *)pub;
+
+			state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
+						  CTDB_CONTROL_ADD_PUBLIC_IP,
+						  0, data, async_data,
+						  &timeout, NULL);
+			if (state == NULL) {
+				DEBUG(DEBUG_ERR,
+				      (__location__
+				       " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
+				goto failed;
+			}
+
+			ctdb_client_async_add(async_data, state);
+		}
+	}
+
+	if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
+		goto failed;
+	}
+
+	talloc_free(mem_ctx);
+	return 0;
+
+failed:
+	talloc_free(mem_ctx);
+	return -1;
+}
+
+/* This control is sent to force the node to re-read the public addresses file
+   and drop any addresses we should no longer host, and add new addresses
+   that we are now able to host.
+
+   The work is done in a forked child (ctdb_reloadips_child) which
+   reports a one-byte status over a pipe.  The reply to the control is
+   sent asynchronously when the child reports back or after a 120
+   second timeout.
+
+   Returns 0 with *async_reply set on success, -1 if the child could
+   not be started or monitored (the caller then still owns c).
+*/
+int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
+{
+	struct ctdb_reloadips_handle *h;
+	struct tevent_timer *te;
+	pid_t parent = getpid();
+
+	/* NOTE(review): nothing in this function assigns ctdb->reload_ips,
+	 * so unless it is set elsewhere this cancellation of an earlier
+	 * reload never triggers - confirm the intent. */
+	if (ctdb->reload_ips != NULL) {
+		talloc_free(ctdb->reload_ips);
+		ctdb->reload_ips = NULL;
+	}
+
+	h = talloc(ctdb, struct ctdb_reloadips_handle);
+	CTDB_NO_MEMORY(ctdb, h);
+	h->ctdb     = ctdb;
+	h->c        = NULL;
+	h->status   = -1;
+
+	if (pipe(h->fd) == -1) {
+		DEBUG(DEBUG_ERR,("Failed to create pipe for reloadips\n"));
+		talloc_free(h);
+		return -1;
+	}
+
+	h->child = ctdb_fork(ctdb);
+	if (h->child == (pid_t)-1) {
+		DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
+		close(h->fd[0]);
+		close(h->fd[1]);
+		talloc_free(h);
+		return -1;
+	}
+
+	/* child process */
+	if (h->child == 0) {
+		signed char res = 0;
+
+		close(h->fd[0]);
+
+		prctl_set_comment("ctdb_reloadips");
+		if (switch_from_server_to_client(ctdb) != 0) {
+			DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
+			res = -1;
+		} else {
+			res = ctdb_reloadips_child(ctdb);
+			if (res != 0) {
+				DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
+			}
+		}
+
+		sys_write(h->fd[1], &res, 1);
+		ctdb_wait_for_process_to_exit(parent);
+		_exit(0);
+	}
+
+	/* parent process */
+	close(h->fd[1]);
+	set_close_on_exec(h->fd[0]);
+
+	/* From here on freeing h kills the child.  h->c is still NULL, so
+	 * on the error paths below the destructor sends no reply and the
+	 * caller can reply with our -1 itself. */
+	talloc_set_destructor(h, ctdb_reloadips_destructor);
+
+	h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
+			       ctdb_reloadips_child_handler, (void *)h);
+	if (h->fde == NULL) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to add fd event for reloadips child\n"));
+		close(h->fd[0]);
+		talloc_free(h);
+		return -1;
+	}
+	tevent_fd_set_auto_close(h->fde);
+
+	te = tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
+			      ctdb_reloadips_timeout_event, h);
+	if (te == NULL) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to add timeout for reloadips child\n"));
+		/* fd[0] is closed with the fd event (auto-close) */
+		talloc_free(h);
+		return -1;
+	}
+
+	/* Everything is in place: take over the request.  We reply later. */
+	h->c = talloc_steal(h, c);
+	*async_reply = true;
+	return 0;
+}
diff --git a/ctdb/server/ctdb_takeover_helper.c b/ctdb/server/ctdb_takeover_helper.c
new file mode 100644
index 0000000..c088970
--- /dev/null
+++ b/ctdb/server/ctdb_takeover_helper.c
@@ -0,0 +1,1276 @@
+/*
+ CTDB IP takeover helper
+
+ Copyright (C) Martin Schwenke 2016
+
+ Based on ctdb_recovery_helper.c
+ Copyright (C) Amitay Isaacs 2015
+
+ and ctdb_takeover.c
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/strv.h"
+#include "lib/util/strv_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/time.h"
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "protocol/protocol_util.h"
+#include "client/client.h"
+
+#include "common/logging.h"
+
+#include "server/ipalloc.h"
+
+/* Timeout, in seconds, applied to controls sent by this helper */
+static int takeover_timeout = 9;
+
+/* Absolute deadline takeover_timeout seconds from the moment of evaluation */
+#define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
+
+/*
+ * Utility functions
+ */
+
+/*
+ * Common receive function for requests that produce no result data.
+ *
+ * Returns true if the request completed without error.  Otherwise
+ * returns false and, if perr is not NULL, stores the unix error there.
+ */
+static bool generic_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return false;
+}
+
+/*
+ * Map the ip_alloc_algorithm tunable to an ipalloc algorithm:
+ * 0 selects deterministic, 1 non-deterministic, and 2 or any other
+ * value selects LCP2.
+ */
+static enum ipalloc_algorithm
+determine_algorithm(const struct ctdb_tunable_list *tunables)
+{
+	if (tunables->ip_alloc_algorithm == 0) {
+		return IPALLOC_DETERMINISTIC;
+	}
+	if (tunables->ip_alloc_algorithm == 1) {
+		return IPALLOC_NONDETERMINISTIC;
+	}
+
+	/* 2 and every unrecognised value */
+	return IPALLOC_LCP2;
+}
+
+/**********************************************************************/
+
+/* State for fetching public IP lists from a set of nodes */
+struct get_public_ips_state {
+ uint32_t *pnns; /* nodes being queried (caller-owned array) */
+ int count; /* number of entries in pnns */
+ struct ctdb_public_ip_list *ips; /* results, indexed by pnn */
+ uint32_t *ban_credits; /* per-pnn counters, bumped for failing nodes */
+};
+
+static void get_public_ips_done(struct tevent_req *subreq);
+
+/*
+ * Start fetching public IP lists from the given nodes.
+ *
+ * pnns/count   - nodes to query
+ * num_nodes    - size of the result array allocated here (results are
+ *                later stored by pnn, see get_public_ips_done)
+ * ban_credits  - per-pnn counters incremented for nodes that fail
+ * available_only - passed to the GET_PUBLIC_IPS control request
+ *
+ * Returns the request, or NULL if it could not be created.  If count
+ * is 0 the request completes immediately with an all-zero result
+ * array.
+ */
+static struct tevent_req *get_public_ips_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t *pnns,
+ int count, int num_nodes,
+ uint32_t *ban_credits,
+ bool available_only)
+{
+ struct tevent_req *req, *subreq;
+ struct get_public_ips_state *state;
+ struct ctdb_req_control request;
+
+ req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->pnns = pnns;
+ state->count = count;
+ state->ban_credits = ban_credits;
+
+ state->ips = talloc_zero_array(state,
+ struct ctdb_public_ip_list,
+ num_nodes);
+ if (tevent_req_nomem(state->ips, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ /* Short circuit if no nodes being asked for IPs */
+ if (state->count == 0) {
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ ctdb_req_control_get_public_ips(&request, available_only);
+ /* NOTE(review): the subrequest is parented to mem_ctx rather than
+ * to state, unlike the other senders in this file - confirm this
+ * is intended. */
+ subreq = ctdb_client_control_multi_send(mem_ctx, ev, client,
+ state->pnns,
+ state->count,
+ TIMEOUT(), &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, get_public_ips_done, req);
+
+ return req;
+}
+
+/*
+ * Completion callback for the GET_PUBLIC_IPS multi-control.
+ *
+ * On a transport-level failure every node with a non-zero entry in
+ * the error list is logged and given a ban credit, and the request
+ * fails with the returned error.  Otherwise each reply is parsed and
+ * stored in state->ips[pnn]; nodes whose reply cannot be parsed are
+ * logged and given a ban credit, and the request fails with EIO.
+ */
+static void get_public_ips_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct get_public_ips_state *state = tevent_req_data(
+		req, struct get_public_ips_state);
+	struct ctdb_reply_control **reply;
+	int *err_list = NULL;
+	int ret, i;
+	bool status, found_errors;
+
+	status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
+						&reply);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		/* The error list may be missing if the multi-control
+		 * failed before it could record per-node results, so
+		 * only walk it when it is present */
+		for (i = 0; err_list != NULL && i < state->count; i++) {
+			if (err_list[i] != 0) {
+				uint32_t pnn = state->pnns[i];
+
+				D_ERR("control GET_PUBLIC_IPS failed on "
+				      "node %u, ret=%d\n", pnn, err_list[i]);
+
+				state->ban_credits[pnn]++;
+			}
+		}
+
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	found_errors = false;
+	for (i = 0; i < state->count; i++) {
+		uint32_t pnn;
+		struct ctdb_public_ip_list *ips;
+
+		pnn = state->pnns[i];
+		ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
+							&ips);
+		if (ret != 0) {
+			D_ERR("control GET_PUBLIC_IPS failed on "
+			      "node %u\n", pnn);
+			state->ban_credits[pnn]++;
+			found_errors = true;
+			continue;
+		}
+
+		D_INFO("Fetched public IPs from node %u\n", pnn);
+		/* Results are indexed by node number, not by position */
+		state->ips[pnn] = *ips;
+	}
+
+	if (found_errors) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	talloc_free(reply);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of get_public_ips_send().
+ *
+ * Returns false (storing the unix error in *perr when perr is not
+ * NULL) if the request failed.  On success returns true and moves the
+ * result array onto mem_ctx, handing it back through *ips.
+ */
+static bool get_public_ips_recv(struct tevent_req *req, int *perr,
+				TALLOC_CTX *mem_ctx,
+				struct ctdb_public_ip_list **ips)
+{
+	struct get_public_ips_state *state = tevent_req_data(
+		req, struct get_public_ips_state);
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		*ips = talloc_steal(mem_ctx, state->ips);
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return false;
+}
+
+/**********************************************************************/
+
+/* Overall state for releasing all IPs (one multi-control per IP) */
+struct release_ip_state {
+ int num_sent; /* multi-controls started */
+ int num_replies; /* multi-controls completed so far */
+ int num_fails; /* multi-controls that failed */
+ int err_any; /* an error from one of the failures, reported at the end */
+ uint32_t *ban_credits; /* per-pnn counters, bumped for failing nodes */
+};
+
+/* Per-IP state attached to each RELEASE_IP multi-control */
+struct release_ip_one_state {
+ struct tevent_req *req; /* the overall release_ip request */
+ uint32_t *pnns; /* nodes asked to release this IP */
+ int count; /* number of entries used in pnns */
+ const char *ip_str; /* printable address, for log messages */
+};
+
+static void release_ip_done(struct tevent_req *subreq);
+
+/*
+ * Start sending RELEASE_IP controls for every address in all_ips.
+ *
+ * pnns/count  - candidate nodes
+ * timeout     - deadline shared by all controls
+ * all_ips     - list of addresses with their assigned pnn and the set
+ *               of nodes each address is known on
+ * ban_credits - per-pnn counters incremented for nodes that fail
+ *
+ * Returns the request, or NULL if it could not be created.  If no
+ * control needs to be sent the request completes immediately.
+ */
+static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t *pnns,
+ int count,
+ struct timeval timeout,
+ struct public_ip_list *all_ips,
+ uint32_t *ban_credits)
+{
+ struct tevent_req *req, *subreq;
+ struct release_ip_state *state;
+ struct ctdb_req_control request;
+ struct public_ip_list *tmp_ip;
+
+ req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->num_sent = 0;
+ state->num_replies = 0;
+ state->num_fails = 0;
+ state->ban_credits = ban_credits;
+
+ /* Send a RELEASE_IP to all nodes that should not be hosting
+ * each IP. For each IP, all but one of these will be
+ * redundant. However, the redundant ones are used to tell
+ * nodes which node should be hosting the IP so that commands
+ * like "ctdb ip" can display a particular nodes idea of who
+ * is hosting what. */
+ for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
+ struct release_ip_one_state *substate;
+ struct ctdb_public_ip ip;
+ int i;
+
+ substate = talloc_zero(state, struct release_ip_one_state);
+ if (tevent_req_nomem(substate, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ /* sized for the worst case: every candidate node must release */
+ substate->pnns = talloc_zero_array(substate, uint32_t, count);
+ if (tevent_req_nomem(substate->pnns, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ substate->count = 0;
+ substate->req = req;
+
+ substate->ip_str = ctdb_sock_addr_to_string(substate,
+ &tmp_ip->addr,
+ false);
+ if (tevent_req_nomem(substate->ip_str, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ for (i = 0; i < count; i++) {
+ uint32_t pnn = pnns[i];
+
+ /* Skip this node if IP is not known */
+ if (! bitmap_query(tmp_ip->known_on, pnn)) {
+ continue;
+ }
+
+ /* If pnn is not the node that should be
+ * hosting the IP then add it to the list of
+ * nodes that need to do a release. */
+ if (tmp_ip->pnn != pnn) {
+ substate->pnns[substate->count] = pnn;
+ substate->count++;
+ }
+ }
+
+ if (substate->count == 0) {
+ /* No releases to send for this address... */
+ TALLOC_FREE(substate);
+ continue;
+ }
+
+ ip.pnn = tmp_ip->pnn;
+ ip.addr = tmp_ip->addr;
+ ctdb_req_control_release_ip(&request, &ip);
+ subreq = ctdb_client_control_multi_send(state, ev, client,
+ substate->pnns,
+ substate->count,
+ timeout,/* cumulative */
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ /* the callback receives the per-IP substate, not req */
+ tevent_req_set_callback(subreq, release_ip_done, substate);
+
+ state->num_sent++;
+ }
+
+ /* None sent, finished... */
+ if (state->num_sent == 0) {
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ return req;
+}
+
+/*
+ * Completion callback for one per-IP RELEASE_IP multi-control.
+ *
+ * On failure, every node with a non-zero entry in the error list is
+ * logged and given a ban credit; if no per-node error is available
+ * the failure is recorded as EIO.  Once all multi-controls that were
+ * sent have completed, the overall request finishes: with an error if
+ * any of them failed, successfully otherwise.
+ */
+static void release_ip_done(struct tevent_req *subreq)
+{
+	struct release_ip_one_state *substate = tevent_req_callback_data(
+		subreq, struct release_ip_one_state);
+	struct tevent_req *req = substate->req;
+	struct release_ip_state *state = tevent_req_data(
+		req, struct release_ip_state);
+	int ret, i;
+	int *err_list = NULL;
+	bool status, found_errors;
+
+	status = ctdb_client_control_multi_recv(subreq, &ret, state,
+						&err_list, NULL);
+	TALLOC_FREE(subreq);
+
+	if (status) {
+		D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
+		       substate->ip_str, substate->count);
+		goto done;
+	}
+
+	/* Get some clear error messages out of err_list and count
+	 * banning credits.  The list may be missing if the
+	 * multi-control failed before recording per-node results; that
+	 * case falls through to the "internal error" branch below.
+	 */
+	found_errors = false;
+	for (i = 0; err_list != NULL && i < substate->count; i++) {
+		int err = err_list[i];
+		if (err != 0) {
+			uint32_t pnn = substate->pnns[i];
+
+			D_ERR("RELEASE_IP %s failed on node %u, "
+			      "ret=%d\n", substate->ip_str, pnn, err);
+
+			state->ban_credits[pnn]++;
+			state->err_any = err;
+			found_errors = true;
+		}
+	}
+	if (! found_errors) {
+		D_ERR("RELEASE_IP %s internal error, ret=%d\n",
+		      substate->ip_str, ret);
+		state->err_any = EIO;
+	}
+
+	state->num_fails++;
+
+done:
+	talloc_free(substate);
+
+	state->num_replies++;
+
+	if (state->num_replies < state->num_sent) {
+		/* Not all replies received, don't go further */
+		return;
+	}
+
+	if (state->num_fails > 0) {
+		tevent_req_error(req, state->err_any);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of release_ip_send(): true on success, false on
+ * failure with the unix error stored in *perr (if perr is not NULL).
+ */
+static bool release_ip_recv(struct tevent_req *req, int *perr)
+{
+ return generic_recv(req, perr);
+}
+
+/**********************************************************************/
+
+/* Aggregate state for one take_ip_send() request covering all IPs. */
+struct take_ip_state {
+ int num_sent; /* TAKEOVER_IP controls sent by take_ip_send() */
+ int num_replies; /* completions seen so far by take_ip_done() */
+ int num_fails; /* completions that reported a failure */
+ int err_any; /* error code of the most recent failure */
+ uint32_t *ban_credits; /* caller's array, indexed by PNN, bumped on failure */
+};
+
+/* Callback data attached to a single TAKEOVER_IP control. */
+struct take_ip_one_state {
+ struct tevent_req *req; /* parent take_ip_send() request */
+ uint32_t pnn; /* node the control was sent to */
+ const char *ip_str; /* printable address, used in log messages */
+};
+
+static void take_ip_done(struct tevent_req *subreq);
+
+/*
+ * Send a TAKEOVER_IP control for every assigned public IP.
+ *
+ * Walks all_ips and, for each entry whose PNN is known, asks that node
+ * to host the address.  The same (cumulative) timeout is used for
+ * every control.  ban_credits is remembered so that take_ip_done() can
+ * charge nodes that fail.  If nothing needs sending the request
+ * completes straight away.
+ */
+static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
+				       struct tevent_context *ev,
+				       struct ctdb_client_context *client,
+				       struct timeval timeout,
+				       struct public_ip_list *all_ips,
+				       uint32_t *ban_credits)
+{
+	struct take_ip_state *state;
+	struct public_ip_list *entry;
+	struct tevent_req *req;
+
+	req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ban_credits = ban_credits;
+	state->num_fails = 0;
+	state->num_replies = 0;
+	state->num_sent = 0;
+
+	/* For each IP, send a TAKEOVER_IP to the node that should be
+	 * hosting it.  Many of these will often be redundant (since
+	 * the allocation won't have changed) but they can be useful
+	 * to recover from inconsistencies. */
+	for (entry = all_ips; entry != NULL; entry = entry->next) {
+		struct take_ip_one_state *one;
+		struct ctdb_req_control request;
+		struct ctdb_public_ip ip;
+		struct tevent_req *subreq;
+
+		/* IP will be unassigned */
+		if (entry->pnn == CTDB_UNKNOWN_PNN) {
+			continue;
+		}
+
+		one = talloc_zero(state, struct take_ip_one_state);
+		if (tevent_req_nomem(one, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		one->pnn = entry->pnn;
+		one->req = req;
+		one->ip_str = ctdb_sock_addr_to_string(one, &entry->addr,
+						       false);
+		if (tevent_req_nomem(one->ip_str, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		ip.pnn = entry->pnn;
+		ip.addr = entry->addr;
+		ctdb_req_control_takeover_ip(&request, &ip);
+
+		subreq = ctdb_client_control_send(state, ev, client,
+						  entry->pnn,
+						  timeout, /* cumulative */
+						  &request);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, take_ip_done, one);
+
+		state->num_sent++;
+	}
+
+	if (state->num_sent > 0) {
+		return req;
+	}
+
+	/* None sent, finished... */
+	tevent_req_done(req);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * Completion callback for a single TAKEOVER_IP control.
+ *
+ * subreq carries a struct take_ip_one_state as callback data.  A
+ * failure (either of the control itself or reported in the reply)
+ * charges a banning credit to the target node.  The parent request is
+ * completed once the number of replies reaches the number sent.
+ */
+static void take_ip_done(struct tevent_req *subreq)
+{
+	struct take_ip_one_state *substate = tevent_req_callback_data(
+		subreq, struct take_ip_one_state);
+	struct tevent_req *req = substate->req;
+	/* Start from NULL so the cleanup below is safe on every path */
+	struct ctdb_reply_control *reply = NULL;
+	struct take_ip_state *state = tevent_req_data(
+		req, struct take_ip_state);
+	int ret = 0;
+	bool status;
+
+	status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+
+	if (! status) {
+		D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
+		      substate->ip_str, substate->pnn, ret);
+		goto fail;
+	}
+
+	ret = ctdb_reply_control_takeover_ip(reply);
+	if (ret != 0) {
+		D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
+		      substate->ip_str, substate->pnn, ret);
+		goto fail;
+	}
+
+	D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
+	       substate->ip_str, substate->pnn);
+	goto done;
+
+fail:
+	state->ban_credits[substate->pnn]++;
+	state->num_fails++;
+	state->err_any = ret;
+
+done:
+	/* The reply was received with state as its memory context;
+	 * free it here rather than keeping one reply per IP until the
+	 * whole request goes away.
+	 */
+	TALLOC_FREE(reply);
+	talloc_free(substate);
+
+	state->num_replies++;
+
+	if (state->num_replies < state->num_sent) {
+		/* Not all replies received, don't go further */
+		return;
+	}
+
+	if (state->num_fails > 0) {
+		tevent_req_error(req, state->err_any);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/* Fetch the result of a take_ip_send() request; delegates to generic_recv(). */
+static bool take_ip_recv(struct tevent_req *req, int *perr)
+{
+	bool ok;
+
+	ok = generic_recv(req, perr);
+	return ok;
+}
+
+/**********************************************************************/
+
+/* State for one ipreallocated_send() request. */
+struct ipreallocated_state {
+ uint32_t *pnns; /* nodes the IPREALLOCATED control is sent to */
+ int count; /* number of entries in pnns */
+ uint32_t *ban_credits; /* caller's array, indexed by PNN, bumped on failure */
+};
+
+static void ipreallocated_done(struct tevent_req *subreq);
+
+/*
+ * Send the IPREALLOCATED control to the given set of nodes.
+ *
+ * pnns/count name the target nodes; both are remembered, together
+ * with ban_credits, for use by ipreallocated_done().
+ */
+static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
+					     struct tevent_context *ev,
+					     struct ctdb_client_context *client,
+					     uint32_t *pnns,
+					     int count,
+					     struct timeval timeout,
+					     uint32_t *ban_credits)
+{
+	struct ipreallocated_state *state;
+	struct ctdb_req_control request;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+
+	req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ban_credits = ban_credits;
+	state->count = count;
+	state->pnns = pnns;
+
+	ctdb_req_control_ipreallocated(&request);
+
+	subreq = ctdb_client_control_multi_send(state, ev, client,
+						pnns, count,
+						timeout, /* cumulative */
+						&request);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_set_callback(subreq, ipreallocated_done, req);
+	return req;
+}
+
+/*
+ * Completion callback for the IPREALLOCATED multi-node control.
+ *
+ * On failure, every node with a non-zero entry in the returned error
+ * list is logged and charged a banning credit before the request is
+ * failed with the error reported by the receive call.
+ */
+static void ipreallocated_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ipreallocated_state *state = tevent_req_data(
+		req, struct ipreallocated_state);
+	int *err_list = NULL;
+	int ret, i;
+	bool status, found_errors;
+
+	status = ctdb_client_control_multi_recv(subreq, &ret, state,
+						&err_list, NULL);
+	TALLOC_FREE(subreq);
+
+	if (status) {
+		D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
+		/* Free before completing: completion may free state */
+		TALLOC_FREE(err_list);
+		tevent_req_done(req);
+		return;
+	}
+
+	/* Get some clear error messages out of err_list and count
+	 * banning credits.  Only walk err_list if one was handed back.
+	 */
+	found_errors = false;
+	for (i = 0; err_list != NULL && i < state->count; i++) {
+		int err = err_list[i];
+		if (err != 0) {
+			uint32_t pnn = state->pnns[i];
+
+			D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
+			      pnn, err);
+
+			state->ban_credits[pnn]++;
+			found_errors = true;
+		}
+	}
+
+	if (! found_errors) {
+		D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
+	}
+
+	/* Free before completing: completion may free state */
+	TALLOC_FREE(err_list);
+	tevent_req_error(req, ret);
+}
+
+static bool ipreallocated_recv(struct tevent_req *req, int *perr)
+{
+ return generic_recv(req, perr);
+}
+
+/**********************************************************************/
+
+/*
+ * Recalculate the allocation of public IPs to nodes and have the
+ * nodes host their allocated addresses.
+ *
+ * - Get tunables
+ * - Get nodemap
+ * - Initialise IP allocation state. Pass:
+ * + algorithm to be used;
+ * + various tunables (NoIPTakeover, NoIPFailback)
+ * + list of nodes to force rebalance (internal structure, currently
+ * no way to fetch, only used by LCP2 for nodes that have had new
+ * IP addresses added).
+ * - Set IP flags for IP allocation based on node map
+ * - Retrieve known and available IP addresses (done separately so
+ * values can be faked in unit testing)
+ * - Use ipalloc_set_public_ips() to set known and available IP
+ * addresses for allocation
+ * - If cluster can't host IP addresses then jump to IPREALLOCATED
+ * - Run IP allocation algorithm
+ * - Send RELEASE_IP to all nodes for IPs they should not host
+ * - Send TAKEOVER_IP to nodes for the IPs they should host
+ * - Send IPREALLOCATED to all nodes
+ */
+
+/* State carried through every stage of a takeover run. */
+struct takeover_state {
+ struct tevent_context *ev; /* event context used for all sub-requests */
+ struct ctdb_client_context *client; /* connection used for all controls */
+ struct timeval timeout; /* cumulative deadline, 3 * takeover_timeout from when it is set */
+ unsigned int num_nodes; /* nodemap->num from GET_NODEMAP */
+ uint32_t *pnns_connected; /* filled by list_of_connected_nodes() */
+ int num_connected; /* entries in pnns_connected */
+ uint32_t *pnns_active; /* filled by list_of_active_nodes() */
+ int num_active; /* entries in pnns_active */
+ uint32_t destnode; /* ctdb_client_pnn(client); target of the initial controls */
+ uint32_t *force_rebalance_nodes; /* passed through to ipalloc_state_init() */
+ struct ctdb_tunable_list *tun_list; /* result of GET_ALL_TUNABLES */
+ struct ipalloc_state *ipalloc_state; /* IP allocation state */
+ struct ctdb_public_ip_list *known_ips; /* per-node known IPs, indexed by PNN */
+ struct public_ip_list *all_ips; /* result of ipalloc() */
+ uint32_t *ban_credits; /* num_nodes counters, indexed by PNN */
+};
+
+static void takeover_tunables_done(struct tevent_req *subreq);
+static void takeover_nodemap_done(struct tevent_req *subreq);
+static void takeover_known_ips_done(struct tevent_req *subreq);
+static void takeover_avail_ips_done(struct tevent_req *subreq);
+static void takeover_release_ip_done(struct tevent_req *subreq);
+static void takeover_take_ip_done(struct tevent_req *subreq);
+static void takeover_ipreallocated(struct tevent_req *req);
+static void takeover_ipreallocated_done(struct tevent_req *subreq);
+static void takeover_failed(struct tevent_req *subreq, int ret);
+static void takeover_failed_done(struct tevent_req *subreq);
+
+/*
+ * Start a takeover run.
+ *
+ * Records the caller's parameters and kicks off the first stage, a
+ * GET_ALL_TUNABLES control to the node this client is connected to.
+ * The remaining stages are chained from the completion callbacks.
+ */
+static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_client_context *client,
+					uint32_t *force_rebalance_nodes)
+{
+	struct takeover_state *state;
+	struct ctdb_req_control request;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+
+	req = tevent_req_create(mem_ctx, &state, struct takeover_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->client = client;
+	state->ev = ev;
+	state->destnode = ctdb_client_pnn(client);
+	state->force_rebalance_nodes = force_rebalance_nodes;
+
+	ctdb_req_control_get_all_tunables(&request);
+
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->destnode, TIMEOUT(),
+					  &request);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_set_callback(subreq, takeover_tunables_done, req);
+	return req;
+}
+
+/*
+ * GET_ALL_TUNABLES has completed: store the tunables, update the
+ * takeover timeout from them and request the node map.
+ */
+static void takeover_tunables_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct ctdb_req_control request;
+	struct ctdb_reply_control *reply;
+	struct tevent_req *next;
+	bool received;
+	int ret;
+
+	received = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+
+	if (received) {
+		/* Only parse the reply when the control itself worked */
+		ret = ctdb_reply_control_get_all_tunables(reply, state,
+							  &state->tun_list);
+	}
+	if (! received || ret != 0) {
+		D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	talloc_free(reply);
+
+	takeover_timeout = state->tun_list->takeover_timeout;
+
+	ctdb_req_control_get_nodemap(&request);
+	next = ctdb_client_control_send(state, state->ev, state->client,
+					state->destnode, TIMEOUT(),
+					&request);
+	if (tevent_req_nomem(next, req)) {
+		return;
+	}
+	tevent_req_set_callback(next, takeover_nodemap_done, req);
+}
+
+/*
+ * GET_NODEMAP has completed.
+ *
+ * Derives the lists of connected and active nodes, allocates the
+ * per-node banning credit counters and sets the default cumulative
+ * timeout.  If CTDB_DISABLE_IP_FAILOVER is set in the environment the
+ * run jumps straight to IPREALLOCATED; otherwise the IP allocation
+ * state is initialised and the known public IPs are fetched from the
+ * connected nodes.
+ */
+static void takeover_nodemap_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct takeover_state *state = tevent_req_data(
+ req, struct takeover_state);
+ struct ctdb_reply_control *reply;
+ bool status;
+ int ret;
+ struct ctdb_node_map *nodemap;
+ const char *ptr;
+
+ status = ctdb_client_control_recv(subreq, &ret, state, &reply);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
+ state->destnode, ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
+ if (ret != 0) {
+ D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ state->num_nodes = nodemap->num;
+
+ /* A result of zero or less is reported as ENOMEM */
+ state->num_connected = list_of_connected_nodes(nodemap,
+ CTDB_UNKNOWN_PNN, state,
+ &state->pnns_connected);
+ if (state->num_connected <= 0) {
+ tevent_req_error(req, ENOMEM);
+ return;
+ }
+
+ /* A result of zero or less is reported as ENOMEM */
+ state->num_active = list_of_active_nodes(nodemap,
+ CTDB_UNKNOWN_PNN, state,
+ &state->pnns_active);
+ if (state->num_active <= 0) {
+ tevent_req_error(req, ENOMEM);
+ return;
+ }
+
+ /* Default timeout for early jump to IPREALLOCATED. See below
+ * for explanation of 3 times...
+ */
+ state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
+
+ /* One zeroed counter per node, indexed by PNN */
+ state->ban_credits = talloc_zero_array(state, uint32_t,
+ state->num_nodes);
+ if (tevent_req_nomem(state->ban_credits, req)) {
+ return;
+ }
+
+ /* Only the presence of the variable matters, not its value */
+ ptr = getenv("CTDB_DISABLE_IP_FAILOVER");
+ if (ptr != NULL) {
+ /* IP failover is completely disabled so just send out
+ * ipreallocated event.
+ */
+ takeover_ipreallocated(req);
+ return;
+ }
+
+ state->ipalloc_state =
+ ipalloc_state_init(
+ state, state->num_nodes,
+ determine_algorithm(state->tun_list),
+ (state->tun_list->no_ip_takeover != 0),
+ (state->tun_list->no_ip_failback != 0),
+ state->force_rebalance_nodes);
+ if (tevent_req_nomem(state->ipalloc_state, req)) {
+ return;
+ }
+
+ /* Final argument false: fetch known (not available) IPs; the
+ * result is received by takeover_known_ips_done()
+ */
+ subreq = get_public_ips_send(state, state->ev, state->client,
+ state->pnns_connected, state->num_connected,
+ state->num_nodes, state->ban_credits,
+ false);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+
+ tevent_req_set_callback(subreq, takeover_known_ips_done, req);
+}
+
+/*
+ * The known public IPs have been fetched.
+ *
+ * Builds the list of active nodes that actually have known IPs and
+ * asks just those nodes for their available IPs.
+ */
+static void takeover_known_ips_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct tevent_req *next;
+	uint32_t *targets = NULL;
+	int num_targets = 0;
+	bool fetched;
+	int ret;
+	int idx;
+
+	fetched = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
+	TALLOC_FREE(subreq);
+
+	if (! fetched) {
+		D_ERR("Failed to fetch known public IPs\n");
+		takeover_failed(req, ret);
+		return;
+	}
+
+	/* Get available IPs from active nodes that actually have known IPs */
+
+	targets = talloc_zero_array(state, uint32_t, state->num_active);
+	if (tevent_req_nomem(targets, req)) {
+		return;
+	}
+
+	for (idx = 0; idx < state->num_active; idx++) {
+		uint32_t pnn = state->pnns_active[idx];
+
+		/* If pnn has IPs then fetch available IPs from it */
+		if (state->known_ips[pnn].num > 0) {
+			targets[num_targets++] = pnn;
+		}
+	}
+
+	next = get_public_ips_send(state, state->ev, state->client,
+				   targets, num_targets,
+				   state->num_nodes, state->ban_credits,
+				   true);
+	if (tevent_req_nomem(next, req)) {
+		return;
+	}
+
+	tevent_req_set_callback(next, takeover_avail_ips_done, req);
+}
+
+/*
+ * The available public IPs have been fetched.
+ *
+ * Feeds known and available IPs into the allocation state.  If no node
+ * can host IPs the run skips to IPREALLOCATED; otherwise the
+ * allocation is calculated, the cumulative timeout is restarted and
+ * the RELEASE_IP stage is started.
+ */
+static void takeover_avail_ips_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct takeover_state *state = tevent_req_data(
+ req, struct takeover_state);
+ bool status;
+ int ret;
+ struct ctdb_public_ip_list *available_ips;
+
+ status = get_public_ips_recv(subreq, &ret, state, &available_ips);
+ TALLOC_FREE(subreq);
+
+ if (! status) {
+ D_ERR("Failed to fetch available public IPs\n");
+ takeover_failed(req, ret);
+ return;
+ }
+
+ ipalloc_set_public_ips(state->ipalloc_state,
+ state->known_ips, available_ips);
+
+ if (! ipalloc_can_host_ips(state->ipalloc_state)) {
+ D_NOTICE("No nodes available to host public IPs yet\n");
+ /* Uses the default timeout set in takeover_nodemap_done() */
+ takeover_ipreallocated(req);
+ return;
+ }
+
+ /* Do the IP reassignment calculations */
+ state->all_ips = ipalloc(state->ipalloc_state);
+ if (tevent_req_nomem(state->all_ips, req)) {
+ return;
+ }
+
+ /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
+ * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
+ * seconds. However, RELEASE_IP can take longer due to TCP
+ * connection killing, so sometimes needs more time.
+ * Therefore, use a cumulative timeout of TakeoverTimeout * 3
+ * seconds across all 3 stages. No explicit expiry checks are
+ * needed before each stage because tevent is smart enough to
+ * fire the timeouts even if they are in the past. Initialise
+ * this here so it explicitly covers the stages we're
+ * interested in but, in particular, not the time taken by the
+ * ipalloc().
+ */
+ state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
+
+ subreq = release_ip_send(state, state->ev, state->client,
+ state->pnns_connected, state->num_connected,
+ state->timeout, state->all_ips,
+ state->ban_credits);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, takeover_release_ip_done, req);
+}
+
+/*
+ * The RELEASE_IP stage has completed: on success move on to the
+ * TAKEOVER_IP stage, otherwise hand over to takeover_failed().
+ */
+static void takeover_release_ip_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct tevent_req *next;
+	bool released;
+	int ret;
+
+	released = release_ip_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+
+	if (! released) {
+		takeover_failed(req, ret);
+		return;
+	}
+
+	/* All released, now for takeovers */
+
+	next = take_ip_send(state, state->ev, state->client,
+			    state->timeout, state->all_ips,
+			    state->ban_credits);
+	if (tevent_req_nomem(next, req)) {
+		return;
+	}
+	tevent_req_set_callback(next, takeover_take_ip_done, req);
+}
+
+/*
+ * The TAKEOVER_IP stage has completed: on success move on to the
+ * IPREALLOCATED stage, otherwise hand over to takeover_failed().
+ */
+static void takeover_take_ip_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	bool taken;
+	int ret = 0;
+
+	taken = take_ip_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+
+	if (taken) {
+		takeover_ipreallocated(req);
+		return;
+	}
+
+	takeover_failed(req, ret);
+}
+
+/*
+ * Start the IPREALLOCATED stage: send the control to all connected
+ * nodes using the cumulative timeout stored in the takeover state.
+ */
+static void takeover_ipreallocated(struct tevent_req *req)
+{
+	struct takeover_state *state;
+	struct tevent_req *subreq;
+
+	state = tevent_req_data(req, struct takeover_state);
+
+	subreq = ipreallocated_send(state, state->ev, state->client,
+				    state->pnns_connected,
+				    state->num_connected,
+				    state->timeout,
+				    state->ban_credits);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+
+	tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
+}
+
+/*
+ * The IPREALLOCATED stage has completed: this is the final stage, so
+ * success completes the takeover request.
+ */
+static void takeover_ipreallocated_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	bool ok;
+	int ret;
+
+	ok = ipreallocated_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+
+	if (ok) {
+		tevent_req_done(req);
+		return;
+	}
+
+	takeover_failed(req, ret);
+}
+
+/* Callback data for the banning message sent by takeover_failed(). */
+struct takeover_failed_state {
+ struct tevent_req *req; /* takeover request to fail once the message completes */
+ int ret; /* original error code to report on req */
+};
+
+/*
+ * Fail the takeover request, first assigning banning credits if
+ * appropriate.
+ *
+ * If bans are enabled and at least one node has accumulated banning
+ * credits, a CTDB_SRVID_BANNING message naming the node with the most
+ * credits is sent to the local node; the request is then failed with
+ * ret from takeover_failed_done().  In every other case the request is
+ * failed with ret immediately.
+ *
+ * Declared static to match the forward declaration.
+ */
+static void takeover_failed(struct tevent_req *req, int ret)
+{
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct tevent_req *subreq;
+	uint32_t max_pnn = CTDB_UNKNOWN_PNN;
+	unsigned int max_credits = 0;
+	uint32_t pnn;
+
+	/* Check that bans are enabled */
+	if (state->tun_list->enable_bans == 0) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* Find the node with the most credits; the strict comparison
+	 * means the lowest PNN wins a tie.
+	 */
+	for (pnn = 0; pnn < state->num_nodes; pnn++) {
+		if (state->ban_credits[pnn] > max_credits) {
+			max_pnn = pnn;
+			max_credits = state->ban_credits[pnn];
+		}
+	}
+
+	if (max_credits > 0) {
+		struct ctdb_req_message message;
+		struct takeover_failed_state *substate;
+
+		D_WARNING("Assigning banning credits to node %u\n", max_pnn);
+
+		substate = talloc_zero(state, struct takeover_failed_state);
+		if (tevent_req_nomem(substate, req)) {
+			return;
+		}
+		substate->req = req;
+		substate->ret = ret;
+
+		message.srvid = CTDB_SRVID_BANNING;
+		message.data.pnn = max_pnn;
+
+		subreq = ctdb_client_message_send(
+				state, state->ev, state->client,
+				ctdb_client_pnn(state->client),
+				&message);
+		if (subreq == NULL) {
+			D_ERR("failed to assign banning credits\n");
+			/* No callback will run, so nothing else will
+			 * release substate
+			 */
+			talloc_free(substate);
+			tevent_req_error(req, ret);
+			return;
+		}
+		tevent_req_set_callback(subreq, takeover_failed_done, substate);
+	} else {
+		tevent_req_error(req, ret);
+	}
+}
+
+/*
+ * The banning message has been sent (or has failed to send).  Either
+ * way the takeover request is now failed with the error code that was
+ * saved by takeover_failed().
+ */
+static void takeover_failed_done(struct tevent_req *subreq)
+{
+	struct takeover_failed_state *substate = tevent_req_callback_data(
+		subreq, struct takeover_failed_state);
+	struct tevent_req *req = substate->req;
+	int saved_ret = substate->ret;
+	int msg_ret;
+	bool sent;
+
+	sent = ctdb_client_message_recv(subreq, &msg_ret);
+	TALLOC_FREE(subreq);
+	if (! sent) {
+		D_ERR("failed to assign banning credits, ret=%d\n", msg_ret);
+	}
+
+	talloc_free(substate);
+	tevent_req_error(req, saved_ret);
+}
+
+/*
+ * Fetch the result of a takeover request.  The boolean result of
+ * generic_recv() is deliberately dropped; callers look at *perr.
+ */
+static void takeover_recv(struct tevent_req *req, int *perr)
+{
+	(void) generic_recv(req, perr);
+}
+
+/*
+ * Parse a comma-separated list of node numbers.
+ *
+ * Returns a talloc array (allocated off mem_ctx) with one entry per
+ * list element, or NULL on allocation failure or if any element is
+ * not a plain decimal number that fits in a uint32_t.  Previously
+ * atoi() was used, which silently turned malformed elements into
+ * node 0.
+ */
+static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
+{
+	char *strv = NULL;
+	int num, i, ret;
+	char *t;
+	uint32_t *nodes;
+
+	ret = strv_split(mem_ctx, &strv, s, ",");
+	if (ret != 0) {
+		D_ERR("out of memory\n");
+		return NULL;
+	}
+
+	num = strv_count(strv);
+
+	nodes = talloc_array(mem_ctx, uint32_t, num);
+	if (nodes == NULL) {
+		D_ERR("out of memory\n");
+		TALLOC_FREE(strv);
+		return NULL;
+	}
+
+	t = NULL;
+	for (i = 0; i < num; i++) {
+		char *endptr = NULL;
+		unsigned long val;
+
+		t = strv_next(strv, t);
+
+		/* Insist on a leading digit: strtoul() on its own would
+		 * accept leading whitespace and a sign.
+		 */
+		if (t[0] < '0' || t[0] > '9') {
+			goto invalid;
+		}
+
+		errno = 0;
+		val = strtoul(t, &endptr, 10);
+		if (errno != 0 || *endptr != '\0' || val > UINT32_MAX) {
+			goto invalid;
+		}
+
+		nodes[i] = (uint32_t)val;
+	}
+
+	/* The split string is only needed while parsing */
+	TALLOC_FREE(strv);
+	return nodes;
+
+invalid:
+	D_ERR("Invalid node number \"%s\"\n", t);
+	TALLOC_FREE(nodes);
+	TALLOC_FREE(strv);
+	return NULL;
+}
+
+/* Print a one-line usage message for progname to stderr. */
+static void usage(const char *progname)
+{
+	static const char fmt[] =
+		"\nUsage: %s <output-fd> <ctdb-socket-path> "
+		"[<force-rebalance-nodes>]\n";
+
+	fprintf(stderr, fmt, progname);
+}
+
+/*
+ * Arguments - write fd, socket path, optional comma-separated list of
+ * nodes to force rebalancing on.
+ *
+ * Runs one takeover and writes the integer result (0 on success) to
+ * the write fd.  The same value is used as the exit status, except
+ * for bad argument counts, which exit with 1 without writing.
+ */
+int main(int argc, const char *argv[])
+{
+ int write_fd;
+ const char *sockpath;
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ bool status;
+ int ret;
+ struct tevent_req *req;
+ uint32_t *force_rebalance_nodes = NULL;
+
+ if (argc < 3 || argc > 4) {
+ usage(argv[0]);
+ exit(1);
+ }
+
+ /* write_fd is parsed first so every later failure can be reported on it */
+ write_fd = atoi(argv[1]);
+ sockpath = argv[2];
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "talloc_new() failed\n");
+ ret = ENOMEM;
+ goto done;
+ }
+
+ if (argc == 4) {
+ force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
+ if (force_rebalance_nodes == NULL) {
+ usage(argv[0]);
+ ret = EINVAL;
+ goto done;
+ }
+ }
+
+ ret = logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
+ if (ret != 0) {
+ fprintf(stderr,
+ "ctdb-takeover: Unable to initialize logging\n");
+ goto done;
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ D_ERR("tevent_context_init() failed\n");
+ ret = ENOMEM;
+ goto done;
+ }
+
+ status = logging_setup_sighup_handler(ev, mem_ctx, NULL, NULL);
+ if (!status) {
+ D_ERR("logging_setup_sighup_handler() failed\n");
+ ret = ENOMEM;
+ goto done;
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
+ if (ret != 0) {
+ D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
+ goto done;
+ }
+
+ req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
+ if (req == NULL) {
+ D_ERR("takeover_send() failed\n");
+ ret = 1;
+ goto done;
+ }
+
+ if (! tevent_req_poll(req, ev)) {
+ D_ERR("tevent_req_poll() failed\n");
+ ret = 1;
+ goto done;
+ }
+
+ /* ret is 0 here (ctdb_client_init() succeeded).
+ * NOTE(review): presumably generic_recv() only writes *perr on
+ * failure, so success leaves ret at 0 - confirm.
+ */
+ takeover_recv(req, &ret);
+ TALLOC_FREE(req);
+ if (ret != 0) {
+ D_ERR("takeover run failed, ret=%d\n", ret);
+ }
+
+done:
+ /* Report the result on write_fd; the write's own result is not checked */
+ sys_write_v(write_fd, &ret, sizeof(ret));
+
+ talloc_free(mem_ctx);
+ return ret;
+}
diff --git a/ctdb/server/ctdb_traverse.c b/ctdb/server/ctdb_traverse.c
new file mode 100644
index 0000000..4865dcc
--- /dev/null
+++ b/ctdb/server/ctdb_traverse.c
@@ -0,0 +1,781 @@
+/*
+ efficient async ctdb traverse
+
+ Copyright (C) Andrew Tridgell 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+#include "system/time.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+typedef void (*ctdb_traverse_fn_t)(void *private_data, TDB_DATA key, TDB_DATA data);
+
+/*
+ handle returned to caller - freeing this handle will kill the child and
+ terminate the traverse
+ */
+struct ctdb_traverse_local_handle {
+ struct ctdb_traverse_local_handle *next, *prev; /* linkage in ctdb_db->traverse */
+ struct ctdb_db_context *ctdb_db; /* database being traversed */
+ int fd[2]; /* pipe: child writes its result to fd[1], parent reads fd[0] */
+ pid_t child; /* pid of the traverse child, from ctdb_fork() */
+ uint64_t srvid; /* copied from the traverse_all_state */
+ uint32_t client_reqid; /* copied from the traverse_all_state */
+ uint32_t reqid; /* request id placed in every marshalled record */
+ int srcnode; /* node that TRAVERSE_DATA controls are sent to */
+ void *private_data; /* passed back to callback (the traverse_all_state) */
+ ctdb_traverse_fn_t callback; /* called with tdb_null key/data when the child reports */
+ bool withemptyrecords; /* if set, empty records are not filtered out */
+ struct tevent_fd *fde; /* read event on fd[0] */
+ int records_failed; /* child-side count of records that could not be sent */
+ int records_sent; /* child-side count of records sent */
+};
+
+/*
+ * fd event handler: the traverse child has written its result to the
+ * pipe, or the pipe was closed.  Logs the outcome and then signals the
+ * end of the traverse by invoking the callback with tdb_null key and
+ * data.
+ */
+static void ctdb_traverse_child_handler(struct tevent_context *ev, struct tevent_fd *fde,
+					uint16_t flags, void *private_data)
+{
+	struct ctdb_traverse_local_handle *h = talloc_get_type(private_data,
+					struct ctdb_traverse_local_handle);
+	/* Take copies up front; nothing in h is touched after the
+	 * callback runs.
+	 */
+	ctdb_traverse_fn_t done_fn = h->callback;
+	void *done_arg = h->private_data;
+	int count;
+	ssize_t nread;
+
+	/* Read the number of records sent by traverse child */
+	nread = sys_read(h->fd[0], &count, sizeof(count));
+
+	if (nread != (ssize_t)sizeof(count)) {
+		/* Traverse child failed */
+		DEBUG(DEBUG_ERR, ("Local traverse failed db:%s reqid:%d\n",
+				  h->ctdb_db->db_name, h->reqid));
+	} else if (count >= 0) {
+		DEBUG(DEBUG_INFO, ("Local traverse end db:%s reqid:%d records:%d\n",
+				   h->ctdb_db->db_name, h->reqid, count));
+	} else {
+		/* Traverse failed; the child sends the negated count */
+		count = -count;
+		DEBUG(DEBUG_ERR, ("Local traverse failed db:%s reqid:%d records:%d\n",
+				  h->ctdb_db->db_name, h->reqid, count));
+	}
+
+	done_fn(done_arg, tdb_null, tdb_null);
+}
+
+/*
+  destroy an in-flight traverse operation: unlink the handle from the
+  database's traverse list and kill the traverse child
+ */
+static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
+{
+	struct ctdb_db_context *db = h->ctdb_db;
+
+	DLIST_REMOVE(db->traverse, h);
+	ctdb_kill(db->ctdb, h->child, SIGKILL);
+
+	return 0;
+}
+
+/*
+  callback from tdb_traverse_read()
+
+  Runs in the traverse child for every record in the local tdb.  For
+  volatile databases, records that are empty (unless empty records were
+  requested), too short to hold a header, or not mastered by this node
+  are skipped.  Every other record is marshalled and sent to the
+  requesting node in a TRAVERSE_DATA control.
+
+  Returns 0 to continue the traverse, -1 to abort it.
+ */
+static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
+{
+	struct ctdb_traverse_local_handle *h = talloc_get_type(p,
+							       struct ctdb_traverse_local_handle);
+	struct ctdb_rec_data_old *d;
+	struct ctdb_ltdb_header *hdr;
+	int res, status;
+	TDB_DATA outdata;
+
+	if (ctdb_db_volatile(h->ctdb_db)) {
+		/* A record shorter than the header cannot be
+		 * inspected: reading hdr->dmaster would run past the
+		 * end of the data.
+		 */
+		if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+			return 0;
+		}
+
+		/* filter out zero-length records */
+		if (!h->withemptyrecords &&
+		    data.dsize <= sizeof(struct ctdb_ltdb_header))
+		{
+			return 0;
+		}
+
+		/* filter out non-authoritative records */
+		hdr = (struct ctdb_ltdb_header *)data.dptr;
+		if (hdr->dmaster != h->ctdb_db->ctdb->pnn) {
+			return 0;
+		}
+	}
+
+	d = ctdb_marshall_record(h, h->reqid, key, NULL, data);
+	if (d == NULL) {
+		/* error handling is tricky in this child code .... */
+		h->records_failed++;
+		return -1;
+	}
+
+	outdata.dptr = (uint8_t *)d;
+	outdata.dsize = d->length;
+
+	res = ctdb_control(h->ctdb_db->ctdb, h->srcnode, 0, CTDB_CONTROL_TRAVERSE_DATA,
+			   CTDB_CTRL_FLAG_NOREPLY, outdata, NULL, NULL, &status, NULL, NULL);
+
+	/* Release the marshalled record straight away, otherwise one
+	 * copy per record piles up on h for the whole traverse.
+	 * NOTE(review): assumes ctdb_control() copies outdata into its
+	 * own packet before returning - confirm.
+	 */
+	talloc_free(d);
+
+	if (res != 0 || status != 0) {
+		h->records_failed++;
+		return -1;
+	}
+
+	h->records_sent++;
+	return 0;
+}
+
+/*
+ * State for serving a traverse request on this node; it is the talloc
+ * parent of the local traverse handle and is freed by
+ * traverse_all_callback() when the local traverse ends.
+ */
+struct traverse_all_state {
+ struct ctdb_context *ctdb; /* NOTE(review): presumably the owning daemon context - set outside this view */
+ struct ctdb_traverse_local_handle *h; /* NOTE(review): presumably the handle from ctdb_traverse_local() - set outside this view */
+ uint32_t reqid; /* copied into the local handle's reqid */
+ uint32_t srcnode; /* copied into the local handle's srcnode */
+ uint32_t client_reqid; /* copied into the local handle's client_reqid */
+ uint64_t srvid; /* copied into the local handle's srvid */
+ bool withemptyrecords; /* copied into the local handle's withemptyrecords */
+};
+
+/*
+  setup a non-blocking traverse of a local ltdb. The callback function
+  will be called on every record in the local ltdb. To stop the
+  traverse, talloc_free() the traverse_handle.
+
+  The traverse is finished when the callback is called with tdb_null for key and data
+
+  The traverse itself runs in a forked child, which sends each record
+  in a TRAVERSE_DATA control and finally writes the number of records
+  sent (negated on failure) to a pipe read by the parent.
+
+  Returns the handle (allocated off all_state), or NULL on failure.
+ */
+static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_context *ctdb_db,
+							       ctdb_traverse_fn_t callback,
+							       struct traverse_all_state *all_state)
+{
+	struct ctdb_traverse_local_handle *h;
+	pid_t parent;
+	int ret;
+
+	h = talloc_zero(all_state, struct ctdb_traverse_local_handle);
+	if (h == NULL) {
+		return NULL;
+	}
+
+	ret = pipe(h->fd);
+
+	if (ret != 0) {
+		talloc_free(h);
+		return NULL;
+	}
+
+	/* Remember our own pid before forking so the child can wait
+	 * for this process.  Calling getpid() inside the child, as was
+	 * done before, yields the child's own pid instead.
+	 */
+	parent = getpid();
+
+	h->child = ctdb_fork(ctdb_db->ctdb);
+
+	if (h->child == (pid_t)-1) {
+		close(h->fd[0]);
+		close(h->fd[1]);
+		talloc_free(h);
+		return NULL;
+	}
+
+	h->callback = callback;
+	h->private_data = all_state;
+	h->ctdb_db = ctdb_db;
+	h->client_reqid = all_state->client_reqid;
+	h->reqid = all_state->reqid;
+	h->srvid = all_state->srvid;
+	h->srcnode = all_state->srcnode;
+	h->withemptyrecords = all_state->withemptyrecords;
+
+	if (h->child == 0) {
+		/* start the traverse in the child */
+		int res, status;
+		struct ctdb_context *ctdb = ctdb_db->ctdb;
+		struct ctdb_rec_data_old *d;
+		TDB_DATA outdata;
+
+		/* The child only writes to the pipe */
+		close(h->fd[0]);
+
+		prctl_set_comment("ctdb_traverse");
+		if (switch_from_server_to_client(ctdb) != 0) {
+			DEBUG(DEBUG_CRIT, ("Failed to switch traverse child into client mode\n"));
+			_exit(0);
+		}
+
+		/* Marshall the empty end-of-traverse record up front,
+		 * before any data has been sent
+		 */
+		d = ctdb_marshall_record(h, h->reqid, tdb_null, NULL, tdb_null);
+		if (d == NULL) {
+			res = 0;
+			sys_write(h->fd[1], &res, sizeof(int));
+			_exit(0);
+		}
+
+		res = tdb_traverse_read(ctdb_db->ltdb->tdb, ctdb_traverse_local_fn, h);
+		if (res == -1 || h->records_failed > 0) {
+			/* traverse failed */
+			res = -(h->records_sent);
+		} else {
+			res = h->records_sent;
+		}
+
+		/* Wait till all the data is flushed from output queue */
+		while (ctdb_queue_length(ctdb->daemon.queue) > 0) {
+			tevent_loop_once(ctdb->ev);
+		}
+
+		/* End traverse by sending empty record */
+		outdata.dptr = (uint8_t *)d;
+		outdata.dsize = d->length;
+		ret = ctdb_control(ctdb, h->srcnode, 0,
+				   CTDB_CONTROL_TRAVERSE_DATA,
+				   CTDB_CTRL_FLAG_NOREPLY, outdata,
+				   NULL, NULL, &status, NULL, NULL);
+		if (ret == -1 || status == -1) {
+			/* Report failure by negating the record count */
+			if (res > 0) {
+				res = -res;
+			}
+		}
+
+		sys_write(h->fd[1], &res, sizeof(res));
+
+		ctdb_wait_for_process_to_exit(parent);
+		_exit(0);
+	}
+
+	/* The parent only reads from the pipe */
+	close(h->fd[1]);
+	set_close_on_exec(h->fd[0]);
+
+	talloc_set_destructor(h, traverse_local_destructor);
+
+	DLIST_ADD(ctdb_db->traverse, h);
+
+	h->fde = tevent_add_fd(ctdb_db->ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
+			       ctdb_traverse_child_handler, h);
+	if (h->fde == NULL) {
+		close(h->fd[0]);
+		/* The destructor unlinks h and kills the child */
+		talloc_free(h);
+		return NULL;
+	}
+	tevent_fd_set_auto_close(h->fde);
+
+	return h;
+}
+
+
+/* Handle for a cluster-wide traverse started by ctdb_daemon_traverse_all(). */
+struct ctdb_traverse_all_handle {
+ struct ctdb_context *ctdb; /* daemon context; owns the reqid registry used below */
+ struct ctdb_db_context *ctdb_db; /* database being traversed */
+ uint32_t reqid; /* id from reqid_new(); removed again by the destructor */
+ ctdb_traverse_fn_t callback; /* invoked with tdb_null key/data on timeout */
+ void *private_data; /* passed to callback (the traverse_start_state) */
+ uint32_t null_count; /* starts at 0; NOTE(review): presumably counts end-of-traverse records - confirm */
+ bool timedout; /* set to true by ctdb_traverse_all_timeout() */
+};
+
+/*
+  destroy a traverse_all op: give its request id back to the registry
+ */
+static int ctdb_traverse_all_destructor(struct ctdb_traverse_all_handle *state)
+{
+	struct ctdb_context *ctdb = state->ctdb;
+
+	reqid_remove(ctdb->idr, state->reqid);
+
+	return 0;
+}
+
+/*
+ * Timer handler, called when a traverse times out: logs, bumps the
+ * traverse timeout statistic, marks the handle as timed out and ends
+ * the traverse by invoking the callback with tdb_null key and data.
+ */
+static void ctdb_traverse_all_timeout(struct tevent_context *ev,
+				      struct tevent_timer *te,
+				      struct timeval t, void *private_data)
+{
+	struct ctdb_traverse_all_handle *handle;
+
+	handle = talloc_get_type(private_data, struct ctdb_traverse_all_handle);
+
+	DEBUG(DEBUG_ERR,(__location__ " Traverse all timeout on database:%s\n", handle->ctdb_db->db_name));
+	CTDB_INCREMENT_STAT(handle->ctdb, timeouts.traverse);
+
+	handle->timedout = true;
+	handle->callback(handle->private_data, tdb_null, tdb_null);
+}
+
+
+/*
+ * State for a traverse started on behalf of a client; it is the talloc
+ * parent of the ctdb_traverse_all_handle.
+ */
+struct traverse_start_state {
+ struct ctdb_context *ctdb; /* NOTE(review): presumably the owning daemon context - set outside this view */
+ struct ctdb_traverse_all_handle *h; /* NOTE(review): presumably the handle from ctdb_daemon_traverse_all() - set outside this view */
+ uint32_t srcnode; /* NOTE(review): presumably the requesting node - not used in this view */
+ uint32_t reqid; /* sent to the other nodes as client_reqid */
+ uint32_t db_id; /* NOTE(review): presumably the id of the traversed database - not used in this view */
+ uint64_t srvid; /* sent to the other nodes in the traverse request */
+ bool withemptyrecords; /* selects TRAVERSE_ALL_EXT instead of TRAVERSE_ALL */
+ int num_records; /* NOTE(review): presumably counts records delivered - not used in this view */
+};
+
+
+/*
+  setup a cluster-wide non-blocking traverse of a ctdb. The
+  callback function will be called on every record in the local
+  ltdb. To stop the traverse, talloc_free() the traverse_handle.
+
+  The traverse is finished when the callback is called with tdb_null
+  for key and data
+
+  Volatile databases are traversed on all active nodes.  Persistent
+  databases are traversed on a single node: the local one if it is in
+  the vnnmap, otherwise the first node of the vnnmap.
+
+  Returns the handle (allocated off start_state), or NULL on failure.
+ */
+static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_context *ctdb_db,
+								  ctdb_traverse_fn_t callback,
+								  struct traverse_start_state *start_state)
+{
+	struct ctdb_traverse_all_handle *state;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct tevent_timer *te;
+	int ret;
+	TDB_DATA data;
+	struct ctdb_traverse_all r;
+	struct ctdb_traverse_all_ext r_ext;
+	uint32_t destination;
+
+	state = talloc(start_state, struct ctdb_traverse_all_handle);
+	if (state == NULL) {
+		return NULL;
+	}
+
+	state->ctdb         = ctdb;
+	state->ctdb_db      = ctdb_db;
+	state->reqid        = reqid_new(ctdb_db->ctdb->idr, state);
+	state->callback     = callback;
+	state->private_data = start_state;
+	state->null_count   = 0;
+	state->timedout     = false;
+
+	/* From here on, freeing state also releases the reqid */
+	talloc_set_destructor(state, ctdb_traverse_all_destructor);
+
+	if (start_state->withemptyrecords) {
+		r_ext.db_id = ctdb_db->db_id;
+		r_ext.reqid = state->reqid;
+		r_ext.pnn   = ctdb->pnn;
+		r_ext.client_reqid = start_state->reqid;
+		r_ext.srvid = start_state->srvid;
+		r_ext.withemptyrecords = start_state->withemptyrecords;
+
+		data.dptr = (uint8_t *)&r_ext;
+		data.dsize = sizeof(r_ext);
+	} else {
+		r.db_id = ctdb_db->db_id;
+		r.reqid = state->reqid;
+		r.pnn   = ctdb->pnn;
+		r.client_reqid = start_state->reqid;
+		r.srvid = start_state->srvid;
+
+		data.dptr = (uint8_t *)&r;
+		data.dsize = sizeof(r);
+	}
+
+	if (ctdb_db_volatile(ctdb_db)) {
+		/* volatile database, traverse all active nodes */
+		destination = CTDB_BROADCAST_ACTIVE;
+	} else {
+		unsigned int i;
+		/* persistent database, traverse one node, preferably
+		 * the local one
+		 */
+		destination = ctdb->pnn;
+		/* check we are in the vnnmap */
+		for (i=0; i < ctdb->vnn_map->size; i++) {
+			if (ctdb->vnn_map->map[i] == ctdb->pnn) {
+				break;
+			}
+		}
+		/* if we are not in the vnn map we just pick the first
+		 * node instead
+		 */
+		if (i == ctdb->vnn_map->size) {
+			if (ctdb->vnn_map->size == 0) {
+				/* Empty vnnmap: there is no first node,
+				 * so map[0] must not be read
+				 */
+				DEBUG(DEBUG_ERR,(__location__ " Empty vnnmap, unable to traverse database:%s\n",
+						 ctdb_db->db_name));
+				talloc_free(state);
+				return NULL;
+			}
+			destination = ctdb->vnn_map->map[0];
+		}
+	}
+
+	/* timeout the traverse.  The timer is armed before the request
+	 * goes out so that a traverse is never started without one; it
+	 * is owned by state and is freed along with it.
+	 */
+	te = tevent_add_timer(ctdb->ev, state,
+			      timeval_current_ofs(ctdb->tunable.traverse_timeout, 0),
+			      ctdb_traverse_all_timeout, state);
+	if (te == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to set traverse timeout on database:%s\n",
+				 ctdb_db->db_name));
+		talloc_free(state);
+		return NULL;
+	}
+
+	/* tell all the nodes in the cluster to start sending records to this
+	 * node, or if it is a persistent database, just tell the local
+	 * node
+	 */
+
+	if (start_state->withemptyrecords) {
+		ret = ctdb_daemon_send_control(ctdb, destination, 0,
+				       CTDB_CONTROL_TRAVERSE_ALL_EXT,
+				       0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
+	} else {
+		ret = ctdb_daemon_send_control(ctdb, destination, 0,
+				       CTDB_CONTROL_TRAVERSE_ALL,
+				       0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
+	}
+
+	if (ret != 0) {
+		talloc_free(state);
+		return NULL;
+	}
+
+	DEBUG(DEBUG_NOTICE,("Starting traverse on DB %s (id %d)\n",
+			    ctdb_db->db_name, state->reqid));
+
+	return state;
+}
+
+/*
+  Callback given to ctdb_traverse_local() by the TRAVERSE_ALL control
+  handlers; called when the local traverse ends.  The per-request state
+  is released; key and data are not used.
+ */
+static void traverse_all_callback(void *p, TDB_DATA key, TDB_DATA data)
+{
+	struct traverse_all_state *finished;
+
+	finished = talloc_get_type(p, struct traverse_all_state);
+
+	/* nothing left to do for this request */
+	talloc_free(finished);
+}
+
+/*
+  Handler for CTDB_CONTROL_TRAVERSE_ALL_EXT: the extended version of
+  ctdb_control_traverse_all() that takes the "withemptyrecords"
+  parameter from the request.
+
+  Validates the request, allocates a traverse_all_state on the database
+  context and starts a local traverse with traverse_all_callback() as
+  the callback.  Returns 0 on success, -1 on any failure.  outdata is
+  not used.
+ */
+int32_t ctdb_control_traverse_all_ext(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
+{
+	struct ctdb_traverse_all_ext *req;
+	struct traverse_all_state *tstate;
+	struct ctdb_db_context *db;
+
+	if (data.dsize != sizeof(struct ctdb_traverse_all_ext)) {
+		DEBUG(DEBUG_ERR,(__location__ " Invalid size in ctdb_control_traverse_all_ext\n"));
+		return -1;
+	}
+	req = (struct ctdb_traverse_all_ext *)data.dptr;
+
+	db = find_ctdb_db(ctdb, req->db_id);
+	if (db == NULL) {
+		return -1;
+	}
+
+	/* an unhealthy database is only readable if the tunable allows it */
+	if (db->unhealthy_reason != NULL) {
+		if (ctdb->tunable.allow_unhealthy_db_read == 0) {
+			DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_traverse_all: %s\n",
+					db->db_name, db->unhealthy_reason));
+			return -1;
+		}
+		DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in ctdb_control_traverse_all: %s\n",
+				     db->db_name, db->unhealthy_reason));
+	}
+
+	tstate = talloc(db, struct traverse_all_state);
+	if (tstate == NULL) {
+		return -1;
+	}
+
+	tstate->ctdb             = ctdb;
+	tstate->srcnode          = req->pnn;
+	tstate->reqid            = req->reqid;
+	tstate->client_reqid     = req->client_reqid;
+	tstate->srvid            = req->srvid;
+	tstate->withemptyrecords = req->withemptyrecords;
+
+	tstate->h = ctdb_traverse_local(db, traverse_all_callback, tstate);
+	if (tstate->h != NULL) {
+		return 0;
+	}
+
+	talloc_free(tstate);
+	return -1;
+}
+
+/*
+  called when a CTDB_CONTROL_TRAVERSE_ALL control comes in. We then
+  setup a traverse of our local ltdb, sending the records as
+  CTDB_CONTROL_TRAVERSE_DATA records back to the originator
+
+  After the size check the request is converted into the extended
+  request format with withemptyrecords set to false and handed to
+  ctdb_control_traverse_all_ext(), which performs the database lookup,
+  the health check and the start of the local traverse.  This avoids
+  keeping two copies of that logic, in the same way that
+  ctdb_control_traverse_start() delegates to its _ext variant.
+
+  Returns 0 on success, -1 on failure.
+ */
+int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
+{
+	struct ctdb_traverse_all *c = (struct ctdb_traverse_all *)data.dptr;
+	struct ctdb_traverse_all_ext c_ext;
+	TDB_DATA data_ext;
+
+	if (data.dsize != sizeof(struct ctdb_traverse_all)) {
+		DEBUG(DEBUG_ERR,(__location__ " Invalid size in ctdb_control_traverse_all\n"));
+		return -1;
+	}
+
+	ZERO_STRUCT(c_ext);
+	c_ext.db_id = c->db_id;
+	c_ext.reqid = c->reqid;
+	c_ext.pnn = c->pnn;
+	c_ext.client_reqid = c->client_reqid;
+	c_ext.srvid = c->srvid;
+	c_ext.withemptyrecords = false;
+
+	data_ext.dptr = (uint8_t *)&c_ext;
+	data_ext.dsize = sizeof(c_ext);
+
+	return ctdb_control_traverse_all_ext(ctdb, data_ext, outdata);
+}
+
+
+/*
+  called when a CTDB_CONTROL_TRAVERSE_DATA control comes in. We then
+  call the traverse_all callback with the record
+
+  The payload is a marshalled record.  It is rejected unless it is large
+  enough to hold the record header, its length field matches the payload
+  size, and the key and data lengths fit inside the payload.
+
+  A record with empty key and data marks the end of one node's scan.
+  For a volatile database the callback only sees that end marker once
+  the number of markers equals ctdb_get_num_active_nodes(); for other
+  databases the first marker is passed straight through.
+
+  Returns 0 if the record was consumed, -1 if it is malformed or no
+  traverse with the given reqid exists.  outdata is not used.
+ */
+int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
+{
+	struct ctdb_rec_data_old *d = (struct ctdb_rec_data_old *)data.dptr;
+	struct ctdb_traverse_all_handle *state;
+	TDB_DATA key;
+	ctdb_traverse_fn_t callback;
+	void *private_data;
+	const size_t hdrlen = offsetof(struct ctdb_rec_data_old, data);
+
+	/* the fixed header must be present before any field is read */
+	if (data.dsize < hdrlen || data.dsize != d->length) {
+		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_traverse_data\n"));
+		return -1;
+	}
+
+	/* key and data must lie inside the buffer that was received */
+	if (d->keylen > data.dsize - hdrlen ||
+	    d->datalen > data.dsize - hdrlen - d->keylen) {
+		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_traverse_data\n"));
+		return -1;
+	}
+
+	state = reqid_find(ctdb->idr, d->reqid, struct ctdb_traverse_all_handle);
+	if (state == NULL || d->reqid != state->reqid) {
+		/* traverse might have been terminated already */
+		return -1;
+	}
+
+	key.dsize = d->keylen;
+	key.dptr  = &d->data[0];
+	data.dsize = d->datalen;
+	data.dptr = &d->data[d->keylen];
+
+	if (key.dsize == 0 && data.dsize == 0) {
+		state->null_count++;
+		/* Persistent databases are only scanned on one node (the local
+		 * node)
+		 */
+		if (ctdb_db_volatile(state->ctdb_db)) {
+			if (state->null_count != ctdb_get_num_active_nodes(ctdb)) {
+				return 0;
+			}
+		}
+	}
+
+	callback = state->callback;
+	private_data = state->private_data;
+
+	callback(private_data, key, data);
+	return 0;
+}
+
+/*
+  kill a in-progress traverse, used when a client disconnects
+
+  The payload is a struct ctdb_traverse_start naming the database, the
+  client reqid and the srvid of the traverse.  The first local traverse
+  on that database matching both reqid and srvid is freed.
+
+  Returns -1 if the payload is too short or the database is unknown,
+  0 otherwise (also when no matching traverse is found).  outdata and
+  srcnode are not used.
+ */
+int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA data,
+				    TDB_DATA *outdata, uint32_t srcnode)
+{
+	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_traverse_local_handle *t;
+
+	/* never read request fields from a truncated payload */
+	if (data.dsize < sizeof(*d)) {
+		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_traverse_kill\n"));
+		return -1;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, d->db_id);
+	if (ctdb_db == NULL) {
+		return -1;
+	}
+
+	for (t=ctdb_db->traverse; t; t=t->next) {
+		if (t->client_reqid == d->reqid &&
+		    t->srvid == d->srvid) {
+			/* t is not touched again after being freed */
+			talloc_free(t);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+  talloc destructor for a traverse_start_state, run when a client
+  disconnects during a traverse.  All connected nodes are told, via a
+  CTDB_CONTROL_TRAVERSE_KILL broadcast, to kill their traverse children
+  for this database/reqid/srvid.  Always returns 0.
+ */
+static int ctdb_traverse_start_destructor(struct traverse_start_state *state)
+{
+	struct ctdb_traverse_start kill_req;
+	TDB_DATA payload;
+
+	DEBUG(DEBUG_ERR,(__location__ " Traverse cancelled by client disconnect for database:0x%08x\n", state->db_id));
+
+	kill_req.srvid = state->srvid;
+	kill_req.reqid = state->reqid;
+	kill_req.db_id = state->db_id;
+
+	payload.dsize = sizeof(kill_req);
+	payload.dptr = (uint8_t *)&kill_req;
+
+	/* no reply is requested, and the result is not checked */
+	ctdb_daemon_send_control(state->ctdb, CTDB_BROADCAST_CONNECTED, 0,
+				 CTDB_CONTROL_TRAVERSE_KILL,
+				 0, CTDB_CTRL_FLAG_NOREPLY, payload, NULL, NULL);
+	return 0;
+}
+
+/*
+  Traverse callback which sends records as messages to the client.
+
+  Each record is marshalled and dispatched to the srvid stored in the
+  state.  A record with empty key and data ends the traverse: the state
+  is freed, and its destructor (which broadcasts TRAVERSE_KILL) is only
+  left in place if the traverse timed out.
+ */
+static void traverse_start_callback(void *p, TDB_DATA key, TDB_DATA data)
+{
+	struct traverse_start_state *state =
+		talloc_get_type(p, struct traverse_start_state);
+	bool last_record = (key.dsize == 0 && data.dsize == 0);
+	struct ctdb_rec_data_old *rec;
+	TDB_DATA msg;
+
+	rec = ctdb_marshall_record(state, state->reqid, key, NULL, data);
+	if (rec == NULL) {
+		return;
+	}
+
+	msg.dsize = rec->length;
+	msg.dptr = (uint8_t *)rec;
+
+	srvid_dispatch(state->ctdb->srv, state->srvid, 0, msg);
+
+	if (!last_record) {
+		state->num_records++;
+		return;
+	}
+
+	DEBUG(DEBUG_NOTICE, ("Ending traverse on DB %s (id %d), records %d\n",
+			     state->h->ctdb_db->db_name, state->h->reqid,
+			     state->num_records));
+
+	if (!state->h->timedout) {
+		/* end of traverse: no TRAVERSE_KILL control is needed */
+		talloc_set_destructor(state, NULL);
+	}
+	/* on timeout the destructor still runs and sends TRAVERSE_KILL */
+	talloc_free(state);
+}
+
+
+/**
+ * Start a traverse_all; called as a control from a client.
+ * Extended version which takes the "withemptyrecords" parameter.
+ *
+ * The traverse state is allocated on the client structure found via
+ * client_id.  Once the cluster-wide traverse has been started, a
+ * destructor is installed on the state (ctdb_traverse_start_destructor).
+ *
+ * Returns 0 on success, -1 on failure.  outdata is not used.
+ */
+int32_t ctdb_control_traverse_start_ext(struct ctdb_context *ctdb,
+					TDB_DATA data,
+					TDB_DATA *outdata,
+					uint32_t srcnode,
+					uint32_t client_id)
+{
+	struct ctdb_traverse_start_ext *req;
+	struct traverse_start_state *tstate;
+	struct ctdb_db_context *db;
+	struct ctdb_client *client;
+
+	client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " No client found\n"));
+		return -1;
+	}
+
+	req = (struct ctdb_traverse_start_ext *)data.dptr;
+	if (data.dsize != sizeof(*req)) {
+		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_traverse_start\n"));
+		return -1;
+	}
+
+	db = find_ctdb_db(ctdb, req->db_id);
+	if (db == NULL) {
+		return -1;
+	}
+
+	/* an unhealthy database is only readable if the tunable allows it */
+	if (db->unhealthy_reason != NULL) {
+		if (ctdb->tunable.allow_unhealthy_db_read == 0) {
+			DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_traverse_start: %s\n",
+					db->db_name, db->unhealthy_reason));
+			return -1;
+		}
+		DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in ctdb_control_traverse_start: %s\n",
+				     db->db_name, db->unhealthy_reason));
+	}
+
+	tstate = talloc(client, struct traverse_start_state);
+	if (tstate == NULL) {
+		return -1;
+	}
+
+	tstate->ctdb = ctdb;
+	tstate->srcnode = srcnode;
+	tstate->db_id = req->db_id;
+	tstate->reqid = req->reqid;
+	tstate->srvid = req->srvid;
+	tstate->withemptyrecords = req->withemptyrecords;
+	tstate->num_records = 0;
+
+	tstate->h = ctdb_daemon_traverse_all(db, traverse_start_callback, tstate);
+	if (tstate->h == NULL) {
+		/* no destructor is installed yet, so this is a plain free */
+		talloc_free(tstate);
+		return -1;
+	}
+
+	talloc_set_destructor(tstate, ctdb_traverse_start_destructor);
+
+	return 0;
+}
+
+/**
+ * start a traverse_all - called as a control from a client.
+ *
+ * The request is converted into the extended request format with
+ * withemptyrecords set to false and handed to
+ * ctdb_control_traverse_start_ext().
+ *
+ * Returns -1 if the payload is too short to hold a
+ * struct ctdb_traverse_start, otherwise the result of the _ext call.
+ */
+int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb,
+				    TDB_DATA data,
+				    TDB_DATA *outdata,
+				    uint32_t srcnode,
+				    uint32_t client_id)
+{
+	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
+	struct ctdb_traverse_start_ext d2;
+	TDB_DATA data2;
+
+	/* never read request fields from a truncated payload */
+	if (data.dsize < sizeof(*d)) {
+		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_traverse_start\n"));
+		return -1;
+	}
+
+	ZERO_STRUCT(d2);
+	d2.db_id = d->db_id;
+	d2.reqid = d->reqid;
+	d2.srvid = d->srvid;
+	d2.withemptyrecords = false;
+
+	data2.dsize = sizeof(d2);
+	data2.dptr = (uint8_t *)&d2;
+
+	return ctdb_control_traverse_start_ext(ctdb, data2, outdata, srcnode, client_id);
+}
diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c
new file mode 100644
index 0000000..0dce656
--- /dev/null
+++ b/ctdb/server/ctdb_tunables.c
@@ -0,0 +1,170 @@
+/*
+ ctdb tunables code
+
+ Copyright (C) Andrew Tridgell 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+#include "common/path.h"
+#include "common/tunable.h"
+
+/*
+  set all tunables to defaults
+
+  Thin wrapper: passes the tunable set embedded in the ctdb context to
+  ctdb_tunable_set_defaults().
+ */
+void ctdb_tunables_set_defaults(struct ctdb_context *ctdb)
+{
+ ctdb_tunable_set_defaults(&ctdb->tunable);
+}
+
+
+/*
+  Control handler: get the value of a single tunable.
+
+  indata holds a struct ctdb_control_get_tunable whose name/length pair
+  identifies the tunable.  On success outdata receives a talloc'ed
+  uint32_t containing the value.
+
+  Returns 0 on success, -1 on malformed input or allocation failure and
+  -EINVAL if ctdb_tunable_get_value() does not find the tunable.
+ */
+int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
+				 TDB_DATA *outdata)
+{
+	struct ctdb_control_get_tunable *req =
+		(struct ctdb_control_get_tunable *)indata.dptr;
+	const size_t name_ofs = offsetof(struct ctdb_control_get_tunable, name);
+	char *name;
+	uint32_t value;
+	bool found;
+
+	/* the name must lie completely inside the received buffer */
+	if (indata.dsize < sizeof(*req) ||
+	    req->length > indata.dsize - name_ofs) {
+		DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_get_tunable\n"));
+		return -1;
+	}
+
+	/* take a NUL-terminated copy of the name for the lookup */
+	name = talloc_strndup(ctdb, (char*)req->name, req->length);
+	CTDB_NO_MEMORY(ctdb, name);
+
+	found = ctdb_tunable_get_value(&ctdb->tunable, name, &value);
+	talloc_free(name);
+	if (!found) {
+		return -EINVAL;
+	}
+
+	outdata->dptr = (uint8_t *)talloc(outdata, uint32_t);
+	CTDB_NO_MEMORY(ctdb, outdata->dptr);
+
+	outdata->dsize = sizeof(uint32_t);
+	*(uint32_t *)outdata->dptr = value;
+
+	return 0;
+}
+
+
+/*
+  Control handler: set the value of a single tunable.
+
+  indata holds a struct ctdb_tunable_old carrying the tunable name and
+  the new value.
+
+  Returns 0 on success, 1 if the value was set but the tunable is
+  obsolete, and -1 on malformed input, allocation failure or if
+  ctdb_tunable_set_value() rejects the request.
+ */
+int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_tunable_old *req =
+		(struct ctdb_tunable_old *)indata.dptr;
+	const size_t name_ofs = offsetof(struct ctdb_tunable_old, name);
+	char *name;
+	bool ok;
+	bool obsolete;
+	int32_t result;
+
+	/* the name must lie completely inside the received buffer */
+	if (indata.dsize < sizeof(*req) ||
+	    req->length > indata.dsize - name_ofs) {
+		DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tunable\n"));
+		return -1;
+	}
+
+	name = talloc_strndup(ctdb, (char *)req->name, req->length);
+	CTDB_NO_MEMORY(ctdb, name);
+
+	ok = ctdb_tunable_set_value(&ctdb->tunable, name, req->value,
+				    &obsolete);
+	if (!ok) {
+		result = -1;
+	} else if (obsolete) {
+		DEBUG(DEBUG_WARNING,
+		      ("Setting obsolete tunable \"%s\"\n", name));
+		result = 1;
+	} else {
+		result = 0;
+	}
+
+	/* single release point for the name copy */
+	talloc_free(name);
+	return result;
+}
+
+/*
+  Control handler: list tunables.
+
+  The string produced by ctdb_tunable_names_to_string() is copied,
+  including its terminating NUL, into a struct ctdb_control_list_tunable
+  allocated on outdata.
+
+  outdata->dptr and outdata->dsize are only set together once the reply
+  buffer has been allocated, and the temporary string is released on
+  every path.
+
+  Returns 0 on success, -1 on allocation failure.
+ */
+int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata)
+{
+	char *list = NULL;
+	struct ctdb_control_list_tunable *t;
+	size_t list_len;
+	size_t reply_len;
+
+	list = ctdb_tunable_names_to_string(outdata);
+	CTDB_NO_MEMORY(ctdb, list);
+
+	/* length of the name list including the terminating NUL */
+	list_len = strlen(list) + 1;
+	reply_len = offsetof(struct ctdb_control_list_tunable, data) + list_len;
+
+	outdata->dptr = talloc_size(outdata, reply_len);
+	if (outdata->dptr == NULL) {
+		/* do not leave the temporary string behind on outdata */
+		talloc_free(list);
+	}
+	CTDB_NO_MEMORY(ctdb, outdata->dptr);
+	outdata->dsize = reply_len;
+
+	t = (struct ctdb_control_list_tunable *)outdata->dptr;
+	t->length = list_len;
+
+	memcpy(t->data, list, list_len);
+	talloc_free(list);
+
+	return 0;
+}
+
+/*
+  Load tunables from the file "ctdb.tunables" below the path returned by
+  path_etcdir_append() into ctdb->tunable.
+
+  Returns the result of ctdb_tunable_load_file(), or false if the
+  temporary context or the file path cannot be allocated.
+ */
+bool ctdb_tunables_load(struct ctdb_context *ctdb)
+{
+	TALLOC_CTX *scratch;
+	char *tunables_file;
+	bool loaded = false;
+
+	scratch = talloc_new(ctdb);
+	if (scratch == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		return false;
+	}
+
+	tunables_file = path_etcdir_append(scratch, "ctdb.tunables");
+	if (tunables_file != NULL) {
+		/* No need to log error, already logged by the loader */
+		loaded = ctdb_tunable_load_file(scratch,
+						&ctdb->tunable,
+						tunables_file);
+	} else {
+		D_ERR("Failed to construct path for ctdb.tunables\n");
+	}
+
+	talloc_free(scratch);
+	return loaded;
+}
diff --git a/ctdb/server/ctdb_tunnel.c b/ctdb/server/ctdb_tunnel.c
new file mode 100644
index 0000000..2df9474
--- /dev/null
+++ b/ctdb/server/ctdb_tunnel.c
@@ -0,0 +1,141 @@
+/*
+ ctdb_tunnel protocol code
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/reqid.h"
+#include "common/srvid.h"
+
+#include "ctdb_private.h"
+
+/*
+  Control handler: register tunnel_id for the client identified by
+  client_id.  The registration is made with daemon_tunnel_handler as
+  the handler and the client as both memory context and private data.
+
+  Returns 0 on success, -1 if the client is unknown, the tunnel id is
+  already registered, or registration fails.
+ */
+int32_t ctdb_control_tunnel_register(struct ctdb_context *ctdb,
+				     uint32_t client_id, uint64_t tunnel_id)
+{
+	struct ctdb_client *owner;
+
+	owner = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	if (owner == NULL) {
+		DEBUG(DEBUG_ERR, ("Bad client_id in ctdb_tunnel_register\n"));
+		return -1;
+	}
+
+	/* a result of 0 from srvid_exists() is treated as "already there" */
+	if (srvid_exists(ctdb->tunnels, tunnel_id, NULL) == 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Tunnel id 0x%"PRIx64" already registered\n",
+		       tunnel_id));
+		return -1;
+	}
+
+	if (srvid_register(ctdb->tunnels, owner, tunnel_id,
+			   daemon_tunnel_handler, owner) != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Failed to register tunnel id 0x%"PRIx64"\n",
+		       tunnel_id));
+		return -1;
+	}
+
+	DEBUG(DEBUG_INFO, ("Registered tunnel for id 0x%"PRIx64"\n",
+			   tunnel_id));
+	return 0;
+}
+
+/*
+  Control handler: remove the registration of tunnel_id made for the
+  client identified by client_id.
+
+  Returns 0 on success, -1 if the client is unknown or
+  srvid_deregister() fails.
+ */
+int32_t ctdb_control_tunnel_deregister(struct ctdb_context *ctdb,
+				       uint32_t client_id, uint64_t tunnel_id)
+{
+	struct ctdb_client *owner;
+
+	owner = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+	if (owner == NULL) {
+		DEBUG(DEBUG_ERR, ("Bad client_id in ctdb_tunnel_deregister\n"));
+		return -1;
+	}
+
+	if (srvid_deregister(ctdb->tunnels, tunnel_id, owner) == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Failed to deregister tunnel id 0x%"PRIx64"\n",
+	       tunnel_id));
+	return -1;
+}
+
+/*
+  Build a CTDB_REQ_TUNNEL packet carrying data and queue it for
+  destnode.
+
+  Returns 0 once the packet has been passed to ctdb_queue_packet(),
+  -1 if the transport is down (ctdb->methods is NULL) or the packet
+  cannot be allocated.
+ */
+int ctdb_daemon_send_tunnel(struct ctdb_context *ctdb, uint32_t destnode,
+			    uint64_t tunnel_id, uint32_t flags, TDB_DATA data)
+{
+	struct ctdb_req_tunnel_old *c;
+	size_t len;
+
+	if (ctdb->methods == NULL) {
+		DEBUG(DEBUG_INFO,
+		      ("Failed to send tunnel. Transport is DOWN\n"));
+		return -1;
+	}
+
+	len = offsetof(struct ctdb_req_tunnel_old, data) + data.dsize;
+	c = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_TUNNEL, len,
+				    struct ctdb_req_tunnel_old);
+	if (c == NULL) {
+		DEBUG(DEBUG_ERR,
+		      ("Memory error in ctdb_daemon_send_tunnel()\n"));
+		return -1;
+	}
+
+	c->hdr.destnode = destnode;
+	c->tunnel_id = tunnel_id;
+	c->flags = flags;
+	c->datalen = data.dsize;
+	/*
+	 * An empty payload may come with a NULL dptr; memcpy() must not
+	 * be given a NULL source even for a zero length.
+	 */
+	if (data.dsize > 0) {
+		memcpy(c->data, data.dptr, data.dsize);
+	}
+
+	ctdb_queue_packet(ctdb, &c->hdr);
+
+	talloc_free(c);
+	return 0;
+}
+
+/*
+  Handle an incoming tunnel request packet: the whole packet (header
+  included, hdr->length bytes) is dispatched to the handler registered
+  for the packet's tunnel id.  An error is logged if dispatch fails.
+ */
+void ctdb_request_tunnel(struct ctdb_context *ctdb,
+			 struct ctdb_req_header *hdr)
+{
+	struct ctdb_req_tunnel_old *req = (struct ctdb_req_tunnel_old *)hdr;
+	TDB_DATA packet;
+
+	packet.dptr = (uint8_t *)req;
+	packet.dsize = hdr->length;
+
+	if (srvid_dispatch(ctdb->tunnels, req->tunnel_id, 0, packet) != 0) {
+		DEBUG(DEBUG_ERR, ("Tunnel id 0x%"PRIx64" not registered\n",
+				  req->tunnel_id));
+	}
+}
diff --git a/ctdb/server/ctdb_update_record.c b/ctdb/server/ctdb_update_record.c
new file mode 100644
index 0000000..405499c
--- /dev/null
+++ b/ctdb/server/ctdb_update_record.c
@@ -0,0 +1,372 @@
+/*
+ implementation of the update record control
+
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/time.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+  State of one update-record request: created by
+  ctdb_control_update_record(), read by the writing child in
+  ctdb_persistent_store(), and freed once the reply has been sent.
+ */
+struct ctdb_persistent_write_state {
+ struct ctdb_db_context *ctdb_db; /* database the records are written to */
+ struct ctdb_marshall_buffer *m; /* marshalled records from the control payload */
+ struct ctdb_req_control_old *c; /* originating control, needed for the reply */
+ uint32_t flags; /* 0 or UPDATE_FLAGS_REPLACE_ONLY */
+};
+
+/* don't create/update records that do not exist locally */
+#define UPDATE_FLAGS_REPLACE_ONLY 1
+
+/*
+  called from a child process to write the data
+
+  All records in state->m are stored inside a single tdb transaction.
+  With UPDATE_FLAGS_REPLACE_ONLY set, records that do not exist locally
+  are skipped.  A record is refused if the stored RSN is greater than or
+  equal to the new RSN while the stored data differs from the new data.
+
+  Returns 0 after a successful commit.  On any error the transaction is
+  cancelled (or was never started/committed) and -1 is returned.
+ */
+static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
+{
+	unsigned int i;
+	int ret;
+	struct ctdb_rec_data_old *rec = NULL;
+	struct ctdb_marshall_buffer *m = state->m;
+
+	ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
+	if (ret == -1) {
+		DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
+				 state->ctdb_db->db_id));
+		return -1;
+	}
+
+	for (i=0;i<m->count;i++) {
+		struct ctdb_ltdb_header oldheader;
+		struct ctdb_ltdb_header header;
+		TDB_DATA key, data, olddata;
+		TALLOC_CTX *tmp_ctx = talloc_new(state);
+
+		/*
+		 * Without a temporary context the old record fetched below
+		 * could never be released, so give up instead.
+		 */
+		if (tmp_ctx == NULL) {
+			D_ERR("Memory allocation error for db_id 0x%08x "
+			      "in ctdb_persistent_store\n",
+			      state->ctdb_db->db_id);
+			goto failed;
+		}
+
+		rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
+
+		if (rec == NULL) {
+			D_ERR("Failed to get next record %u for db_id 0x%08x "
+			      "in ctdb_persistent_store\n",
+			      i,
+			      state->ctdb_db->db_id);
+			talloc_free(tmp_ctx);
+			goto failed;
+		}
+
+		/* we must check if the record exists or not because
+		   ctdb_ltdb_fetch will unconditionally create a record
+		 */
+		if (state->flags & UPDATE_FLAGS_REPLACE_ONLY) {
+			TDB_DATA trec;
+			trec = tdb_fetch(state->ctdb_db->ltdb->tdb, key);
+			/*
+			 * Only the size is of interest.  Release the buffer
+			 * on every path; free(NULL) is a no-op.
+			 */
+			free(trec.dptr);
+			if (trec.dsize == 0) {
+				talloc_free(tmp_ctx);
+				continue;
+			}
+		}
+
+		/* fetch the old header and ensure the rsn is less than the new rsn */
+		ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
+					 state->ctdb_db->db_id));
+			talloc_free(tmp_ctx);
+			goto failed;
+		}
+
+		/* an equal or older RSN is only tolerated for identical data */
+		if (oldheader.rsn >= header.rsn &&
+		    (olddata.dsize != data.dsize ||
+		     memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
+			DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
+					  state->ctdb_db->db_id,
+					  (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
+			talloc_free(tmp_ctx);
+			goto failed;
+		}
+
+		talloc_free(tmp_ctx);
+
+		ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
+		if (ret != 0) {
+			DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
+					  state->ctdb_db->db_id));
+			goto failed;
+		}
+	}
+
+	ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
+	if (ret == -1) {
+		DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
+				 state->ctdb_db->db_id));
+		return -1;
+	}
+
+	return 0;
+
+failed:
+	tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
+	return -1;
+}
+
+
+/*
+  Called when the child has completed the persistent write on our
+  behalf: the child's status is sent back as the reply to the
+  originating control and the write state is freed.
+ */
+static void ctdb_persistent_write_callback(int status, void *private_data)
+{
+	struct ctdb_persistent_write_state *wstate;
+	struct ctdb_context *ctdb;
+
+	wstate = talloc_get_type(private_data,
+				 struct ctdb_persistent_write_state);
+	ctdb = wstate->ctdb_db->ctdb;
+
+	ctdb_request_control_reply(ctdb, wstate->c, NULL, status, NULL);
+	talloc_free(wstate);
+}
+
+/*
+  Timer handler installed by ctdb_control_update_record(): called if
+  the write has not completed within the control timeout.  Replies to
+  the originating control with status -1 and an error string, then
+  frees the write state.
+ */
+static void ctdb_persistent_lock_timeout(struct tevent_context *ev,
+					 struct tevent_timer *te,
+					 struct timeval t, void *private_data)
+{
+	struct ctdb_persistent_write_state *wstate;
+	struct ctdb_context *ctdb;
+
+	wstate = talloc_get_type(private_data,
+				 struct ctdb_persistent_write_state);
+	ctdb = wstate->ctdb_db->ctdb;
+
+	ctdb_request_control_reply(ctdb, wstate->c, NULL, -1, "timeout in ctdb_persistent_lock");
+	talloc_free(wstate);
+}
+
+/*
+  Handle for one child process forked by ctdb_childwrite() to write
+  records to the database.
+ */
+struct childwrite_handle {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db;
+ struct tevent_fd *fde; /* fd event watching fd[0] for the child's status byte */
+ int fd[2]; /* pipe: parent reads fd[0], child writes fd[1] */
+ pid_t child; /* pid returned by ctdb_fork() */
+ void *private_data; /* passed back to callback */
+ void (*callback)(int, void *); /* called with the child's status byte */
+ struct timeval start_time; /* set after setup, used for the latency statistic */
+};
+
+/*
+  talloc destructor for a childwrite handle that is freed before
+  childwrite_handler() has run: the pending-call statistic is dropped
+  and the child process is sent SIGKILL.  Always returns 0.
+ */
+static int childwrite_destructor(struct childwrite_handle *h)
+{
+ CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
+ ctdb_kill(h->ctdb, h->child, SIGKILL);
+ return 0;
+}
+
+/* called when the child process has finished writing the record to the
+ database
+
+ fd event handler for the read end of the childwrite pipe.  Reads the
+ one-byte status written by the child (treated as 1 if the read
+ fails), passes it to the stored callback, then kills the child and
+ frees the handle.
+*/
+static void childwrite_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ struct childwrite_handle *h = talloc_get_type(private_data,
+ struct childwrite_handle);
+ /* local copies of the handle fields used further down */
+ void *p = h->private_data;
+ void (*callback)(int, void *) = h->callback;
+ pid_t child = h->child;
+ TALLOC_CTX *tmp_ctx = talloc_new(ev);
+ int ret;
+ char c;
+
+ CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
+ CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
+
+ /* the handle needs to go away when the context is gone - when
+ the handle goes away this implicitly closes the pipe, which
+ kills the child */
+ talloc_steal(tmp_ctx, h);
+
+ /* the statistic was decremented above and the child is killed
+ explicitly below, so the destructor (which does both) must
+ not run as well */
+ talloc_set_destructor(h, NULL);
+
+ ret = sys_read(h->fd[0], &c, 1);
+ if (ret < 1) {
+ DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
+ /* report a failed read as write failure */
+ c = 1;
+ }
+
+ callback(c, p);
+
+ /* h is now owned by tmp_ctx (see talloc_steal above) and is only
+ released by the final talloc_free(tmp_ctx) */
+ ctdb_kill(h->ctdb, child, SIGKILL);
+ talloc_free(tmp_ctx);
+}
+
+/* this creates a child process which will take out a tdb transaction
+   and write the record to the database.
+
+   The child runs ctdb_persistent_store() and reports the result as a
+   single byte on a pipe: 0 on success, 1 on failure.  When the pipe
+   becomes readable in the parent, childwrite_handler() passes that
+   byte to callback together with state.
+
+   The returned handle is a talloc child of state; freeing it before
+   the handler has run kills the child.  Returns NULL on failure, in
+   which case the pending_childwrite_calls statistic has been restored.
+*/
+static struct childwrite_handle *ctdb_childwrite(
+				struct ctdb_db_context *ctdb_db,
+				void (*callback)(int, void *private_data),
+				struct ctdb_persistent_write_state *state)
+{
+	struct childwrite_handle *result;
+	int ret;
+	pid_t parent = getpid();
+
+	CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
+	CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
+
+	if (!(result = talloc_zero(state, struct childwrite_handle))) {
+		CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
+		return NULL;
+	}
+
+	ret = pipe(result->fd);
+
+	if (ret != 0) {
+		talloc_free(result);
+		CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
+		return NULL;
+	}
+
+	result->child = ctdb_fork(ctdb_db->ctdb);
+
+	if (result->child == (pid_t)-1) {
+		close(result->fd[0]);
+		close(result->fd[1]);
+		talloc_free(result);
+		CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
+		return NULL;
+	}
+
+	result->callback = callback;
+	result->private_data = state;
+	result->ctdb = ctdb_db->ctdb;
+	result->ctdb_db = ctdb_db;
+
+	if (result->child == 0) {
+		/* child: write the records, then report one status byte */
+		char c = 0;
+
+		close(result->fd[0]);
+		prctl_set_comment("ctdb_write_persistent");
+		ret = ctdb_persistent_store(state);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
+			c = 1;
+		}
+
+		sys_write(result->fd[1], &c, 1);
+
+		ctdb_wait_for_process_to_exit(parent);
+		_exit(0);
+	}
+
+	/* parent: only the read end is needed from here on */
+	close(result->fd[1]);
+	set_close_on_exec(result->fd[0]);
+
+	talloc_set_destructor(result, childwrite_destructor);
+
+	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
+
+	result->fde = tevent_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
+				    TEVENT_FD_READ, childwrite_handler,
+				    (void *)result);
+	if (result->fde == NULL) {
+		/*
+		 * No fd event owns the read end, so it has to be closed
+		 * by hand.  The destructor installed above already drops
+		 * pending_childwrite_calls and kills the child, so the
+		 * statistic must not be decremented a second time here.
+		 */
+		close(result->fd[0]);
+		talloc_free(result);
+		return NULL;
+	}
+	tevent_fd_set_auto_close(result->fde);
+
+	result->start_time = timeval_current();
+
+	return result;
+}
+
+/*
+  update a record on this node if the new record has a higher rsn than the
+  current record
+
+  recdata is a marshall buffer.  The request is rejected while recovery
+  is active, if the buffer is too short to contain its db_id and count
+  fields, if the database is unknown, or if the database is unhealthy.
+
+  The write itself is done by a child process (see ctdb_childwrite()).
+  On success *async_reply is set to true and the reply is sent later,
+  either by ctdb_persistent_write_callback() or, after
+  tunable.control_timeout seconds, by ctdb_persistent_lock_timeout().
+
+  Returns 0 if the write was started, -1 otherwise.
+ */
+int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
+				   struct ctdb_req_control_old *c, TDB_DATA recdata,
+				   bool *async_reply)
+{
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_persistent_write_state *state;
+	struct childwrite_handle *handle;
+	struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
+	const size_t db_id_end =
+		offsetof(struct ctdb_marshall_buffer, db_id) + sizeof(m->db_id);
+	const size_t count_end =
+		offsetof(struct ctdb_marshall_buffer, count) + sizeof(m->count);
+
+	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+		DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
+		return -1;
+	}
+
+	/* db_id and count are read below and in the child: both must be present */
+	if (recdata.dptr == NULL ||
+	    recdata.dsize < db_id_end ||
+	    recdata.dsize < count_end) {
+		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_update_record\n"));
+		return -1;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, m->db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
+		return -1;
+	}
+
+	if (ctdb_db->unhealthy_reason) {
+		DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
+				 ctdb_db->db_name, ctdb_db->unhealthy_reason));
+		return -1;
+	}
+
+	state = talloc(ctdb, struct ctdb_persistent_write_state);
+	CTDB_NO_MEMORY(ctdb, state);
+
+	state->ctdb_db = ctdb_db;
+	state->c  = c;
+	state->m  = m;
+	state->flags = 0;
+	if (ctdb_db_volatile(ctdb_db)) {
+		/* for volatile databases only existing records are replaced */
+		state->flags = UPDATE_FLAGS_REPLACE_ONLY;
+	}
+
+	/* create a child process to take out a transaction and
+	   write the data.
+	*/
+	handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
+	if (handle == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
+		talloc_free(state);
+		return -1;
+	}
+
+	/* we need to wait for the replies */
+	*async_reply = true;
+
+	/* need to keep the control structure around */
+	talloc_steal(state, c);
+
+	/* but we won't wait forever */
+	tevent_add_timer(ctdb->ev, state,
+			 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
+			 ctdb_persistent_lock_timeout, state);
+
+	return 0;
+}
+
diff --git a/ctdb/server/ctdb_uptime.c b/ctdb/server/ctdb_uptime.c
new file mode 100644
index 0000000..53025f5
--- /dev/null
+++ b/ctdb/server/ctdb_uptime.c
@@ -0,0 +1,55 @@
+/*
+ ctdb uptime code
+
+ Copyright (C) Ronnie Sahlberg 2008
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/syslog.h"
+#include "system/time.h"
+#include "system/filesys.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+  Control handler: returns the ctdb uptime.
+
+  A struct ctdb_uptime is allocated on outdata and filled with the
+  current time plus the daemon start time and the start/finish times of
+  the last recovery taken from the ctdb context.
+
+  Returns 0 on success, -1 on allocation failure.
+*/
+int32_t ctdb_control_uptime(struct ctdb_context *ctdb, TDB_DATA *outdata)
+{
+	struct ctdb_uptime *uptime = talloc_zero(outdata, struct ctdb_uptime);
+
+	CTDB_NO_MEMORY(ctdb, uptime);
+
+	uptime->ctdbd_start_time       = ctdb->ctdbd_start_time;
+	uptime->last_recovery_started  = ctdb->last_recovery_started;
+	uptime->last_recovery_finished = ctdb->last_recovery_finished;
+	gettimeofday(&uptime->current_time, NULL);
+
+	outdata->dptr = (uint8_t *)uptime;
+	outdata->dsize = sizeof(struct ctdb_uptime);
+
+	return 0;
+}
diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c
new file mode 100644
index 0000000..7ff79ac
--- /dev/null
+++ b/ctdb/server/ctdb_vacuum.c
@@ -0,0 +1,1990 @@
+/*
+ ctdb vacuuming events
+
+ Copyright (C) Ronnie Sahlberg 2009
+ Copyright (C) Michael Adam 2010-2013
+ Copyright (C) Stefan Metzmacher 2010-2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/time.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "protocol/protocol_private.h"
+
+#include "common/rb_tree.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "protocol/protocol_api.h"
+
+/*
+ * Timeout passed to the ctdb_ctrl_* calls made in this file
+ * (timeval_current_ofs(10, 0); presumably "10 seconds from now" -
+ * confirm against timeval_current_ofs()).
+ */
+#define TIMELIMIT() timeval_current_ofs(10, 0)
+
+/* State of a vacuuming child process as tracked by its context below. */
+enum vacuum_child_status { VACUUM_RUNNING, VACUUM_OK, VACUUM_ERROR, VACUUM_TIMEOUT};
+
+/*
+ * Bookkeeping for one vacuuming child.
+ * NOTE(review): the code using these fields lies outside this part of
+ * the file; the field notes below are inferred from the names only.
+ */
+struct ctdb_vacuum_child_context {
+ /* the vacuum handle this child belongs to */
+ struct ctdb_vacuum_handle *vacuum_handle;
+ /* fd child writes status to */
+ int fd[2];
+ /* presumably the pid of the forked child - verify */
+ pid_t child_pid;
+ enum vacuum_child_status status;
+ /* presumably when the child was started - verify */
+ struct timeval start_time;
+ bool scheduled;
+};
+
+/*
+ * Per-database vacuuming handle.
+ * NOTE(review): fast_path_count and vacuum_interval are not used in the
+ * visible part of the file; see their users for exact semantics.
+ */
+struct ctdb_vacuum_handle {
+ struct ctdb_db_context *ctdb_db;
+ uint32_t fast_path_count;
+ uint32_t vacuum_interval;
+};
+
+
+/*
+ * State of one vacuuming run on a database: the list of records to
+ * possibly delete, the per-lmaster vacuum fetch lists and the
+ * statistics counters that are logged at the end of each phase.
+ */
+struct vacuum_data {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db;
+ /* NOTE(review): not referenced in the visible part of this file */
+ struct tdb_context *dest_db;
+ /* tree of struct delete_record_data, indexed by ctdb_hash() of the key */
+ trbt_tree_t *delete_list;
+ /* array of num_nodes marshall buffers, indexed by the lmaster of a key */
+ struct ctdb_marshall_buffer **vacuum_fetch_list;
+ /* time at which this vacuum_data was initialised */
+ struct timeval start;
+ /* set when adding to a vacuum fetch list fails; checked after the db traverse */
+ bool traverse_error;
+ /* NOTE(review): not referenced in the visible part of this file */
+ bool vacuum;
+ struct {
+ /* counters for the traverse of the in-memory delete_queue */
+ struct {
+ uint32_t added_to_vacuum_fetch_list;
+ uint32_t added_to_delete_list;
+ uint32_t deleted;
+ uint32_t skipped;
+ uint32_t error;
+ uint32_t total;
+ } delete_queue;
+ /* counters for the full read-only traverse of the database */
+ struct {
+ uint32_t scheduled;
+ uint32_t skipped;
+ uint32_t error;
+ uint32_t total;
+ } db_traverse;
+ /* counters for the processing of the delete_list */
+ struct {
+ uint32_t total;
+ uint32_t remote_error;
+ uint32_t local_error;
+ uint32_t deleted;
+ uint32_t skipped;
+ uint32_t left;
+ } delete_list;
+ /* NOTE(review): repack counters are not touched in the visible code */
+ struct {
+ uint32_t vacuumed;
+ uint32_t copied;
+ } repack;
+ } count;
+};
+
+/*
+ * this structure contains the information for one record to be deleted
+ *
+ * It is allocated with extra room behind the fixed part
+ * (offsetof(keydata) + key size) so that the key bytes are stored
+ * inline in keydata and key.dptr points at them.
+ */
+struct delete_record_data {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db;
+ /* copy of the ltdb header taken when the record was queued; its rsn
+  * is compared with the stored record before deleting */
+ struct ctdb_ltdb_header hdr;
+ /* incremented for each remote node that returned this record from
+  * TRY_DELETE_RECORDS, i.e. could not delete it */
+ uint32_t remote_fail_count;
+ TDB_DATA key;
+ /* first byte of the inline key storage */
+ uint8_t keydata[1];
+};
+
+/*
+ * Traverse parameter used when marshalling the delete list: the blob
+ * under construction plus the vacuum run it belongs to.
+ */
+struct delete_records_list {
+ struct ctdb_marshall_buffer *records;
+ struct vacuum_data *vdata;
+};
+
+/*
+ * One entry of the fetch queue: the key of a record to migrate.
+ * presumably keydata holds the key bytes inline, as in
+ * struct delete_record_data - the allocation site is not in view.
+ */
+struct fetch_record_data {
+ TDB_DATA key;
+ uint8_t keydata[1];
+};
+
+static int insert_record_into_delete_queue(struct ctdb_db_context *ctdb_db,
+ const struct ctdb_ltdb_header *hdr,
+ TDB_DATA key);
+
+/**
+ * Copy a record's key and ltdb header into a newly allocated
+ * delete_record_data and insert it into the tree, indexed by the
+ * ctdb hash of the key.
+ *
+ * The entry is a talloc child of the tree; the key bytes are stored
+ * inline behind the fixed part of the structure.
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+static int insert_delete_record_data_into_tree(struct ctdb_context *ctdb,
+					       struct ctdb_db_context *ctdb_db,
+					       trbt_tree_t *tree,
+					       const struct ctdb_ltdb_header *hdr,
+					       TDB_DATA key)
+{
+	struct delete_record_data *entry;
+	size_t entry_size;
+
+	/* Fixed part of the structure plus room for the key itself. */
+	entry_size = offsetof(struct delete_record_data, keydata) + key.dsize;
+
+	entry = (struct delete_record_data *)talloc_size(tree, entry_size);
+	if (entry == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		return -1;
+	}
+	talloc_set_name_const(entry, "struct delete_record_data");
+
+	entry->ctdb = ctdb;
+	entry->ctdb_db = ctdb_db;
+	entry->hdr = *hdr;
+	entry->remote_fail_count = 0;
+
+	/* The key lives in the trailing storage of the same allocation. */
+	memcpy(entry->keydata, key.dptr, key.dsize);
+	entry->key.dptr = entry->keydata;
+	entry->key.dsize = key.dsize;
+
+	trbt_insert32(tree, ctdb_hash(&key), entry);
+
+	return 0;
+}
+
+/**
+ * Add a record to the delete_list of a vacuum run.
+ *
+ * The list is indexed by key hash only, so a record whose hash is
+ * already present is skipped (and reported as success).  On a
+ * successful insert the delete_list total counter is incremented.
+ *
+ * Returns 0 on success or skip, -1 if the insert fails.
+ */
+static int add_record_to_delete_list(struct vacuum_data *vdata, TDB_DATA key,
+				     struct ctdb_ltdb_header *hdr)
+{
+	uint32_t key_hash = ctdb_hash(&key);
+
+	if (trbt_lookup32(vdata->delete_list, key_hash) != NULL) {
+		DEBUG(DEBUG_INFO, (__location__ " Hash collision when vacuuming, skipping this record.\n"));
+		return 0;
+	}
+
+	if (insert_delete_record_data_into_tree(vdata->ctdb,
+						vdata->ctdb_db,
+						vdata->delete_list,
+						hdr, key) != 0) {
+		return -1;
+	}
+
+	vdata->count.delete_list.total++;
+
+	return 0;
+}
+
+/**
+ * Add a record to the list of records to be sent
+ * to their lmaster with VACUUM_FETCH.
+ *
+ * The key is appended to the marshall buffer kept for its lmaster in
+ * vdata->vacuum_fetch_list.
+ *
+ * Returns 0 on success, -1 if the lmaster is out of range or the
+ * record could not be marshalled (in the latter case
+ * vdata->traverse_error is set as well).
+ */
+static int add_record_to_vacuum_fetch_list(struct vacuum_data *vdata,
+					   TDB_DATA key)
+{
+	struct ctdb_context *ctdb = vdata->ctdb;
+	uint32_t lmaster;
+	struct ctdb_marshall_buffer *vfl;
+
+	lmaster = ctdb_lmaster(ctdb, &key);
+
+	/*
+	 * vacuum_fetch_list has exactly num_nodes entries, so refuse to
+	 * index it with anything else.  vacuum_traverse() applies the
+	 * same check before trusting an lmaster value.
+	 */
+	if (lmaster >= ctdb->num_nodes) {
+		DEBUG(DEBUG_CRIT, (__location__
+				   " lmaster[%u] >= ctdb->num_nodes[%u] for key"
+				   " with hash[%u]!\n",
+				   (unsigned)lmaster,
+				   (unsigned)ctdb->num_nodes,
+				   (unsigned)ctdb_hash(&key)));
+		return -1;
+	}
+
+	vfl = vdata->vacuum_fetch_list[lmaster];
+
+	vfl = ctdb_marshall_add(ctdb, vfl, vfl->db_id, ctdb->pnn,
+				key, NULL, tdb_null);
+	if (vfl == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		vdata->traverse_error = true;
+		return -1;
+	}
+
+	/* The buffer may have been moved while growing; store it back. */
+	vdata->vacuum_fetch_list[lmaster] = vfl;
+
+	return 0;
+}
+
+
+static void ctdb_vacuum_event(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data);
+
+/*
+ * tdb_parse_record() callback: copy out the ltdb header of a record
+ * that consists of nothing but the header (i.e. has no data).
+ *
+ * private_data points at the struct ctdb_ltdb_header to fill in.
+ * Returns 0 on success, -1 if the record is not exactly header-sized.
+ */
+static int vacuum_record_parser(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct ctdb_ltdb_header *header =
+		(struct ctdb_ltdb_header *)private_data;
+
+	if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
+		return -1;
+	}
+
+	/*
+	 * Copy bytewise instead of dereferencing a cast of data.dptr:
+	 * nothing here guarantees that the record buffer is suitably
+	 * aligned for the header structure.  fetch_record_parser() does
+	 * the same.
+	 */
+	memcpy(header, data.dptr, sizeof(*header));
+
+	return 0;
+}
+
+/*
+ * tdb traverse callback for the full vacuuming run: collect the
+ * records that are candidates for deletion.
+ *
+ * A record qualifies if it consists of the ltdb header only and this
+ * node is its dmaster.  Such records are inserted into the delete
+ * queue of the database so that the subsequent fast vacuum pass
+ * handles them.  The db_traverse counters in the vacuum_data passed
+ * as private_data are updated along the way.
+ *
+ * Returns -1 (ending the traverse) if the lmaster computed for a key
+ * is out of range, 0 otherwise.
+ */
+static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
+			   void *private_data)
+{
+	struct vacuum_data *vdata = talloc_get_type(private_data,
+						    struct vacuum_data);
+	struct ctdb_context *ctdb = vdata->ctdb;
+	struct ctdb_ltdb_header *hdr;
+	uint32_t lmaster;
+
+	vdata->count.db_traverse.total++;
+
+	lmaster = ctdb_lmaster(ctdb, &key);
+	if (lmaster >= ctdb->num_nodes) {
+		vdata->count.db_traverse.error++;
+		DEBUG(DEBUG_CRIT, (__location__
+				   " lmaster[%u] >= ctdb->num_nodes[%u] for key"
+				   " with hash[%u]!\n",
+				   (unsigned)lmaster,
+				   (unsigned)ctdb->num_nodes,
+				   (unsigned)ctdb_hash(&key)));
+		return -1;
+	}
+
+	/*
+	 * Skip records that carry data (not deleted) and records we are
+	 * not dmaster for.  The header is only looked at once the size
+	 * check has passed.
+	 */
+	hdr = (struct ctdb_ltdb_header *)data.dptr;
+	if (data.dsize != sizeof(struct ctdb_ltdb_header) ||
+	    hdr->dmaster != ctdb->pnn) {
+		vdata->count.db_traverse.skipped++;
+		return 0;
+	}
+
+	/*
+	 * Hand the record to this process's delete_queue; it is processed
+	 * by the delete queue traverse of the fast vacuum run.
+	 */
+	if (insert_record_into_delete_queue(vdata->ctdb_db, hdr, key) == 0) {
+		vdata->count.db_traverse.scheduled++;
+	} else {
+		vdata->count.db_traverse.error++;
+	}
+
+	return 0;
+}
+
+/*
+ * Tree traverse callback: append one record of the delete list (key
+ * and stored header, no data) to the marshall blob kept in the
+ * delete_records_list passed as param.
+ *
+ * Returns 0 on success, -1 if marshalling fails.
+ */
+static int delete_marshall_traverse(void *param, void *data)
+{
+	struct delete_records_list *list =
+		talloc_get_type(param, struct delete_records_list);
+	struct delete_record_data *entry =
+		talloc_get_type(data, struct delete_record_data);
+	struct ctdb_marshall_buffer *blob;
+
+	/*
+	 * NOTE(review): db_id is passed for both the third and the fourth
+	 * argument; the other ctdb_marshall_add() caller in this file
+	 * passes ctdb->pnn in the fourth position - confirm this is
+	 * intended.
+	 */
+	blob = ctdb_marshall_add(list, list->records,
+				 list->records->db_id,
+				 list->records->db_id,
+				 entry->key, &entry->hdr, tdb_null);
+	if (blob == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " failed to marshall record\n"));
+		return -1;
+	}
+
+	/* The blob may have moved while growing. */
+	list->records = blob;
+
+	return 0;
+}
+
+/*
+ * State shared by all migration requests started while processing the
+ * fetch queue of one database.
+ */
+struct fetch_queue_state {
+ struct ctdb_db_context *ctdb_db;
+ /* number of migration requests sent whose callback has not run yet */
+ int count;
+};
+
+/*
+ * Per-request state of a single record migration: the shared queue
+ * state and a private copy of the record key.
+ */
+struct fetch_record_migrate_state {
+ struct fetch_queue_state *fetch_queue;
+ TDB_DATA key;
+};
+
+/*
+ * Completion callback of a migration request started by
+ * fetch_queue_traverse().
+ *
+ * The outstanding request counter is decremented in any case.  If the
+ * migration succeeded and the local record turns out to consist of
+ * the header only, the record is scheduled for deletion.  The
+ * per-request state is always freed.
+ */
+static void fetch_record_migrate_callback(struct ctdb_client_call_state *state)
+{
+	struct fetch_record_migrate_state *fetch = talloc_get_type_abort(
+		state->async.private_data, struct fetch_record_migrate_state);
+	struct fetch_queue_state *queue = fetch->fetch_queue;
+	struct ctdb_db_context *ctdb_db = queue->ctdb_db;
+	struct ctdb_call call = { 0 };
+	struct ctdb_ltdb_header hdr;
+	int ret;
+
+	ret = ctdb_call_recv(state, &call);
+
+	/* This request is done, whatever its outcome. */
+	queue->count--;
+
+	if (ret != 0) {
+		D_ERR("Failed to migrate record for vacuuming\n");
+		goto done;
+	}
+
+	/* Read the header under the chain lock; give up if it is busy. */
+	if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, fetch->key) != 0) {
+		goto done;
+	}
+
+	ret = tdb_parse_record(ctdb_db->ltdb->tdb,
+			       fetch->key,
+			       vacuum_record_parser,
+			       &hdr);
+
+	tdb_chainunlock(ctdb_db->ltdb->tdb, fetch->key);
+
+	if (ret == 0) {
+		D_INFO("Vacuum Fetch record, key=%.*s\n",
+		       (int)fetch->key.dsize,
+		       fetch->key.dptr);
+
+		(void) ctdb_local_schedule_for_deletion(ctdb_db,
+							&hdr,
+							fetch->key);
+	}
+
+done:
+	talloc_free(fetch);
+}
+
+/*
+ * tdb_parse_record() callback: copy the leading ltdb header of a
+ * record into the struct ctdb_ltdb_header that private_data points to.
+ *
+ * Returns 0 on success, -1 if the record is too short to contain a
+ * header.
+ */
+static int fetch_record_parser(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct ctdb_ltdb_header *out = (struct ctdb_ltdb_header *)private_data;
+
+	if (data.dsize >= sizeof(struct ctdb_ltdb_header)) {
+		memcpy(out, data.dptr, sizeof(*out));
+		return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * traverse function for the traversal of the fetch_queue.
+ *
+ * For each queued key whose record exists locally and whose dmaster
+ * is another node, send a migration request.  The request completes
+ * in fetch_record_migrate_callback(); the number of requests in
+ * flight is tracked in the fetch_queue_state passed as param.
+ *
+ * Always returns 0, so one failing record does not stop the traverse.
+ */
+static int fetch_queue_traverse(void *param, void *data)
+{
+	struct fetch_record_data *rd = talloc_get_type_abort(
+		data, struct fetch_record_data);
+	struct fetch_queue_state *queue = (struct fetch_queue_state *)param;
+	struct ctdb_db_context *ctdb_db = queue->ctdb_db;
+	struct fetch_record_migrate_state *migrate;
+	struct ctdb_client_call_state *call_state;
+	struct ctdb_ltdb_header header;
+	struct ctdb_call call = { 0 };
+	int ret;
+
+	/* Read the current header under the chain lock, if available. */
+	if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, rd->key) != 0) {
+		return 0;
+	}
+
+	ret = tdb_parse_record(ctdb_db->ltdb->tdb,
+			       rd->key,
+			       fetch_record_parser,
+			       &header);
+
+	tdb_chainunlock(ctdb_db->ltdb->tdb, rd->key);
+
+	/*
+	 * Nothing to do if the header could not be read or if the record
+	 * is already migrated to this node.
+	 */
+	if (ret != 0 || header.dmaster == ctdb_db->ctdb->pnn) {
+		D_INFO("Skipped Fetch record, key=%.*s\n",
+		       (int)rd->key.dsize,
+		       rd->key.dptr);
+		return 0;
+	}
+
+	migrate = talloc_zero(ctdb_db, struct fetch_record_migrate_state);
+	if (migrate == NULL) {
+		D_ERR("Failed to setup fetch record migrate state\n");
+		return 0;
+	}
+
+	migrate->fetch_queue = queue;
+
+	/* The request keeps its own copy of the key. */
+	migrate->key.dptr = talloc_memdup(migrate, rd->key.dptr, rd->key.dsize);
+	migrate->key.dsize = rd->key.dsize;
+	if (migrate->key.dptr == NULL) {
+		D_ERR("Memory error in fetch_queue_traverse\n");
+		talloc_free(migrate);
+		return 0;
+	}
+
+	call.key = migrate->key;
+	call.call_id = CTDB_NULL_FUNC;
+	call.flags = CTDB_IMMEDIATE_MIGRATION |
+		     CTDB_CALL_FLAG_VACUUM_MIGRATION;
+
+	call_state = ctdb_call_send(ctdb_db, &call);
+	if (call_state == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to setup vacuum fetch call\n"));
+		talloc_free(migrate);
+		return 0;
+	}
+
+	call_state->async.fn = fetch_record_migrate_callback;
+	call_state->async.private_data = migrate;
+
+	queue->count++;
+
+	return 0;
+}
+
+/**
+ * Traverse the fetch queue.
+ * Records are migrated to the local node and
+ * added to delete queue for further processing.
+ *
+ * Does not return before the callbacks of all migration requests
+ * started by the traverse have run.
+ */
+static void ctdb_process_fetch_queue(struct ctdb_db_context *ctdb_db)
+{
+	struct fetch_queue_state state = {
+		.ctdb_db = ctdb_db,
+		.count = 0,
+	};
+
+	if (trbt_traversearray32(ctdb_db->fetch_queue, 1,
+				 fetch_queue_traverse, &state) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Error traversing "
+		      "the fetch queue.\n"));
+	}
+
+	/*
+	 * Wait for all migrations to complete: state lives on this stack
+	 * frame and is referenced by the pending callbacks.
+	 */
+	while (state.count > 0) {
+		tevent_loop_once(ctdb_db->ctdb->ev);
+	}
+}
+
+/**
+ * traverse function for the traversal of the delete_queue,
+ * the fast-path vacuuming list.
+ *
+ * - If the record has been migrated off the node
+ * or has been revived (filled with data) on the node,
+ * then skip the record.
+ *
+ * - If the current node is the record's lmaster and it is
+ * a record that has never been migrated with data, then
+ * delete the record from the local tdb.
+ *
+ * - If the current node is the record's lmaster and it has
+ * been migrated with data, then schedule it for the normal
+ * vacuuming procedure (i.e. add it to the delete_list).
+ *
+ * - If the current node is NOT the record's lmaster then
+ * add it to the list of records that are to be sent to
+ * the lmaster with the VACUUM_FETCH message.
+ *
+ * param is the struct vacuum_data of the run, data the queued
+ * struct delete_record_data. Every record is accounted for in exactly
+ * one of the delete_queue counters besides total. Always returns 0 so
+ * that the traverse continues with the next record.
+ */
+static int delete_queue_traverse(void *param, void *data)
+{
+ struct delete_record_data *dd =
+ talloc_get_type(data, struct delete_record_data);
+ struct vacuum_data *vdata = talloc_get_type(param, struct vacuum_data);
+ struct ctdb_db_context *ctdb_db = dd->ctdb_db;
+ struct ctdb_context *ctdb = ctdb_db->ctdb; /* or dd->ctdb ??? */
+ int res;
+ struct ctdb_ltdb_header header;
+ uint32_t lmaster;
+ uint32_t hash = ctdb_hash(&(dd->key));
+
+ vdata->count.delete_queue.total++;
+
+ /* Do not block: a record whose chain is busy is counted as an error. */
+ res = tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, dd->key);
+ if (res != 0) {
+ vdata->count.delete_queue.error++;
+ return 0;
+ }
+
+ /*
+ * From here on the chain lock is held; every path below leaves
+ * through "skipped" or "done", which unlock it.
+ */
+
+ /* Fails if the record is missing or is not a bare header (has data). */
+ res = tdb_parse_record(ctdb_db->ltdb->tdb, dd->key,
+ vacuum_record_parser, &header);
+ if (res != 0) {
+ goto skipped;
+ }
+
+ if (header.dmaster != ctdb->pnn) {
+ /* The record has been migrated off the node. Skip. */
+ goto skipped;
+ }
+
+ if (header.rsn != dd->hdr.rsn) {
+ /*
+ * The record has been migrated off the node and back again.
+ * But not requeued for deletion. Skip it.
+ */
+ goto skipped;
+ }
+
+ /*
+ * We are dmaster, and the record has no data, and it has
+ * not been migrated after it has been queued for deletion.
+ *
+ * At this stage, the record could still have been revived locally
+ * and last been written with empty data. This can only be
+ * fixed with the addition of an active or delete flag. (TODO)
+ */
+
+ lmaster = ctdb_lmaster(ctdb_db->ctdb, &dd->key);
+
+ if (lmaster != ctdb->pnn) {
+ /* Not our record to delete: hand it to its lmaster. */
+ res = add_record_to_vacuum_fetch_list(vdata, dd->key);
+
+ if (res != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Error adding record to list "
+ "of records to send to lmaster.\n"));
+ vdata->count.delete_queue.error++;
+ } else {
+ vdata->count.delete_queue.added_to_vacuum_fetch_list++;
+ }
+ goto done;
+ }
+
+ /* use header->flags or dd->hdr.flags ?? */
+ if (dd->hdr.flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) {
+ /* Queue for the delete_list processing that involves the other nodes. */
+ res = add_record_to_delete_list(vdata, dd->key, &dd->hdr);
+
+ if (res != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Error adding record to list "
+ "of records for deletion on lmaster.\n"));
+ vdata->count.delete_queue.error++;
+ } else {
+ vdata->count.delete_queue.added_to_delete_list++;
+ }
+ } else {
+ /* Never migrated with data: delete locally right away. */
+ res = tdb_delete(ctdb_db->ltdb->tdb, dd->key);
+
+ if (res != 0) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Error deleting record with key "
+ "hash [0x%08x] from local data base db[%s].\n",
+ hash, ctdb_db->db_name));
+ vdata->count.delete_queue.error++;
+ goto done;
+ }
+
+ DEBUG(DEBUG_DEBUG,
+ (__location__ " Deleted record with key hash "
+ "[0x%08x] from local data base db[%s].\n",
+ hash, ctdb_db->db_name));
+ vdata->count.delete_queue.deleted++;
+ }
+
+ goto done;
+
+skipped:
+ vdata->count.delete_queue.skipped++;
+
+done:
+ tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key);
+
+ return 0;
+}
+
+/**
+ * Delete the records that we are lmaster and dmaster for and
+ * that could be deleted on all other nodes via the TRY_DELETE_RECORDS
+ * control.
+ *
+ * Tree traverse callback for the delete list: param is the
+ * struct vacuum_data of the run, data the struct delete_record_data
+ * to process.  The entry is always freed and the "left" counter
+ * decremented; exactly one of the remote_error, local_error, skipped
+ * or deleted counters is incremented.  Always returns 0.
+ */
+static int delete_record_traverse(void *param, void *data)
+{
+	struct delete_record_data *dd =
+		talloc_get_type(data, struct delete_record_data);
+	struct vacuum_data *vdata = talloc_get_type(param, struct vacuum_data);
+	struct ctdb_db_context *ctdb_db = dd->ctdb_db;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	uint32_t hash = ctdb_hash(&(dd->key));
+	struct ctdb_ltdb_header current;
+
+	/* At least one remote node could not delete this record. */
+	if (dd->remote_fail_count > 0) {
+		vdata->count.delete_list.remote_error++;
+		goto release;
+	}
+
+	if (tdb_chainlock(ctdb_db->ltdb->tdb, dd->key) != 0) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Error getting chainlock on record with "
+		       "key hash [0x%08x] on database db[%s].\n",
+		       hash, ctdb_db->db_name));
+		vdata->count.delete_list.local_error++;
+		goto release;
+	}
+
+	/*
+	 * With the chain locked, make sure that the record is still
+	 * empty, that its RSN has not changed and that we are still its
+	 * lmaster and dmaster.
+	 */
+
+	if (tdb_parse_record(ctdb_db->ltdb->tdb, dd->key,
+			     vacuum_record_parser, &current) != 0) {
+		goto skip;
+	}
+
+	if (current.flags & CTDB_REC_RO_FLAGS) {
+		DEBUG(DEBUG_INFO, (__location__ ": record with hash [0x%08x] "
+				   "on database db[%s] has read-only flags. "
+				   "skipping.\n",
+				   hash, ctdb_db->db_name));
+		goto skip;
+	}
+
+	if (current.dmaster != ctdb->pnn) {
+		DEBUG(DEBUG_INFO, (__location__ ": record with hash [0x%08x] "
+				   "on database db[%s] has been migrated away. "
+				   "skipping.\n",
+				   hash, ctdb_db->db_name));
+		goto skip;
+	}
+
+	if (current.rsn != dd->hdr.rsn) {
+		/*
+		 * A different RSN means the record went away and came
+		 * back without being queued for deletion again.
+		 */
+		DEBUG(DEBUG_INFO, (__location__ ": record with hash [0x%08x] "
+				   "on database db[%s] seems to have been "
+				   "migrated away and back again (with empty "
+				   "data). skipping.\n",
+				   hash, ctdb_db->db_name));
+		goto skip;
+	}
+
+	if (ctdb_lmaster(ctdb_db->ctdb, &dd->key) != ctdb->pnn) {
+		DEBUG(DEBUG_INFO, (__location__ ": not lmaster for record in "
+				   "delete list (key hash [0x%08x], db[%s]). "
+				   "Strange! skipping.\n",
+				   hash, ctdb_db->db_name));
+		goto skip;
+	}
+
+	if (tdb_delete(ctdb_db->ltdb->tdb, dd->key) != 0) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Error deleting record with key hash "
+		       "[0x%08x] from local data base db[%s].\n",
+		       hash, ctdb_db->db_name));
+		vdata->count.delete_list.local_error++;
+		goto unlock;
+	}
+
+	DEBUG(DEBUG_DEBUG,
+	      (__location__ " Deleted record with key hash [0x%08x] from "
+	       "local data base db[%s].\n", hash, ctdb_db->db_name));
+
+	vdata->count.delete_list.deleted++;
+	goto unlock;
+
+skip:
+	vdata->count.delete_list.skipped++;
+
+unlock:
+	tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key);
+
+release:
+	/* The entry has been dealt with, whatever the outcome. */
+	talloc_free(dd);
+	vdata->count.delete_list.left--;
+
+	return 0;
+}
+
+/**
+ * Traverse the delete_queue.
+ * Records are either deleted directly or filled
+ * into the delete list or the vacuum fetch lists
+ * for further processing.
+ *
+ * Afterwards the delete_queue counters are cross-checked (each record
+ * must show up in exactly one of them) and, if anything was
+ * processed, logged.
+ */
+static void ctdb_process_delete_queue(struct ctdb_db_context *ctdb_db,
+				      struct vacuum_data *vdata)
+{
+	uint32_t accounted = 0;
+
+	if (trbt_traversearray32(ctdb_db->delete_queue, 1,
+				 delete_queue_traverse, vdata) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Error traversing "
+		      "the delete queue.\n"));
+	}
+
+	accounted += vdata->count.delete_queue.deleted;
+	accounted += vdata->count.delete_queue.skipped;
+	accounted += vdata->count.delete_queue.error;
+	accounted += vdata->count.delete_queue.added_to_delete_list;
+	accounted += vdata->count.delete_queue.added_to_vacuum_fetch_list;
+
+	if (accounted != vdata->count.delete_queue.total) {
+		DEBUG(DEBUG_ERR, (__location__ " Inconsistency in fast vacuum "
+		      "counts for db[%s]: total[%u] != sum[%u]\n",
+		      ctdb_db->db_name,
+		      (unsigned)vdata->count.delete_queue.total,
+		      (unsigned)accounted));
+	}
+
+	if (vdata->count.delete_queue.total == 0) {
+		/* Nothing was queued, nothing to report. */
+		return;
+	}
+
+	DEBUG(DEBUG_INFO,
+	      (__location__
+	       " fast vacuuming delete_queue traverse statistics: "
+	       "db[%s] "
+	       "total[%u] "
+	       "del[%u] "
+	       "skp[%u] "
+	       "err[%u] "
+	       "adl[%u] "
+	       "avf[%u]\n",
+	       ctdb_db->db_name,
+	       (unsigned)vdata->count.delete_queue.total,
+	       (unsigned)vdata->count.delete_queue.deleted,
+	       (unsigned)vdata->count.delete_queue.skipped,
+	       (unsigned)vdata->count.delete_queue.error,
+	       (unsigned)vdata->count.delete_queue.added_to_delete_list,
+	       (unsigned)vdata->count.delete_queue.added_to_vacuum_fetch_list));
+}
+
+/**
+ * read-only traverse of the database, looking for records that
+ * might be able to be vacuumed.
+ *
+ * This is not done each time but only every tunable
+ * VacuumFastPathCount times.
+ *
+ * Candidates are picked by vacuum_traverse(); the db_traverse
+ * counters are logged unless the traverse failed.
+ */
+static void ctdb_vacuum_traverse_db(struct ctdb_db_context *ctdb_db,
+				    struct vacuum_data *vdata)
+{
+	int rc = tdb_traverse_read(ctdb_db->ltdb->tdb, vacuum_traverse, vdata);
+
+	if (rc == -1 || vdata->traverse_error) {
+		DEBUG(DEBUG_ERR, (__location__ " Traverse error in vacuuming "
+		      "'%s'\n", ctdb_db->db_name));
+		return;
+	}
+
+	if (vdata->count.db_traverse.total == 0) {
+		/* Empty database: no statistics worth logging. */
+		return;
+	}
+
+	DEBUG(DEBUG_INFO,
+	      (__location__
+	       " full vacuuming db traverse statistics: "
+	       "db[%s] "
+	       "total[%u] "
+	       "skp[%u] "
+	       "err[%u] "
+	       "sched[%u]\n",
+	       ctdb_db->db_name,
+	       (unsigned)vdata->count.db_traverse.total,
+	       (unsigned)vdata->count.db_traverse.skipped,
+	       (unsigned)vdata->count.db_traverse.error,
+	       (unsigned)vdata->count.db_traverse.scheduled));
+}
+
+/**
+ * Process the vacuum fetch lists:
+ * For records for which we are not the lmaster, tell the lmaster to
+ * fetch the record.
+ *
+ * Every non-empty list that belongs to another node is sent to that
+ * node with a VACUUM_FETCH control.  A failed control is logged and
+ * the remaining nodes are still processed.
+ */
+static void ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db,
+					    struct vacuum_data *vdata)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	unsigned int idx;
+
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		struct ctdb_marshall_buffer *vfl = vdata->vacuum_fetch_list[idx];
+		TDB_DATA blob;
+		int ret, res;
+
+		/* Skip our own slot and lists without any record. */
+		if (ctdb->nodes[idx]->pnn == ctdb->pnn || vfl->count == 0) {
+			continue;
+		}
+
+		DEBUG(DEBUG_INFO, ("Found %u records for lmaster %u in '%s'\n",
+				   vfl->count, ctdb->nodes[idx]->pnn,
+				   ctdb_db->db_name));
+
+		blob = ctdb_marshall_finish(vfl);
+
+		ret = ctdb_control(ctdb, ctdb->nodes[idx]->pnn, 0,
+				   CTDB_CONTROL_VACUUM_FETCH, 0,
+				   blob, NULL, NULL, &res, NULL, NULL);
+		if (ret != 0 || res != 0) {
+			DEBUG(DEBUG_ERR, ("Failed to send vacuum "
+					  "fetch control to node %u\n",
+					  ctdb->nodes[idx]->pnn));
+		}
+	}
+}
+
+/**
+ * Process the delete list:
+ *
+ * This is the last step of vacuuming that consistently deletes
+ * those records that have been migrated with data and can hence
+ * not be deleted when leaving a node.
+ *
+ * In this step, the lmaster does the final deletion of those empty
+ * records that it is also dmaster for. It has usually received
+ * at least some of these records previously from the former dmasters
+ * with the vacuum fetch message.
+ *
+ * 1) Send the records to all active nodes with the TRY_DELETE_RECORDS
+ *    control. The remote nodes delete their local copy.
+ * 2) The lmaster locally deletes its copies of all records that
+ *    could successfully be deleted remotely in step #1.
+ *
+ * Any failure while talking to the remote nodes, as well as a
+ * malformed reply, aborts the whole step without deleting anything
+ * locally.
+ */
+static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db,
+				     struct vacuum_data *vdata)
+{
+	int ret, i;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct delete_records_list *recs;
+	TDB_DATA indata;
+	struct ctdb_node_map_old *nodemap;
+	uint32_t *active_nodes;
+	int num_active_nodes;
+	TALLOC_CTX *tmp_ctx;
+	uint32_t sum;
+
+	if (vdata->count.delete_list.total == 0) {
+		return;
+	}
+
+	tmp_ctx = talloc_new(vdata);
+	if (tmp_ctx == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		return;
+	}
+
+	vdata->count.delete_list.left = vdata->count.delete_list.total;
+
+	/*
+	 * get the list of currently active nodes
+	 */
+
+	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(),
+				   CTDB_CURRENT_NODE,
+				   tmp_ctx,
+				   &nodemap);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " unable to get node map\n"));
+		goto done;
+	}
+
+	active_nodes = list_of_active_nodes(ctdb, nodemap,
+					    nodemap, /* talloc context */
+					    false /* include self */);
+	if (active_nodes == NULL) {
+		/*
+		 * Without the list we would compute zero active nodes
+		 * below and delete records locally that no remote node
+		 * was ever asked about.
+		 */
+		DEBUG(DEBUG_ERR,(__location__ " unable to get list of "
+				 "active nodes\n"));
+		goto done;
+	}
+	/* yuck! ;-) the list length is derived from the allocation size */
+	num_active_nodes = talloc_get_size(active_nodes)/sizeof(*active_nodes);
+
+	/*
+	 * Now delete the records on all active nodes in a two-phase process:
+	 * 1) tell all active remote nodes to delete all their copy
+	 * 2) if all remote nodes deleted their record copy, delete it locally
+	 */
+
+	recs = talloc_zero(tmp_ctx, struct delete_records_list);
+	if (recs == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		goto done;
+	}
+
+	/*
+	 * Step 1:
+	 * Send all records to all active nodes for deletion.
+	 */
+
+	/*
+	 * Create a marshall blob from the remaining list of records to delete.
+	 */
+
+	recs->records = (struct ctdb_marshall_buffer *)
+		talloc_zero_size(recs,
+				 offsetof(struct ctdb_marshall_buffer, data));
+	if (recs->records == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		goto done;
+	}
+	recs->records->db_id = ctdb_db->db_id;
+
+	ret = trbt_traversearray32(vdata->delete_list, 1,
+				   delete_marshall_traverse, recs);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Error traversing the "
+		      "delete list for second marshalling.\n"));
+		goto done;
+	}
+
+	indata = ctdb_marshall_finish(recs->records);
+
+	for (i = 0; i < num_active_nodes; i++) {
+		const size_t rec_hdr_len =
+			offsetof(struct ctdb_rec_data_old, data);
+		struct ctdb_marshall_buffer *records;
+		struct ctdb_rec_data_old *rec;
+		uint8_t *reply_end;
+		int32_t res;
+		TDB_DATA outdata;
+
+		ret = ctdb_control(ctdb, active_nodes[i], 0,
+				   CTDB_CONTROL_TRY_DELETE_RECORDS, 0,
+				   indata, recs, &outdata, &res,
+				   NULL, NULL);
+		if (ret != 0 || res != 0) {
+			DEBUG(DEBUG_ERR, ("Failed to delete records on "
+					  "node %u: ret[%d] res[%d]\n",
+					  active_nodes[i], ret, res));
+			goto done;
+		}
+
+		/*
+		 * The reply is about to be interpreted as a marshall
+		 * buffer, so it has to contain at least the buffer header.
+		 */
+		if (outdata.dptr == NULL ||
+		    outdata.dsize < offsetof(struct ctdb_marshall_buffer, data)) {
+			DEBUG(DEBUG_CRIT, (__location__ " short reply to "
+					   "TRY_DELETE_RECORDS from node %u\n",
+					   active_nodes[i]));
+			goto done;
+		}
+		reply_end = outdata.dptr + outdata.dsize;
+
+		/*
+		 * outdata contains the list of records coming back
+		 * from the node: These are the records that the
+		 * remote node could not delete. We remove these from
+		 * the list to delete locally.
+		 */
+		records = (struct ctdb_marshall_buffer *)outdata.dptr;
+		rec = (struct ctdb_rec_data_old *)&records->data[0];
+		while (records->count-- > 0) {
+			size_t avail = (size_t)(reply_end - (uint8_t *)rec);
+			TDB_DATA reckey;
+			struct delete_record_data *dd;
+
+			/*
+			 * The record header, and then the key and data
+			 * it announces, must lie inside the reply.
+			 */
+			if (avail < rec_hdr_len ||
+			    (size_t)rec->length < rec_hdr_len ||
+			    (size_t)rec->length > avail ||
+			    (uint64_t)rec->keylen + (uint64_t)rec->datalen >
+			    (uint64_t)rec->length - rec_hdr_len) {
+				DEBUG(DEBUG_CRIT,(__location__ " bad marshalled record\n"));
+				goto done;
+			}
+
+			reckey.dptr = &rec->data[0];
+			reckey.dsize = rec->keylen;
+
+			/* The data part has to hold at least an ltdb header. */
+			if (rec->datalen < sizeof(struct ctdb_ltdb_header)) {
+				DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n"));
+				goto done;
+			}
+
+			dd = (struct delete_record_data *)trbt_lookup32(
+					vdata->delete_list,
+					ctdb_hash(&reckey));
+			if (dd != NULL) {
+				/*
+				 * The remote node could not delete the
+				 * record.  Since other remote nodes can
+				 * also fail, we just mark the record.
+				 */
+				dd->remote_fail_count++;
+			} else {
+				DEBUG(DEBUG_ERR, (__location__ " Failed to "
+				      "find record with hash 0x%08x coming "
+				      "back from TRY_DELETE_RECORDS "
+				      "control in delete list.\n",
+				      ctdb_hash(&reckey)));
+			}
+
+			rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec);
+		}
+	}
+
+	/*
+	 * Step 2:
+	 * Delete the remaining records locally.
+	 *
+	 * These records have successfully been deleted on all
+	 * active remote nodes.
+	 */
+
+	ret = trbt_traversearray32(vdata->delete_list, 1,
+				   delete_record_traverse, vdata);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Error traversing the "
+		      "delete list for deletion.\n"));
+	}
+
+	if (vdata->count.delete_list.left != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Vacuum db[%s] error: "
+		      "there are %u records left for deletion after "
+		      "processing delete list\n",
+		      ctdb_db->db_name,
+		      (unsigned)vdata->count.delete_list.left));
+	}
+
+	/* Every record must end up in exactly one of these counters. */
+	sum = vdata->count.delete_list.deleted
+	    + vdata->count.delete_list.skipped
+	    + vdata->count.delete_list.remote_error
+	    + vdata->count.delete_list.local_error
+	    + vdata->count.delete_list.left;
+
+	if (vdata->count.delete_list.total != sum) {
+		DEBUG(DEBUG_ERR, (__location__ " Inconsistency in vacuum "
+		      "delete list counts for db[%s]: total[%u] != sum[%u]\n",
+		      ctdb_db->db_name,
+		      (unsigned)vdata->count.delete_list.total,
+		      (unsigned)sum));
+	}
+
+	if (vdata->count.delete_list.total > 0) {
+		DEBUG(DEBUG_INFO,
+		      (__location__
+		       " vacuum delete list statistics: "
+		       "db[%s] "
+		       "total[%u] "
+		       "del[%u] "
+		       "skip[%u] "
+		       "rem.err[%u] "
+		       "loc.err[%u] "
+		       "left[%u]\n",
+		       ctdb_db->db_name,
+		       (unsigned)vdata->count.delete_list.total,
+		       (unsigned)vdata->count.delete_list.deleted,
+		       (unsigned)vdata->count.delete_list.skipped,
+		       (unsigned)vdata->count.delete_list.remote_error,
+		       (unsigned)vdata->count.delete_list.local_error,
+		       (unsigned)vdata->count.delete_list.left));
+	}
+
+done:
+	/* Frees the node map, the marshall blob and all control replies. */
+	talloc_free(tmp_ctx);
+
+	return;
+}
+
+/**
+ * initialize the vacuum_data
+ *
+ * Allocates a zeroed vacuum_data on mem_ctx together with an empty
+ * delete list and one empty marshall buffer per node for the vacuum
+ * fetch lists, and records the start time.
+ *
+ * Returns the new vacuum_data, or NULL if an allocation fails (in
+ * which case everything allocated so far is freed again).
+ */
+static struct vacuum_data *ctdb_vacuum_init_vacuum_data(
+					struct ctdb_db_context *ctdb_db,
+					TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct vacuum_data *vdata;
+	unsigned int node;
+
+	/*
+	 * talloc_zero() hands back zeroed memory, so all statistics
+	 * counters start out at 0 without being set one by one.
+	 */
+	vdata = talloc_zero(mem_ctx, struct vacuum_data);
+	if (vdata == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		return NULL;
+	}
+
+	vdata->ctdb = ctdb;
+	vdata->ctdb_db = ctdb_db;
+
+	vdata->delete_list = trbt_create(vdata, 0);
+	if (vdata->delete_list == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		goto fail;
+	}
+
+	vdata->start = timeval_current();
+
+	/* the list needs to be of length num_nodes */
+	vdata->vacuum_fetch_list = talloc_zero_array(vdata,
+						struct ctdb_marshall_buffer *,
+						ctdb->num_nodes);
+	if (vdata->vacuum_fetch_list == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		goto fail;
+	}
+
+	/* Each node starts with an empty buffer: the header and no records. */
+	for (node = 0; node < ctdb->num_nodes; node++) {
+		struct ctdb_marshall_buffer *empty;
+
+		empty = (struct ctdb_marshall_buffer *)
+			talloc_zero_size(vdata->vacuum_fetch_list,
+					 offsetof(struct ctdb_marshall_buffer, data));
+		vdata->vacuum_fetch_list[node] = empty;
+		if (empty == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+			goto fail;
+		}
+		empty->db_id = ctdb_db->db_id;
+	}
+
+	return vdata;
+
+fail:
+	/* Everything allocated above hangs off vdata. */
+	talloc_free(vdata);
+	return NULL;
+}
+
+/**
+ * Vacuum a DB:
+ *  - Always do the fast vacuuming run, which traverses
+ *    - the in-memory fetch queue: these records have been
+ *      scheduled for migration
+ *    - the in-memory delete queue: these records have been
+ *      scheduled for deletion.
+ *  - Only if explicitly requested, the database is traversed
+ *    in order to use the traditional heuristics on empty records
+ *    to trigger deletion.
+ *    This is done only every VacuumFastPathCount'th vacuuming run.
+ *
+ * The traverse runs fill two lists:
+ *
+ * - The delete_list:
+ *   This is the list of empty records the current
+ *   node is lmaster and dmaster for. These records are later
+ *   deleted first on other nodes and then locally.
+ *
+ *   The fast vacuuming run has a short cut for those records
+ *   that have never been migrated with data: these records
+ *   are immediately deleted locally, since they have left
+ *   no trace on other nodes.
+ *
+ * - The vacuum_fetch lists
+ *   (one for each other lmaster node):
+ *   The records in this list are sent for deletion to
+ *   their lmaster in a bulk VACUUM_FETCH control.
+ *
+ *   The lmaster then migrates all these records to itself
+ *   so that they can be vacuumed there.
+ *
+ * This executes in the child context.
+ *
+ * Returns 0 once the processing steps have run, non-zero if the
+ * vnnmap or pnn could not be fetched or the run state could not be
+ * allocated.
+ */
+static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db,
+			  bool full_vacuum_run)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct vacuum_data *vdata;
+	TALLOC_CTX *run_ctx;
+	int ret, pnn;
+
+	DEBUG(DEBUG_INFO, (__location__ " Entering %s vacuum run for db "
+			   "%s db_id[0x%08x]\n",
+			   full_vacuum_run ? "full" : "fast",
+			   ctdb_db->db_name, ctdb_db->db_id));
+
+	/* Fetch the current vnnmap and our pnn from the local node. */
+	ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &ctdb->vnn_map);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Unable to get vnnmap from local node\n"));
+		return ret;
+	}
+
+	pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
+	if (pnn == -1) {
+		DEBUG(DEBUG_ERR, ("Unable to get pnn from local node\n"));
+		return -1;
+	}
+	ctdb->pnn = pnn;
+
+	/* All state of this run hangs off one temporary context. */
+	run_ctx = talloc_new(ctdb_db);
+	if (run_ctx == NULL) {
+		DEBUG(DEBUG_ERR, ("Out of memory!\n"));
+		return -1;
+	}
+
+	vdata = ctdb_vacuum_init_vacuum_data(ctdb_db, run_ctx);
+	if (vdata == NULL) {
+		talloc_free(run_ctx);
+		return -1;
+	}
+
+	/* The full traverse only feeds the delete queue processed below. */
+	if (full_vacuum_run) {
+		ctdb_vacuum_traverse_db(ctdb_db, vdata);
+	}
+
+	ctdb_process_fetch_queue(ctdb_db);
+	ctdb_process_delete_queue(ctdb_db, vdata);
+	ctdb_process_vacuum_fetch_lists(ctdb_db, vdata);
+	ctdb_process_delete_list(ctdb_db, vdata);
+
+	talloc_free(run_ctx);
+
+	return 0;
+}
+
+/*
+ * Vacuum a database and, if its freelist has grown large enough, repack it.
+ * Called from the child context.
+ *
+ * A failed vacuum run is logged but does not stop the repack check.
+ * Repacking is skipped when the repack_limit tunable is 0 or when the
+ * freelist holds fewer than repack_limit entries.
+ *
+ * Returns 0 on success (including "no repack needed") and -1 if the
+ * freelist size cannot be determined or the repack fails.
+ */
+static int ctdb_vacuum_and_repack_db(struct ctdb_db_context *ctdb_db,
+				     bool full_vacuum_run)
+{
+	uint32_t repack_limit = ctdb_db->ctdb->tunable.repack_limit;
+	const char *name = ctdb_db->db_name;
+	int freelist_size = 0;
+	int ret;
+
+	/* Vacuum failure is only logged; the freelist may still need a repack. */
+	if (ctdb_vacuum_db(ctdb_db, full_vacuum_run) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to vacuum '%s'\n", name));
+	}
+
+	freelist_size = tdb_freelist_size(ctdb_db->ltdb->tdb);
+	if (freelist_size == -1) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to get freelist size for '%s'\n", name));
+		return -1;
+	}
+
+	/*
+	 * decide if a repack is necessary
+	 */
+	if ((repack_limit == 0 || (uint32_t)freelist_size < repack_limit))
+	{
+		return 0;
+	}
+
+	/* freelist_size is a (non-negative) int, so print it with %d. */
+	D_NOTICE("Repacking %s with %d freelist entries\n",
+		 name,
+		 freelist_size);
+
+	ret = tdb_repack(ctdb_db->ltdb->tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to repack '%s'\n", name));
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Return the current value of the vacuum_interval tunable for this db's daemon. */
+static uint32_t get_vacuum_interval(struct ctdb_db_context *ctdb_db)
+{
+	return ctdb_db->ctdb->tunable.vacuum_interval;
+}
+
+/*
+ * talloc destructor for a vacuum child context.
+ *
+ * Records the run time, kills the child if it has not been marked as
+ * finished (child_pid != -1), otherwise counts the run as a successful
+ * fast-path run, clears ctdb->vacuumer and, for scheduled runs, arms the
+ * timer for the next vacuuming event.
+ */
+static int vacuum_child_destructor(struct ctdb_vacuum_child_context *child_ctx)
+{
+	struct ctdb_vacuum_handle *handle = child_ctx->vacuum_handle;
+	struct ctdb_db_context *ctdb_db = handle->ctdb_db;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	double l = timeval_elapsed(&child_ctx->start_time);
+
+	CTDB_UPDATE_DB_LATENCY(ctdb_db, "vacuum", vacuum.latency, l);
+	DEBUG(DEBUG_INFO,("Vacuuming took %.3f seconds for database %s\n", l, ctdb_db->db_name));
+
+	if (child_ctx->child_pid == -1) {
+		/* Bump the number of successful fast-path runs. */
+		handle->fast_path_count++;
+	} else {
+		ctdb_kill(ctdb, child_ctx->child_pid, SIGKILL);
+	}
+
+	ctdb->vacuumer = NULL;
+
+	if (!child_ctx->scheduled) {
+		return 0;
+	}
+
+	/* Scheduled run: pick up the current tunable and re-arm the timer. */
+	handle->vacuum_interval = get_vacuum_interval(ctdb_db);
+	tevent_add_timer(ctdb->ev,
+			 handle,
+			 timeval_current_ofs(handle->vacuum_interval, 0),
+			 ctdb_vacuum_event,
+			 handle);
+
+	return 0;
+}
+
+/*
+ * Timer handler: the vacuum child did not finish within the allowed time.
+ * Marks the run as timed out and frees the child context.
+ */
+static void vacuum_child_timeout(struct tevent_context *ev,
+				 struct tevent_timer *te,
+				 struct timeval t, void *private_data)
+{
+	struct ctdb_vacuum_child_context *ctx = talloc_get_type(
+		private_data, struct ctdb_vacuum_child_context);
+	const char *db_name = ctx->vacuum_handle->ctdb_db->db_name;
+
+	DEBUG(DEBUG_ERR,("Vacuuming child process timed out for db %s\n", db_name));
+
+	ctx->status = VACUUM_TIMEOUT;
+	talloc_free(ctx);
+}
+
+
+/*
+ * fd handler: the pipe from the vacuum child became readable, i.e. the
+ * child has completed.  Reads the one-byte result, records the status and
+ * frees the child context.
+ */
+static void vacuum_child_handler(struct tevent_context *ev,
+				 struct tevent_fd *fde,
+				 uint16_t flags, void *private_data)
+{
+	struct ctdb_vacuum_child_context *ctx = talloc_get_type(
+		private_data, struct ctdb_vacuum_child_context);
+	const char *db_name = ctx->vacuum_handle->ctdb_db->db_name;
+	char c = 0;
+	int ret;
+
+	DEBUG(DEBUG_INFO,("Vacuuming child process %d finished for db %s\n", ctx->child_pid, db_name));
+	/* Mark the child as finished so the destructor does not kill it. */
+	ctx->child_pid = -1;
+
+	ret = sys_read(ctx->fd[0], &c, 1);
+	if (ret == 1 && c == 0) {
+		ctx->status = VACUUM_OK;
+	} else {
+		ctx->status = VACUUM_ERROR;
+		DEBUG(DEBUG_ERR, ("A vacuum child process failed with an error for database %s. ret=%d c=%d\n", db_name, ret, c));
+	}
+
+	talloc_free(ctx);
+}
+
+/*
+ * Fork a child process that vacuums (and possibly repacks) ctdb_db.
+ *
+ * mem_ctx         talloc parent for the child context
+ * ctdb_db         database to vacuum
+ * scheduled       true for timer-driven runs; the child context's
+ *                 destructor then re-arms the vacuuming timer
+ * full_vacuum_run passed through to the vacuum run in the child
+ * out             set to the new child context on success
+ *
+ * Returns 0 on success, or an errno-style value:
+ *   EAGAIN  recovery is active, the db is frozen, or pipe()/fork failed
+ *   EBUSY   another vacuuming child is already running
+ *   ENOMEM  out of memory
+ * On any non-zero return no child context is left registered in
+ * ctdb->vacuumer and the caller is responsible for any rescheduling.
+ */
+static int vacuum_db_child(TALLOC_CTX *mem_ctx,
+			   struct ctdb_db_context *ctdb_db,
+			   bool scheduled,
+			   bool full_vacuum_run,
+			   struct ctdb_vacuum_child_context **out)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_vacuum_child_context *child_ctx;
+	struct tevent_fd *fde;
+	int ret;
+
+	/* we don't vacuum if we are in recovery mode, or db frozen */
+	if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE ||
+	    ctdb_db_frozen(ctdb_db)) {
+		D_INFO("Not vacuuming %s (%s)\n", ctdb_db->db_name,
+		       ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE ?
+		       "in recovery" : "frozen");
+		return EAGAIN;
+	}
+
+	/* Do not allow multiple vacuuming child processes to be active at the
+	 * same time.  If there is a vacuuming child process active, delay
+	 * the new vacuuming event to stagger vacuuming events.
+	 */
+	if (ctdb->vacuumer != NULL) {
+		return EBUSY;
+	}
+
+	child_ctx = talloc_zero(mem_ctx, struct ctdb_vacuum_child_context);
+	if (child_ctx == NULL) {
+		DBG_ERR("Failed to allocate child context for vacuuming of %s\n",
+			ctdb_db->db_name);
+		return ENOMEM;
+	}
+
+	ret = pipe(child_ctx->fd);
+	if (ret != 0) {
+		talloc_free(child_ctx);
+		D_ERR("Failed to create pipe for vacuum child process.\n");
+		return EAGAIN;
+	}
+
+	child_ctx->child_pid = ctdb_fork(ctdb);
+	if (child_ctx->child_pid == (pid_t)-1) {
+		close(child_ctx->fd[0]);
+		close(child_ctx->fd[1]);
+		talloc_free(child_ctx);
+		D_ERR("Failed to fork vacuum child process.\n");
+		return EAGAIN;
+	}
+
+	if (child_ctx->child_pid == 0) {
+		/* Child: do the work, report one status byte, and exit. */
+		char cc = 0;
+		close(child_ctx->fd[0]);
+
+		D_INFO("Vacuuming child process %d for db %s started\n",
+		       getpid(),
+		       ctdb_db->db_name);
+		prctl_set_comment("ctdb_vacuum");
+		ret = switch_from_server_to_client(ctdb);
+		if (ret != 0) {
+			DBG_ERR("ERROR: failed to switch vacuum daemon "
+				"into client mode.\n");
+			/*
+			 * The child must never return into the caller:
+			 * that would leave a second copy of the daemon
+			 * running.  Exiting closes the pipe without a
+			 * status byte, which vacuum_child_handler treats
+			 * as an error.
+			 */
+			_exit(1);
+		}
+
+		cc = ctdb_vacuum_and_repack_db(ctdb_db, full_vacuum_run);
+
+		sys_write(child_ctx->fd[1], &cc, 1);
+		_exit(0);
+	}
+
+	/* Parent from here on. */
+	set_close_on_exec(child_ctx->fd[0]);
+	close(child_ctx->fd[1]);
+
+	child_ctx->status = VACUUM_RUNNING;
+	child_ctx->scheduled = scheduled;
+	child_ctx->start_time = timeval_current();
+	/*
+	 * Set before the destructor is installed: the destructor and
+	 * ctdb_stop_vacuuming() both dereference vacuum_handle.
+	 */
+	child_ctx->vacuum_handle = ctdb_db->vacuum_handle;
+
+	ctdb->vacuumer = child_ctx;
+	talloc_set_destructor(child_ctx, vacuum_child_destructor);
+
+	/*
+	 * Clear the fastpath vacuuming list in the parent.
+	 */
+	talloc_free(ctdb_db->delete_queue);
+	ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
+	if (ctdb_db->delete_queue == NULL) {
+		DBG_ERR("Out of memory when re-creating vacuum tree\n");
+		/*
+		 * Tear the half-set-up child context down again.  The
+		 * destructor kills the child and clears ctdb->vacuumer;
+		 * rescheduling is left to the caller, so stop the
+		 * destructor from arming a timer as well.  No fd event
+		 * owns the pipe yet, so close it here.
+		 */
+		child_ctx->scheduled = false;
+		close(child_ctx->fd[0]);
+		talloc_free(child_ctx);
+		return ENOMEM;
+	}
+
+	talloc_free(ctdb_db->fetch_queue);
+	ctdb_db->fetch_queue = trbt_create(ctdb_db, 0);
+	if (ctdb_db->fetch_queue == NULL) {
+		ctdb_fatal(ctdb, "Out of memory when re-create fetch queue "
+			   " in parent context. Shutting down\n");
+	}
+
+	/* Give up on the child if it runs longer than vacuum_max_run_time. */
+	tevent_add_timer(ctdb->ev, child_ctx,
+			 timeval_current_ofs(ctdb->tunable.vacuum_max_run_time,
+					     0),
+			 vacuum_child_timeout, child_ctx);
+
+	DBG_DEBUG(" Created PIPE FD:%d to child vacuum process\n",
+		  child_ctx->fd[0]);
+
+	/* The fd event takes over closing the read end of the pipe. */
+	fde = tevent_add_fd(ctdb->ev, child_ctx, child_ctx->fd[0],
+			    TEVENT_FD_READ, vacuum_child_handler, child_ctx);
+	tevent_fd_set_auto_close(fde);
+
+	*out = child_ctx;
+	return 0;
+}
+
+/*
+ * Timer handler that starts a scheduled vacuuming run for one database.
+ *
+ * If the vacuum_interval tunable has grown since the timer was armed, the
+ * event is pushed back by the difference.  Otherwise a vacuum child is
+ * started; every vacuum_fast_path_count'th run is a full run.  If the child
+ * cannot be started the event is rescheduled (shortly for EBUSY, after a
+ * full interval otherwise).
+ */
+static void ctdb_vacuum_event(struct tevent_context *ev,
+			      struct tevent_timer *te,
+			      struct timeval t, void *private_data)
+{
+	struct ctdb_vacuum_handle *vacuum_handle = talloc_get_type(
+		private_data, struct ctdb_vacuum_handle);
+	struct ctdb_db_context *ctdb_db = vacuum_handle->ctdb_db;
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	struct ctdb_vacuum_child_context *child_ctx = NULL;
+	uint32_t fast_path_max = ctdb->tunable.vacuum_fast_path_count;
+	uint32_t new_interval = get_vacuum_interval(ctdb_db);
+	uint32_t old_interval = vacuum_handle->vacuum_interval;
+	bool full_vacuum_run = false;
+	struct timeval next;
+	int ret;
+
+	vacuum_handle->vacuum_interval = new_interval;
+
+	if (new_interval > old_interval) {
+		/* Wait out the extra time before actually vacuuming. */
+		DBG_INFO("Vacuum interval increased from "
+			 "%"PRIu32" to %"PRIu32", rescheduling\n",
+			 old_interval,
+			 new_interval);
+		tevent_add_timer(ctdb->ev,
+				 vacuum_handle,
+				 timeval_current_ofs(new_interval - old_interval, 0),
+				 ctdb_vacuum_event,
+				 vacuum_handle);
+		return;
+	}
+
+	if (vacuum_handle->fast_path_count >= fast_path_max) {
+		/* A limit of 0 means full runs are never triggered here. */
+		full_vacuum_run = (fast_path_max > 0);
+		vacuum_handle->fast_path_count = 0;
+	}
+
+	ret = vacuum_db_child(vacuum_handle,
+			      ctdb_db,
+			      true,
+			      full_vacuum_run,
+			      &child_ctx);
+	if (ret == 0) {
+		return;
+	}
+
+	if (ret == EBUSY) {
+		/* Stagger */
+		next = timeval_current_ofs(0, 500*1000);
+	} else {
+		/* Temporary failure, schedule next attempt */
+		next = timeval_current_ofs(vacuum_handle->vacuum_interval, 0);
+	}
+
+	tevent_add_timer(ctdb->ev,
+			 vacuum_handle,
+			 next,
+			 ctdb_vacuum_event,
+			 vacuum_handle);
+}
+
+/*
+ * State kept for a DB_VACUUM control whose reply is sent asynchronously:
+ * the reply is generated by vacuum_control_state_destructor() when this
+ * state is freed.
+ */
+struct vacuum_control_state {
+ /* vacuum child whose final status decides the reply status */
+ struct ctdb_vacuum_child_context *child_ctx;
+ /* the control request that is replied to */
+ struct ctdb_req_control_old *c;
+ /* daemon context used to send the reply */
+ struct ctdb_context *ctdb;
+};
+
+/*
+ * talloc destructor: send the reply for a DB_VACUUM control.
+ * The reply status is 0 if the vacuum child finished with VACUUM_OK and
+ * -1 for any other child status.
+ */
+static int vacuum_control_state_destructor(struct vacuum_control_state *state)
+{
+	int32_t result = -1;
+
+	if (state->child_ctx->status == VACUUM_OK) {
+		result = 0;
+	}
+
+	ctdb_request_control_reply(state->ctdb, state->c, NULL, result, NULL);
+
+	return 0;
+}
+
+/*
+ * Handle the DB_VACUUM control: start an unscheduled vacuuming run for the
+ * database named in the request.
+ *
+ * ctdb         daemon context
+ * c            the control request; kept for the deferred reply on success
+ * indata       marshalled struct ctdb_db_vacuum
+ * async_reply  set to true when the reply is deferred until the vacuum
+ *              child has finished
+ *
+ * Returns 0 if the vacuum child was started (the reply is then sent by
+ * vacuum_control_state_destructor()), -1 on invalid data, unknown database,
+ * allocation failure, or if the child could not be started.
+ */
+int32_t ctdb_control_db_vacuum(struct ctdb_context *ctdb,
+			       struct ctdb_req_control_old *c,
+			       TDB_DATA indata,
+			       bool *async_reply)
+{
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_vacuum_child_context *child_ctx = NULL;
+	struct ctdb_db_vacuum *db_vacuum;
+	struct vacuum_control_state *state;
+	size_t np;
+	int ret;
+
+	ret = ctdb_db_vacuum_pull(indata.dptr,
+				  indata.dsize,
+				  ctdb,
+				  &db_vacuum,
+				  &np);
+	if (ret != 0) {
+		DBG_ERR("Invalid data\n");
+		return -1;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, db_vacuum->db_id);
+	if (ctdb_db == NULL) {
+		DBG_ERR("Unknown db id 0x%08x\n", db_vacuum->db_id);
+		talloc_free(db_vacuum);
+		return -1;
+	}
+
+	state = talloc(ctdb, struct vacuum_control_state);
+	if (state == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		/* db_vacuum hangs off ctdb, so it must be freed explicitly. */
+		talloc_free(db_vacuum);
+		return -1;
+	}
+
+	ret = vacuum_db_child(ctdb_db,
+			      ctdb_db,
+			      false,
+			      db_vacuum->full_vacuum_run,
+			      &child_ctx);
+
+	talloc_free(db_vacuum);
+
+	if (ret == 0) {
+		/*
+		 * Tie the state to the child context: when the child
+		 * context is freed, the state's destructor sends the reply.
+		 */
+		(void) talloc_steal(child_ctx, state);
+
+		state->child_ctx = child_ctx;
+		state->c = talloc_steal(state, c);
+		state->ctdb = ctdb;
+
+		talloc_set_destructor(state, vacuum_control_state_destructor);
+
+		*async_reply = true;
+		return 0;
+	}
+
+	/* No destructor has been set yet, so this does not send a reply. */
+	talloc_free(state);
+
+	switch (ret) {
+	case EBUSY:
+		DBG_WARNING("Vacuuming collision\n");
+		break;
+
+	default:
+		DBG_ERR("Temporary vacuuming failure, ret=%d\n", ret);
+	}
+
+	return -1;
+}
+
+/* Abort the currently running vacuum child, if there is one. */
+void ctdb_stop_vacuuming(struct ctdb_context *ctdb)
+{
+	struct ctdb_vacuum_child_context *active = ctdb->vacuumer;
+
+	if (active == NULL) {
+		return;
+	}
+
+	D_INFO("Aborting vacuuming for %s (%i)\n",
+	       active->vacuum_handle->ctdb_db->db_name,
+	       (int)active->child_pid);
+	/* vacuum_child_destructor kills it, removes from list */
+	talloc_free(active);
+}
+
+/*
+ * Set up vacuuming for a database: create its vacuum handle and arm the
+ * first vacuuming timer.  Nothing is set up for non-volatile databases.
+ *
+ * Returns 0 on success (or when vacuuming is disabled), -1 if the handle
+ * cannot be allocated.
+ */
+int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db)
+{
+	struct ctdb_vacuum_handle *handle = NULL;
+
+	if (! ctdb_db_volatile(ctdb_db)) {
+		DEBUG(DEBUG_ERR,
+		      ("Vacuuming is disabled for non-volatile database %s\n",
+		       ctdb_db->db_name));
+		return 0;
+	}
+
+	handle = talloc(ctdb_db, struct ctdb_vacuum_handle);
+	if (handle == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		return -1;
+	}
+
+	handle->ctdb_db = ctdb_db;
+	handle->fast_path_count = 0;
+	handle->vacuum_interval = get_vacuum_interval(ctdb_db);
+	ctdb_db->vacuum_handle = handle;
+
+	/* First run happens one vacuum interval from now. */
+	tevent_add_timer(ctdb_db->ctdb->ev,
+			 handle,
+			 timeval_current_ofs(handle->vacuum_interval, 0),
+			 ctdb_vacuum_event,
+			 handle);
+
+	return 0;
+}
+
+/*
+ * Remove a record from the database's delete queue, if it is queued.
+ *
+ * The queue is looked up by the 32-bit hash of the key.  If the queued
+ * entry has the same hash but a different key (hash collision) it is left
+ * alone.  hdr is only used for the debug message.
+ */
+static void remove_record_from_delete_queue(struct ctdb_db_context *ctdb_db,
+					    const struct ctdb_ltdb_header *hdr,
+					    const TDB_DATA key)
+{
+	struct delete_record_data *kd;
+	uint32_t hash;
+
+	hash = (uint32_t)ctdb_hash(&key);
+
+	DEBUG(DEBUG_DEBUG, (__location__
+			    " remove_record_from_delete_queue: "
+			    "db[%s] "
+			    "db_id[0x%08x] "
+			    "key_hash[0x%08x] "
+			    "lmaster[%u] "
+			    "migrated_with_data[%s]\n",
+			    ctdb_db->db_name, ctdb_db->db_id,
+			    hash,
+			    ctdb_lmaster(ctdb_db->ctdb, &key),
+			    hdr->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA ? "yes" : "no"));
+
+	kd = (struct delete_record_data *)trbt_lookup32(ctdb_db->delete_queue, hash);
+	if (kd == NULL) {
+		/* The period belongs before the newline, not after it. */
+		DEBUG(DEBUG_DEBUG, (__location__
+				    " remove_record_from_delete_queue: "
+				    "record not in queue (hash[0x%08x]).\n",
+				    hash));
+		return;
+	}
+
+	if ((kd->key.dsize != key.dsize) ||
+	    (memcmp(kd->key.dptr, key.dptr, key.dsize) != 0))
+	{
+		DEBUG(DEBUG_DEBUG, (__location__
+				    " remove_record_from_delete_queue: "
+				    "hash collision for key with hash[0x%08x] "
+				    "in db[%s] - skipping\n",
+				    hash, ctdb_db->db_name));
+		return;
+	}
+
+	DEBUG(DEBUG_DEBUG, (__location__
+			    " remove_record_from_delete_queue: "
+			    "removing key with hash[0x%08x]\n",
+			    hash));
+
+	/*
+	 * NOTE(review): presumably freeing the entry also unlinks it from
+	 * the tree (e.g. via a talloc destructor) - confirm.
+	 */
+	talloc_free(kd);
+
+	return;
+}
+
+/**
+ * Queue a record for deletion in the database's delete queue.
+ *
+ * The queue is keyed by the 32-bit key hash.  If an entry with the same
+ * hash but a different key is already queued, the new record is skipped
+ * (returning 0).  If the same key is already queued, the entry is
+ * inserted again, updating it.
+ *
+ * Returns 0 on success or skip, -1 if the insert fails.
+ */
+static int insert_record_into_delete_queue(struct ctdb_db_context *ctdb_db,
+					   const struct ctdb_ltdb_header *hdr,
+					   TDB_DATA key)
+{
+	uint32_t hash = (uint32_t)ctdb_hash(&key);
+	struct delete_record_data *queued = NULL;
+	int rc;
+
+	DEBUG(DEBUG_DEBUG, (__location__ " schedule for deletion: db[%s] "
+			    "db_id[0x%08x] "
+			    "key_hash[0x%08x] "
+			    "lmaster[%u] "
+			    "migrated_with_data[%s]\n",
+			    ctdb_db->db_name, ctdb_db->db_id,
+			    hash,
+			    ctdb_lmaster(ctdb_db->ctdb, &key),
+			    hdr->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA ? "yes" : "no"));
+
+	queued = (struct delete_record_data *)trbt_lookup32(
+		ctdb_db->delete_queue, hash);
+	if (queued != NULL) {
+		bool same_key = (queued->key.dsize == key.dsize) &&
+			(memcmp(queued->key.dptr, key.dptr, key.dsize) == 0);
+
+		if (!same_key) {
+			DEBUG(DEBUG_INFO,
+			      (__location__ " schedule for deletion: "
+			       "hash collision for key hash [0x%08x]. "
+			       "Skipping the record.\n", hash));
+			return 0;
+		}
+
+		DEBUG(DEBUG_DEBUG,
+		      (__location__ " schedule for deletion: "
+		       "updating entry for key with hash [0x%08x].\n",
+		       hash));
+	}
+
+	rc = insert_delete_record_data_into_tree(ctdb_db->ctdb, ctdb_db,
+						 ctdb_db->delete_queue,
+						 hdr, key);
+	if (rc == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_INFO,
+	      (__location__ " schedule for deletion: error "
+	       "inserting key with hash [0x%08x] into delete queue\n",
+	       hash));
+	return -1;
+}
+
+/**
+ * Handle the SCHEDULE_FOR_DELETION control: queue the record described by
+ * indata in the delete queue of its database.
+ * Called from the parent context.
+ *
+ * Returns -1 for an unknown database id, otherwise the result of the
+ * queue insertion.
+ */
+int32_t ctdb_control_schedule_for_deletion(struct ctdb_context *ctdb,
+					   TDB_DATA indata)
+{
+	struct ctdb_control_schedule_for_deletion *req =
+		(struct ctdb_control_schedule_for_deletion *)indata.dptr;
+	struct ctdb_db_context *ctdb_db = NULL;
+	TDB_DATA key;
+
+	ctdb_db = find_ctdb_db(ctdb, req->db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Unknown db id 0x%08x\n",
+				  req->db_id));
+		return -1;
+	}
+
+	/* The key bytes live inside the request buffer itself. */
+	key.dptr = req->key;
+	key.dsize = req->keylen;
+
+	return insert_record_into_delete_queue(ctdb_db, &req->hdr, key);
+}
+
+/*
+ * Schedule a record for deletion from whichever process we are in.
+ *
+ * In the main daemon (ctdbd_pid matches our pid) the record is queued
+ * directly.  In any other process a SCHEDULE_FOR_DELETION control is sent
+ * to the daemon on the current node, provided this process can send
+ * controls.
+ *
+ * Returns 0 on success, -1 if controls cannot be sent, on allocation
+ * failure or when a non-zero control status is reported; otherwise the
+ * non-zero return value of ctdb_control().
+ */
+int32_t ctdb_local_schedule_for_deletion(struct ctdb_db_context *ctdb_db,
+					 const struct ctdb_ltdb_header *hdr,
+					 TDB_DATA key)
+{
+	int ret;
+	struct ctdb_control_schedule_for_deletion *dd;
+	TDB_DATA indata;
+	/*
+	 * Initialised because the control is sent with
+	 * CTDB_CTRL_FLAG_NOREPLY and status is read below: do not rely on
+	 * ctdb_control() writing it on every path.
+	 */
+	int32_t status = 0;
+
+	if (ctdb_db->ctdb->ctdbd_pid == getpid()) {
+		/* main daemon - directly queue */
+		ret = insert_record_into_delete_queue(ctdb_db, hdr, key);
+
+		return ret;
+	}
+
+	/* if we don't have a connection to the daemon we can not send
+	   a control. For example sometimes from update_record control child
+	   process.
+	*/
+	if (!ctdb_db->ctdb->can_send_controls) {
+		return -1;
+	}
+
+
+	/* child process: send the main daemon a control */
+	indata.dsize = offsetof(struct ctdb_control_schedule_for_deletion, key) + key.dsize;
+	indata.dptr = talloc_zero_array(ctdb_db, uint8_t, indata.dsize);
+	if (indata.dptr == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+		return -1;
+	}
+	/* Marshal: fixed-size fields followed by the key bytes. */
+	dd = (struct ctdb_control_schedule_for_deletion *)(void *)indata.dptr;
+	dd->db_id = ctdb_db->db_id;
+	dd->hdr = *hdr;
+	dd->keylen = key.dsize;
+	memcpy(dd->key, key.dptr, key.dsize);
+
+	ret = ctdb_control(ctdb_db->ctdb,
+			   CTDB_CURRENT_NODE,
+			   ctdb_db->db_id,
+			   CTDB_CONTROL_SCHEDULE_FOR_DELETION,
+			   CTDB_CTRL_FLAG_NOREPLY, /* flags */
+			   indata,
+			   NULL, /* mem_ctx */
+			   NULL, /* outdata */
+			   &status,
+			   NULL, /* timeout : NULL == wait forever */
+			   NULL); /* error message */
+
+	talloc_free(indata.dptr);
+
+	if (ret != 0 || status != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Error sending "
+				  "SCHEDULE_FOR_DELETION "
+				  "control.\n"));
+		if (status != 0) {
+			ret = -1;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Drop a record from the delete queue.  This only has an effect when
+ * called in the main daemon; in any other process it does nothing.
+ */
+void ctdb_local_remove_from_delete_queue(struct ctdb_db_context *ctdb_db,
+					 const struct ctdb_ltdb_header *hdr,
+					 const TDB_DATA key)
+{
+	if (ctdb_db->ctdb->ctdbd_pid == getpid()) {
+		remove_record_from_delete_queue(ctdb_db, hdr, key);
+	}
+}
+
+/*
+ * Record-buffer traverse callback: copy the record's key into a new
+ * fetch_record_data entry and add it to the database's fetch queue,
+ * indexed by the key hash.  Only the key is used; reqid, header and data
+ * are ignored.
+ *
+ * private_data must be the struct ctdb_db_context.
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int vacuum_fetch_parser(uint32_t reqid,
+			       struct ctdb_ltdb_header *header,
+			       TDB_DATA key, TDB_DATA data,
+			       void *private_data)
+{
+	struct ctdb_db_context *ctdb_db = talloc_get_type_abort(
+		private_data, struct ctdb_db_context);
+	size_t rec_size = offsetof(struct fetch_record_data, keydata) +
+			  key.dsize;
+	struct fetch_record_data *rec = NULL;
+	uint32_t hash;
+
+	/* One allocation holds the struct and the trailing key bytes. */
+	rec = (struct fetch_record_data *)talloc_size(ctdb_db->fetch_queue,
+						      rec_size);
+	if (rec == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Memory error\n"));
+		return -1;
+	}
+	talloc_set_name_const(rec, "struct fetch_record_data");
+
+	memcpy(rec->keydata, key.dptr, key.dsize);
+	rec->key.dptr = rec->keydata;
+	rec->key.dsize = key.dsize;
+
+	hash = ctdb_hash(&key);
+	trbt_insert32(ctdb_db->fetch_queue, hash, rec);
+
+	return 0;
+}
+
+/*
+ * Handle the VACUUM_FETCH control: unpack the record buffer in indata and
+ * add every record's key to the fetch queue of the database it names.
+ *
+ * Returns -1 if the data cannot be unpacked or the database is unknown,
+ * otherwise the result of traversing the record buffer.
+ */
+int32_t ctdb_control_vacuum_fetch(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_rec_buffer *recbuf;
+	struct ctdb_db_context *ctdb_db;
+	size_t npull;
+	int ret;
+
+	ret = ctdb_rec_buffer_pull(indata.dptr, indata.dsize, ctdb, &recbuf,
+				   &npull);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Invalid data in vacuum_fetch\n"));
+		return -1;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, recbuf->db_id);
+	if (ctdb_db == NULL) {
+		/* Log first: the message reads recbuf, so free it afterwards. */
+		DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%08x\n",
+				  recbuf->db_id));
+		talloc_free(recbuf);
+		return -1;
+	}
+
+	ret = ctdb_rec_buffer_traverse(recbuf, vacuum_fetch_parser, ctdb_db);
+	talloc_free(recbuf);
+	return ret;
+}
diff --git a/ctdb/server/ctdbd.c b/ctdb/server/ctdbd.c
new file mode 100644
index 0000000..a388bff
--- /dev/null
+++ b/ctdb/server/ctdbd.c
@@ -0,0 +1,407 @@
+/*
+ standalone ctdb daemon
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/time.h"
+#include "system/wait.h"
+#include "system/network.h"
+#include "system/syslog.h"
+
+#include <popt.h>
+#include <talloc.h>
+/* Allow use of deprecated function tevent_loop_allow_nesting() */
+#define TEVENT_DEPRECATED
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/path.h"
+#include "common/logging.h"
+#include "common/logging_conf.h"
+
+#include "ctdb_config.h"
+
+int script_log_level;
+bool fast_start;
+
+/*
+  Transport-layer upcall for an incoming packet: update statistics and the
+  per-node receive counter, then hand the packet to ctdb_input_pkt().
+*/
+static void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
+{
+	struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
+	struct ctdb_req_call_old *call = (struct ctdb_req_call_old *)hdr;
+
+	CTDB_INCREMENT_STAT(ctdb, node_packets_recv);
+
+	/*
+	 * up the counter for this source node, so we know its alive;
+	 * as a special case, redirected calls (CTDB_REQ_CALL with a
+	 * non-zero hopcount) don't increment the rx_cnt
+	 */
+	if (ctdb_validate_pnn(ctdb, hdr->srcnode) &&
+	    (hdr->operation != CTDB_REQ_CALL || call->hopcount == 0)) {
+		ctdb->nodes[hdr->srcnode]->rx_cnt++;
+	}
+
+	ctdb_input_pkt(ctdb, hdr);
+}
+
+/*
+ * Callback table installed as ctdb->upcalls in ctdb_init(); recv_pkt is
+ * the packet-received callback defined above.
+ */
+static const struct ctdb_upcalls ctdb_upcalls = {
+ .recv_pkt = ctdb_recv_pkt,
+ .node_dead = ctdb_node_dead,
+ .node_connected = ctdb_node_connected
+};
+
+/*
+ * Allocate and initialise the daemon's ctdb_context as a talloc child of
+ * the event context.
+ *
+ * Sets up request-id and srvid handling, the daemon socket and pidfile
+ * paths, start/recovery timestamps and default settings.
+ *
+ * Returns the new context, or NULL on failure (nothing is left allocated).
+ */
+static struct ctdb_context *ctdb_init(struct tevent_context *ev)
+{
+	struct ctdb_context *ctdb = NULL;
+	int ret;
+
+	ctdb = talloc_zero(ev, struct ctdb_context);
+	if (ctdb == NULL) {
+		DBG_ERR("Memory error\n");
+		return NULL;
+	}
+	ctdb->ev = ev;
+
+	/* Wrap early to exercise code. */
+	ret = reqid_init(ctdb, INT_MAX-200, &ctdb->idr);
+	if (ret != 0) {
+		D_ERR("reqid_init failed (%s)\n", strerror(ret));
+		goto fail;
+	}
+
+	ret = srvid_init(ctdb, &ctdb->srv);
+	if (ret != 0) {
+		D_ERR("srvid_init failed (%s)\n", strerror(ret));
+		goto fail;
+	}
+
+	ctdb->daemon.name = path_socket(ctdb, "ctdbd");
+	if (ctdb->daemon.name == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		goto fail;
+	}
+
+	ctdbd_pidfile = path_pidfile(ctdb, "ctdbd");
+	if (ctdbd_pidfile == NULL) {
+		DBG_ERR("Memory allocation error\n");
+		goto fail;
+	}
+
+	/* Daemon start and recovery timestamps all begin at "now". */
+	gettimeofday(&ctdb->ctdbd_start_time, NULL);
+	gettimeofday(&ctdb->last_recovery_started, NULL);
+	gettimeofday(&ctdb->last_recovery_finished, NULL);
+
+	ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
+	ctdb->upcalls = &ctdb_upcalls;
+	ctdb->statistics.statistics_start_time = timeval_current();
+	ctdb->capabilities = CTDB_CAP_DEFAULT;
+
+	/*
+	 * The PNN starts out unknown.  It is set to the correct value
+	 * later, either by ctdb_add_node() while the nodes file is
+	 * loaded or by ctdb_tcp_listen_automatic() when the transport
+	 * is initialised.  At some point this should be de-optimised
+	 * and pulled out into ctdb_start_daemon() so it is done clearly
+	 * and only in one place.
+	 */
+	ctdb->pnn = CTDB_UNKNOWN_PNN;
+
+	ctdb->do_checkpublicip = true;
+
+	return ctdb;
+
+fail:
+	talloc_free(ctdb);
+	return NULL;
+}
+
+
+/*
+ main program
+
+ Sets up the event context and ctdb context, parses the command line,
+ loads the configuration, initialises logging, copies cluster/database/
+ legacy settings from ctdb_config into the ctdb context and finally
+ hands over to ctdb_start_daemon(). Any setup failure frees the ctdb
+ context and exits with status 1.
+*/
+int main(int argc, const char *argv[])
+{
+ struct ctdb_context *ctdb = NULL;
+ int interactive_opt = 0;
+ bool interactive = false;
+
+ struct poptOption popt_options[] = {
+ POPT_AUTOHELP
+ { "interactive", 'i', POPT_ARG_NONE, &interactive_opt, 0,
+ "don't fork, log to stderr", NULL },
+ POPT_TABLEEND
+ };
+ int opt, ret;
+ const char **extra_argv;
+ poptContext pc;
+ struct tevent_context *ev;
+ const char *ctdb_base;
+ struct conf_context *conf;
+ const char *logging_location;
+ const char *test_mode;
+ bool ok;
+
+ setproctitle_init(argc, discard_const(argv), environ);
+
+ /*
+ * Basic setup
+ */
+
+ talloc_enable_null_tracking();
+
+ fault_setup();
+
+ ev = tevent_context_init(NULL);
+ if (ev == NULL) {
+ fprintf(stderr, "tevent_context_init() failed\n");
+ exit(1);
+ }
+ tevent_loop_allow_nesting(ev);
+
+ ctdb = ctdb_init(ev);
+ if (ctdb == NULL) {
+ fprintf(stderr, "Failed to init ctdb\n");
+ exit(1);
+ }
+
+ /* Default value for CTDB_BASE - don't override */
+ /* (overwrite argument is 0, so an existing value is kept) */
+ setenv("CTDB_BASE", CTDB_ETCDIR, 0);
+ ctdb_base = getenv("CTDB_BASE");
+ if (ctdb_base == NULL) {
+ D_ERR("CTDB_BASE not set\n");
+ exit(1);
+ }
+
+ /*
+ * Command-line option handling
+ */
+
+ pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
+
+ /* No option returns a value to handle here, so anything other than
+ * "end of options" (-1) is reported as an invalid option. */
+ while ((opt = poptGetNextOpt(pc)) != -1) {
+ switch (opt) {
+ default:
+ fprintf(stderr, "Invalid option %s: %s\n",
+ poptBadOption(pc, 0), poptStrerror(opt));
+ goto fail;
+ }
+ }
+
+ /* If there are extra arguments then exit with usage message */
+ extra_argv = poptGetArgs(pc);
+ if (extra_argv) {
+ /* NOTE(review): presumably skips argv[0], which
+ * POPT_CONTEXT_KEEP_FIRST leaves in the leftover
+ * arguments - confirm against the popt documentation. */
+ extra_argv++;
+ if (extra_argv[0]) {
+ poptPrintHelp(pc, stdout, 0);
+ goto fail;
+ }
+ }
+
+ interactive = (interactive_opt != 0);
+
+ /*
+ * Configuration file handling
+ */
+
+ ret = ctdbd_config_load(ctdb, &conf);
+ if (ret != 0) {
+ /* ctdbd_config_load() logs the failure */
+ goto fail;
+ }
+
+ /*
+ * Logging setup/options
+ */
+
+ test_mode = getenv("CTDB_TEST_MODE");
+
+ /* Log to stderr (ignoring configuration) when running as interactive */
+ if (interactive) {
+ logging_location = "file:";
+ setenv("CTDB_INTERACTIVE", "true", 1);
+ } else {
+ logging_location = logging_conf_location(conf);
+ }
+
+ if (strcmp(logging_location, "syslog") != 0 && test_mode == NULL) {
+ /* This can help when CTDB logging is misconfigured */
+ syslog(LOG_DAEMON|LOG_NOTICE,
+ "CTDB logging to location %s",
+ logging_location);
+ }
+
+ /* Initialize logging and set the debug level */
+ ok = ctdb_logging_init(ctdb,
+ logging_location,
+ logging_conf_log_level(conf));
+ if (!ok) {
+ goto fail;
+ }
+ /* Export the logging settings through the environment. */
+ setenv("CTDB_LOGGING", logging_location, 1);
+ setenv("CTDB_DEBUGLEVEL", debug_level_to_string(DEBUGLEVEL), 1);
+
+ script_log_level = debug_level_from_string(
+ ctdb_config.script_log_level);
+
+ D_NOTICE("CTDB starting on node\n");
+
+ /*
+ * Cluster setup/options
+ */
+
+ ret = ctdb_set_transport(ctdb, ctdb_config.transport);
+ if (ret == -1) {
+ D_ERR("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb));
+ goto fail;
+ }
+
+ /* The cluster_lock setting takes precedence over recovery_lock. */
+ if (ctdb_config.cluster_lock != NULL) {
+ ctdb->recovery_lock = ctdb_config.cluster_lock;
+ } else if (ctdb_config.recovery_lock != NULL) {
+ ctdb->recovery_lock = ctdb_config.recovery_lock;
+ } else {
+ D_WARNING("Cluster lock not set\n");
+ }
+
+ /* tell ctdb what address to listen on */
+ if (ctdb_config.node_address) {
+ ret = ctdb_set_address(ctdb, ctdb_config.node_address);
+ if (ret == -1) {
+ D_ERR("ctdb_set_address failed - %s\n",
+ ctdb_errstr(ctdb));
+ goto fail;
+ }
+ }
+
+ /* tell ctdb what nodes are available */
+ ctdb->nodes_file = talloc_asprintf(ctdb, "%s/nodes", ctdb_base);
+ if (ctdb->nodes_file == NULL) {
+ DBG_ERR(" Out of memory\n");
+ goto fail;
+ }
+ ctdb_load_nodes_file(ctdb);
+
+ /*
+ * Database setup/options
+ */
+
+ /* All three database directories must already exist. */
+ ctdb->db_directory = ctdb_config.dbdir_volatile;
+ ok = directory_exist(ctdb->db_directory);
+ if (! ok) {
+ D_ERR("Volatile database directory %s does not exist\n",
+ ctdb->db_directory);
+ goto fail;
+ }
+
+ ctdb->db_directory_persistent = ctdb_config.dbdir_persistent;
+ ok = directory_exist(ctdb->db_directory_persistent);
+ if (! ok) {
+ D_ERR("Persistent database directory %s does not exist\n",
+ ctdb->db_directory_persistent);
+ goto fail;
+ }
+
+ ctdb->db_directory_state = ctdb_config.dbdir_state;
+ ok = directory_exist(ctdb->db_directory_state);
+ if (! ok) {
+ D_ERR("State database directory %s does not exist\n",
+ ctdb->db_directory_state);
+ goto fail;
+ }
+
+ /* The lock debug script is exported through the environment. */
+ if (ctdb_config.lock_debug_script != NULL) {
+ ret = setenv("CTDB_DEBUG_LOCKS",
+ ctdb_config.lock_debug_script,
+ 1);
+ if (ret != 0) {
+ D_ERR("Failed to set up lock debugging (%s)\n",
+ strerror(errno));
+ goto fail;
+ }
+ }
+
+ /*
+ * Legacy setup/options
+ */
+
+ ctdb->start_as_disabled = (int)ctdb_config.start_as_disabled;
+ ctdb->start_as_stopped = (int)ctdb_config.start_as_stopped;
+
+ /* set ctdbd capabilities */
+ if (!ctdb_config.lmaster_capability) {
+ ctdb->capabilities &= ~CTDB_CAP_LMASTER;
+ }
+ if (!ctdb_config.leader_capability) {
+ ctdb->capabilities &= ~CTDB_CAP_RECMASTER;
+ }
+
+ ctdb->do_setsched = ctdb_config.realtime_scheduling;
+
+ /*
+ * Miscellaneous setup
+ */
+
+ ctdb_tunables_load(ctdb);
+
+ ctdb->event_script_dir = talloc_asprintf(ctdb,
+ "%s/events/legacy",
+ ctdb_base);
+ if (ctdb->event_script_dir == NULL) {
+ DBG_ERR("Out of memory\n");
+ goto fail;
+ }
+
+ ctdb->notification_script = talloc_asprintf(ctdb,
+ "%s/notify.sh",
+ ctdb_base);
+ if (ctdb->notification_script == NULL) {
+ D_ERR("Unable to set notification script\n");
+ goto fail;
+ }
+
+ /*
+ * Testing and debug options
+ */
+
+ /* Test mode (CTDB_TEST_MODE set) overrides the scheduling setting
+ * from the configuration and turns off the public IP check. */
+ if (test_mode != NULL) {
+ ctdb->do_setsched = false;
+ ctdb->do_checkpublicip = false;
+ fast_start = true;
+ }
+
+ /* start the protocol running (as a child) */
+ return ctdb_start_daemon(ctdb, interactive, test_mode != NULL);
+
+fail:
+ /* Common error exit: release the ctdb context and exit non-zero. */
+ talloc_free(ctdb);
+ exit(1);
+}
diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c
new file mode 100644
index 0000000..3ea7d74
--- /dev/null
+++ b/ctdb/server/eventscript.c
@@ -0,0 +1,845 @@
+/*
+ event script handling
+
+ Copyright (C) Andrew Tridgell 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+#include "system/dir.h"
+#include "system/locale.h"
+#include "system/time.h"
+#include "system/dir.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+#include "common/reqid.h"
+#include "common/sock_io.h"
+#include "common/path.h"
+
+#include "protocol/protocol_util.h"
+#include "event/event_protocol_api.h"
+
+/*
+ * Setting up event daemon
+ */
+
+/* State for the event daemon child process and the client connection to it. */
+struct eventd_context {
+ /* Event loop, copied from ctdb->ev by eventd_context_init() */
+ struct tevent_context *ev;
+ /* Event daemon executable: $CTDB_EVENTD or the compiled-in helper path */
+ const char *path;
+ /* Socket path obtained from path_socket(..., "eventd") */
+ const char *socket;
+
+ /* server state */
+ /* PID of the forked event daemon, -1 when none has been started */
+ pid_t eventd_pid;
+ /* Read handler on the startup pipe; fires eventd_dead_handler() */
+ struct tevent_fd *eventd_fde;
+
+ /* client state */
+ /* Allocator mapping request IDs to eventd_client_state */
+ struct reqid_context *idr;
+ /* Socket queue to the daemon, NULL while not connected */
+ struct sock_queue *queue;
+ /* List of requests written to the daemon and not yet freed */
+ struct eventd_client_state *calls;
+};
+
+/*
+ * Allocate an eventd context as a talloc child of mem_ctx and fill in
+ * the event loop, daemon path, socket path and request ID allocator.
+ *
+ * Returns true and stores the context in *out on success.  Returns
+ * false on any allocation/initialisation failure, leaving *out
+ * untouched.
+ */
+static bool eventd_context_init(TALLOC_CTX *mem_ctx,
+				struct ctdb_context *ctdb,
+				struct eventd_context **out)
+{
+	struct eventd_context *ctx;
+	const char *binary;
+	int rc;
+
+	ctx = talloc_zero(mem_ctx, struct eventd_context);
+	if (ctx == NULL) {
+		return false;
+	}
+
+	ctx->ev = ctdb->ev;
+
+	/* The environment overrides the compiled-in helper location */
+	binary = getenv("CTDB_EVENTD");
+	if (binary == NULL) {
+		binary = CTDB_HELPER_BINDIR "/ctdb-eventd";
+	}
+
+	ctx->path = talloc_strdup(ctx, binary);
+	if (ctx->path == NULL) {
+		goto fail;
+	}
+
+	ctx->socket = path_socket(ctx, "eventd");
+	if (ctx->socket == NULL) {
+		goto fail;
+	}
+
+	rc = reqid_init(ctx, 1, &ctx->idr);
+	if (rc != 0) {
+		goto fail;
+	}
+
+	/* No daemon has been forked yet */
+	ctx->eventd_pid = -1;
+
+	*out = ctx;
+	return true;
+
+fail:
+	talloc_free(ctx);
+	return false;
+}
+
+/* Shared between wait_for_daemon_startup() and its timer/fd handlers. */
+struct eventd_startup_state {
+ /* Set by either handler to end the wait loop */
+ bool done;
+ /* 0 on success, otherwise an errno-style value describing the failure */
+ int ret;
+ /* File descriptor the startup status is read from */
+ int fd;
+};
+
+/* Timer callback: give up waiting for the startup status with ETIMEDOUT. */
+static void eventd_startup_timeout_handler(struct tevent_context *ev,
+					   struct tevent_timer *te,
+					   struct timeval t,
+					   void *private_data)
+{
+	struct eventd_startup_state *startup = private_data;
+
+	startup->ret = ETIMEDOUT;
+	startup->done = true;
+}
+
+/*
+ * FD callback: read one unsigned int status word from the startup fd
+ * and translate the outcome into state->ret:
+ *   full word, value 0     -> 0
+ *   full word, other value -> EIO
+ *   end of file            -> EPIPE
+ *   read error             -> errno
+ *   short read             -> EINVAL
+ */
+static void eventd_startup_handler(struct tevent_context *ev,
+				   struct tevent_fd *fde, uint16_t flags,
+				   void *private_data)
+{
+	struct eventd_startup_state *startup = private_data;
+	unsigned int word;
+	ssize_t nread;
+
+	nread = sys_read(startup->fd, &word, sizeof(word));
+	if (nread == -1) {
+		startup->ret = errno;
+	} else if (nread == 0) {
+		startup->ret = EPIPE;
+	} else if (nread != sizeof(word)) {
+		startup->ret = EINVAL;
+	} else {
+		startup->ret = (word == 0) ? 0 : EIO;
+	}
+
+	startup->done = true;
+}
+
+
+/*
+ * Run the event loop until a startup status arrives on fd or 10
+ * seconds pass.
+ *
+ * Returns 0 when the status word read from fd is 0, ENOMEM if the
+ * timer or fd handler cannot be set up, otherwise the errno-style
+ * value chosen by the handlers (ETIMEDOUT, EIO, EPIPE, ...).
+ */
+static int wait_for_daemon_startup(struct tevent_context *ev,
+				   int fd)
+{
+	struct eventd_startup_state startup = {
+		.done = false,
+		.ret = 0,
+		.fd = fd,
+	};
+	struct tevent_timer *timeout;
+	struct tevent_fd *reader;
+	TALLOC_CTX *frame;
+
+	/* Both handlers hang off this frame so one free removes them */
+	frame = talloc_new(ev);
+	if (frame == NULL) {
+		return ENOMEM;
+	}
+
+	timeout = tevent_add_timer(ev,
+				   frame,
+				   tevent_timeval_current_ofs(10, 0),
+				   eventd_startup_timeout_handler,
+				   &startup);
+	if (timeout == NULL) {
+		startup.ret = ENOMEM;
+		goto out;
+	}
+
+	reader = tevent_add_fd(ev,
+			       frame,
+			       fd,
+			       TEVENT_FD_READ,
+			       eventd_startup_handler,
+			       &startup);
+	if (reader == NULL) {
+		startup.ret = ENOMEM;
+		goto out;
+	}
+
+	while (! startup.done) {
+		tevent_loop_once(ev);
+	}
+
+out:
+	talloc_free(frame);
+	return startup.ret;
+}
+
+
+/*
+ * Start and stop event daemon
+ */
+
+static bool eventd_client_connect(struct eventd_context *ectx);
+static void eventd_dead_handler(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags,
+ void *private_data);
+
+/*
+ * Fork and exec the event daemon, wait for it to report startup on a
+ * pipe, then connect to it as a client.
+ *
+ * The eventd context in ctdb->ectx is created on first use.  Returns 0
+ * on success and -1 on any failure.
+ */
+int ctdb_start_eventd(struct ctdb_context *ctdb)
+{
+ struct eventd_context *ectx;
+ const char **argv;
+ int fd[2];
+ pid_t pid;
+ int ret;
+ bool status;
+
+ if (ctdb->ectx == NULL) {
+ status = eventd_context_init(ctdb, ctdb, &ctdb->ectx);
+ if (! status) {
+ DEBUG(DEBUG_ERR,
+ ("Failed to initialize eventd context\n"));
+ return -1;
+ }
+ }
+
+ ectx = ctdb->ectx;
+
+ /* NOTE(review): presumably removes a stale socket - confirm in sock_clean() */
+ if (! sock_clean(ectx->socket)) {
+ return -1;
+ }
+
+ /* fd[1] is handed to the daemon, fd[0] is kept for status and liveness */
+ ret = pipe(fd);
+ if (ret != 0) {
+ return -1;
+ }
+
+ argv = talloc_array(ectx, const char *, 6);
+ if (argv == NULL) {
+ close(fd[0]);
+ close(fd[1]);
+ return -1;
+ }
+
+ /* Command line: <path> -P <ctdbd pid> -S <write end of the pipe> */
+ argv[0] = ectx->path;
+ argv[1] = "-P";
+ argv[2] = talloc_asprintf(argv, "%d", ctdb->ctdbd_pid);
+ argv[3] = "-S";
+ argv[4] = talloc_asprintf(argv, "%d", fd[1]);
+ argv[5] = NULL;
+
+ if (argv[2] == NULL || argv[4] == NULL) {
+ close(fd[0]);
+ close(fd[1]);
+ talloc_free(argv);
+ return -1;
+ }
+
+ D_NOTICE("Starting event daemon %s %s %s %s %s\n",
+ argv[0],
+ argv[1],
+ argv[2],
+ argv[3],
+ argv[4]);
+
+ pid = ctdb_fork(ctdb);
+ if (pid == -1) {
+ close(fd[0]);
+ close(fd[1]);
+ talloc_free(argv);
+ return -1;
+ }
+
+ if (pid == 0) {
+ /* Child: only the write end is needed; never returns */
+ close(fd[0]);
+ ret = execv(argv[0], discard_const(argv));
+ if (ret == -1) {
+ _exit(errno);
+ }
+ _exit(0);
+ }
+
+ /* Parent: drop the argument vector and the write end */
+ talloc_free(argv);
+ close(fd[1]);
+
+ ret = wait_for_daemon_startup(ctdb->ev, fd[0]);
+ if (ret != 0) {
+ ctdb_kill(ctdb, pid, SIGKILL);
+ close(fd[0]);
+ D_ERR("Failed to initialize event daemon (%d)\n", ret);
+ return -1;
+ }
+
+ /* Any further readability on the pipe is treated as the daemon going away */
+ ectx->eventd_fde = tevent_add_fd(ctdb->ev, ectx, fd[0],
+ TEVENT_FD_READ,
+ eventd_dead_handler, ectx);
+ if (ectx->eventd_fde == NULL) {
+ ctdb_kill(ctdb, pid, SIGKILL);
+ close(fd[0]);
+ return -1;
+ }
+
+ /* From here fd[0] is closed when eventd_fde is freed */
+ tevent_fd_set_auto_close(ectx->eventd_fde);
+ ectx->eventd_pid = pid;
+
+ status = eventd_client_connect(ectx);
+ if (! status) {
+ DEBUG(DEBUG_ERR, ("Failed to connect to event daemon\n"));
+ /* Signals the daemon and frees ctdb->ectx */
+ ctdb_stop_eventd(ctdb);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * FD callback on the startup pipe, registered by ctdb_start_eventd():
+ * log the loss of the event daemon and terminate this process.
+ */
+static void eventd_dead_handler(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags,
+ void *private_data)
+{
+ D_ERR("Eventd went away - exiting\n");
+ exit(1);
+}
+
+/*
+ * Stop the event daemon: remove the pipe handler, send SIGTERM to the
+ * daemon if one is recorded and free the eventd context.  Does nothing
+ * when there is no context.
+ */
+void ctdb_stop_eventd(struct ctdb_context *ctdb)
+{
+	struct eventd_context *ctx = ctdb->ectx;
+
+	if (ctx != NULL) {
+		/* Drop the handler first so the exit is not seen as a death */
+		TALLOC_FREE(ctx->eventd_fde);
+
+		if (ctx->eventd_pid != -1) {
+			kill(ctx->eventd_pid, SIGTERM);
+			ctx->eventd_pid = -1;
+		}
+
+		TALLOC_FREE(ctdb->ectx);
+	}
+}
+
+/*
+ * Connect to event daemon
+ */
+
+/* One request sent to the event daemon; linked into eventd_context.calls. */
+struct eventd_client_state {
+ /* List linkage used by DLIST_ADD/DLIST_REMOVE */
+ struct eventd_client_state *prev, *next;
+
+ /* Owning eventd context */
+ struct eventd_context *ectx;
+ /* Invoked from eventd_client_read() with the matching reply */
+ void (*callback)(struct ctdb_event_reply *reply, void *private_data);
+ void *private_data;
+
+ /* Request ID placed in the header and used to match the reply */
+ uint32_t reqid;
+ /* Marshalled request and its length */
+ uint8_t *buf;
+ size_t buflen;
+};
+
+static void eventd_client_read(uint8_t *buf, size_t buflen,
+ void *private_data);
+static int eventd_client_state_destructor(struct eventd_client_state *state);
+
+/*
+ * Make sure there is a socket queue to the event daemon, connecting
+ * if necessary.  Returns true if a queue exists on return.
+ */
+static bool eventd_client_connect(struct eventd_context *ectx)
+{
+	int sock;
+
+	if (ectx->queue == NULL) {
+		sock = sock_connect(ectx->socket);
+		if (sock == -1) {
+			return false;
+		}
+
+		ectx->queue = sock_queue_setup(ectx, ectx->ev, sock,
+					       eventd_client_read, ectx);
+		if (ectx->queue == NULL) {
+			close(sock);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Marshall a request and queue it for sending to the event daemon.
+ *
+ * The per-request state is a talloc child of mem_ctx.  Once a request
+ * ID is assigned, the state's destructor releases the ID and unlinks
+ * the state from ectx->calls.  callback is invoked by
+ * eventd_client_read() when a reply with the matching ID arrives.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int eventd_client_write(struct eventd_context *ectx,
+			       TALLOC_CTX *mem_ctx,
+			       struct ctdb_event_request *request,
+			       void (*callback)(struct ctdb_event_reply *reply,
+						void *private_data),
+			       void *private_data)
+{
+	struct eventd_client_state *req;
+	struct ctdb_event_header hdr = { 0 };
+	int rc;
+
+	if (! eventd_client_connect(ectx)) {
+		return -1;
+	}
+
+	req = talloc_zero(mem_ctx, struct eventd_client_state);
+	if (req == NULL) {
+		return -1;
+	}
+
+	req->ectx = ectx;
+	req->callback = callback;
+	req->private_data = private_data;
+
+	req->reqid = reqid_new(ectx->idr, req);
+	if (req->reqid == REQID_INVALID) {
+		goto fail;
+	}
+
+	/* Install only after the ID exists: the destructor releases it */
+	talloc_set_destructor(req, eventd_client_state_destructor);
+
+	hdr.reqid = req->reqid;
+
+	req->buflen = ctdb_event_request_len(&hdr, request);
+	req->buf = talloc_size(req, req->buflen);
+	if (req->buf == NULL) {
+		goto fail;
+	}
+
+	rc = ctdb_event_request_push(&hdr,
+				     request,
+				     req->buf,
+				     &req->buflen);
+	if (rc != 0) {
+		goto fail;
+	}
+
+	rc = sock_queue_write(ectx->queue, req->buf, req->buflen);
+	if (rc != 0) {
+		goto fail;
+	}
+
+	DLIST_ADD(ectx->calls, req);
+
+	return 0;
+
+fail:
+	talloc_free(req);
+	return -1;
+}
+
+/*
+ * talloc destructor for a request: release its request ID and unlink
+ * it from the context's list of calls.  Always returns 0.
+ */
+static int eventd_client_state_destructor(struct eventd_client_state *state)
+{
+ struct eventd_context *ectx = state->ectx;
+
+ reqid_remove(ectx->idr, state->reqid);
+ DLIST_REMOVE(ectx->calls, state);
+ return 0;
+}
+
+/*
+ * Socket queue read callback.
+ *
+ * A NULL buffer means the connection was lost, so the queue is
+ * dropped.  Otherwise the packet is parsed, matched to a request by
+ * its request ID and handed to that request's callback.  The request
+ * state is re-parented onto the reply so both are released together
+ * once the callback returns.
+ */
+static void eventd_client_read(uint8_t *buf, size_t buflen,
+			       void *private_data)
+{
+	struct eventd_context *ectx = talloc_get_type_abort(
+		private_data, struct eventd_context);
+	struct eventd_client_state *pending;
+	struct ctdb_event_header hdr;
+	struct ctdb_event_reply *reply;
+	int rc;
+
+	if (buf == NULL) {
+		/* connection lost */
+		TALLOC_FREE(ectx->queue);
+		return;
+	}
+
+	rc = ctdb_event_reply_pull(buf, buflen, &hdr, ectx, &reply);
+	if (rc != 0) {
+		D_ERR("Invalid packet received, ret=%d\n", rc);
+		return;
+	}
+
+	if (buflen != hdr.length) {
+		D_ERR("Packet size mismatch %zu != %"PRIu32"\n",
+		      buflen, hdr.length);
+		goto done;
+	}
+
+	pending = reqid_find(ectx->idr, hdr.reqid,
+			     struct eventd_client_state);
+	if (pending == NULL || pending->reqid != hdr.reqid) {
+		/* Nobody is waiting for this reply */
+		goto done;
+	}
+
+	pending = talloc_steal(reply, pending);
+	pending->callback(reply, pending->private_data);
+
+done:
+	talloc_free(reply);
+}
+
+/*
+ * Run an event
+ */
+
+/* State carried from eventd_client_run() to eventd_client_run_done(). */
+struct eventd_client_run_state {
+ struct eventd_context *ectx;
+ /* Called with the result field of the daemon's reply */
+ void (*callback)(int result, void *private_data);
+ void *private_data;
+};
+
+static void eventd_client_run_done(struct ctdb_event_reply *reply,
+ void *private_data);
+
+/*
+ * Send a RUN request for an event of the "legacy" component to the
+ * event daemon.
+ *
+ * The run state is a talloc child of mem_ctx.  callback receives the
+ * result from the daemon's reply.  Returns 0 on success, -1 on
+ * allocation failure, otherwise the error from eventd_client_write().
+ */
+static int eventd_client_run(struct eventd_context *ectx,
+			     TALLOC_CTX *mem_ctx,
+			     void (*callback)(int result,
+					      void *private_data),
+			     void *private_data,
+			     enum ctdb_event event,
+			     const char *arg_str,
+			     uint32_t timeout)
+{
+	struct eventd_client_run_state *run;
+	struct ctdb_event_request_run rdata;
+	struct ctdb_event_request request;
+	int rc;
+
+	run = talloc_zero(mem_ctx, struct eventd_client_run_state);
+	if (run == NULL) {
+		return -1;
+	}
+
+	run->ectx = ectx;
+	run->callback = callback;
+	run->private_data = private_data;
+
+	rdata = (struct ctdb_event_request_run) {
+		.component = "legacy",
+		.event = ctdb_event_to_string(event),
+		.args = arg_str,
+		.timeout = timeout,
+		.flags = 0,
+	};
+
+	request = (struct ctdb_event_request) {
+		.cmd = CTDB_EVENT_CMD_RUN,
+		.data.run = &rdata,
+	};
+
+	/* The request is marshalled here, so the stack data may go away */
+	rc = eventd_client_write(ectx, run, &request,
+				 eventd_client_run_done, run);
+	if (rc != 0) {
+		talloc_free(run);
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Reply callback for a RUN request: pass the reply's result to the
+ * caller's callback and free the run state.
+ */
+static void eventd_client_run_done(struct ctdb_event_reply *reply,
+ void *private_data)
+{
+ struct eventd_client_run_state *state = talloc_get_type_abort(
+ private_data, struct eventd_client_run_state);
+
+ /* Re-parent onto the eventd context before invoking the callback */
+ state = talloc_steal(state->ectx, state);
+ state->callback(reply->result, state->private_data);
+ talloc_free(state);
+}
+
+/*
+ * CTDB event script functions
+ */
+
+int ctdb_event_script_run(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ void (*callback)(struct ctdb_context *ctdb,
+ int result, void *private_data),
+ void *private_data,
+ enum ctdb_event event,
+ const char *fmt, va_list ap)
+ PRINTF_ATTRIBUTE(6,0);
+
+/* State carried from ctdb_event_script_run() to its completion handler. */
+struct ctdb_event_script_run_state {
+ struct ctdb_context *ctdb;
+ /* Called with the (possibly adjusted) result of the event */
+ void (*callback)(struct ctdb_context *ctdb, int result,
+ void *private_data);
+ void *private_data;
+ /* Event being run; used to decide whether a timeout is ignored */
+ enum ctdb_event event;
+};
+
+static bool event_allowed_during_recovery(enum ctdb_event event);
+static void ctdb_event_script_run_done(int result, void *private_data);
+static bool check_options(enum ctdb_event call, const char *options);
+
+/*
+ * Ask the event daemon to run an event.
+ *
+ * ctdb         - daemon context; supplies recovery mode, eventd context
+ *                and the script timeout tunable
+ * mem_ctx      - talloc parent of the request state
+ * callback     - called with the event result when the daemon replies
+ * private_data - passed through to callback
+ * event        - event to run
+ * fmt, ap      - printf-style event arguments, fmt may be NULL for none
+ *
+ * Events other than those accepted by event_allowed_during_recovery()
+ * are refused while recovery mode is not CTDB_RECOVERY_NORMAL, and the
+ * argument count is validated by check_options().
+ *
+ * Returns 0 if the request was queued, -1 if it was refused or an
+ * allocation failed, otherwise the error from eventd_client_run().
+ * Nothing stays allocated on mem_ctx when a non-zero value is returned.
+ */
+int ctdb_event_script_run(struct ctdb_context *ctdb,
+			  TALLOC_CTX *mem_ctx,
+			  void (*callback)(struct ctdb_context *ctdb,
+					   int result, void *private_data),
+			  void *private_data,
+			  enum ctdb_event event,
+			  const char *fmt, va_list ap)
+{
+	struct ctdb_event_script_run_state *state;
+	char *arg_str;
+	int ret;
+
+	if ( (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) &&
+	     (! event_allowed_during_recovery(event)) ) {
+		DEBUG(DEBUG_ERR,
+		      ("Refusing to run event '%s' while in recovery\n",
+		       ctdb_eventscript_call_names[event]));
+		return -1;
+	}
+
+	state = talloc_zero(mem_ctx, struct ctdb_event_script_run_state);
+	if (state == NULL) {
+		return -1;
+	}
+
+	state->ctdb = ctdb;
+	state->callback = callback;
+	state->private_data = private_data;
+	state->event = event;
+
+	if (fmt != NULL) {
+		/* arg_str is a child of state and is released with it */
+		arg_str = talloc_vasprintf(state, fmt, ap);
+		if (arg_str == NULL) {
+			talloc_free(state);
+			return -1;
+		}
+	} else {
+		arg_str = NULL;
+	}
+
+	if (! check_options(event, arg_str)) {
+		DEBUG(DEBUG_ERR,
+		      ("Bad event script arguments '%s' for '%s'\n",
+		       arg_str, ctdb_eventscript_call_names[event]));
+		/*
+		 * Free the whole state, not just arg_str: state hangs
+		 * off the caller's mem_ctx and would otherwise stay
+		 * allocated for as long as that context lives.
+		 */
+		talloc_free(state);
+		return -1;
+	}
+
+	ret = eventd_client_run(ctdb->ectx, state,
+				ctdb_event_script_run_done, state,
+				event, arg_str, ctdb->tunable.script_timeout);
+	if (ret != 0) {
+		talloc_free(state);
+		return ret;
+	}
+
+	DEBUG(DEBUG_INFO,
+	      (__location__ " Running event %s with arguments %s\n",
+	       ctdb_eventscript_call_names[event], arg_str));
+
+	/* The arguments were marshalled into the request already */
+	talloc_free(arg_str);
+	return 0;
+}
+
+/*
+ * Completion handler for ctdb_event_script_run().
+ *
+ * A timeout of the startrecovery, recovered, takeip or releaseip
+ * events is logged and reported to the caller as success; every other
+ * result is passed through unchanged.  The state is freed after the
+ * callback returns.
+ */
+static void ctdb_event_script_run_done(int result, void *private_data)
+{
+	struct ctdb_event_script_run_state *state = talloc_get_type_abort(
+		private_data, struct ctdb_event_script_run_state);
+	bool hang_is_ignored;
+
+	switch (state->event) {
+	case CTDB_EVENT_START_RECOVERY:
+	case CTDB_EVENT_RECOVERED:
+	case CTDB_EVENT_TAKE_IP:
+	case CTDB_EVENT_RELEASE_IP:
+		hang_is_ignored = true;
+		break;
+
+	default:
+		hang_is_ignored = false;
+		break;
+	}
+
+	if (hang_is_ignored && result == ETIMEDOUT) {
+		DEBUG(DEBUG_ERR,
+		      ("Ignoring hung script for %s event\n",
+		       ctdb_eventscript_call_names[state->event]));
+		result = 0;
+	}
+
+	/* Re-parent onto ctdb before invoking the callback */
+	state = talloc_steal(state->ctdb, state);
+	state->callback(state->ctdb, result, state->private_data);
+	talloc_free(state);
+}
+
+
+/*
+ * Count the words in options, where words are separated by runs of
+ * spaces and/or tabs.  A NULL string contains no words.
+ */
+static unsigned int count_words(const char *options)
+{
+	static const char blanks[] = " \t";
+	unsigned int count = 0;
+	const char *p;
+
+	if (options == NULL) {
+		return 0;
+	}
+
+	/* Each iteration starts on the first character of a word */
+	for (p = options + strspn(options, blanks);
+	     *p != '\0';
+	     p += strspn(p, blanks)) {
+		count++;
+		p += strcspn(p, blanks);
+	}
+
+	return count;
+}
+
+/*
+ * Check that options contains exactly the number of words the given
+ * event expects.  Unknown events are logged and rejected.
+ */
+static bool check_options(enum ctdb_event call, const char *options)
+{
+	unsigned int expected;
+
+	switch (call) {
+	/* These all take no arguments. */
+	case CTDB_EVENT_INIT:
+	case CTDB_EVENT_SETUP:
+	case CTDB_EVENT_STARTUP:
+	case CTDB_EVENT_START_RECOVERY:
+	case CTDB_EVENT_RECOVERED:
+	case CTDB_EVENT_MONITOR:
+	case CTDB_EVENT_SHUTDOWN:
+	case CTDB_EVENT_IPREALLOCATED:
+		expected = 0;
+		break;
+
+	/* interface, IP address, netmask bits. */
+	case CTDB_EVENT_TAKE_IP:
+	case CTDB_EVENT_RELEASE_IP:
+		expected = 3;
+		break;
+
+	/* old interface, new interface, IP address, netmask bits. */
+	case CTDB_EVENT_UPDATE_IP:
+		expected = 4;
+		break;
+
+	default:
+		DEBUG(DEBUG_ERR,(__location__ "Unknown ctdb_event %u\n", call));
+		return false;
+	}
+
+	return count_words(options) == expected;
+}
+
+/*
+ * Return true for the events that may still be run while the node is
+ * in recovery: init, setup, startrecovery, shutdown, releaseip and
+ * ipreallocated.
+ */
+static bool event_allowed_during_recovery(enum ctdb_event event)
+{
+	switch (event) {
+	case CTDB_EVENT_INIT:
+	case CTDB_EVENT_SETUP:
+	case CTDB_EVENT_START_RECOVERY:
+	case CTDB_EVENT_SHUTDOWN:
+	case CTDB_EVENT_RELEASE_IP:
+	case CTDB_EVENT_IPREALLOCATED:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+/*
+ Run an event in the background, calling the callback when it has
+ finished. The request state is allocated on mem_ctx; if mem_ctx is
+ freed first, the callback will never be called.
+
+ fmt and the following arguments are the printf-style event arguments;
+ fmt may be NULL for events that take none.
+
+ Returns the value of ctdb_event_script_run(): 0 if the request was
+ queued, non-zero otherwise.
+ */
+int ctdb_event_script_callback(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ void (*callback)(struct ctdb_context *, int, void *),
+ void *private_data,
+ enum ctdb_event call,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = ctdb_event_script_run(ctdb, mem_ctx, callback, private_data,
+ call, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+
+/* Completion flag and result shared with ctdb_event_script_args_done(). */
+struct ctdb_event_script_args_state {
+ /* Set once the event has completed */
+ bool done;
+ /* Result reported for the event */
+ int status;
+};
+
+/*
+ * Completion callback used by ctdb_event_script_args(): record the
+ * result and mark the wait as finished.
+ */
+static void ctdb_event_script_args_done(struct ctdb_context *ctdb,
+ int status, void *private_data)
+{
+ struct ctdb_event_script_args_state *s =
+ (struct ctdb_event_script_args_state *)private_data;
+
+ s->done = true;
+ s->status = status;
+}
+
+/*
+  Run an event and block, driving the event loop, until it has
+  completed.  For callers that cannot continue before the event
+  scripts have finished.
+
+  Returns the error from ctdb_event_script_run() if the event could
+  not be started, otherwise the event's result.  When an event other
+  than init or shutdown times out, this node bans itself.
+ */
+int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_event call,
+			   const char *fmt, ...)
+{
+	struct ctdb_event_script_args_state wait = {
+		.done = false,
+		.status = -1,
+	};
+	va_list ap;
+	int rc;
+
+	va_start(ap, fmt);
+	rc = ctdb_event_script_run(ctdb, ctdb,
+				   ctdb_event_script_args_done, &wait,
+				   call, fmt, ap);
+	va_end(ap);
+	if (rc != 0) {
+		return rc;
+	}
+
+	while (! wait.done) {
+		tevent_loop_once(ctdb->ev);
+	}
+
+	if (wait.status != ETIMEDOUT) {
+		return wait.status;
+	}
+
+	/* Don't ban self if CTDB is starting up or shutting down */
+	if (call == CTDB_EVENT_INIT || call == CTDB_EVENT_SHUTDOWN) {
+		return wait.status;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      (__location__ " eventscript for '%s' timed out."
+	       " Immediately banning ourself for %d seconds\n",
+	       ctdb_eventscript_call_names[call],
+	       ctdb->tunable.recovery_ban_period));
+	ctdb_ban_self(ctdb);
+
+	return wait.status;
+}
+
+/*
+ * Run an event that takes no arguments and wait for it to complete.
+ * Returns the value of ctdb_event_script_args().
+ */
+int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_event call)
+{
+ /* A NULL format makes ctdb_event_script_run() pass no arguments. */
+ return ctdb_event_script_args(ctdb, call, NULL);
+}
+
+/*
+ * Send SIGHUP to the event daemon, if one is running.
+ * NOTE(review): the name suggests the daemon reopens its logs on
+ * SIGHUP - confirm in the event daemon.
+ *
+ * ctdb->ectx is NULL until ctdb_start_eventd() has created it and
+ * again after ctdb_stop_eventd(), so it must be checked before the
+ * PID is read.
+ */
+void ctdb_event_reopen_logs(struct ctdb_context *ctdb)
+{
+	struct eventd_context *ectx = ctdb->ectx;
+
+	if (ectx == NULL) {
+		return;
+	}
+
+	if (ectx->eventd_pid > 0) {
+		kill(ectx->eventd_pid, SIGHUP);
+	}
+}
diff --git a/ctdb/server/ipalloc.c b/ctdb/server/ipalloc.c
new file mode 100644
index 0000000..7f49364
--- /dev/null
+++ b/ctdb/server/ipalloc.c
@@ -0,0 +1,284 @@
+/*
+ ctdb ip takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/rb_tree.h"
+
+#include "protocol/protocol_util.h"
+
+#include "server/ipalloc_private.h"
+
+/*
+ * Allocate the main ipalloc state on mem_ctx and record the node
+ * count, algorithm and policy settings.  The public IP lists are set
+ * separately.  Returns NULL if out of memory.
+ */
+struct ipalloc_state *
+ipalloc_state_init(TALLOC_CTX *mem_ctx,
+		   uint32_t num_nodes,
+		   enum ipalloc_algorithm algorithm,
+		   bool no_ip_takeover,
+		   bool no_ip_failback,
+		   uint32_t *force_rebalance_nodes)
+{
+	struct ipalloc_state *state;
+
+	state = talloc_zero(mem_ctx, struct ipalloc_state);
+	if (state == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+		return NULL;
+	}
+
+	state->num = num_nodes;
+	state->algorithm = algorithm;
+
+	state->no_ip_takeover = no_ip_takeover;
+	state->no_ip_failback = no_ip_failback;
+
+	/* The pointer is stored as-is; the array is not copied */
+	state->force_rebalance_nodes = force_rebalance_nodes;
+
+	return state;
+}
+
+/*
+ * Tree insertion callback: parm is the new entry, data the entry
+ * already stored under the same key (NULL if none).  The new entry
+ * always replaces the old one, inheriting the old entry's PNN when its
+ * own is unknown.
+ */
+static void *add_ip_callback(void *parm, void *data)
+{
+	struct public_ip_list *incoming = parm;
+	struct public_ip_list *existing = data;
+
+	if (existing != NULL && incoming->pnn == CTDB_UNKNOWN_PNN) {
+		incoming->pnn = existing->pnn;
+	}
+
+	return parm;
+}
+
+/*
+ * Tree traversal callback: push the visited entry (data) onto the
+ * front of the list whose head is pointed to by param.  Always
+ * returns 0.
+ */
+static int getips_count_callback(void *param, void *data)
+{
+ struct public_ip_list **ip_list = (struct public_ip_list **)param;
+ struct public_ip_list *new_ip = (struct public_ip_list *)data;
+
+ new_ip->next = *ip_list;
+ *ip_list = new_ip;
+ return 0;
+}
+
+/* Nodes only know about those public addresses that they are
+ * configured to serve and no individual node has a full list of all
+ * public addresses configured across the cluster.  Therefore, a
+ * merged list of all public addresses needs to be built so that IP
+ * allocation can be done.
+ *
+ * The addresses known to each node are inserted into a temporary tree
+ * keyed by address, which merges duplicates, and the tree is then
+ * flattened into a linked list.  List entries are allocated on
+ * ipalloc_state.
+ *
+ * Returns the head of the list, or NULL if the known public IPs have
+ * not been set, on memory allocation failure, or if there are no
+ * addresses at all. */
+static struct public_ip_list *
+create_merged_ip_list(struct ipalloc_state *ipalloc_state)
+{
+	unsigned int i, j;
+	struct public_ip_list *ip_list;
+	struct ctdb_public_ip_list *public_ips;
+	struct trbt_tree *ip_tree;
+	int ret;
+
+	/* Validate the input before allocating anything, so that this
+	 * early return cannot leave a tree behind on ipalloc_state */
+	if (ipalloc_state->known_public_ips == NULL) {
+		DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
+		return NULL;
+	}
+
+	ip_tree = trbt_create(ipalloc_state, 0);
+	if (ip_tree == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+		return NULL;
+	}
+
+	for (i=0; i < ipalloc_state->num; i++) {
+
+		public_ips = &ipalloc_state->known_public_ips[i];
+
+		for (j=0; j < public_ips->num; j++) {
+			struct public_ip_list *tmp_ip;
+
+			/* This is returned as part of ip_list */
+			tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
+			if (tmp_ip == NULL) {
+				DEBUG(DEBUG_ERR,
+				      (__location__ " out of memory\n"));
+				talloc_free(ip_tree);
+				return NULL;
+			}
+
+			/* Do not use information about IP addresses hosted
+			 * on other nodes, it may not be accurate */
+			if (public_ips->ip[j].pnn == i) {
+				tmp_ip->pnn = public_ips->ip[j].pnn;
+			} else {
+				tmp_ip->pnn = CTDB_UNKNOWN_PNN;
+			}
+			tmp_ip->addr = public_ips->ip[j].addr;
+			tmp_ip->next = NULL;
+
+			/* add_ip_callback() merges with any entry already
+			 * stored for this address */
+			trbt_insertarray32_callback(ip_tree,
+				IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
+				add_ip_callback,
+				tmp_ip);
+		}
+	}
+
+	ip_list = NULL;
+	ret = trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
+	if (ret != 0) {
+		DBG_ERR("Error traversing the IP tree.\n");
+	}
+
+	/* The list entries belong to ipalloc_state, not to the tree */
+	talloc_free(ip_tree);
+
+	return ip_list;
+}
+
+/* Does the given per-node address list contain addr? */
+static bool addr_in_public_ip_list(ctdb_sock_addr *addr,
+				   struct ctdb_public_ip_list *list)
+{
+	unsigned int k;
+
+	for (k = 0; k < list->num; k++) {
+		if (ctdb_sock_addr_same_ip(addr, &list->ip[k].addr)) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * For every merged IP, allocate the known_on and available_on bitmaps
+ * (one bit per node) and set the bits for the nodes whose known and
+ * available address lists contain that IP.  Returns false if a bitmap
+ * cannot be allocated.
+ */
+static bool populate_bitmap(struct ipalloc_state *ipalloc_state)
+{
+	struct public_ip_list *entry;
+	unsigned int node;
+
+	for (entry = ipalloc_state->all_ips;
+	     entry != NULL;
+	     entry = entry->next) {
+
+		entry->known_on = bitmap_talloc(entry, ipalloc_state->num);
+		if (entry->known_on == NULL) {
+			return false;
+		}
+
+		entry->available_on = bitmap_talloc(entry, ipalloc_state->num);
+		if (entry->available_on == NULL) {
+			return false;
+		}
+
+		for (node = 0; node < ipalloc_state->num; node++) {
+			struct ctdb_public_ip_list *known =
+				&ipalloc_state->known_public_ips[node];
+			struct ctdb_public_ip_list *avail =
+				&ipalloc_state->available_public_ips[node];
+
+			if (addr_in_public_ip_list(&entry->addr, avail)) {
+				bitmap_set(entry->available_on, node);
+			}
+
+			/* Optimisation: available => known */
+			if (bitmap_query(entry->available_on, node)) {
+				bitmap_set(entry->known_on, node);
+				continue;
+			}
+
+			if (addr_in_public_ip_list(&entry->addr, known)) {
+				bitmap_set(entry->known_on, node);
+			}
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Record the known and available public IP lists in the state.  Only
+ * the pointers are stored.  Other functions in this file index both
+ * arrays by node number, from 0 up to ipalloc_state->num.
+ */
+void ipalloc_set_public_ips(struct ipalloc_state *ipalloc_state,
+ struct ctdb_public_ip_list *known_ips,
+ struct ctdb_public_ip_list *available_ips)
+{
+ ipalloc_state->available_public_ips = available_ips;
+ ipalloc_state->known_public_ips = known_ips;
+}
+
+/* Can the cluster host public IPs?
+ *
+ * True if some node currently hosts one of the addresses it knows
+ * about, or if addresses are known somewhere and at least one node
+ * has addresses available.  False if no node knows any address, or if
+ * nothing is hosted and nothing is available. */
+bool ipalloc_can_host_ips(struct ipalloc_state *ipalloc_state)
+{
+	struct ctdb_public_ip_list *known = ipalloc_state->known_public_ips;
+	bool any_known = false;
+	unsigned int node, k;
+
+	for (node = 0; node < ipalloc_state->num; node++) {
+		if (known[node].num == 0) {
+			continue;
+		}
+
+		any_known = true;
+
+		/* Succeed if an address is hosted on this node */
+		for (k = 0; k < known[node].num; k++) {
+			if (known[node].ip[k].pnn == node) {
+				return true;
+			}
+		}
+	}
+
+	if (! any_known) {
+		return false;
+	}
+
+	/* Addresses are known but none are hosted, so the answer
+	 * depends on whether any node has addresses available now */
+	for (node = 0; node < ipalloc_state->num; node++) {
+		if (ipalloc_state->available_public_ips[node].num != 0) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* The calculation part of the IP allocation algorithm.
+ *
+ * Builds the merged IP list and its per-node bitmaps, then runs the
+ * configured algorithm.  Returns the merged list on success, with
+ * ->pnn set to the node that will own each IP or CTDB_UNKNOWN_PNN if
+ * no node can cover it.  Returns NULL on failure. */
+struct public_ip_list *ipalloc(struct ipalloc_state *ipalloc_state)
+{
+	bool ok = false;
+
+	ipalloc_state->all_ips = create_merged_ip_list(ipalloc_state);
+	if (ipalloc_state->all_ips == NULL) {
+		return NULL;
+	}
+
+	if (!populate_bitmap(ipalloc_state)) {
+		return NULL;
+	}
+
+	if (ipalloc_state->algorithm == IPALLOC_LCP2) {
+		ok = ipalloc_lcp2(ipalloc_state);
+	} else if (ipalloc_state->algorithm == IPALLOC_DETERMINISTIC) {
+		ok = ipalloc_deterministic(ipalloc_state);
+	} else if (ipalloc_state->algorithm == IPALLOC_NONDETERMINISTIC) {
+		ok = ipalloc_nondeterministic(ipalloc_state);
+	}
+
+	if (!ok) {
+		return NULL;
+	}
+
+	return ipalloc_state->all_ips;
+}
diff --git a/ctdb/server/ipalloc.h b/ctdb/server/ipalloc.h
new file mode 100644
index 0000000..42aec9e
--- /dev/null
+++ b/ctdb/server/ipalloc.h
@@ -0,0 +1,67 @@
+/*
+ CTDB IP takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_IPALLOC_H__
+#define __CTDB_IPALLOC_H__
+
+#include <talloc.h>
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/bitmap.h"
+
+/* One public IP address in the merged, cluster-wide list. */
+struct public_ip_list {
+ /* Next entry in the singly linked list, NULL at the end */
+ struct public_ip_list *next;
+ /* Node assigned to this address, or CTDB_UNKNOWN_PNN if unassigned */
+ uint32_t pnn;
+ ctdb_sock_addr addr;
+ /* Bit N is set if node N's known address list contains this address */
+ struct bitmap *known_on;
+ /* Bit N is set if node N's available address list contains this address */
+ struct bitmap *available_on;
+};
+
+#define IP_KEYLEN 4
+uint32_t *ip_key(ctdb_sock_addr *ip);
+
+/* Selects which IP allocation algorithm ipalloc() runs. */
+enum ipalloc_algorithm {
+ IPALLOC_DETERMINISTIC,
+ IPALLOC_NONDETERMINISTIC,
+ IPALLOC_LCP2,
+};
+
+struct ipalloc_state;
+
+struct ipalloc_state * ipalloc_state_init(TALLOC_CTX *mem_ctx,
+ uint32_t num_nodes,
+ enum ipalloc_algorithm algorithm,
+ bool no_ip_takeover,
+ bool no_ip_failback,
+ uint32_t *force_rebalance_nodes);
+
+void ipalloc_set_public_ips(struct ipalloc_state *ipalloc_state,
+ struct ctdb_public_ip_list *known_ips,
+ struct ctdb_public_ip_list *available_ips);
+
+bool ipalloc_can_host_ips(struct ipalloc_state *ipalloc_state);
+
+struct public_ip_list *ipalloc(struct ipalloc_state *ipalloc_state);
+
+#endif /* __CTDB_IPALLOC_H__ */
diff --git a/ctdb/server/ipalloc_common.c b/ctdb/server/ipalloc_common.c
new file mode 100644
index 0000000..a5177d4
--- /dev/null
+++ b/ctdb/server/ipalloc_common.c
@@ -0,0 +1,192 @@
+/*
+ ctdb ip takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "ctdb_private.h"
+
+#include "lib/util/time.h"
+
+#include "lib/util/debug.h"
+#include "common/logging.h"
+
+#include "common/common.h"
+
+#include "protocol/protocol_util.h"
+
+#include "server/ipalloc_private.h"
+
+#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
+
+/* Return how many addresses in the list are currently assigned to
+   the given physical node.
+*/
+int node_ip_coverage(uint32_t pnn, struct public_ip_list *ips)
+{
+	struct public_ip_list *entry = ips;
+	int count = 0;
+
+	while (entry != NULL) {
+		if (entry->pnn == pnn) {
+			count++;
+		}
+		entry = entry->next;
+	}
+
+	return count;
+}
+
+
+/* Can the given node host the given IP?  True if bit "pnn" is set in
+ * the IP's available_on bitmap.  ipalloc_state is not used.
+ *
+ * NOTE(review): this comment used to say the test was "is the public
+ * IP known to the node and is NOIPHOST unset?".  The code only checks
+ * availability; presumably that filtering is applied when the
+ * available lists are built - confirm against the caller.
+*/
+static bool can_node_host_ip(struct ipalloc_state *ipalloc_state,
+ int32_t pnn,
+ struct public_ip_list *ip)
+{
+ return bitmap_query(ip->available_on, pnn);
+}
+
+/* Can the given node take over the given IP?  Never when IP takeover
+ * is disabled, otherwise whenever the node can host the IP.
+ */
+bool can_node_takeover_ip(struct ipalloc_state *ipalloc_state,
+			  int32_t pnn,
+			  struct public_ip_list *ip)
+{
+	return !ipalloc_state->no_ip_takeover &&
+	       can_node_host_ip(ipalloc_state, pnn, ip);
+}
+
+/* Search the nodes for one to take over this IP.  Among the nodes
+   that may take it over, pick the one currently serving the fewest
+   IPs (the lowest numbered on a tie) so that IPs are spread evenly.
+
+   Returns 0 and sets ip->pnn on success, -1 if no node qualifies.
+*/
+int find_takeover_node(struct ipalloc_state *ipalloc_state,
+		       struct public_ip_list *ip)
+{
+	unsigned int numnodes = ipalloc_state->num;
+	unsigned int best = CTDB_UNKNOWN_PNN;
+	unsigned int node;
+	int best_count = 0;
+
+	for (node = 0; node < numnodes; node++) {
+		int count;
+
+		/* skip nodes that can not serve this ip */
+		if (!can_node_takeover_ip(ipalloc_state, node, ip)) {
+			continue;
+		}
+
+		count = node_ip_coverage(node, ipalloc_state->all_ips);
+
+		/* first candidate, or strictly less loaded than the best */
+		if (best == CTDB_UNKNOWN_PNN || count < best_count) {
+			best = node;
+			best_count = count;
+		}
+	}
+
+	if (best == CTDB_UNKNOWN_PNN) {
+		DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
+				     ctdb_sock_addr_to_string(ipalloc_state,
+							      &ip->addr,
+							      false)));
+
+		return -1;
+	}
+
+	ip->pnn = best;
+	return 0;
+}
+
+/* Build the IP_KEYLEN-word tree key for an address.
+ *
+ * An IPv4 address goes into the last word with the others zero; an
+ * IPv6 address fills all four words.  An unknown address family is
+ * logged and gives an all-zero key.
+ *
+ * The key lives in a static buffer that is overwritten by every call.
+ */
+uint32_t *ip_key(ctdb_sock_addr *ip)
+{
+	static uint32_t key[IP_KEYLEN];
+
+	bzero(key, sizeof(key));
+
+	if (ip->sa.sa_family == AF_INET) {
+		key[3] = htonl(ip->ip.sin_addr.s_addr);
+	} else if (ip->sa.sa_family == AF_INET6) {
+		uint32_t *words = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
+		unsigned int w;
+
+		for (w = 0; w < IP_KEYLEN; w++) {
+			key[w] = htonl(words[w]);
+		}
+	} else {
+		DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
+	}
+
+	return key;
+}
+
+/* Allocate any unassigned IPs by walking the list and asking
+ * find_takeover_node() for the best node for each one.  An IP that no
+ * node can cover is logged and left unassigned.
+ */
+void basic_allocate_unassigned(struct ipalloc_state *ipalloc_state)
+{
+	struct public_ip_list *entry;
+
+	for (entry = ipalloc_state->all_ips;
+	     entry != NULL;
+	     entry = entry->next) {
+		/* already assigned: nothing to do */
+		if (entry->pnn != CTDB_UNKNOWN_PNN) {
+			continue;
+		}
+
+		if (find_takeover_node(ipalloc_state, entry) != 0) {
+			DEBUG(DEBUG_WARNING,
+			      ("Failed to find node to cover ip %s\n",
+			       ctdb_sock_addr_to_string(ipalloc_state,
+							&entry->addr,
+							false)));
+		}
+	}
+}
+
+/* Check that each assigned node can still host its IP and reset the
+ * assignment to CTDB_UNKNOWN_PNN where it can not.
+ */
+void unassign_unsuitable_ips(struct ipalloc_state *ipalloc_state)
+{
+	struct public_ip_list *entry;
+
+	for (entry = ipalloc_state->all_ips;
+	     entry != NULL;
+	     entry = entry->next) {
+		bool unassigned = (entry->pnn == CTDB_UNKNOWN_PNN);
+
+		if (unassigned ||
+		    can_node_host_ip(ipalloc_state, entry->pnn, entry)) {
+			continue;
+		}
+
+		/* this node can not serve this ip. */
+		DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
+				   ctdb_sock_addr_to_string(
+					   ipalloc_state,
+					   &entry->addr, false),
+				   entry->pnn));
+		entry->pnn = CTDB_UNKNOWN_PNN;
+	}
+}
diff --git a/ctdb/server/ipalloc_deterministic.c b/ctdb/server/ipalloc_deterministic.c
new file mode 100644
index 0000000..43680ba
--- /dev/null
+++ b/ctdb/server/ipalloc_deterministic.c
@@ -0,0 +1,191 @@
+/*
+ ctdb ip takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "common/logging.h"
+#include "common/path.h"
+
+#include "protocol/protocol_util.h"
+#include "lib/util/smb_strtox.h"
+#include "lib/util/memory.h"
+
+#include "server/ipalloc_private.h"
+
+/* One address-to-node pairing as parsed from the "home_nodes" file. */
+struct home_node {
+ ctdb_sock_addr addr; /* public IP address the entry applies to */
+ uint32_t pnn; /* node number the address should be placed on */
+};
+
+/*
+ * Read the optional "home_nodes" file from the CTDB etc directory.
+ *
+ * Each line holds an IP address and a node number separated by spaces
+ * or tabs.  Lines with fewer than two fields are skipped.  A line
+ * whose address or node number cannot be parsed makes the whole read
+ * fail.
+ *
+ * Returns a talloc array (child of mem_ctx) with one entry per parsed
+ * line.  Returns NULL if the file cannot be opened, on a read, parse
+ * or allocation error, and also when the file has no usable lines.
+ */
+static struct home_node *ipalloc_get_home_nodes(TALLOC_CTX *mem_ctx)
+{
+	char *line = NULL;
+	size_t len = 0;
+	char *fname = NULL;
+	FILE *fp = NULL;
+	struct home_node *result = NULL;
+
+	fname = path_etcdir_append(mem_ctx, "home_nodes");
+	if (fname == NULL) {
+		goto fail;
+	}
+
+	fp = fopen(fname, "r");
+	if (fp == NULL) {
+		goto fail;
+	}
+	TALLOC_FREE(fname);
+
+	while (true) {
+		size_t num_nodes = talloc_array_length(result);
+		char *saveptr = NULL, *addrstr = NULL, *nodestr = NULL;
+		struct home_node hn = {
+			.pnn = CTDB_UNKNOWN_PNN,
+		};
+		struct home_node *tmp = NULL;
+		ssize_t n = 0;
+		int ret;
+
+		n = getline(&line, &len, fp);
+		if (n < 0) {
+			if (!feof(fp)) {
+				/* real error */
+				goto fail;
+			}
+			break;
+		}
+		if ((n > 0) && (line[n - 1] == '\n')) {
+			line[n - 1] = '\0';
+		}
+
+		addrstr = strtok_r(line, " \t", &saveptr);
+		if (addrstr == NULL) {
+			continue;
+		}
+		nodestr = strtok_r(NULL, " \t", &saveptr);
+		if (nodestr == NULL) {
+			continue;
+		}
+
+		ret = ctdb_sock_addr_from_string(addrstr, &hn.addr, false);
+		if (ret != 0) {
+			DBG_WARNING("Could not parse %s: %s\n",
+				    addrstr,
+				    strerror(ret));
+			goto fail;
+		}
+
+		hn.pnn = smb_strtoul(nodestr,
+				     NULL,
+				     10,
+				     &ret,
+				     SMB_STR_FULL_STR_CONV);
+		if (ret != 0) {
+			DBG_WARNING("Could not parse \"%s\"\n", nodestr);
+			goto fail;
+		}
+
+		tmp = talloc_realloc(mem_ctx,
+				     result,
+				     struct home_node,
+				     num_nodes + 1);
+		if (tmp == NULL) {
+			goto fail;
+		}
+		result = tmp;
+		result[num_nodes] = hn;
+	}
+
+	fclose(fp);
+	fp = NULL;
+	/* getline() allocated this buffer; release it on success too */
+	SAFE_FREE(line);
+	return result;
+
+fail:
+	if (fp != NULL) {
+		fclose(fp);
+		fp = NULL;
+	}
+	SAFE_FREE(line);
+	TALLOC_FREE(fname);
+	TALLOC_FREE(result);
+	return NULL;
+}
+
+/*
+ * Deterministic IP allocation.
+ *
+ * Every address is first assigned by position in the list modulo the
+ * number of nodes, so a given set of nodes always produces the same
+ * layout.  An address listed in the optional "home_nodes" file is
+ * then moved to its configured node, unless that node number is out
+ * of range, in which case a warning is logged and the modulo
+ * assignment is kept.  Finally, assignments that the chosen node
+ * cannot host are dropped and the remaining unassigned addresses are
+ * handed to basic_allocate_unassigned().
+ *
+ * 'NoIPFailback' is not honoured by this algorithm; a warning is
+ * logged if it is set.
+ *
+ * Always returns true.
+ */
+bool ipalloc_deterministic(struct ipalloc_state *ipalloc_state)
+{
+	struct home_node *home_nodes = ipalloc_get_home_nodes(ipalloc_state);
+	size_t num_home_nodes = talloc_array_length(home_nodes);
+	struct public_ip_list *t;
+	uint32_t i;
+	uint32_t numnodes;
+
+	numnodes = ipalloc_state->num;
+
+	DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
+	/* Allocate IPs to nodes in a modulo fashion so that IPs will
+	 * always be allocated the same way for a specific set of
+	 * available/unavailable nodes.
+	 */
+
+	for (i = 0, t = ipalloc_state->all_ips; t != NULL; t = t->next, i++) {
+		size_t j;
+
+		/* With no nodes there is nothing to take the modulo by;
+		 * leave the address unassigned instead of dividing by
+		 * zero.
+		 */
+		if (numnodes == 0) {
+			t->pnn = CTDB_UNKNOWN_PNN;
+			continue;
+		}
+
+		t->pnn = i % numnodes;
+
+		/* The first matching home_nodes entry decides */
+		for (j = 0; j < num_home_nodes; j++) {
+			struct home_node *hn = &home_nodes[j];
+
+			if (ctdb_sock_addr_same_ip(&t->addr, &hn->addr)) {
+
+				if (hn->pnn >= numnodes) {
+					DBG_WARNING("pnn %" PRIu32
+						    " too large\n",
+						    hn->pnn);
+					break;
+				}
+
+				t->pnn = hn->pnn;
+				break;
+			}
+		}
+	}
+
+	/* IP failback doesn't make sense with deterministic
+	 * IPs, since the modulo step above implicitly fails
+	 * back IPs to their "home" node.
+	 */
+	if (ipalloc_state->no_ip_failback) {
+		D_WARNING("WARNING: 'NoIPFailback' set but ignored - "
+			  "incompatible with 'Deterministic IPs'\n");
+	}
+
+	unassign_unsuitable_ips(ipalloc_state);
+
+	basic_allocate_unassigned(ipalloc_state);
+
+	/* No failback here! */
+
+	TALLOC_FREE(home_nodes);
+
+	return true;
+}
diff --git a/ctdb/server/ipalloc_lcp2.c b/ctdb/server/ipalloc_lcp2.c
new file mode 100644
index 0000000..996adcf
--- /dev/null
+++ b/ctdb/server/ipalloc_lcp2.c
@@ -0,0 +1,525 @@
+/*
+ ctdb ip takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "common/logging.h"
+
+#include "protocol/protocol_util.h"
+
+#include "server/ipalloc_private.h"
+
+/*
+ * Similarity measure between two addresses, based on the length of
+ * their longest common prefix.
+ *
+ * Both addresses are turned into IP_KEYLEN-word keys with ip_key()
+ * and XOR-ed word by word.  A word that is identical in both keys
+ * contributes 32; a word that differs contributes its number of
+ * leading zero bits.  Note that the scan does not stop at the first
+ * differing word: every word adds its own count to the result.
+ * Because of the key layout all addresses are treated as 128 bits
+ * long.
+ *
+ * FIXME? Should we consider IPv4 and IPv6 separately given that the
+ * 12 bytes of 0 prefix padding will hurt the algorithm if there are
+ * lots of nodes and IP addresses?
+ */
+static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
+{
+	uint32_t first[IP_KEYLEN];
+	uint32_t *second;
+	uint32_t total = 0;
+	int w;
+
+	/* ip_key() hands back a static buffer, so keep a private copy
+	 * of the first key before asking for the second one.
+	 */
+	memcpy(first, ip_key(ip1), sizeof(first));
+	second = ip_key(ip2);
+
+	for (w = 0; w < IP_KEYLEN; w++) {
+		uint32_t diff = first[w] ^ second[w];
+		uint32_t mask;
+
+		if (diff == 0) {
+			total += 32;
+			continue;
+		}
+
+		/* Count number of leading zeroes.
+		 * FIXME? This could be optimised...
+		 */
+		for (mask = (uint32_t)1 << 31;
+		     (diff & mask) == 0;
+		     mask >>= 1) {
+			total += 1;
+		}
+	}
+
+	return total;
+}
+
+/*
+ * Sum of squared ip_distance() values between the given address and
+ * every address in the list ips that is assigned to node pnn.
+ *
+ * The list is normally all_ips or a tail of it.  The address itself
+ * is never compared with itself; it is recognised by pointer
+ * identity, which assumes ip points into the same list.  That makes
+ * the same function usable both for "what would adding this address
+ * cost" and "what is this address currently costing".
+ */
+static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
+				  struct public_ip_list *ips,
+				  unsigned int pnn)
+{
+	struct public_ip_list *cur;
+	uint32_t total = 0;
+
+	for (cur = ips; cur != NULL; cur = cur->next) {
+		uint32_t dist;
+
+		/* Skip other nodes' addresses and the address itself */
+		if (cur->pnn != pnn || &(cur->addr) == ip) {
+			continue;
+		}
+
+		dist = ip_distance(ip, &(cur->addr));
+		total += dist * dist; /* Cheaper than pulling in math.h :-) */
+	}
+
+	return total;
+}
+
+/*
+ * LCP2 imbalance of one node: the sum of squared distances over every
+ * pair of addresses currently assigned to pnn.
+ *
+ * Each pair is counted once because an address is only compared with
+ * the addresses that follow it in the list.
+ */
+static uint32_t lcp2_imbalance(struct public_ip_list * all_ips,
+			       unsigned int pnn)
+{
+	struct public_ip_list *cur;
+	uint32_t total = 0;
+
+	for (cur = all_ips; cur != NULL; cur = cur->next) {
+		if (cur->pnn == pnn) {
+			total += ip_distance_2_sum(&(cur->addr),
+						   cur->next,
+						   pnn);
+		}
+	}
+
+	return total;
+}
+
+/*
+ * Allocate and fill the per-node working arrays for the LCP2
+ * algorithm.
+ *
+ * On success *lcp2_imbalances holds the current imbalance of every
+ * node and *rebalance_candidates marks the nodes that addresses may
+ * be failed back to: nodes that currently host no address, plus any
+ * node listed in force_rebalance_nodes.  Both arrays are talloc
+ * children of ipalloc_state.
+ *
+ * Returns false on allocation failure; in that case nothing is left
+ * allocated and *rebalance_candidates is reset to NULL.
+ */
+static bool lcp2_init(struct ipalloc_state *ipalloc_state,
+		      uint32_t **lcp2_imbalances,
+		      bool **rebalance_candidates)
+{
+	unsigned int i, numnodes;
+	struct public_ip_list *t;
+
+	numnodes = ipalloc_state->num;
+
+	*rebalance_candidates = talloc_array(ipalloc_state, bool, numnodes);
+	if (*rebalance_candidates == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+		return false;
+	}
+	*lcp2_imbalances = talloc_array(ipalloc_state, uint32_t, numnodes);
+	if (*lcp2_imbalances == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+		/* Don't leave the first array behind on failure */
+		talloc_free(*rebalance_candidates);
+		*rebalance_candidates = NULL;
+		return false;
+	}
+
+	for (i=0; i<numnodes; i++) {
+		(*lcp2_imbalances)[i] =
+			lcp2_imbalance(ipalloc_state->all_ips, i);
+		/* First step: assume all nodes are candidates */
+		(*rebalance_candidates)[i] = true;
+	}
+
+	/* 2nd step: if a node has IPs assigned then it must have been
+	 * healthy before, so we remove it from consideration.  This
+	 * is overkill but is all we have because we don't maintain
+	 * state between takeover runs.  An alternative would be to
+	 * keep state and invalidate it every time the recovery master
+	 * changes.
+	 */
+	for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+		/* The range check keeps a bogus pnn from writing
+		 * outside the array.
+		 */
+		if (t->pnn != CTDB_UNKNOWN_PNN && t->pnn < numnodes) {
+			(*rebalance_candidates)[t->pnn] = false;
+		}
+	}
+
+	/* 3rd step: if a node is forced to re-balance then
+	   we allow failback onto the node */
+	if (ipalloc_state->force_rebalance_nodes == NULL) {
+		return true;
+	}
+	for (i = 0;
+	     i < talloc_array_length(ipalloc_state->force_rebalance_nodes);
+	     i++) {
+		uint32_t pnn = ipalloc_state->force_rebalance_nodes[i];
+		if (pnn >= numnodes) {
+			DEBUG(DEBUG_ERR,
+			      (__location__ " unknown node %u\n", pnn));
+			continue;
+		}
+
+		DEBUG(DEBUG_NOTICE,
+		      ("Forcing rebalancing of IPs to node %u\n", pnn));
+		(*rebalance_candidates)[pnn] = true;
+	}
+
+	return true;
+}
+
+/* Allocate any unassigned addresses using the LCP2 algorithm to find
+ * the IP/node combination that will cost the least.
+ *
+ * Each pass of the outer loop examines every unassigned address
+ * against every node that can take it over, picks the single pair
+ * with the smallest added distance sum, assigns it and updates that
+ * node's entry in lcp2_imbalances. The loop ends when nothing is
+ * left unassigned or when a pass finds no usable pair. Addresses
+ * still unassigned at the end are reported with a warning.
+ */
+static void lcp2_allocate_unassigned(struct ipalloc_state *ipalloc_state,
+ uint32_t *lcp2_imbalances)
+{
+ struct public_ip_list *t;
+ unsigned int dstnode, numnodes;
+
+ unsigned int minnode;
+ uint32_t mindsum, dstdsum, dstimbl;
+ uint32_t minimbl = 0;
+ struct public_ip_list *minip;
+
+ bool should_loop = true;
+ bool have_unassigned = true;
+
+ numnodes = ipalloc_state->num;
+
+ while (have_unassigned && should_loop) {
+ /* Set again only if this pass finds a candidate pair */
+ should_loop = false;
+
+ DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
+ DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
+
+ /* Best pair found so far in this pass: none yet */
+ minnode = CTDB_UNKNOWN_PNN;
+ mindsum = 0;
+ minip = NULL;
+
+ /* loop over each unassigned ip. */
+ for (t = ipalloc_state->all_ips; t != NULL ; t = t->next) {
+ if (t->pnn != CTDB_UNKNOWN_PNN) {
+ continue;
+ }
+
+ for (dstnode = 0; dstnode < numnodes; dstnode++) {
+ /* only check nodes that can actually takeover this ip */
+ if (!can_node_takeover_ip(ipalloc_state,
+ dstnode,
+ t)) {
+ /* no it couldn't so skip to the next node */
+ continue;
+ }
+
+ /* dstdsum: cost this ip would add to dstnode;
+ * dstimbl: dstnode's imbalance after adding it.
+ */
+ dstdsum = ip_distance_2_sum(&(t->addr),
+ ipalloc_state->all_ips,
+ dstnode);
+ dstimbl = lcp2_imbalances[dstnode] + dstdsum;
+ DEBUG(DEBUG_DEBUG,
+ (" %s -> %d [+%d]\n",
+ ctdb_sock_addr_to_string(ipalloc_state,
+ &(t->addr),
+ false),
+ dstnode,
+ dstimbl - lcp2_imbalances[dstnode]));
+
+
+ /* The comparison is on the added cost
+ * (dstdsum), not on the resulting imbalance.
+ */
+ if (minnode == CTDB_UNKNOWN_PNN ||
+ dstdsum < mindsum) {
+ minnode = dstnode;
+ minimbl = dstimbl;
+ mindsum = dstdsum;
+ minip = t;
+ should_loop = true;
+ }
+ }
+ }
+
+ DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
+
+ /* If we found one then assign it to the given node. */
+ if (minnode != CTDB_UNKNOWN_PNN) {
+ minip->pnn = minnode;
+ lcp2_imbalances[minnode] = minimbl;
+ DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
+ ctdb_sock_addr_to_string(
+ ipalloc_state,
+ &(minip->addr), false),
+ minnode,
+ mindsum));
+ }
+
+ /* There might be a better way but at least this is clear. */
+ have_unassigned = false;
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == CTDB_UNKNOWN_PNN) {
+ have_unassigned = true;
+ }
+ }
+ }
+
+ /* We already know whether any addresses are still unassigned,
+ * so only walk the list to report them when there are some.
+ */
+ if (have_unassigned) {
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == CTDB_UNKNOWN_PNN) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to find node to cover ip %s\n",
+ ctdb_sock_addr_to_string(ipalloc_state,
+ &t->addr,
+ false)));
+ }
+ }
+ }
+}
+
+/* LCP2 algorithm for rebalancing the cluster. Given a candidate node
+ * to move IPs from, determines the best IP/destination node
+ * combination to move from the source node.
+ *
+ * Only nodes flagged in rebalance_candidates that can take over the
+ * address are considered as destinations. If a suitable move is
+ * found it is applied: the address is reassigned and both nodes'
+ * entries in lcp2_imbalances are updated.
+ *
+ * Returns true if an address was moved, false if no move qualified.
+ */
+static bool lcp2_failback_candidate(struct ipalloc_state *ipalloc_state,
+ unsigned int srcnode,
+ uint32_t *lcp2_imbalances,
+ bool *rebalance_candidates)
+{
+ unsigned int dstnode, mindstnode, numnodes;
+ uint32_t srcdsum, dstimbl, dstdsum;
+ uint32_t minsrcimbl, mindstimbl;
+ struct public_ip_list *minip;
+ struct public_ip_list *t;
+
+ /* Find an IP and destination node that best reduces imbalance. */
+ minip = NULL;
+ minsrcimbl = 0;
+ mindstnode = CTDB_UNKNOWN_PNN;
+ mindstimbl = 0;
+
+ numnodes = ipalloc_state->num;
+
+ DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
+ DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n",
+ srcnode, lcp2_imbalances[srcnode]));
+
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ uint32_t srcimbl;
+
+ /* Only consider addresses on srcnode. */
+ if (t->pnn != srcnode) {
+ continue;
+ }
+
+ /* What is this IP address costing the source node?
+ * srcimbl is the source imbalance once it is removed.
+ */
+ srcdsum = ip_distance_2_sum(&(t->addr),
+ ipalloc_state->all_ips,
+ srcnode);
+ srcimbl = lcp2_imbalances[srcnode] - srcdsum;
+
+ /* Consider what this IP address would cost each potential
+ * destination node. Destination nodes are limited to
+ * those that are newly healthy, since we don't want
+ * to do gratuitous failover of IPs just to make minor
+ * balance improvements.
+ */
+ for (dstnode = 0; dstnode < numnodes; dstnode++) {
+ if (!rebalance_candidates[dstnode]) {
+ continue;
+ }
+
+ /* only check nodes that can actually takeover this ip */
+ if (!can_node_takeover_ip(ipalloc_state, dstnode,
+ t)) {
+ /* no it couldn't so skip to the next node */
+ continue;
+ }
+
+ dstdsum = ip_distance_2_sum(&(t->addr),
+ ipalloc_state->all_ips,
+ dstnode);
+ dstimbl = lcp2_imbalances[dstnode] + dstdsum;
+ /* NOTE(review): srcdsum is unsigned, so -srcdsum relies
+ * on %d reinterpreting the wrapped value as negative.
+ */
+ DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
+ srcnode, -srcdsum,
+ ctdb_sock_addr_to_string(
+ ipalloc_state,
+ &(t->addr), false),
+ dstnode, dstdsum));
+
+ /* Accept the move only if the destination would end up
+ * less imbalanced than the source is now AND the address
+ * costs less there than here; among such moves keep the
+ * one with the smallest combined imbalance afterwards.
+ */
+ if ((dstimbl < lcp2_imbalances[srcnode]) &&
+ (dstdsum < srcdsum) && \
+ ((mindstnode == CTDB_UNKNOWN_PNN) || \
+ ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
+
+ minip = t;
+ minsrcimbl = srcimbl;
+ mindstnode = dstnode;
+ mindstimbl = dstimbl;
+ }
+ }
+ }
+ DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
+
+ if (mindstnode != CTDB_UNKNOWN_PNN) {
+ /* We found a move that makes things better... */
+ DEBUG(DEBUG_INFO,
+ ("%d [%d] -> %s -> %d [+%d]\n",
+ srcnode, minsrcimbl - lcp2_imbalances[srcnode],
+ ctdb_sock_addr_to_string(ipalloc_state,
+ &(minip->addr), false),
+ mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
+
+
+ /* Commit the move and the new imbalance of both nodes */
+ lcp2_imbalances[srcnode] = minsrcimbl;
+ lcp2_imbalances[mindstnode] = mindstimbl;
+ minip->pnn = mindstnode;
+
+ return true;
+ }
+
+ return false;
+}
+
+/* A node number paired with its imbalance, so nodes can be sorted by imbalance. */
+struct lcp2_imbalance_pnn {
+ uint32_t imbalance; /* LCP2 imbalance value of the node */
+ unsigned int pnn; /* node number the value belongs to */
+};
+
+/*
+ * qsort() comparison callback for struct lcp2_imbalance_pnn that
+ * orders entries by descending imbalance: returns -1 when a has the
+ * larger imbalance, 1 when b has, and 0 when they are equal.
+ */
+static int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
+{
+	const struct lcp2_imbalance_pnn *left = a;
+	const struct lcp2_imbalance_pnn *right = b;
+
+	return (left->imbalance < right->imbalance) -
+	       (left->imbalance > right->imbalance);
+}
+
+/* LCP2 algorithm for rebalancing the cluster.  Repeatedly sorts the
+ * nodes by descending imbalance and tries each in turn as a source
+ * node via lcp2_failback_candidate().  As soon as one address has
+ * been moved the imbalances are re-sorted and the search starts over.
+ * The function returns when a full pass moves nothing, or when the
+ * scratch array cannot be allocated.
+ */
+static void lcp2_failback(struct ipalloc_state *ipalloc_state,
+			  uint32_t *lcp2_imbalances,
+			  bool *rebalance_candidates)
+{
+	int i, numnodes;
+	struct lcp2_imbalance_pnn * lips;
+	bool again;
+
+	numnodes = ipalloc_state->num;
+
+	do {
+		/* Put the imbalances and nodes into an array, sort them and
+		 * iterate through candidates.  Usually the 1st one will be
+		 * used, so this doesn't cost much...
+		 */
+		DEBUG(DEBUG_DEBUG,("+++++++++++++++++++++++++++++++++++++++++\n"));
+		DEBUG(DEBUG_DEBUG,("Selecting most imbalanced node from:\n"));
+		lips = talloc_array(ipalloc_state,
+				    struct lcp2_imbalance_pnn,
+				    numnodes);
+		if (lips == NULL) {
+			/* Rebalancing is best effort; give up rather
+			 * than dereference NULL.
+			 */
+			DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+			return;
+		}
+		for (i = 0; i < numnodes; i++) {
+			lips[i].imbalance = lcp2_imbalances[i];
+			lips[i].pnn = i;
+			DEBUG(DEBUG_DEBUG,(" %d [%d]\n", i, lcp2_imbalances[i]));
+		}
+		qsort(lips, numnodes, sizeof(struct lcp2_imbalance_pnn),
+		      lcp2_cmp_imbalance_pnn);
+
+		again = false;
+		for (i = 0; i < numnodes; i++) {
+			/* This means that all nodes had 0 or 1 addresses, so
+			 * can't be imbalanced.
+			 */
+			if (lips[i].imbalance == 0) {
+				break;
+			}
+
+			if (lcp2_failback_candidate(ipalloc_state,
+						    lips[i].pnn,
+						    lcp2_imbalances,
+						    rebalance_candidates)) {
+				again = true;
+				break;
+			}
+		}
+
+		talloc_free(lips);
+	} while (again);
+}
+
+/*
+ * LCP2 IP allocation entry point.
+ *
+ * Drops assignments the current node cannot host, sets up the
+ * per-node imbalance data, allocates the unassigned addresses and
+ * then, unless failback is disabled or no node qualifies as a
+ * rebalance target, runs the failback step.
+ *
+ * Returns false only if lcp2_init() fails, true otherwise.
+ */
+bool ipalloc_lcp2(struct ipalloc_state *ipalloc_state)
+{
+	uint32_t *imbalances;
+	bool *candidates;
+	int node, node_count;
+
+	unassign_unsuitable_ips(ipalloc_state);
+
+	if (!lcp2_init(ipalloc_state, &imbalances, &candidates)) {
+		return false;
+	}
+
+	lcp2_allocate_unassigned(ipalloc_state, imbalances);
+
+	/* If we don't want IPs to fail back then don't rebalance IPs. */
+	if (ipalloc_state->no_ip_failback) {
+		return true;
+	}
+
+	/* Rebalancing is only worth attempting when at least one node
+	 * may receive addresses; finding that out here is much
+	 * cheaper than running the failback step for nothing.
+	 */
+	node_count = ipalloc_state->num;
+	for (node = 0; node < node_count; node++) {
+		if (candidates[node]) {
+			/* Now, try to make sure the ip addresses are
+			 * evenly distributed across the nodes.
+			 */
+			lcp2_failback(ipalloc_state, imbalances, candidates);
+			break;
+		}
+	}
+
+	return true;
+}
diff --git a/ctdb/server/ipalloc_nondeterministic.c b/ctdb/server/ipalloc_nondeterministic.c
new file mode 100644
index 0000000..9da7d6c
--- /dev/null
+++ b/ctdb/server/ipalloc_nondeterministic.c
@@ -0,0 +1,150 @@
+/*
+ ctdb ip takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "ctdb_private.h"
+
+#include "lib/util/debug.h"
+#include "common/logging.h"
+#include "common/common.h"
+
+#include "protocol/protocol_util.h"
+
+#include "server/ipalloc_private.h"
+
+/* Basic non-deterministic rebalancing algorithm.
+ *
+ * For each assigned address, finds the nodes with the highest and
+ * lowest IP coverage among the nodes that can take the address over.
+ * If the two counts differ by more than one, the first address found
+ * on the busiest node is handed to find_takeover_node() and the whole
+ * scan restarts. The number of such restarts is capped at
+ * num_ips + 5.
+ *
+ * num_ips is the total number of addresses in all_ips, as counted by
+ * the caller.
+ */
+static void basic_failback(struct ipalloc_state *ipalloc_state,
+ int num_ips)
+{
+ unsigned int i, numnodes, maxnode, minnode;
+ int maxnum, minnum, num, retries;
+ struct public_ip_list *t;
+
+ numnodes = ipalloc_state->num;
+ retries = 0;
+
+try_again:
+ maxnum=0;
+ minnum=0;
+
+ /* for each ip address, loop over all nodes that can serve
+ this ip and make sure that the difference between the node
+ serving the most and the node serving the least ip's are
+ not greater than 1.
+ */
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == CTDB_UNKNOWN_PNN) {
+ continue;
+ }
+
+ /* Get the highest and lowest number of ips's served by any
+ valid node which can serve this ip.
+ */
+ maxnode = CTDB_UNKNOWN_PNN;
+ minnode = CTDB_UNKNOWN_PNN;
+ for (i=0; i<numnodes; i++) {
+ /* only check nodes that can actually serve this ip */
+ if (!can_node_takeover_ip(ipalloc_state, i,
+ t)) {
+ /* no it couldn't so skip to the next node */
+ continue;
+ }
+
+ num = node_ip_coverage(i, ipalloc_state->all_ips);
+ /* The first eligible node seeds both max and min */
+ if (maxnode == CTDB_UNKNOWN_PNN) {
+ maxnode = i;
+ maxnum = num;
+ } else {
+ if (num > maxnum) {
+ maxnode = i;
+ maxnum = num;
+ }
+ }
+ if (minnode == CTDB_UNKNOWN_PNN) {
+ minnode = i;
+ minnum = num;
+ } else {
+ if (num < minnum) {
+ minnode = i;
+ minnum = num;
+ }
+ }
+ }
+ /* No node at all was eligible for this ip */
+ if (maxnode == CTDB_UNKNOWN_PNN) {
+ DEBUG(DEBUG_WARNING,
+ (__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
+ ctdb_sock_addr_to_string(ipalloc_state,
+ &t->addr, false)));
+
+ continue;
+ }
+
+ /* if the spread between the smallest and largest coverage by
+ a node is >=2 we steal one of the ips from the node with
+ most coverage to even things out a bit.
+ try to do this a limited number of times since we dont
+ want to spend too much time balancing the ip coverage.
+ */
+ if ((maxnum > minnum+1) &&
+ (retries < (num_ips + 5))){
+ struct public_ip_list *tt;
+
+ /* Reassign one of maxnode's VNNs */
+ for (tt = ipalloc_state->all_ips; tt != NULL; tt = tt->next) {
+ if (tt->pnn == maxnode) {
+ /* Result deliberately ignored; the rescan
+ * re-evaluates the balance either way.
+ */
+ (void)find_takeover_node(ipalloc_state,
+ tt);
+ retries++;
+ goto try_again;;
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Non-deterministic IP allocation entry point.
+ *
+ * Drops assignments the current node cannot host, allocates whatever
+ * is unassigned and then, unless failback is disabled, evens out the
+ * distribution with basic_failback().
+ *
+ * Always returns true.
+ */
+bool ipalloc_nondeterministic(struct ipalloc_state *ipalloc_state)
+{
+	struct public_ip_list *ip = ipalloc_state->all_ips;
+	int total_ips = 0;
+
+	/* This should be pushed down into basic_failback. */
+	while (ip != NULL) {
+		total_ips++;
+		ip = ip->next;
+	}
+
+	unassign_unsuitable_ips(ipalloc_state);
+
+	basic_allocate_unassigned(ipalloc_state);
+
+	/* Rebalance only when IPs are allowed to fail back. */
+	if (!ipalloc_state->no_ip_failback) {
+		/* Now, try to make sure the ip addresses are evenly
+		 * distributed across the nodes.
+		 */
+		basic_failback(ipalloc_state, total_ips);
+	}
+
+	return true;
+}
diff --git a/ctdb/server/ipalloc_private.h b/ctdb/server/ipalloc_private.h
new file mode 100644
index 0000000..3ea3d31
--- /dev/null
+++ b/ctdb/server/ipalloc_private.h
@@ -0,0 +1,57 @@
+/*
+ CTDB IP takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_IPALLOC_PRIVATE_H__
+#define __CTDB_IPALLOC_PRIVATE_H__
+
+#include "protocol/protocol.h"
+
+#include "server/ipalloc.h"
+
+/* State shared by all IP allocation algorithms for one allocation run. */
+struct ipalloc_state {
+ /* Number of nodes; the algorithms loop over node numbers 0..num-1 */
+ uint32_t num;
+
+ /* Arrays with data for each node */
+ struct ctdb_public_ip_list *available_public_ips;
+ struct ctdb_public_ip_list *known_public_ips;
+
+ /* Head of the linked list of all public IPs with their current node */
+ struct public_ip_list *all_ips;
+ /* presumably selects which ipalloc_*() algorithm is run - TODO confirm */
+ enum ipalloc_algorithm algorithm;
+ /* When set, LCP2 and non-deterministic skip their failback step;
+ * the deterministic algorithm warns and ignores it */
+ bool no_ip_failback;
+ /* NOTE(review): not used by the algorithm files; presumably checked
+ * by the takeover helpers - confirm */
+ bool no_ip_takeover;
+ /* talloc array of node numbers that LCP2 always treats as rebalance
+ * targets; may be NULL */
+ uint32_t *force_rebalance_nodes;
+};
+
+/* Used by the algorithms as a filter: true if node pnn may take over ip */
+bool can_node_takeover_ip(struct ipalloc_state *ipalloc_state,
+ int32_t pnn,
+ struct public_ip_list *ip);
+/* Count used by basic_failback() as the number of IPs served by pnn */
+int node_ip_coverage(uint32_t pnn, struct public_ip_list *ips);
+/* Picks a node for ip and stores it in ip->pnn; returns 0 on success,
+ * non-zero if no node was found */
+int find_takeover_node(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *ip);
+
+/* Reset to CTDB_UNKNOWN_PNN every assignment the node cannot host */
+void unassign_unsuitable_ips(struct ipalloc_state *ipalloc_state);
+/* Run find_takeover_node() on every unassigned IP */
+void basic_allocate_unassigned(struct ipalloc_state *ipalloc_state);
+
+/* Allocation algorithm entry points */
+bool ipalloc_nondeterministic(struct ipalloc_state *ipalloc_state);
+bool ipalloc_deterministic(struct ipalloc_state *ipalloc_state);
+bool ipalloc_lcp2(struct ipalloc_state *ipalloc_state);
+
+#endif /* __CTDB_IPALLOC_PRIVATE_H__ */
diff --git a/ctdb/server/legacy_conf.c b/ctdb/server/legacy_conf.c
new file mode 100644
index 0000000..3391a3b
--- /dev/null
+++ b/ctdb/server/legacy_conf.c
@@ -0,0 +1,80 @@
+/*
+ CTDB legacy config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "lib/util/debug.h"
+
+#include "common/conf.h"
+#include "common/logging.h"
+
+#include "legacy_conf.h"
+
+#define LEGACY_SCRIPT_LOG_LEVEL_DEFAULT "ERROR"
+
+/*
+ * Validation callback for the script log level option: accepts the
+ * new value only if debug_level_parse() can parse it.  On rejection
+ * an error naming the section, key and offending value is logged.
+ * The old value and the update mode are not consulted.
+ */
+static bool legacy_conf_validate_script_log_level(const char *key,
+						  const char *old_loglevel,
+						  const char *new_loglevel,
+						  enum conf_update_mode mode)
+{
+	int parsed_level;
+
+	if (debug_level_parse(new_loglevel, &parsed_level)) {
+		return true;
+	}
+
+	D_ERR("Invalid value for [%s] -> %s = %s\n",
+	      LEGACY_CONF_SECTION,
+	      key,
+	      new_loglevel);
+	return false;
+}
+
+/*
+ * Register the [legacy] configuration section and its options with
+ * the given configuration context: four boolean options with their
+ * defaults and no validator, followed by the script log level string
+ * option with its validator.
+ */
+void legacy_conf_init(struct conf_context *conf)
+{
+	/* Boolean options, in registration order */
+	static const struct {
+		const char *key;
+		bool default_value;
+	} bool_options[] = {
+		{ LEGACY_CONF_REALTIME_SCHEDULING, true },
+		{ LEGACY_CONF_LMASTER_CAPABILITY, true },
+		{ LEGACY_CONF_START_AS_STOPPED, false },
+		{ LEGACY_CONF_START_AS_DISABLED, false },
+	};
+	size_t idx;
+
+	conf_define_section(conf, LEGACY_CONF_SECTION, NULL);
+
+	for (idx = 0;
+	     idx < sizeof(bool_options) / sizeof(bool_options[0]);
+	     idx++) {
+		conf_define_boolean(conf,
+				    LEGACY_CONF_SECTION,
+				    bool_options[idx].key,
+				    bool_options[idx].default_value,
+				    NULL);
+	}
+
+	conf_define_string(conf,
+			   LEGACY_CONF_SECTION,
+			   LEGACY_CONF_SCRIPT_LOG_LEVEL,
+			   LEGACY_SCRIPT_LOG_LEVEL_DEFAULT,
+			   legacy_conf_validate_script_log_level);
+}
diff --git a/ctdb/server/legacy_conf.h b/ctdb/server/legacy_conf.h
new file mode 100644
index 0000000..b6b4b57
--- /dev/null
+++ b/ctdb/server/legacy_conf.h
@@ -0,0 +1,35 @@
+/*
+ CTDB legacy config handling
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_LEGACY_CONF_H__
+#define __CTDB_LEGACY_CONF_H__
+
+#include "common/conf.h"
+
+/* Name of the configuration section holding the legacy options */
+#define LEGACY_CONF_SECTION "legacy"
+
+/* Option keys within the legacy section; the first four are
+ * registered as booleans, the last as a string holding a debug level
+ */
+#define LEGACY_CONF_REALTIME_SCHEDULING "realtime scheduling"
+#define LEGACY_CONF_LMASTER_CAPABILITY "lmaster capability"
+#define LEGACY_CONF_START_AS_STOPPED "start as stopped"
+#define LEGACY_CONF_START_AS_DISABLED "start as disabled"
+#define LEGACY_CONF_SCRIPT_LOG_LEVEL "script log level"
+
+/* Define the legacy section and all of its options on conf */
+void legacy_conf_init(struct conf_context *conf);
+
+#endif /* __CTDB_LEGACY_CONF_H__ */
diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h
new file mode 100644
index 0000000..cb8d66f
--- /dev/null
+++ b/ctdb/tcp/ctdb_tcp.h
@@ -0,0 +1,56 @@
+/*
+ ctdb database library
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CTDB_TCP_H
+#define _CTDB_TCP_H
+
+/* ctdb_tcp main state */
+struct ctdb_tcp {
+ struct ctdb_context *ctdb;
+ int listen_fd; /* presumably the listening socket set up by ctdb_tcp_listen() - TODO confirm */
+};
+
+/*
+ state associated with one tcp node
+*/
+struct ctdb_tcp_node {
+ /* outgoing side: socket (-1 when not open) and its packet queue */
+ int out_fd;
+ struct ctdb_queue *out_queue;
+
+ /* fd event and timer of a connection attempt; released by
+ ctdb_tcp_stop_outgoing() */
+ struct tevent_fd *connect_fde;
+ struct tevent_timer *connect_te;
+
+ struct ctdb_context *ctdb;
+ /* incoming side: released by ctdb_tcp_stop_incoming() */
+ struct ctdb_queue *in_queue;
+};
+
+
+/* prototypes internal to tcp transport */
+int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length);
+int ctdb_tcp_listen(struct ctdb_context *ctdb);
+/* has the shape of a tevent timer callback */
+void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te,
+ struct timeval t, void *private_data);
+void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args);
+/* packet callback for the outgoing socket; private_data is the ctdb_node */
+void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data);
+/* stop any established or pending outgoing connection to node */
+void ctdb_tcp_stop_outgoing(struct ctdb_node *node);
+/* stop the incoming connection from node */
+void ctdb_tcp_stop_incoming(struct ctdb_node *node);
+
+#define CTDB_TCP_ALIGNMENT 8
+
+#endif /* _CTDB_TCP_H */
diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c
new file mode 100644
index 0000000..6f5862a
--- /dev/null
+++ b/ctdb/tcp/tcp_connect.c
@@ -0,0 +1,599 @@
+/*
+ ctdb over TCP
+
+ Copyright (C) Andrew Tridgell 2006
+ Copyright (C) Ronnie Sahlberg 2008
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/blocking.h"
+
+#include "ctdb_private.h"
+
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "ctdb_tcp.h"
+
+/*
+ stop any outgoing connection (established or pending) to a node and cancel any scheduled connect attempt
+ */
+void ctdb_tcp_stop_outgoing(struct ctdb_node *node)
+{
+ struct ctdb_tcp_node *tnode = talloc_get_type(
+ node->transport_data, struct ctdb_tcp_node);
+
+ TALLOC_FREE(tnode->out_queue); /* established connection, if any */
+ TALLOC_FREE(tnode->connect_te); /* pending connect timeout / retry timer */
+ TALLOC_FREE(tnode->connect_fde); /* event watching a pending connect() */
+ if (tnode->out_fd != -1) { /* socket not (yet) handed over to a queue */
+ close(tnode->out_fd);
+ tnode->out_fd = -1;
+ }
+}
+
+/*
+  tear down the incoming connection from a node, if there is one
+ */
+void ctdb_tcp_stop_incoming(struct ctdb_node *node)
+{
+	struct ctdb_tcp_node *tcp_node;
+
+	tcp_node = talloc_get_type(node->transport_data, struct ctdb_tcp_node);
+	TALLOC_FREE(tcp_node->in_queue);
+}
+
+/*
+  read callback for the outgoing socket.  No data is expected on it, so any
+  call (e.g. the peer closing with RST or FIN) reports the node as dead
+ */
+void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
+{
+	struct ctdb_node *peer;
+
+	peer = talloc_get_type(private_data, struct ctdb_node);
+	peer->ctdb->upcalls->node_dead(peer);
+	TALLOC_FREE(data);
+}
+
+/* called when the connecting socket becomes ready (see ctdb_tcp_start_outgoing):
+ checks the result of the non-blocking connect() and either sets up the
+ outgoing queue or schedules a retry in one second */
+static void ctdb_node_connect_write(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ struct ctdb_node *node = talloc_get_type(private_data,
+ struct ctdb_node);
+ struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data,
+ struct ctdb_tcp_node);
+ struct ctdb_context *ctdb = node->ctdb;
+ int error = 0;
+ socklen_t len = sizeof(error);
+ int one = 1;
+ int ret;
+
+ TALLOC_FREE(tnode->connect_te); /* socket is ready: cancel the connect timeout */
+
+ ret = getsockopt(tnode->out_fd, SOL_SOCKET, SO_ERROR, &error, &len); /* outcome of connect() */
+ if (ret != 0 || error != 0) { /* connect failed: tear down, retry in 1s */
+ ctdb_tcp_stop_outgoing(node);
+ tnode->connect_te = tevent_add_timer(ctdb->ev, tnode,
+ timeval_current_ofs(1, 0),
+ ctdb_tcp_node_connect, node);
+ return;
+ }
+
+ TALLOC_FREE(tnode->connect_fde); /* connect done: stop watching the fd here */
+
+ ret = setsockopt(tnode->out_fd,
+ IPPROTO_TCP,
+ TCP_NODELAY,
+ (char *)&one,
+ sizeof(one));
+ if (ret == -1) { /* not fatal: only warn */
+ DBG_WARNING("Failed to set TCP_NODELAY on fd - %s\n",
+ strerror(errno));
+ }
+ ret = setsockopt(tnode->out_fd,
+ SOL_SOCKET,
+ SO_KEEPALIVE,(char *)&one,
+ sizeof(one));
+ if (ret == -1) { /* not fatal: only warn */
+ DBG_WARNING("Failed to set KEEPALIVE on fd - %s\n",
+ strerror(errno));
+ }
+
+ tnode->out_queue = ctdb_queue_setup(node->ctdb,
+ tnode,
+ tnode->out_fd,
+ CTDB_TCP_ALIGNMENT,
+ ctdb_tcp_tnode_cb,
+ node,
+ "to-node-%s",
+ node->name);
+ if (tnode->out_queue == NULL) { /* tear down (closes out_fd), retry in 1s */
+ DBG_ERR("Failed to set up outgoing queue\n");
+ ctdb_tcp_stop_outgoing(node);
+ tnode->connect_te = tevent_add_timer(ctdb->ev,
+ tnode,
+ timeval_current_ofs(1, 0),
+ ctdb_tcp_node_connect,
+ node);
+ return;
+ }
+
+ /* the queue subsystem now owns this fd */
+ tnode->out_fd = -1;
+
+ /*
+ * Mark the node to which this connection has been established
+ * as connected, but only if the corresponding incoming
+ * connection (in_queue) is also up
+ */
+ if (tnode->in_queue != NULL) {
+ node->ctdb->upcalls->node_connected(node);
+ }
+}
+
+
+static void ctdb_tcp_node_connect_timeout(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t,
+ void *private_data);
+
+/*
+  start a non-blocking tcp connection attempt to a node
+
+  On return either a connect is in flight (ctdb_node_connect_write() runs when
+  the socket becomes ready, ctdb_tcp_node_connect_timeout() after one second)
+  or the attempt has been torn down and a retry scheduled in one second.
+*/
+static void ctdb_tcp_start_outgoing(struct ctdb_node *node)
+{
+	struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data,
+						      struct ctdb_tcp_node);
+	struct ctdb_context *ctdb = node->ctdb;
+	ctdb_sock_addr sock_in;
+	int sockin_size, sockout_size;
+	ctdb_sock_addr sock_out;
+	int ret;
+
+	sock_out = node->address;
+
+	tnode->out_fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
+	if (tnode->out_fd == -1) {
+		DBG_ERR("Failed to create socket\n");
+		goto failed;
+	}
+
+	ret = set_blocking(tnode->out_fd, false);
+	if (ret != 0) {
+		DBG_ERR("Failed to set socket non-blocking (%s)\n",
+			strerror(errno));
+		goto failed;
+	}
+
+	set_close_on_exec(tnode->out_fd);
+
+	DBG_DEBUG("Created TCP SOCKET FD:%d\n", tnode->out_fd);
+
+	/* Bind our side of the connection to the same address we use to listen
+	 * on incoming CTDB traffic.
+	 * We must specify this address to make sure that the address we expose to
+	 * the remote side is actually routable in case CTDB traffic will run on
+	 * a dedicated non-routable network.
+	 */
+	sock_in = *ctdb->address;
+
+	/* AIX libs check to see if the socket address and length
+	   arguments are consistent with each other on calls like
+	   connect(). Can not get by with just sizeof(sock_in),
+	   need sizeof(sock_in.ip).
+	*/
+	switch (sock_in.sa.sa_family) {
+	case AF_INET:
+		sock_in.ip.sin_port = 0 /* Any port */;
+		sockin_size = sizeof(sock_in.ip);
+		sockout_size = sizeof(sock_out.ip);
+		break;
+	case AF_INET6:
+		sock_in.ip6.sin6_port = 0 /* Any port */;
+		sockin_size = sizeof(sock_in.ip6);
+		sockout_size = sizeof(sock_out.ip6);
+		break;
+	default:
+		DBG_ERR("Unknown address family %u\n", sock_in.sa.sa_family);
+		/* Can't happen due to address parsing restrictions */
+		goto failed;
+	}
+
+	ret = bind(tnode->out_fd, (struct sockaddr *)&sock_in, sockin_size);
+	if (ret == -1) {
+		DBG_ERR("Failed to bind socket (%s)\n", strerror(errno));
+		goto failed;
+	}
+
+	ret = connect(tnode->out_fd, (struct sockaddr *)&sock_out, sockout_size);
+	if (ret != 0 && errno != EINPROGRESS) {
+		/* immediate failure - EINPROGRESS is the normal non-blocking case */
+		DBG_ERR("Failed to connect socket (%s)\n", strerror(errno));
+		goto failed;
+	}
+
+	/* non-blocking connect - wait for write event */
+	tnode->connect_fde = tevent_add_fd(ctdb->ev, tnode, tnode->out_fd,
+					   TEVENT_FD_WRITE|TEVENT_FD_READ,
+					   ctdb_node_connect_write, node);
+	if (tnode->connect_fde == NULL) {
+		DBG_ERR("Failed to add connect event\n");
+		goto failed;
+	}
+
+	/* don't give it long to connect - retry in one second. This ensures
+	   that we find a node is up quickly (tcp normally backs off a syn reply
+	   delay by quite a lot) */
+	tnode->connect_te = tevent_add_timer(ctdb->ev, tnode,
+					     timeval_current_ofs(1, 0),
+					     ctdb_tcp_node_connect_timeout, node);
+
+	return;
+
+failed:
+	ctdb_tcp_stop_outgoing(node);
+	tnode->connect_te = tevent_add_timer(ctdb->ev, tnode,
+					     timeval_current_ofs(1, 0),
+					     ctdb_tcp_node_connect, node);
+}
+
+/* timer callback: begin an outgoing connection attempt to the node that was
+   passed as private_data */
+void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te,
+			   struct timeval t, void *private_data)
+{
+	struct ctdb_node *target;
+
+	target = talloc_get_type_abort(private_data, struct ctdb_node);
+	ctdb_tcp_start_outgoing(target);
+}
+
+/* connect timed out: abandon the pending attempt and start a fresh one */
+static void ctdb_tcp_node_connect_timeout(struct tevent_context *ev,
+					  struct tevent_timer *te,
+					  struct timeval t, void *private_data)
+{
+	struct ctdb_node *target;
+
+	target = talloc_get_type_abort(private_data, struct ctdb_node);
+	ctdb_tcp_stop_outgoing(target);
+	ctdb_tcp_start_outgoing(target);
+}
+
+/*
+ called when we get contacted by another node: accept the connection and set
+ up the incoming queue for that node. The connection is refused if
+ ctdb_ip_to_node() finds no node for the peer or the node already has a queue
+*/
+static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+ struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data,
+ struct ctdb_tcp);
+ ctdb_sock_addr addr;
+ socklen_t len;
+ int fd;
+ struct ctdb_node *node;
+ struct ctdb_tcp_node *tnode;
+ int one = 1;
+ int ret;
+
+ memset(&addr, 0, sizeof(addr));
+ len = sizeof(addr);
+ fd = accept(ctcp->listen_fd, (struct sockaddr *)&addr, &len);
+ if (fd == -1) return;
+ smb_set_close_on_exec(fd); /* NOTE(review): set_close_on_exec(fd) is also called below - looks redundant */
+
+ node = ctdb_ip_to_node(ctdb, &addr);
+ if (node == NULL) {
+ D_ERR("Refused connection from unknown node %s\n",
+ ctdb_addr_to_str(&addr));
+ close(fd);
+ return;
+ }
+
+ tnode = talloc_get_type_abort(node->transport_data,
+ struct ctdb_tcp_node);
+ if (tnode == NULL) { /* NOTE(review): presumably unreachable, the _abort variant should not return NULL - confirm */
+ /* This can't happen - see ctdb_tcp_initialise() */
+ DBG_ERR("INTERNAL ERROR setting up connection from node %s\n",
+ ctdb_addr_to_str(&addr));
+ close(fd);
+ return;
+ }
+
+ if (tnode->in_queue != NULL) { /* only one incoming connection per node */
+ DBG_ERR("Incoming queue active, rejecting connection from %s\n",
+ ctdb_addr_to_str(&addr));
+ close(fd);
+ return;
+ }
+
+ ret = set_blocking(fd, false);
+ if (ret != 0) {
+ DBG_ERR("Failed to set socket non-blocking (%s)\n",
+ strerror(errno));
+ close(fd);
+ return;
+ }
+
+ set_close_on_exec(fd);
+
+ DBG_DEBUG("Created SOCKET FD:%d to incoming ctdb connection\n", fd);
+
+ ret = setsockopt(fd,
+ SOL_SOCKET,
+ SO_KEEPALIVE,
+ (char *)&one,
+ sizeof(one));
+ if (ret == -1) { /* not fatal: only warn */
+ DBG_WARNING("Failed to set KEEPALIVE on fd - %s\n",
+ strerror(errno));
+ }
+
+ tnode->in_queue = ctdb_queue_setup(ctdb,
+ tnode,
+ fd,
+ CTDB_TCP_ALIGNMENT,
+ ctdb_tcp_read_cb,
+ node,
+ "ctdbd-%s",
+ ctdb_addr_to_str(&addr));
+ if (tnode->in_queue == NULL) {
+ DBG_ERR("Failed to set up incoming queue\n");
+ close(fd);
+ return;
+ }
+
+ /*
+ * Mark the connecting node as connected, but only if the
+ * corresponding outbound connection is also up
+ */
+ if (tnode->out_queue != NULL) {
+ node->ctdb->upcalls->node_connected(node);
+ }
+ }
+
+
+/*
+  automatically find which address to listen on: bind to the first node
+  address that bind() accepts, record it in ctdb->address and listen on it
+*/
+static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb)
+{
+	struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data,
+						struct ctdb_tcp);
+	ctdb_sock_addr sock;
+	int lock_fd;
+	unsigned int i;
+	const char *lock_path = CTDB_RUNDIR "/.socket_lock";
+	struct flock lock;
+	int one = 1;
+	int sock_size;
+	struct tevent_fd *fde;
+
+	/* If there are no nodes, then it won't be possible to find the first
+	 * one. Log a failure and short circuit the whole process. */
+	if (ctdb->num_nodes == 0) {
+		DEBUG(DEBUG_CRIT,("No nodes available to attempt bind to - is the nodes file empty?\n"));
+		return -1;
+	}
+
+	/* in order to ensure that we don't get two nodes with the same
+	   address, we must make the bind() and listen() calls atomic. The
+	   SO_REUSEADDR setsockopt only prevents double binds if the first socket is in LISTEN state */
+	lock_fd = open(lock_path, O_RDWR|O_CREAT, 0666);
+	if (lock_fd == -1) {
+		DEBUG(DEBUG_CRIT,("Unable to open %s\n", lock_path));
+		return -1;
+	}
+
+	lock.l_type = F_WRLCK;
+	lock.l_whence = SEEK_SET;
+	lock.l_start = 0;
+	lock.l_len = 1;
+	lock.l_pid = 0;
+
+	if (fcntl(lock_fd, F_SETLKW, &lock) != 0) {
+		DEBUG(DEBUG_CRIT,("Unable to lock %s\n", lock_path));
+		close(lock_fd);
+		return -1;
+	}
+
+	for (i=0; i < ctdb->num_nodes; i++) {
+		if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
+			continue;
+		}
+		sock = ctdb->nodes[i]->address;
+
+		switch (sock.sa.sa_family) {
+		case AF_INET:
+			sock_size = sizeof(sock.ip);
+			break;
+		case AF_INET6:
+			sock_size = sizeof(sock.ip6);
+			break;
+		default:
+			DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n",
+				sock.sa.sa_family));
+			continue;
+		}
+
+		ctcp->listen_fd = socket(sock.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
+		if (ctcp->listen_fd == -1) {
+			ctdb_set_error(ctdb, "socket failed\n");
+			continue;
+		}
+
+		set_close_on_exec(ctcp->listen_fd);
+		if (setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,
+			       (char *)&one,sizeof(one)) == -1) {
+			DEBUG(DEBUG_WARNING, ("Failed to set REUSEADDR on fd - %s\n",
+				strerror(errno)));
+		}
+
+		if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sock_size) == 0) {
+			break;
+		}
+
+		if (errno == EADDRNOTAVAIL) {
+			DEBUG(DEBUG_DEBUG,(__location__ " Failed to bind() to socket. %s(%d)\n",
+				strerror(errno), errno));
+		} else {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to bind() to socket. %s(%d)\n",
+				strerror(errno), errno));
+		}
+
+		close(ctcp->listen_fd);
+		ctcp->listen_fd = -1;
+	}
+
+	if (i == ctdb->num_nodes) {
+		DEBUG(DEBUG_CRIT,("Unable to bind to any of the node addresses - giving up\n"));
+		goto failed;
+	}
+	ctdb->address = talloc_memdup(ctdb, &ctdb->nodes[i]->address,
+				      sizeof(ctdb_sock_addr));
+	if (ctdb->address == NULL) {
+		ctdb_set_error(ctdb, "Out of memory at %s:%d",
+			       __FILE__, __LINE__);
+		goto failed;
+	}
+
+	ctdb->name = talloc_asprintf(ctdb, "%s:%u",
+				     ctdb_addr_to_str(ctdb->address),
+				     ctdb_addr_to_port(ctdb->address));
+	if (ctdb->name == NULL) {
+		ctdb_set_error(ctdb, "Out of memory at %s:%d",
+			       __FILE__, __LINE__);
+		goto failed;
+	}
+	DEBUG(DEBUG_INFO,("ctdb chose network address %s\n", ctdb->name));
+
+	if (listen(ctcp->listen_fd, 10) == -1) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to listen() on socket. %s(%d)\n", strerror(errno), errno));
+		goto failed;
+	}
+
+	fde = tevent_add_fd(ctdb->ev, ctcp, ctcp->listen_fd, TEVENT_FD_READ,
+			    ctdb_listen_event, ctdb);
+	if (fde == NULL) { /* without the event nobody would ever accept() */
+		DEBUG(DEBUG_ERR,(__location__ " Failed to add listen event\n"));
+		goto failed;
+	}
+	tevent_fd_set_auto_close(fde);
+
+	close(lock_fd);
+	return 0;
+
+failed:
+	close(lock_fd);
+	if (ctcp->listen_fd != -1) {
+		close(ctcp->listen_fd);
+		ctcp->listen_fd = -1;
+	}
+	return -1;
+}
+
+
+/* listen on our own address (ctdb->address), choosing one automatically if
+   none is configured.  Returns 0 on success and -1 on failure, in which case
+   no listening socket is left open */
+int ctdb_tcp_listen(struct ctdb_context *ctdb)
+{
+	struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data,
+						struct ctdb_tcp);
+	ctdb_sock_addr sock;
+	int sock_size;
+	int one = 1;
+	struct tevent_fd *fde;
+
+	/* auto-bind to the first available address unless one was specified */
+	if (!ctdb->address) {
+		return ctdb_tcp_listen_automatic(ctdb);
+	}
+
+	sock = *ctdb->address;
+
+	switch (sock.sa.sa_family) {
+	case AF_INET:
+		sock_size = sizeof(sock.ip);
+		break;
+	case AF_INET6:
+		sock_size = sizeof(sock.ip6);
+		break;
+	default:
+		DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n",
+			sock.sa.sa_family));
+		goto failed;
+	}
+
+	ctcp->listen_fd = socket(sock.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
+	if (ctcp->listen_fd == -1) {
+		ctdb_set_error(ctdb, "socket failed\n");
+		return -1;
+	}
+
+	set_close_on_exec(ctcp->listen_fd);
+	if (setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one)) == -1) {
+		DEBUG(DEBUG_WARNING, ("Failed to set REUSEADDR on fd - %s\n", strerror(errno)));
+	}
+
+	if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sock_size) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to bind() to socket. %s(%d)\n", strerror(errno), errno));
+		goto failed;
+	}
+	if (listen(ctcp->listen_fd, 10) == -1) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to listen() on socket. %s(%d)\n", strerror(errno), errno));
+		goto failed;
+	}
+
+	fde = tevent_add_fd(ctdb->ev, ctcp, ctcp->listen_fd, TEVENT_FD_READ,
+			    ctdb_listen_event, ctdb);
+	if (fde == NULL) { /* without the event nobody would ever accept() */
+		DEBUG(DEBUG_ERR,(__location__ " Failed to add listen event\n"));
+		goto failed;
+	}
+	tevent_fd_set_auto_close(fde);
+	return 0;
+
+failed:
+	if (ctcp->listen_fd != -1) {
+		close(ctcp->listen_fd);
+	}
+	ctcp->listen_fd = -1;
+	return -1;
+}
+
diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c
new file mode 100644
index 0000000..97ebe1d
--- /dev/null
+++ b/ctdb/tcp/tcp_init.c
@@ -0,0 +1,215 @@
+/*
+ ctdb over TCP
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/time.h"
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "ctdb_tcp.h"
+
+/* talloc destructor: close the outgoing socket if the tnode still owns one */
+static int tnode_destructor(struct ctdb_tcp_node *tnode)
+{
+	if (tnode->out_fd == -1) {
+		return 0;
+	}
+
+	close(tnode->out_fd);
+	tnode->out_fd = -1;
+	return 0;
+}
+
+/*
+  allocate the tcp transport state for a ctdb node and attach it to the node
+*/
+static int ctdb_tcp_add_node(struct ctdb_node *node)
+{
+	struct ctdb_tcp_node *tnode = talloc_zero(node, struct ctdb_tcp_node);
+
+	CTDB_NO_MEMORY(node->ctdb, tnode);
+
+	tnode->ctdb = node->ctdb;
+	tnode->out_fd = -1; /* no outgoing socket yet */
+	talloc_set_destructor(tnode, tnode_destructor);
+	node->transport_data = tnode;
+
+	/* presumably CTDB_NO_MEMORY() has already bailed out on allocation failure */
+	return 0;
+}
+
+/*
+  initialise transport structures: start listening, then set up per-node state
+*/
+static int ctdb_tcp_initialise(struct ctdb_context *ctdb)
+{
+	unsigned int idx = 0;
+
+	/* failing to listen on our own address is fatal for the process */
+	if (ctdb_tcp_listen(ctdb) != 0) {
+		DEBUG(DEBUG_CRIT, (__location__ " Failed to start listening on the CTDB socket\n"));
+		exit(1);
+	}
+
+	while (idx < ctdb->num_nodes) {
+		struct ctdb_node *node = ctdb->nodes[idx];
+		/* deleted nodes get no transport state */
+		if (!(node->flags & NODE_FLAGS_DELETED) &&
+		    ctdb_tcp_add_node(node) != 0) {
+			DEBUG(DEBUG_CRIT, ("methods->add_node failed at %d\n", idx));
+			return -1;
+		}
+		idx++;
+	}
+	return 0;
+}
+
+/*
+  schedule the first outgoing connection attempt to a node
+*/
+static int ctdb_tcp_connect_node(struct ctdb_node *node)
+{
+	struct ctdb_tcp_node *tcp_node;
+
+	/* nothing to do for the node that has our own address */
+	if (ctdb_same_address(node->ctdb->address, &node->address)) {
+		return 0;
+	}
+
+	/* zero timeout: the connect starts on the next event loop pass */
+	tcp_node = talloc_get_type(node->transport_data, struct ctdb_tcp_node);
+	tcp_node->connect_te = tevent_add_timer(node->ctdb->ev, tcp_node,
+						timeval_zero(),
+						ctdb_tcp_node_connect, node);
+
+	return 0;
+}
+
+/*
+  drop both directions of the connection to a node that has been
+  disconnected, then schedule an immediate reconnect attempt
+*/
+static void ctdb_tcp_restart(struct ctdb_node *node)
+{
+	struct ctdb_context *ctdb = node->ctdb;
+	struct ctdb_tcp_node *tcp_node;
+
+	DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn));
+	ctdb_tcp_stop_incoming(node);
+	ctdb_tcp_stop_outgoing(node);
+
+	tcp_node = talloc_get_type(node->transport_data, struct ctdb_tcp_node);
+	tcp_node->connect_te = tevent_add_timer(ctdb->ev, tcp_node, timeval_zero(),
+						ctdb_tcp_node_connect, node);
+}
+
+
+/*
+  shutdown the transport: free the global tcp state, then every node's state
+*/
+static void ctdb_tcp_shutdown(struct ctdb_context *ctdb)
+{
+	uint32_t n = 0;
+
+	TALLOC_FREE(ctdb->transport_data);
+	while (n < ctdb->num_nodes) {
+		TALLOC_FREE(ctdb->nodes[n]->transport_data);
+		n++;
+	}
+}
+
+/*
+  start the transport: kick off a connection to every node that is not deleted
+*/
+static int ctdb_tcp_start(struct ctdb_context *ctdb)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ctdb->num_nodes; idx++) {
+		struct ctdb_node *node = ctdb->nodes[idx];
+
+		if (!(node->flags & NODE_FLAGS_DELETED)) {
+			ctdb_tcp_connect_node(node);
+		}
+	}
+	return 0;
+}
+
+
+/*
+  transport packet allocator - lets the transport decide how packets are sized
+*/
+static void *ctdb_tcp_allocate_pkt(TALLOC_CTX *mem_ctx, size_t size)
+{
+	const size_t mask = CTDB_TCP_ALIGNMENT - 1;
+
+	/* round up to the 8 byte alignment so that a length header and
+	   64 bit elements in structures can be used */
+	return talloc_size(mem_ctx, (size + mask) & ~mask);
+}
+
+
+static const struct ctdb_methods ctdb_tcp_methods = { /* tcp transport method table, installed by ctdb_tcp_init() */
+ .initialise = ctdb_tcp_initialise,
+ .start = ctdb_tcp_start,
+ .queue_pkt = ctdb_tcp_queue_pkt,
+ .add_node = ctdb_tcp_add_node,
+ .connect_node = ctdb_tcp_connect_node,
+ .allocate_pkt = ctdb_tcp_allocate_pkt,
+ .shutdown = ctdb_tcp_shutdown,
+ .restart = ctdb_tcp_restart,
+};
+
+/* talloc destructor: detach the tcp transport from its ctdb context */
+static int tcp_ctcp_destructor(struct ctdb_tcp *ctcp)
+{
+	ctcp->ctdb->methods = NULL;
+	ctcp->ctdb->transport_data = NULL;
+	return 0;
+}
+
+
+/*
+  set up the tcp transport for a ctdb context and install its method table
+*/
+int ctdb_tcp_init(struct ctdb_context *ctdb)
+{
+	struct ctdb_tcp *ctcp = talloc_zero(ctdb, struct ctdb_tcp);
+
+	CTDB_NO_MEMORY(ctdb, ctcp);
+	talloc_set_destructor(ctcp, tcp_ctcp_destructor);
+
+	ctcp->ctdb = ctdb;
+	ctcp->listen_fd = -1; /* not listening yet */
+
+	ctdb->methods = &ctdb_tcp_methods;
+	ctdb->transport_data = ctcp;
+	return 0;
+}
+
diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c
new file mode 100644
index 0000000..bcb18fb
--- /dev/null
+++ b/ctdb/tcp/tcp_io.c
@@ -0,0 +1,96 @@
+/*
+ ctdb over TCP
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+#include "ctdb_tcp.h"
+
+
+/*
+ called when a complete packet has come in on a node's incoming queue (data is NULL if the socket died); a valid packet goes to recv_pkt, anything else marks the node dead
+ */
+void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args)
+{
+ struct ctdb_node *node = talloc_get_type_abort(args, struct ctdb_node);
+ struct ctdb_tcp_node *tnode = talloc_get_type_abort(
+ node->transport_data, struct ctdb_tcp_node);
+ struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; /* only dereferenced after the checks below */
+
+ if (data == NULL) {
+ /* incoming socket has died */
+ goto failed;
+ }
+
+ if (cnt < sizeof(*hdr)) { /* too short to hold a request header */
+ DEBUG(DEBUG_ALERT,(__location__ " Bad packet length %u\n", (unsigned)cnt));
+ goto failed;
+ }
+
+ if (cnt & (CTDB_TCP_ALIGNMENT-1)) { /* length must be a multiple of the alignment */
+ DEBUG(DEBUG_ALERT,(__location__ " Length 0x%x not multiple of alignment\n",
+ (unsigned)cnt));
+ goto failed;
+ }
+
+ if (hdr->ctdb_magic != CTDB_MAGIC) {
+ DEBUG(DEBUG_ALERT,(__location__ " Non CTDB packet 0x%x rejected\n",
+ hdr->ctdb_magic));
+ goto failed;
+ }
+
+ if (hdr->ctdb_version != CTDB_PROTOCOL) {
+ DEBUG(DEBUG_ALERT, (__location__ " Bad CTDB version 0x%x rejected\n",
+ hdr->ctdb_version));
+ goto failed;
+ }
+
+ /* tell the ctdb layer above that we have a packet */
+ tnode->ctdb->upcalls->recv_pkt(tnode->ctdb, data, cnt);
+ return; /* data is not freed here on success */
+
+failed:
+ node->ctdb->upcalls->node_dead(node);
+
+ TALLOC_FREE(data); /* rejected packet is discarded */
+}
+
+/*
+  queue a packet for sending; it is dropped (returning 0) if there is no outgoing queue
+*/
+int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length)
+{
+	struct ctdb_tcp_node *tcp_node;
+
+	tcp_node = talloc_get_type(node->transport_data, struct ctdb_tcp_node);
+	if (tcp_node->out_queue != NULL) {
+		return ctdb_queue_send(tcp_node->out_queue, data, length);
+	}
+	DBG_DEBUG("No outgoing connection, dropping packet\n");
+	return 0;
+}
diff --git a/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
new file mode 100755
index 0000000..072780a
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Verify that a node's public IP address can be deleted using 'ctdb deleteip'.
+
+# This is an extended version of simple/17_ctdb_config_delete_ip.sh
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+test_node_has_test_ip()
+{
+ # $test_node and $test_ip set by select_test_node_and_ips()
+ # shellcheck disable=SC2154
+ try_command_on_node "$test_node" "ip addr show to ${test_ip}"
+ [ -n "$out" ] # succeed only if the command printed something (presumably $out holds its output)
+}
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+# $test_node and $test_ip set by select_test_node_and_ips()
+# shellcheck disable=SC2154
+echo "Checking that node ${test_node} hosts ${test_ip}..."
+test_node_has_test_ip # under "set -e" a failure here aborts the test
+
+echo "Attempting to remove ${test_ip} from node ${test_node}."
+ctdb_onnode "$test_node" "delip ${test_ip}"
+ctdb_onnode "$test_node" "ipreallocate"
+wait_until_ips_are_on_node '!' "$test_node" "$test_ip" # '!' negates: wait until the IP is NOT on the node
+
+echo "Waiting for ${test_ip} to disappear from node ${test_node}..."
+wait_until 60/5 '!' test_node_has_test_ip # presumably 60s timeout, checked every 5s - TODO confirm
+
+echo "GOOD: IP was successfully removed!"
diff --git a/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
new file mode 100755
index 0000000..150aeea
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Verify that adding/deleting IPs using 'ctdb reloadips' works
+
+# Checks that when IPs are added to and deleted from a single node then
+# those IPs are actually assigned and unassigned from the specified
+# interface.
+
+# Prerequisites:
+
+# * An active CTDB cluster with public IP addresses configured
+
+# Expected results:
+
+# * When IPs are added to a single node then they are assigned to an
+# interface.
+
+# * When IPs are deleted from a single node then they disappear from an
+# interface.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+####################
+
+# Search for an unused 10.B.1.0/24 network on which to add public IP
+# addresses.
+
+# The initial search is for a 10.B.0.0/16 network since some
+# configurations may use a whole class B for the private network.
+# Check that there are no public IP addresses (as reported by "ctdb ip
+# all") or other IP addresses (as reported by "ip addr show") with
+# the provided prefix. Note that this is an IPv4-specific test.
+
+echo "Getting public IP information from CTDB..."
+ctdb_onnode "$test_node" "ip -X -v all"
+ctdb_ip_info=$(awk -F'|' 'NR > 1 { print $2, $3, $5 }' "$outfile") # one "IP PNN IFACE" line per address, header skipped
+
+echo "Getting IP information from interfaces..."
+try_command_on_node all "ip addr show"
+ip_addr_info=$(awk '$1 == "inet" { ip = $2; sub(/\/.*/, "", ip); print ip }' \
+ "$outfile")
+
+prefix=""
+for b in $(seq 0 255) ; do
+ prefix="10.${b}"
+
+ # Does the prefix match any IP address returned by "ip addr show"?
+ while read ip ; do
+ if [ "${ip#${prefix}.}" != "$ip" ] ; then
+ prefix="" # stays empty if every candidate is in use
+ continue 2 # prefix in use: try the next 10.B
+ fi
+ done <<<"$ip_addr_info"
+
+ # Does the prefix match any public IP address "ctdb ip all"?
+ while read ip pnn iface ; do
+ if [ "${ip#${prefix}.}" != "$ip" ] ; then
+ prefix="" # stays empty if every candidate is in use
+ continue 2 # prefix in use: try the next 10.B
+ fi
+ done <<<"$ctdb_ip_info"
+
+ # Got through the IPs without matching prefix - done!
+ break
+done
+
+[ -n "$prefix" ] || die "Unable to find a usable IP address prefix"
+
+# We really want a class C: 10.B.1.0/24
+prefix="${prefix}.1"
+
+####################
+
+iface=$(echo "$ctdb_ip_info" | awk -v pnn=$test_node '$2 == pnn { print $3 ; exit }')
+
+####################
+
+# This needs to be set only on the recmaster. All nodes should do the trick.
+new_takeover_timeout=90
+echo "Setting TakeoverTimeout=${new_takeover_timeout} to avoid potential bans"
+try_command_on_node all "$CTDB setvar TakeoverTimeout ${new_takeover_timeout}"
+
+####################
+
+try_command_on_node $test_node $CTDB_TEST_WRAPPER ctdb_base_show
+addresses="${out}/public_addresses"
+echo "Public addresses file on node $test_node is \"$addresses\""
+backup="${addresses}.$$"
+
+backup_public_addresses () # copy $addresses to $backup on $test_node
+{
+ try_command_on_node $test_node "cp -a $addresses $backup"
+}
+
+restore_public_addresses () # move $backup back over $addresses on $test_node
+{
+ try_command_on_node $test_node "mv $backup $addresses >/dev/null 2>&1 || true" # best effort: a failing mv is ignored
+}
+ctdb_test_exit_hook_add restore_public_addresses
+
+# Now create that backup
+backup_public_addresses
+
+####################
+
+add_ips_to_original_config () # usage: NODE ADDRESSES_FILE IFACE PREFIX FIRST LAST
+{
+ local test_node="$1"
+ local addresses="$2"
+ local iface="$3"
+ local prefix="$4"
+ local first="$5"
+ local last="$6"
+
+ echo "Adding new public IPs to original config on node ${test_node}..."
+ echo "IPs will be ${prefix}.${first}/24..${prefix}.${last}/24"
+
+ # Implement this by completely rebuilding the public_addresses
+ # file. This is easier than deleting entries on a remote node.
+ restore_public_addresses # back to the original file (this consumes the backup)...
+ backup_public_addresses # ...so take a fresh backup before appending
+
+ # Note that tee is a safe way of creating a file on a remote node.
+ # This avoids potential fragility with quoting or redirection.
+ for i in $(seq $first $last) ; do # NOTE(review): "i" is not declared local here
+ echo "${prefix}.${i}/24 ${iface}"
+ done |
+ try_command_on_node -i $test_node "tee -a $addresses"
+}
+
+check_ips ()
+{
+	# Usage: check_ips NODE IFACE PREFIX FIRST [LAST]
+	# Check that IFACE on NODE carries exactly PREFIX.FIRST..PREFIX.LAST
+	# out of PREFIX.0/24; exits the test with status 1 on any difference.
+	local test_node="$1"
+	local iface="$2"
+	local prefix="$3"
+	local first="$4"
+	local last="$5"
+
+	local public_ips_file=$(mktemp)
+	# If just 0 specified then this is an empty range
+	if [ "$first" = 0 ] && [ -z "$last" ] ; then
+		echo "Checking that there are no IPs in ${prefix}.0/24"
+	else
+		echo "Checking IPs in range ${prefix}.${first}/24..${prefix}.${last}/24"
+
+		local i
+		for i in $(seq $first $last) ; do
+			echo "${prefix}.${i}"
+		done | sort >"$public_ips_file"
+	fi
+
+	try_command_on_node "$test_node" "ip addr show dev ${iface}"
+	local ip_addrs_file=$(mktemp)
+	sed -n -e "s@.*inet * \(${prefix//./\.}\.[0-9]*\)/.*@\1@p" "$outfile" |
+		sort >"$ip_addrs_file"
+
+	local diffs=$(diff "$public_ips_file" "$ip_addrs_file") || true
+	rm -f "$ip_addrs_file" "$public_ips_file"
+
+	if [ -z "$diffs" ] ; then
+		echo "GOOD: IP addresses are as expected"
+	else
+		echo "BAD: IP addresses are incorrect:"
+		echo "$diffs"
+		exit 1
+	fi
+}
+
+# ctdb reloadips will fail if it can't disable takeover runs. The most
+# likely reason for this is that there is already a takeover run in
+# progress. We can't predict when this will happen, so retry if this
+# occurs. Returns 0 on success, 1 on any other failure or after 10 retries.
+do_ctdb_reloadips ()
+{
+ local retry_max=10
+ local retry_count=0
+ while : ; do
+ if try_command_on_node "$test_node" "$CTDB reloadips" ; then
+ return 0
+ fi
+
+ if [ "$out" != "Failed to disable takeover runs" ] ; then # some other error: do not retry
+ return 1
+ fi
+
+ if [ $retry_count -ge $retry_max ] ; then # retries exhausted
+ return 1
+ fi
+
+ retry_count=$((retry_count + 1))
+ echo "Retrying..."
+ sleep_for 1
+ done
+}
+
+####################
+
+new_ip_max=100
+
+####################
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" 1 $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 1 $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# This should be the primary. Ensure that no other IPs are lost
+echo "Using 'ctdb reloadips' to remove the 1st address just added..."
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" 2 $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 2 $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# Get rid of about 1/2 the IPs
+start=$(($new_ip_max / 2 + 1))
+echo "Updating to include only about 1/2 of the new IPs..."
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" $start $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" $start $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# Delete the rest
+echo "Restoring original IP configuration..."
+restore_public_addresses
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 0 # a lone 0 means: expect no IPs left in the prefix
diff --git a/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh
new file mode 100755
index 0000000..4d8f617
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Verify that NFS connections are monitored and that NFS tickles are sent.
+
+# Create a connection to the NFS server on a node. Then disable the
+# relevant NFS server node and ensure that it sends an appropriate reset
+# packet. The packet must come from the releasing node.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB on the releasing node should correctly send a reset packet when
+# the node is disabled.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep 30 | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+echo "Getting MAC address associated with ${test_ip}..."
+releasing_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$releasing_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+echo "MAC address is: ${releasing_mac}"
+
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+# Only look for a reset from the releasing node
+tcptickle_sniff_wait_show "$releasing_mac"
diff --git a/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh
new file mode 100755
index 0000000..e3f1540
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Verify that NFS connections are monitored and that NFS tickles are sent.
+
+# We create a connection to the NFS server on a node and confirm that
+# this connection is registered in the nfs-tickles/ subdirectory in
+# shared storage. Then kill ctdbd on the relevant NFS server node and
+# ensure that the takeover node sends an appropriate reset packet.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB should correctly record the socket and on failover the takeover
+# node should send a reset packet.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes | wc -l"
+numnodes="$out"
+
+# We need this for later, so we know how long to run nc for.
+ctdb_onnode "$test_node" "getvar MonitorInterval"
+monitor_interval="${out#*= }"
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+wait_for_monitor_event $test_node
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+echo "Getting TicklesUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+update_interval="$out"
+
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+wait_until $(($update_interval * 2)) \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket
+
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles and confuse our sniff. IPs also
+# need to be dropped because we're simulating a dead node rather than
+# a CTDB failure. To properly handle a CTDB failure we would need a
+# watchdog to drop the IPs when CTDB disappears.
+echo "Killing ctdbd on ${test_node}..."
+try_command_on_node -v $test_node "killall -9 ctdbd ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}"
+
+wait_until_node_has_status $test_node disconnected
+
+tcptickle_sniff_wait_show
diff --git a/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh
new file mode 100755
index 0000000..78b8948
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Verify that CIFS connections are monitored and that CIFS tickles are sent.
+
+# We create a connection to the CIFS server on a node and confirm that
+# this connection is registered by CTDB. Then disable the relevant CIFS
+# server node and ensure that the takeover node sends an appropriate
+# reset packet.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Clustered Samba must be listening on TCP port 445.
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the takeover node
+# should send a reset packet.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+#echo "Monitor interval on node $test_node is $monitor_interval seconds."
+
+select_test_node_and_ips
+
+test_port=445
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+# This should happen as soon as connection is up... but unless we wait
+# we sometimes beat the registration.
+echo "Checking if CIFS connection is tracked by CTDB on test node..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+# This is almost immediate. However, it is sent between nodes
+# asynchronously, so it is worth checking...
+echo "Wait until CIFS connection is tracked by CTDB on all nodes..."
+try_command_on_node $test_node "$CTDB listnodes | wc -l"
+numnodes="$out"
+wait_until 5 \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+tcptickle_sniff_wait_show
diff --git a/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh
new file mode 100755
index 0000000..7a0944f
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Verify that a gratuitous ARP is sent when a node is failed out.
+
+# We ping a public IP and lookup the MAC address in the ARP table. We
+# then disable the node and check the ARP table again - the MAC address
+# should have changed. This test does NOT test connectivity after the
+# failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Remove any entries for the chosen address from the ARP table.
+# 4. Send a single ping request packet to the selected public address.
+# 5. Determine the MAC address corresponding to the public address by
+# checking the ARP table.
+# 6. Disable the selected node.
+# 7. Check the ARP table and check the MAC associated with the public
+# address.
+
+# Expected results:
+
+# * When a node is disabled the MAC address associated with public
+# addresses on that node should change.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local ARP table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip}..."
+ping_wrapper -q -n -c 1 $test_ip
+
+echo "Getting MAC address associated with ${test_ip}..."
+original_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$original_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+
+echo "MAC address is: ${original_mac}"
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "Getting MAC address associated with ${test_ip} again..."
+new_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$new_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+
+echo "MAC address is: ${new_mac}"
+
+if [ "$original_mac" != "$new_mac" ] ; then
+ echo "GOOD: MAC address changed"
+else
+ die "BAD: MAC address did not change"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh
new file mode 100755
index 0000000..b81510d
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+# Verify that a newly started CTDB node gets updated tickle details
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Steps:
+
+# As with 31_nfs_tickle.sh but restart a node after the tickle is
+# registered.
+
+# Expected results:
+
+# * CTDB should correctly communicate tickles to new CTDB instances as
+# they join the cluster.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes -X"
+listnodes_output="$out"
+numnodes=$(wc -l <<<"$listnodes_output")
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep 600 | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+wait_for_monitor_event $test_node
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+echo "Select a node to restart ctdbd"
+rn=$(awk -F'|' -v test_node=$test_node \
+ '$2 != test_node { print $2 ; exit }' <<<"$listnodes_output")
+
+echo "Restarting CTDB on node ${rn}"
+ctdb_nodes_restart "$rn"
+
+# In some theoretical world this is racy. In practice, the node will
+# take quite a while to become healthy, so this will beat any
+# assignment of IPs to the node.
+echo "Setting NoIPTakeover on node ${rn}"
+try_command_on_node $rn $CTDB setvar NoIPTakeover 1
+
+wait_until_ready
+
+echo "Getting TickleUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+# "ctdb getvar" prints "Name = value", so strip everything up to "= "
+# to keep only the number instead of relying on shell arithmetic
+# evaluating the whole line as an assignment
+update_interval="${out#*= }"
+
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+if ! wait_until $((update_interval * 2)) \
+	check_tickles_all $numnodes $test_ip $test_port $src_socket ; then
+	echo "BAD: connection not tracked on all nodes:"
+	echo "$out"
+	exit 1
+fi
+
+# We could go on to test whether the tickle ACK gets sent. However,
+# this is tested in previous tests and the use of NoIPTakeover
+# complicates things on a 2 node cluster.
diff --git a/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh
new file mode 100755
index 0000000..d0f3d08
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Verify that the server end of an SMB connection is correctly reset
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Clustered Samba must be listening on TCP port 445.
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the releasing node
+# should reset the server end of the connection.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+
+select_test_node_and_ips
+
+test_port=445
+
+echo "Set NoIPTakeover=1 on all nodes"
+try_command_on_node all $CTDB setvar NoIPTakeover 1
+
+echo "Give the recovery daemon some time to reload tunables"
+sleep_for 5
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+# This should happen as soon as connection is up... but unless we wait
+# we sometimes beat the registration.
+echo "Waiting until SMB connection is tracked by CTDB on test node..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+# It would be nice if ss consistently used local/peer instead of src/dst
+ss_filter="src ${test_ip}:${test_port} dst ${src_socket}"
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -z "$out" ] ; then
+ echo "BAD: ss did not list the socket"
+ exit 1
+fi
+echo "GOOD: ss lists the socket:"
+cat "$outfile"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -n "$out" ] ; then
+ echo "BAD: ss listed the socket after failover"
+ exit 1
+fi
+echo "GOOD: ss no longer lists the socket"
diff --git a/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh
new file mode 100755
index 0000000..3e249f9
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Verify that the server end of an NFS connection is correctly reset
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the releasing node
+# should reset the server end of the connection.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+
+select_test_node_and_ips
+
+test_port=2049
+
+echo "Set NoIPTakeover=1 on all nodes"
+try_command_on_node all $CTDB setvar NoIPTakeover 1
+
+echo "Give the recovery daemon some time to reload tunables"
+sleep_for 5
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until $((monitor_interval * 2)) \
+ check_tickles $test_node $test_ip $test_port $src_socket
+cat "$outfile"
+
+# It would be nice if ss consistently used local/peer instead of src/dst
+ss_filter="src ${test_ip}:${test_port} dst ${src_socket}"
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -z "$out" ] ; then
+ echo "BAD: ss did not list the socket"
+ exit 1
+fi
+echo "GOOD: ss lists the socket:"
+cat "$outfile"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -n "$out" ] ; then
+ echo "BAD: ss listed the socket after failover"
+ exit 1
+fi
+echo "GOOD: ss no longer lists the socket"
diff --git a/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh
new file mode 100755
index 0000000..539d25e
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Verify that it is possible to ping a public address after disabling a node.
+
+# We ping a public IP, disable the node hosting it and then ping the
+# public IP again.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Send a single ping request packet to the selected public address.
+# 4. Disable the selected node.
+# 5. Send another single ping request packet to the selected public address.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and the
+# address is still pingable.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local neighbor table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip}..."
+ping_wrapper -q -n -c 1 $test_ip
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "Removing ${test_ip} from the local neighbor table again..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip} again..."
+ping_wrapper -q -n -c 1 $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh
new file mode 100755
index 0000000..233819b
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Verify that it is possible to SSH to a public address after disabling a node.
+
+# We SSH to a public IP and check the hostname, disable the node hosting
+# it and then SSH again to confirm that the hostname has changed.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. SSH to the selected public address and run hostname.
+# 4. Disable the selected node.
+# 5. SSH to the selected public address again and run hostname.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# still possible to SSH to the node. The hostname should change.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local neighbor table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "SSHing to ${test_ip} and running hostname..."
+if ! original_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then
+ die "Failed to get original hostname via SSH..."
+fi
+
+echo "Hostname is: ${original_hostname}"
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "SSHing to ${test_ip} and running hostname (again)..."
+if ! new_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then
+ echo "Failed to get new hostname via SSH..."
+ echo "DEBUG:"
+ ip neigh show
+ exit 1
+fi
+
+echo "Hostname is: ${new_hostname}"
+
+if [ "$original_hostname" != "$new_hostname" ] ; then
+ echo "GOOD: hostname changed"
+else
+ die "BAD: hostname did not change"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh
new file mode 100755
index 0000000..ac2cafd
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Verify that a mounted NFS share is still operational after failover.
+
+# We mount an NFS share from a node, write a file via NFS and then
+# confirm that we can correctly read the file after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Mount the selected NFS share.
+# 5. Create a file in the NFS mount and calculate its checksum.
+# 6. Disable the selected node.
+# 7. Read the file and calculate its checksum.
+# 8. Compare the checksums.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# possible to correctly read a file over NFS. The checksums should be
+# the same before and after.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+dd if=/dev/urandom of=$nfs_local_file bs=1k count=1
+original_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 0 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+# Compare the checksums taken before and after the failover. These
+# must be the variables assigned above; comparing unset variables
+# would make the test pass unconditionally.
+if [ "$original_sum" = "$new_sum" ] ; then
+	echo "GOOD: file contents unchanged after failover"
+else
+	die "BAD: file contents are different after failover"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh
new file mode 100755
index 0000000..5c8324c
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Verify that a file created on a node is readable via NFS after a failover.
+
+# We write a file into an exported directory on a node, mount the NFS
+# share from a node, verify that we can read the file via NFS and that
+# we can still read it after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Write a file into exported directory on the node and calculate its
+# checksum.
+# 5. Mount the selected NFS share.
+# 6. Read the file via the NFS mount and calculate its checksum.
+# 7. Compare checksums.
+# 8. Disable the selected node.
+# 9. Read the file via NFS and calculate its checksum.
+# 10. Compare the checksums.
+
+# Expected results:
+
+# * Checksums for the file on all 3 occasions should be the same.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+local_f=$(mktemp)
+ctdb_test_exit_hook_add rm -f "$local_f"
+dd if=/dev/urandom of=$local_f bs=1k count=1
+local_sum=$(sum $local_f)
+
+scp -p "$local_f" "[${test_ip}]:${nfs_remote_file}"
+try_command_on_node $test_node "chmod 644 $nfs_remote_file"
+
+nfs_sum=$(sum $nfs_local_file)
+
+if [ "$local_sum" = "$nfs_sum" ] ; then
+ echo "GOOD: file contents read correctly via NFS"
+else
+ echo "BAD: file contents are different over NFS"
+ echo " original file: $local_sum"
+ echo " NFS file: $nfs_sum"
+ exit 1
+fi
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 0 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$nfs_sum" = "$new_sum" ] ; then
+ echo "GOOD: file contents unchanged after failover"
+else
+ echo "BAD: file contents are different after failover"
+ echo " original file: $nfs_sum"
+ echo " NFS file: $new_sum"
+ exit 1
+fi
diff --git a/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh
new file mode 100755
index 0000000..2d15748
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Verify that a mounted NFS share is still operational after failover.
+
+# We mount an NFS share from a node, write a file via NFS and then
+# confirm that we can correctly read the file after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Mount the selected NFS share.
+# 5. Create a file in the NFS mount and calculate its checksum.
+# 6. Kill CTDB on the selected node.
+# 7. Read the file and calculate its checksum.
+# 8. Compare the checksums.
+
+# Expected results:
+
+# * When CTDB is killed on a node the public address fails over and it is
+# possible to correctly read a file over NFS. The checksums should be
+# the same before and after.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+dd if=/dev/urandom of=$nfs_local_file bs=1k count=1
+original_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+gratarp_sniff_start
+
+echo "Killing node $test_node"
+try_command_on_node $test_node $CTDB getpid
+pid=${out#*:}
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles or do anything else clever. IPs
+# also need to be dropped because we're simulating a dead node rather
+# than a CTDB failure. To properly handle a CTDB failure we would
+# need a watchdog to drop the IPs when CTDB disappears.
+try_command_on_node -v $test_node "kill -9 $pid ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}"
+wait_until_node_has_status $test_node disconnected
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+# Compare the checksums taken before and after the failover. These
+# must be the variables assigned above; comparing unset variables
+# would make the test pass unconditionally.
+if [ "$original_sum" = "$new_sum" ] ; then
+	echo "GOOD: file contents unchanged after failover"
+else
+	die "BAD: file contents are different after failover"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
new file mode 100755
index 0000000..efa9ef2
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Verify that the recovery daemon correctly handles a rogue IP
+
+# It should be released...
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Using $test_ip, which is onnode $test_node"
+
+# This test depends on being able to assign a duplicate address on a
+# 2nd node. However, IPv6 guards against this and causes the test to
+# fail.
+case "$test_ip" in
+*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;;
+esac
+
+get_test_ip_mask_and_iface
+
+echo "Finding another node that knows about $test_ip"
+ctdb_get_all_pnns
+other_node=""
+for i in $all_pnns ; do
+ if [ "$i" = "$test_node" ] ; then
+ continue
+ fi
+ try_command_on_node $i "$CTDB ip"
+ n=$(awk -v ip="$test_ip" '$1 == ip { print }' "$outfile")
+ if [ -n "$n" ] ; then
+ other_node="$i"
+ break
+ fi
+done
+if [ -z "$other_node" ] ; then
+ die "Unable to find another node that knows about $test_ip"
+fi
+
+echo "Adding $test_ip on node $other_node"
+try_command_on_node $other_node "ip addr add ${test_ip}/${mask} dev ${iface}"
+
+rogue_ip_is_gone ()
+{
+ local pnn="$1"
+ local test_ip="$2"
+ try_command_on_node $pnn $CTDB_TEST_WRAPPER ip_maskbits_iface $test_ip
+ [ -z "$out" ]
+}
+
+echo "Waiting until rogue IP is no longer assigned..."
+wait_until 30 rogue_ip_is_gone $other_node $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh
new file mode 100755
index 0000000..5ee4e54
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Verify that TAKE_IP will work for an IP that is already on an interface
+
+# This is a variation of simple/60_recoverd_missing_ip.sh
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Running test against node $test_node and IP $test_ip"
+
+# This test puts an address on an interface and then needs to quickly
+# configure that address and cause an IP takeover. However, an IPv6
+# address will be tentative for a while so "quickly" is not possible.
+# When ctdb_control_takeover_ip() calls ctdb_sys_have_ip() it will
+# decide that the address is not present. It then attempts a takeip,
+# which can fail if the address is suddenly present because it is no
+# longer tentative.
+case "$test_ip" in
+*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;;
+esac
+
+get_test_ip_mask_and_iface
+
+echo "Deleting IP $test_ip from all nodes"
+delete_ip_from_all_nodes $test_ip
+try_command_on_node -v $test_node $CTDB ipreallocate
+wait_until_ips_are_on_node ! $test_node $test_ip
+
+try_command_on_node -v all $CTDB ip
+
+# The window here needs to be small, to try to avoid the address being
+# released. The test will still pass either way but if the first IP
+# takeover run does a release then this doesn't test the code path we
+# expect it to...
+echo "Adding IP $test_ip to $iface and CTDB on node $test_node"
+ip_cmd="ip addr add $test_ip/$mask dev $iface"
+ctdb_cmd="$CTDB addip $test_ip/$mask $iface && $CTDB ipreallocate"
+try_command_on_node $test_node "$ip_cmd && $ctdb_cmd"
+
+wait_until_ips_are_on_node $test_node $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/README b/ctdb/tests/CLUSTER/complex/README
new file mode 100644
index 0000000..72de396
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/README
@@ -0,0 +1,2 @@
+Complex integration tests. These need a real or virtual cluster.
+That is, they can not be run against local daemons.
diff --git a/ctdb/tests/CLUSTER/complex/scripts/local.bash b/ctdb/tests/CLUSTER/complex/scripts/local.bash
new file mode 100644
index 0000000..0ef5c0a
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/scripts/local.bash
@@ -0,0 +1,289 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+# Thanks/blame to Stephen Rothwell for suggesting that this can be
+# done in the shell. ;-)
+ipv6_to_hex ()
+{
+ local addr="$1"
+
+ # Replace "::" by something special.
+ local foo="${addr/::/:@:}"
+
+ # Join the groups of digits together, 0-padding each group of
+ # digits out to 4 digits, and count the number of (non-@) groups
+ local out=""
+ local count=0
+ local i
+ for i in $(IFS=":" ; echo $foo ) ; do
+ if [ "$i" = "@" ] ; then
+ out="${out}@"
+ else
+ out="${out}$(printf '%04x' 0x${i})"
+ count=$(($count + 4))
+ fi
+ done
+
+ # Replace '@' with correct number of zeroes
+ local zeroes=$(printf "%0$((32 - $count))x" 0)
+ echo "${out/@/${zeroes}}"
+}
+
+#######################################
+
+get_src_socket ()
+{
+ local proto="$1"
+ local dst_socket="$2"
+ local pid="$3"
+ local prog="$4"
+
+ local pat="^${proto}6?[[:space:]]+[[:digit:]]+[[:space:]]+[[:digit:]]+[[:space:]]+[^[:space:]]+[[:space:]]+${dst_socket//./\\.}[[:space:]]+ESTABLISHED[[:space:]]+${pid}/${prog}[[:space:]]*\$"
+ out=$(netstat -tanp |
+ grep -E "$pat" |
+ awk '{ print $4 }')
+
+ [ -n "$out" ]
+}
+
+wait_until_get_src_socket ()
+{
+ local proto="$1"
+ local dst_socket="$2"
+ local pid="$3"
+ local prog="$4"
+
+ echo "Waiting for ${prog} to establish connection to ${dst_socket}..."
+
+ wait_until 5 get_src_socket "$@"
+}
+
+#######################################
+
+check_tickles ()
+{
+ local node="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+ try_command_on_node $node ctdb gettickles $test_ip $test_port
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ grep -Fq "SRC: ${src_socket} " "$outfile"
+}
+
+check_tickles_all ()
+{
+ local numnodes="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+
+ try_command_on_node all ctdb gettickles $test_ip $test_port
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ local count=$(grep -Fc "SRC: ${src_socket} " "$outfile" || true)
+ [ $count -eq $numnodes ]
+}
+
+
+
+#######################################
+
+# filename will be in $tcpdump_filename, pid in $tcpdump_pid
+tcpdump_start ()
+{
+ tcpdump_filter="$1" # global
+
+ echo "Running tcpdump..."
+ tcpdump_filename=$(mktemp)
+ ctdb_test_exit_hook_add "rm -f $tcpdump_filename"
+
+ # The only way of being sure that tcpdump is listening is to send
+ # some packets that it will see. So we use dummy pings - the -U
+ # option to tcpdump ensures that packets are flushed to the file
+ # as they are captured.
+ local dummy_addr="127.3.2.1"
+ local dummy="icmp and dst host ${dummy_addr} and icmp[icmptype] == icmp-echo"
+ tcpdump -n -p -s 0 -e -U -w $tcpdump_filename -i any "($tcpdump_filter) or ($dummy)" &
+ ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
+
+ echo "Waiting for tcpdump output file to be ready..."
+ ping -q "$dummy_addr" >/dev/null 2>&1 &
+ ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
+
+ tcpdump_listen_for_dummy ()
+ {
+ tcpdump -n -r $tcpdump_filename -c 1 "$dummy" >/dev/null 2>&1
+ }
+
+ wait_until 10 tcpdump_listen_for_dummy
+}
+
+# By default, wait for 1 matching packet.
+tcpdump_wait ()
+{
+ local count="${1:-1}"
+ local filter="${2:-${tcpdump_filter}}"
+
+ tcpdump_check ()
+ {
+ # It would be much nicer to add "ether src
+ # $releasing_mac" to the filter. However, tcpdump
+ # does not allow MAC filtering unless an ethernet
+ # interface is specified with -i. It doesn't work
+ # with "-i any" and it doesn't work when reading from
+ # a file. :-(
+ local found
+ if [ -n "$releasing_mac" ] ; then
+ found=$(tcpdump -n -e -r "$tcpdump_filename" \
+ "$filter" 2>/dev/null |
+ grep -c "In ${releasing_mac}")
+ else
+ found=$(tcpdump -n -e -r "$tcpdump_filename" \
+ "$filter" 2>/dev/null |
+ wc -l)
+ fi
+
+ [ $found -ge $count ]
+ }
+
+ echo "Waiting for tcpdump to capture some packets..."
+ if ! wait_until 30 tcpdump_check ; then
+ echo "DEBUG AT $(date '+%F %T'):"
+ local i
+ for i in "onnode -q 0 $CTDB status" \
+ "netstat -tanp" \
+ "tcpdump -n -e -r $tcpdump_filename" ; do
+ echo "$i"
+ $i || true
+ done
+ return 1
+ fi
+}
+
+tcpdump_show ()
+{
+ local filter="${1:-${tcpdump_filter}}"
+
+ tcpdump -n -e -vv -XX -r $tcpdump_filename "$filter" 2>/dev/null
+}
+
+tcp4tickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}"
+ local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}"
+ local tickle_ack="${in} and (tcp[tcpflags] & tcp-ack != 0) and (tcp[14:2] == 1234)" # win == 1234
+ local ack_ack="${out} and (tcp[tcpflags] & tcp-ack != 0)"
+ tcptickle_reset="${in} and tcp[tcpflags] & tcp-rst != 0"
+ local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})"
+
+ tcpdump_start "$filter"
+}
+
+# tcp[] does not work for IPv6 (in some versions of tcpdump)
+tcp6tickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}"
+ local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}"
+ local tickle_ack="${in} and (ip6[53] & tcp-ack != 0) and (ip6[54:2] == 1234)" # win == 1234
+ local ack_ack="${out} and (ip6[53] & tcp-ack != 0)"
+ tcptickle_reset="${in} and ip6[53] & tcp-rst != 0"
+ local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})"
+
+ tcpdump_start "$filter"
+}
+
+tcptickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ case "${dst%:*}" in
+ *:*) tcp6tickle_sniff_start "$src" "$dst" ;;
+ *) tcp4tickle_sniff_start "$src" "$dst" ;;
+ esac
+}
+
+tcptickle_sniff_wait_show ()
+{
+ local releasing_mac="$1" # optional, used by tcpdump_wait()
+
+ tcpdump_wait 1 "$tcptickle_reset"
+
+ echo "GOOD: here are some TCP tickle packets:"
+ tcpdump_show
+}
+
+gratarp4_sniff_start ()
+{
+ tcpdump_start "arp host ${test_ip}"
+}
+
+gratarp6_sniff_start ()
+{
+ local neighbor_advertisement="icmp6 and ip6[40] == 136"
+ local hex=$(ipv6_to_hex "$test_ip")
+ local match_target="ip6[48:4] == 0x${hex:0:8} and ip6[52:4] == 0x${hex:8:8} and ip6[56:4] == 0x${hex:16:8} and ip6[60:4] == 0x${hex:24:8}"
+
+ tcpdump_start "${neighbor_advertisement} and ${match_target}"
+}
+
+gratarp_sniff_start ()
+{
+ case "$test_ip" in
+ *:*) gratarp6_sniff_start ;;
+ *) gratarp4_sniff_start ;;
+ esac
+}
+
+gratarp_sniff_wait_show ()
+{
+ tcpdump_wait 2
+
+ echo "GOOD: this should be the some gratuitous ARPs:"
+ tcpdump_show
+}
+
+ping_wrapper ()
+{
+ case "$*" in
+ *:*) ping6 "$@" ;;
+ *) ping "$@" ;;
+ esac
+}
+
+#######################################
+
+nfs_test_setup ()
+{
+ select_test_node_and_ips
+
+ nfs_first_export=$(showmount -e $test_ip | sed -n -e '2s/ .*//p')
+
+ echo "Creating test subdirectory..."
+ try_command_on_node $test_node "TMPDIR=$nfs_first_export mktemp -d"
+ nfs_test_dir="$out"
+ try_command_on_node $test_node "chmod 777 $nfs_test_dir"
+
+ nfs_mnt_d=$(mktemp -d)
+ nfs_local_file="${nfs_mnt_d}/${nfs_test_dir##*/}/TEST_FILE"
+ nfs_remote_file="${nfs_test_dir}/TEST_FILE"
+
+ ctdb_test_exit_hook_add nfs_test_cleanup
+
+ echo "Mounting ${test_ip}:${nfs_first_export} on ${nfs_mnt_d} ..."
+ mount -o timeo=1,hard,intr,vers=3 \
+ "[${test_ip}]:${nfs_first_export}" ${nfs_mnt_d}
+}
+
+nfs_test_cleanup ()
+{
+ rm -f "$nfs_local_file"
+ umount -f "$nfs_mnt_d"
+ rmdir "$nfs_mnt_d"
+ onnode -q $test_node rmdir "$nfs_test_dir"
+}
diff --git a/ctdb/tests/INTEGRATION/database/basics.001.attach.sh b/ctdb/tests/INTEGRATION/database/basics.001.attach.sh
new file mode 100755
index 0000000..1fbffc5
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/basics.001.attach.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb getdbmap' operates as expected
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+
+# test_node set by select_test_node() above
+# shellcheck disable=SC2154
+ctdb_onnode -v "$test_node" getdbmap
+
+dbid='dbid:0x[[:xdigit:]]+'
+name='name:[^[:space:]]+'
+path='path:[^[:space:]]+'
+opts='( (PERSISTENT|STICKY|READONLY|REPLICATED|UNHEALTHY))*'
+line="${dbid} ${name} ${path}${opts}"
+dbmap_pattern="^(Number of databases:[[:digit:]]+|${line})\$"
+
+# outfile set by ctdb_onnode() above
+# shellcheck disable=SC2154
+num_db_init=$(sed -n -e '1s/.*://p' "$outfile")
+
+sanity_check_output $(($num_db_init + 1)) "$dbmap_pattern"
+
+for i in $(seq 1 5) ; do
+ f="attach_test_${i}.tdb"
+ echo "Creating test database: $f"
+ ctdb_onnode "$test_node" "attach ${f}"
+
+ ctdb_onnode "$test_node" getdbmap
+ sanity_check_output $((num_db_init + 1)) "$dbmap_pattern"
+ num=$(sed -n -e '1s/^.*://p' "$outfile")
+ if [ "$num" = $((num_db_init + i)) ] ; then
+ echo "OK: correct number of additional databases"
+ else
+ ctdb_test_fail "BAD: no additional database"
+ fi
+ if awk '{print $2}' "$outfile" | grep -Fqx "name:$f" ; then
+ echo "OK: getdbmap knows about \"$f\""
+ else
+ ctdb_test_fail "BAD: getdbmap does not know about \"$f\""
+ fi
+done
diff --git a/ctdb/tests/INTEGRATION/database/basics.002.attach.sh b/ctdb/tests/INTEGRATION/database/basics.002.attach.sh
new file mode 100755
index 0000000..6a5c812
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/basics.002.attach.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+# Verify that databases are attached when a node joins the cluster:
+# 1. Shut down CTDB on one node
+# 2. Attach test databases
+# 3. Check that databases are attached on all up nodes
+# 4. Start CTDB on the node where it is shut down
+# 5. Verify that the test databases are attached on this node
+# 6. Restart one of the nodes
+# 7. Verify that the test databases are attached on this node
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+######################################################################
+
+try_command_on_node 0 "$CTDB listnodes -X | wc -l"
+numnodes="$out"
+lastnode=$(( numnodes - 1 ))
+
+######################################################################
+
+# Confirm that the database is attached with appropriate flags
+check_db_once ()
+{
+ local pnn="$1"
+ local db="$2"
+
+ try_command_on_node "$pnn" $CTDB getdbmap
+ if grep -qF "name:${db}" "$outfile" >/dev/null ; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+check_db ()
+{
+ local pnn="$1"
+ local db="$2"
+ local flag="$3"
+
+ local flags
+
+ echo "Waiting until database ${db} is attached on node ${pnn}"
+ wait_until 10 check_db_once "$pnn" "$db"
+
+ flags=$(awk -v db="$db" '$2 == "name:" db {print $4}' "$outfile")
+ if [ "$flags" = "$flag" ]; then
+ echo "GOOD: db ${db} attached on node ${pnn} with flag $flag"
+ else
+ echo "BAD: db ${db} attached on node ${pnn} with wrong flag"
+ cat "$outfile"
+ exit 1
+ fi
+}
+
+######################################################################
+
+testdb1="test_volatile.tdb"
+testdb2="test_persistent.tdb"
+testdb3="test_replicated.tdb"
+
+test_node="0"
+
+echo "Shutting down node $test_node"
+ctdb_nodes_stop "$test_node"
+sleep 1
+wait_until_node_has_status 1 recovered
+try_command_on_node -v 1 $CTDB status
+
+echo "Create test databases"
+try_command_on_node 1 $CTDB attach "$testdb1"
+try_command_on_node 1 $CTDB attach "$testdb2" persistent
+try_command_on_node 1 $CTDB attach "$testdb3" replicated
+
+echo
+echo "Checking if database is attached with correct flags"
+for node in $(seq 0 $lastnode) ; do
+ if [ $node -ne $test_node ] ; then
+ check_db $node $testdb1 ""
+ check_db $node $testdb2 PERSISTENT
+ check_db $node $testdb3 REPLICATED
+ fi
+done
+
+######################################################################
+
+echo
+echo "Start node $test_node"
+ctdb_nodes_start "$test_node"
+sleep 1
+wait_until_ready
+
+echo
+echo "Checking if database is attached with correct flags"
+check_db $test_node $testdb1 ""
+check_db $test_node $testdb2 PERSISTENT
+check_db $test_node $testdb3 REPLICATED
+
+######################################################################
+
+echo
+echo "Restarting node $test_node"
+ctdb_nodes_restart "$test_node"
+sleep 1
+wait_until_ready
+
+echo
+echo "Checking if database is attached with correct flags"
+check_db $test_node $testdb1 ""
+check_db $test_node $testdb2 PERSISTENT
+check_db $test_node $testdb3 REPLICATED
diff --git a/ctdb/tests/INTEGRATION/database/basics.003.detach.sh b/ctdb/tests/INTEGRATION/database/basics.003.detach.sh
new file mode 100755
index 0000000..cb44955
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/basics.003.detach.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb detach' works as expected:
+# 1. Attach test databases
+# 2. Detach test databases
+# 3. Confirm test databases are not attached
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+######################################################################
+
+try_command_on_node 0 "$CTDB listnodes -X | wc -l"
+numnodes="$out"
+
+######################################################################
+
+# Confirm that the database is attached
+check_db_once ()
+{
+ local db="$1"
+
+ local num_db
+
+ try_command_on_node all "$CTDB getdbmap"
+ num_db=$(grep -cF "name:${db}" "$outfile") || true
+ if [ "$num_db" -eq "$numnodes" ]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+check_db ()
+{
+ local db="$1"
+
+ echo "Waiting until database ${db} is attached on all nodes"
+ wait_until 10 check_db_once "$db"
+}
+
+# Confirm that no nodes have databases attached
+check_no_db_once ()
+{
+ local db="$1"
+
+ local num_db
+
+ try_command_on_node all "$CTDB getdbmap"
+ num_db=$(grep -cF "name:${db}" "$outfile") || true
+ if [ "$num_db" -eq 0 ]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+check_no_db ()
+{
+ local db="$1"
+
+ echo "Waiting until database ${db} is detached on all nodes"
+ wait_until 10 check_no_db_once "$db"
+}
+
+######################################################################
+
+testdb1="detach_test1.tdb"
+testdb2="detach_test2.tdb"
+testdb3="detach_test3.tdb"
+testdb4="detach_test4.tdb"
+
+echo "Create test databases"
+for db in "$testdb1" "$testdb2" "$testdb3" "$testdb4" ; do
+ echo " $db"
+ try_command_on_node 0 $CTDB attach "$db"
+done
+
+for db in "$testdb1" "$testdb2" "$testdb3" "$testdb4" ; do
+ check_db "$db"
+done
+
+######################################################################
+
+echo
+echo "Ensuring AllowClientDBAttach=1 on all nodes"
+try_command_on_node all $CTDB setvar AllowClientDBAttach 1
+
+echo "Check failure detaching single test database $testdb1"
+try_command_on_node 1 "! $CTDB detach $testdb1"
+check_db "$testdb1"
+
+echo
+echo "Setting AllowClientDBAttach=0 on node 0"
+try_command_on_node 0 $CTDB setvar AllowClientDBAttach 0
+
+echo "Check failure detaching single test database $testdb1"
+try_command_on_node 1 "! $CTDB detach $testdb1"
+check_db "$testdb1"
+
+echo
+echo "Setting AllowClientDBAttach=0 on all nodes"
+try_command_on_node all $CTDB setvar AllowClientDBAttach 0
+
+echo "Check detaching single test database $testdb1"
+try_command_on_node 1 "$CTDB detach $testdb1"
+check_no_db "$testdb1"
+
+######################################################################
+
+echo
+echo "Detach multiple test databases"
+echo " $testdb2, $testdb3, $testdb4"
+try_command_on_node 0 $CTDB detach $testdb2 $testdb3 $testdb4
+
+for db in "$testdb2" "$testdb3" "$testdb4" ; do
+ check_no_db "$db"
+done
+
+######################################################################
+
+echo
+echo "Attach a single test database"
+try_command_on_node all $CTDB setvar AllowClientDBAttach 1
+try_command_on_node 0 $CTDB attach $testdb1
+check_db "$testdb1"
+
+echo
+echo "Write a key to database"
+try_command_on_node 0 $CTDB writekey $testdb1 foo bar
+try_command_on_node 0 $CTDB catdb $testdb1
+num_keys=$(sed -n -e 's/Dumped \([0-9]*\) records/\1/p' "$outfile") || true
+if [ -n "$num_keys" ] && [ "$num_keys" -eq 1 ]; then
+ echo "GOOD: Key added to database"
+else
+ echo "BAD: Key did not get added to database"
+ cat "$outfile"
+ exit 1
+fi
+
+echo
+echo "Detach test database"
+try_command_on_node all $CTDB setvar AllowClientDBAttach 0
+try_command_on_node 0 $CTDB detach $testdb1
+check_no_db "$testdb1"
+
+echo
+echo "Re-attach test database"
+try_command_on_node all $CTDB setvar AllowClientDBAttach 1
+try_command_on_node 0 $CTDB attach $testdb1
+check_db "$testdb1"
+
+echo
+echo "Check if the database is empty"
+try_command_on_node 0 $CTDB catdb $testdb1
+num_keys=$(sed -n -e 's/Dumped \([0-9]*\) records/\1/p' "$outfile") || true
+if [ -n "$num_keys" ] && [ "$num_keys" -eq 0 ]; then
+ echo "GOOD: Database $testdb1 is empty"
+else
+ echo "BAD: Database $testdb1 is not empty"
+ cat "$outfile"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/database/basics.004.wipe.sh b/ctdb/tests/INTEGRATION/database/basics.004.wipe.sh
new file mode 100755
index 0000000..115d64c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/basics.004.wipe.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb wipedb' can clear a persistent database:
+# 1. Verify that the status on all of the ctdb nodes is 'OK'.
+# 2. Create a persistent test database
+# 3. Add some records to node 0 and node 1
+# 4. Run wipedb on node 0
+# 5. verify the database is empty on both node 0 and 1
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+# 2.
+test_db="persistent_test.tdb"
+echo "Create persistent test database \"$test_db\""
+try_command_on_node 0 $CTDB attach "$test_db" persistent
+
+# 3.
+# add one record to node 0 key==ABC data==ABC
+echo "Store key(ABC) data(ABC) on node 0"
+db_ctdb_tstore 0 "$test_db" "ABC" "ABC"
+
+# add one record to node 1 key==DEF data==DEF
+echo "Store key(DEF) data(DEF) on node 1"
+db_ctdb_tstore 1 "$test_db" "DEF" "DEF"
+
+# 4.
+echo "Wipe database"
+try_command_on_node 0 $CTDB wipedb "$test_db"
+
+# check that the database is wiped
+num_records=$(db_ctdb_cattdb_count_records 1 "$test_db")
+if [ $num_records = "0" ] ; then
+ echo "OK: Database was wiped"
+else
+ echo "BAD: We did not end up with an empty database"
+ exit 1
+fi
+
+echo "Force a recovery"
+try_command_on_node 0 $CTDB recover
+
+# check that the database is wiped
+num_records=$(db_ctdb_cattdb_count_records 1 "$test_db")
+if [ $num_records = "0" ] ; then
+ echo "OK: Database was wiped"
+else
+ echo "BAD: We did not end up with an empty database"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/database/basics.010.backup_restore.sh b/ctdb/tests/INTEGRATION/database/basics.010.backup_restore.sh
new file mode 100755
index 0000000..8c469d4
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/basics.010.backup_restore.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+
+# Confirm that 'ctdb restoredb' works correctly:
+# 1. Create a persistent test database
+# 2. Add some records to test database
+# 3. Backup database
+# 4. Wipe database and verify the database is empty on all nodes
+# 5. Restore database and make sure all the records are restored
+# 6. Make sure no recovery has been triggered
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 $CTDB status
+generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+# 2.
+test_db="restoredb_test.tdb"
+test_dump=$(mktemp)
+echo $test_dump
+echo "Create persistent test database \"$test_db\""
+try_command_on_node 0 $CTDB attach "$test_db" persistent
+try_command_on_node 0 $CTDB wipedb "$test_db"
+
+# 3.
+# add 10,000 records to database
+echo "Adding 10000 records to database"
+(
+for i in $(seq 1 10000) ; do
+ echo "\"key$i\" \"value$i\""
+done
+) | try_command_on_node -i 0 $CTDB ptrans "$test_db"
+
+num_records=$(db_ctdb_cattdb_count_records 1 "$test_db")
+if [ $num_records = "10000" ] ; then
+ echo "OK: Records added"
+else
+ echo "BAD: We did not end up with 10000 records"
+ echo "num records = $num_records"
+ exit 1
+fi
+
+ctdb_test_exit_hook_add "rm -f $test_dump"
+
+# 4.
+echo "Backup database"
+try_command_on_node 0 $CTDB backupdb "$test_db" "$test_dump"
+
+# 5.
+echo "Wipe database"
+try_command_on_node 0 $CTDB wipedb "$test_db"
+
+# check that the database is wiped
+num_records=$(db_ctdb_cattdb_count_records 1 "$test_db")
+if [ $num_records = "0" ] ; then
+ echo "OK: Database was wiped"
+else
+ echo "BAD: We did not end up with an empty database"
+ echo "num records = $num_records"
+ exit 1
+fi
+
+# 6.
+echo "Restore database"
+try_command_on_node 0 $CTDB restoredb "$test_dump" "$test_db"
+
+# check that the database is restored
+num_records=$(db_ctdb_cattdb_count_records 1 "$test_db")
+if [ $num_records = "10000" ] ; then
+ echo "OK: Database was restored"
+else
+ echo "BAD: We did not end up with 10000 records"
+ echo "num records = $num_records"
+ exit 1
+fi
+
+# 7.
+wait_until_ready
+
+try_command_on_node 0 $CTDB status
+new_generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
+
+echo "Old generation = $generation"
+echo "New generation = $new_generation"
+
+if [ "$generation" = "$new_generation" ]; then
+ echo "OK: Database recovery not triggered."
+else
+ echo "BAD: Database recovery triggered."
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/database/fetch.001.ring.sh b/ctdb/tests/INTEGRATION/database/fetch.001.ring.sh
new file mode 100755
index 0000000..4d7d392
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/fetch.001.ring.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Run the fetch_ring test and sanity check the output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+echo "Running fetch_ring on all $num_nodes nodes."
+testprog_onnode -v -p all \
+ fetch_ring -n "$num_nodes" -D "fetch_ring.tdb" -k "testkey"
+
+pat='^(Waiting for cluster|Fetch\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$'
+sanity_check_output 1 "$pat"
+
+# Get the last line of output.
+last=$(tail -n 1 "$outfile")
+
+# $last should look like this:
+# Fetch[1]: 10670.93 msgs/sec
+stuff="${last##*Fetch\[*\]: }"
+mps="${stuff% msgs/sec*}"
+
+if [ ${mps%.*} -ge 10 ] ; then
+ echo "OK: $mps msgs/sec >= 10 msgs/sec"
+else
+ echo "BAD: $mps msgs/sec < 10 msgs/sec"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/database/fetch.002.ring-hotkeys.sh b/ctdb/tests/INTEGRATION/database/fetch.002.ring-hotkeys.sh
new file mode 100755
index 0000000..6d44253
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/fetch.002.ring-hotkeys.sh
@@ -0,0 +1,161 @@
+#!/usr/bin/env bash
+
+# Run the fetch_ring test, sanity check the output and check hot keys
+# statistics
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+testdb="fetch_ring.tdb"
+
+ctdb_get_all_pnns
+# $all_pnns is set above
+# shellcheck disable=SC2154
+num_nodes=$(echo "$all_pnns" | wc -w | tr -d '[:space:]')
+first=$(echo "$all_pnns" | sed -n -e '1p')
+
+get_key ()
+{
+ _n="$1"
+
+ echo "testkey${_n}"
+}
+
+run_fetch_ring ()
+{
+ _timelimit="$1"
+ _key_num="$2"
+
+ _key=$(get_key "$_key_num")
+ _base_cmd="fetch_ring -n ${num_nodes} -D ${testdb}"
+ _cmd="${_base_cmd} -t ${_timelimit} -k ${_key}"
+ echo "Running \"${_cmd}\" on all $num_nodes nodes."
+ testprog_onnode -v -p all "$_cmd"
+
+ _pat='^(Waiting for cluster|Fetch\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$'
+ sanity_check_output 1 "$_pat"
+
+ # Get the last line of output.
+ # $outfile is set above by testprog_onnode()
+ # shellcheck disable=SC2154
+ _last=$(tail -n 1 "$outfile")
+
+ # $_last should look like this:
+ # Fetch[1]: 10670.93 msgs/sec
+ _stuff="${_last##*Fetch\[*\]: }"
+ _mps="${_stuff% msgs/sec*}"
+
+ if [ "${_mps%.*}" -ge 10 ] ; then
+ echo "OK: ${_mps} msgs/sec >= 10 msgs/sec"
+ else
+ ctdb_test_fail "BAD: ${_mps} msgs/sec < 10 msgs/sec"
+ fi
+}
+
+check_hot_keys ()
+{
+ _pnn="$1"
+ _first_key="$2"
+ _num_keys="$3"
+
+ echo
+ echo "Checking hot keys on node ${_pnn}"
+
+ ctdb_onnode "$_pnn" dbstatistics "$testdb"
+
+ # Get hot keys with a non-empty key
+ _hotkeys=$(grep -Ex '[[:space:]]+Count:[[:digit:]]+ Key:[[:xdigit:]]+' \
+ "$outfile") || true
+
+ # Check that there are the right number of non-empty slots
+ if [ -z "$_hotkeys" ] ; then
+ _num=0
+ else
+ _num=$(echo "$_hotkeys" | wc -l | tr -d '[:space:]')
+ fi
+ _msg="hot key slots in use = ${_num}"
+ if [ "$_num_keys" -ne "$_num" ] ; then
+ echo
+ cat "$outfile"
+ ctdb_test_fail "BAD: ${_msg} (expected ${_num_keys})"
+ fi
+ echo "GOOD: ${_msg}"
+
+ # No hot keys? Done...
+ if [ "$_num" = 0 ] ; then
+ return
+ fi
+
+ # Check that hot key counts are correctly sorted
+ #
+ # Try to be as POSIX as possible
+ # shellcheck disable=SC2001
+ _counts=$(echo "$_hotkeys" | \
+ sed -e 's|.*Count:\([[:digit:]][[:digit:]]*\).*|\1|')
+ _counts_sorted=$(echo "$_counts" | sort -n)
+ if [ "$_counts" != "$_counts_sorted" ] ; then
+ echo
+ cat "$outfile"
+ ctdb_test_fail "BAD: hot keys not sorted"
+ fi
+ echo "GOOD: hot key counts are correctly sorted"
+
+ # Check that all keys are considered hot
+ for _j in $(seq "$_first_key" $((_first_key + _num_keys - 1))) ; do
+ _key=$(get_key "$_j")
+ _key_hex=$(printf '%s' "$_key" | \
+ od -A n -t x1 | \
+ tr -d '[:space:]')
+ if ! echo "$_hotkeys" | grep -q "Key:${_key_hex}\$" ; then
+ echo
+ cat "$outfile"
+ ctdb_test_fail "BAD: key \"${_key}\" is not a hot key"
+ fi
+ done
+ echo "GOOD: all keys are listed as hot keys"
+}
+
+# Run fetch_ring for each of 10 keys. After each run confirm that all
+# keys used so far are considered hot keys (and do other hot key
+# sanity checks) on all nodes.
+for i in $(seq 1 10) ; do
+ run_fetch_ring 5 "$i"
+
+ for pnn in $all_pnns ; do
+ check_hot_keys "$pnn" 1 "$i"
+ done
+
+ echo
+done
+
+echo
+echo "Resetting statistics on node ${first}"
+ctdb_onnode "$first" statisticsreset
+
+# Ensure that only node $first has had statistics reset
+for pnn in $all_pnns ; do
+ if [ "$pnn" = "$first" ] ; then
+ check_hot_keys "$pnn" 1 0
+ else
+ check_hot_keys "$pnn" 1 10
+ fi
+done
+
+echo
+
+# Run fetch_ring for each of 3 new keys. After each run confirm that
+# the new keys used so far are considered hot keys (and do other hot
+# key sanity checks) on node $first, the only node with reset statistics.
+#
+# Note that nothing can be said about hot keys on other nodes, since
+# they may be an arbitrary blend of old and new keys.
+for i in $(seq 1 3) ; do
+ run_fetch_ring 5 $((100 + i))
+
+ check_hot_keys "$first" 101 "$i"
+
+ echo
+done
diff --git a/ctdb/tests/INTEGRATION/database/readonly.001.basic.sh b/ctdb/tests/INTEGRATION/database/readonly.001.basic.sh
new file mode 100755
index 0000000..aeb9740
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/readonly.001.basic.sh
@@ -0,0 +1,178 @@
+#!/usr/bin/env bash
+
+# Test support for read-only records
+
+# Read-only records can be activated at runtime using a ctdb command.
+# If read-only records are not activated, then any attempt to fetch a
+# read-only copy should be automatically upgraded to a read-write
+# fetch_locked().
+
+# If read-only delegations are present, then any attempt to acquire a
+# read-write fetch_lock will trigger revocation of all delegations
+# before the fetch_locked().
+
+# 1. Create a test database and some records
+# 2. Try to fetch read-only records, this should not result in any delegations
+# 3. Activate read-only support
+# 4. Try to fetch read-only records, this should result in delegations
+# 5. Do a fetchlock and the delegations should be revoked
+# 6. Try to fetch read-only records, this should result in delegations
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+######################################################################
+
+# Confirm that no nodes have databases with read-only delegations
+check_no_readonly ()
+{
+ try_command_on_node all $CTDB cattdb $testdb
+ local ro_flags="RO_HAVE_READONLY|RO_HAVE_DELEGATIONS"
+ local numreadonly=$(grep -c -E "$ro_flags" "$outfile") || true
+ if [ $numreadonly -eq 0 ] ; then
+ echo "GOOD: no read-only delegations"
+ else
+ echo "BAD: there are read-only delegations"
+ cat "$outfile"
+ exit 1
+ fi
+}
+
+# Check that the test record has the correct read-only flags on the
+# given nodes. The first node is the dmaster, which should know there
+# are delegations but should not be flagged as having a read-only
+# copy. Subsequent nodes should have a read-only copy but not know
+# about any (other) delegations.
+check_readonly ()
+{
+ local dmaster="$1" ; shift
+ local others="$*"
+
+ local count
+
+ try_command_on_node $dmaster $CTDB cattdb $testdb
+ count=$(grep -c -E "RO_HAVE_DELEGATIONS" "$outfile") || true
+ if [ $count -eq 1 ] ; then
+ echo "GOOD: dmaster ${dmaster} has read-only delegations"
+ else
+ echo "BAD: dmaster ${dmaster} has no read-only delegations"
+ cat "$outfile"
+ exit 1
+ fi
+ count=$(grep -c -E "RO_HAVE_READONLY" "$outfile") || true
+ if [ $count -ne 0 ] ; then
+ echo "BAD: dmaster ${dmaster} has a read-only copy"
+ cat "$outfile"
+ exit 1
+ fi
+
+ local o
+ for o in $others ; do
+ try_command_on_node $o $CTDB cattdb $testdb
+ count=$(grep -c -E "RO_HAVE_READONLY" "$outfile") || true
+ if [ $count -eq 1 ] ; then
+ echo "GOOD: node ${o} has a read-only copy"
+ else
+ echo "BAD: node ${o} has no read-only copy"
+ cat "$outfile"
+ exit 1
+ fi
+ count=$(grep -c -E "RO_HAVE_DELEGATIONS" "$outfile") || true
+ if [ $count -ne 0 ] ; then
+ echo "BAD: other node ${o} has read-only delegations"
+ cat "$outfile"
+ exit 1
+ fi
+ done
+}
+
+######################################################################
+
+echo "Get list of nodes..."
+ctdb_onnode 0 "-X listnodes"
+all_nodes=$(awk -F'|' '{print $2}' "$outfile")
+
+######################################################################
+
+testdb="test.tdb"
+echo "Create test database \"${testdb}\""
+try_command_on_node 0 $CTDB attach $testdb
+
+echo "Create some records..."
+try_command_on_node all $CTDB_TEST_WRAPPER $VALGRIND update_record \
+ -D ${testdb} -k testkey
+
+######################################################################
+
+echo "Try some readonly fetches, these should all be upgraded to full fetchlocks..."
+try_command_on_node all $CTDB_TEST_WRAPPER $VALGRIND fetch_readonly \
+ -D ${testdb} -k testkey
+
+check_no_readonly
+
+######################################################################
+
+echo "Activate read-only record support for \"$testdb\"..."
+try_command_on_node all $CTDB setdbreadonly $testdb
+
+# Database should be tagged as READONLY
+try_command_on_node 0 $CTDB getdbmap
+db_details=$(awk -v db="$testdb" '$2 == "name:" db { print }' "$outfile")
+if grep -q "READONLY" <<<"$db_details" ; then
+ echo "GOOD: read-only record support is enabled"
+else
+ echo "BAD: could not activate read-only support"
+ echo "$db_details"
+ exit 1
+fi
+
+######################################################################
+
+echo "Create 1 read-only delegation ..."
+# dmaster=1
+try_command_on_node 1 $CTDB_TEST_WRAPPER $VALGRIND update_record \
+ -D ${testdb} -k testkey
+
+# Fetch read-only to node 0
+try_command_on_node 0 $CTDB_TEST_WRAPPER $VALGRIND fetch_readonly \
+ -D ${testdb} -k testkey
+
+check_readonly 1 0
+
+######################################################################
+
+echo "Verify that a fetchlock revokes read-only delegations..."
+# Node 1 becomes dmaster
+try_command_on_node 1 $CTDB_TEST_WRAPPER $VALGRIND update_record \
+ -D ${testdb} -k testkey
+
+check_no_readonly
+
+######################################################################
+
+echo "Create more read-only delegations..."
+dmaster=1
+try_command_on_node $dmaster $CTDB_TEST_WRAPPER $VALGRIND update_record \
+ -D ${testdb} -k testkey
+
+others=""
+for n in $all_nodes ; do
+ if [ "$n" != "$dmaster" ] ; then
+ # Fetch read-only copy to this node
+ try_command_on_node $n $CTDB_TEST_WRAPPER $VALGRIND fetch_readonly \
+ -D ${testdb} -k testkey
+ others="${others} ${n}"
+ fi
+done
+
+check_readonly $dmaster $others
+
+######################################################################
+
+echo "Verify that a recovery will revoke the delegations..."
+try_command_on_node 0 $CTDB recover
+
+check_no_readonly
diff --git a/ctdb/tests/INTEGRATION/database/recovery.001.volatile.sh b/ctdb/tests/INTEGRATION/database/recovery.001.volatile.sh
new file mode 100755
index 0000000..d7aaa3b
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/recovery.001.volatile.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+
+# Test that recovery correctly handles RSNs
+
+# Recovery can under certain circumstances lead to old record copies
+# resurrecting: Recovery selects the newest record copy purely by RSN. At
+# the end of the recovery, the leader is the dmaster for all
+# records in all (non-persistent) databases. And the other nodes locally
+# hold the complete copy of the databases. The bug is that the recovery
+# process does not increment the RSN on the leader at the end of
+# the recovery. Now clients acting directly on the leader will
+# directly change a record's content on the leader without migration
+# and hence without RSN bump. So a subsequent recovery can not tell that
+# the leader's copy is newer than the copies on the other nodes, since
+# their RSN is the same. Hence, if the leader is not node 0 (or more
+# precisely not the active node with the lowest node number), the recovery
+# will choose copies from nodes with lower number and stick to these.
+
+# 1. Create a test database
+# 2. Add a record with value value1 on leader
+# 3. Force a recovery
+# 4. Update the record with value value2 on leader
+# 5. Force a recovery
+# 6. Confirm that the value is value2
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+#
+# Main test
+#
+TESTDB="rec_test.tdb"
+
+status=0
+
+# Make sure node 0 is not the leader
+echo "find out which node is leader"
+ctdb_onnode 0 leader
+leader="$out"
+if [ "$leader" = "0" ]; then
+ echo "node 0 is leader, disable leader role on node 0"
+ #
+ # Note:
+ # It should be sufficient to run "ctdb setleaderrole off"
+ # on node 0 and wait for election and recovery to finish.
+ # But there were problems related to this in this automatic
+ # test, so for now use "ctdb stop" and "ctdb continue".
+ #
+ echo "stop node 0"
+ try_command_on_node 0 $CTDB stop
+ wait_until_node_has_status 0 stopped
+ echo "continue node 0"
+ try_command_on_node 0 $CTDB continue
+ wait_until_node_has_status 0 notstopped
+
+ ctdb_onnode 0 leader
+ leader="$out"
+ if [ "$leader" = "0" ]; then
+ echo "failed to move leader to different node"
+ exit 1
+ fi
+fi
+
+echo "Leader:${leader}"
+
+# Create a temporary non-persistent database to test with
+echo "create test database $TESTDB"
+ctdb_onnode "$leader" attach "$TESTDB"
+
+# Wipe Test database
+echo "wipe test database"
+ctdb_onnode "$leader" wipedb "$TESTDB"
+
+# Add a record key=test1 data=value1
+echo "store key(test1) data(value1)"
+ctdb_onnode "$leader" writekey "$TESTDB" test1 value1
+
+# Fetch a record key=test1
+echo "read key(test1)"
+ctdb_onnode "$leader" readkey "$TESTDB" test1
+cat "$outfile"
+
+# Do a recovery
+echo "force recovery"
+ctdb_onnode "$leader" recover
+
+wait_until_node_has_status "$leader" recovered
+
+# Add a record key=test1 data=value2
+echo "store key(test1) data(value2)"
+ctdb_onnode "$leader" writekey "$TESTDB" test1 value2
+
+# Fetch a record key=test1
+echo "read key(test1)"
+ctdb_onnode "$leader" readkey "$TESTDB" test1
+cat "$outfile"
+
+# Do a recovery
+echo "force recovery"
+ctdb_onnode "$leader" recover
+
+wait_until_node_has_status "$leader" recovered
+
+# Verify record key=test1
+echo "read key(test1)"
+ctdb_onnode "$leader" readkey "$TESTDB" test1
+cat "$outfile"
+if [ "$out" = "Data: size:6 ptr:[value2]" ]; then
+ echo "GOOD: Recovery did not corrupt database"
+else
+ echo "BAD: Recovery corrupted database"
+ status=1
+fi
+
+exit $status
diff --git a/ctdb/tests/INTEGRATION/database/recovery.002.large.sh b/ctdb/tests/INTEGRATION/database/recovery.002.large.sh
new file mode 100755
index 0000000..4736071
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/recovery.002.large.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+# Test recovery of large volatile and persistent databases
+
+# Recovery now uses DB_PULL and DB_PUSH_START/DB_PUSH_CONFIRM
+# controls. This sends the records in batches of ~RecBufferSizeLimit
+# in size at a time. Test that large databases are re-assembled
+# correctly.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+#
+# Main test
+#
+TEST1DB="large_persistent_db.tdb"
+TEST2DB="large_volatile_db.tdb"
+RECDATA=$(onnode 0 mktemp)
+
+# Create a persistent database to test
+echo "create persistent test database $TEST1DB"
+try_command_on_node 0 $CTDB attach $TEST1DB persistent
+
+# Wipe Test database
+echo "wipe test database $TEST1DB"
+try_command_on_node 0 $CTDB wipedb $TEST1DB
+
+# Create dummy record data
+echo "creating dummy record data"
+onnode 0 dd if=/dev/urandom of=$RECDATA bs=10K count=1
+
+# Add 345 records
+echo "Adding 345 records"
+for i in $(seq 1 345) ; do
+ try_command_on_node 0 $CTDB pstore $TEST1DB record$i $RECDATA || exit 1
+done
+
+num_records=$(db_ctdb_cattdb_count_records 0 $TEST1DB)
+if [ $num_records = "345" ] ; then
+ echo "OK: records added correctly"
+else
+ echo "BAD: persistent database has $num_records of 345 records"
+ try_command_on_node -v 0 "$CTDB cattdb $TEST1DB | tail -n 1"
+ exit 1
+fi
+
+# Create a volatile database to test
+echo "create volatile test database $TEST2DB"
+try_command_on_node 0 $CTDB attach $TEST2DB
+
+# Wipe Test database
+echo "wipe test database $TEST2DB"
+try_command_on_node 0 $CTDB wipedb $TEST2DB
+
+# Create dummy record data
+v1="1234567890"
+v2="$v1$v1$v1$v1$v1$v1$v1$v1$v1$v1"
+v3="$v2$v2$v2$v2$v2$v2$v2$v2$v2$v2"
+
+# Add 1234 records
+echo "Adding 1234 records"
+for i in $(seq 1 1234) ; do
+ try_command_on_node 0 $CTDB writekey $TEST2DB record$i $v3 || exit 1
+done
+
+num_records=$(db_ctdb_cattdb_count_records 0 $TEST2DB)
+if [ $num_records = "1234" ] ; then
+ echo "OK: records added correctly"
+else
+ echo "BAD: volatile database has $num_records of 1234 records"
+ try_command_on_node -v 0 "$CTDB cattdb $TEST2DB | tail -n 1"
+ exit 1
+fi
+
+echo
+leader_get 0
+# Set RecBufferSizeLimit to 10000
+ctdb_onnode "$leader" setvar RecBufferSizeLimit 10000
+
+# Do a recovery
+echo "force recovery"
+try_command_on_node 0 $CTDB recover
+
+wait_until_node_has_status 0 recovered 30
+
+# check that there are correct number of records
+num_records=$(db_ctdb_cattdb_count_records 0 $TEST1DB)
+if [ $num_records = "345" ] ; then
+ echo "OK: persistent database recovered correctly"
+else
+ echo "BAD: persistent database has $num_records of 345 records"
+ try_command_on_node -v 0 "$CTDB cattdb $TEST1DB | tail -n 1"
+ exit 1
+fi
+
+num_records=$(db_ctdb_cattdb_count_records 0 $TEST2DB)
+if [ $num_records = "1234" ] ; then
+ echo "OK: volatile database recovered correctly"
+else
+ echo "BAD: volatile database has $num_records of 1234 records"
+ try_command_on_node -v 0 "$CTDB cattdb $TEST2DB | tail -n 1"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/database/recovery.003.no_resurrect.sh b/ctdb/tests/INTEGRATION/database/recovery.003.no_resurrect.sh
new file mode 100755
index 0000000..b314d4d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/recovery.003.no_resurrect.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+
+# Ensure recovery doesn't resurrect deleted records from recently
+# inactive nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+testdb="rec_test.tdb"
+
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+first=$(echo "$all_pnns" | sed -n -e '1p')
+second=$(echo "$all_pnns" | sed -n -e '2p')
+notfirst=$(echo "$all_pnns" | tail -n +2)
+
+echo "Create/wipe test database ${testdb}"
+try_command_on_node $first $CTDB attach "$testdb"
+try_command_on_node $first $CTDB wipedb "$testdb"
+
+echo "store key(test1) data(value1)"
+try_command_on_node $first $CTDB writekey "$testdb" test1 value1
+
+echo "Migrate key(test1) to all nodes"
+try_command_on_node all $CTDB readkey "$testdb" test1
+
+echo "Stop node ${first}"
+try_command_on_node $first $CTDB stop
+wait_until_node_has_status $first stopped
+
+echo "Delete key(test1)"
+try_command_on_node $second $CTDB deletekey "$testdb" test1
+
+database_has_zero_records ()
+{
+ # Check, via check_cattdb_num_records(), that $testdb has 0 records
+ # on the nodes in $notfirst (can be multi-word, so passed quoted)
+ check_cattdb_num_records "$testdb" 0 "$notfirst"
+}
+
+echo "Trigger a recovery"
+try_command_on_node "$second" $CTDB recover
+
+echo "Checking that database has 0 records"
+database_has_zero_records
+
+echo "Continue node ${first}"
+try_command_on_node $first $CTDB continue
+wait_until_node_has_status $first notstopped
+
+echo "Get database contents"
+try_command_on_node -v $first $CTDB catdb "$testdb"
+
+if grep -q '^key(' "$outfile" ; then
+ echo "BAD: Deleted record has been resurrected"
+ exit 1
+fi
+
+echo "GOOD: Deleted record is still gone"
diff --git a/ctdb/tests/INTEGRATION/database/recovery.010.persistent.sh b/ctdb/tests/INTEGRATION/database/recovery.010.persistent.sh
new file mode 100755
index 0000000..d13a9a5
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/recovery.010.persistent.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+
+# Ensure that persistent databases are correctly recovered by database
+# sequence number
+#
+# 1. Create and wipe a persistent test database
+# 2. Directly add a single record to the database on each node
+# 3. Trigger a recovery
+# 4. Ensure that the database contains only a single record
+#
+# Repeat but with sequence numbers set by hand on each node
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+add_record_per_node ()
+{
+    # Store a distinct record (KEY<n>/DATA<n>) on each node <n> in turn
+    _i=0
+    while [ $_i -lt $num_nodes ] ; do
+        _k="KEY${_i}" _d="DATA${_i}"
+        printf 'Store key(%s) data(%s) on node %s\n' "$_k" "$_d" "$_i"
+        db_ctdb_tstore "$_i" "$test_db" "$_k" "$_d"
+        _i=$((_i + 1))
+    done
+}
+
+test_db="persistent_test.tdb"
+echo "Create persistent test database \"$test_db\""
+try_command_on_node 0 $CTDB attach "$test_db" persistent
+
+# 3,
+# If no __db_sequence_number__ recover whole database
+#
+
+echo
+echo "Test that no __db_sequence_number__ does not blend the database during recovery"
+
+# wipe database
+echo "Wipe the test database"
+try_command_on_node 0 $CTDB wipedb "$test_db"
+
+add_record_per_node
+
+# force a recovery
+echo force a recovery
+try_command_on_node 0 $CTDB recover
+
+# Check that we now have 1 record on node 0
+num_records=$(db_ctdb_cattdb_count_records 0 "$test_db")
+if [ $num_records = "1" ] ; then
+ echo "OK: databases were not blended"
+else
+ echo "BAD: we did not end up with the expected single record after the recovery"
+ exit 1
+fi
+
+
+# 4,
+# With __db_sequence_number__ set by hand, recovery must not blend databases
+#
+
+echo
+echo test that __db_sequence_number__ does not blend the database during recovery
+
+# wipe database (same database as the first scenario, so use $test_db)
+echo wipe the test database
+try_command_on_node 0 $CTDB wipedb "$test_db"
+
+add_record_per_node
+
+echo "Add __db_sequence_number__==5 record to all nodes"
+pnn=0
+while [ $pnn -lt $num_nodes ] ; do
+    db_ctdb_tstore_dbseqnum $pnn "$test_db" 5
+    pnn=$((pnn + 1))
+done
+
+echo "Set __db_sequence_number__ to 7 on node 0"
+db_ctdb_tstore_dbseqnum 0 "$test_db" 7
+
+echo "Set __db_sequence_number__ to 8 on node 1"
+db_ctdb_tstore_dbseqnum 1 "$test_db" 8
+
+
+# force a recovery
+echo force a recovery
+try_command_on_node 0 $CTDB recover
+
+# Check that we now have a single record on node 0, i.e. no blend
+num_records=$(db_ctdb_cattdb_count_records 0 "$test_db")
+if [ "$num_records" = "1" ] ; then
+    echo "OK: databases were not blended"
+else
+    echo "BAD: we did not end up with the expected single record after the recovery"
+    exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/database/recovery.011.continue.sh b/ctdb/tests/INTEGRATION/database/recovery.011.continue.sh
new file mode 100755
index 0000000..995b282
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/recovery.011.continue.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+
+# Confirm that the deleted records are not resurrected after recovery
+#
+# 1. Create a persistent database
+# 2. Add a record and update it a few times.
+# 3. Delete the record
+# 4. Use "ctdb stop" to stop one of the nodes
+# 5. Add a record with same key.
+# 6. Continue on the stopped node
+# 7. Confirm that the record still exists
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+do_test()
+{
+# Wipe test database so the scenario starts with no records in $TESTDB
+echo "wipe test database"
+try_command_on_node 0 $CTDB wipedb $TESTDB
+
+# Add a record key=test1 data=value1 via "ctdb ptrans" on node 0
+# and then update its value several times
+for value in value1 value2 value3 value4 value5 ; do
+ echo "store key(test1) data($value)"
+ echo "\"test1\" \"$value\"" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB"
+done
+
+# Delete the record again
+echo "delete key(test1)"
+try_command_on_node 0 $CTDB pdelete $TESTDB test1
+
+# Stop node 1 and wait for it to report stopped
+echo "stop node 1"
+try_command_on_node 1 $CTDB stop
+
+wait_until_node_has_status 1 stopped
+
+# While node 1 is stopped, add a record with the same key: data=newvalue1
+echo "store key(test1) data(newvalue1)"
+echo '"test1" "newvalue1"' | try_command_on_node -i 0 $CTDB ptrans "$TESTDB"
+
+# Continue node 1 and wait until it is no longer stopped
+echo "continue node 1"
+try_command_on_node 1 $CTDB continue
+
+wait_until_node_has_status 1 notstopped
+
+}
+
+#
+# Main test
+#
+TESTDB="persistent_test.tdb"
+
+status=0
+
+# Create a temporary persistent database to test with
+echo "create persistent test database $TESTDB"
+try_command_on_node 0 $CTDB attach $TESTDB persistent
+
+do_test
+if try_command_on_node 0 $CTDB pfetch $TESTDB test1 ; then
+ echo "GOOD: Record was not deleted (recovery by sequence number worked)"
+else
+ echo "BAD: Record was deleted"
+ status=1
+fi
+
+exit $status
diff --git a/ctdb/tests/INTEGRATION/database/scripts/local.bash b/ctdb/tests/INTEGRATION/database/scripts/local.bash
new file mode 100644
index 0000000..ae2e0d5
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/scripts/local.bash
@@ -0,0 +1,116 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+check_cattdb_num_records ()
+{
+    # Usage: check_cattdb_num_records <db> <num> <nodes>
+    #
+    # Check that <db> has exactly <num> record(s) on each node listed in
+    # <nodes>.  Every mismatching node is reported and has its database
+    # dumped with "cattdb".  Returns 1 if any node mismatched, else 0.
+    local db="$1" num="$2" nodes="$3"
+
+    # <nodes> has embedded newlines - flatten the list for printing
+    local oneline
+    oneline=$(echo "$nodes" | xargs)
+    echo "Confirm that ${db} has ${num} record(s) on node(s): ${oneline}"
+
+    local rc=0
+    local pnn found
+    # Keep going after a mismatch so that every bad node gets reported
+    for pnn in $nodes ; do
+        found=$(db_ctdb_cattdb_count_records "$pnn" "$db")
+        if [ "$found" != "$num" ] ; then
+            printf 'BAD: %s on node %d has %d record(s), expected %d\n' \
+                   "$db" "$pnn" "$found" "$num"
+            ctdb_onnode -v "$pnn" "cattdb $db"
+            rc=1
+        fi
+    done
+
+    return $rc
+}
+
+_key_dmaster_check ()
+{
+    # Usage: _key_dmaster_check <node> <db> <key> [<dmaster>]
+    # Run "ctdb-db-test local-read" for <key> in <db> on <node>; succeed
+    # only if the output contains the exact line "dmaster: <dmaster>".
+    # <dmaster> defaults to <node>.
+    local node="$1" db="$2" key="$3"
+    local dmaster="${4:-$node}"
+    testprog_onnode "$node" "ctdb-db-test local-read ${db} ${key}"
+    # $outfile is expected to be set by testprog_onnode() above
+    # shellcheck disable=SC2154
+    grep -Fqx "dmaster: ${dmaster}" "$outfile"
+}
+
+_key_dmaster_fail ()
+{
+    # Usage: _key_dmaster_fail <dmaster>
+    # Report that node <dmaster> is not dmaster, dump $outfile, fail test
+    printf 'BAD: node %s is not dmaster\n' "$1"
+    # $outfile is set by the caller via _key_dmaster_check()
+    # shellcheck disable=SC2154
+    cat "$outfile"
+    ctdb_test_fail
+}
+
+vacuum_test_key_dmaster ()
+{
+    # Usage: vacuum_test_key_dmaster <node> <db> <key> [<dmaster>]
+    #
+    # Fail the test unless _key_dmaster_check() finds <dmaster>
+    # (default: <node>) as the dmaster of <key> in <db> on <node>.
+    local node="$1" db="$2" key="$3"
+    local dmaster="${4:-$node}"
+    _key_dmaster_check "$node" "$db" "$key" "$dmaster" ||
+        _key_dmaster_fail "$dmaster"
+}
+
+vacuum_test_wait_key_dmaster ()
+{
+    # Usage: vacuum_test_wait_key_dmaster <node> <db> <key> [<dmaster>]
+    #
+    # Like vacuum_test_key_dmaster(), but the check is retried through
+    # "wait_until 30" before the test is failed.
+    local node="$1" db="$2" key="$3"
+    local dmaster="${4:-$node}"
+
+    wait_until 30 _key_dmaster_check "$node" "$db" "$key" "$dmaster" ||
+        _key_dmaster_fail "$dmaster"
+}
+
+vacuum_confirm_key_empty_dmaster ()
+{
+    # Usage: vacuum_confirm_key_empty_dmaster <node> <db> <key> [<dmaster>]
+    #
+    # Fail the test unless <key> in <db> on <node> has <dmaster>
+    # (default: <node>) as its dmaster and an empty data field.
+    local node="$1" db="$2" key="$3"
+    local dmaster="${4:-$node}"
+    echo "Confirm record key=\"${key}\" is empty and dmaster=${dmaster}"
+    vacuum_test_key_dmaster "$node" "$db" "$key" "$dmaster"
+
+    # Re-uses $outfile, which the dmaster check above is expected to fill
+    grep -Fqx 'data(0) = ""' "$outfile" && return
+    echo "BAD: record not empty"
+    cat "$outfile"
+    ctdb_test_fail
+}
+
+db_confirm_key_has_value ()
+{
+    # Usage: db_confirm_key_has_value <node> <db> <key> <val>
+    #
+    # Fail the test unless "readkey" on <node> reports <val> as the
+    # data of <key> in <db>.
+    local node="$1" db="$2" key="$3" val="$4"
+    # Local, so neither the command output nor the parsed value leaks
+    local out outv
+    ctdb_onnode "$node" "readkey ${db} ${key}"
+    outv=$(echo "$out" | sed -n 's|^Data: size:.* ptr:\[\(.*\)\]$|\1|p')
+    if [ "$val" != "$outv" ] ; then
+        ctdb_test_fail \
+            "BAD: value for \"${key}\"=\"${outv}\" (not \"${val}\")"
+    fi
+}
diff --git a/ctdb/tests/INTEGRATION/database/transaction.001.ptrans.sh b/ctdb/tests/INTEGRATION/database/transaction.001.ptrans.sh
new file mode 100755
index 0000000..556e523
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/transaction.001.ptrans.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb ptrans' works as expected
+#
+# Pipe some operations to ctdb ptrans and validate the TDB contents
+# with ctdb catdb
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+TESTDB="ptrans_test.tdb"
+
+# Create a temporary persistent database to test with
+echo "create persistent test database $TESTDB"
+try_command_on_node 0 $CTDB attach $TESTDB persistent
+
+# Wipe Test database
+echo "wipe test database"
+try_command_on_node 0 $CTDB wipedb $TESTDB
+
+##########
+
+echo "Adding 3 records"
+
+items='
+"key1" "value1"
+"key2" "value1"
+"key3" "value1"'
+
+echo "$items" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB"
+
+try_command_on_node 0 $CTDB catdb "$TESTDB"
+
+n=$(grep -c '^key.*= "key.*"' "$outfile" || true)
+
+if [ $n -ne 3 ] ; then
+ echo "BAD: expected 3 keys in..."
+ cat "$outfile"
+ exit 1
+else
+ echo "GOOD: 3 records were inserted"
+fi
+
+##########
+
+echo "Deleting 1 record, updating 1, adding 1 new record, 1 bogus input line"
+
+items='
+"key1" ""
+"key2" "value2"
+"key3"
+"key4" "value1"'
+
+echo "$items" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB"
+
+try_command_on_node 0 $CTDB catdb "$TESTDB"
+
+n=$(grep -c '^key.*= "key.*"' "$outfile" || true)
+
+if [ $n -ne 3 ] ; then
+ echo "BAD: expected 3 keys in..."
+ cat "$outfile"
+ exit 1
+else
+ echo "GOOD: 3 records found"
+fi
+
+##########
+
+echo "Verifying records"
+# Each here-document line is "<key> <expected value>"; -r stops read mangling backslashes
+while read -r key value ; do
+    try_command_on_node 0 $CTDB pfetch "$TESTDB" "$key"
+    if [ "$value" != "$out" ] ; then
+        echo "BAD: for key \"$key\" expected \"$value\" but got \"$out\""
+        exit 1
+    else
+        echo "GOOD: for key \"$key\" got \"$out\""
+    fi
+done <<EOF
+key2 value2
+key3 value1
+key4 value1
+EOF
+
+##########
+
+echo "Deleting all records"
+
+items='
+"key2" ""
+"key3" ""
+"key4" ""'
+
+echo "$items" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB"
+
+try_command_on_node 0 $CTDB catdb "$TESTDB"
+
+n=$(grep -c '^key.*= "key.*"' "$outfile" || true)
+
+if [ $n -ne 0 ] ; then
+ echo "BAD: expected 0 keys in..."
+ cat "$outfile"
+ exit 1
+else
+ echo "GOOD: 0 records found"
+fi
diff --git a/ctdb/tests/INTEGRATION/database/transaction.002.loop.sh b/ctdb/tests/INTEGRATION/database/transaction.002.loop.sh
new file mode 100755
index 0000000..d633c7c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/transaction.002.loop.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Verify that the transaction_loop test succeeds
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+TESTDB="persistent_trans.tdb"
+
+try_command_on_node 0 "$CTDB attach $TESTDB persistent"
+try_command_on_node 0 "$CTDB wipedb $TESTDB"
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+if [ -z "$CTDB_TEST_TIMELIMIT" ] ; then
+ CTDB_TEST_TIMELIMIT=30
+fi
+
+t="$CTDB_TEST_WRAPPER $VALGRIND transaction_loop \
+ -n ${num_nodes} -t ${CTDB_TEST_TIMELIMIT} \
+ -D ${TESTDB} -T persistent -k testkey"
+
+echo "Running transaction_loop on all $num_nodes nodes."
+try_command_on_node -v -p all "$t"
diff --git a/ctdb/tests/INTEGRATION/database/transaction.003.loop_recovery.sh b/ctdb/tests/INTEGRATION/database/transaction.003.loop_recovery.sh
new file mode 100755
index 0000000..05aadba
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/transaction.003.loop_recovery.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Verify that the transaction_loop test succeeds with recoveries.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+recovery_loop()
+{
+    # Trigger a recovery from node 0, pause 2 seconds, repeat forever
+    local iteration
+    for ((iteration = 1; ; iteration++)) ; do
+        echo "Recovery $iteration"
+        try_command_on_node 0 $CTDB recover
+        sleep 2
+    done
+}
+
+recovery_loop_start()
+{
+    # Background recovery_loop and register an exit hook that kills it
+    recovery_loop >/dev/null &
+    RECLOOP_PID=$!
+    ctdb_test_exit_hook_add "kill $RECLOOP_PID >/dev/null 2>&1"
+}
+
+TESTDB="persistent_trans.tdb"
+
+try_command_on_node 0 "$CTDB attach $TESTDB persistent"
+try_command_on_node 0 "$CTDB wipedb $TESTDB"
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+if [ -z "$CTDB_TEST_TIMELIMIT" ] ; then
+ CTDB_TEST_TIMELIMIT=30
+fi
+
+t="$CTDB_TEST_WRAPPER $VALGRIND transaction_loop \
+ -n ${num_nodes} -t ${CTDB_TEST_TIMELIMIT} \
+ -D ${TESTDB} -T persistent -k testkey"
+
+echo "Starting recovery loop"
+recovery_loop_start
+
+echo "Running transaction_loop on all $num_nodes nodes."
+try_command_on_node -v -p all "$t"
diff --git a/ctdb/tests/INTEGRATION/database/transaction.004.update_record.sh b/ctdb/tests/INTEGRATION/database/transaction.004.update_record.sh
new file mode 100755
index 0000000..528303a
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/transaction.004.update_record.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+# Verify that "ctdb update_record_persistent" creates new records and
+# updates existing records in a persistent database
+#
+# 1. Create and wipe a persistent test database
+# 2. Do a recovery
+# 3. Confirm that the database is empty
+# 4. Create a new record using "ctdb update_record_persistent"
+# 5. Confirm the record exists in the database using "ctdb cattdb"
+# 6. Update the record's value using "ctdb update_record_persistent"
+# 7. Confirm that the original value no longer exists using "ctdb cattdb"
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+test_db="persistent_test.tdb"
+
+# create a temporary persistent database to test with
+echo "Create persistent test database \"$test_db\""
+try_command_on_node 0 $CTDB attach "$test_db" persistent
+
+
+# 3.
+echo "Wipe the persistent test database"
+try_command_on_node 0 $CTDB wipedb "$test_db"
+echo "Force a recovery"
+try_command_on_node 0 $CTDB recover
+
+# check that the database is wiped
+num_records=$(db_ctdb_cattdb_count_records 1 "$test_db")
+if [ $num_records = "0" ] ; then
+ echo "OK: database was wiped"
+else
+ echo "BAD: we did not end up with an empty database"
+ exit 1
+fi
+
+# 4. and 5.
+echo "Create a new record in the persistent database using UPDATE_RECORD"
+try_command_on_node 0 $CTDB_TEST_WRAPPER $VALGRIND update_record_persistent \
+    -D "$test_db" -k "Update_Record_Persistent" -v "FirstValue"
+# Exactly 1 line of the cattdb output should mention the new value
+try_command_on_node 0 "$CTDB cattdb ${test_db} | grep 'FirstValue' | wc -l"
+if [ "$out" = 1 ] ; then
+    echo "GOOD: we did find the record after the create/update"
+else
+    echo "BAD: we did not find the record after the create/update"
+    exit 1
+fi
+
+# 6. and 7.
+echo Modify an existing record in the persistent database using UPDATE_RECORD
+try_command_on_node 0 $CTDB_TEST_WRAPPER $VALGRIND update_record_persistent \
+ -D "$test_db" -k "Update_Record_Persistent" -v "SecondValue"
+
+try_command_on_node 0 "$CTDB cattdb "$test_db" | grep 'FirstValue' | wc -l"
+if [ "$out" = 0 ] ; then
+ echo "GOOD: did not find old record after the modify/update"
+else
+ echo "BAD: we still found the old record after the modify/update"
+ exit 1
+fi
+
+try_command_on_node 0 "$CTDB cattdb "$test_db" | grep 'SecondValue' | wc -l"
+if [ "$out" = 1 ] ; then
+ echo "GOOD: found the record after the modify/update"
+else
+ echo "BAD: could not find the record after the modify/update"
+ exit 1
+fi
+
+echo "Wipe the persistent test databases and clean up"
+try_command_on_node 0 $CTDB wipedb "$test_db"
diff --git a/ctdb/tests/INTEGRATION/database/transaction.010.loop_recovery.sh b/ctdb/tests/INTEGRATION/database/transaction.010.loop_recovery.sh
new file mode 100755
index 0000000..9de6c34
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/transaction.010.loop_recovery.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+# Verify that the transaction_loop test succeeds with recoveries for
+# replicated databases
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+recovery_loop()
+{
+    # Trigger a recovery from node 0, pause 2 seconds, repeat forever
+    local iteration
+    for ((iteration = 1; ; iteration++)) ; do
+        echo "Recovery $iteration"
+        try_command_on_node 0 $CTDB recover
+        sleep 2
+    done
+}
+
+recovery_loop_start()
+{
+    # Background recovery_loop and register an exit hook that kills it
+    recovery_loop >/dev/null &
+    RECLOOP_PID=$!
+    ctdb_test_exit_hook_add "kill $RECLOOP_PID >/dev/null 2>&1"
+}
+
+TESTDB="replicated_trans.tdb"
+
+try_command_on_node 0 "$CTDB attach $TESTDB replicated"
+try_command_on_node 0 "$CTDB wipedb $TESTDB"
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+if [ -z "$CTDB_TEST_TIMELIMIT" ] ; then
+ CTDB_TEST_TIMELIMIT=30
+fi
+
+t="$CTDB_TEST_WRAPPER $VALGRIND transaction_loop \
+ -n ${num_nodes} -t ${CTDB_TEST_TIMELIMIT} \
+ -D ${TESTDB} -T replicated -k testkey"
+
+echo "Starting recovery loop"
+recovery_loop_start
+
+echo "Running transaction_loop on all $num_nodes nodes."
+try_command_on_node -v -p all "$t"
diff --git a/ctdb/tests/INTEGRATION/database/traverse.001.one.sh b/ctdb/tests/INTEGRATION/database/traverse.001.one.sh
new file mode 100755
index 0000000..1b3b7c2
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/traverse.001.one.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+# Confirm that traverses of volatile databases work as expected
+
+# This is a very simple example. It writes a single record, updates it
+# on another node and then confirms that the correct value is found when
+# traversing. It then repeats this after removing the LMASTER role from
+# the node where the value is updated.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+#
+# Main test
+#
+TESTDB="traverse_db.tdb"
+
+echo "create volatile test database $TESTDB"
+try_command_on_node 0 $CTDB attach "$TESTDB"
+
+echo "wipe test database $TESTDB"
+try_command_on_node 0 $CTDB wipedb "$TESTDB"
+
+echo "write foo=bar0 on node 0"
+try_command_on_node 0 $CTDB writekey "$TESTDB" "foo" "bar0"
+
+echo "write foo=bar1 on node 1"
+try_command_on_node 1 $CTDB writekey "$TESTDB" "foo" "bar1"
+
+echo
+
+check_db_num_records ()
+{
+    # Usage: check_db_num_records <node> <db> <n>
+    # Run "ctdb catdb" for <db> on <node> and exit the test with
+    # status 1 unless its "Dumped <count> records" line reports <n>.
+    local node="$1" db="$2" n="$3"
+    local num
+    echo "Checking on node ${node} to ensure ${db} has ${n} records..."
+    try_command_on_node "$node" "${CTDB} catdb ${db}"
+    num=$(sed -n -e 's|^Dumped \(.*\) records$|\1|p' "$outfile")
+    if [ "$num" = "$n" ] ; then
+        echo "OK: Number of records=${num}"
+        echo
+    else
+        echo "BAD: There were ${num} (!= ${n}) records"
+        cat "$outfile"
+        exit 1
+    fi
+}
+
+check_db_num_records 0 "$TESTDB" 1
+check_db_num_records 1 "$TESTDB" 1
+
+cat <<EOF
+
+Again, this time with 10 records, rewriting 5 of them on the 2nd node
+
+EOF
+
+echo "wipe test database $TESTDB"
+try_command_on_node 0 $CTDB wipedb "$TESTDB"
+
+for i in $(seq 0 9) ; do
+ k="foo${i}"
+ v="bar${i}@0"
+ echo "write ${k}=${v} on node 0"
+ try_command_on_node 0 "${CTDB} writekey ${TESTDB} ${k} ${v}"
+done
+
+for i in $(seq 1 5) ; do
+ k="foo${i}"
+ v="bar${i}@1"
+ echo "write ${k}=${v} on node 1"
+ try_command_on_node 1 "${CTDB} writekey ${TESTDB} ${k} ${v}"
+done
+
+check_db_num_records 0 "$TESTDB" 10
+check_db_num_records 1 "$TESTDB" 10
+
+cat <<EOF
+
+Again, this time with lmaster role off on node 1
+
+EOF
+
+echo "wipe test database $TESTDB"
+try_command_on_node 0 $CTDB wipedb "$TESTDB"
+
+echo "switching off lmaster role on node 1"
+try_command_on_node 1 $CTDB setlmasterrole off
+
+try_command_on_node -v 1 $CTDB getcapabilities
+
+wait_until_node_has_status 1 notlmaster 10 0
+
+echo "write foo=bar0 on node 0"
+try_command_on_node 0 $CTDB writekey "$TESTDB" "foo" "bar0"
+
+echo "write foo=bar1 on node 1"
+try_command_on_node 1 $CTDB writekey "$TESTDB" "foo" "bar1"
+
+echo
+
+check_db_num_records 0 "$TESTDB" 1
+check_db_num_records 1 "$TESTDB" 1
+
+if grep -q "^data(4) = \"bar1\"\$" "$outfile" ; then
+ echo "OK: Data from node 1 was returned"
+else
+ echo "BAD: Data from node 1 was not returned"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/database/traverse.002.many.sh b/ctdb/tests/INTEGRATION/database/traverse.002.many.sh
new file mode 100755
index 0000000..fb0dc98
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/traverse.002.many.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Test cluster wide traverse code
+#
+# 1. Create a volatile test database
+# 2. Add records on different nodes
+# 3. Use "ctdb catdb" to confirm that all added records are present
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes"
+num_nodes=$(echo "$out" | wc -l)
+
+num_records=1000
+
+TESTDB="traverse_test.tdb"
+
+echo "create test database $TESTDB"
+try_command_on_node 0 $CTDB attach $TESTDB
+
+echo "wipe test database $TESTDB"
+try_command_on_node 0 $CTDB wipedb $TESTDB
+
+echo "Add $num_records records to database"
+i=0
+while [ $i -lt $num_records ]; do
+    key=$(printf "key-%04x" $i)
+    value="value-$i"
+    # Spread the writes round-robin across the nodes
+    n=$(( i % num_nodes ))
+    try_command_on_node $n $CTDB writekey $TESTDB $key $value
+
+    i=$(( i + 1 ))
+done
+
+echo "Start a traverse and collect records"
+try_command_on_node 0 $CTDB catdb $TESTDB
+
+num_read=$(tail -n 1 "$outfile" | cut -d\ -f2)
+if [ $num_read -eq $num_records ]; then
+ echo "GOOD: All $num_records records retrieved"
+ status=0
+else
+ echo "BAD: Only $num_read/$num_records records retrieved"
+ status=1
+fi
+
+exit $status
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.001.fast.sh b/ctdb/tests/INTEGRATION/database/vacuum.001.fast.sh
new file mode 100755
index 0000000..27a2225
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.001.fast.sh
@@ -0,0 +1,159 @@
+#!/usr/bin/env bash
+
+# Ensure that vacuuming deletes records on all nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+vacuum_test ()
+{
+    # Usage: vacuum_test <db> <num_records> [<delete_from_lmaster>]
+    # Write <num_records> records to <db>, delete test1 on its lmaster
+    # (if <delete_from_lmaster> is "true") or elsewhere, vacuum, verify
+    local db="$1" num_records="$2" delete_from_lmaster="${3:-false}"
+    local t
+    if "$delete_from_lmaster" ; then
+        t="lmaster"
+    else
+        t="non-lmaster"
+    fi
+
+    echo
+    echo '............................................................'
+    printf 'Creating %d record(s)\n' "$num_records"
+    printf 'Testing vacuuming of 1 record deleted from %s\n' "$t"
+    echo '............................................................'
+
+    echo
+    echo "Stall vacuuming on all nodes"
+    ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+    echo
+    echo "Getting list of nodes..."
+    local all_pnns
+    ctdb_get_all_pnns
+
+    local first
+    first=$(echo "$all_pnns" | sed -n -e '1p')
+
+    echo
+    echo "Create/wipe test database ${db}"
+    ctdb_onnode "$first" "attach ${db}"
+    ctdb_onnode "$first" "wipedb ${db}"
+
+    echo
+    echo "Write ${num_records} records to ${db}"
+    local i
+    for i in $(seq 1 "$num_records") ; do
+        ctdb_onnode "$first" "writekey ${db} test${i} value${i}"
+    done
+
+    echo
+    echo "Migrate record(s) to all nodes"
+    for i in $(seq 1 "$num_records") ; do
+        ctdb_onnode all "readkey ${db} test${i}"
+    done
+
+    echo
+    echo "Confirm that all nodes have all the records"
+    check_cattdb_num_records "$db" "$num_records" "$all_pnns"
+
+    local key="test1"
+    echo
+    echo "Delete key ${key}"
+
+    echo " Find lmaster for key \"${key}\""
+    testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}"
+    # out is set above
+    # shellcheck disable=SC2154
+    local lmaster="$out"
+    echo " lmaster=${lmaster}"
+    local dnode=""
+    if "$delete_from_lmaster" ; then
+        echo " Delete key ${key} on lmaster node ${lmaster}"
+        dnode="$lmaster"
+    else
+        for i in $all_pnns ; do
+            if [ "$i" != "$lmaster" ] ; then
+                dnode="$i"
+                break
+            fi
+        done
+        echo " Delete key ${key} on non-lmaster node ${dnode}"
+    fi
+    ctdb_onnode "$dnode" "deletekey ${db} ${key}"
+
+    echo
+    vacuum_confirm_key_empty_dmaster "$dnode" "$db" "$key"
+
+    echo
+    echo "Confirm all records still exist on all nodes"
+    check_cattdb_num_records "$db" "$num_records" "$all_pnns"
+
+    if ! "$delete_from_lmaster" ; then
+        # Ask the lmaster to fetch the deleted record
+        echo
+        echo "Vacuum on non-lmaster node ${dnode}"
+        testprog_onnode "$dnode" "ctdb-db-test vacuum ${db}"
+
+        echo
+        vacuum_confirm_key_empty_dmaster "$dnode" "$db" "$key"
+
+        # Fetch the record and put it in the delete queue in
+        # the main daemon for processing in next vacuuming run
+        # on the lmaster
+        echo
+        echo "Vacuum on lmaster node ${lmaster}"
+        testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+        echo
+        echo "Confirm all records still exist on all nodes"
+        check_cattdb_num_records "$db" "$num_records" "$all_pnns"
+
+        echo
+        vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key"
+    fi
+
+    echo
+    # In the delete-from-lmaster case, the record is already in
+    # the lmaster's delete-queue so only a single run is needed
+    echo "Vacuum on lmaster node ${lmaster}"
+    testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+    echo
+    echo "Confirm a record has been deleted on all nodes"
+    local n=$((num_records - 1))
+    check_cattdb_num_records "$db" "$n" "$all_pnns"
+
+    echo
+    echo "Confirm all other records still exist with expected values"
+    # The deleted key is skipped; every other key is read back on $first
+    for i in $(seq 1 "$num_records") ; do
+        local k="test${i}"
+        local v="value${i}"
+
+        if [ "$k" = "$key" ] ; then
+            continue
+        fi
+
+        db_confirm_key_has_value "$first" "$db" "$k" "$v"
+    done
+    echo "GOOD"
+}
+
+testdb="vacuum_test.tdb"
+
+# 1 record, delete from non-lmaster
+vacuum_test "$testdb" 1 false
+
+# 10 records, delete from non-lmaster
+vacuum_test "$testdb" 10 false
+
+# 1 record, delete from lmaster
+vacuum_test "$testdb" 1 true
+
+# 10 records, delete from lmaster
+vacuum_test "$testdb" 10 true
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.002.full.sh b/ctdb/tests/INTEGRATION/database/vacuum.002.full.sh
new file mode 100755
index 0000000..0dc8372
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.002.full.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+
+# Ensure a full vacuuming run deletes records
+
+# Create some records, delete some of them on their lmaster (with a
+# test tool that doesn't do SCHEDULE_FOR_DELETION), run some fast
+# vacuuming runs (to ensure they don't delete records that haven't
+# been added to the delete queue) and then try a full vacuuming run,
+# which will actually do a traverse of the database to find empty
+# records and delete them. Confirm that records that haven't been
+# deleted are still there, with expected values.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+db="vacuum_test.tdb"
+# A huge VacuumInterval is set so that, presumably, vacuuming only runs when triggered below
+echo "Stall vacuuming on all nodes"
+ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+echo
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+# all_pnns is set above by ctdb_get_all_pnns(); "first" is its first line
+# shellcheck disable=SC2154
+first=$(echo "$all_pnns" | sed -n -e '1p')
+
+echo
+echo "Create/wipe test database ${db}"
+ctdb_onnode "$first" "attach ${db}"
+ctdb_onnode "$first" "wipedb ${db}"
+# 2 sets of 10 keys: delete1..10 are deleted later on, keep1..10 must survive
+echo
+echo "Create records in ${db}"
+for i in $(seq 1 10) ; do
+ ctdb_onnode "$first" "writekey ${db} delete${i} value${i}"
+ ctdb_onnode "$first" "writekey ${db} keep${i} value${i}"
+done
+# Read every key on all nodes; each node is then expected to hold a copy
+echo
+echo "Migrate record(s) to all nodes"
+for i in $(seq 1 10) ; do
+ ctdb_onnode all "readkey ${db} delete${i}"
+ ctdb_onnode all "readkey ${db} keep${i}"
+done
+# 10 delete* keys + 10 keep* keys = 20 records
+echo
+echo "Confirm that all nodes have all the records"
+check_cattdb_num_records "$db" 20 "$all_pnns"
+
+echo
+echo "Delete all 10 records from their lmaster node"
+for i in $(seq 1 10) ; do
+ key="delete${i}"
+ # The lmaster is looked up separately for each key
+ testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}"
+ # $out is set above by testprog_onnode()
+ # shellcheck disable=SC2154
+ lmaster="$out"
+
+ echo
+ echo "Delete ${key} from lmaster node ${lmaster}"
+ testprog_onnode "$lmaster" \
+ "ctdb-db-test fetch-local-delete $db ${key}"
+ # The delete above doesn't do SCHEDULE_FOR_DELETION (see top of file)
+ vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key"
+done
+
+echo "Do fast vacuuming run on all nodes"
+testprog_onnode "all" "ctdb-db-test vacuum ${db}"
+# The deleted records were never added to the delete queue: all 20 must remain
+echo
+echo "Confirm all records still exist on all nodes"
+check_cattdb_num_records "$db" 20 "$all_pnns"
+
+echo
+echo "Do full vacuuming run on all nodes"
+testprog_onnode "all" "ctdb-db-test vacuum ${db} full"
+# A full run traverses the database, so the 10 empty delete* records must go
+echo
+echo "Confirm 10 records exist on all nodes"
+check_cattdb_num_records "$db" 10 "$all_pnns"
+
+echo
+echo "Confirm that remaining records still exist with expected values"
+for i in $(seq 1 10) ; do
+ k="keep${i}"
+ v="value${i}"
+ # keep${i} was written with value${i} when the records were created
+ db_confirm_key_has_value "$first" "$db" "$k" "$v"
+done
+echo "GOOD"
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.003.recreate.sh b/ctdb/tests/INTEGRATION/database/vacuum.003.recreate.sh
new file mode 100755
index 0000000..acb7b13
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.003.recreate.sh
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+
+# Ensure that vacuuming does not delete a record that is recreated
+# before vacuuming completes. This needs at least 3 nodes.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+db="vacuum_test.tdb"
+# A huge VacuumInterval is set so that, presumably, vacuuming only runs when triggered by hand
+echo "Stall vacuuming on all nodes"
+ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+echo
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+# all_pnns is set above by ctdb_get_all_pnns(); "first" is its first line
+# shellcheck disable=SC2154
+first=$(echo "$all_pnns" | sed -n -e '1p')
+
+echo
+echo "Create/wipe test database ${db}"
+ctdb_onnode "$first" "attach ${db}"
+ctdb_onnode "$first" "wipedb ${db}"
+
+echo
+echo "Create a record in ${db}"
+ctdb_onnode "$first" "writekey ${db} key value1"
+
+echo
+echo "Migrate record to all nodes"
+ctdb_onnode all "readkey ${db} key"
+
+echo
+echo "Confirm that all nodes have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+echo "Determine lmaster node for key"
+testprog_onnode "$first" "ctdb-db-test get-lmaster key"
+# $out is set above by testprog_onnode()
+# shellcheck disable=SC2154
+lmaster="$out"
+echo "lmaster=${lmaster}"
+
+non_lmaster=""
+# Find a non-lmaster node: the first PNN in $all_pnns that isn't $lmaster
+for i in $all_pnns ; do
+ if [ "$i" != "$lmaster" ] ; then
+ non_lmaster="$i"
+ break
+ fi
+done
+if [ -z "$non_lmaster" ] ; then
+ ctdb_test_fail "Could not find non-lmaster node for key"
+fi
+
+another_non_lmaster=""
+# Find another non-lmaster node, different from $non_lmaster (hence 3 nodes)
+for i in $all_pnns ; do
+ if [ "$i" != "$lmaster" ] && [ "$i" != "$non_lmaster" ] ; then
+ another_non_lmaster="$i"
+ break
+ fi
+done
+if [ -z "$another_non_lmaster" ] ; then
+ ctdb_test_fail "Could not find another non-lmaster node for key"
+fi
+
+# vacuum_test <db> <key> <val> <dnode> <rnode> <rrun>
+#
+# Delete <key> on node <dnode>, recreate it with value <val> on node
+# <rnode> after <rrun> (0, 1 or 2) fast vacuuming runs and confirm
+# that the record survives the remaining runs with the new value.
+# Uses the globals $lmaster, $first and $all_pnns.
+vacuum_test ()
+{
+	local db="$1" key="$2" val="$3"
+	local dnode="$4" rnode="$5" rrun="$6"
+
+	# Vacuuming runs happen in this order: once on the deleting
+	# node, then twice on the lmaster
+	local _vt_run=0
+	local _vt_node _vt_desc
+
+	echo
+	echo '............................................................'
+	printf 'Delete key %s on node %d\n' "$key" "$dnode"
+	printf 'Recreate on node %d after %d vacuuming run(s)\n' "$rnode" "$rrun"
+	echo '............................................................'
+
+	echo
+	echo "Delete key \"${key}\" from node ${dnode}"
+	ctdb_onnode "$dnode" "deletekey ${db} ${key}"
+
+	for _vt_node in "$dnode" "$lmaster" "$lmaster" ; do
+		# Recreate just before run number $rrun (counting from 0)
+		if [ "$rrun" -eq "$_vt_run" ] ; then
+			echo "Recreate record on node ${rnode}"
+			ctdb_onnode "$rnode" "writekey ${db} ${key} ${val}"
+		fi
+
+		if [ "$_vt_run" -eq 0 ] ; then
+			_vt_desc="node"
+		else
+			_vt_desc="lmaster node"
+		fi
+		echo "Do a fast vacuuming run on ${_vt_desc} ${_vt_node}"
+		testprog_onnode "$_vt_node" "ctdb-db-test vacuum ${db}"
+
+		_vt_run=$((_vt_run + 1))
+	done
+
+	echo
+	echo "Confirm the record still exists on all nodes"
+	check_cattdb_num_records "$db" 1 "$all_pnns"
+
+	echo
+	echo "Confirm the record contains correct value"
+	db_confirm_key_has_value "$first" "$db" "$key" "$val"
+}
+
+vacuum_test "$db" "key" "value01" "$non_lmaster" "$non_lmaster" 0 # 0: recreate before any run
+vacuum_test "$db" "key" "value02" "$non_lmaster" "$another_non_lmaster" 0
+vacuum_test "$db" "key" "value03" "$non_lmaster" "$lmaster" 0
+vacuum_test "$db" "key" "value04" "$lmaster" "$non_lmaster" 0
+vacuum_test "$db" "key" "value05" "$lmaster" "$lmaster" 0
+# Recreate after the 1st vacuuming run (the one on the deleting node)
+vacuum_test "$db" "key" "value06" "$non_lmaster" "$non_lmaster" 1
+vacuum_test "$db" "key" "value07" "$non_lmaster" "$lmaster" 1
+vacuum_test "$db" "key" "value08" "$non_lmaster" "$another_non_lmaster" 1
+# Recreate after the 2nd vacuuming run (the 1st one on the lmaster)
+vacuum_test "$db" "key" "value09" "$non_lmaster" "$non_lmaster" 2
+vacuum_test "$db" "key" "value10" "$non_lmaster" "$lmaster" 2
+vacuum_test "$db" "key" "value11" "$non_lmaster" "$another_non_lmaster" 2
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.030.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.030.locked.sh
new file mode 100755
index 0000000..3862526
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.030.locked.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+# Confirm that a record is not vacuumed if it is locked when the 1st
+# fast vacuuming run occurs on the node on which it was deleted, but
+# is dropped from the delete queue
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+db="vacuum_test.tdb"
+key="key"
+# A huge VacuumInterval is set so that, presumably, vacuuming only runs when triggered by hand
+echo "Stall vacuuming on all nodes"
+ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+echo
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+# all_pnns is set above by ctdb_get_all_pnns(); "first" is its first line
+# shellcheck disable=SC2154
+first=$(echo "$all_pnns" | sed -n -e '1p')
+# Look up the lmaster of ${key} rather than of a hard-coded key name
+echo
+echo "Determine lmaster node for key"
+testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}"
+# $out is set above by testprog_onnode()
+# shellcheck disable=SC2154
+lmaster="$out"
+echo "lmaster=${lmaster}"
+
+non_lmaster=""
+# Find a non-lmaster node: the first PNN in $all_pnns that isn't $lmaster
+for i in $all_pnns ; do
+	if [ "$i" != "$lmaster" ] ; then
+		non_lmaster="$i"
+		break
+	fi
+done
+if [ -z "$non_lmaster" ] ; then
+	ctdb_test_fail "Could not find non-lmaster node for key"
+fi
+
+echo "............................................................"
+echo "Delete key ${key} on non-lmaster node ${non_lmaster}"
+echo "Lock on node ${non_lmaster} during 1st vacuuming run"
+echo "............................................................"
+
+echo
+# Set up: a single record that has been read on (migrated to) every node
+echo "Create/wipe test database ${db}"
+ctdb_onnode "$first" "attach ${db}"
+ctdb_onnode "$first" "wipedb ${db}"
+
+echo "Create a record in ${db}"
+ctdb_onnode "$first" "writekey ${db} ${key} value1"
+
+echo "Migrate record to all nodes"
+ctdb_onnode all "readkey ${db} ${key}"
+
+echo "Confirm that all nodes have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+# Delete on the non-lmaster node, then lock the record there before vacuuming
+echo "Delete key \"${key}\" from node ${non_lmaster}"
+ctdb_onnode "$non_lmaster" "deletekey $db ${key}"
+# $out minus its leading "OK " is taken as the pid of the lock-holding process
+echo "Lock record on node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test local-lock ${db} ${key}"
+pid="${out#OK }"
+ctdb_test_cleanup_pid_set "$non_lmaster" "$pid"
+# 1st run, with the record locked: expected to drop it from the delete queue
+echo "Do a fast vacuuming run on node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}"
+
+echo "Kill lock process ${pid} on node ${non_lmaster}"
+try_command_on_node "$non_lmaster" "kill ${pid}"
+ctdb_test_cleanup_pid_clear
+
+echo
+
+# If the record is still in the delete queue then this will process it
+echo "Do a fast vacuuming run on node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}"
+
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+# The record must not have been vacuumed on any node
+echo "Confirm the record still exists on all nodes"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+vacuum_confirm_key_empty_dmaster "$non_lmaster" "$db" "$key"
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.031.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.031.locked.sh
new file mode 100755
index 0000000..d16482e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.031.locked.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+
+# Confirm that a record is vacuumed if it is locked on the deleting
+# node when the 2nd fast vacuuming run occurs, but vacuuming is
+# delayed until the lock is released
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+db="vacuum_test.tdb"
+key="key"
+# A huge VacuumInterval is set so that, presumably, vacuuming only runs when triggered by hand
+echo "Stall vacuuming on all nodes"
+ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+echo
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+# all_pnns is set above by ctdb_get_all_pnns(); "first" is its first line
+# shellcheck disable=SC2154
+first=$(echo "$all_pnns" | sed -n -e '1p')
+# Look up the lmaster of ${key} rather than of a hard-coded key name
+echo
+echo "Determine lmaster node for key"
+testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}"
+# $out is set above by testprog_onnode()
+# shellcheck disable=SC2154
+lmaster="$out"
+echo "lmaster=${lmaster}"
+
+non_lmaster=""
+# Find a non-lmaster node: the first PNN in $all_pnns that isn't $lmaster
+for i in $all_pnns ; do
+	if [ "$i" != "$lmaster" ] ; then
+		non_lmaster="$i"
+		break
+	fi
+done
+if [ -z "$non_lmaster" ] ; then
+	ctdb_test_fail "Could not find non-lmaster node for key"
+fi
+
+echo "............................................................"
+echo "Delete key ${key} on node ${non_lmaster}"
+echo "Lock on non-lmaster node ${non_lmaster} during 2nd vacuuming run"
+echo "............................................................"
+
+echo
+# Set up: a single record that has been read on (migrated to) every node
+echo "Create/wipe test database ${db}"
+ctdb_onnode "$first" "attach ${db}"
+ctdb_onnode "$first" "wipedb ${db}"
+
+echo "Create a record in ${db}"
+ctdb_onnode "$first" "writekey ${db} ${key} value1"
+
+echo "Migrate record to all nodes"
+ctdb_onnode all "readkey ${db} ${key}"
+
+echo "Confirm that all nodes have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+
+echo "Delete key \"${key}\" from node ${non_lmaster}"
+ctdb_onnode "$non_lmaster" "deletekey $db ${key}"
+
+echo
+echo "Do a fast vacuuming run on node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that all nodes still have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+# $out minus its leading "OK " is taken as the pid of the lock-holding process
+echo
+echo "Lock record on non-lmaster node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test local-lock ${db} ${key}"
+pid="${out#OK }"
+ctdb_test_cleanup_pid_set "$non_lmaster" "$pid"
+# "|| status=$?" records the failure instead of letting "set -e" end the test
+echo
+echo "Do a fast vacuuming run on lmaster node ${lmaster} - THIS WILL FAIL"
+status=0
+testprog_onnode "$lmaster" "ctdb-db-test -t 10 vacuum ${db}" || status=$?
+# 110 is presumably ETIMEDOUT (Linux) caused by the "-t 10" timeout - TODO confirm
+if [ $status -ne 110 ] ; then
+ ctdb_test_fail "$out"
+fi
+
+echo "Confirm record key=\"${key}\" has dmaster=${non_lmaster}"
+vacuum_test_key_dmaster "$lmaster" "$db" "$key" "$non_lmaster"
+
+echo "Kill lock process ${pid} on node ${non_lmaster}"
+try_command_on_node "$non_lmaster" "kill ${pid}"
+ctdb_test_cleanup_pid_clear
+# Once the lock is gone the delayed migration to the lmaster can complete
+echo "Wait until record is migrated to lmaster node ${lmaster}"
+vacuum_test_wait_key_dmaster "$lmaster" "$db" "$key"
+
+echo
+echo "Confirm that all nodes still have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo "Do a fast vacuuming run on node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that the record is gone from all nodes"
+check_cattdb_num_records "$db" 0 "$all_pnns"
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.032.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.032.locked.sh
new file mode 100755
index 0000000..481d1d4
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.032.locked.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+# Confirm that a record is not vacuumed if it is locked on the lmaster
+# when the 3rd fast vacuuming run occurs, but is dropped from the
+# lmaster delete queue
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+db="vacuum_test.tdb"
+key="key"
+# A huge VacuumInterval is set so that, presumably, vacuuming only runs when triggered by hand
+echo "Stall vacuuming on all nodes"
+ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+echo
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+# all_pnns is set above by ctdb_get_all_pnns(); "first" is its first line
+# shellcheck disable=SC2154
+first=$(echo "$all_pnns" | sed -n -e '1p')
+# Look up the lmaster of ${key} rather than of a hard-coded key name
+echo
+echo "Determine lmaster node for key"
+testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}"
+# $out is set above by testprog_onnode()
+# shellcheck disable=SC2154
+lmaster="$out"
+echo "lmaster=${lmaster}"
+
+non_lmaster=""
+# Find a non-lmaster node: the first PNN in $all_pnns that isn't $lmaster
+for i in $all_pnns ; do
+	if [ "$i" != "$lmaster" ] ; then
+		non_lmaster="$i"
+		break
+	fi
+done
+if [ -z "$non_lmaster" ] ; then
+	ctdb_test_fail "Could not find non-lmaster node for key"
+fi
+
+echo "............................................................"
+echo "Delete key ${key} on node ${non_lmaster}"
+echo "Lock on lmaster node ${lmaster} during 3rd vacuuming run"
+echo "............................................................"
+
+echo
+# Set up: a single record that has been read on (migrated to) every node
+echo "Create/wipe test database ${db}"
+ctdb_onnode "$first" "attach ${db}"
+ctdb_onnode "$first" "wipedb ${db}"
+
+echo "Create a record in ${db}"
+ctdb_onnode "$first" "writekey ${db} ${key} value1"
+
+echo "Migrate record to all nodes"
+ctdb_onnode all "readkey ${db} ${key}"
+
+echo "Confirm that all nodes have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+
+echo "Delete key \"${key}\" from node ${non_lmaster}"
+ctdb_onnode "$non_lmaster" "deletekey $db ${key}"
+# 1st run (deleting node) and 2nd run (lmaster) happen without any lock held
+echo "Do a fast vacuuming run on node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}"
+
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+# $out minus its leading "OK " is taken as the pid of the lock-holding process
+echo "Lock record on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test local-lock ${db} ${key}"
+pid="${out#OK }"
+ctdb_test_cleanup_pid_set "$lmaster" "$pid"
+# 3rd run, with the record locked on the lmaster itself
+echo "Do a fast vacuuming run on node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo "Kill lock process ${pid} on node ${lmaster}"
+try_command_on_node "$lmaster" "kill ${pid}"
+ctdb_test_cleanup_pid_clear
+
+echo
+
+# If the record is still in the delete queue then this will process it
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+# The record must not have been vacuumed on any node
+echo "Confirm the record still exists on all nodes"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key"
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.033.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.033.locked.sh
new file mode 100755
index 0000000..63d7d1f
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.033.locked.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+
+# Confirm that a record is not vacuumed if it is locked on the
+# deleting node when the 3rd fast vacuuming run occurs, but is dropped
+# from the lmaster delete list
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+db="vacuum_test.tdb"
+key="key"
+# A huge VacuumInterval is set so that, presumably, vacuuming only runs when triggered by hand
+echo "Stall vacuuming on all nodes"
+ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+echo
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+# all_pnns is set above by ctdb_get_all_pnns(); "first" is its first line
+# shellcheck disable=SC2154
+first=$(echo "$all_pnns" | sed -n -e '1p')
+# Look up the lmaster of ${key} rather than of a hard-coded key name
+echo
+echo "Determine lmaster node for key"
+testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}"
+# $out is set above by testprog_onnode()
+# shellcheck disable=SC2154
+lmaster="$out"
+echo "lmaster=${lmaster}"
+
+non_lmaster=""
+# Find a non-lmaster node: the first PNN in $all_pnns that isn't $lmaster
+for i in $all_pnns ; do
+	if [ "$i" != "$lmaster" ] ; then
+		non_lmaster="$i"
+		break
+	fi
+done
+if [ -z "$non_lmaster" ] ; then
+	ctdb_test_fail "Could not find non-lmaster node for key"
+fi
+
+echo "............................................................"
+echo "Delete key ${key} on node ${non_lmaster}"
+echo "Lock on non-lmaster node ${non_lmaster} during 3rd vacuuming run"
+echo "............................................................"
+
+echo
+# Set up: a single record that has been read on (migrated to) every node
+echo "Create/wipe test database ${db}"
+ctdb_onnode "$first" "attach ${db}"
+ctdb_onnode "$first" "wipedb ${db}"
+
+echo "Create a record in ${db}"
+ctdb_onnode "$first" "writekey ${db} ${key} value1"
+
+echo "Migrate record to all nodes"
+ctdb_onnode all "readkey ${db} ${key}"
+
+echo "Confirm that all nodes have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+
+echo "Delete key \"${key}\" from node ${non_lmaster}"
+ctdb_onnode "$non_lmaster" "deletekey $db ${key}"
+# 1st run: on the deleting node, no lock held
+echo
+echo "Do a fast vacuuming run on node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that all nodes still have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+# 2nd run: on the lmaster, no lock held
+echo
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that all nodes still have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+# $out minus its leading "OK " is taken as the pid of the lock-holding process
+echo
+echo "Lock record on non-lmaster node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test local-lock ${db} ${key}"
+pid="${out#OK }"
+ctdb_test_cleanup_pid_set "$non_lmaster" "$pid"
+# 3rd run: on the lmaster, while the record is locked on the deleting node
+echo "Do a fast vacuuming run on node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo "Kill lock process ${pid} on node ${non_lmaster}"
+try_command_on_node "$non_lmaster" "kill ${pid}"
+ctdb_test_cleanup_pid_clear
+
+echo
+echo "Confirm that nodes ${lmaster} and ${non_lmaster} still have the record"
+check_cattdb_num_records "$db" 1 "${lmaster} ${non_lmaster}"
+
+vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key"
+
+echo
+
+# Record has been dropped from the delete list so this will not pick it up
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that nodes ${lmaster} and ${non_lmaster} still have the record"
+check_cattdb_num_records "$db" 1 "${lmaster} ${non_lmaster}"
+
+vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key"
diff --git a/ctdb/tests/INTEGRATION/database/vacuum.034.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.034.locked.sh
new file mode 100755
index 0000000..7f37ada
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/database/vacuum.034.locked.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+
+# Confirm that a record is not vacuumed if it is locked on another
+# (non-lmaster, non-deleting) node when the 3rd fast vacuuming run
+# occurs, but is dropped from the lmaster delete tree
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+db="vacuum_test.tdb"
+key="key"
+# A huge VacuumInterval is set so that, presumably, vacuuming only runs when triggered by hand
+echo "Stall vacuuming on all nodes"
+ctdb_onnode -p all "setvar VacuumInterval 99999"
+
+echo
+echo "Getting list of nodes..."
+ctdb_get_all_pnns
+
+# all_pnns is set above by ctdb_get_all_pnns(); "first" is its first line
+# shellcheck disable=SC2154
+first=$(echo "$all_pnns" | sed -n -e '1p')
+# Look up the lmaster of ${key} rather than of a hard-coded key name
+echo
+echo "Determine lmaster node for key"
+testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}"
+# $out is set above by testprog_onnode()
+# shellcheck disable=SC2154
+lmaster="$out"
+echo "lmaster=${lmaster}"
+
+non_lmaster=""
+# Find a non-lmaster node: the first PNN in $all_pnns that isn't $lmaster
+for i in $all_pnns ; do
+	if [ "$i" != "$lmaster" ] ; then
+		non_lmaster="$i"
+		break
+	fi
+done
+if [ -z "$non_lmaster" ] ; then
+	ctdb_test_fail "Could not find non-lmaster node for key"
+fi
+
+another_node=""
+# Find another node: neither the lmaster nor the deleting node
+for i in $all_pnns ; do
+	if [ "$i" != "$lmaster" ] && [ "$i" != "$non_lmaster" ] ; then
+		another_node="$i"
+		break
+	fi
+done
+if [ -z "$another_node" ] ; then
+	ctdb_test_fail "Could not find another non-lmaster node for key"
+fi
+# Banner: the lock is taken on ${another_node}, not on the deleting node
+echo "............................................................"
+echo "Delete key ${key} on node ${non_lmaster}"
+echo "Lock on another non-lmaster node ${another_node} during 3rd vacuuming run"
+echo "............................................................"
+
+echo
+# Set up: a single record that has been read on (migrated to) every node
+echo "Create/wipe test database ${db}"
+ctdb_onnode "$first" "attach ${db}"
+ctdb_onnode "$first" "wipedb ${db}"
+
+echo "Create a record in ${db}"
+ctdb_onnode "$first" "writekey ${db} ${key} value1"
+
+echo "Migrate record to all nodes"
+ctdb_onnode all "readkey ${db} ${key}"
+
+echo "Confirm that all nodes have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+
+echo
+
+echo "Delete key \"${key}\" from node ${non_lmaster}"
+ctdb_onnode "$non_lmaster" "deletekey $db ${key}"
+# 1st run: on the deleting node, no lock held
+echo
+echo "Do a fast vacuuming run on node ${non_lmaster}"
+testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that all nodes still have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+# 2nd run: on the lmaster, no lock held
+echo
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that all nodes still have the record"
+check_cattdb_num_records "$db" 1 "$all_pnns"
+# $out minus its leading "OK " is taken as the pid of the lock-holding process
+echo
+echo "Lock record on non-lmaster node ${another_node}"
+testprog_onnode "$another_node" "ctdb-db-test local-lock ${db} ${key}"
+pid="${out#OK }"
+ctdb_test_cleanup_pid_set "$another_node" "$pid"
+# 3rd run: on the lmaster, while the record is locked on ${another_node}
+echo "Do a fast vacuuming run on node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo "Kill lock process ${pid} on node ${another_node}"
+try_command_on_node "$another_node" "kill ${pid}"
+ctdb_test_cleanup_pid_clear
+
+echo
+echo "Confirm that nodes ${lmaster} and ${another_node} still have the record"
+check_cattdb_num_records "$db" 1 "${lmaster} ${another_node}"
+
+vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key"
+
+echo
+
+# Record has been dropped from the delete list so this will not pick it up
+echo "Do a fast vacuuming run on lmaster node ${lmaster}"
+testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}"
+
+echo
+echo "Confirm that nodes ${lmaster} and ${another_node} still have the record"
+check_cattdb_num_records "$db" 1 "${lmaster} ${another_node}"
+
+vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key"
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.001.list.sh b/ctdb/tests/INTEGRATION/failover/pubips.001.list.sh
new file mode 100755
index 0000000..2fc75b7
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.001.list.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb ip' shows the correct output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+echo "Getting list of public IPs..."
+try_command_on_node -v 1 "$CTDB ip all | tail -n +2"
+# Reduce each line of the output (in "$outfile") to "<ip> <pnn>"
+ips=$(sed -e 's@ node\[@ @' -e 's@\].*$@@' "$outfile")
+# Rewrite the same output into the '|'-separated form compared with "-X" below
+machineout=$(sed -r \
+	-e 's@^| |$@\|@g' \
+	-e 's@[[:alpha:]]+\[@@g' \
+	-e 's@\]@@g' \
+	"$outfile")
+
+if ctdb_test_on_cluster ; then
+	# read -r: take each "<ip> <pnn>" line literally (no backslash handling)
+	while read -r ip pnn ; do
+		try_command_on_node "$pnn" "ip addr show to ${ip}"
+		if [ -n "$out" ] ; then
+			echo "GOOD: node $pnn appears to have $ip assigned"
+		else
+			die "BAD: node $pnn does not appear to have $ip assigned"
+		fi
+	done <<<"$ips" # bashism to avoid problem setting variable in pipeline.
+fi
+
+echo "Looks good!"
+# Real machine-readable (-X) output must equal $machineout, derived earlier via sed
+cmd="$CTDB -X ip all | tail -n +2"
+echo "Checking that \"$cmd\" produces expected output..."
+
+try_command_on_node 1 "$cmd"
+if [ "$out" = "$machineout" ] ; then
+ echo "Yep, looks good!"
+else
+ echo "Nope, it looks like this:"
+ echo "$out"
+ echo "Should be like this:"
+ echo "$machineout"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.010.addip.sh b/ctdb/tests/INTEGRATION/failover/pubips.010.addip.sh
new file mode 100755
index 0000000..aba85dd
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.010.addip.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+# Verify that an IP address can be added to a node using 'ctdb addip'
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+get_test_ip_mask_and_iface
+# Remove the address from every node first so that adding it is a real change
+echo "Deleting IP $test_ip from all nodes"
+delete_ip_from_all_nodes "$test_ip"
+try_command_on_node -v "$test_node" "$CTDB ipreallocate"
+wait_until_ips_are_on_node '!' "$test_node" "$test_ip"
+
+# Debugging...
+try_command_on_node -v all "$CTDB ip"
+# Add the address on the test node and wait until it is hosted there
+echo "Adding IP ${test_ip}/${mask} on ${iface}, node ${test_node}"
+try_command_on_node "$test_node" "$CTDB addip ${test_ip}/${mask} ${iface}"
+try_command_on_node "$test_node" "$CTDB ipreallocate"
+wait_until_ips_are_on_node "$test_node" "$test_ip"
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.011.delip.sh b/ctdb/tests/INTEGRATION/failover/pubips.011.delip.sh
new file mode 100755
index 0000000..5235a9d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.011.delip.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# Verify that a node's public IP address can be deleted using 'ctdb deleteip'
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+# test_node and test_ip are presumably set by select_test_node_and_ips()
+echo "Deleting IP ${test_ip} from node ${test_node}"
+try_command_on_node "$test_node" "$CTDB delip ${test_ip}"
+try_command_on_node "$test_node" "$CTDB ipreallocate"
+wait_until_ips_are_on_node '!' "$test_node" "$test_ip"
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.012.reloadips.sh b/ctdb/tests/INTEGRATION/failover/pubips.012.reloadips.sh
new file mode 100755
index 0000000..a3bb3af
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.012.reloadips.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+
+# Verify that IPs can be reconfigured using 'ctdb reloadips'
+
+# Various sub-tests that remove addresses from the public_addresses file
+# on a node or delete the entire contents of the public_addresses file.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+# The public_addresses file lives in the directory printed by ctdb_base_show
+try_command_on_node $test_node $CTDB_TEST_WRAPPER ctdb_base_show
+addresses="${out}/public_addresses"
+echo "Public addresses file on node $test_node is \"$addresses\""
+backup="${addresses}.$$" # $$ is this shell's PID
+# Move the backup back into place; errors (e.g. no backup) are ignored
+restore_public_addresses ()
+{
+ try_command_on_node $test_node "mv $backup $addresses >/dev/null 2>&1 || true"
+}
+ctdb_test_exit_hook_add restore_public_addresses
+
+# ctdb reloadips will fail if it can't disable takeover runs. The most
+# likely reason for this is that there is already a takeover run in
+# progress. We can't predict when this will happen, so retry (up to 10
+# times, with "sleep_for 1" in between) if this occurs.
+#
+# Returns 0 once reloadips succeeds, 1 on any other failure or when
+# the retries are used up.
+do_ctdb_reloadips ()
+{
+	local retries_left=10
+
+	while ! ctdb_onnode "$test_node" "reloadips all" ; do
+		# Only this particular failure is worth retrying
+		[ "$out" = "Failed to disable takeover runs" ] || return 1
+
+		if [ "$retries_left" -le 0 ] ; then
+			return 1
+		fi
+		retries_left=$((retries_left - 1))
+
+		echo "Retrying..."
+		sleep_for 1
+	done
+
+	return 0
+}
+
+
+echo "Removing IP $test_ip from node $test_node"
+# Keep the original as $backup; the new file is the original minus $test_ip
+try_command_on_node $test_node "mv $addresses $backup && grep -v '^${test_ip}/' $backup >$addresses"
+
+do_ctdb_reloadips
+
+try_command_on_node $test_node $CTDB ip
+# $test_ip must no longer start any line of the "ctdb ip" output
+if grep "^${test_ip} " <<<"$out" ; then
+ cat <<EOF
+BAD: node $test_node can still host IP $test_ip:
+$out
+EOF
+ exit 1
+fi
+
+cat <<EOF
+GOOD: node $test_node is no longer hosting IP $test_ip:
+$out
+EOF
+
+ctdb_onnode "$test_node" sync
+
+# Next sub-test: put the original file back and check that IPs are listed
+echo "Restoring addresses"
+restore_public_addresses
+
+do_ctdb_reloadips
+
+echo "Getting list of public IPs on node $test_node"
+try_command_on_node $test_node "$CTDB ip | tail -n +2"
+
+if [ -z "$out" ] ; then
+ echo "BAD: node $test_node has no ips"
+ exit 1
+fi
+
+cat <<EOF
+GOOD: node $test_node has these addresses:
+$out
+EOF
+
+ctdb_onnode "$test_node" sync
+
+# Last sub-test: an empty public addresses file must leave no IPs listed
+echo "Emptying public addresses file on $test_node"
+
+try_command_on_node $test_node "mv $addresses $backup && touch $addresses"
+
+do_ctdb_reloadips
+
+echo "Getting list of public IPs on node $test_node"
+try_command_on_node $test_node "$CTDB ip | tail -n +2"
+
+if [ -n "$out" ] ; then
+ cat <<EOF
+BAD: node $test_node still has ips:
+$out
+EOF
+ exit 1
+fi
+
+echo "GOOD: no IPs left on node $test_node"
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.013.failover_noop.sh b/ctdb/tests/INTEGRATION/failover/pubips.013.failover_noop.sh
new file mode 100755
index 0000000..77f9a63
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.013.failover_noop.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# Check that CTDB operates correctly if:
+
+# * failover is disabled; or
+# * there are 0 public IPs configured
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with failover disabled..."
+ctdb_nodes_start_custom -F
+
+select_test_node
+
+echo "Getting IP allocation..."
+
+# $test_node set above by select_test_node()
+# shellcheck disable=SC2154
+try_command_on_node -v "$test_node" "$CTDB ip all | tail -n +2"
+# Every "<ip> <pnn>" line must show node -1; read -r takes lines literally
+while read -r ip pnn ; do
+	if [ "$pnn" != "-1" ] ; then
+		die "BAD: IP address ${ip} is assigned to node ${pnn}"
+	fi
+done <"$outfile"
+
+echo "GOOD: All IP addresses are unassigned"
+
+echo "----------------------------------------"
+# 2nd case: /dev/null as public addresses file means no public IPs at all
+echo "Starting CTDB with an empty public addresses configuration..."
+ctdb_nodes_start_custom -P /dev/null
+
+echo "Trying explicit ipreallocate..."
+ctdb_onnode "$test_node" ipreallocate
+
+echo "Good, that seems to work!"
+echo
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.014.iface_gc.sh b/ctdb/tests/INTEGRATION/failover/pubips.014.iface_gc.sh
new file mode 100755
index 0000000..845b4b5
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.014.iface_gc.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+# Verify that an interface is deleted when all IPs on it are deleted
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+# Find interfaces on test node
+try_command_on_node "$test_node" "$CTDB ifaces -X"
+ifaces=$(awk -F'|' 'NR > 1 { print $2 }' "$outfile")
+echo "Node ${test_node} has interfaces: ${ifaces}"
+
+# Delete all IPs on each interface... deleting IPs from one interface
+# can cause other interfaces to disappear, so we need to be careful...
+for i in $ifaces ; do
+	try_command_on_node "$test_node" "$CTDB ifaces -X"
+	info=$(awk -F'|' -v iface="$i" '$2 == iface { print $0 }' "$outfile")
+
+	if [ -z "$info" ] ; then
+		echo "Interface ${i} missing... assuming already deleted!"
+		continue
+	fi
+
+	echo "Deleting IPs on interface ${i}, with this information:"
+	echo " $info"
+
+	try_command_on_node "$test_node" "$CTDB ip -v -X | tail -n +2"
+	# Capture the list first: the delip calls below rewrite $outfile
+	ips=$(awk -F'|' -v i="$i" '$6 == i { print $2 }' "$outfile")
+	for ip in $ips ; do
+		echo " $ip"
+		try_command_on_node "$test_node" "$CTDB delip $ip"
+	done
+	try_command_on_node "$test_node" "$CTDB ipreallocate"
+
+	try_command_on_node "$test_node" "$CTDB ifaces -X"
+	info=$(awk -F'|' -v iface="$i" '$2 == iface { print $0 }' "$outfile")
+
+	if [ -z "$info" ] ; then
+		echo "GOOD: Interface ${i} has been garbage collected"
+	else
+		echo "BAD: Interface ${i} still exists"
+		echo "$out"
+		exit 1
+	fi
+done
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.020.moveip.sh b/ctdb/tests/INTEGRATION/failover/pubips.020.moveip.sh
new file mode 100755
index 0000000..8daf3f5
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.020.moveip.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb moveip' allows movement of public IPs between nodes
+
+# This test does not do any network level checks to make sure IP
+# addresses are actually on interfaces. It just consults "ctdb ip".
+
+# To work, this test ensures that IPAllocAlgorithm is not set to 0
+# (Deterministic IPs) and sets NoIPFailback.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+# Pass once two "<ip> <node>" lines in $outfile name different nodes
+sanity_check_ips ()
+{
+	echo "Sanity checking IPs..."
+	local _ip node prev=""
+	while read -r _ip node ; do
+		# A node of -1 fails the check (see the BAD message below)
+		[ "$node" = "-1" ] && break
+		if [ -n "$prev" ] && [ "$node" != "$prev" ] ; then
+			echo "OK"
+			return 0
+		fi
+		prev="$node"
+	done <"$outfile"
+
+	echo "BAD: a node was -1 or IPs are only assigned to one node:"
+	cat "$outfile"
+	echo "Are you running an old version of CTDB?"
+	return 1
+}
+
+sanity_check_ips
+
+# Find a target node - it must be willing to host $test_ip
+
+# $test_node set above by select_test_node_and_ips()
+# shellcheck disable=SC2154
+try_command_on_node "$test_node" "$CTDB listnodes | wc -l"
+num_nodes="$out"
+to_node=""
+for i in $(seq 0 $(($num_nodes - 1)) ) ; do
+ [ $i -ne $test_node ] || continue
+ all_ips_on_node $i
+ while read ip x ; do
+ if [ "$ip" = "$test_ip" ] ; then
+ to_node="$i"
+ break 2
+ fi
+ done <"$outfile"
+done
+
+if [ -z "$to_node" ] ; then
+ echo "Unable to find target node"
+ exit 1
+fi
+
+echo "Target node is ${to_node}"
+
+echo "Setting IPAllocAlgorithm=2 to avoid Deterministic IPs..."
+try_command_on_node -q all $CTDB setvar IPAllocAlgorithm 2
+
+echo "Turning on NoIPFailback..."
+try_command_on_node -q all $CTDB setvar NoIPFailback 1
+
+echo "Attempting to move ${test_ip} from node ${test_node} to node ${to_node}"
+try_command_on_node $test_node $CTDB moveip $test_ip $to_node
+wait_until_ips_are_on_node '!' $test_node $test_ip
+wait_until_ips_are_on_node $to_node $test_ip
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.030.disable_enable.sh b/ctdb/tests/INTEGRATION/failover/pubips.030.disable_enable.sh
new file mode 100755
index 0000000..3f40097
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.030.disable_enable.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# Verify the operation of "ctdb disable" and "ctdb enable"
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+########################################
+
+select_test_node_and_ips
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled 30 all
+wait_until_node_has_no_ips "$test_node"
+
+echo "Re-enabling node $test_node"
+try_command_on_node 1 $CTDB enable -n $test_node
+wait_until_node_has_status $test_node enabled 30 all
+wait_until_node_has_some_ips "$test_node"
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.032.stop_continue.sh b/ctdb/tests/INTEGRATION/failover/pubips.032.stop_continue.sh
new file mode 100755
index 0000000..f5936b0
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.032.stop_continue.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+# Verify the operation of "ctdb stop" and "ctdb continue"
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Stopping node ${test_node}..."
+try_command_on_node 1 $CTDB stop -n $test_node
+wait_until_node_has_status $test_node stopped
+wait_until_node_has_no_ips "$test_node"
+
+echo "Continuing node $test_node"
+try_command_on_node 1 $CTDB continue -n $test_node
+wait_until_node_has_status $test_node notstopped
+wait_until_node_has_some_ips "$test_node"
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.040.NoIPTakeover.sh b/ctdb/tests/INTEGRATION/failover/pubips.040.NoIPTakeover.sh
new file mode 100755
index 0000000..e99a265
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.040.NoIPTakeover.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb setvar NoIPTakeover 1' stops IP addresses being taken over
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+ctdb_get_all_pnns
+# out is set above
+# shellcheck disable=SC2154
+num_nodes=$(echo "$out" | wc -l | tr -d '[:space:]')
+echo "There are $num_nodes nodes..."
+
+if [ "$num_nodes" -lt 2 ] ; then
+ echo "Less than 2 nodes!"
+ exit 1
+fi
+
+select_test_node_and_ips
+
+
+# sets: num
+count_ips_on_node ()
+{
+ local node="$1"
+
+ ctdb_onnode "$node" ip
+ # outfile is set by ctdb_onnode() above
+ # shellcheck disable=SC2154,SC2126
+ # * || true is needed to avoid command failure when there are no matches
+ # * Using "wc -l | tr -d '[:space:]'" is our standard
+ # pattern... and "grep -c" requires handling of special case
+ # for no match
+ num=$(grep -v 'Public' "$outfile" | \
+ grep " ${node}\$" | \
+ wc -l | \
+ tr -d '[:space:]')
+ echo "Number of addresses on node ${node}: ${num}"
+}
+
+
+# test_node is set by select_test_node_and_ips() above
+# shellcheck disable=SC2154
+count_ips_on_node "$test_node"
+
+echo "Turning on NoIPTakeover on all nodes"
+ctdb_onnode all "setvar NoIPTakeover 1"
+ctdb_onnode "$test_node" ipreallocate
+
+echo "Disable node ${test_node}"
+ctdb_onnode "$test_node" disable
+
+# num is set by count_ips_on_node()
+count_ips_on_node "$test_node"
+if [ "$num" != "0" ] ; then
+	ctdb_test_fail "BAD: node ${test_node} still hosts IP addresses"
+fi
+
+echo "Enable node ${test_node} again"
+ctdb_onnode "$test_node" enable
+
+count_ips_on_node "$test_node"
+if [ "$num" != "0" ] ; then
+	ctdb_test_fail "BAD: node ${test_node} took over IP addresses"
+fi
+
+
+echo "OK: IP addresses were not taken over"
diff --git a/ctdb/tests/INTEGRATION/failover/pubips.050.missing_ip.sh b/ctdb/tests/INTEGRATION/failover/pubips.050.missing_ip.sh
new file mode 100755
index 0000000..543f9a9
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/failover/pubips.050.missing_ip.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+# Verify that the recovery daemon handles unhosted IPs properly
+
+# This test does not do any network level checks to make sure the IP
+# address is actually on an interface. It just consults "ctdb ip".
+
+# This is a variation of the "addip" test.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Running test against node $test_node and IP $test_ip"
+
+get_test_ip_mask_and_iface
+
+echo "Deleting IP $test_ip from all nodes"
+delete_ip_from_all_nodes $test_ip
+try_command_on_node -v $test_node $CTDB ipreallocate
+wait_until_ips_are_on_node ! $test_node $test_ip
+
+try_command_on_node -v all $CTDB ip
+
+my_exit_hook ()
+{
+ if ctdb_test_on_cluster ; then
+ onnode -q all $CTDB event script enable legacy "10.interface"
+ fi
+}
+
+ctdb_test_exit_hook_add my_exit_hook
+
+# This forces us to wait until the ipreallocated associated with the
+# delips is complete.
+try_command_on_node $test_node $CTDB sync
+
+# Wait for a monitor event. Then the next steps are unlikely to occur
+# in the middle of a monitor event and will have the expected effect.
+wait_for_monitor_event $test_node
+
+if ctdb_test_on_cluster ; then
+ # Stop monitor events from bringing up the link status of an interface
+ try_command_on_node $test_node $CTDB event script disable legacy 10.interface
+fi
+
+echo "Marking interface $iface down on node $test_node"
+try_command_on_node $test_node $CTDB setifacelink $iface down
+
+echo "Adding IP $test_ip to node $test_node"
+try_command_on_node $test_node $CTDB addip $test_ip/$mask $iface
+try_command_on_node $test_node $CTDB ipreallocate
+
+echo "Wait long enough for IP verification to have taken place"
+sleep_for 15
+
+echo "Ensuring that IP ${test_ip} is not hosted on node ${test_node} when interface is down"
+if ips_are_on_node '!' $test_node $test_ip; then
+ echo "GOOD: the IP has not been hosted while the interface is down"
+else
+ echo "BAD: the IP is hosted but the interface is down"
+ exit 1
+fi
+
+echo "Marking interface $iface up on node $test_node"
+try_command_on_node $test_node $CTDB setifacelink $iface up
+wait_until_ips_are_on_node $test_node $test_ip
diff --git a/ctdb/tests/INTEGRATION/simple/README b/ctdb/tests/INTEGRATION/simple/README
new file mode 100644
index 0000000..3ac738d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/README
@@ -0,0 +1,2 @@
+Simple integration tests. These can be run against a pool of CTDB
+daemons running on the local machine - aka "local daemons".
diff --git a/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh
new file mode 100755
index 0000000..4ca6e46
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+# Use 'onnode' to confirm connectivity between all cluster nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+echo "Checking connectivity between nodes..."
+onnode all onnode -p all hostname
diff --git a/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh
new file mode 100755
index 0000000..aafe27e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb listnodes' shows the list of nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node -v 0 "$CTDB listnodes"
+
+num_nodes=$(wc -l <"$outfile")
+
+# Each whole line should look like an IP address; the group makes ^ and $
+# apply to both alternatives rather than to one side each
+ipv4_pat='[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+'
+ipv6_pat='[[:xdigit:]]+:[[:xdigit:]:]+[[:xdigit:]]+'
+sanity_check_output 2 \
+	"^(${ipv4_pat}|${ipv6_pat})\$"
+
+out_0="$out"
+
+echo "Checking other nodes..."
+
+n=1
+while [ $n -lt $num_nodes ] ; do
+ echo -n "Node ${n}: "
+ try_command_on_node $n "$CTDB listnodes"
+ if [ "$out_0" = "$out" ] ; then
+ echo "OK"
+ else
+ echo "DIFFERs from node 0:"
+ echo "$out"
+ exit 1
+ fi
+ n=$(($n + 1))
+done
diff --git a/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh
new file mode 100755
index 0000000..6f362c6
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+# Verify the operation of "ctdb listvars", "ctdb getvar", "ctdb setvar"
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node -v 0 "$CTDB listvars"
+
+sanity_check_output \
+ 5 \
+ '^[[:alpha:]][[:alnum:]]+[[:space:]]*=[[:space:]]*[[:digit:]]+$'
+
+echo "Verifying all variable values using \"ctdb getvar\"..."
+
+# Snapshot the listvars output: try_command_on_node rewrites $outfile
+listvars=$(cat "$outfile")
+while read -r var _ val ; do
+	try_command_on_node 0 "$CTDB getvar $var"
+	val2="${out#*= }"
+	if [ "$val" != "$val2" ] ; then
+		echo "MISMATCH on $var: $val != $val2"
+		exit 1
+	fi
+done <<<"$listvars"
+
+echo "GOOD: all tunables match"
+
+var="RecoverTimeout"
+
+try_command_on_node -v 0 $CTDB getvar $var
+
+val="${out#*= }"
+
+echo "Going to try incrementing it..."
+
+incr=$(($val + 1))
+
+try_command_on_node 0 $CTDB setvar $var $incr
+
+echo "That seemed to work, let's check the value..."
+
+try_command_on_node -v 0 $CTDB getvar $var
+
+newval="${out#*= }"
+
+if [ "$incr" != "$newval" ] ; then
+ echo "Nope, that didn't work..."
+ exit 1
+fi
+
+echo "Look's good! Now verifying with \"ctdb listvars\""
+try_command_on_node -v 0 "$CTDB listvars | grep '^$var'"
+
+check="${out#*= }"
+
+if [ "$incr" != "$check" ] ; then
+ echo "Nope, that didn't work..."
+ exit 1
+fi
+
+echo "Look's good! Putting the old value back..."
+cmd="$CTDB setvar $var $val"
+try_command_on_node 0 $cmd
diff --git a/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh
new file mode 100755
index 0000000..8071762
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Verify the operation of the 'ctdb ping' command
+#
+# 1. Run the 'ctdb ping' command on one of the nodes and verify that it
+# shows valid and expected output.
+# 2. Shutdown one of the cluster nodes, using the 'ctdb shutdown'
+# command.
+# 3. Run the 'ctdb ping -n <node>' command from another node to this
+# node.
+# 4. Verify that the command is not successful since the ctdb daemon is
+# not running on the node.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node -v 0 "$CTDB ping -n 1"
+
+sanity_check_output \
+ 1 \
+ '^response from 1 time=-?[.0-9]+ sec[[:space:]]+\([[:digit:]]+ clients\)$'
+
+ctdb_onnode -v 1 "shutdown"
+
+wait_until_node_has_status 1 disconnected 30 0
+
+try_command_on_node -v 0 "! $CTDB ping -n 1"
+
+sanity_check_output \
+ 1 \
+ "(: ctdb_control error: ('ctdb_control to disconnected node'|'node is disconnected')|Unable to get ping response from node 1|Node 1 is DISCONNECTED|ctdb_control for getpnn failed|: Can not access node. Node is not operational\.|Node 1 has status DISCONNECTED\|UNHEALTHY\|INACTIVE$)"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh
new file mode 100755
index 0000000..27025df
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb getpid' works as expected
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+echo "There are $num_nodes nodes..."
+
+# Call getpid a few different ways and make sure the answer is always the same.
+
+try_command_on_node -v 0 "onnode -q all $CTDB getpid"
+pids_onnode="$out"
+
+cmd=""
+n=0
+while [ $n -lt $num_nodes ] ; do
+ cmd="${cmd}${cmd:+; }$CTDB getpid -n $n"
+ n=$(($n + 1))
+done
+try_command_on_node -v 0 "( $cmd )"
+pids_getpid_n="$out"
+
+if [ "$pids_onnode" = "$pids_getpid_n" ] ; then
+ echo "They're the same... cool!"
+else
+ die "Error: they differ."
+fi
+
+echo "Checking each PID for validity"
+
+n=0
+while [ $n -lt $num_nodes ] ; do
+ read pid
+ try_command_on_node $n "ls -l /proc/${pid}/exe | sed -e 's@.*/@@'"
+ echo -n "Node ${n}, PID ${pid} looks to be running \"$out\" - "
+ case "$out" in
+ ctdbd) : ;;
+ memcheck*)
+ if [ -z "$VALGRIND" ] ; then
+ die "BAD"
+ fi
+ ;;
+ *) die "BAD"
+ esac
+
+ echo "GOOD!"
+
+ n=$(($n + 1))
+done <<<"$pids_onnode"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh
new file mode 100755
index 0000000..c6212fd
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb process-exists' shows correct information
+
+# The implementation is creative about how it gets PIDs for existing and
+# non-existing processes.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+test_node=1
+srvid=0xAE00000012345678
+
+# Execute a ctdb client on $test_node that will last for 60 seconds.
+# It should still be there when we check.
+try_command_on_node -v $test_node \
+ "$CTDB_TEST_WRAPPER exec dummy_client -n 10 -S ${srvid} >/dev/null 2>&1 & echo \$!"
+client_pid="$out"
+
+cleanup ()
+{
+ if [ -n "$client_pid" ] ; then
+ onnode $test_node kill -9 "$client_pid"
+ fi
+}
+
+ctdb_test_exit_hook_add cleanup
+
+echo "Waiting until PID $client_pid is registered on node $test_node"
+status=0
+wait_until 30 try_command_on_node $test_node \
+ "$CTDB process-exists ${client_pid}" || status=$?
+echo "$out"
+
+if [ $status -eq 0 ] ; then
+ echo "OK"
+else
+ die "BAD"
+fi
+
+echo "Checking for PID $client_pid with SRVID $srvid on node $test_node"
+status=0
+try_command_on_node $test_node \
+ "$CTDB process-exists ${client_pid} ${srvid}" || status=$?
+echo "$out"
+
+if [ $status -eq 0 ] ; then
+ echo "OK"
+else
+ die "BAD"
+fi
+
+echo "Checking for PID $client_pid with SRVID $client_pid on node $test_node"
+try_command_on_node -v $test_node \
+ "! $CTDB process-exists ${client_pid} ${client_pid}"
+
+# Now get the PID of the ctdb daemon on the test node.
+# This is not a ctdb client and process-exists should return error.
+try_command_on_node "$test_node" "$CTDB getpid"
+pid="$out"
+
+echo "Checking for PID $pid on node $test_node"
+try_command_on_node -v "$test_node" "! $CTDB process-exists ${pid}"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh
new file mode 100755
index 0000000..d97e035
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb statistics' works as expected
+
+# This is pretty superficial and could do more validation.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+pattern='^(CTDB version 1|Current time of statistics[[:space:]]*:.*|Statistics collected since[[:space:]]*:.*|Gathered statistics for [[:digit:]]+ nodes|[[:space:]]+[[:alpha:]_]+[[:space:]]+[[:digit:]]+|[[:space:]]+(node|client|timeouts|locks)|[[:space:]]+([[:alpha:]_]+_latency|max_reclock_[[:alpha:]]+)[[:space:]]+[[:digit:]-]+\.[[:digit:]]+[[:space:]]sec|[[:space:]]*(locks_latency|reclock_ctdbd|reclock_recd|call_latency|lockwait_latency|childwrite_latency)[[:space:]]+MIN/AVG/MAX[[:space:]]+[-.[:digit:]]+/[-.[:digit:]]+/[-.[:digit:]]+ sec out of [[:digit:]]+|[[:space:]]+(hop_count_buckets|lock_buckets):[[:space:][:digit:]]+)$'
+
+try_command_on_node -v 1 "$CTDB statistics"
+
+sanity_check_output 40 "$pattern"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh
new file mode 100755
index 0000000..51f34d9
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb statisticsreset' works as expected
+
+# This is pretty superficial. It just checks that a few particular
+# items reduce.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+# Print the value from the first "<label> <number>" line found in $outfile
+get_stat ()
+{
+	local label="$1"
+	# Only the prefix up to the first digit is replaced; later digits stay
+	sed -rn -e "s@^[[:space:]]+${label}[[:space:]]+([[:digit:]])@\1@p" "$outfile" |
+		head -n 1
+}
+
+check_reduced ()
+{
+ local label="$1"
+ local before="$2"
+ local after="$3"
+
+ if [ $after -lt $before ] ; then
+ echo "GOOD: ${label} reduced from ${before} to ${after}"
+ else
+ die "BAD: ${label} did not reduce from ${before} to ${after}"
+ fi
+}
+
+n=0
+while [ $n -lt $num_nodes ] ; do
+ echo "Getting initial statistics for node ${n}..."
+
+ try_command_on_node -v $n $CTDB statistics
+
+ before_req_control=$(get_stat "req_control")
+ before_reply_control=$(get_stat "reply_control")
+ before_node_packets_recv=$(get_stat "node_packets_recv")
+
+ try_command_on_node $n $CTDB statisticsreset
+
+ try_command_on_node -v $n $CTDB statistics
+
+ after_req_control=$(get_stat "req_control")
+ after_reply_control=$(get_stat "reply_control")
+ after_node_packets_recv=$(get_stat "node_packets_recv")
+
+ check_reduced "req_control" "$before_req_control" "$after_req_control"
+ check_reduced "reply_control" "$before_reply_control" "$after_reply_control"
+ check_reduced "node_packets_recv" "$before_node_packets_recv" "$after_node_packets_recv"
+
+ n=$(($n + 1))
+done
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh
new file mode 100755
index 0000000..180b4ae
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb stop' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Stopping leader ${leader}..."
+ctdb_onnode "$test_node" stop -n "$leader"
+
+wait_until_node_has_status "$leader" stopped
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh
new file mode 100755
index 0000000..234869c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb ban' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Banning leader ${leader}..."
+ctdb_onnode "$test_node" ban 300 -n "$leader"
+
+wait_until_node_has_status "$leader" banned
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh
new file mode 100755
index 0000000..94bcf27
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb setleaderrole off' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Removing leader capability from leader ${leader}..."
+ctdb_onnode "$test_node" setleaderrole off -n "$leader"
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh
new file mode 100755
index 0000000..95f522d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb stop' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom -C "cluster lock"
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Stopping leader ${leader}..."
+ctdb_onnode "$test_node" stop -n "$leader"
+
+wait_until_node_has_status "$leader" stopped
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh
new file mode 100755
index 0000000..0ef4e2b
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb ban' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom -C "cluster lock"
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Banning leader ${leader}..."
+ctdb_onnode "$test_node" ban 300 -n "$leader"
+
+wait_until_node_has_status "$leader" banned
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh
new file mode 100755
index 0000000..4489bc5
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# Verify that removing the leader capability causes a node to
+# yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom -C "cluster lock"
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Removing leader capability from leader ${leader}..."
+ctdb_onnode "$test_node" setleaderrole off -n "$leader"
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh
new file mode 100755
index 0000000..3a76654
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Verify that "ctdb getreclock" gets the recovery lock correctly
+
+# Make sure the recovery lock is consistent across all nodes.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+echo "Check that recovery lock is set the same on all nodes..."
+ctdb_onnode all getreclock
+
+# outfile is set above by ctdb_onnode
+# shellcheck disable=SC2154
+n=$(sort -u "$outfile" | wc -l | tr -d '[:space:]')
+
+case "$n" in
+0) echo "GOOD: Recovery lock is unset on all nodes" ;;
+1) echo "GOOD: All nodes have the same recovery lock setting" ;;
+*) ctdb_test_fail "BAD: Recovery lock setting differs across nodes" ;;
+esac
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh
new file mode 100755
index 0000000..d043c7e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+# Check that CTDB operates correctly if the recovery lock is configured
+# as a command.
+
+# This test works only with local daemons. On a real cluster it has
+# no way of updating configuration.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with recovery lock command configured..."
+ctdb_nodes_start_custom -R
+
+echo "Good, that seems to work!"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
new file mode 100755
index 0000000..2bb058c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+# Verify that the cluster recovers if the recovery lock is removed.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with cluster lock recheck interval set to 5s..."
+ctdb_nodes_start_custom -r 5
+
+generation_has_changed ()
+{
+ local node="$1"
+ local generation_init="$2"
+
+ # Leak this so it can be printed by test
+ generation_new=""
+
+ ctdb_onnode "$node" status
+ # shellcheck disable=SC2154
+ # $outfile set by ctdb_onnode() above
+ generation_new=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
+
+ [ "$generation_new" != "$generation_init" ]
+}
+
+select_test_node
+
+echo "Get recovery lock setting"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+ctdb_onnode "$test_node" getreclock
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+reclock_setting="$out"
+
+if [ -z "$reclock_setting" ] ; then
+ ctdb_test_skip "Recovery lock is not set"
+fi
+
+t="${reclock_setting% 5}"
+reclock="${t##* }"
+
+if [ ! -f "$reclock" ] ; then
+ ctdb_test_error "Recovery lock file \"${reclock}\" is missing"
+fi
+
+echo "Recovery lock setting is \"${reclock_setting}\""
+echo "Recovery lock file is \"${reclock}\""
+echo
+
+leader_get "$test_node"
+
+generation_get "$test_node"
+
+echo "Remove recovery lock"
+rm "$reclock"
+echo
+
+# This will mean an election has taken place and a recovery has occurred
+wait_until_generation_has_changed "$test_node"
+
+# shellcheck disable=SC2154
+# $leader set by leader_get() above; keep it to compare after the recovery
+leader_old="$leader"
+leader_get "$test_node"
+
+if [ "$leader" != "$leader_old" ] ; then
+	echo "OK: Leader has changed to node ${leader}"
+else
+	echo "GOOD: Leader is still node ${leader}"
+fi
+echo
+
+cluster_is_healthy
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh
new file mode 100755
index 0000000..147547d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+
+# Verify that if the directory containing the cluster lock is moved
+# then the current cluster leader no longer claims to be leader, and
+# no other node claims to be leader. Confirm that if the directory is
+# moved back then a node will become leader.
+
+# This simulates the cluster filesystem containing the cluster lock
+# being unmounted and remounted.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with cluster lock recheck interval set to 5s..."
+ctdb_nodes_start_custom -r 5
+
+select_test_node
+
+echo "Get cluster lock setting"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+ctdb_onnode "$test_node" getreclock
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+reclock_setting="$out"
+
+if [ -z "$reclock_setting" ] ; then
+ ctdb_test_skip "Cluster lock is not set"
+fi
+
+t="${reclock_setting% 5}"
+reclock="${t##* }"
+
+if [ ! -f "$reclock" ] ; then
+ ctdb_test_error "Cluster lock file \"${reclock}\" is missing"
+fi
+
+echo "Cluster lock setting is \"${reclock_setting}\""
+echo "Cluster lock file is \"${reclock}\""
+echo
+
+leader_get "$test_node"
+
+dir=$(dirname "$reclock")
+
+echo "Rename cluster lock directory"
+mv "$dir" "${dir}.$$"
+
+wait_until_leader_has_changed "$test_node"
+echo
+
+# shellcheck disable=SC2154
+# $leader set by leader_get() & wait_until_leader_has_changed(), above
+if [ "$leader" != "UNKNOWN" ]; then
+ test_fail "BAD: leader is ${leader}"
+fi
+
+echo "OK: leader is UNKNOWN"
+echo
+
+echo 'Get "leader timeout":'
+conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'"
+# shellcheck disable=SC2154
+# $out set by try_command_on_node() above
+leader_timeout="$out"
+echo "Leader timeout is ${leader_timeout}s"
+echo
+
+sleep_time=$((2 * leader_timeout))
+echo "Waiting for ${sleep_time}s to confirm leader stays UNKNOWN"
+sleep_for $sleep_time
+
+leader_get "$test_node"
+if [ "$leader" = "UNKNOWN" ]; then
+ echo "OK: leader is UNKNOWN"
+ echo
+else
+ test_fail "BAD: leader is ${leader}"
+fi
+
+echo "Restore cluster lock directory"
+mv "${dir}.$$" "$dir"
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh
new file mode 100755
index 0000000..b841f5b
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+
+# Run the message_ring test and sanity check the output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+echo "Running message_ring on all $num_nodes nodes."
+try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND message_ring -n $num_nodes
+
+# Get the last line of output.
+last=$(tail -n 1 "$outfile")
+
+pat='^(Waiting for cluster|Ring\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec \(\+ve=[[:digit:]]+ -ve=[[:digit:]]+\))$'
+sanity_check_output 1 "$pat"
+
+# $last should look like this:
+# Ring[1]: 10670.93 msgs/sec (+ve=53391 -ve=53373)
+stuff="${last##Ring\[*\]: }"
+mps="${stuff% msgs/sec*}"
+
+if [ ${mps%.*} -ge 10 ] ; then
+ echo "OK: $mps msgs/sec >= 10 msgs/sec"
+else
+ echo "BAD: $mps msgs/sec < 10 msgs/sec"
+ exit 1
+fi
+
+stuff="${stuff#*msgs/sec (+ve=}"
+positive="${stuff%% *}"
+
+if [ $positive -ge 10 ] ; then
+ echo "OK: +ive ($positive) >= 10"
+else
+ echo "BAD: +ive ($positive) < 10"
+ exit 1
+fi
+
+stuff="${stuff#*-ve=}"
+negative="${stuff%)}"
+
+if [ $negative -ge 10 ] ; then
+ echo "OK: -ive ($negative) >= 10"
+else
+ echo "BAD: -ive ($negative) < 10"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh
new file mode 100755
index 0000000..f86d080
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Run tunnel_test and sanity check the output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+echo "Running tunnel_test on all $num_nodes nodes."
+try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND \
+ tunnel_test -t 30 -n $num_nodes
+
+# Get the last line of output.
+last=$(tail -n 1 "$outfile")
+
+pat='^(Waiting for cluster|pnn\[[[:digit:]]+\] [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$'
+sanity_check_output 1 "$pat"
+
+# $last should look like this:
+# pnn[2] 8540.00 msgs/sec
+stuff="${last##pnn\[*\] }"
+mps="${stuff% msgs/sec}"
+
+if [ ${mps%.*} -ge 10 ] ; then
+ echo "OK: $mps msgs/sec >= 10 msgs/sec"
+else
+ echo "BAD: $mps msgs/sec < 10 msgs/sec"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh
new file mode 100755
index 0000000..7bca58c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Verify that nothing bad occurs if a node stalls and the leader
+# broadcast timeout triggers
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+echo
+
+echo 'Get "leader timeout":'
+conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'"
+# shellcheck disable=SC2154
+# $out set by try_command_on_node() above
+leader_timeout="$out"
+echo "Leader timeout is ${leader_timeout} seconds"
+echo
+
+# Assume leader timeout is reasonable and doesn't cause node to be
+# disconnected
+stall_time=$((leader_timeout * 2))
+
+generation_get "$test_node"
+
+echo "Get ctdbd PID on node ${test_node}..."
+ctdb_onnode -v "$test_node" "getpid"
+ctdbd_pid="$out"
+echo
+
+echo "Sending SIGSTOP to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -STOP ${ctdbd_pid}"
+
+sleep_for "$stall_time"
+
+echo "Sending SIGCONT to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -CONT ${ctdbd_pid}"
+echo
+
+wait_until_generation_has_changed "$test_node"
+
+cluster_is_healthy
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh
new file mode 100755
index 0000000..1410a12
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Verify an error occurs if a ctdb command is run against a node
+# without a ctdbd
+
+# That is, check that an error message is printed if an attempt is made
+# to execute a ctdb command against a node that is not running ctdbd.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+test_node=1
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+echo "There are $num_nodes nodes."
+
+echo "Shutting down node ${test_node}..."
+try_command_on_node $test_node $CTDB shutdown
+
+wait_until_node_has_status $test_node disconnected 30 0
+
+wait_until_node_has_status 0 recovered 30 0
+
+pat="ctdb_control error: 'ctdb_control to disconnected node'|ctdb_control error: 'node is disconnected'|Node $test_node is DISCONNECTED|Node $test_node has status DISCONNECTED\|UNHEALTHY\|INACTIVE"
+
+for i in ip disable enable "ban 0" unban listvars ; do
+ try_command_on_node -v 0 ! $CTDB $i -n $test_node
+
+ if grep -Eq "$pat" "$outfile" ; then
+ echo "OK: \"ctdb ${i}\" fails with expected \"disconnected node\" message"
+ else
+ echo "BAD: \"ctdb ${i}\" does not fail with expected \"disconnected node\" message"
+ exit 1
+ fi
+done
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh
new file mode 100755
index 0000000..be71750
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Check that the CTDB version consistency checking operates correctly
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init
+
+select_test_node
+
+try_command_on_node -v "$test_node" ctdb version
+version="$out"
+
+major="${version%%.*}"
+rest="${version#*.}"
+minor="${rest%%.*}"
+
+echo "Node ${test_node} has version ${major}.${minor}"
+
+# Unchanged version - this should work
+export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | minor ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+ "$test_node" \
+ "$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_restart "$test_node"
+wait_until_ready
+echo "GOOD: ctdbd restarted successfully on node ${test_node}"
+
+d="$CTDB_SCRIPTS_HELPER_BINDIR"
+try_command_on_node "$test_node" "${d}/ctdb-path" "pidfile" "ctdbd"
+pidfile="$out"
+
+# Changed major version - this should fail
+export CTDB_TEST_SAMBA_VERSION=$(( ((major + 1) << 16) | minor ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+ "$test_node" \
+ "$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_restart "$test_node"
+echo "Will use PID file ${pidfile} to check for ctdbd exit"
+wait_until 30 ! test -f "$pidfile"
+echo "GOOD: ctdbd exited early on node ${test_node}"
+
+# Changed minor version - this should fail
+export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | (minor + 1) ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+ "$test_node" \
+ "$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_start "$test_node"
+echo "Will use PID file ${pidfile} to check for ctdbd exit"
+wait_until 30 ! test -f "$pidfile"
+echo "GOOD: ctdbd exited early on node ${test_node}"
diff --git a/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh
new file mode 100755
index 0000000..2220a20
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb getdebug' works as expected
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+try_command_on_node -v 1 "onnode -q all $CTDB getdebug"
+getdebug_onnode="$out"
+
+sanity_check_output \
+ $num_nodes \
+ '^(ERROR|WARNING|NOTICE|INFO|DEBUG)$'
+
+cmd=""
+n=0
+while [ $n -lt $num_nodes ] ; do
+ cmd="${cmd}${cmd:+; }$CTDB getdebug -n $n"
+ n=$(($n + 1))
+done
+try_command_on_node -v 1 "$cmd"
+getdebug_n="$out"
+
+if [ "$getdebug_onnode" = "$getdebug_n" ] ; then
+ echo "They're the same... cool!"
+else
+ die "Error: they differ."
+fi
+
+seps=""
+nl="
+"
+while read line ; do
+ t=$(echo "$line" | sed -r -e 's@Node [[:digit:]]+ is at debug level ([[:alpha:]]+) \((-?[[:digit:]]+)\)$@\|\1\|\2|@')
+ seps="${seps}${seps:+${nl}}|Name|Level|${nl}${t}"
+done <<<"$getdebug_onnode"
diff --git a/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh
new file mode 100755
index 0000000..dd5949e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb setdebug' works as expected.
+
+# This is a little superficial. It checks that CTDB thinks the debug
+# level has been changed but doesn't actually check that logging occurs
+# at the new level.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+
+get_debug ()
+{
+ # Sets: check_debug to the "ctdb getdebug" output of node $1
+ local node="$1"
+
+ local out # keep $out (set by try_command_on_node) local to this function
+
+ try_command_on_node -v $node "$CTDB getdebug"
+ check_debug="$out"
+}
+
+set_and_check_debug ()
+{
+ local node="$1" # node to run setdebug/getdebug on
+ local level="$2" # value passed to "ctdb setdebug" (name or number)
+ local levelstr="${3:-$level}" # expected getdebug output, defaults to $level
+
+ echo "Setting debug level on node ${node} to ${level}."
+ try_command_on_node $node "$CTDB setdebug ${level}"
+
+ local check_debug # get_debug assigns this; declared local so it stays here
+ get_debug $node
+
+ if [ "$levelstr" != "$check_debug" ] ; then
+ die "BAD: Debug level \"$levelstr\" != \"$check_debug\"."
+ fi
+}
+
+get_debug $test_node
+initial_debug="$check_debug"
+
+levels="ERROR WARNING NOTICE INFO DEBUG"
+
+for new_debug in $levels ; do
+ [ "$initial_debug" != "$new_debug" ] || continue
+
+ echo
+ set_and_check_debug $test_node "$new_debug"
+done
+
+while read new_debug i ; do
+ [ "$initial_debug" != "$i" ] || continue
+
+ echo
+ set_and_check_debug $test_node "$i" "$new_debug"
+done <<EOF
+ERROR 0
+WARNING 1
+WARNING 2
+NOTICE 3
+NOTICE 4
+INFO 5
+INFO 6
+INFO 7
+INFO 8
+INFO 9
+DEBUG 10
+EOF
diff --git a/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh
new file mode 100755
index 0000000..6205c27
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb dumpmemory' shows expected output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+pat='^([[:space:]].+[[:space:]]+contains[[:space:]]+[[:digit:]]+ bytes in[[:space:]]+[[:digit:]]+ blocks \(ref [[:digit:]]+\)[[:space:]]+0x[[:xdigit:]]+|[[:space:]]+reference to: .+|full talloc report on .+ \(total[[:space:]]+[[:digit:]]+ bytes in [[:digit:]]+ blocks\))$'
+
+try_command_on_node -v 0 "$CTDB dumpmemory"
+sanity_check_output 10 "$pat"
+
+echo
+try_command_on_node -v 0 "$CTDB rddumpmemory"
+sanity_check_output 10 "$pat"
diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh
new file mode 100755
index 0000000..4fdf61c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# Check that CTDB operates correctly if there are 0 event scripts
+
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom --no-event-scripts
+
+echo "Good, that seems to work!"
diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh
new file mode 100755
index 0000000..046989c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# Verify CTDB's debugging of timed out eventscripts
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init
+
+select_test_node
+
+####################
+
+echo "Setting monitor events to time out..."
+try_command_on_node $test_node 'echo $CTDB_BASE'
+ctdb_base="$out"
+script_options="${ctdb_base}/script.options"
+ctdb_test_exit_hook_add "onnode $test_node rm -f $script_options"
+
+debug_output="${ctdb_base}/debug-hung-script.log"
+ctdb_test_exit_hook_add "onnode $test_node rm -f $debug_output"
+
+try_command_on_node -i "$test_node" tee "$script_options" <<EOF
+CTDB_RUN_TIMEOUT_MONITOR=yes
+CTDB_DEBUG_HUNG_SCRIPT_LOGFILE='$debug_output'
+CTDB_DEBUG_HUNG_SCRIPT_STACKPAT='exportfs|rpcinfo|sleep'
+CTDB_SCRIPT_VARDIR='$ctdb_base'
+EOF
+
+####################
+
+wait_for_monitor_event $test_node
+
+echo "Waiting for debugging output to appear..."
+# Use test -s because the file is created above using mktemp
+wait_until 60 test -s "$debug_output"
+
+echo
+echo "Debugging output:"
+cat "$debug_output"
+echo
+
+echo "Checking output of hung script debugging..."
+
+# Can we actually read kernel stacks
+if try_command_on_node $test_node "cat /proc/$$/stack >/dev/null 2>&1" ; then
+ stackpat='
+---- Stack trace of interesting process [0-9]*\\[sleep\\] ----
+[<[0-9a-f]*>] .*sleep+.*
+'
+else
+ stackpat=''
+fi
+
+while IFS="" read pattern ; do
+ [ -n "$pattern" ] || continue
+ if grep -q -- "^${pattern}\$" "$debug_output" ; then
+ printf 'GOOD: output contains "%s"\n' "$pattern"
+ else
+ printf 'BAD: output does not contain "%s"\n' "$pattern"
+ exit 1
+ fi
+done <<EOF
+===== Start of hung script debug for PID=".*", event="monitor" =====
+===== End of hung script debug for PID=".*", event="monitor" =====
+pstree -p -a .*:
+00\\\\.test\\\\.script,.*
+ *\`-sleep,.*
+${stackpat}
+---- ctdb scriptstatus monitor: ----
+00\\.test *TIMEDOUT.*
+ *OUTPUT: Sleeping for [0-9]* seconds\\\\.\\\\.\\\\.
+EOF
diff --git a/ctdb/tests/README b/ctdb/tests/README
new file mode 100644
index 0000000..80f3311
--- /dev/null
+++ b/ctdb/tests/README
@@ -0,0 +1,145 @@
+Introduction
+------------
+
+For a developer, the simplest way of running most tests on a local
+machine from within the git repository is:
+
+ make test
+
+This runs all UNIT and INTEGRATION tests.
+
+tests/run_tests.sh
+------------------
+
+This script can be used to manually run all tests or selected tests,
+with a variety of options. For usage, run:
+
+ tests/run_tests.sh -h
+
+If no tests are specified this runs all of the UNIT and INTEGRATION
+tests.
+
+By default:
+
+* INTEGRATION tests are run against 3 local daemons
+
+* When testing is complete, a summary is printed listing the tests
+ run and their results
+
+Tests can be selected in various ways:
+
+* tests/run_tests.sh UNIT INTEGRATION
+
+ runs all UNIT and INTEGRATION tests, and is like specifying no tests
+
+* tests/run_tests.sh UNIT/tool
+
+ runs all of the "tool" UNIT tests
+
+* tests/run_tests.sh tests/UNIT/eventscripts/00.ctdb.setup.001.sh
+ tests/run_tests.sh tests/INTEGRATION/simple/basics.001.listnodes.sh
+
+ each runs a single specified test case
+
+* tests/run_tests.sh UNIT/eventscripts UNIT/tool tests/UNIT/onnode/0001.sh
+
+ runs a combination of UNIT test suites and a single UNIT test
+
+Testing on a cluster
+--------------------
+
+INTEGRATION and CLUSTER tests can be run on a real or virtual cluster
+using tests/run_cluster_tests.sh (or "tests/run_tests.sh -c"). The
+test code needs to be available on all cluster nodes, as well as the
+test client node. The test client node needs to have a nodes file
+where the onnode(1) command will find it.
+
+If all of the cluster nodes have the CTDB git tree in the same
+location as on the test client then no special action is necessary.
+The simplest way of doing this is to share the tree to cluster nodes
+and test clients via NFS.
+
+Alternatively, the tests can be installed on all nodes. One technique
+is to build a package containing the tests and install it on all
+nodes. CTDB developers do a lot of testing this way using the
+provided sample packaging, which produces a ctdb-tests RPM package.
+
+Finally, if the test code is installed in a different place on the
+cluster nodes, then CTDB_TEST_REMOTE_DIR can be set on the test client
+node to point to a directory that contains the test_wrap script on the
+cluster nodes.
+
+Running tests under valgrind
+----------------------------
+
+The easiest way of doing this is something like:
+
+ VALGRIND="valgrind -q" tests/run_tests.sh ...
+
+This can be used to cause all invocations of the ctdb tool, test
+programs and, with local daemons, the ctdbd daemons themselves to run
+under valgrind.
+
+How is the ctdb tool invoked?
+-----------------------------
+
+$CTDB determines how to invoke the ctdb client.  If $CTDB is not
+already set and $VALGRIND is set, it is set to "$VALGRIND ctdb".  If
+neither $CTDB nor $VALGRIND is set, it is simply set to "ctdb".
+
+Test and debugging variable options
+-----------------------------------
+
+ CTDB_TEST_MODE
+
+ Set this environment variable to enable test mode.
+
+ This enables daemons and tools to locate their socket and
+ PID file relative to CTDB_BASE.
+
+ When testing with multiple local daemons on a single
+ machine this does 3 extra things:
+
+ * Disables checks related to public IP addresses
+
+ * Speeds up the initial recovery during startup at the
+ expense of some consistency checking
+
+ * Disables real-time scheduling
+
+ CTDB_DEBUG_HUNG_SCRIPT_LOGFILE=FILENAME
+ FILENAME specifies where log messages should go when
+ debugging hung eventscripts. This is a testing option. See
+ also CTDB_DEBUG_HUNG_SCRIPT.
+
+ No default. Messages go to stdout/stderr and are logged to
+ the same place as other CTDB log messages.
+
+ CTDB_SYS_ETCDIR=DIRECTORY
+ DIRECTORY containing system configuration files. This is
+ used to provide alternate configuration when testing and
+ should not need to be changed from the default.
+
+ Default is /etc.
+
+ CTDB_RUN_TIMEOUT_MONITOR=yes|no
+ Whether CTDB should simulate timing out monitor
+ events in local daemon tests.
+
+ Default is no.
+
+ CTDB_TEST_SAMBA_VERSION=VERSION
+
+ VERSION is a 32-bit number containing the Samba major
+ version in the most significant 16 bits and the minor
+ version in the least significant 16 bits. This can be
+ used to test CTDB's checking of incompatible versions
+ without installing an incompatible version. This is
+ probably best set like this:
+
+ export CTDB_TEST_SAMBA_VERSION=$(( (4 << 16) | 12 ))
+
+ CTDB_VARDIR=DIRECTORY
+ DIRECTORY containing CTDB files that are modified at runtime.
+
+ Defaults to /usr/local/var/lib/ctdb.
diff --git a/ctdb/tests/TODO b/ctdb/tests/TODO
new file mode 100644
index 0000000..be471cc
--- /dev/null
+++ b/ctdb/tests/TODO
@@ -0,0 +1,4 @@
+* Make tests know about IPv6.
+* Tests that write to database.
+* Tests that check actual network connectivity on failover.
+* Handle interrupting tests better.
diff --git a/ctdb/tests/UNIT/cunit/cluster_mutex_001.sh b/ctdb/tests/UNIT/cunit/cluster_mutex_001.sh
new file mode 100755
index 0000000..7976143
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/cluster_mutex_001.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+# This tests the fcntl helper, configured via a lock file
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+t="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb_mutex_fcntl_helper"
+export CTDB_CLUSTER_MUTEX_HELPER="$t"
+
+lockfile="${CTDB_TEST_TMP_DIR}/cluster_mutex.lockfile"
+trap 'rm -f ${lockfile}' 0
+
+test_case "No contention: lock, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-unlock "$lockfile"
+
+test_case "Contention: lock, lock, unlock"
+ok <<EOF
+LOCK
+CONTENTION
+NOLOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-lock-unlock "$lockfile"
+
+test_case "No contention: lock, unlock, lock, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-unlock-lock-unlock "$lockfile"
+
+test_case "Cancelled: unlock while lock still in progress"
+ok <<EOF
+CANCEL
+NOLOCK
+EOF
+unit_test cluster_mutex_test lock-cancel-check "$lockfile"
+
+test_case "Cancelled: unlock while lock still in progress, unlock again"
+ok <<EOF
+CANCEL
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-cancel-unlock "$lockfile"
+
+test_case "PPID doesn't go away: lock, wait, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-wait-unlock "$lockfile"
+
+test_case "PPID goes away: lock, wait, lock, unlock"
+ok <<EOF
+LOCK
+parent gone
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-ppid-gone-lock-unlock "$lockfile"
diff --git a/ctdb/tests/UNIT/cunit/cluster_mutex_002.sh b/ctdb/tests/UNIT/cunit/cluster_mutex_002.sh
new file mode 100755
index 0000000..c672eaf
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/cluster_mutex_002.sh
@@ -0,0 +1,132 @@
+#!/bin/sh
+
+# This tests the fcntl helper, externally configured via !
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+export CTDB_CLUSTER_MUTEX_HELPER="/bin/false"
+
+lockfile="${CTDB_TEST_TMP_DIR}/cluster_mutex.lockfile"
+trap 'rm -f ${lockfile}' 0 # -f: some tests below remove the lock file
+
+t="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb_mutex_fcntl_helper"
+helper="!${t} ${lockfile}"
+
+test_case "No contention: lock, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-unlock "$helper"
+
+test_case "Contention: lock, lock, unlock"
+ok <<EOF
+LOCK
+CONTENTION
+NOLOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-lock-unlock "$helper"
+
+test_case "No contention: lock, unlock, lock, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-unlock-lock-unlock "$helper"
+
+test_case "Cancelled: unlock while lock still in progress"
+ok <<EOF
+CANCEL
+NOLOCK
+EOF
+unit_test cluster_mutex_test lock-cancel-check "$helper"
+
+test_case "Cancelled: unlock while lock still in progress, unlock again"
+ok <<EOF
+CANCEL
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-cancel-unlock "$helper"
+
+test_case "PPID doesn't go away: lock, wait, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-wait-unlock "$helper"
+
+test_case "PPID goes away: lock, wait, lock, unlock"
+ok <<EOF
+LOCK
+parent gone
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-ppid-gone-lock-unlock "$helper"
+
+test_case "Recheck off, lock file removed"
+ok <<EOF
+LOCK
+LOCK
+UNLOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-file-removed-no-recheck \
+ "$helper 0" "$lockfile"
+
+test_case "Recheck on, lock file not removed"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-file-wait-recheck-unlock \
+ "$helper 5" 10
+
+test_case "Recheck on, lock file removed"
+ok <<EOF
+LOCK
+ctdb_mutex_fcntl_helper: lock lost - lock file "${lockfile}" open failed (ret=2)
+LOST
+EOF
+unit_test cluster_mutex_test lock-file-removed "$helper 5" "$lockfile"
+
+test_case "Recheck on, lock file replaced"
+ok <<EOF
+LOCK
+ctdb_mutex_fcntl_helper: lock lost - lock file "${lockfile}" inode changed
+LOST
+EOF
+unit_test cluster_mutex_test lock-file-changed "$helper 10" "$lockfile"
+
+test_case "Recheck on, ping on, child isn't blocked"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-io-timeout "$helper 5 7" "$lockfile" 0 0
+
+test_case "Recheck on, ping on, child waits, child isn't blocked"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-io-timeout "$helper 5 3" "$lockfile" 7 0
+
+test_case "Recheck on, ping on, child waits, child blocks for short time"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-io-timeout "$helper 5 7" "$lockfile" 1 2
+
+
+test_case "Recheck on, ping on, child waits, child blocks causing ping timeout"
+ok <<EOF
+LOCK
+ctdb_mutex_fcntl_helper: ping timeout from lock test child
+LOST
+EOF
+unit_test cluster_mutex_test lock-io-timeout "$helper 5 3" "$lockfile" 1 7
diff --git a/ctdb/tests/UNIT/cunit/cluster_mutex_003.sh b/ctdb/tests/UNIT/cunit/cluster_mutex_003.sh
new file mode 100755
index 0000000..57319bd
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/cluster_mutex_003.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+
+# This tests a helper, externally configured via !
+
+# By default this is the fcntl helper, so this is a subset of test 002.
+# However, other helpers can be tested by setting CTDB_TEST_MUTEX_HELPER.
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+export CTDB_CLUSTER_MUTEX_HELPER="/bin/false"
+
+lockfile="${CTDB_TEST_TMP_DIR}/cluster_mutex.lockfile"
+trap 'rm -f ${lockfile}' 0 # -f: a custom helper may not use this file
+
+if [ -n "$CTDB_TEST_MUTEX_HELPER" ] ; then
+ helper="$CTDB_TEST_MUTEX_HELPER"
+else
+ t="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb_mutex_fcntl_helper"
+ helper="!${t} ${lockfile}"
+fi
+
+test_case "No contention: lock, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-unlock "$helper"
+
+test_case "Contention: lock, lock, unlock"
+ok <<EOF
+LOCK
+CONTENTION
+NOLOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-lock-unlock "$helper"
+
+test_case "No contention: lock, unlock, lock, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-unlock-lock-unlock "$helper"
+
+test_case "Cancelled: unlock while lock still in progress"
+ok <<EOF
+CANCEL
+NOLOCK
+EOF
+unit_test cluster_mutex_test lock-cancel-check "$helper"
+
+test_case "Cancelled: unlock while lock still in progress, unlock again"
+ok <<EOF
+CANCEL
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-cancel-unlock "$helper"
+
+test_case "PPID doesn't go away: lock, wait, unlock"
+ok <<EOF
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-wait-unlock "$helper"
+
+test_case "PPID goes away: lock, wait, lock, unlock"
+ok <<EOF
+LOCK
+parent gone
+LOCK
+UNLOCK
+EOF
+unit_test cluster_mutex_test lock-ppid-gone-lock-unlock "$helper"
diff --git a/ctdb/tests/UNIT/cunit/cmdline_test_001.sh b/ctdb/tests/UNIT/cunit/cmdline_test_001.sh
new file mode 100755
index 0000000..e959000
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/cmdline_test_001.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+unit_test cmdline_test 1
+
+ok <<EOF
+Command 'nofunc' has no implementation function
+Command 'nohelp' has no help msg
+Command 'really really long command with lots of words' is too long (85)
+Command 'longhelp' help too long (90)
+EOF
+unit_test cmdline_test 2
+
+ok <<EOF
+Option has no long name
+Option 'debug' has unsupported type
+Option 'debug' has invalid arg
+EOF
+unit_test cmdline_test 3
+
+ok <<EOF
+Usage: test4 [<options>] <command> [<args>]
+
+Help Options:
+ -h, --help Show this help message
+
+Options:
+ -c, --count=INT Option help of length thirty.
+ -v, --value=Value help of length 23 Short description
+
+Commands:
+ A really really long command <a long arguments message> This is a really long help message
+ short command <short arg msg> short msg for short command
+Usage: test4 [-h] [-h|--help] [-c|--count=INT]
+ [-v|--value=Value help of length 23] <command> [<args>]
+
+ short command <short arg msg> short msg for short command
+EOF
+unit_test cmdline_test 4
+
+ok <<EOF
+Usage: test5 [<options>] <command> [<args>]
+
+Help Options:
+ -h, --help Show this help message
+
+Action Commands:
+ action one action one help
+ action two action two help
+Usage: test5 [<options>] <command> [<args>]
+
+Help Options:
+ -h, --help Show this help message
+
+Action Commands:
+ action one action one help
+ action two action two help
+Usage: test5 [<options>] <command> [<args>]
+
+Help Options:
+ -h, --help Show this help message
+
+Action Commands:
+ action one action one help
+ action two action two help
+EOF
+unit_test cmdline_test 5
+
+ok <<EOF
+arg1
+EOF
+unit_test cmdline_test 6
+
+ok <<EOF
+Usage: test7 [<options>] <command> [<args>]
+
+Help Options:
+ -h, --help Show this help message
+
+Basic Commands:
+ cmd1 command one help
+ cmd2 command two help
+
+Advanced Commands:
+ cmd3 command three help
+ cmd4 command four help
+
+Ultimate Commands:
+ cmd5 command five help
+ cmd6 command six help
+
+one
+three
+six
+EOF
+unit_test cmdline_test 7
diff --git a/ctdb/tests/UNIT/cunit/comm_test_001.sh b/ctdb/tests/UNIT/cunit/comm_test_001.sh
new file mode 100755
index 0000000..ac09f5c
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/comm_test_001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+
+ok_null
+unit_test comm_test 1
+
+ok_null
+unit_test comm_test 2
+
+ok "100 2048 500 4096 1024 8192 200 16384 300 32768 400 65536 1048576 "
+unit_test comm_test 3
diff --git a/ctdb/tests/UNIT/cunit/comm_test_002.sh b/ctdb/tests/UNIT/cunit/comm_test_002.sh
new file mode 100755
index 0000000..a2fbf51
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/comm_test_002.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+socket="${CTDB_TEST_TMP_DIR}/test_sock.$$"
+num_clients=10
+
+remove_socket ()
+{
+ rm -f "$socket"
+}
+
+test_cleanup remove_socket
+
+ok_null
+
+unit_test comm_server_test "$socket" $num_clients &
+pid=$!
+
+for i in $(seq 1 $num_clients) ; do
+ unit_test comm_client_test "$socket"
+done
+
+wait $pid
diff --git a/ctdb/tests/UNIT/cunit/conf_test_001.sh b/ctdb/tests/UNIT/cunit/conf_test_001.sh
new file mode 100755
index 0000000..188964e
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/conf_test_001.sh
@@ -0,0 +1,196 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+conffile="${CTDB_TEST_TMP_DIR}/config.$$"
+
+remove_files ()
+{
+ rm -f "$conffile" "${conffile}.reload" # .reload is created for conf_test 11
+}
+
+test_cleanup remove_files
+
+ok_null
+unit_test conf_test 1
+
+ok <<EOF
+conf: unknown section [section1]
+EOF
+unit_test conf_test 2
+
+ok <<EOF
+conf: option "key1" already exists
+EOF
+unit_test conf_test 3
+
+ok <<EOF
+conf: option "key1" already exists
+EOF
+unit_test conf_test 4
+
+ok_null
+unit_test conf_test 5
+
+ok <<EOF
+[section1]
+ key1 = foobar # temporary
+ key2 = 20 # temporary
+ key3 = false # temporary
+EOF
+unit_test conf_test 6
+
+ok <<EOF
+conf: validation for option "key1" failed
+conf: validation for option "key2" failed
+conf: validation for option "key3" failed
+EOF
+unit_test conf_test 7
+
+cat > "$conffile" <<EOF
+[section1]
+EOF
+
+required_error EINVAL <<EOF
+conf: validation for section [section1] failed
+[section1]
+ # key1 = default
+EOF
+unit_test conf_test 8 "$conffile"
+
+cat > "$conffile" <<EOF
+[section1]
+ key1 = unknown
+EOF
+
+required_error EINVAL <<EOF
+conf: validation for section [section1] failed
+[section1]
+ # key1 = default
+EOF
+unit_test conf_test 8 "$conffile"
+
+cat > "$conffile" <<EOF
+[section1]
+ key1 =
+EOF
+
+required_error EINVAL <<EOF
+conf: empty value [section1] -> "key1"
+[section1]
+ # key1 = value1
+ # key2 = 10
+ key3 = false # temporary
+EOF
+unit_test conf_test 9 "$conffile"
+
+cat > "$conffile" <<EOF
+[section1]
+ key3 =
+EOF
+
+required_error EINVAL <<EOF
+conf: empty value [section1] -> "key3"
+[section1]
+ # key1 = value1
+ # key2 = 10
+ key3 = false # temporary
+EOF
+unit_test conf_test 9 "$conffile"
+
+cat > "$conffile" <<EOF
+
+[section1]
+ key1 = value2
+ key2 = 20 # comment
+key3 = false
+EOF
+
+ok <<EOF
+[section1]
+ key1 = value2
+ key2 = 20
+ # key3 = true
+EOF
+unit_test conf_test 9 "$conffile"
+
+cat > "$conffile" <<EOF
+[section1]
+key1 = value2
+EOF
+
+ok <<EOF
+[section1]
+ key1 = value2
+ # key2 = 10
+ # key3 = true
+EOF
+unit_test conf_test 9 "$conffile"
+
+cat > "$conffile" <<EOF
+[section2]
+ foo = bar
+EOF
+
+required_error EINVAL <<EOF
+conf: unknown section [section2]
+conf: unknown section for option "foo"
+[section1]
+ # key1 = value1
+ # key2 = 10
+ key3 = false # temporary
+EOF
+unit_test conf_test 10 "$conffile"
+
+cat > "$conffile" <<EOF
+[section1]
+ key1 = value2
+ foo = bar
+ key2 = 20
+EOF
+
+required_error EINVAL <<EOF
+conf: unknown option [section1] -> "foo"
+[section1]
+ # key1 = value1
+ # key2 = 10
+ key3 = false # temporary
+EOF
+unit_test conf_test 10 "$conffile"
+
+cat > "$conffile" <<EOF
+[section1]
+ key1 = value2
+ key2 = 20
+ key3 = false
+EOF
+
+touch "${conffile}.reload"
+
+ok <<EOF
+[section1]
+ # key1 = value1
+ # key2 = 10
+ # key3 = true
+EOF
+unit_test conf_test 11 "$conffile"
+
+cat > "$conffile" <<EOF
+[section1]
+ key1 = value2
+ key2 = 20
+ key3 = false
+EOF
+
+cat > "${conffile}.reload" <<EOF
+[section1]
+ key1 = value3
+EOF
+
+ok <<EOF
+[section1]
+ key1 = value3
+ # key2 = 10
+ # key3 = true
+EOF
+unit_test conf_test 11 "$conffile"
diff --git a/ctdb/tests/UNIT/cunit/config_test_001.sh b/ctdb/tests/UNIT/cunit/config_test_001.sh
new file mode 100755
index 0000000..70bf77f
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/config_test_001.sh
@@ -0,0 +1,115 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_TOOLS_HELPER_DIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+conffile="${CTDB_BASE}/ctdb.conf"
+
+remove_files ()
+{
+ rm -f "$conffile"
+}
+
+test_cleanup remove_files
+
+# Get the default values that are dependent on install prefix
+logging_location=$(ctdb-config get "logging" "location")
+database_volatile_dbdir=$(ctdb-config get \
+ "database" \
+ "volatile database directory")
+database_persistent_dbdir=$(ctdb-config get \
+ "database" \
+ "persistent database directory")
+database_state_dbdir=$(ctdb-config get \
+ "database" \
+ "state database directory")
+
+ok <<EOF
+[logging]
+ # location = ${logging_location}
+ # log level = NOTICE
+[cluster]
+ # transport = tcp
+ # node address =
+ # cluster lock =
+ # recovery lock =
+ # leader timeout = 5
+ # leader capability = true
+[database]
+ # volatile database directory = ${database_volatile_dbdir}
+ # persistent database directory = ${database_persistent_dbdir}
+ # state database directory = ${database_state_dbdir}
+ # lock debug script =
+ # tdb mutexes = true
+[event]
+ # debug script =
+[failover]
+ # disabled = false
+[legacy]
+ # realtime scheduling = true
+ # lmaster capability = true
+ # start as stopped = false
+ # start as disabled = false
+ # script log level = ERROR
+EOF
+unit_test ctdb-config dump
+
+required_result 2 <<EOF
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+EOF
+
+ok_null
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[foobar]
+EOF
+
+required_result 22 <<EOF
+conf: unknown section [foobar]
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+foobar = cat
+EOF
+
+required_result 22 <<EOF
+conf: unknown section for option "foobar"
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+required_result 2 <<EOF
+Configuration option [section] -> "key" not defined
+EOF
+unit_test ctdb-config get section key
+
+# Confirm that an unknown key doesn't stop the rest of the file from
+# loading
+cat > "$conffile" <<EOF
+[database]
+ unknown key = 123
+
+[logging]
+ log level = debug
+EOF
+
+required_error EINVAL <<EOF
+conf: unknown option [database] -> "unknown key"
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+ok <<EOF
+debug
+EOF
+unit_test ctdb-config get "logging" "log level"
diff --git a/ctdb/tests/UNIT/cunit/config_test_002.sh b/ctdb/tests/UNIT/cunit/config_test_002.sh
new file mode 100755
index 0000000..23b0863
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/config_test_002.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_TOOLS_HELPER_DIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+conffile="${CTDB_BASE}/ctdb.conf"
+
+remove_files ()
+{
+ rm -f "$conffile"
+}
+
+test_cleanup remove_files
+
+cat > "$conffile" <<EOF
+EOF
+
+ok <<EOF
+NOTICE
+EOF
+unit_test ctdb-config get "logging" "log level"
+
+cat > "$conffile" <<EOF
+[logging]
+ location = syslog:magic
+EOF
+
+required_result 22 <<EOF
+conf: validation for option "location" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[logging]
+ log level = high
+EOF
+
+required_result 22 <<EOF
+conf: validation for option "log level" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[logging]
+ location = syslog
+ log level = notice
+EOF
+
+ok_null
+unit_test ctdb-config validate
+
+ok <<EOF
+syslog
+EOF
+unit_test ctdb-config get "logging" "location"
+
+ok <<EOF
+notice
+EOF
+unit_test ctdb-config get "logging" "log level"
diff --git a/ctdb/tests/UNIT/cunit/config_test_003.sh b/ctdb/tests/UNIT/cunit/config_test_003.sh
new file mode 100755
index 0000000..4e8d553
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/config_test_003.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_TOOLS_HELPER_DIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+conffile="${CTDB_BASE}/ctdb.conf"
+scriptfile="${CTDB_BASE}/debug-hung-script.sh"
+# Cleanup also removes the debug script, in case the run ends before the rm below
+remove_files ()
+{
+ rm -f "$conffile" "$scriptfile"
+}
+
+test_cleanup remove_files
+
+cat > "$conffile" <<EOF
+EOF
+
+ok <<EOF
+EOF
+unit_test ctdb-config get "event" "debug script"
+
+cat > "$conffile" <<EOF
+[event]
+ debug script = debug-hung-script.sh
+EOF
+
+touch "$scriptfile"
+
+required_result 22 <<EOF
+debug script $scriptfile is not executable
+conf: validation for option "debug script" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+chmod +x "$scriptfile"
+
+ok_null
+unit_test ctdb-config validate
+
+rm -f "$scriptfile"
+
+required_result 22 <<EOF
+debug script $scriptfile does not exist
+conf: validation for option "debug script" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
diff --git a/ctdb/tests/UNIT/cunit/config_test_004.sh b/ctdb/tests/UNIT/cunit/config_test_004.sh
new file mode 100755
index 0000000..ebbc05b
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/config_test_004.sh
@@ -0,0 +1,144 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_TOOLS_HELPER_DIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+conffile="$CTDB_BASE/ctdb.conf"
+
+remove_files ()
+{
+ rm -f "$conffile"
+}
+
+test_cleanup remove_files
+
+cat > "$conffile" <<EOF
+EOF
+
+ok <<EOF
+tcp
+EOF
+unit_test ctdb-config get "cluster" "transport"
+
+ok <<EOF
+EOF
+unit_test ctdb-config get "cluster" "node address"
+
+ok <<EOF
+EOF
+unit_test ctdb-config get "cluster" "cluster lock"
+
+ok <<EOF
+5
+EOF
+unit_test ctdb-config get "cluster" "leader timeout"
+
+ok <<EOF
+true
+EOF
+unit_test ctdb-config get "cluster" "leader capability"
+
+cat > "$conffile" <<EOF
+[cluster]
+ transport = invalid
+EOF
+
+required_result 22 <<EOF
+Invalid value for [cluster] -> transport = invalid
+conf: validation for option "transport" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ node address = 10.1.2.3
+EOF
+
+ok <<EOF
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ node address = fc00:10:1:2::123
+EOF
+
+ok <<EOF
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ node address = 10.1.2.3:123
+EOF
+
+required_result 22 <<EOF
+Invalid value for [cluster] -> node address = 10.1.2.3:123
+conf: validation for option "node address" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ cluster lock = /foo/bar
+EOF
+
+required_result 0 <<EOF
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ recovery lock = /foo/bar
+EOF
+
+required_result 0 <<EOF
+Configuration option [cluster] -> recovery lock is deprecated
+EOF
+unit_test ctdb-config -d WARNING validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ leader timeout = 10
+EOF
+
+required_result 0 <<EOF
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ leader timeout = 0
+EOF
+
+required_result 22 <<EOF
+Invalid value for [cluster] -> leader timeout = 0
+conf: validation for option "leader timeout" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ leader timeout = -5
+EOF
+
+required_result 22 <<EOF
+conf: invalid value [cluster] -> "leader timeout" = "-5"
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+ leader capability = false
+EOF
+
+required_result 0 <<EOF
+EOF
+unit_test ctdb-config validate
diff --git a/ctdb/tests/UNIT/cunit/config_test_005.sh b/ctdb/tests/UNIT/cunit/config_test_005.sh
new file mode 100755
index 0000000..c16a43f
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/config_test_005.sh
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_HELPER_BINDIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+conffile="${CTDB_BASE}/ctdb.conf"
+scriptfile="${CTDB_BASE}/debug_locks.sh"
+dbdir="${CTDB_BASE}/dbdir"
+dbdir_volatile="${dbdir}/volatile"
+dbdir_persistent="${dbdir}/persistent"
+dbdir_state="${dbdir}/state"
+
+remove_files ()
+{
+ rm -f "$conffile" "$scriptfile"
+}
+
+test_cleanup remove_files
+
+cat > "$conffile" <<EOF
+[database]
+ volatile database directory = ${dbdir_volatile}
+ persistent database directory = ${dbdir_persistent}
+ state database directory = ${dbdir_state}
+EOF
+
+required_result 22 <<EOF
+volatile database directory "${dbdir_volatile}" does not exist
+conf: validation for option "volatile database directory" failed
+persistent database directory "${dbdir_persistent}" does not exist
+conf: validation for option "persistent database directory" failed
+state database directory "${dbdir_state}" does not exist
+conf: validation for option "state database directory" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+mkdir -p "$dbdir_volatile"
+
+required_result 22 <<EOF
+persistent database directory "${dbdir_persistent}" does not exist
+conf: validation for option "persistent database directory" failed
+state database directory "${dbdir_state}" does not exist
+conf: validation for option "state database directory" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+mkdir -p "$dbdir_persistent"
+
+required_result 22 <<EOF
+state database directory "${dbdir_state}" does not exist
+conf: validation for option "state database directory" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+mkdir -p "$dbdir_state"
+
+required_result 0 <<EOF
+EOF
+unit_test ctdb-config validate
+
+ok <<EOF
+EOF
+unit_test ctdb-config get "database" "lock debug script"
+
+cat > "$conffile" <<EOF
+[database]
+ lock debug script = $scriptfile
+EOF
+
+touch "$scriptfile"
+
+required_result 22 <<EOF
+lock debug script $scriptfile is not executable
+conf: validation for option "lock debug script" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+chmod +x "$scriptfile"
+
+ok_null
+unit_test ctdb-config validate
+
+rm -f "$scriptfile"
+
+required_result 22 <<EOF
+lock debug script $scriptfile does not exist
+conf: validation for option "lock debug script" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
diff --git a/ctdb/tests/UNIT/cunit/config_test_006.sh b/ctdb/tests/UNIT/cunit/config_test_006.sh
new file mode 100755
index 0000000..622fb66
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/config_test_006.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_HELPER_BINDIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+conffile="${CTDB_BASE}/ctdb.conf"
+
+remove_files ()
+{
+ rm -f "$conffile"
+}
+
+test_cleanup remove_files
+
+cat > "$conffile" <<EOF
+EOF
+
+ok <<EOF
+true
+EOF
+unit_test ctdb-config get "legacy" "realtime scheduling"
+
+ok <<EOF
+true
+EOF
+unit_test ctdb-config get "legacy" "lmaster capability"
+
+ok <<EOF
+false
+EOF
+unit_test ctdb-config get "legacy" "start as stopped"
+
+ok <<EOF
+false
+EOF
+unit_test ctdb-config get "legacy" "start as disabled"
+
+ok <<EOF
+ERROR
+EOF
+unit_test ctdb-config get "legacy" "script log level"
+
+cat > "$conffile" <<EOF
+[legacy]
+ script log level = INVALID
+EOF
+
+required_result 22 <<EOF
+Invalid value for [legacy] -> script log level = INVALID
+conf: validation for option "script log level" failed
+Failed to load config file ${conffile}
+EOF
+unit_test ctdb-config validate
diff --git a/ctdb/tests/UNIT/cunit/config_test_007.sh b/ctdb/tests/UNIT/cunit/config_test_007.sh
new file mode 100755
index 0000000..8804448
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/config_test_007.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_HELPER_BINDIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+conffile="${CTDB_BASE}/ctdb.conf"
+
+remove_files ()
+{
+ rm -f "$conffile"
+}
+
+test_cleanup remove_files
+
+cat > "$conffile" <<EOF
+EOF
+
+ok <<EOF
+false
+EOF
+unit_test ctdb-config get "failover" "disabled"
diff --git a/ctdb/tests/UNIT/cunit/ctdb_io_test_001.sh b/ctdb/tests/UNIT/cunit/ctdb_io_test_001.sh
new file mode 100755
index 0000000..b6d3bce
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/ctdb_io_test_001.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test ctdb_io_test 1
+unit_test ctdb_io_test 2
+unit_test ctdb_io_test 3
+unit_test ctdb_io_test 4
diff --git a/ctdb/tests/UNIT/cunit/db_hash_test_001.sh b/ctdb/tests/UNIT/cunit/db_hash_test_001.sh
new file mode 100755
index 0000000..76c38fe
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/db_hash_test_001.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test db_hash_test
diff --git a/ctdb/tests/UNIT/cunit/event_protocol_test_001.sh b/ctdb/tests/UNIT/cunit/event_protocol_test_001.sh
new file mode 100755
index 0000000..8d5f932
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/event_protocol_test_001.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test event_protocol_test 1 100
diff --git a/ctdb/tests/UNIT/cunit/event_script_test_001.sh b/ctdb/tests/UNIT/cunit/event_script_test_001.sh
new file mode 100755
index 0000000..0d6a38e
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/event_script_test_001.sh
@@ -0,0 +1,127 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+scriptdir="${CTDB_TEST_TMP_DIR}/scriptdir"
+mkdir -p "${scriptdir}"
+
+scriptdir=$(cd "$scriptdir" && echo "$PWD")
+
+test_cleanup "rm -rf ${scriptdir}"
+
+# Invalid path
+invalid="${scriptdir}/notfound"
+ok <<EOF
+Script list ${invalid} failed with result=$(errcode ENOENT)
+EOF
+unit_test event_script_test list "${invalid}"
+
+# Empty directory
+ok <<EOF
+No scripts found
+EOF
+unit_test event_script_test list "$scriptdir"
+
+# Invalid script, doesn't end in ".script"
+touch "${scriptdir}/prog"
+
+ok <<EOF
+No scripts found
+EOF
+unit_test event_script_test list "$scriptdir"
+
+# Is not found because enabling "prog" actually looks for "prog.script"
+ok <<EOF
+Script enable ${scriptdir} prog completed with result=$(errcode ENOENT)
+EOF
+unit_test event_script_test enable "$scriptdir" "prog"
+
+required_result 1 <<EOF
+EOF
+unit_test test -x "${scriptdir}/prog"
+
+# Is not found because enabling "prog" actually looks for "prog.script"
+ok <<EOF
+Script disable ${scriptdir} prog completed with result=$(errcode ENOENT)
+EOF
+unit_test event_script_test disable "$scriptdir" "prog"
+
+# Valid script
+touch "$scriptdir/11.foo.script"
+
+ok <<EOF
+11.foo
+EOF
+unit_test event_script_test list "$scriptdir"
+
+ok <<EOF
+Script enable ${scriptdir} 11.foo completed with result=0
+EOF
+unit_test event_script_test enable "$scriptdir" "11.foo"
+
+ok <<EOF
+EOF
+unit_test test -x "${scriptdir}/11.foo.script"
+
+ok <<EOF
+Script disable ${scriptdir} 11.foo.script completed with result=0
+EOF
+unit_test event_script_test disable "$scriptdir" "11.foo.script"
+
+required_result 1 <<EOF
+EOF
+unit_test test -x "${scriptdir}/11.foo.script"
+
+# Multiple scripts
+touch "${scriptdir}/22.bar.script"
+
+ok <<EOF
+11.foo
+22.bar
+EOF
+unit_test event_script_test list "$scriptdir"
+
+# Symlink to existing file
+ln -s "${scriptdir}/prog" "${scriptdir}/33.link.script"
+
+ok <<EOF
+11.foo
+22.bar
+33.link
+EOF
+unit_test event_script_test list "$scriptdir"
+
+ok <<EOF
+Script enable ${scriptdir} 33.link completed with result=$(errcode EINVAL)
+EOF
+unit_test event_script_test enable "$scriptdir" "33.link"
+
+
+ok <<EOF
+Script disable ${scriptdir} 33.link.script completed with result=$(errcode EINVAL)
+EOF
+unit_test event_script_test disable "$scriptdir" "33.link.script"
+
+# Dangling symlink
+rm "${scriptdir}/33.link.script"
+ln -s "${scriptdir}/nosuchfile" "${scriptdir}/33.link.script"
+
+ok <<EOF
+11.foo
+22.bar
+33.link
+EOF
+unit_test event_script_test list "$scriptdir"
+
+ok <<EOF
+Script enable ${scriptdir} 33.link completed with result=$(errcode ENOENT)
+EOF
+unit_test event_script_test enable "$scriptdir" "33.link"
+
+
+ok <<EOF
+Script disable ${scriptdir} 33.link.script completed with result=$(errcode ENOENT)
+EOF
+unit_test event_script_test disable "$scriptdir" "33.link.script"
+
+exit 0
diff --git a/ctdb/tests/UNIT/cunit/hash_count_test_001.sh b/ctdb/tests/UNIT/cunit/hash_count_test_001.sh
new file mode 100755
index 0000000..3958706
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/hash_count_test_001.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test hash_count_test
diff --git a/ctdb/tests/UNIT/cunit/line_test_001.sh b/ctdb/tests/UNIT/cunit/line_test_001.sh
new file mode 100755
index 0000000..5676893
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/line_test_001.sh
@@ -0,0 +1,90 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+tfile="${CTDB_TEST_TMP_DIR}/line.$$"
+
+remove_files ()
+{
+ rm -f "$tfile"
+}
+
+test_cleanup remove_files
+
+> "$tfile"
+
+ok_null
+unit_test line_test "$tfile"
+
+printf "\0" > "$tfile"
+
+required_result 1 <<EOF
+
+EOF
+
+unit_test line_test "$tfile"
+# Single line without a trailing newline; "echo -n" is not portable in sh
+printf '%s' "hello" > "$tfile"
+
+ok_null
+unit_test line_test "$tfile"
+
+cat <<EOF > "$tfile"
+hello
+world
+EOF
+
+required_result 2 << EOF
+hello
+world
+EOF
+unit_test line_test "$tfile"
+
+required_result 2 << EOF
+hello
+world
+EOF
+unit_test line_test "$tfile"
+
+cat <<EOF > "$tfile"
+This is a really long long line full of random words and hopefully it will be read properly by the line test program and identified as a single line
+EOF
+
+required_result 1 <<EOF
+This is a really long long line full of random words and hopefully it will be read properly by the line test program and identified as a single line
+EOF
+unit_test line_test "$tfile"
+
+cat <<EOF > "$tfile"
+line number one
+line number two
+line number one
+line number two
+line number one
+EOF
+
+required_result 5 <<EOF
+line number one
+line number two
+line number one
+line number two
+line number one
+EOF
+unit_test line_test "$tfile" 64
+
+cat <<EOF > "$tfile"
+this is line number one
+this is line number two
+this is line number three
+this is line number four
+this is line number five
+EOF
+
+required_result 5 <<EOF
+this is line number one
+this is line number two
+this is line number three
+this is line number four
+this is line number five
+EOF
+unit_test line_test "$tfile" 64
diff --git a/ctdb/tests/UNIT/cunit/path_tests_001.sh b/ctdb/tests/UNIT/cunit/path_tests_001.sh
new file mode 100755
index 0000000..5713fc8
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/path_tests_001.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_TOOLS_HELPER_DIR"
+
+setup_ctdb_base "${CTDB_TEST_TMP_DIR}" "ctdb-etc"
+
+ok <<EOF
+$CTDB_BASE/ctdb.conf
+EOF
+unit_test ctdb-path config
+
+ok <<EOF
+$CTDB_BASE/run/foobar.pid
+EOF
+unit_test ctdb-path pidfile foobar
+
+ok <<EOF
+$CTDB_BASE/run/foobar.socket
+EOF
+unit_test ctdb-path socket foobar
+
+ok <<EOF
+$CTDB_BASE/share
+EOF
+unit_test ctdb-path datadir
+
+ok <<EOF
+$CTDB_BASE
+EOF
+unit_test ctdb-path etcdir
+
+ok <<EOF
+$CTDB_BASE/run
+EOF
+unit_test ctdb-path rundir
+
+ok <<EOF
+$CTDB_BASE/var
+EOF
+unit_test ctdb-path vardir
+
+ok <<EOF
+$CTDB_BASE/share/foobar
+EOF
+unit_test ctdb-path datadir append foobar
+
+ok <<EOF
+$CTDB_BASE/foobar
+EOF
+unit_test ctdb-path etcdir append foobar
+
+ok <<EOF
+$CTDB_BASE/run/foobar
+EOF
+unit_test ctdb-path rundir append foobar
+
+ok <<EOF
+$CTDB_BASE/var/foobar
+EOF
+unit_test ctdb-path vardir append foobar
diff --git a/ctdb/tests/UNIT/cunit/pidfile_test_001.sh b/ctdb/tests/UNIT/cunit/pidfile_test_001.sh
new file mode 100755
index 0000000..cf48403
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/pidfile_test_001.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+pidfile=$(TMPDIR="$CTDB_TEST_TMP_DIR" mktemp)
+# Quote the path so a temporary directory containing whitespace is not split
+ok_null
+unit_test pidfile_test "$pidfile"
diff --git a/ctdb/tests/UNIT/cunit/pkt_read_001.sh b/ctdb/tests/UNIT/cunit/pkt_read_001.sh
new file mode 100755
index 0000000..c951f39
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/pkt_read_001.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test pkt_read_test
diff --git a/ctdb/tests/UNIT/cunit/pkt_write_001.sh b/ctdb/tests/UNIT/cunit/pkt_write_001.sh
new file mode 100755
index 0000000..131af05
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/pkt_write_001.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test pkt_write_test
diff --git a/ctdb/tests/UNIT/cunit/porting_tests_001.sh b/ctdb/tests/UNIT/cunit/porting_tests_001.sh
new file mode 100755
index 0000000..bdb7fc5
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/porting_tests_001.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+socket="${CTDB_TEST_TMP_DIR}/test_sock.$$"
+
+remove_socket ()
+{
+ rm -f "$socket"
+}
+
+test_cleanup remove_socket
+
+ok_null
+unit_test porting_tests --socket="$socket"
diff --git a/ctdb/tests/UNIT/cunit/protocol_test_001.sh b/ctdb/tests/UNIT/cunit/protocol_test_001.sh
new file mode 100755
index 0000000..7f68c48
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/protocol_test_001.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test protocol_basic_test 1 1000
diff --git a/ctdb/tests/UNIT/cunit/protocol_test_002.sh b/ctdb/tests/UNIT/cunit/protocol_test_002.sh
new file mode 100755
index 0000000..51e0513
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/protocol_test_002.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test protocol_types_test 1 1000
diff --git a/ctdb/tests/UNIT/cunit/protocol_test_012.sh b/ctdb/tests/UNIT/cunit/protocol_test_012.sh
new file mode 100755
index 0000000..b9fd492
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/protocol_test_012.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test protocol_types_compat_test 1 1000
diff --git a/ctdb/tests/UNIT/cunit/protocol_test_101.sh b/ctdb/tests/UNIT/cunit/protocol_test_101.sh
new file mode 100755
index 0000000..f944c6b
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/protocol_test_101.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test protocol_ctdb_test 1 100
diff --git a/ctdb/tests/UNIT/cunit/protocol_test_111.sh b/ctdb/tests/UNIT/cunit/protocol_test_111.sh
new file mode 100755
index 0000000..28d190c
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/protocol_test_111.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test protocol_ctdb_compat_test 1 100
diff --git a/ctdb/tests/UNIT/cunit/protocol_test_201.sh b/ctdb/tests/UNIT/cunit/protocol_test_201.sh
new file mode 100755
index 0000000..012db90
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/protocol_test_201.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+unit_test protocol_util_test
diff --git a/ctdb/tests/UNIT/cunit/rb_test_001.sh b/ctdb/tests/UNIT/cunit/rb_test_001.sh
new file mode 100755
index 0000000..25d3ceb
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/rb_test_001.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+output="\
+testing trbt_insertarray32_callback
+traverse data:3
+traverse data:2
+traverse data:1
+
+deleting key4
+traverse data:3
+traverse data:2
+traverse data:1
+
+deleting key2
+traverse data:3
+traverse data:1
+
+deleting key3
+traverse data:3
+
+deleting key1
+
+run random insert and delete for 60 seconds
+
+deleting all entries"
+
+ok "$output"
+
+unit_test rb_test
diff --git a/ctdb/tests/UNIT/cunit/reqid_test_001.sh b/ctdb/tests/UNIT/cunit/reqid_test_001.sh
new file mode 100755
index 0000000..06259ba
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/reqid_test_001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+output=$(
+for i in $(seq 0 1023) ; do
+ echo "WARNING: attempt to remove unset id $i in idtree"
+done
+)
+
+ok "$output"
+
+unit_test reqid_test
diff --git a/ctdb/tests/UNIT/cunit/run_event_001.sh b/ctdb/tests/UNIT/cunit/run_event_001.sh
new file mode 100755
index 0000000..4df3b4b
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/run_event_001.sh
@@ -0,0 +1,137 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+# Invalid path (expected errno looked up by name, as elsewhere in this file)
+required_result 1 <<EOF
+run_event_init() failed, ret=$(errcode ENOENT)
+EOF
+unit_test run_event_test /a/b/c list
+
+scriptdir=$(TMPDIR="$CTDB_TEST_TMP_DIR" mktemp -d)
+
+# Empty directory
+ok <<EOF
+No event scripts found
+EOF
+unit_test run_event_test "$scriptdir" list
+
+cat > "$scriptdir/prog" <<EOF
+#!/bin/sh
+
+echo hello
+EOF
+
+# Invalid script, doesn't end in ".script"
+ok <<EOF
+No event scripts found
+EOF
+unit_test run_event_test "$scriptdir" list
+
+# Is not found because enabling "prog" actually looks for "prog.script"
+ok <<EOF
+Script enable prog completed with result=$(errcode ENOENT)
+EOF
+unit_test run_event_test "$scriptdir" enable prog
+
+required_result 1 <<EOF
+EOF
+unit_test test -x "${scriptdir}/prog"
+
+cat > "$scriptdir/11.foo.script" <<EOF
+#!/bin/sh
+
+echo hello
+EOF
+
+# Valid script
+ok <<EOF
+11.foo
+EOF
+unit_test run_event_test "$scriptdir" list
+
+ok <<EOF
+Script enable 11.foo completed with result=0
+EOF
+unit_test run_event_test "$scriptdir" enable 11.foo
+
+ok <<EOF
+EOF
+unit_test test -x "${scriptdir}/11.foo.script"
+
+ok <<EOF
+11.foo: hello
+Event monitor completed with result=0
+11.foo result=0
+EOF
+unit_test run_event_test "$scriptdir" run 10 monitor
+
+cat > "$scriptdir/22.bar.script" <<EOF
+#!/bin/sh
+
+exit 1
+EOF
+
+# Multiple scripts
+ok <<EOF
+11.foo
+22.bar
+EOF
+unit_test run_event_test "$scriptdir" list
+
+ok <<EOF
+Script enable 22.bar completed with result=0
+EOF
+unit_test run_event_test "$scriptdir" enable 22.bar
+
+ok <<EOF
+11.foo: hello
+Event monitor completed with result=1
+11.foo result=0
+22.bar result=1
+EOF
+unit_test run_event_test "$scriptdir" run 10 monitor
+
+# Disable script
+ok <<EOF
+Script disable 22.bar completed with result=0
+EOF
+unit_test run_event_test "$scriptdir" disable 22.bar
+
+required_result 1 <<EOF
+EOF
+unit_test test -x "${scriptdir}/22.bar.script"
+
+ok <<EOF
+11.foo: hello
+Event monitor completed with result=0
+11.foo result=0
+22.bar result=-$(errcode ENOEXEC)
+EOF
+unit_test run_event_test "$scriptdir" run 10 monitor
+
+cat > "$scriptdir/22.bar.script" <<EOF
+#!/bin/sh
+
+echo before sleep
+sleep 10
+echo after sleep
+EOF
+
+# Timed out script
+ok <<EOF
+Script enable 22.bar completed with result=0
+EOF
+unit_test run_event_test "$scriptdir" enable 22.bar
+
+ok <<EOF
+11.foo: hello
+22.bar: before sleep
+Event monitor completed with result=-$(errcode ETIMEDOUT)
+11.foo result=0
+22.bar result=-$(errcode ETIMEDOUT)
+EOF
+unit_test run_event_test "$scriptdir" run 5 monitor
+
+rm -rf "$scriptdir"
+exit 0
diff --git a/ctdb/tests/UNIT/cunit/run_proc_001.sh b/ctdb/tests/UNIT/cunit/run_proc_001.sh
new file mode 100755
index 0000000..3f48885
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/run_proc_001.sh
@@ -0,0 +1,159 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+# Invalid path
+ok <<EOF
+Process exited with error $(errcode ENOENT)
+EOF
+unit_test run_proc_test 0 -1 /a/b/c
+
+# Non-executable path
+prog=$(TMPDIR="$CTDB_TEST_TMP_DIR" mktemp)
+cat > "$prog" <<EOF
+echo hello
+EOF
+
+ok <<EOF
+Process exited with error $(errcode EACCES)
+EOF
+unit_test run_proc_test 0 -1 "$prog"
+
+# Executable path
+chmod +x "$prog"
+
+ok <<EOF
+Process exited with error $(errcode ENOEXEC)
+EOF
+unit_test run_proc_test 0 -1 "$prog"
+
+# Capture output
+cat > "$prog" <<EOF
+#!/bin/sh
+echo hello
+EOF
+
+ok <<EOF
+Process exited with status 0
+Output = (hello
+)
+EOF
+unit_test run_proc_test 0 -1 "$prog"
+
+# Specify timeout
+ok <<EOF
+Process exited with status 0
+Output = (hello
+)
+EOF
+unit_test run_proc_test 5 -1 "$prog"
+
+# Redirected output
+output=$(TMPDIR="$CTDB_TEST_TMP_DIR" mktemp)
+cat > "$prog" <<EOF
+#!/bin/sh
+exec >"$output" 2>&1
+echo hello
+EOF
+
+ok <<EOF
+Process exited with status 0
+EOF
+unit_test run_proc_test 0 -1 "$prog"
+
+ok <<EOF
+hello
+EOF
+unit_test cat "$output"
+
+# Exit with error
+cat > "$prog" <<EOF
+#!/bin/sh
+exit 1
+EOF
+
+ok <<EOF
+Process exited with status 1
+EOF
+unit_test run_proc_test 0 -1 "$prog"
+
+# Exit with signal
+cat > "$prog" <<EOF
+#!/bin/sh
+kill \$$
+EOF
+
+ok <<EOF
+Process exited with signal 15
+EOF
+unit_test run_proc_test 0 -1 "$prog"
+
+# Exit with timeout
+cat > "$prog" <<EOF
+#!/bin/sh
+echo "Sleeping for 5 seconds"
+sleep 5
+EOF
+
+result_filter ()
+{
+ _pid="[0-9][0-9]*"
+ sed -e "s|= ${_pid}|= PID|"
+}
+
+ok <<EOF
+Process exited with error $(errcode ETIMEDOUT)
+Child = PID
+Output = (Sleeping for 5 seconds
+)
+EOF
+unit_test run_proc_test 1 -1 "$prog"
+
+# No zombie processes
+pidfile=$(TMPDIR="$CTDB_TEST_TMP_DIR" mktemp)
+
+cat > "$prog" <<EOF
+#!/bin/sh
+echo \$$ > "$pidfile"
+sleep 10
+EOF
+
+ok <<EOF
+Process exited with error $(errcode ETIMEDOUT)
+Child = PID
+EOF
+unit_test run_proc_test 1 -1 "$prog"
+
+result_filter ()
+{
+ _header=" *PID *TTY *TIME *CMD"
+ _header2=" *PID *TT *STAT *TIME *COMMAND"
+ sed -e "s|^${_header}|HEADER|" -e "s|^${_header2}|HEADER|"
+}
+
+pid=$(cat "$pidfile")
+required_result 1 <<EOF
+HEADER
+EOF
+unit_test ps -p "$pid"
+
+# Redirect stdin
+cat > "$prog" <<EOF
+#!/bin/sh
+cat -
+EOF
+
+cat > "$output" <<EOF
+this is sample input
+EOF
+
+ok <<EOF
+Process exited with status 0
+Output = (this is sample input
+)
+EOF
+(unit_test run_proc_test 0 4 "$prog") 4<"$output"
+
+rm -f "$pidfile"
+rm -f "$output"
+rm -f "$prog"
diff --git a/ctdb/tests/UNIT/cunit/sock_daemon_test_001.sh b/ctdb/tests/UNIT/cunit/sock_daemon_test_001.sh
new file mode 100755
index 0000000..6f360f7
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/sock_daemon_test_001.sh
@@ -0,0 +1,135 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+pidfile="${CTDB_TEST_TMP_DIR}/sock_daemon_test.pid.$$"
+sockpath="${CTDB_TEST_TMP_DIR}/sock_daemon_test.sock.$$"
+
+remove_files ()
+{
+ rm -f "$pidfile"
+ rm -f "$sockpath"
+}
+
+test_cleanup remove_files
+
+result_filter ()
+{
+ # Mask numeric process IDs: "pid=N" -> "pid=PID", "PID N" -> "PID PID"
+ _pid="[0-9][0-9]*"
+ sed -e "s|pid=${_pid}|pid=PID|" -e "s|PID ${_pid}|PID PID|"
+}
+
+
+ok <<EOF
+daemon started, pid=PID
+startup failed, ret=1
+daemon started, pid=PID
+startup failed, ret=2
+daemon started, pid=PID
+startup completed successfully
+listening on $sockpath
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 1
+
+ok <<EOF
+daemon started, pid=PID
+startup completed successfully
+listening on $sockpath
+Received signal $(sigcode SIGUSR1)
+reconfigure failed, ret=1
+Received signal $(sigcode SIGUSR1)
+reconfigure completed successfully
+Received signal 1
+reopen logs, ret=1
+Received signal 1
+reopen logs completed successfully
+Received signal $(sigcode SIGTERM)
+Shutting down
+daemon started, pid=PID
+startup completed successfully
+listening on $sockpath
+Received signal $(sigcode SIGUSR1)
+reconfigure failed, ret=2
+Received signal $(sigcode SIGUSR1)
+reconfigure completed successfully
+Received signal 1
+reopen logs failed, ret=2
+Received signal 1
+reopen logs completed successfully
+Received signal $(sigcode SIGTERM)
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 2
+
+ok <<EOF
+daemon started, pid=PID
+listening on $sockpath
+PID PID gone away, exiting
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 3
+
+ok <<EOF
+daemon started, pid=PID
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 4
+
+ok <<EOF
+daemon started, pid=PID
+listening on $sockpath
+Received signal $(sigcode SIGTERM)
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 5
+
+ok <<EOF
+daemon started, pid=PID
+listening on $sockpath
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 6
+
+ok <<EOF
+daemon started, pid=PID
+startup completed successfully
+Received signal $(sigcode SIGTERM)
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 7
+
+ok <<EOF
+daemon started, pid=PID
+startup completed successfully
+Received signal $(sigcode SIGTERM)
+Shutting down
+daemon started, pid=PID
+startup completed successfully
+Received signal $(sigcode SIGTERM)
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 8
+
+ok <<EOF
+daemon started, pid=PID
+startup completed successfully
+Received signal $(sigcode SIGTERM)
+Shutting down
+daemon started, pid=PID
+startup completed successfully
+Received signal $(sigcode SIGTERM)
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 9
+
+ok <<EOF
+daemon started, pid=PID
+listening on $sockpath
+daemon started, pid=PID
+listening on $sockpath
+Received signal $(sigcode SIGTERM)
+Shutting down
+EOF
+unit_test sock_daemon_test "$pidfile" "$sockpath" 10
diff --git a/ctdb/tests/UNIT/cunit/sock_io_test_001.sh b/ctdb/tests/UNIT/cunit/sock_io_test_001.sh
new file mode 100755
index 0000000..09a280c
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/sock_io_test_001.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+sockpath="${CTDB_TEST_TMP_DIR}/sock_daemon_test.sock.$$"
+
+ok_null
+
+unit_test sock_io_test "$sockpath"
diff --git a/ctdb/tests/UNIT/cunit/srvid_test_001.sh b/ctdb/tests/UNIT/cunit/srvid_test_001.sh
new file mode 100755
index 0000000..ed09535
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/srvid_test_001.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+
+unit_test srvid_test
diff --git a/ctdb/tests/UNIT/cunit/system_socket_test_001.sh b/ctdb/tests/UNIT/cunit/system_socket_test_001.sh
new file mode 100755
index 0000000..389cec6
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/system_socket_test_001.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ok_null
+unit_test system_socket_test types
diff --git a/ctdb/tests/UNIT/cunit/system_socket_test_002.sh b/ctdb/tests/UNIT/cunit/system_socket_test_002.sh
new file mode 100755
index 0000000..c20bcfe
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/system_socket_test_002.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+tcp_test ()
+{
+ unit_test system_socket_test tcp "$@"
+}
+
+test_case "ACK, IPv4, seq# 0, ack# 0"
+ok <<EOF
+000000 45 00 00 08 00 00 00 00 ff 06 00 00 c0 a8 01 19
+000010 c0 a8 02 4b 01 bd d4 31 00 00 00 00 00 00 00 00
+000020 50 10 04 d2 50 5f 00 00
+000028
+EOF
+tcp_test "192.168.1.25:445" "192.168.2.75:54321" 0 0 0
+
+test_case "RST, IPv4, seq# 0, ack# 0"
+ok <<EOF
+000000 45 00 00 08 00 00 00 00 ff 06 00 00 c0 a8 01 19
+000010 c0 a8 02 4b 01 bd d4 31 00 00 00 00 00 00 00 00
+000020 50 14 04 d2 50 5b 00 00
+000028
+EOF
+tcp_test "192.168.1.25:445" "192.168.2.75:54321" 0 0 1
+
+test_case "RST, IPv4, seq# 12345, ack# 23456"
+ok <<EOF
+000000 45 00 00 08 00 00 00 00 ff 06 00 00 c0 a8 01 19
+000010 c0 a8 02 4b 01 bd d4 31 39 30 00 00 a0 5b 00 00
+000020 50 14 04 d2 76 cf 00 00
+000028
+EOF
+tcp_test "192.168.1.25:445" "192.168.2.75:54321" 12345 23456 1
+
+test_case "ACK, IPv6, seq# 0, ack# 0"
+ok <<EOF
+000000 60 00 00 00 00 14 06 40 fe 80 00 00 00 00 00 00
+000010 6a f7 28 ff fe fa d1 36 fe 80 00 00 00 00 00 00
+000020 6a f7 28 ff fe fb d1 37 01 bd d4 31 00 00 00 00
+000030 00 00 00 00 50 10 04 d2 0f c0 00 00
+00003c
+EOF
+tcp_test "[fe80::6af7:28ff:fefa:d136]:445" \
+ "[fe80::6af7:28ff:fefb:d137]:54321" 0 0 0
+
+test_case "RST, IPv6, seq# 0, ack# 0"
+ok <<EOF
+000000 60 00 00 00 00 14 06 40 fe 80 00 00 00 00 00 00
+000010 6a f7 28 ff fe fa d1 36 fe 80 00 00 00 00 00 00
+000020 6a f7 28 ff fe fb d1 37 01 bd d4 31 00 00 00 00
+000030 00 00 00 00 50 14 04 d2 0f bc 00 00
+00003c
+EOF
+tcp_test "[fe80::6af7:28ff:fefa:d136]:445" \
+ "[fe80::6af7:28ff:fefb:d137]:54321" 0 0 1
+
+test_case "RST, IPv6, seq# 12345, ack# 23456"
+ok <<EOF
+000000 60 00 00 00 00 14 06 40 fe 80 00 00 00 00 00 00
+000010 6a f7 28 ff fe fa d1 36 fe 80 00 00 00 00 00 00
+000020 6a f7 28 ff fe fb d1 37 01 bd d4 31 39 30 00 00
+000030 a0 5b 00 00 50 14 04 d2 36 30 00 00
+00003c
+EOF
+tcp_test "[fe80::6af7:28ff:fefa:d136]:445" \
+ "[fe80::6af7:28ff:fefb:d137]:54321" 12345 23456 1
diff --git a/ctdb/tests/UNIT/cunit/system_socket_test_003.sh b/ctdb/tests/UNIT/cunit/system_socket_test_003.sh
new file mode 100755
index 0000000..c94ac30
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/system_socket_test_003.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+ctdb_test_check_supported_OS "Linux"
+
+arp_test ()
+{
+ unit_test system_socket_test arp "$@"
+}
+
+test_case "IPv4 ARP send"
+ok <<EOF
+000000 ff ff ff ff ff ff 12 34 56 78 9a bc 08 06 00 01
+000010 08 00 06 04 00 01 12 34 56 78 9a bc c0 a8 01 19
+000020 00 00 00 00 00 00 c0 a8 01 19 00 00 00 00 00 00
+000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+000040
+EOF
+arp_test "192.168.1.25" "12:34:56:78:9a:bc"
+
+test_case "IPv4 ARP reply"
+ok <<EOF
+000000 ff ff ff ff ff ff 12 34 56 78 9a bc 08 06 00 01
+000010 08 00 06 04 00 02 12 34 56 78 9a bc c0 a8 01 19
+000020 12 34 56 78 9a bc c0 a8 01 19 00 00 00 00 00 00
+000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+000040
+EOF
+arp_test "192.168.1.25" "12:34:56:78:9a:bc" reply
+
+test_case "IPv6 neighbor advertisement"
+ok <<EOF
+000000 33 33 00 00 00 01 12 34 56 78 9a bc 86 dd 60 00
+000010 00 00 00 20 3a ff fe 80 00 00 00 00 00 00 6a f7
+000020 28 ff fe fa d1 36 ff 02 00 00 00 00 00 00 00 00
+000030 00 00 00 00 00 01 88 00 8d e4 20 00 00 00 fe 80
+000040 00 00 00 00 00 00 6a f7 28 ff fe fa d1 36 02 01
+000050 12 34 56 78 9a bc
+000056
+EOF
+arp_test "fe80::6af7:28ff:fefa:d136" "12:34:56:78:9a:bc"
diff --git a/ctdb/tests/UNIT/cunit/tmon_test_001.sh b/ctdb/tests/UNIT/cunit/tmon_test_001.sh
new file mode 100755
index 0000000..96f706c
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/tmon_test_001.sh
@@ -0,0 +1,195 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+epipe=$(errcode EPIPE)
+eio=$(errcode EIO)
+etimedout=$(errcode ETIMEDOUT)
+
+test_case "No pings, only child monitors, so gets EPIPE"
+ok <<EOF
+parent: async wait start 5
+child: async wait start 10
+parent: async wait end
+child: pipe closed
+EOF
+unit_test tmon_ping_test false 0 5 0 0 false 0 10 0 "$epipe"
+
+test_case "No pings, only parent monitors, so gets EPIPE"
+ok <<EOF
+parent: async wait start 10
+child: async wait start 5
+child: async wait end
+parent: pipe closed
+EOF
+unit_test tmon_ping_test false 0 10 0 "$epipe" false 0 5 0 0
+
+test_case "No pings, Child exits first, parent notices"
+ok <<EOF
+parent: async wait start 10
+child: async wait start 1
+child: async wait end
+parent: pipe closed
+EOF
+unit_test tmon_ping_test false 0 10 0 "$epipe" false 0 1 0 0
+
+test_case "No pings, parent exits first, child notices"
+ok <<EOF
+parent: async wait start 1
+child: async wait start 10
+parent: async wait end
+child: pipe closed
+EOF
+unit_test tmon_ping_test false 0 1 0 0 false 0 10 0 "$epipe"
+
+test_case "Parent pings, child doesn't expect them, EIO"
+ok <<EOF
+parent: async wait start 5
+child: async wait start 5
+child: error ($eio)
+parent: pipe closed
+EOF
+unit_test tmon_ping_test true 0 5 0 "$epipe" false 0 5 0 "$eio"
+
+test_case "Child pings, parent doesn't expect them, EIO"
+ok <<EOF
+parent: async wait start 5
+child: async wait start 5
+parent: error ($eio)
+child: pipe closed
+EOF
+unit_test tmon_ping_test false 0 5 0 "$eio" true 0 5 0 "$epipe"
+
+test_case "Both ping, child doesn't expect them, EIO"
+ok <<EOF
+parent: async wait start 5
+child: async wait start 5
+child: error ($eio)
+parent: pipe closed
+EOF
+unit_test tmon_ping_test true 3 5 0 "$epipe" true 0 5 0 "$eio"
+
+test_case "Both ping, parent doesn't expect them, EIO"
+ok <<EOF
+parent: async wait start 5
+child: async wait start 5
+parent: error ($eio)
+child: pipe closed
+EOF
+unit_test tmon_ping_test true 0 5 0 "$eio" true 3 5 0 "$epipe"
+
+test_case "Child pings, no ping timeout error, child exits first"
+ok <<EOF
+parent: async wait start 10
+child: async wait start 5
+child: async wait end
+parent: pipe closed
+EOF
+unit_test tmon_ping_test false 3 10 0 "$epipe" true 0 5 0 0
+
+test_case "Parent pings, no ping timeout error, parent exits first"
+ok <<EOF
+parent: async wait start 5
+child: async wait start 10
+parent: async wait end
+child: pipe closed
+EOF
+unit_test tmon_ping_test true 0 5 0 0 false 3 10 0 "$epipe"
+
+test_case "Both ping, no ping timeout error, parent exits first"
+ok <<EOF
+parent: async wait start 5
+child: async wait start 10
+parent: async wait end
+child: pipe closed
+EOF
+unit_test tmon_ping_test true 3 5 0 0 true 3 10 0 "$epipe"
+
+test_case "Both ping, no ping timeout error, child exits first"
+ok <<EOF
+parent: async wait start 10
+child: async wait start 5
+child: async wait end
+parent: pipe closed
+EOF
+unit_test tmon_ping_test true 3 10 0 "$epipe" true 3 5 0 0
+
+test_case "Both ping, child blocks, parent ping timeout error"
+ok <<EOF
+parent: async wait start 20
+child: blocking sleep start 7
+parent: ping timeout
+child: blocking sleep end
+EOF
+unit_test tmon_ping_test true 3 20 0 "$etimedout" true 3 0 7 0
+
+test_case "Both ping, parent blocks, child ping timeout error"
+ok <<EOF
+parent: blocking sleep start 7
+child: async wait start 20
+child: ping timeout
+parent: blocking sleep end
+EOF
+unit_test tmon_ping_test true 3 0 7 0 true 3 20 0 "$etimedout"
+
+test_case "Both ping, child waits, child blocks, parent ping timeout error"
+ok <<EOF
+parent: async wait start 20
+child: async wait start 2
+child: async wait end
+child: blocking sleep start 7
+parent: ping timeout
+child: blocking sleep end
+EOF
+unit_test tmon_ping_test true 3 20 0 "$etimedout" true 3 2 7 0
+
+test_case "Both ping, parent waits, parent blocks, child ping timeout error"
+ok <<EOF
+parent: async wait start 2
+child: async wait start 20
+parent: async wait end
+parent: blocking sleep start 7
+child: ping timeout
+parent: blocking sleep end
+EOF
+unit_test tmon_ping_test true 3 2 7 0 true 3 20 0 "$etimedout"
+
+test_case "Both ping, child blocks for less than ping timeout"
+ok <<EOF
+parent: async wait start 20
+child: blocking sleep start 3
+child: blocking sleep end
+parent: pipe closed
+EOF
+unit_test tmon_ping_test true 7 20 0 "$epipe" true 7 0 3 0
+
+test_case "Both ping, parent blocks for less than ping timeout"
+ok <<EOF
+parent: blocking sleep start 3
+child: async wait start 20
+parent: blocking sleep end
+child: pipe closed
+EOF
+unit_test tmon_ping_test true 7 0 3 0 true 7 20 0 "$epipe"
+
+test_case "Both ping, child waits, child blocks for less than ping timeout"
+ok <<EOF
+parent: async wait start 20
+child: async wait start 2
+child: async wait end
+child: blocking sleep start 3
+child: blocking sleep end
+parent: pipe closed
+EOF
+unit_test tmon_ping_test true 7 20 0 "$epipe" true 7 2 3 0
+
+test_case "Both ping, parent waits, parent blocks for less than ping timeout"
+ok <<EOF
+parent: async wait start 2
+child: async wait start 20
+parent: async wait end
+parent: blocking sleep start 3
+parent: blocking sleep end
+child: pipe closed
+EOF
+unit_test tmon_ping_test true 7 2 3 0 true 7 20 0 "$epipe"
diff --git a/ctdb/tests/UNIT/cunit/tmon_test_002.sh b/ctdb/tests/UNIT/cunit/tmon_test_002.sh
new file mode 100755
index 0000000..e4118a3
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/tmon_test_002.sh
@@ -0,0 +1,142 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+epipe=$(errcode EPIPE)
+etimedout=$(errcode ETIMEDOUT)
+edom=$(errcode EDOM)
+
+test_cases()
+{
+ test_case "no packets, sender exits, 3s timeout"
+ ok <<EOF
+WRITER OK
+READER ERR=$epipe
+EOF
+ unit_test tmon_test "" false 3 false
+
+ test_case "no packets, sender exits, 3s timeout, close ok"
+ ok <<EOF
+WRITER OK
+READER OK
+EOF
+ unit_test tmon_test "" true 3 false
+
+ test_case "Exit packet @ 1s, no timeout"
+ ok <<EOF
+READER OK
+WRITER OK
+EOF
+ unit_test tmon_test "0" false 0 false
+
+ test_case "errno 7 packet @ 1s, no timeout"
+ ok <<EOF
+READER ERR=7
+WRITER OK
+EOF
+ unit_test tmon_test "7" false 0 false
+
+ test_case "errno 110 packet @ 1s, no timeout"
+ ok <<EOF
+READER ERR=110
+WRITER OK
+EOF
+ unit_test tmon_test "#110" false 0 false
+
+ test_case "errno 0 error causes EDOM @ 1s, no timeout"
+ ok <<EOF
+WRITER ERR=$edom
+READER ERR=$epipe
+EOF
+ unit_test tmon_test "#0;" false 0 false
+
+ test_case "errno -1 error causes EDOM @ 1s, no timeout"
+ ok <<EOF
+WRITER ERR=$edom
+READER ERR=$epipe
+EOF
+ unit_test tmon_test "#-1;" false 0 false
+
+ test_case "errno 70000 error causes EDOM @ 1s, no timeout"
+ ok <<EOF
+WRITER ERR=$edom
+READER ERR=$epipe
+EOF
+ unit_test tmon_test "#70000;!0" false 0 false
+
+ test_case "Exit packet @ 3s, no timeout"
+ ok <<EOF
+READER OK
+WRITER OK
+EOF
+ unit_test tmon_test "..0" false 0 false
+
+ test_case "errno 7 packet @ 3s, no timeout"
+ ok <<EOF
+READER ERR=7
+WRITER OK
+EOF
+ unit_test tmon_test "..7" false 0 false
+
+ test_case "no packets for 5s, 3s timeout"
+ ok <<EOF
+READER ERR=$etimedout
+WRITER OK
+EOF
+ unit_test tmon_test "....." false 3 false
+
+ test_case "no packets for 5s, 3s timeout, timeout ok"
+ ok <<EOF
+READER OK
+WRITER OK
+EOF
+ unit_test tmon_test "....." false 3 true
+
+ test_case "4 pings then exit, 3s timeout"
+ ok <<EOF
+PING
+PING
+PING
+PING
+READER OK
+WRITER OK
+EOF
+ unit_test tmon_test "!!!!0" false 3 false
+
+ test_case "ASCII Hello, errno 7, 3s timeout"
+ ok <<EOF
+ASCII H
+ASCII e
+ASCII l
+ASCII l
+ASCII o
+READER ERR=7
+WRITER OK
+EOF
+ unit_test tmon_test "Hello7" false 3 false
+
+ test_case "Hi there! 3s timeout"
+ ok <<EOF
+ASCII H
+ASCII i
+CUSTOM 0x20
+ASCII t
+ASCII h
+ASCII e
+ASCII r
+ASCII e
+PING
+WRITER OK
+READER ERR=$epipe
+EOF
+ unit_test tmon_test "Hi there!" false 3 false
+}
+
+echo "PASS #1: Run test cases in default mode"
+test_cases
+
+echo
+echo "=================================================="
+
+echo "PASS #2: Run test cases in write-skip mode"
+CTDB_TEST_TMON_WRITE_SKIP_MODE=1 test_cases
diff --git a/ctdb/tests/UNIT/cunit/tunable_test_001.sh b/ctdb/tests/UNIT/cunit/tunable_test_001.sh
new file mode 100755
index 0000000..c68cd69
--- /dev/null
+++ b/ctdb/tests/UNIT/cunit/tunable_test_001.sh
@@ -0,0 +1,312 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+tfile="${CTDB_TEST_TMP_DIR}/tunable.$$"
+
+remove_files ()
+{
+ rm -f "$tfile"
+}
+test_cleanup remove_files
+
+defaults="\
+SeqnumInterval=1000
+ControlTimeout=60
+TraverseTimeout=20
+KeepaliveInterval=5
+KeepaliveLimit=5
+RecoverTimeout=30
+RecoverInterval=1
+ElectionTimeout=3
+TakeoverTimeout=9
+MonitorInterval=15
+TickleUpdateInterval=20
+EventScriptTimeout=30
+MonitorTimeoutCount=20
+RecoveryGracePeriod=120
+RecoveryBanPeriod=300
+DatabaseHashSize=100001
+DatabaseMaxDead=5
+RerecoveryTimeout=10
+EnableBans=1
+NoIPFailback=0
+VerboseMemoryNames=0
+RecdPingTimeout=60
+RecdFailCount=10
+LogLatencyMs=0
+RecLockLatencyMs=1000
+RecoveryDropAllIPs=120
+VacuumInterval=10
+VacuumMaxRunTime=120
+RepackLimit=10000
+VacuumFastPathCount=60
+MaxQueueDropMsg=1000000
+AllowUnhealthyDBRead=0
+StatHistoryInterval=1
+DeferredAttachTO=120
+AllowClientDBAttach=1
+FetchCollapse=1
+HopcountMakeSticky=50
+StickyDuration=600
+StickyPindown=200
+NoIPTakeover=0
+DBRecordCountWarn=100000
+DBRecordSizeWarn=10000000
+DBSizeWarn=100000000
+PullDBPreallocation=10485760
+LockProcessesPerDB=200
+RecBufferSizeLimit=1000000
+QueueBufferSize=1024
+IPAllocAlgorithm=2
+AllowMixedVersions=0
+"
+
+ok_tunable_defaults ()
+{
+ ok "$defaults"
+}
+
+# Set required output to a version of $defaults where values for
+# tunables specified in $tfile replace the default values
+ok_tunable ()
+{
+ # Construct a version of $defaults prepended with a lowercase
+ # version of the tunable variable, to allow case-insensitive
+ # matching. This would be easier with the GNU sed
+ # case-insensitivity flag, but that is less portable. The $0
+ # condition in awk causes empty lines to be skipped, in case
+ # there are trailing empty lines in $defaults.
+ _map=$(echo "$defaults" |
+ awk -F= '$0 { printf "%s:%s=%s\n", tolower($1), $1, $2 }')
+
+ # Replace values for tunables set in $tfile
+ while IFS='= ' read -r _var _val ; do
+ case "$_var" in
+ \#* | "") continue ;;
+ esac
+ _decval=$((_val))
+ _vl=$(echo "$_var" | tr '[:upper:]' '[:lower:]')
+ _map=$(echo "$_map" |
+ sed -e "s|^\\(${_vl}:.*=\\).*\$|\\1${_decval}|")
+ done <"$tfile"
+
+ # Set result, stripping off lowercase tunable prefix
+ ok "$(echo "$_map" | awk -F: '{ print $2 }')"
+}
+
+test_case "Unreadable file"
+: >"$tfile"
+chmod a-r "$tfile"
+uid=$(id -u)
+# root can read unreadable files
+if [ "$uid" = 0 ]; then
+ ok_tunable_defaults
+else
+ required_error EINVAL <<EOF
+ctdb_tunable_load_file: Failed to open ${tfile}
+EOF
+fi
+unit_test tunable_test "$tfile"
+rm -f "$tfile"
+
+test_case "Invalid file, contains 1 word"
+echo "Hello" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Invalid line containing "Hello"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, contains multiple words"
+echo "Hello world!" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Invalid line containing "Hello world!"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, missing value"
+echo "EnableBans=" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Invalid line containing "EnableBans"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, invalid value (not a number)"
+echo "EnableBans=value" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Invalid value "value" for tunable "EnableBans"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, missing key"
+echo "=123" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Syntax error
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, missing key but space before ="
+cat >"$tfile" <<EOF
+ =0
+EOF
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Syntax error
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, unknown tunable"
+echo "HelloWorld=123" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Unknown tunable "HelloWorld"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, obsolete tunable"
+echo "MaxRedirectCount=123" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Obsolete tunable "MaxRedirectCount"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, trailing non-whitespace garbage"
+echo "EnableBans=0xgg" >"$tfile"
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Invalid value "0xgg" for tunable "EnableBans"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, multiple errors"
+cat >"$tfile" <<EOF
+EnableBans=
+EnableBans=value
+=123
+HelloWorld=123
+MaxRedirectCount =123
+EOF
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Invalid line containing "EnableBans"
+ctdb_tunable_load_file: Invalid value "value" for tunable "EnableBans"
+ctdb_tunable_load_file: Syntax error
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "Invalid file, errors followed by valid"
+cat >"$tfile" <<EOF
+HelloWorld=123
+EnableBans=value
+EnableBans=0
+EOF
+required_error EINVAL <<EOF
+ctdb_tunable_load_file: Unknown tunable "HelloWorld"
+ctdb_tunable_load_file: Invalid value "value" for tunable "EnableBans"
+EOF
+unit_test tunable_test "$tfile"
+
+test_case "OK, missing file"
+rm -f "$tfile"
+ok_tunable_defaults
+unit_test tunable_test "$tfile"
+
+test_case "OK, empty file"
+: >"$tfile"
+ok_tunable_defaults
+unit_test tunable_test "$tfile"
+
+test_case "OK, comments and blanks only"
+cat >"$tfile" <<EOF
+# This is a comment
+
+# There are also some blank lines
+
+
+EOF
+ok_tunable_defaults
+unit_test tunable_test "$tfile"
+
+test_case "OK, 1 tunable"
+cat >"$tfile" <<EOF
+EnableBans=0
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, 1 tunable, hex"
+cat >"$tfile" <<EOF
+EnableBans=0xf
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, 1 tunable, octal"
+cat >"$tfile" <<EOF
+EnableBans=072
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, 1 tunable, tab before ="
+cat >"$tfile" <<EOF
+EnableBans =0
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, 1 tunable, space after ="
+cat >"$tfile" <<EOF
+EnableBans= 0
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, 2 tunables, multiple spaces around ="
+cat >"$tfile" <<EOF
+EnableBans = 0
+RecoverInterval = 10
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, 2 tunables, whitespace everywhere"
+cat >"$tfile" <<EOF
+ EnableBans = 0
+ RecoverInterval = 10
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, several tunables"
+cat >"$tfile" <<EOF
+EnableBans=0
+RecoverInterval=10
+ElectionTimeout=5
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, several tunables, varying case"
+cat >"$tfile" <<EOF
+enablebans=0
+ReCoVerInTeRvAl=10
+ELECTIONTIMEOUT=5
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
+
+test_case "OK, miscellaneous..."
+cat >"$tfile" <<EOF
+# Leading comment
+enablebans=0
+ReCoVerInTeRvAl = 10
+
+# Intermediate comment after a blank line
+ ELECTIONTIMEOUT=25
+
+
+# Final comment among blanks lines
+
+
+
+
+EOF
+ok_tunable
+unit_test tunable_test "$tfile"
diff --git a/ctdb/tests/UNIT/eventd/README b/ctdb/tests/UNIT/eventd/README
new file mode 100644
index 0000000..742b2c5
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/README
@@ -0,0 +1 @@
+Unit tests for event daemon
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/ctdb.conf b/ctdb/tests/UNIT/eventd/etc-ctdb/ctdb.conf
new file mode 100644
index 0000000..59bc9bb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/ctdb.conf
@@ -0,0 +1,6 @@
+[logging]
+ location = file:
+ log level = DEBUG
+
+[event]
+ debug script = debug-script.sh
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/debug-script.sh b/ctdb/tests/UNIT/eventd/etc-ctdb/debug-script.sh
new file mode 100755
index 0000000..d54de7e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/debug-script.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+log="${CTDB_BASE}/debug_script.log"
+
+case "$2" in
+"timeout")
+ echo "args: $*" > "$log"
+ ;;
+
+"verbosetimeout")
+ (ctdb-event status random "$2") > "$log"
+ ;;
+
+"verbosetimeout2")
+ exec > "$log" 2>&1
+ ctdb-event status random "$2"
+ ;;
+
+*)
+ ;;
+
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/data/03.notalink.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/data/03.notalink.script
new file mode 100644
index 0000000..039e4d0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/data/03.notalink.script
@@ -0,0 +1,2 @@
+#!/bin/sh
+exit 0
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/data/README b/ctdb/tests/UNIT/eventd/etc-ctdb/events/data/README
new file mode 100644
index 0000000..f38a189
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/data/README
@@ -0,0 +1 @@
+initially empty event scripts directory
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/empty/README b/ctdb/tests/UNIT/eventd/etc-ctdb/events/empty/README
new file mode 100644
index 0000000..a5614a9
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/empty/README
@@ -0,0 +1 @@
+empty event scripts directory
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/01.test.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/01.test.script
new file mode 100755
index 0000000..d16f0de
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/01.test.script
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+case "$1" in
+"startup") sleep 5; exit 0 ;;
+"monitor") sleep 5; exit 0 ;;
+"event1") sleep 1; exit 0 ;;
+"event2") sleep 1; exit 0 ;;
+"event3") exit 3 ;;
+"timeout1") sleep 99 ;;
+*) exit 0 ;;
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/02.test.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/02.test.script
new file mode 100755
index 0000000..5c841aa
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/02.test.script
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+case "$1" in
+"monitor") sleep 1; exit 0 ;;
+"event1") exit 1 ;;
+"event2") sleep 1; exit 0 ;;
+"timeout2") sleep 99 ;;
+*) exit 0 ;;
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/03.test.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/03.test.script
new file mode 100755
index 0000000..b48b68c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/multi/03.test.script
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+case "$1" in
+"monitor") sleep 1; exit 0 ;;
+"event1") sleep 1; exit 0 ;;
+"event2") exit 2 ;;
+"timeout3") sleep 99 ;;
+*) exit 0 ;;
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/01.disabled.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/01.disabled.script
new file mode 100644
index 0000000..c52d3c2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/01.disabled.script
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/02.enabled.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/02.enabled.script
new file mode 100755
index 0000000..ace80fd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/02.enabled.script
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+case "$1" in
+"monitor") exit 0 ;;
+"failure") exit 1 ;;
+"timeout") sleep 99 ;;
+"verbose") echo "Running event $1" ; exit 0 ;;
+"verbosemultiline")
+ cat <<EOF
+Running event $1
+There are multiple output lines
+
+^^^ including blank lines...
+
+EOF
+ exit 0
+ ;;
+"verbosemultilinenonl")
+ cat <<EOF
+Running event $1
+Multiple output lines
+
+EOF
+ printf 'No trailing newline'
+ exit 0
+ ;;
+"verbosenewlinesonly")
+ cat <<EOF
+
+
+
+EOF
+ exit 0
+ ;;
+"verbosefailure") echo "args: $*"; exit 1 ;;
+"verbosemultilinefailure")
+ cat <<EOF
+Failing event $1
+There are multiple output lines
+
+args: $*
+
+EOF
+ exit 2
+ ;;
+"verbosetimeout") echo "Sleeping for 99 seconds"; sleep 99 ;;
+"verbosetimeout2") echo "Sleeping for 99 seconds"; sleep 99 ;;
+*) exit 0 ;;
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/README.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/README.script
new file mode 100644
index 0000000..9086add
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/README.script
@@ -0,0 +1 @@
+Random collection of files and event scripts
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/a.script b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/a.script
new file mode 100755
index 0000000..2bb8d86
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/events/random/a.script
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+exit 1
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/01.dummy.script b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/01.dummy.script
new file mode 100755
index 0000000..9c56f5b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/01.dummy.script
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+case "$1" in
+"failure") exit 1 ;;
+*) exit 0 ;;
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/02.disabled.script b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/02.disabled.script
new file mode 100755
index 0000000..9c56f5b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/data/02.disabled.script
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+case "$1" in
+"failure") exit 1 ;;
+*) exit 0 ;;
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/empty/README b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/empty/README
new file mode 100644
index 0000000..a5614a9
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/empty/README
@@ -0,0 +1 @@
+empty event scripts directory
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/01.disabled.script b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/01.disabled.script
new file mode 100644
index 0000000..c52d3c2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/01.disabled.script
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/02.enabled.script b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/02.enabled.script
new file mode 100755
index 0000000..f25e724
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/02.enabled.script
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+case "$1" in
+"monitor") exit 0 ;;
+"failure") exit 1 ;;
+"timeout") sleep 99 ;;
+"verbose") echo "Running event $1" ; exit 0 ;;
+"verbosefailure") echo "args: $*"; exit 1 ;;
+"verbosetimeout") echo "Sleeping for 99 seconds"; sleep 99 ;;
+"verbosetimeout2") echo "Sleeping for 99 seconds"; sleep 99 ;;
+*) exit 0 ;;
+esac
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/README.script b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/README.script
new file mode 100644
index 0000000..9086add
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/README.script
@@ -0,0 +1 @@
+Random collection of files and event scripts
diff --git a/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/a.script b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/a.script
new file mode 100755
index 0000000..2bb8d86
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/etc-ctdb/share/events/random/a.script
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+exit 1
diff --git a/ctdb/tests/UNIT/eventd/eventd_001.sh b/ctdb/tests/UNIT/eventd/eventd_001.sh
new file mode 100755
index 0000000..7d4ee9e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_001.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "non-existent eventscript directory"
+
+setup_eventd
+
+required_error ENOENT <<EOF
+Event dir for foobar does not exist
+EOF
+simple_test status foobar monitor
+
+required_error ENOENT <<EOF
+Event dir for foobar does not exist
+EOF
+simple_test run 10 foobar monitor
+
+required_error ENOENT <<EOF
+Script 01.test does not exist in foobar
+EOF
+simple_test script enable foobar 01.test
+
+required_error ENOENT <<EOF
+Command script list finished with result=$(errcode ENOENT)
+EOF
+simple_test script list foobar
diff --git a/ctdb/tests/UNIT/eventd/eventd_002.sh b/ctdb/tests/UNIT/eventd/eventd_002.sh
new file mode 100755
index 0000000..f964adf
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_002.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "empty eventscript directory"
+
+setup_eventd
+
+required_error EINVAL <<EOF
+Event monitor has never run in empty
+EOF
+simple_test status empty monitor
+
+ok_null
+simple_test run 10 empty monitor
+
+ok_null
+simple_test status empty monitor
+
+ok_null
+simple_test script list empty
diff --git a/ctdb/tests/UNIT/eventd/eventd_003.sh b/ctdb/tests/UNIT/eventd/eventd_003.sh
new file mode 100755
index 0000000..8625057
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_003.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "eventscript directory with random files"
+
+setup_eventd
+
+required_error EINVAL <<EOF
+Script README is invalid in random
+EOF
+simple_test script enable random README
+
+required_error EINVAL <<EOF
+Script a is invalid in random
+EOF
+simple_test script disable random a
+
+required_error ENOENT <<EOF
+Script 00.foobar does not exist in random
+EOF
+simple_test script enable random 00.foobar
+
+required_error EINVAL <<EOF
+Event monitor has never run in random
+EOF
+simple_test status random monitor
+
+ok_null
+simple_test run 10 random monitor
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+EOF
+simple_test status random monitor
+
+ok <<EOF
+ 01.disabled
+ 02.enabled
+
+ 01.disabled
+* 02.enabled
+EOF
+simple_test script list random
diff --git a/ctdb/tests/UNIT/eventd/eventd_004.sh b/ctdb/tests/UNIT/eventd/eventd_004.sh
new file mode 100755
index 0000000..fe69d1d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_004.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "disabled event script"
+
+setup_eventd
+
+ok_null
+simple_test script disable random 01.disabled
+
+ok_null
+simple_test script disable random 01.disabled
+
+ok_null
+simple_test script enable random 01.disabled
+
+ok_null
+simple_test script disable random 01.disabled
+
+required_error EINVAL <<EOF
+Event monitor has never run in random
+EOF
+simple_test status random monitor
+
+ok_null
+simple_test run 10 random monitor
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+EOF
+simple_test status random monitor
diff --git a/ctdb/tests/UNIT/eventd/eventd_005.sh b/ctdb/tests/UNIT/eventd/eventd_005.sh
new file mode 100755
index 0000000..28f4935
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_005.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "enabled event script"
+
+setup_eventd
+
+ok_null
+simple_test script enable random 02.enabled
+
+ok_null
+simple_test script enable random 02.enabled
+
+ok_null
+simple_test run 10 random monitor
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+EOF
+simple_test status random monitor
+
+ok_null
+simple_test script enable random 01.disabled
+
+ok_null
+simple_test run 10 random monitor
+
+ok <<EOF
+01.disabled OK DURATION DATETIME
+02.enabled OK DURATION DATETIME
+EOF
+simple_test status random monitor
diff --git a/ctdb/tests/UNIT/eventd/eventd_006.sh b/ctdb/tests/UNIT/eventd/eventd_006.sh
new file mode 100755
index 0000000..a7a2d41
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_006.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "failing event script"
+
+setup_eventd
+
+required_error ENOEXEC <<EOF
+Event failure in random failed
+EOF
+simple_test run 10 random failure
+
+required_result 1 <<EOF
+01.disabled DISABLED
+02.enabled ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status random failure
diff --git a/ctdb/tests/UNIT/eventd/eventd_007.sh b/ctdb/tests/UNIT/eventd/eventd_007.sh
new file mode 100755
index 0000000..e8ee403
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_007.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "timing out event script"
+
+setup_eventd
+
+required_error ETIMEDOUT <<EOF
+Event timeout in random timed out
+EOF
+simple_test run 5 random timeout
+
+required_error ETIMEDOUT <<EOF
+01.disabled DISABLED
+02.enabled TIMEDOUT DATETIME
+ OUTPUT:
+EOF
+simple_test status random timeout
diff --git a/ctdb/tests/UNIT/eventd/eventd_008.sh b/ctdb/tests/UNIT/eventd/eventd_008.sh
new file mode 100755
index 0000000..bd0fc50
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_008.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "capture event script output"
+
+setup_eventd
+
+required_result 8 <<EOF
+Event verbosefailure in random failed
+EOF
+simple_test run 10 random verbosefailure with some args
+
+required_result 1 <<EOF
+01.disabled DISABLED
+02.enabled ERROR DURATION DATETIME
+ OUTPUT: args: verbosefailure with some args
+EOF
+simple_test status random verbosefailure
+
+ok_null
+simple_test run 10 random verbose
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+ OUTPUT: Running event verbose
+EOF
+simple_test status random verbose
+
+ok_null
+simple_test run 10 random verbosemultiline
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+ OUTPUT:
+ Running event verbosemultiline
+ There are multiple output lines
+
+ ^^^ including blank lines...
+EOF
+simple_test status random verbosemultiline
+
+ok_null
+simple_test run 10 random verbosemultilinenonl
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+ OUTPUT:
+ Running event verbosemultilinenonl
+ Multiple output lines
+
+ No trailing newline
+EOF
+simple_test status random verbosemultilinenonl
+
+ok_null
+simple_test run 10 random verbosenewlinesonly
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status random verbosenewlinesonly
+
+required_result 8 <<EOF
+Event verbosemultilinefailure in random failed
+EOF
+simple_test run 10 random verbosemultilinefailure with some args
+
+required_result 2 <<EOF
+01.disabled DISABLED
+02.enabled ERROR DURATION DATETIME
+ OUTPUT:
+ Failing event verbosemultilinefailure
+ There are multiple output lines
+
+ args: verbosemultilinefailure with some args
+EOF
+simple_test status random verbosemultilinefailure
diff --git a/ctdb/tests/UNIT/eventd/eventd_009.sh b/ctdb/tests/UNIT/eventd/eventd_009.sh
new file mode 100755
index 0000000..39e5cd6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_009.sh
@@ -0,0 +1,155 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "eventscript directory with links"
+
+setup_eventd
+
+ok <<EOF
+ 01.dummy
+ 02.disabled
+
+ 03.notalink
+EOF
+simple_test script list data
+
+# Should be a no-op
+ok_null
+simple_test script disable data 03.notalink
+
+ok_null
+simple_test run 10 data failure
+
+ok_null
+simple_test script enable data 01.dummy
+
+required_result 8 <<EOF
+Event failure in data failed
+EOF
+simple_test run 10 data failure
+
+ok <<EOF
+* 01.dummy
+ 02.disabled
+
+ 03.notalink
+EOF
+simple_test script list data
+
+required_result 1 <<EOF
+01.dummy ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status data failure
+
+ok_null
+simple_test run 10 data monitor
+
+ok <<EOF
+01.dummy OK DURATION DATETIME
+03.notalink DISABLED
+EOF
+simple_test status data monitor
+
+ok_null
+simple_test script enable data 03.notalink
+
+ok <<EOF
+* 01.dummy
+ 02.disabled
+
+* 03.notalink
+EOF
+simple_test script list data
+
+# Local/3rd-party link, not enabled
+touch "${CTDB_BASE}/foo"
+chmod 644 "${CTDB_BASE}/foo"
+abs_base=$(cd "$CTDB_BASE" && echo "$PWD")
+ln -s "${abs_base}/foo" "${CTDB_BASE}/events/data/04.locallink.script"
+
+ok <<EOF
+* 01.dummy
+ 02.disabled
+
+* 03.notalink
+ 04.locallink
+EOF
+simple_test script list data
+
+ok_null
+simple_test script enable data 04.locallink
+
+required_result 1 ""
+unit_test test -x "${CTDB_BASE}/foo"
+
+ok_null
+simple_test script disable data 04.locallink
+
+ok_null
+unit_test test -f "${CTDB_BASE}/foo"
+
+ok <<EOF
+* 01.dummy
+ 02.disabled
+
+* 03.notalink
+EOF
+simple_test script list data
+
+# Local/3rd-party link, enabled
+chmod +x "${CTDB_BASE}/foo"
+ln -s "${abs_base}/foo" "${CTDB_BASE}/events/data/04.locallink.script"
+
+ok <<EOF
+* 01.dummy
+ 02.disabled
+
+* 03.notalink
+* 04.locallink
+EOF
+simple_test script list data
+
+ok_null
+simple_test script disable data 01.dummy
+
+ok_null
+simple_test script disable data 04.locallink
+
+ok_null
+unit_test test -f "${CTDB_BASE}/foo"
+
+ok <<EOF
+ 01.dummy
+ 02.disabled
+
+* 03.notalink
+EOF
+simple_test script list data
+
+ok_null
+simple_test run 10 data failure
+
+# Local/3rd-party link, dangling
+ln -s "${CTDB_BASE}/doesnotexist" "${CTDB_BASE}/events/data/04.locallink.script"
+
+ok <<EOF
+ 01.dummy
+ 02.disabled
+
+* 03.notalink
+ 04.locallink
+EOF
+simple_test script list data
+
+ok_null
+simple_test script disable data 04.locallink
+
+ok <<EOF
+ 01.dummy
+ 02.disabled
+
+* 03.notalink
+EOF
+simple_test script list data
diff --git a/ctdb/tests/UNIT/eventd/eventd_011.sh b/ctdb/tests/UNIT/eventd/eventd_011.sh
new file mode 100755
index 0000000..ce75613
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_011.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "multiple events"
+
+setup_eventd
+
+ok_null
+simple_test run 10 random monitor
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+EOF
+simple_test status random monitor
+
+required_error ENOEXEC <<EOF
+Event failure in random failed
+EOF
+simple_test run 10 random failure
+
+required_result 1 <<EOF
+01.disabled DISABLED
+02.enabled ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status random failure
+
+required_error ENOEXEC <<EOF
+Event verbosefailure in random failed
+EOF
+simple_test run 10 random verbosefailure
+
+required_result 1 <<EOF
+01.disabled DISABLED
+02.enabled ERROR DURATION DATETIME
+ OUTPUT: args: verbosefailure
+EOF
+simple_test status random verbosefailure
diff --git a/ctdb/tests/UNIT/eventd/eventd_012.sh b/ctdb/tests/UNIT/eventd/eventd_012.sh
new file mode 100755
index 0000000..5e6857b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_012.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "cancel new monitor event"
+
+setup_eventd
+
+ok_null
+simple_test_background run 10 multi startup
+
+required_error ECANCELED <<EOF
+Event monitor in multi got cancelled
+EOF
+simple_test run 10 multi monitor
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi startup
+
+required_error EINVAL <<EOF
+Event monitor has never run in multi
+EOF
+simple_test status multi monitor
diff --git a/ctdb/tests/UNIT/eventd/eventd_013.sh b/ctdb/tests/UNIT/eventd/eventd_013.sh
new file mode 100755
index 0000000..5bbb4dc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_013.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "cancel running monitor event"
+
+setup_eventd
+
+required_error ECANCELED <<EOF
+Event monitor in multi got cancelled
+EOF
+simple_test_background run 10 multi monitor
+
+ok_null
+simple_test run 10 multi startup
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi startup
+
+required_error EINVAL <<EOF
+Event monitor has never run in multi
+EOF
+simple_test status multi monitor
diff --git a/ctdb/tests/UNIT/eventd/eventd_014.sh b/ctdb/tests/UNIT/eventd/eventd_014.sh
new file mode 100755
index 0000000..63b34b4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_014.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "queue events"
+
+setup_eventd
+
+ok_null
+simple_test_background run 10 multi queue1
+
+ok_null
+simple_test run 10 multi queue2
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi queue1
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi queue2
diff --git a/ctdb/tests/UNIT/eventd/eventd_021.sh b/ctdb/tests/UNIT/eventd/eventd_021.sh
new file mode 100755
index 0000000..935373a
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_021.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "debug script"
+
+setup_eventd
+
+result_filter ()
+{
+ _pid="[0-9][0-9]*"
+ sed -e "s| ${_pid}| PID|"
+}
+
+required_error ETIMEDOUT <<EOF
+Event timeout in random timed out
+EOF
+simple_test run 5 random timeout
+
+# wait for debug hung script
+sleep 5
+
+ok <<EOF
+args: PID timeout
+EOF
+unit_test cat "${CTDB_BASE}/debug_script.log"
diff --git a/ctdb/tests/UNIT/eventd/eventd_022.sh b/ctdb/tests/UNIT/eventd/eventd_022.sh
new file mode 100755
index 0000000..3f1c4f6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_022.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "status output in debug script"
+
+setup_eventd
+
+required_error ETIMEDOUT <<EOF
+Event verbosetimeout in random timed out
+EOF
+simple_test run 5 random verbosetimeout
+
+# wait for debug hung script
+sleep 5
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled TIMEDOUT DATETIME
+ OUTPUT: Sleeping for 99 seconds
+EOF
+unit_test cat "${CTDB_BASE}/debug_script.log"
diff --git a/ctdb/tests/UNIT/eventd/eventd_023.sh b/ctdb/tests/UNIT/eventd/eventd_023.sh
new file mode 100755
index 0000000..8914218
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_023.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "redirected status output in debug script"
+
+setup_eventd
+
+required_error ETIMEDOUT <<EOF
+Event verbosetimeout2 in random timed out
+EOF
+simple_test run 5 random verbosetimeout2
+
+# wait for debug hung script
+sleep 5
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled TIMEDOUT DATETIME
+ OUTPUT: Sleeping for 99 seconds
+EOF
+unit_test cat "${CTDB_BASE}/debug_script.log"
diff --git a/ctdb/tests/UNIT/eventd/eventd_024.sh b/ctdb/tests/UNIT/eventd/eventd_024.sh
new file mode 100755
index 0000000..db68d01
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_024.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "process terminated after debug"
+
+setup_eventd
+
+# Override result_filter for this test: replace the first run of digits
+# on each output line with "PID" so expected output is PID-independent.
+result_filter()
+{
+	_pid="[0-9][0-9]*"
+	sed -e "s|${_pid}|PID|"
+}
+
+required_error ETIMEDOUT <<EOF
+Event timeout in random timed out
+EOF
+simple_test run 5 random timeout
+
+# wait for debug hung script
+sleep 5
+
+ok <<EOF
+args: PID timeout
+EOF
+unit_test cat "${CTDB_BASE}/debug_script.log"
+
+# The PID is the second field of the "args: <pid> timeout" line checked above
+pid=$(awk '{print $2}' "${CTDB_BASE}/debug_script.log")
+
+# pstree is expected to print nothing for that PID (process no longer exists)
+ok_null
+unit_test pstree "$pid"
diff --git a/ctdb/tests/UNIT/eventd/eventd_031.sh b/ctdb/tests/UNIT/eventd/eventd_031.sh
new file mode 100755
index 0000000..07efa80
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_031.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "multiple scripts"
+
+setup_eventd
+
+ok_null
+simple_test run 30 multi monitor
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi monitor
diff --git a/ctdb/tests/UNIT/eventd/eventd_032.sh b/ctdb/tests/UNIT/eventd/eventd_032.sh
new file mode 100755
index 0000000..778acdb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_032.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "failures with multiple scripts"
+
+setup_eventd
+
+required_error ENOEXEC <<EOF
+Event event1 in multi failed
+EOF
+simple_test run 10 multi event1
+
+required_result 1 <<EOF
+01.test OK DURATION DATETIME
+02.test ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status multi event1
+
+required_error ENOEXEC <<EOF
+Event event2 in multi failed
+EOF
+simple_test run 10 multi event2
+
+required_result 2 <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status multi event2
+
+required_error ENOEXEC <<EOF
+Event event3 in multi failed
+EOF
+simple_test run 10 multi event3
+
+required_result 3 <<EOF
+01.test ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status multi event3
diff --git a/ctdb/tests/UNIT/eventd/eventd_033.sh b/ctdb/tests/UNIT/eventd/eventd_033.sh
new file mode 100755
index 0000000..ba99b11
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_033.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "timeouts with multiple scripts"
+
+setup_eventd
+
+required_error ETIMEDOUT <<EOF
+Event timeout1 in multi timed out
+EOF
+simple_test run 5 multi timeout1
+
+required_error ETIMEDOUT <<EOF
+01.test TIMEDOUT DATETIME
+ OUTPUT:
+EOF
+simple_test status multi timeout1
+
+required_error ETIMEDOUT <<EOF
+Event timeout2 in multi timed out
+EOF
+simple_test run 5 multi timeout2
+
+required_error ETIMEDOUT <<EOF
+01.test OK DURATION DATETIME
+02.test TIMEDOUT DATETIME
+ OUTPUT:
+EOF
+simple_test status multi timeout2
+
+required_error ETIMEDOUT <<EOF
+Event timeout3 in multi timed out
+EOF
+simple_test run 5 multi timeout3
+
+required_error ETIMEDOUT <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test TIMEDOUT DATETIME
+ OUTPUT:
+EOF
+simple_test status multi timeout3
diff --git a/ctdb/tests/UNIT/eventd/eventd_041.sh b/ctdb/tests/UNIT/eventd/eventd_041.sh
new file mode 100755
index 0000000..ca4a99c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_041.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "multiple components"
+
+setup_eventd
+
+ok_null
+simple_test_background run 10 multi monitor
+
+ok_null
+simple_test run 10 random monitor
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi monitor
+
+ok <<EOF
+01.disabled DISABLED
+02.enabled OK DURATION DATETIME
+EOF
+simple_test status random monitor
diff --git a/ctdb/tests/UNIT/eventd/eventd_042.sh b/ctdb/tests/UNIT/eventd/eventd_042.sh
new file mode 100755
index 0000000..862cf6c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_042.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "multiple components with failure"
+
+setup_eventd
+
+ok_null
+simple_test_background run 10 multi monitor
+
+required_error ENOEXEC <<EOF
+Event failure in random failed
+EOF
+simple_test run 10 random failure
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi monitor
+
+required_result 1 <<EOF
+01.disabled DISABLED
+02.enabled ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status random failure
diff --git a/ctdb/tests/UNIT/eventd/eventd_043.sh b/ctdb/tests/UNIT/eventd/eventd_043.sh
new file mode 100755
index 0000000..2304d23
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_043.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "multiple components with timeout"
+
+setup_eventd
+
+ok_null
+simple_test_background run 10 multi monitor
+
+required_error ETIMEDOUT <<EOF
+Event timeout in random timed out
+EOF
+simple_test run 10 random timeout
+
+ok <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi monitor
+
+required_error ETIMEDOUT <<EOF
+01.disabled DISABLED
+02.enabled TIMEDOUT DATETIME
+ OUTPUT:
+EOF
+simple_test status random timeout
diff --git a/ctdb/tests/UNIT/eventd/eventd_044.sh b/ctdb/tests/UNIT/eventd/eventd_044.sh
new file mode 100755
index 0000000..8c0e931
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_044.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "new component"
+
+setup_eventd
+
+ok_null
+mkdir "${eventd_scriptdir}/foobar"
+
+ok_null
+cp "${eventd_scriptdir}/random/01.disabled.script" "${eventd_scriptdir}/foobar"
+
+required_result 22 <<EOF
+Event monitor has never run in foobar
+EOF
+simple_test status foobar monitor
+
+ok_null
+simple_test run 10 foobar monitor
+
+ok <<EOF
+01.disabled DISABLED
+EOF
+simple_test status foobar monitor
+
+ok_null
+simple_test script enable foobar 01.disabled
+
+ok_null
+simple_test run 10 foobar monitor
+
+ok <<EOF
+01.disabled OK DURATION DATETIME
+EOF
+simple_test status foobar monitor
diff --git a/ctdb/tests/UNIT/eventd/eventd_051.sh b/ctdb/tests/UNIT/eventd/eventd_051.sh
new file mode 100755
index 0000000..c00cb2e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_051.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "logging check"
+
+setup_eventd
+
+ok_null
+simple_test run 10 random verbose
+
+ok <<EOF
+02.enabled: Running event verbose
+EOF
+unit_test grep "02.enabled:" "$eventd_logfile"
diff --git a/ctdb/tests/UNIT/eventd/eventd_052.sh b/ctdb/tests/UNIT/eventd/eventd_052.sh
new file mode 100755
index 0000000..75f9572
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/eventd_052.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "run through failure"
+
+setup_eventd
+
+export CTDB_EVENT_RUN_ALL=1
+
+required_error ENOEXEC <<EOF
+Event event1 in multi failed
+EOF
+simple_test run 10 multi event1
+
+required_result 1 <<EOF
+01.test OK DURATION DATETIME
+02.test ERROR DURATION DATETIME
+ OUTPUT:
+03.test OK DURATION DATETIME
+EOF
+simple_test status multi event1
+
+required_error ENOEXEC <<EOF
+Event event2 in multi failed
+EOF
+simple_test run 10 multi event2
+
+required_result 2 <<EOF
+01.test OK DURATION DATETIME
+02.test OK DURATION DATETIME
+03.test ERROR DURATION DATETIME
+ OUTPUT:
+EOF
+simple_test status multi event2
diff --git a/ctdb/tests/UNIT/eventd/scripts/local.sh b/ctdb/tests/UNIT/eventd/scripts/local.sh
new file mode 100644
index 0000000..04cce63
--- /dev/null
+++ b/ctdb/tests/UNIT/eventd/scripts/local.sh
@@ -0,0 +1,156 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+. "${TEST_SCRIPTS_DIR}/script_install_paths.sh"
+
+PATH="$PATH:$CTDB_SCRIPTS_TOOLS_HELPER_DIR"
+
+# debug ARGS... - echo ARGS only when CTDB_TEST_VERBOSE is "true".
+# The variable's value is run as a command, so it must name one.
+if "$CTDB_TEST_VERBOSE" ; then
+	debug () { echo "$@" ; }
+else
+	debug () { : ; }
+fi
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+# Paths of the eventd test environment
+ctdb_config=$(ctdb-path config)
+eventd_socket=$(ctdb-path socket eventd)
+eventd_pidfile=$(ctdb-path pidfile eventd)
+eventd_scriptdir=$(ctdb-path etcdir append events)
+eventd_logfile="${CTDB_BASE}/eventd.log"
+
+# define_test DESCRIPTION
+# Print the test name (basename of $0 without ".sh"), left-justified in
+# 28 columns, followed by DESCRIPTION.
+define_test ()
+{
+	_f=$(basename "$0" ".sh")
+
+	printf "%-28s - %s\n" "$_f" "$1"
+}
+
+# cleanup_eventd
+# Kill the process whose PID is recorded in $eventd_pidfile, if any.
+# A missing pidfile or a failing kill is not treated as an error.
+cleanup_eventd ()
+{
+	debug "Cleaning up eventd"
+
+	pid=$(cat "$eventd_pidfile" 2>/dev/null || echo)
+	if [ -n "$pid" ] ; then
+		kill "$pid" || true
+	fi
+}
+
+# setup_eventd
+# Start ctdb-eventd (prefixed by $VALGRIND, if set) in the background
+# with its output copied to $eventd_logfile, wait for its socket to
+# appear and register cleanup_eventd as the test cleanup handler.
+setup_eventd ()
+{
+	echo "Setting up eventd"
+
+	$VALGRIND ctdb-eventd 2>&1 | tee "$eventd_logfile" &
+	# Wait till eventd is running
+	wait_until 10 test -S "$eventd_socket" || \
+		die "ctdb_eventd failed to start"
+
+	test_cleanup cleanup_eventd
+}
+
+# simple_test_background ARGS...
+# Run "ctdb-event ARGS..." via unit_test in a background subshell.
+# Output goes to $background_log and the exit status is written to
+# $background_status; the next simple_test collects both.
+simple_test_background ()
+{
+	background_log="${CTDB_BASE}/background.log"
+	background_status="${CTDB_BASE}/background.status"
+	background_running=1
+
+	(
+		(unit_test ctdb-event "$@") > "$background_log" 2>&1
+		echo $? > "$background_status"
+	) &
+	background_pid=$!
+}
+
+# background_wait
+# If a background test is pending, poll once a second (at most 30
+# times) for its status file.  If none appears, kill the background
+# subshell and record the status as TIMEOUT.
+background_wait ()
+{
+	[ -n "$background_running" ] || return
+
+	count=0
+	# Two separate tests: the "-a" operator of test(1) is obsolescent
+	while [ ! -s "$background_status" ] && [ "$count" -lt 30 ] ; do
+		count=$((count + 1))
+		sleep 1
+	done
+
+	if [ ! -s "$background_status" ] ; then
+		kill -9 "$background_pid"
+		echo TIMEOUT > "$background_status"
+	fi
+}
+
+# background_output
+# If a background test is pending, print its captured output between
+# "--- Background ---" markers, clear the pending state and exit with
+# the background status if it is non-zero (1 on timeout).
+background_output ()
+{
+	[ -n "$background_running" ] || return
+
+	bg_status=$(cat "$background_status")
+	rm -f "$background_status"
+	echo "--- Background ---"
+	if [ "$bg_status" = "TIMEOUT" ] ; then
+		echo "Background process did not complete"
+		bg_status=1
+	else
+		cat "$background_log"
+		rm -f "$background_log"
+	fi
+	echo "--- Background ---"
+	unset background_running
+	[ "$bg_status" -eq 0 ] || exit "$bg_status"
+}
+
+# simple_test ARGS...
+# Run "ctdb-event ARGS..." via unit_test in a subshell, then collect
+# any pending background test.  Exits with the foreground status if
+# it is non-zero.
+simple_test ()
+{
+	(unit_test ctdb-event "$@")
+	status=$?
+
+	background_wait
+	background_output
+
+	[ "$status" -eq 0 ] || exit "$status"
+}
+
+# result_filter
+# Filter stdin to stdout, replacing the first duration (N.NNN), the
+# first "Day Mon DD HH:MM:SS YYYY" timestamp and the first ",<digits>"
+# on each line with DURATION, DATETIME and ",PID" respectively.
+result_filter ()
+{
+	_duration="\<[0-9][0-9]*\.[0-9][0-9][0-9]\>"
+	_day="[FMSTW][aehoru][deintu]"
+	_month="[ADFJMNOS][aceopu][bcglnprtvy]"
+	_date="[ 0-9][0-9]"
+	_time="[0-9][0-9]:[0-9][0-9]:[0-9][0-9]"
+	_year="[0-9][0-9][0-9][0-9]"
+	_datetime="${_day} ${_month} ${_date} ${_time} ${_year}"
+	_pid="[0-9][0-9]*"
+	sed -e "s#${_duration}#DURATION#" \
+		-e "s#${_datetime}#DATETIME#" \
+		-e "s#,${_pid}#,PID#"
+}
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.001.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.001.sh
new file mode 100755
index 0000000..807f3ef
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check"
+
+setup
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.002.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.002.sh
new file mode 100755
index 0000000..7ff5385
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.002.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool does not support check"
+
+setup
+
+# Presumably read by the stub tdbtool to disable "check" - confirm in stubs
+FAKE_TDBTOOL_SUPPORTS_CHECK="no"
+
+ok <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.003.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.003.sh
new file mode 100755
index 0000000..2d1fb0d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.003.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, good TDB"
+
+setup
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+touch "${CTDB_DBDIR}/foo.tdb.0"
+FAKE_TDB_IS_OK="yes"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.004.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.004.sh
new file mode 100755
index 0000000..196d7c2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.004.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, bad TDB"
+
+setup
+
+db="${CTDB_DBDIR}/foo.tdb.0"
+touch "$db"
+FAKE_TDB_IS_OK="no"
+
+ok <<EOF
+WARNING: database ${db} is corrupted.
+ Moving to backup ${db}.DATE.TIME.corrupt for later analysis.
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.005.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.005.sh
new file mode 100755
index 0000000..3f85de4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.005.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool does not support check, good TDB"
+
+setup
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="no"
+
+touch "${CTDB_DBDIR}/foo.tdb.0"
+FAKE_TDB_IS_OK="yes"
+
+ok <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.006.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.006.sh
new file mode 100755
index 0000000..29794d7
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.006.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool does not support check, bad TDB"
+
+setup
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="no"
+
+db="${CTDB_DBDIR}/foo.tdb.0"
+touch "$db"
+FAKE_TDB_IS_OK="no"
+
+ok <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+WARNING: database ${db} is corrupted.
+ Moving to backup ${db}.DATE.TIME.corrupt for later analysis.
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.007.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.007.sh
new file mode 100755
index 0000000..5121513
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.007.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, good persistent TDB"
+
+setup
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+touch "${CTDB_DBDIR_PERSISTENT}/foo.tdb.0"
+FAKE_TDB_IS_OK="yes"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.008.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.008.sh
new file mode 100755
index 0000000..120aefc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.008.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, bad persistent TDB"
+
+setup
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+db="${CTDB_DBDIR_PERSISTENT}/foo.tdb.0"
+touch "$db"
+FAKE_TDB_IS_OK="no"
+
+required_result 1 <<EOF
+Persistent database ${db} is corrupted! CTDB will not start.
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/00.ctdb.init.009.sh b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.009.sh
new file mode 100755
index 0000000..92a0e25
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/00.ctdb.init.009.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, bad TDB multiple times"
+
+setup
+
+db="${CTDB_DBDIR}/foo.tdb.0"
+FAKE_TDB_IS_OK="no"
+
+required_result_tdbcheck ()
+{
+ ok <<EOF
+WARNING: database ${db} is corrupted.
+ Moving to backup ${db}.DATE.TIME.corrupt for later analysis.
+EOF
+}
+
+# List the corrupt databases
+test_num_corrupt ()
+{
+ (cd "$CTDB_DBDIR" && ls foo.tdb.0.*.corrupt)
+}
+
+# Required result is a list of up to 10 corrupt databases
+required_result_num_corrupt ()
+{
+ _num="$1"
+
+ if [ "$_num" -gt 10 ] ; then
+ _num=10
+ fi
+
+ _t=""
+ for _x in $(seq 1 $_num) ; do
+ _t="${_t:+${_t}
+}foo.tdb.0.DATE.TIME.corrupt"
+ done
+
+ ok "$_t"
+}
+
+for i in $(seq 1 15) ; do
+ FAKE_SLEEP_REALLY=yes sleep 1
+ touch "$db"
+ required_result_tdbcheck
+ simple_test
+ required_result_num_corrupt "$i"
+ simple_test_command test_num_corrupt
+done
diff --git a/ctdb/tests/UNIT/eventscripts/01.reclock.init.001.sh b/ctdb/tests/UNIT/eventscripts/01.reclock.init.001.sh
new file mode 100755
index 0000000..c495a47
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/01.reclock.init.001.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "unset, check no-op"
+
+setup ""
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/01.reclock.init.002.sh b/ctdb/tests/UNIT/eventscripts/01.reclock.init.002.sh
new file mode 100755
index 0000000..1bd409c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/01.reclock.init.002.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set to use helper, check no-op"
+
+setup "!/bin/false"
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/01.reclock.init.003.sh b/ctdb/tests/UNIT/eventscripts/01.reclock.init.003.sh
new file mode 100755
index 0000000..a8b6abd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/01.reclock.init.003.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set to default lock file, directory is created"
+
+setup
+
+dir=$(dirname "$CTDB_RECOVERY_LOCK")
+
+# Ensure directory doesn't exist before
+required_result 1 ""
+unit_test test -d "$dir"
+
+ok_null
+simple_test
+
+# Ensure directory exists after
+ok_null
+unit_test test -d "$dir"
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.001.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.001.sh
new file mode 100755
index 0000000..4171f3d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Filesystem use check, error situation, default checks enabled"
+
+setup
+
+set_fs_usage 100
+ok <<EOF
+WARNING: Filesystem ${CTDB_DBDIR_BASE} utilization 100% >= threshold 90%
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.002.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.002.sh
new file mode 100755
index 0000000..4e78a56
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.002.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Filesystem use check, good situation, 1 error check enabled"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_FILESYSTEM_USAGE="/var::80"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.003.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.003.sh
new file mode 100755
index 0000000..41fd914
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.003.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Filesystem use check, error situation, 1 error check enabled"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_FILESYSTEM_USAGE="/var::80"
+EOF
+
+set_fs_usage 90
+required_result 1 <<EOF
+ERROR: Filesystem /var utilization 90% >= threshold 80%
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.004.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.004.sh
new file mode 100755
index 0000000..3400393
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.004.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Filesystem use check, warn situation, only error check enabled"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_FILESYSTEM_USAGE="/var::80"
+EOF
+
+set_fs_usage 70
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.005.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.005.sh
new file mode 100755
index 0000000..7e1a953
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.005.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Filesystem use check, warn situation, both checks enabled"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_FILESYSTEM_USAGE="/var:80:90"
+EOF
+
+set_fs_usage 85
+ok <<EOF
+WARNING: Filesystem /var utilization 85% >= threshold 80%
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.006.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.006.sh
new file mode 100755
index 0000000..48008d9
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.006.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Filesystem use check, error situation, both checks enabled"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_FILESYSTEM_USAGE="/var:80:90"
+EOF
+
+set_fs_usage 95
+required_result 1 <<EOF
+ERROR: Filesystem /var utilization 95% >= threshold 90%
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.007.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.007.sh
new file mode 100755
index 0000000..68b99cf
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.007.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Filesystem use check, good situation, both checks enabled, multiple filesystems"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_FILESYSTEM_USAGE="/var:80:90 /:90:95"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.011.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.011.sh
new file mode 100755
index 0000000..6cd1dab
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.011.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check (default), warning situation"
+
+setup
+
+set_mem_usage 100 100
+ok <<EOF
+WARNING: System memory utilization 100% >= threshold 80%
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.012.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.012.sh
new file mode 100755
index 0000000..9e84056
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.012.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check (custom, both), good situation"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_MEMORY_USAGE="80:90"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.014.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.014.sh
new file mode 100755
index 0000000..9e2b21c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.014.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check (custom, warning only), warning situation"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_MEMORY_USAGE="85:"
+EOF
+
+set_mem_usage 90 90
+ok <<EOF
+WARNING: System memory utilization 90% >= threshold 85%
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.015.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.015.sh
new file mode 100755
index 0000000..76b73a3
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.015.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check (custom, error only), error situation"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_MEMORY_USAGE=":85"
+EOF
+
+set_mem_usage 90 90
+required_result 1 <<EOF
+ERROR: System memory utilization 90% >= threshold 85%
+$FAKE_PROC_MEMINFO
+$(ps auxfww)
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.017.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.017.sh
new file mode 100755
index 0000000..b2e5029
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.017.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check (custom, both), error situation"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_MEMORY_USAGE="70:80"
+EOF
+
+set_mem_usage 87 87
+required_result 1 <<EOF
+ERROR: System memory utilization 87% >= threshold 80%
+$FAKE_PROC_MEMINFO
+$(ps auxfww)
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/05.system.monitor.018.sh b/ctdb/tests/UNIT/eventscripts/05.system.monitor.018.sh
new file mode 100755
index 0000000..427adc6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/05.system.monitor.018.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check (custom, both), check throttling of warnings"
+
+setup
+
+setup_script_options <<EOF
+CTDB_MONITOR_MEMORY_USAGE="70:80"
+EOF
+
+# Below threshold, nothing logged
+set_mem_usage 67 67
+ok_null
+simple_test
+
+set_mem_usage 71 71
+ok "WARNING: System memory utilization 71% >= threshold 70%"
+simple_test
+
+# 2nd time at same level, nothing logged
+set_mem_usage 71 71
+ok_null
+simple_test
+
+set_mem_usage 73 73
+ok "WARNING: System memory utilization 73% >= threshold 70%"
+simple_test
+
+# 2nd time at same level, nothing logged
+set_mem_usage 73 73
+ok_null
+simple_test
+
+set_mem_usage 79 79
+ok "WARNING: System memory utilization 79% >= threshold 70%"
+simple_test
+
+set_mem_usage 80 80
+required_result 1 <<EOF
+ERROR: System memory utilization 80% >= threshold 80%
+$FAKE_PROC_MEMINFO
+$(ps auxfww)
+EOF
+simple_test
+
+# Fall back into warning at same level as last warning... should log
+set_mem_usage 79 79
+ok "WARNING: System memory utilization 79% >= threshold 70%"
+simple_test
+
+# Below threshold, notice
+set_mem_usage 69 69
+ok <<EOF
+NOTICE: System memory utilization 69% < threshold 70%
+EOF
+simple_test
+
+# Further reduction, nothing logged
+set_mem_usage 68 68
+ok_null
+simple_test
+
+# Back up into warning at same level as last warning... should log
+set_mem_usage 79 79
+ok "WARNING: System memory utilization 79% >= threshold 70%"
+simple_test
+
+# Back up above critical threshold... unhealthy
+set_mem_usage 81 81
+required_result 1 <<EOF
+ERROR: System memory utilization 81% >= threshold 80%
+$FAKE_PROC_MEMINFO
+$(ps auxfww)
+EOF
+simple_test
+
+# Straight back down to a good level... notice
+set_mem_usage 65 65
+ok "NOTICE: System memory utilization 65% < threshold 70%"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/06.nfs.releaseip.001.sh b/ctdb/tests/UNIT/eventscripts/06.nfs.releaseip.001.sh
new file mode 100755
index 0000000..0546863
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/06.nfs.releaseip.001.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout succeeds"
+
+setup
+
+setup_nfs_callout
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/06.nfs.releaseip.002.sh b/ctdb/tests/UNIT/eventscripts/06.nfs.releaseip.002.sh
new file mode 100755
index 0000000..dc44d2d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/06.nfs.releaseip.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes releaseip-pre to fail"
+
+setup
+
+setup_nfs_callout "releaseip-pre"
+
+required_result 1 "releaseip-pre"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/06.nfs.takeip.001.sh b/ctdb/tests/UNIT/eventscripts/06.nfs.takeip.001.sh
new file mode 100755
index 0000000..0546863
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/06.nfs.takeip.001.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout succeeds"
+
+setup
+
+setup_nfs_callout
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/06.nfs.takeip.002.sh b/ctdb/tests/UNIT/eventscripts/06.nfs.takeip.002.sh
new file mode 100755
index 0000000..c9f3db9
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/06.nfs.takeip.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes takeip-pre to fail"
+
+setup
+
+setup_nfs_callout "takeip-pre"
+
+required_result 1 "takeip-pre"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.010.sh b/ctdb/tests/UNIT/eventscripts/10.interface.010.sh
new file mode 100755
index 0000000..171a697
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.010.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed OK"
+
+setup
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ count=10
+ setup_tcp_connections $count \
+ "$ip" 445 10.254.254.0 12300
+
+ ok <<EOF
+Killed ${count}/${count} TCP connections to released IP $ip
+EOF
+
+ simple_test_event "releaseip" $dev $ip $bits
+done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.011.sh b/ctdb/tests/UNIT/eventscripts/10.interface.011.sh
new file mode 100755
index 0000000..7f4302d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.011.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed, 1 fails"
+
+setup
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ count=10
+ setup_tcp_connections $count \
+ "$ip" 445 10.254.254.0 12300
+
+ setup_tcp_connections_unkillable 1 \
+ "$ip" 445 10.254.254.0 43210
+
+ ok <<EOF
+Killed 10/11 TCP connections to released IP ${ip}
+Remaining connections:
+ ${ip}:445 10.254.254.1:43211
+EOF
+
+ simple_test_event "releaseip" $dev $ip $bits
+done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.012.sh b/ctdb/tests/UNIT/eventscripts/10.interface.012.sh
new file mode 100755
index 0000000..2ef0fe6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.012.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed, 3 fail"
+
+setup
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ count=10
+
+ setup_tcp_connections $count \
+ "$ip" 445 10.254.254.0 12300
+
+ setup_tcp_connections_unkillable 3 \
+ "$ip" 445 10.254.254.0 43210
+
+ ok <<EOF
+Killed 10/13 TCP connections to released IP ${ip}
+Remaining connections:
+ ${ip}:445 10.254.254.1:43211
+ ${ip}:445 10.254.254.2:43212
+ ${ip}:445 10.254.254.3:43213
+EOF
+
+ simple_test_event "releaseip" $dev $ip $bits
+done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.013.sh b/ctdb/tests/UNIT/eventscripts/10.interface.013.sh
new file mode 100755
index 0000000..e9a4c30
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.013.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, all 10 connections kills fail"
+
+setup
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ setup_tcp_connections 0
+
+ count=10
+ setup_tcp_connections_unkillable $count \
+ "$ip" 445 10.254.254.0 43210
+
+ ok <<EOF
+Killed 0/$count TCP connections to released IP ${ip}
+Remaining connections:
+ ${ip}:445 10.254.254.1:43211
+ ${ip}:445 10.254.254.2:43212
+ ${ip}:445 10.254.254.3:43213
+ ${ip}:445 10.254.254.4:43214
+ ${ip}:445 10.254.254.5:43215
+ ${ip}:445 10.254.254.6:43216
+ ${ip}:445 10.254.254.7:43217
+ ${ip}:445 10.254.254.8:43218
+ ${ip}:445 10.254.254.9:43219
+ ${ip}:445 10.254.254.10:43220
+EOF
+
+ simple_test_event "releaseip" $dev $ip $bits
+done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.init.001.sh b/ctdb/tests/UNIT/eventscripts/10.interface.init.001.sh
new file mode 100755
index 0000000..7f370b2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.init.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "no public addresses"
+
+setup
+
+rm -f "${CTDB_BASE}/public_addresses"
+
+ok "No public addresses file found"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.init.002.sh b/ctdb/tests/UNIT/eventscripts/10.interface.init.002.sh
new file mode 100755
index 0000000..1862eac
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.init.002.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all interfaces up"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.init.021.sh b/ctdb/tests/UNIT/eventscripts/10.interface.init.021.sh
new file mode 100755
index 0000000..fd89c87
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.init.021.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Check public IP dropping, none assigned"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.init.022.sh b/ctdb/tests/UNIT/eventscripts/10.interface.init.022.sh
new file mode 100755
index 0000000..ee7fa14
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.init.022.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Check public IP dropping, 1 assigned"
+
+setup
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ip addr add "${ip}/${bits}" dev "$dev"
+
+ ok <<EOF
+Removing public address ${ip}/${bits} from device ${dev}
+EOF
+
+ simple_test
+done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.init.023.sh b/ctdb/tests/UNIT/eventscripts/10.interface.init.023.sh
new file mode 100755
index 0000000..b39b67a
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.init.023.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Check public IP dropping, all assigned"
+
+setup
+
+nl="
+"
+ctdb_get_my_public_addresses | {
+ out=""
+ while read dev ip bits ; do
+ ip addr add "${ip}/${bits}" dev "$dev"
+
+ msg="Removing public address ${ip}/${bits} from device ${dev}"
+ out="${out}${out:+${nl}}${msg}"
+ done
+
+ ok "$out"
+
+ simple_test
+}
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.001.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.001.sh
new file mode 100755
index 0000000..c829efc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "no public addresses"
+
+setup
+
+rm -f "${CTDB_BASE}/public_addresses"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.002.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.002.sh
new file mode 100755
index 0000000..1862eac
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.002.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all interfaces up"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.003.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.003.sh
new file mode 100755
index 0000000..db1b2c6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.003.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 interface down"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+ethtool_interfaces_down $iface
+
+required_result 1 "ERROR: No link on the public network interface $iface"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.004.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.004.sh
new file mode 100755
index 0000000..3f20fdc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.004.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all interfaces up, 1 is a bond"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+setup_bond $iface
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.005.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.005.sh
new file mode 100755
index 0000000..1042d15
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.005.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 bond, no active slaves"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+setup_bond $iface "None"
+
+required_result 1 "ERROR: No active slaves for bond device $iface"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.006.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.006.sh
new file mode 100755
index 0000000..5facf08
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.006.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 bond, active slaves, link down"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+setup_bond $iface "" "down"
+
+required_result 1 "ERROR: public network interface $iface is down"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.009.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.009.sh
new file mode 100755
index 0000000..93ed68b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.009.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "CTDB_PARTIALLY_ONLINE_INTERFACES, 1 down"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+setup_script_options <<EOF
+CTDB_PARTIALLY_ONLINE_INTERFACES=yes
+EOF
+
+ethtool_interfaces_down "$iface"
+
+ok "ERROR: No link on the public network interface $iface"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.010.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.010.sh
new file mode 100755
index 0000000..5287893
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.010.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "CTDB_PARTIALLY_ONLINE_INTERFACES, all down"
+
+setup
+
+ifaces=$(ctdb_get_interfaces)
+
+setup_script_options <<EOF
+CTDB_PARTIALLY_ONLINE_INTERFACES=yes
+EOF
+
+ethtool_interfaces_down $ifaces
+
+msg=$(
+ for i in $ifaces ; do
+ echo "ERROR: No link on the public network interface $i"
+ done
+ )
+
+required_result 1 "$msg"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.011.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.011.sh
new file mode 100755
index 0000000..824bd32
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.011.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "CTDB_PARTIALLY_ONLINE_INTERFACES, 1 bond down"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+setup_bond $iface "None"
+
+setup_script_options <<EOF
+CTDB_PARTIALLY_ONLINE_INTERFACES=yes
+EOF
+
+ethtool_interfaces_down "$iface"
+
+ok "ERROR: No active slaves for bond device $iface"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.012.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.012.sh
new file mode 100755
index 0000000..1315980
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.012.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "CTDB_PARTIALLY_ONLINE_INTERFACES, all bonds down"
+
+setup
+
+ifaces=$(ctdb_get_interfaces)
+
+# Make every interface a bond; "None" presumably means no active slave
+for i in $ifaces ; do
+ setup_bond $i "None"
+done
+
+setup_script_options <<EOF
+CTDB_PARTIALLY_ONLINE_INTERFACES=yes
+EOF
+
+ethtool_interfaces_down $ifaces
+
+# One error line is expected per interface
+msg=$(
+ for i in $ifaces ; do
+ echo "ERROR: No active slaves for bond device $i"
+ done
+ )
+
+# With every interface affected the event is expected to fail (result 1)
+required_result 1 "$msg"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.013.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.013.sh
new file mode 100755
index 0000000..2aa0a8e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.013.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 802.ad bond, link up, no active slaves"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+# NOTE(review): third argument is "up"; the fourth ("down") presumably
+# sets the 802.3ad/MII state - confirm against setup_bond
+setup_bond $iface "" "up" "down"
+
+required_result 1 "ERROR: No active slaves for 802.ad bond device $iface"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.014.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.014.sh
new file mode 100755
index 0000000..1dd8ff0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.014.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "spurious addresses on interface, no action"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+ip addr add 192.168.253.253/24 dev $iface
+ip addr add 192.168.254.254/24 dev $iface
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.015.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.015.sh
new file mode 100755
index 0000000..b7b4787
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.015.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Missing interface, fail"
+
+setup
+
+# Delete one interface; the expected error below names that same interface
+iface=$(ctdb_get_1_interface)
+ip link delete "$iface"
+
+required_result 1 <<EOF
+ERROR: Monitored interface ${iface} does not exist
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.016.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.016.sh
new file mode 100755
index 0000000..bd7f302
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.016.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Missing interface, CTDB_PARTIALLY_ONLINE_INTERFACES=yes, warn"
+
+setup
+
+setup_script_options <<EOF
+CTDB_PARTIALLY_ONLINE_INTERFACES=yes
+EOF
+
+# Delete one interface; the expected error below names that same interface
+iface=$(ctdb_get_1_interface)
+ip link delete "$iface"
+
+ok <<EOF
+ERROR: Monitored interface ${iface} does not exist
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.017.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.017.sh
new file mode 100755
index 0000000..bae0886
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.017.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 VLAN, link down"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+ethtool_interfaces_down "$iface"
+
+# This just exercises the VLAN checking code, which will allow us to
+# determine that real0 is not a bond.
+realiface="real0"
+ip link add link "$realiface" name "$iface" type vlan id 11
+ip link set "${iface}@${realiface}" up
+
+required_result 1 "ERROR: No link on the public network interface ${iface}"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.monitor.018.sh b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.018.sh
new file mode 100755
index 0000000..8006d92
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.monitor.018.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "VLAN on bond, active slaves, link down"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+bond="bond0"
+
+setup_bond "$bond" "" "down"
+
+ip link add link "$bond" name "$iface" type vlan id 11
+ip link set "${iface}@${bond}" up
+
+required_result 1 "ERROR: public network interface ${bond} is down"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.multi.001.sh b/ctdb/tests/UNIT/eventscripts/10.interface.multi.001.sh
new file mode 100755
index 0000000..867cc24
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.multi.001.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "takeip, releaseip"
+
+setup
+
+public_address=$(ctdb_get_1_public_address)
+
+ok_null
+
+# $public_address is deliberately unquoted so it word-splits into separate
+# event arguments (presumably interface, IP and maskbits)
+simple_test_event "takeip" $public_address
+simple_test_event "releaseip" $public_address
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.releaseip.001.sh b/ctdb/tests/UNIT/eventscripts/10.interface.releaseip.001.sh
new file mode 100755
index 0000000..2ac0b86
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.releaseip.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "error - no args given"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+required_result 1 "ERROR: must supply interface, IP and maskbits"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.releaseip.002.sh b/ctdb/tests/UNIT/eventscripts/10.interface.releaseip.002.sh
new file mode 100755
index 0000000..a60adbd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.releaseip.002.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "error - remove a non-existent ip"
+
+setup
+
+public_address=$(ctdb_get_1_public_address)
+ip="${public_address% *}" ; ip="${ip#* }"
+
+required_result 1 "ERROR: Unable to determine interface for IP ${ip}"
+
+simple_test $public_address
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.startup.001.sh b/ctdb/tests/UNIT/eventscripts/10.interface.startup.001.sh
new file mode 100755
index 0000000..c829efc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.startup.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "no public addresses"
+
+setup
+
+rm -f "${CTDB_BASE}/public_addresses"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.startup.002.sh b/ctdb/tests/UNIT/eventscripts/10.interface.startup.002.sh
new file mode 100755
index 0000000..1862eac
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.startup.002.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all interfaces up"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.takeip.001.sh b/ctdb/tests/UNIT/eventscripts/10.interface.takeip.001.sh
new file mode 100755
index 0000000..2ac0b86
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.takeip.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "error - no args given"
+
+setup
+
+iface=$(ctdb_get_1_interface)
+
+required_result 1 "ERROR: must supply interface, IP and maskbits"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.takeip.002.sh b/ctdb/tests/UNIT/eventscripts/10.interface.takeip.002.sh
new file mode 100755
index 0000000..e267f16
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.takeip.002.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "add an ip"
+
+setup
+
+public_address=$(ctdb_get_1_public_address)
+
+ok_null
+
+simple_test $public_address
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.takeip.003.sh b/ctdb/tests/UNIT/eventscripts/10.interface.takeip.003.sh
new file mode 100755
index 0000000..acb9b04
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/10.interface.takeip.003.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "error - add same IP twice"
+
+setup
+
+public_address=$(ctdb_get_1_public_address)
+dev="${public_address%% *}"
+t="${public_address#* }"
+ip="${t% *}"
+bits="${t#* }"
+
+ok_null
+simple_test $public_address
+
+required_result 1 <<EOF
+RTNETLINK answers: File exists
+Failed to add $ip/$bits on dev $dev
+EOF
+simple_test $public_address
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.001.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.001.sh
new file mode 100755
index 0000000..06b2cd3
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "not configured"
+
+setup
+
+# No NAT gateway configuration is set up here, so no output is expected
+ok_null
+simple_test_event "ipreallocated"
+
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.002.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.002.sh
new file mode 100755
index 0000000..90b1399
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.002.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "missing config file"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+rm -f "$CTDB_NATGW_NODES"
+
+required_result 1 <<EOF
+error: CTDB_NATGW_NODES=${CTDB_NATGW_NODES} unreadable
+EOF
+
+for i in "startup" "ipreallocated" ; do
+ simple_test_event "$i"
+done
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.003.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.003.sh
new file mode 100755
index 0000000..370c10d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.003.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "CTDB_NATGW_PUBLIC_IFACE unset, not follower-only"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_PUBLIC_IFACE=""
+EOF
+
+required_result 1 "Invalid configuration: CTDB_NATGW_PUBLIC_IFACE not set"
+
+for i in "startup" "ipreallocated" ; do
+ simple_test_event "$i"
+done
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.004.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.004.sh
new file mode 100755
index 0000000..0f06be1
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.004.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "CTDB_NATGW_PUBLIC_IP unset, not follower-only"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_PUBLIC_IP=""
+EOF
+
+required_result 1 "Invalid configuration: CTDB_NATGW_PUBLIC_IP not set"
+
+for i in "startup" "ipreallocated" ; do
+ simple_test_event "$i"
+done
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.011.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.011.sh
new file mode 100755
index 0000000..407f049
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.011.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "leader node, basic configuration"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${CTDB_NATGW_DEFAULT_GATEWAY} dev ethXXX metric 10 "
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.012.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.012.sh
new file mode 100755
index 0000000..fdec8ee
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.012.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "follower node, basic configuration"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${FAKE_CTDB_NATGW_LEADER} dev ethXXX metric 10 "
+simple_test_command ip route show
+
+ok_natgw_follower_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.013.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.013.sh
new file mode 100755
index 0000000..cb9af46
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.013.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "leader node, no gateway"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_DEFAULT_GATEWAY=""
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_null
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.014.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.014.sh
new file mode 100755
index 0000000..0fc3ccc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.014.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "follower node, no gateway"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_DEFAULT_GATEWAY=""
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${FAKE_CTDB_NATGW_LEADER} dev ethXXX metric 10 "
+simple_test_command ip route show
+
+ok_natgw_follower_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.015.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.015.sh
new file mode 100755
index 0000000..84cc17b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.015.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "basic configuration, multiple transitions"
+
+setup
+
+echo "*** Leader node..."
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${CTDB_NATGW_DEFAULT_GATEWAY} dev ethXXX metric 10 "
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
+
+echo "*** Follower node..."
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${FAKE_CTDB_NATGW_LEADER} dev ethXXX metric 10 "
+simple_test_command ip route show
+
+ok_natgw_follower_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
+
+echo "*** Leader node again..."
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${CTDB_NATGW_DEFAULT_GATEWAY} dev ethXXX metric 10 "
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.021.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.021.sh
new file mode 100755
index 0000000..7d73c37
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.021.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "leader node, static routes"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options<<EOF
+CTDB_NATGW_STATIC_ROUTES="10.1.1.0/24 10.1.2.0/24"
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_leader_static_routes
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.022.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.022.sh
new file mode 100755
index 0000000..2a4dd47
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.022.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "follower node, static routes"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_STATIC_ROUTES="10.1.1.0/24 10.1.2.0/24"
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_follower_static_routes
+simple_test_command ip route show
+
+ok_natgw_follower_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.023.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.023.sh
new file mode 100755
index 0000000..9fdf734
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.023.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "leader node, static routes, custom gateway"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_STATIC_ROUTES="10.1.1.0/24 10.1.2.0/24@10.1.1.253"
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_leader_static_routes
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.024.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.024.sh
new file mode 100755
index 0000000..24f677d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.024.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "follower node, static routes, custom gateway"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_STATIC_ROUTES="10.1.1.0/24 10.1.2.0/24@10.1.1.253"
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_follower_static_routes
+simple_test_command ip route show
+
+ok_natgw_follower_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.025.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.025.sh
new file mode 100755
index 0000000..d4221c2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.025.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "static routes, custom gateway, multiple transitions"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NATGW_STATIC_ROUTES="10.1.1.0/24 10.1.2.0/24@10.1.1.253"
+EOF
+
+echo "*** Leader node..."
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_leader_static_routes
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
+
+echo "*** Follower node..."
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_follower_static_routes
+simple_test_command ip route show
+
+ok_natgw_follower_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
+
+echo "*** Leader node again..."
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_leader_static_routes
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.031.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.031.sh
new file mode 100755
index 0000000..6a5bcad
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.031.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "leader node, static routes, custom gateway, config change"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NATGW_STATIC_ROUTES="10.1.1.0/24 10.1.2.0/24@10.1.1.253"
+EOF
+
+echo "##################################################"
+echo "Static routes..."
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok_natgw_leader_static_routes
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
+
+echo "##################################################"
+echo "Default routes..."
+
+setup_script_options <<EOF
+CTDB_NATGW_STATIC_ROUTES=""
+EOF
+
+ok "NAT gateway configuration has changed"
+simple_test_event "ipreallocated"
+
+ok "default via ${CTDB_NATGW_DEFAULT_GATEWAY} dev ethXXX metric 10 "
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
+
+echo "##################################################"
+echo "Static routes again..."
+
+setup_script_options <<EOF
+CTDB_NATGW_STATIC_ROUTES="10.1.3.0/24 10.1.4.4/32 10.1.2.0/24@10.1.1.252"
+EOF
+
+ok "NAT gateway configuration has changed"
+simple_test_event "ipreallocated"
+
+ok_natgw_leader_static_routes
+simple_test_command ip route show
+
+ok_natgw_leader_ip_addr_show
+simple_test_command ip addr show "$CTDB_NATGW_PUBLIC_IFACE"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.041.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.041.sh
new file mode 100755
index 0000000..1cbe5b3
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.041.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "follower-only, CTDB_NATGW_PUBLIC_IFACE unset"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 follower-only
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_PUBLIC_IFACE=""
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${FAKE_CTDB_NATGW_LEADER} dev ethXXX metric 10 "
+simple_test_command ip route show
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.042.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.042.sh
new file mode 100755
index 0000000..b643fd3
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.042.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "follower-only, CTDB_NATGW_PUBLIC_IP unset"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 follower-only
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+setup_script_options <<EOF
+CTDB_NATGW_PUBLIC_IFACE=""
+CTDB_NATGW_PUBLIC_IP=""
+EOF
+
+ok_null
+simple_test_event "ipreallocated"
+
+ok "default via ${FAKE_CTDB_NATGW_LEADER} dev ethXXX metric 10 "
+simple_test_command ip route show
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.051.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.051.sh
new file mode 100755
index 0000000..6c711c0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.051.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Monitor CTDB_NATGW_PUBLIC_IFACE, follower, up"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "monitor"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.052.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.052.sh
new file mode 100755
index 0000000..ad02003
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.052.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Monitor CTDB_NATGW_PUBLIC_IFACE, follower, down"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21
+192.168.1.22 leader
+192.168.1.23
+192.168.1.24
+EOF
+
+ethtool_interfaces_down "$CTDB_NATGW_PUBLIC_IFACE"
+
+required_result 1 <<EOF
+ERROR: No link on the public network interface ${CTDB_NATGW_PUBLIC_IFACE}
+EOF
+simple_test_event "monitor"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.053.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.053.sh
new file mode 100755
index 0000000..e9bded1
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.053.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Monitor CTDB_NATGW_PUBLIC_IFACE, leader, up"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ok_null
+simple_test_event "monitor"
diff --git a/ctdb/tests/UNIT/eventscripts/11.natgw.054.sh b/ctdb/tests/UNIT/eventscripts/11.natgw.054.sh
new file mode 100755
index 0000000..2a79cde
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/11.natgw.054.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Monitor CTDB_NATGW_PUBLIC_IFACE, leader, down"
+
+setup
+
+setup_ctdb_natgw <<EOF
+192.168.1.21 leader
+192.168.1.22
+192.168.1.23
+192.168.1.24
+EOF
+
+ethtool_interfaces_down "$CTDB_NATGW_PUBLIC_IFACE"
+
+required_result 1 <<EOF
+ERROR: No link on the public network interface ${CTDB_NATGW_PUBLIC_IFACE}
+EOF
+simple_test_event "monitor"
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.001.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.001.sh
new file mode 100755
index 0000000..55c8c64
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.001.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "not configured"
+
+setup
+
+setup_script_options <<EOF
+CTDB_PER_IP_ROUTING_CONF=""
+EOF
+
+ok_null
+simple_test_event "takeip"
+
+ok_null
+simple_test_event "ipreallocated"
+
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.002.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.002.sh
new file mode 100755
index 0000000..6925983
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.002.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "missing config file"
+
+setup
+
+# Error because policy routing is configured but the configuration
+# file is missing.
+required_result 1 <<EOF
+error: CTDB_PER_IP_ROUTING_CONF=${CTDB_BASE}/policy_routing file not found
+EOF
+
+for i in "startup" "ipreallocated" "monitor" ; do
+ simple_test_event "$i"
+done
+
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.003.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.003.sh
new file mode 100755
index 0000000..4eac963
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.003.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "empty config, ipreallocated"
+
+setup
+
+create_policy_routing_config 0
+
+# ipreallocated should silently add any missing routes
+ok_null
+simple_test_event "ipreallocated"
+
+# empty configuration file should mean there are no routes
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.004.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.004.sh
new file mode 100755
index 0000000..3724de0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.004.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "empty config, takeip"
+
+setup
+
+create_policy_routing_config 0
+
+public_address=$(ctdb_get_1_public_address)
+
+ok_null
+simple_test_event "takeip" $public_address
+
+# empty configuration file should mean there are no routes
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.005.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.005.sh
new file mode 100755
index 0000000..baafbbb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.005.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, takeip"
+
+setup
+
+# Configuration for 1 IP
+create_policy_routing_config 1 default
+
+# takeip should add routes for the given address
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+done
+
+# Should have routes for 1 IP
+check_routes 1 default
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.006.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.006.sh
new file mode 100755
index 0000000..6c4d686
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.006.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, takeip, releaseip"
+
+setup
+
+# create config for 1 IP
+create_policy_routing_config 1 default
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ # takeip adds routes
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ # releaseip removes routes
+ ok_null
+ simple_test_event "releaseip" $dev $ip $bits
+done
+
+# should have no routes
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.007.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.007.sh
new file mode 100755
index 0000000..4cf46e6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.007.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, ipreallocated"
+
+setup
+
+# create config for 1 IP
+create_policy_routing_config 1 default
+
+# no takeip, but ipreallocated should add any missing routes
+ok_null
+simple_test_event "ipreallocated"
+
+# should have routes for 1 IP
+check_routes 1 default
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.008.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.008.sh
new file mode 100755
index 0000000..889b4c4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.008.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, takeip twice"
+
+setup
+
+# create config for 1 IP
+create_policy_routing_config 1 default
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ # 2nd takeip event for the same IP should be a no-op
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+done
+
+# should be routes for 1 IP
+check_routes 1 default
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.009.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.009.sh
new file mode 100755
index 0000000..c887feb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.009.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "All IPs configured, takeip 1 address"
+
+setup
+
+# configure all addresses
+create_policy_routing_config all default
+
+# add routes for 1 IP
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+done
+
+# for 1 IP
+check_routes 1 default
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.010.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.010.sh
new file mode 100755
index 0000000..7297f96
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.010.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "All IPs configured, takeip on all nodes"
+
+setup
+
+# create config for all IPs
+create_policy_routing_config all default
+
+ctdb_get_my_public_addresses |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+done
+
+# should have routes for all IPs
+check_routes all default
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.011.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.011.sh
new file mode 100755
index 0000000..8d96c8d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.011.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "__auto_link_local__, takeip all on node"
+
+setup
+
+# do link local fu instead of creating configuration
+setup_script_options <<EOF
+CTDB_PER_IP_ROUTING_CONF="__auto_link_local__"
+EOF
+
+# add routes for all addresses
+ctdb_get_my_public_addresses |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+done
+
+check_routes all
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.012.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.012.sh
new file mode 100755
index 0000000..48aab21
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.012.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, takeip, releaseip, ipreallocated"
+
+# This partly tests the test infrastructure. If the (stub) "ctdb
+# moveip" doesn't do anything then the IP being released will still be
+# on the node and the ipreallocated event will add the routes back.
+
+setup
+
+create_policy_routing_config 1 default
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ ok_null
+ ctdb moveip $ip 1
+ simple_test_event "releaseip" $dev $ip $bits
+
+ ok_null
+ simple_test_event "ipreallocated"
+done
+
+# all routes should have been removed and not added back
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.013.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.013.sh
new file mode 100755
index 0000000..2262083
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.013.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, releaseip of unassigned"
+
+setup
+
+create_policy_routing_config 1 default
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok <<EOF
+WARNING: Failed to delete policy routing rule
+ Command "ip rule del from $ip pref $CTDB_PER_IP_ROUTING_RULE_PREF table ctdb.$ip" failed:
+ RTNETLINK answers: No such file or directory
+EOF
+
+ simple_test_event "releaseip" $dev $ip $bits
+done
+
+# there should be no routes
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.014.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.014.sh
new file mode 100755
index 0000000..a63e134
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.014.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, takeip, moveip, ipreallocated"
+
+# We move the IP to another node but don't run releaseip.
+# ipreallocated should remove the bogus routes.
+
+setup
+
+create_policy_routing_config 1 default
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ ok_null
+ # Set up the routes for an IP that we have
+ simple_test_event "takeip" $dev $ip $bits
+
+ # Now move that IP but don't run the associated "releaseip"
+ ctdb moveip $ip 1
+
+ # This should handle removal of the routes
+ ok "Removing ip rule/routes for unhosted public address $ip"
+ simple_test_event "ipreallocated"
+done
+
+# no routes left
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.015.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.015.sh
new file mode 100755
index 0000000..742cfd4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.015.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, releaseip of unassigned, bad table ID"
+
+setup
+
+export IP_ROUTE_BAD_TABLE_ID=true
+
+create_policy_routing_config 1 default
+
+ctdb_get_1_public_address |
+{
+ read dev ip bits
+
+ ok <<EOF
+WARNING: Failed to delete policy routing rule
+ Command "ip rule del from $ip pref $CTDB_PER_IP_ROUTING_RULE_PREF table ctdb.$ip" failed:
+ Error: argument ctdb.$ip is wrong: invalid table ID
+ Error: argument ctdb.$ip is wrong: table id value is invalid
+EOF
+
+ simple_test_event "releaseip" $dev $ip $bits
+}
+
+
+# there should be no routes
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.016.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.016.sh
new file mode 100755
index 0000000..4856ba5
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.016.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "empty config, reconfigure, NOOP"
+
+setup
+
+create_policy_routing_config 0
+
+ok "Reconfiguring service \"${service_name}\"..."
+simple_test_event "reconfigure"
+
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.017.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.017.sh
new file mode 100755
index 0000000..d26ab9c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.017.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, reconfigure"
+
+setup
+
+create_policy_routing_config 1 default
+
+# no takeip, but reconfigure should add any missing routes
+ok "Reconfiguring service \"${service_name}\"..."
+simple_test_event "reconfigure"
+
+check_routes 1 default
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.018.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.018.sh
new file mode 100755
index 0000000..4d89dc2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.018.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, ipreallocated, more routes, reconfigure"
+
+setup
+
+create_policy_routing_config 1
+
+# no takeip, but ipreallocated should add any missing routes
+ok_null
+simple_test_event "ipreallocated"
+
+create_policy_routing_config 1 default
+
+# reconfigure should update routes even though rules are unchanged
+ok "Reconfiguring service \"${service_name}\"..."
+simple_test_event "reconfigure"
+
+check_routes 1 default
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.019.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.019.sh
new file mode 100755
index 0000000..7575466
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.019.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, ipreallocated, less routes, reconfigure"
+
+setup
+
+create_policy_routing_config 1 default
+
+# no takeip, but ipreallocated should add any missing routes
+ok_null
+simple_test_event "ipreallocated"
+
+# rewrite the configuration without the "default" argument, taking
+# out the default routes
+create_policy_routing_config 1
+
+# reconfigure should update routes even though rules are unchanged
+ok "Reconfiguring service \"${service_name}\"..."
+simple_test_event "reconfigure"
+
+check_routes 1
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.021.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.021.sh
new file mode 100755
index 0000000..876b600
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.021.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Invalid table ID range - includes system tables"
+
+setup
+
+setup_script_options <<EOF
+CTDB_PER_IP_ROUTING_TABLE_ID_LOW=100
+CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=500
+EOF
+
+required_result 1 "error: range CTDB_PER_IP_ROUTING_TABLE_ID_LOW[${CTDB_PER_IP_ROUTING_TABLE_ID_LOW}]..CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[${CTDB_PER_IP_ROUTING_TABLE_ID_HIGH}] must not include 253-255"
+simple_test_event "ipreallocated"
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.022.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.022.sh
new file mode 100755
index 0000000..6f0638e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.022.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Invalid table ID range - reversed"
+
+setup
+
+setup_script_options <<EOF
+CTDB_PER_IP_ROUTING_TABLE_ID_LOW=9000
+CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=1000
+EOF
+
+required_result 1 "error: CTDB_PER_IP_ROUTING_TABLE_ID_LOW[${CTDB_PER_IP_ROUTING_TABLE_ID_LOW}] and/or CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[${CTDB_PER_IP_ROUTING_TABLE_ID_HIGH}] improperly configured"
+simple_test_event "ipreallocated"
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.023.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.023.sh
new file mode 100755
index 0000000..a94b58b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.023.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "1 IP configured, broken configuration, takeip"
+
+setup
+
+# Configuration for 1 IP
+create_policy_routing_config 1 default
+
+# takeip should add routes for the given address
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+ # Now add configuration breakage by changing default route into a
+ # link local route with a gateway
+ net=$(ipv4_host_addr_to_net "$ip" "$bits")
+ sed -i -e "s@0\.0\.0\.0/0@${net}@" "$CTDB_PER_IP_ROUTING_CONF"
+
+ ok <<EOF
+RTNETLINK answers: File exists
+add_routing_for_ip: failed to add route: ${net} via ${net%.*}.254 dev ${dev} table ctdb.${ip}
+EOF
+ simple_test_event "takeip" $dev $ip $bits
+done
diff --git a/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.024.sh b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.024.sh
new file mode 100755
index 0000000..7b1af37
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/13.per_ip_routing.024.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Single IP, restores original rt_tables"
+
+setup
+
+create_policy_routing_config 1 default
+
+_rt_tables="$CTDB_SYS_ETCDIR/iproute2/rt_tables"
+_rt_orig=$(TMPDIR="$CTDB_TEST_TMP_DIR" mktemp)
+cp "$_rt_tables" "$_rt_orig"
+
+ctdb_get_1_public_address | {
+ read dev ip bits
+
+ ok_null
+ simple_test_event "takeip" $dev $ip $bits
+
+ ok <<EOF
+Removing ip rule for public address ${ip} for routing table ctdb.${ip}
+EOF
+ simple_test_event "shutdown"
+}
+
+ok_null
+simple_test_command diff -u "$_rt_orig" "$_rt_tables"
+
+check_routes 0
diff --git a/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.001.sh b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.001.sh
new file mode 100755
index 0000000..4991765
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.001.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "No multipath devices configure to check"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.002.sh b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.002.sh
new file mode 100755
index 0000000..f57f476
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.002.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 multipath devices configure to check, all up"
+
+setup "mpatha" "mpathb" "mpathc"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.003.sh b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.003.sh
new file mode 100755
index 0000000..0d768a0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.003.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 multipath devices configure to check, one down"
+
+setup "mpatha" "!mpathb" "mpathc"
+
+required_result 1 <<EOF
+ERROR: multipath device "mpathb" has no active paths
+multipath monitoring failed
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.004.sh b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.004.sh
new file mode 100755
index 0000000..a655b83
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/20.multipathd.monitor.004.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 multipath devices configure to check, multipath hangs"
+
+setup "mpatha" "!mpathb" "mpathc"
+export FAKE_MULTIPATH_HANG="yes"
+
+required_result 1 <<EOF
+ERROR: callout to multipath checks hung
+multipath monitoring failed
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/31.clamd.monitor.002.sh b/ctdb/tests/UNIT/eventscripts/31.clamd.monitor.002.sh
new file mode 100755
index 0000000..48d3cbf
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/31.clamd.monitor.002.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Managed, clamd not listening"
+
+setup
+
+setup_script_options <<EOF
+CTDB_CLAMD_SOCKET="/var/run/clamd.sock"
+EOF
+
+required_result 1 <<EOF
+ERROR: clamd not listening on $CTDB_CLAMD_SOCKET
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/31.clamd.monitor.003.sh b/ctdb/tests/UNIT/eventscripts/31.clamd.monitor.003.sh
new file mode 100755
index 0000000..f4e37d2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/31.clamd.monitor.003.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Managed, clamd listening"
+
+setup
+
+setup_script_options <<EOF
+CTDB_CLAMD_SOCKET="/var/run/clamd.sock"
+EOF
+
+unix_socket_listening "$CTDB_CLAMD_SOCKET"
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/40.vsftpd.monitor.002.sh b/ctdb/tests/UNIT/eventscripts/40.vsftpd.monitor.002.sh
new file mode 100755
index 0000000..f825be4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/40.vsftpd.monitor.002.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "up once, down with recovery"
+
+setup "up"
+
+ok_null
+simple_test
+
+setup "down"
+
+ok <<EOF
+vsftpd not listening on TCP port 21
+WARNING: vsftpd listening on TCP port 21: fail count 1 >= threshold 1
+EOF
+simple_test
+
+setup "up"
+
+ok <<EOF
+NOTICE: vsftpd listening on TCP port 21: no longer failing
+EOF
+simple_test
+
+setup "down"
+
+ok <<EOF
+vsftpd not listening on TCP port 21
+WARNING: vsftpd listening on TCP port 21: fail count 1 >= threshold 1
+EOF
+simple_test
+
+required_result 1 <<EOF
+vsftpd not listening on TCP port 21
+ERROR: vsftpd listening on TCP port 21: fail count 2 >= threshold 2
+EOF
+simple_test
+
+required_result 1 <<EOF
+vsftpd not listening on TCP port 21
+ERROR: vsftpd listening on TCP port 21: fail count 3 >= threshold 2
+EOF
+simple_test
+
+setup "up"
+
+ok <<EOF
+NOTICE: vsftpd listening on TCP port 21: no longer failing
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/40.vsftpd.shutdown.002.sh b/ctdb/tests/UNIT/eventscripts/40.vsftpd.shutdown.002.sh
new file mode 100755
index 0000000..fe65278
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/40.vsftpd.shutdown.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "managed"
+
+setup "up"
+
+ok <<EOF
+Stopping vsftpd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/40.vsftpd.startup.002.sh b/ctdb/tests/UNIT/eventscripts/40.vsftpd.startup.002.sh
new file mode 100755
index 0000000..dd39860
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/40.vsftpd.startup.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "managed"
+
+setup "down"
+
+ok <<EOF
+Starting vsftpd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/41.httpd.monitor.002.sh b/ctdb/tests/UNIT/eventscripts/41.httpd.monitor.002.sh
new file mode 100755
index 0000000..383040c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/41.httpd.monitor.002.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "managed, down - 5 times"
+
+setup "down"
+
+ok_null
+simple_test
+
+ok <<EOF
+HTTPD is not running. Trying to restart HTTPD.
+service: can't stop httpd - not running
+Starting httpd: OK
+EOF
+simple_test
+
+ok_null
+simple_test
+
+ok_null
+simple_test
+
+required_result 1 <<EOF
+HTTPD is not running. Trying to restart HTTPD.
+Stopping httpd: OK
+Starting httpd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/41.httpd.shutdown.002.sh b/ctdb/tests/UNIT/eventscripts/41.httpd.shutdown.002.sh
new file mode 100755
index 0000000..4e342fb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/41.httpd.shutdown.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "managed"
+
+setup "up"
+
+ok <<EOF
+Stopping httpd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/41.httpd.startup.002.sh b/ctdb/tests/UNIT/eventscripts/41.httpd.startup.002.sh
new file mode 100755
index 0000000..1722785
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/41.httpd.startup.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "managed"
+
+setup "down"
+
+ok <<EOF
+Starting httpd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/48.netbios.shutdown.011.sh b/ctdb/tests/UNIT/eventscripts/48.netbios.shutdown.011.sh
new file mode 100755
index 0000000..0649813
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/48.netbios.shutdown.011.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "shutdown, Debian init style"
+
+setup
+
+export EVENTSCRIPT_TESTS_INIT_STYLE="debian"
+
+ok <<EOF
+Stopping nmbd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/48.netbios.startup.011.sh b/ctdb/tests/UNIT/eventscripts/48.netbios.startup.011.sh
new file mode 100755
index 0000000..40b90a1
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/48.netbios.startup.011.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "startup, Debian init style"
+
+setup
+
+export EVENTSCRIPT_TESTS_INIT_STYLE="debian"
+
+ok <<EOF
+Starting nmbd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/49.winbind.monitor.101.sh b/ctdb/tests/UNIT/eventscripts/49.winbind.monitor.101.sh
new file mode 100755
index 0000000..3884a33
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/49.winbind.monitor.101.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all OK"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/49.winbind.monitor.102.sh b/ctdb/tests/UNIT/eventscripts/49.winbind.monitor.102.sh
new file mode 100755
index 0000000..24e4ed2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/49.winbind.monitor.102.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "winbind down"
+
+setup
+
+wbinfo_down
+
+required_result 1 "ERROR: wbinfo -p returned error"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/49.winbind.shutdown.002.sh b/ctdb/tests/UNIT/eventscripts/49.winbind.shutdown.002.sh
new file mode 100755
index 0000000..dc6f160
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/49.winbind.shutdown.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "managed"
+
+setup "up"
+
+ok <<EOF
+Stopping winbind: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/49.winbind.startup.002.sh b/ctdb/tests/UNIT/eventscripts/49.winbind.startup.002.sh
new file mode 100755
index 0000000..dd0c1ad
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/49.winbind.startup.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "managed"
+
+setup "down"
+
+ok <<EOF
+Starting winbind: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.101.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.101.sh
new file mode 100755
index 0000000..3884a33
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.101.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all OK"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.103.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.103.sh
new file mode 100755
index 0000000..e9232a6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.103.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "port 445 down"
+
+setup
+
+tcp_port_down 445
+
+required_result 1 "samba not listening on TCP port 445"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.104.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.104.sh
new file mode 100755
index 0000000..8e9d789
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.104.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "port 139 down"
+
+setup
+
+tcp_port_down 139
+
+required_result 1 "samba not listening on TCP port 139"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.105.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.105.sh
new file mode 100755
index 0000000..7208aca
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.105.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "non-existent share path"
+
+setup
+
+out=$(shares_missing "samba" 2)
+
+required_result 1 "$out"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.106.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.106.sh
new file mode 100755
index 0000000..80bccef
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.106.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "non-existent share - not checked"
+
+setup
+
+setup_script_options <<EOF
+CTDB_SAMBA_SKIP_SHARE_CHECK="yes"
+EOF
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.110.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.110.sh
new file mode 100755
index 0000000..9645c5a
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.110.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "testparm fails"
+
+setup
+
+export FAKE_TESTPARM_FAIL="yes"
+required_result 1 <<EOF
+ERROR: smb.conf cache create failed - testparm failed with:
+Load smb config files from ${CTDB_SYS_ETCDIR}/samba/smb.conf
+rlimit_max: increasing rlimit_max (2048) to minimum Windows limit (16384)
+Processing section "[share1]"
+Processing section "[share2]"
+Processing section "[share3]"
+Loaded services file OK.
+WARNING: 'workgroup' and 'netbios name' must differ.
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.111.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.111.sh
new file mode 100755
index 0000000..d72855f
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.111.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "testparm fails on 2nd time through"
+
+setup
+
+ok_null
+simple_test
+
+export FAKE_TESTPARM_FAIL="yes"
+ok <<EOF
+WARNING: smb.conf cache update failed - using old cache file
+Load smb config files from ${CTDB_SYS_ETCDIR}/samba/smb.conf
+rlimit_max: increasing rlimit_max (2048) to minimum Windows limit (16384)
+Processing section "[share1]"
+Processing section "[share2]"
+Processing section "[share3]"
+Loaded services file OK.
+WARNING: 'workgroup' and 'netbios name' must differ.
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.112.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.112.sh
new file mode 100755
index 0000000..f714472
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.112.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "testparm times out"
+
+setup
+
+export FAKE_TIMEOUT="yes"
+required_result 1 <<EOF
+ERROR: smb.conf cache create failed - testparm command timed out
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.monitor.113.sh b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.113.sh
new file mode 100755
index 0000000..faadda1
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.monitor.113.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "testparm times out on 2nd time through"
+
+setup
+
+ok_null
+simple_test
+
+export FAKE_TIMEOUT="yes"
+ok <<EOF
+WARNING: smb.conf cache update timed out - using old cache file
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.001.sh b/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.001.sh
new file mode 100755
index 0000000..76ac985
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.001.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "shutdown, simple"
+
+setup
+
+ok <<EOF
+Stopping smb: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.002.sh b/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.002.sh
new file mode 100755
index 0000000..f692026
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.002.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "shutdown, simple"
+
+setup
+
+samba_setup_fake_threads 1 2 3 4 5 6
+
+ok <<EOF
+Stopping smb: OK
+$SAMBA_STACK_TRACES
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.011.sh b/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.011.sh
new file mode 100755
index 0000000..94867e0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.shutdown.011.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "shutdown, Debian init style"
+
+setup
+
+export EVENTSCRIPT_TESTS_INIT_STYLE="debian"
+
+ok <<EOF
+Stopping smbd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/50.samba.startup.011.sh b/ctdb/tests/UNIT/eventscripts/50.samba.startup.011.sh
new file mode 100755
index 0000000..8c4699d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/50.samba.startup.011.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "startup, Debian init style"
+
+setup
+
+export EVENTSCRIPT_TESTS_INIT_STYLE="debian"
+
+ok <<EOF
+Starting smbd: OK
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.101.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.101.sh
new file mode 100755
index 0000000..293b724
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.101.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available"
+
+setup
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.102.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.102.sh
new file mode 100755
index 0000000..2f83b2c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.102.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available, check nfsd thread count, count matches"
+
+setup
+
+RPCNFSDCOUNT=8
+nfs_setup_fake_threads "nfsd" 1 2 3 4 5 6 7 8
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.103.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.103.sh
new file mode 100755
index 0000000..c0bb305
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.103.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available, not enough nfsd threads"
+
+setup
+
+RPCNFSDCOUNT=8
+nfs_setup_fake_threads "nfsd" 1 2 3 4 5
+
+ok "Attempting to correct number of nfsd threads from 5 to 8"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.104.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.104.sh
new file mode 100755
index 0000000..d568892
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.104.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+# Add this extra test to catch a design change where we only ever
+# increase the number of threads. That is, this test would need to be
+# consciously removed.
+define_test "all services available, check nfsd thread count, too many threads"
+
+setup
+
+RPCNFSDCOUNT=4
+nfs_setup_fake_threads "nfsd" 1 2 3 4 5 6
+
+ok "Attempting to correct number of nfsd threads from 6 to 4"
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.105.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.105.sh
new file mode 100755
index 0000000..e83ead8
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.105.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available, 10 iterations with ok_null"
+
+setup
+
+ok_null
+nfs_iterate_test 10
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.106.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.106.sh
new file mode 100755
index 0000000..43d6b2f
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.106.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "portmapper down, 2 iterations"
+
+setup
+
+rpc_services_down "portmapper"
+
+nfs_iterate_test 2 "portmapper"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.107.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.107.sh
new file mode 100755
index 0000000..8bf0fa2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.107.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout is 'true'"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NFS_CALLOUT="true"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.108.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.108.sh
new file mode 100755
index 0000000..39aba84
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.108.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes monitor-pre to fail"
+
+setup
+
+setup_nfs_callout "monitor-pre"
+
+required_result 1 "monitor-pre"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.109.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.109.sh
new file mode 100755
index 0000000..36572e9
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.109.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes monitor-post to fail"
+
+setup
+
+setup_nfs_callout "monitor-post"
+
+required_result 1 "monitor-post"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.111.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.111.sh
new file mode 100755
index 0000000..2bbda96
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.111.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "knfsd down, 1 iteration"
+
+setup
+
+rpc_services_down "nfs"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.112.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.112.sh
new file mode 100755
index 0000000..4000b5d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.112.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "knfsd down, 10 iterations"
+
+# knfsd fails and attempts to restart it fail.
+
+setup
+
+rpc_services_down "nfs"
+
+nfs_iterate_test 10 "nfs"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.113.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.113.sh
new file mode 100755
index 0000000..744966c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.113.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "knfsd down, 10 iterations, no hung threads"
+
+# knfsd fails and attempts to restart it fail.
+setup
+
+rpc_services_down "nfs"
+
+nfs_setup_fake_threads "nfsd"
+
+nfs_iterate_test 10 "nfs"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.114.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.114.sh
new file mode 100755
index 0000000..7170fff
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.114.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "knfsd down, 10 iterations, 3 hung threads"
+
+# knfsd fails and attempts to restart it fail.
+setup
+
+rpc_services_down "nfs"
+
+nfs_setup_fake_threads "nfsd" 1001 1002 1003
+
+nfs_iterate_test 10 "nfs"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.121.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.121.sh
new file mode 100755
index 0000000..1cda276
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.121.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "lockd down, 7 iterations"
+
+# This simulates an ongoing failure in the eventscript's automated
+# attempts to restart the service. That is, the eventscript is unable
+# to restart the service.
+
+setup
+
+rpc_services_down "nlockmgr"
+
+nfs_iterate_test 7 "nlockmgr"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.122.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.122.sh
new file mode 100755
index 0000000..eae7ca0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.122.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "lockd down, 7 iterations, back up after 2"
+
+# This simulates success of the eventscript's automated attempts to
+# restart the service.
+
+setup
+
+rpc_services_down "nlockmgr"
+
+# Iteration 2 should try to restart rpc.lockd. However, our test
+# stub rpc.lockd does nothing, so we have to explicitly flag it as up.
+
+nfs_iterate_test 7 "nlockmgr" \
+ 3 "rpc_services_up nlockmgr"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.131.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.131.sh
new file mode 100755
index 0000000..33e1cf4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.131.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "rquotad down, 7 iterations"
+
+setup
+
+rpc_services_down "rquotad"
+
+nfs_iterate_test 7 "rquotad"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.132.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.132.sh
new file mode 100755
index 0000000..207d872
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.132.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "rquotad down, 7 iterations, back up after 2"
+
+# rquotad fails once but then comes back after restart after 2nd
+# failure.
+
+setup
+
+rpc_services_down "rquotad"
+
+nfs_iterate_test 7 "rquotad" \
+ 3 'rpc_services_up "rquotad"'
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.141.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.141.sh
new file mode 100755
index 0000000..5a8c5ce
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.141.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 7 iterations"
+
+# statd fails and attempts to restart it fail.
+
+setup
+
+rpc_services_down "status"
+
+nfs_iterate_test 7 "status"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.142.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.142.sh
new file mode 100755
index 0000000..694bf92
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.142.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 7 iterations, back up after 2"
+
+# statd fails and the first attempt to restart it succeeds.
+
+setup
+
+rpc_services_down "status"
+
+nfs_iterate_test 7 "status" \
+ 3 'rpc_services_up "status"'
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.143.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.143.sh
new file mode 100755
index 0000000..d17277e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.143.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 2 iterations, stuck process"
+
+# statd fails while a fake rpc.status process exists, simulating a stuck process.
+
+setup
+
+rpc_services_down "status"
+nfs_setup_fake_threads "rpc.status" 1001
+
+nfs_iterate_test 2 "status"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.144.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.144.sh
new file mode 100755
index 0000000..5a8c5ce
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.144.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 7 iterations"
+
+# statd fails and attempts to restart it fail.
+
+setup
+
+rpc_services_down "status"
+
+nfs_iterate_test 7 "status"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.151.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.151.sh
new file mode 100755
index 0000000..9ab1807
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.151.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "mountd down, 1 iteration"
+
+setup
+
+rpc_services_down "mountd"
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.152.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.152.sh
new file mode 100755
index 0000000..c3a6b8b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.152.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "mountd down, 7 iterations"
+
+# This simulates an ongoing failure in the eventscript's automated
+# attempts to restart the service. That is, the eventscript is unable
+# to restart the service.
+
+setup
+
+rpc_services_down "mountd"
+
+nfs_iterate_test 7 "mountd"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.153.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.153.sh
new file mode 100755
index 0000000..a09315b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.153.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "mountd down, 7 iterations, back up after 2"
+
+setup
+
+rpc_services_down "mountd"
+
+# Iteration 2 should try to restart rpc.mountd. However, our test
+# stub rpc.mountd does nothing, so we have to explicitly flag it as
+# up.
+nfs_iterate_test 7 "mountd" \
+ 3 "rpc_services_up mountd"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.161.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.161.sh
new file mode 100755
index 0000000..1fa73bb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.161.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "2nd share missing"
+
+setup
+
+out=$(shares_missing "nfs" 2)
+
+required_result 1 "$out"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.162.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.162.sh
new file mode 100755
index 0000000..9e3438f
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.162.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "2nd share missing, skipping share checks"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NFS_SKIP_SHARE_CHECK="yes"
+EOF
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.multi.001.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.multi.001.sh
new file mode 100755
index 0000000..baa5701
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.multi.001.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "takeip, ipreallocated -> reconfigure"
+
+setup
+
+public_address=$(ctdb_get_1_public_address)
+
+ok_null
+
+simple_test_event "takeip" $public_address
+
+ok <<EOF
+Reconfiguring service "nfs"...
+EOF
+
+simple_test_event "ipreallocated"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.multi.002.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.multi.002.sh
new file mode 100755
index 0000000..846380f
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.multi.002.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "takeip, monitor -> no reconfigure"
+
+setup
+
+public_address=$(ctdb_get_1_public_address)
+
+ok_null
+
+simple_test_event "takeip" $public_address
+
+ok_null
+
+simple_test_event "monitor"
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.releaseip.001.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.releaseip.001.sh
new file mode 100755
index 0000000..8bf0fa2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.releaseip.001.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout is 'true'"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NFS_CALLOUT="true"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.releaseip.002.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.releaseip.002.sh
new file mode 100755
index 0000000..998c3ba
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.releaseip.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes releaseip to fail"
+
+setup
+
+setup_nfs_callout "releaseip"
+
+required_result 1 "releaseip"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.shutdown.001.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.shutdown.001.sh
new file mode 100755
index 0000000..8bf0fa2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.shutdown.001.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout is 'true'"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NFS_CALLOUT="true"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.shutdown.002.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.shutdown.002.sh
new file mode 100755
index 0000000..9db0656
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.shutdown.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes shutdown to fail"
+
+setup
+
+setup_nfs_callout "shutdown"
+
+required_result 1 "shutdown"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.startup.001.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.startup.001.sh
new file mode 100755
index 0000000..8bf0fa2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.startup.001.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout is 'true'"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NFS_CALLOUT="true"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.startup.002.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.startup.002.sh
new file mode 100755
index 0000000..bf881d9
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.startup.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes startup to fail"
+
+setup
+
+setup_nfs_callout "startup"
+
+required_result 1 "startup"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.takeip.001.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.takeip.001.sh
new file mode 100755
index 0000000..8bf0fa2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.takeip.001.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout is 'true'"
+
+setup
+
+setup_script_options <<EOF
+CTDB_NFS_CALLOUT="true"
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.takeip.002.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.takeip.002.sh
new file mode 100755
index 0000000..3f247ff
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/60.nfs.takeip.002.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "callout causes takeip to fail"
+
+setup
+
+setup_nfs_callout "takeip"
+
+required_result 1 "takeip"
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.001.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.001.sh
new file mode 100755
index 0000000..9fc8d02
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.001.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "sanity check ipvsadm stub"
+
+setup<<EOF
+EOF
+
+check_ipvsadm NULL
+
+ipvsadm -A -u 10.1.1.201 -s lc -p 1999999
+ipvsadm -a -u 10.1.1.201 -r 192.168.1.3 -g
+ipvsadm -a -u 10.1.1.201 -r 192.168.1.1 -g
+ipvsadm -a -u 10.1.1.201 -r 192.168.1.2:0 -g
+ipvsadm -a -u 10.1.1.201 -r 127.0.0.1
+
+check_ipvsadm <<EOF
+UDP 10.1.1.201:0 lc persistent 1999999
+ -> 127.0.0.1:0 Local 1 0 0
+ -> 192.168.1.1:0 Route 1 0 0
+ -> 192.168.1.2:0 Route 1 0 0
+ -> 192.168.1.3:0 Route 1 0 0
+EOF
+
+ipvsadm -A -t 10.1.1.201 -s lc -p 1999999
+ipvsadm -a -t 10.1.1.201 -r 192.168.1.3 -g
+ipvsadm -a -t 10.1.1.201 -r 192.168.1.1 -g
+ipvsadm -a -t 10.1.1.201 -r 192.168.1.2:0 -g
+
+check_ipvsadm <<EOF
+TCP 10.1.1.201:0 lc persistent 1999999
+ -> 192.168.1.1:0 Route 1 0 0
+ -> 192.168.1.2:0 Route 1 0 0
+ -> 192.168.1.3:0 Route 1 0 0
+UDP 10.1.1.201:0 lc persistent 1999999
+ -> 127.0.0.1:0 Local 1 0 0
+ -> 192.168.1.1:0 Route 1 0 0
+ -> 192.168.1.2:0 Route 1 0 0
+ -> 192.168.1.3:0 Route 1 0 0
+EOF
+
+ipvsadm -D -u 10.1.1.201
+
+check_ipvsadm <<EOF
+TCP 10.1.1.201:0 lc persistent 1999999
+ -> 192.168.1.1:0 Route 1 0 0
+ -> 192.168.1.2:0 Route 1 0 0
+ -> 192.168.1.3:0 Route 1 0 0
+EOF
+
+ipvsadm -D -t 10.1.1.201
+
+check_ipvsadm NULL
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.011.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.011.sh
new file mode 100755
index 0000000..6866047
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.011.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "configured, no nodes in config"
+
+setup "10.1.1.201" "eth0" <<EOF
+EOF
+
+ok_null
+simple_test
+
+check_ipvsadm NULL
+check_lvs_ip host
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.012.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.012.sh
new file mode 100755
index 0000000..15328ef
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.012.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "nodes in config, no leader (e.g. all inactive)"
+
+setup "10.1.1.201" "eth0" <<EOF
+192.168.1.1
+192.168.1.2
+192.168.1.3
+EOF
+
+ok_null
+simple_test
+
+check_ipvsadm NULL
+check_lvs_ip host
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.013.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.013.sh
new file mode 100755
index 0000000..918b18d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.013.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "nodes in config, other node is leader"
+
+setup "10.1.1.201" "eth0" <<EOF
+192.168.1.1
+192.168.1.2 leader
+192.168.1.3
+EOF
+
+ok_null
+simple_test
+
+check_ipvsadm NULL
+check_lvs_ip host
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.014.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.014.sh
new file mode 100755
index 0000000..8af31d7
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.ipreallocated.014.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "nodes in config, this is leader"
+
+setup "10.1.1.201" "eth0" <<EOF
+192.168.1.1 leader
+192.168.1.2
+192.168.1.3
+EOF
+
+ok_null
+simple_test
+
+check_ipvsadm <<EOF
+TCP 10.1.1.201:0 lc persistent 1999999
+ -> 127.0.0.1:0 Local 1 0 0
+ -> 192.168.1.2:0 Route 1 0 0
+ -> 192.168.1.3:0 Route 1 0 0
+UDP 10.1.1.201:0 lc persistent 1999999
+ -> 127.0.0.1:0 Local 1 0 0
+ -> 192.168.1.2:0 Route 1 0 0
+ -> 192.168.1.3:0 Route 1 0 0
+EOF
+
+check_lvs_ip global
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.001.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.001.sh
new file mode 100755
index 0000000..42831fb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.001.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "not configured"
+
+setup <<EOF
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.002.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.002.sh
new file mode 100755
index 0000000..a808017
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.002.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "configured, interface up"
+
+setup "10.1.1.201" "eth0" <<EOF
+192.168.1.1
+192.168.1.2
+192.168.1.3
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.003.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.003.sh
new file mode 100755
index 0000000..89f443e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.monitor.003.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "configured, interface down"
+
+setup "10.1.1.201" "eth0" <<EOF
+192.168.1.1
+192.168.1.2
+192.168.1.3
+EOF
+
+ethtool_interfaces_down "$CTDB_LVS_PUBLIC_IFACE"
+
+required_result 1 <<EOF
+ERROR: No link on the public network interface ${CTDB_LVS_PUBLIC_IFACE}
+EOF
+simple_test
+
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.shutdown.001.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.shutdown.001.sh
new file mode 100755
index 0000000..42831fb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.shutdown.001.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "not configured"
+
+setup <<EOF
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.shutdown.002.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.shutdown.002.sh
new file mode 100755
index 0000000..61c7f96
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.shutdown.002.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "configured"
+
+setup "10.1.1.201" "eth0" <<EOF
+EOF
+
+ipvsadm -A -t "$CTDB_LVS_PUBLIC_IP" -s lc -p 1999999
+ipvsadm -A -u "$CTDB_LVS_PUBLIC_IP" -s lc -p 1999999
+ip addr add $CTDB_LVS_PUBLIC_IP/32 dev lo
+
+ok_null
+simple_test
+
+check_ipvsadm NULL
+check_lvs_ip NULL
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.startup.001.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.startup.001.sh
new file mode 100755
index 0000000..42831fb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.startup.001.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "not configured"
+
+setup <<EOF
+EOF
+
+ok_null
+simple_test
diff --git a/ctdb/tests/UNIT/eventscripts/91.lvs.startup.002.sh b/ctdb/tests/UNIT/eventscripts/91.lvs.startup.002.sh
new file mode 100755
index 0000000..e4c5e8d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/91.lvs.startup.002.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "configured"
+
+setup "10.1.1.201" "eth0" <<EOF
+EOF
+
+ok_null
+simple_test
+
+check_ipvsadm NULL
+check_lvs_ip "host"
diff --git a/ctdb/tests/UNIT/eventscripts/README b/ctdb/tests/UNIT/eventscripts/README
new file mode 100644
index 0000000..304cdba
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/README
@@ -0,0 +1,46 @@
+eventscript unit tests
+======================
+
+This directory contains some eventscript unit tests for CTDB. These
+tests can be run as a non-privileged user. There are a lot of stub
+implementations of commands (located in stubs/) used to make the
+eventscripts think they're running against a real system.
+
+Test case filenames look like:
+
+ <eventscript>.<event>.NNN.sh
+
+The test helper functions will run <eventscript> with the specified
+options. When the simple_test() helper function is used, <event> is
+automatically passed as the 1st argument. When simple_test_event() is
+used the event name must be explicitly passed as the 1st argument -
+this is more flexible and supports multiple events per test.
+
+Examples:
+
+* ../run_tests.sh .
+
+ Run all tests, displaying minimal output.
+
+* ../run_tests.sh -s .
+
+ Run all tests, displaying minimal output and a summary.
+
+* ../run_tests.sh -s ./10.interface.*.sh
+
+ Run all the tests against the 10.interface eventscript.
+
+* ../run_tests.sh -v -s .
+
+ Run all tests, displaying extra output and a summary.
+
+* ../run_tests.sh -sq .
+
+ Run all tests, displaying only a summary.
+
+* ../run_tests.sh -X ./10.interface.startup.002.sh
+
+ Run a test and have the eventscript itself run with "sh -x". This
+ will usually make a test fail because the (undesirable) trace output
+ will be included with the output of the eventscript. However, this
+ is useful for finding out why a test might be failing.
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.001.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.001.sh
new file mode 100755
index 0000000..8f10200
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.001.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB S+ DB"
+
+setup
+
+do_test "DB" "S+" "DB"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.002.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.002.sh
new file mode 100755
index 0000000..31ae3df
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.002.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB D. DB"
+
+setup
+
+do_test "DB" "D." "DB"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.003.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.003.sh
new file mode 100755
index 0000000..89ab2f1
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.003.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD S+ DB"
+
+setup
+
+do_test "RECORD" "S+" "DB"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.004.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.004.sh
new file mode 100755
index 0000000..35500cb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.004.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD D. DB"
+
+setup
+
+do_test "RECORD" "D." "DB"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.005.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.005.sh
new file mode 100755
index 0000000..10cbade
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.005.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB S+ RECORD"
+
+setup
+
+do_test "DB" "S+" "RECORD"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.006.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.006.sh
new file mode 100755
index 0000000..c4988b7
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.006.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB D. RECORD"
+
+setup
+
+do_test "DB" "D." "RECORD"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.007.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.007.sh
new file mode 100755
index 0000000..b186d20
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.007.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD S+ RECORD"
+
+setup
+
+do_test "RECORD" "S+" "RECORD"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.008.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.008.sh
new file mode 100755
index 0000000..7b7ac9b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.008.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD D. RECORD"
+
+setup
+
+do_test "RECORD" "D." "RECORD"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.021.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.021.sh
new file mode 100755
index 0000000..f324803
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.021.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB S+ DB MUTEX"
+
+setup
+
+do_test "DB" "S+" "DB" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.022.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.022.sh
new file mode 100755
index 0000000..0e70771
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.022.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB D. DB MUTEX"
+
+setup
+
+do_test "DB" "D." "DB" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.023.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.023.sh
new file mode 100755
index 0000000..de84c81
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.023.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD S+ DB MUTEX"
+
+setup
+
+do_test "RECORD" "S+" "DB" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.024.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.024.sh
new file mode 100755
index 0000000..30ad6bd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.024.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD D. DB MUTEX"
+
+setup
+
+do_test "RECORD" "D." "DB" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.025.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.025.sh
new file mode 100755
index 0000000..f259db5
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.025.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB S+ RECORD MUTEX"
+
+setup
+
+do_test "DB" "S+" "RECORD" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.026.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.026.sh
new file mode 100755
index 0000000..9e057af
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.026.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "DB D. RECORD MUTEX"
+
+setup
+
+do_test "DB" "D." "RECORD" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.027.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.027.sh
new file mode 100755
index 0000000..d70e7b7
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.027.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD S+ RECORD MUTEX"
+
+setup
+
+do_test "RECORD" "S+" "RECORD" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/debug_locks.sh.028.sh b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.028.sh
new file mode 100755
index 0000000..7199035
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/debug_locks.sh.028.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "RECORD D. RECORD MUTEX"
+
+setup
+
+do_test "RECORD" "D." "RECORD" "MUTEX"
diff --git a/ctdb/tests/UNIT/eventscripts/etc-ctdb/public_addresses b/ctdb/tests/UNIT/eventscripts/etc-ctdb/public_addresses
new file mode 100644
index 0000000..cd2f6be
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/etc-ctdb/public_addresses
@@ -0,0 +1,9 @@
+10.0.0.1/24 dev123
+10.0.0.2/24 dev123
+10.0.0.3/24 dev123
+10.0.0.4/24 dev123
+10.0.0.5/24 dev123
+10.0.0.6/24 dev123
+10.0.1.1/24 dev456
+10.0.1.2/24 dev456
+10.0.1.3/24 dev456
diff --git a/ctdb/tests/UNIT/eventscripts/etc-ctdb/rc.local b/ctdb/tests/UNIT/eventscripts/etc-ctdb/rc.local
new file mode 100755
index 0000000..777aeaf
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/etc-ctdb/rc.local
@@ -0,0 +1,56 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+# Always use stub version of service command
+service ()
+{
+ "${CTDB_HELPER_BINDIR}/service" "$@"
+}
+
+nice_service ()
+{
+ nice "${CTDB_HELPER_BINDIR}/service" "$@"
+}
+
+# Always succeeds
+set_proc () { : ; }
+set_proc_maybe () { : ; }
+
+get_proc ()
+{
+ case "$1" in
+ net/bonding/*)
+ cat "$FAKE_PROC_NET_BONDING/${1##*/}"
+ ;;
+ sys/net/ipv4/conf/all/arp_filter)
+ echo 1
+ ;;
+ sys/net/ipv4/conf/all/promote_secondaries)
+ echo 1
+ ;;
+ fs/nfsd/threads)
+ echo "$FAKE_NFSD_THREAD_PIDS" | wc -w
+ ;;
+ */stack)
+ echo "[<ffffffff87654321>] fake_stack_trace_for_pid_${1}+0x0/0xff"
+ ;;
+ meminfo)
+ echo "$FAKE_PROC_MEMINFO"
+ ;;
+ locks)
+ echo "$FAKE_PROC_LOCKS"
+ ;;
+ *)
+ echo "get_proc: \"$1\" not implemented"
+ exit 1
+ esac
+}
+
+# Do not actually background - we want to see the output
+background_with_logging ()
+{
+ "$@" 2>&1 </dev/null | sed -e 's@^@\&@'
+}
+
+if [ -n "$EVENTSCRIPT_TESTS_INIT_STYLE" ]; then
+ CTDB_INIT_STYLE="$EVENTSCRIPT_TESTS_INIT_STYLE"
+fi
diff --git a/ctdb/tests/UNIT/eventscripts/etc/init.d/nfs b/ctdb/tests/UNIT/eventscripts/etc/init.d/nfs
new file mode 100755
index 0000000..43eb308
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/etc/init.d/nfs
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# This is not used. The fake "service" script is used instead. This
+# is only needed to shut up functions like startstop_nfs(), which look
+# for this script.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/etc/init.d/nfslock b/ctdb/tests/UNIT/eventscripts/etc/init.d/nfslock
new file mode 100755
index 0000000..43eb308
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/etc/init.d/nfslock
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# This is not used. The fake "service" script is used instead. This
+# is only needed to shut up functions like startstop_nfs(), which look
+# for this script.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/etc/os-release b/ctdb/tests/UNIT/eventscripts/etc/os-release
new file mode 100644
index 0000000..f0057cc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/etc/os-release
@@ -0,0 +1,2 @@
+ID="rocky"
+ID_LIKE="rhel centos fedora"
diff --git a/ctdb/tests/UNIT/eventscripts/etc/samba/smb.conf b/ctdb/tests/UNIT/eventscripts/etc/samba/smb.conf
new file mode 100644
index 0000000..45976cd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/etc/samba/smb.conf
@@ -0,0 +1,43 @@
+[global]
+ # enable clustering
+ clustering=yes
+ ctdb:registry.tdb=yes
+
+ security = ADS
+ auth methods = guest sam winbind
+
+ netbios name = cluster1
+ workgroup = CLUSTER1
+ realm = CLUSTER1.COM
+ server string = "Clustered Samba"
+ disable netbios = yes
+ disable spoolss = yes
+ fileid:mapping = fsname
+ use mmap = yes
+ gpfs:sharemodes = yes
+ gpfs:leases = yes
+ passdb backend = tdbsam
+ preferred master = no
+ kernel oplocks = yes
+ syslog = 1
+ host msdfs = no
+ notify:inotify = no
+ vfs objects = shadow_copy2 syncops gpfs fileid
+ shadow:snapdir = .snapshots
+ shadow:fixinodes = yes
+ wide links = no
+ smbd:backgroundqueue = False
+ read only = no
+ use sendfile = yes
+ strict locking = yes
+ posix locking = yes
+ large readwrite = yes
+ force unknown acl user = yes
+ nfs4:mode = special
+ nfs4:chown = yes
+ nfs4:acedup = merge
+ nfs4:sidmap = /etc/samba/sidmap.tdb
+ map readonly = no
+ ea support = yes
+ dmapi support = no
+ smb ports = 445 139
diff --git a/ctdb/tests/UNIT/eventscripts/etc/sysconfig/nfs b/ctdb/tests/UNIT/eventscripts/etc/sysconfig/nfs
new file mode 100644
index 0000000..090d786
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/etc/sysconfig/nfs
@@ -0,0 +1,2 @@
+NFS_HOSTNAME="cluster1"
+STATD_HOSTNAME="$NFS_HOSTNAME -H /etc/ctdb/statd-callout "
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/00.ctdb.sh b/ctdb/tests/UNIT/eventscripts/scripts/00.ctdb.sh
new file mode 100644
index 0000000..a192e05
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/00.ctdb.sh
@@ -0,0 +1,24 @@
+setup()
+{
+ setup_dbdir
+ setup_date
+
+ export FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+ export FAKE_TDB_IS_OK="yes"
+
+ export FAKE_CTDB_TUNABLES_OK="
+ MonitorInterval
+ DatabaseHashSize
+ "
+ export FAKE_CTDB_TUNABLES_OBSOLETE="
+ EventScriptUnhealthyOnTimeout
+ "
+}
+
+result_filter()
+{
+ _date="[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]"
+ _time="[0-9][0-9][0-9][0-9][0-9][0-9]"
+ _date_time="${_date}\.${_time}"
+ sed -e "s|\.${_date_time}\.|.DATE.TIME.|"
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/01.reclock.sh b/ctdb/tests/UNIT/eventscripts/scripts/01.reclock.sh
new file mode 100644
index 0000000..7365dd8
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/01.reclock.sh
@@ -0,0 +1,16 @@
+setup()
+{
+ if [ $# -eq 1 ]; then
+ reclock="$1"
+ else
+ reclock="${CTDB_TEST_TMP_DIR}/reclock_subdir/rec.lock"
+ fi
+ CTDB_RECOVERY_LOCK="$reclock"
+
+ if [ -n "$CTDB_RECOVERY_LOCK" ]; then
+ cat >>"${CTDB_BASE}/ctdb.conf" <<EOF
+[cluster]
+ recovery lock = $CTDB_RECOVERY_LOCK
+EOF
+ fi
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/05.system.sh b/ctdb/tests/UNIT/eventscripts/scripts/05.system.sh
new file mode 100644
index 0000000..0191e55
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/05.system.sh
@@ -0,0 +1,48 @@
+# shellcheck disable=SC2120
+# Arguments used in testcases
+set_mem_usage()
+{
+ _mem_usage="${1:-10}" # Default is 10%
+ _swap_usage="${2:-0}" # Default is 0%
+
+ _swap_total=5857276
+ _swap_free=$(((100 - _swap_usage) * _swap_total / 100))
+
+ _mem_total=3940712
+ _mem_free=225268
+ _mem_buffers=146120
+ _mem_cached=$((_mem_total * (100 - _mem_usage) / 100 - \
+ _mem_free - _mem_buffers))
+
+ export FAKE_PROC_MEMINFO="\
+MemTotal: ${_mem_total} kB
+MemFree: ${_mem_free} kB
+Buffers: ${_mem_buffers} kB
+Cached: ${_mem_cached} kB
+SwapCached: 56016 kB
+Active: 2422104 kB
+Inactive: 1019928 kB
+Active(anon): 1917580 kB
+Inactive(anon): 523080 kB
+Active(file): 504524 kB
+Inactive(file): 496848 kB
+Unevictable: 4844 kB
+Mlocked: 4844 kB
+SwapTotal: ${_swap_total} kB
+SwapFree: ${_swap_free} kB
+..."
+}
+
+set_fs_usage()
+{
+ export FAKE_FS_USE="${1:-10}" # Default is 10% usage
+}
+
+setup()
+{
+ setup_dbdir
+
+ # Tests use default unless explicitly set
+ set_mem_usage
+ set_fs_usage
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/06.nfs.sh b/ctdb/tests/UNIT/eventscripts/scripts/06.nfs.sh
new file mode 100644
index 0000000..5912a4b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/06.nfs.sh
@@ -0,0 +1,4 @@
+setup()
+{
+ :
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/10.interface.sh b/ctdb/tests/UNIT/eventscripts/scripts/10.interface.sh
new file mode 100644
index 0000000..579f3ee
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/10.interface.sh
@@ -0,0 +1,72 @@
+setup()
+{
+ setup_public_addresses
+}
+
+_tcp_connections()
+{
+ _count="$1"
+ _sip="$2"
+ _sport="$3"
+ _cip_base="$4"
+ _cport_base="$5"
+
+ _cip_prefix="${_cip_base%.*}"
+ _cip_suffix="${_cip_base##*.}"
+
+ for _i in $(seq 1 "$_count"); do
+ _cip_last=$((_cip_suffix + _i))
+ _cip="${_cip_prefix}.${_cip_last}"
+ _cport=$((_cport_base + _i))
+ echo "${_sip}:${_sport} ${_cip}:${_cport}"
+ done
+}
+
+setup_tcp_connections()
+{
+ _t="${FAKE_NETWORK_STATE}/tcp-established"
+ export FAKE_NETSTAT_TCP_ESTABLISHED_FILE="$_t"
+ _tcp_connections "$@" >"$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"
+}
+
+setup_tcp_connections_unkillable()
+{
+ # These connections are listed by the "ss" stub but are not
+ # killed by the "ctdb killtcp" stub. So killing these
+ # connections will never succeed... and will look like a time
+ # out.
+ _t=$(_tcp_connections "$@" | sed -e 's/ /|/g')
+ export FAKE_NETSTAT_TCP_ESTABLISHED="$_t"
+}
+
+# Setup some fake /proc/net/bonding files with just enough info for
+# the eventscripts.
+
+# arg1 is interface name, arg2 is active slave ("None" if none), arg3 is
+# bond MII status ("up" or "down"), arg4 is adapter MII status (default arg3).
+setup_bond()
+{
+ _iface="$1"
+ _slave="${2:-${_iface}_sl_0}"
+ _mii_s="${3:-up}"
+ _mii_subs="${4:-${_mii_s:-up}}"
+
+ cat <<EOF
+Setting $_iface to be a bond with active slave $_slave and MII status $_mii_s
+EOF
+
+ _t="${FAKE_NETWORK_STATE}/proc-net-bonding"
+ export FAKE_PROC_NET_BONDING="$_t"
+ mkdir -p "$FAKE_PROC_NET_BONDING"
+
+ cat >"${FAKE_PROC_NET_BONDING}/$_iface" <<EOF
+Bonding Mode: IEEE 802.3ad Dynamic link aggregation
+Currently Active Slave: $_slave
+# Status of the bond
+MII Status: $_mii_s
+# Status of 1st pretend adapter
+MII Status: $_mii_subs
+# Status of 2nd pretend adapter
+MII Status: $_mii_subs
+EOF
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/11.natgw.sh b/ctdb/tests/UNIT/eventscripts/scripts/11.natgw.sh
new file mode 100644
index 0000000..3b19895
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/11.natgw.sh
@@ -0,0 +1,120 @@
+setup()
+{
+ debug "Setting up NAT gateway"
+
+ natgw_nodes="${CTDB_BASE}/natgw_nodes"
+
+ ctdb_set_pnn
+}
+
+# A separate function for this makes sense because it can be done
+# multiple times per test
+setup_ctdb_natgw()
+{
+ # Read from stdin
+ while read -r _ip _opts; do
+ case "$_opts" in
+ leader)
+ export FAKE_CTDB_NATGW_LEADER="$_ip"
+ echo "$_ip"
+ ;;
+ follower-only)
+ printf "%s\tfollower-only\n" "$_ip"
+ ;;
+ *)
+ echo "$_ip"
+ ;;
+ esac
+ done >"$natgw_nodes"
+
+ # Assume all of the nodes are on a /24 network and have IPv4
+ # addresses:
+ read -r _ip <"$natgw_nodes"
+
+ setup_script_options <<EOF
+CTDB_NATGW_NODES="$natgw_nodes"
+CTDB_NATGW_PRIVATE_NETWORK="${_ip%.*}.0/24"
+# These are fixed. Probably don't use the same network for the
+# private node IPs. To unset the default gateway just set it to
+# "". :-)
+CTDB_NATGW_PUBLIC_IP="10.1.1.121/24"
+CTDB_NATGW_PUBLIC_IFACE="eth1"
+CTDB_NATGW_DEFAULT_GATEWAY="10.1.1.254"
+EOF
+}
+
+ok_natgw_leader_ip_addr_show()
+{
+ _mac=$(echo "$CTDB_NATGW_PUBLIC_IFACE" |
+ cksum |
+ sed -r -e 's@(..)(..)(..).*@fe:fe:fe:\1:\2:\3@')
+
+ # This is based on CTDB_NATGW_PUBLIC_IP
+ _brd="10.1.1.255"
+
+ ok <<EOF
+1: ${CTDB_NATGW_PUBLIC_IFACE}: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
+ link/ether ${_mac} brd ff:ff:ff:ff:ff:ff
+ inet ${CTDB_NATGW_PUBLIC_IP} brd ${_brd} scope global ${CTDB_NATGW_PUBLIC_IFACE}
+ valid_lft forever preferred_lft forever
+EOF
+}
+
+ok_natgw_follower_ip_addr_show()
+{
+ _mac=$(echo "$CTDB_NATGW_PUBLIC_IFACE" |
+ cksum |
+ sed -r -e 's@(..)(..)(..).*@fe:fe:fe:\1:\2:\3@')
+
+ ok <<EOF
+1: ${CTDB_NATGW_PUBLIC_IFACE}: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
+ link/ether ${_mac} brd ff:ff:ff:ff:ff:ff
+EOF
+}
+
+ok_natgw_leader_static_routes()
+{
+ _nl="
+"
+ _t=""
+ for _i in $CTDB_NATGW_STATIC_ROUTES; do
+ # This is intentionally different to the code in 11.natgw ;-)
+ case "$_i" in
+ *@*)
+ _net=$(echo "$_i" | sed -e 's|@.*||')
+ _gw=$(echo "$_i" | sed -e 's|.*@||')
+ ;;
+ *)
+ _net="$_i"
+ _gw="$CTDB_NATGW_DEFAULT_GATEWAY"
+ ;;
+ esac
+
+ [ -n "$_gw" ] || continue
+ _t="${_t}${_t:+${_nl}}"
+ _t="${_t}${_net} via ${_gw} dev ethXXX metric 10 "
+ done
+ _t=$(echo "$_t" | sort)
+ ok "$_t"
+}
+
+ok_natgw_follower_static_routes()
+{
+ _nl="
+"
+ _t=""
+ for _i in $CTDB_NATGW_STATIC_ROUTES; do
+ # This is intentionally different to the code in 11.natgw ;-)
+ _net=$(echo "$_i" | sed -e 's|@.*||')
+
+ # The interface for the private network isn't
+ # specified as part of the NATGW configuration and
+ # isn't part of the command to add the route. It is
+ # implicitly added by "ip route" but our stub doesn't
+ # do this and adds "ethXXX".
+ _t="${_t}${_t:+${_nl}}"
+ _t="${_t}${_net} via ${FAKE_CTDB_NATGW_LEADER} dev ethXXX metric 10 "
+ done
+ _t=$(echo "$_t" | sort)
+ ok "$_t"
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/13.per_ip_routing.sh b/ctdb/tests/UNIT/eventscripts/scripts/13.per_ip_routing.sh
new file mode 100644
index 0000000..aac2c3d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/13.per_ip_routing.sh
@@ -0,0 +1,47 @@
+setup()
+{
+ setup_public_addresses
+
+ # shellcheck disable=SC2034
+ # Used in expected output
+ service_name="per_ip_routing"
+
+ setup_script_options <<EOF
+CTDB_PER_IP_ROUTING_CONF="${CTDB_BASE}/policy_routing"
+CTDB_PER_IP_ROUTING_RULE_PREF=100
+CTDB_PER_IP_ROUTING_TABLE_ID_LOW=1000
+CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=2000
+EOF
+
+ # Tests need to create and populate this file
+ rm -f "$CTDB_PER_IP_ROUTING_CONF"
+}
+
+# Create policy routing configuration in $CTDB_PER_IP_ROUTING_CONF.
+# $1 is the number of assigned IPs to use (<num>, all), defaulting to
+# 1. If $2 is "default" then a default route is also added.
+create_policy_routing_config()
+{
+ _num_ips="${1:-1}"
+ _should_add_default="$2"
+
+ ctdb_get_my_public_addresses |
+ if [ "$_num_ips" = "all" ]; then
+ cat
+ else
+ {
+ head -n "$_num_ips"
+ cat >/dev/null
+ }
+ fi |
+ while read -r _dev _ip _bits; do
+ _net=$(ipv4_host_addr_to_net "$_ip" "$_bits")
+ _gw="${_net%.*}.254" # a dumb, calculated default
+
+ echo "$_ip $_net"
+
+ if [ "$_should_add_default" = "default" ]; then
+ echo "$_ip 0.0.0.0/0 $_gw"
+ fi
+ done >"$CTDB_PER_IP_ROUTING_CONF"
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/20.multipathd.sh b/ctdb/tests/UNIT/eventscripts/scripts/20.multipathd.sh
new file mode 100644
index 0000000..9add0bc
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/20.multipathd.sh
@@ -0,0 +1,25 @@
+setup()
+{
+ _failures=""
+ _devices=""
+ for i; do
+ case "$i" in
+ \!*)
+ _t="${i#!}"
+ echo "Marking ${_t} as having no active paths"
+ _failures="${_failures}${_failures:+ }${_t}"
+ ;;
+ *)
+ _t="$i"
+ ;;
+ esac
+ _devices="${_devices}${_devices:+ }${_t}"
+ done
+
+ setup_script_options <<EOF
+CTDB_MONITOR_MPDEVICES="$_devices"
+EOF
+
+ export FAKE_MULTIPATH_FAILURES="$_failures"
+ export FAKE_SLEEP_FORCE=0.1
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/31.clamd.sh b/ctdb/tests/UNIT/eventscripts/scripts/31.clamd.sh
new file mode 100644
index 0000000..27016cb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/31.clamd.sh
@@ -0,0 +1,8 @@
+setup()
+{
+ setup_script_options <<EOF
+CTDB_CLAMD_SOCKET="/var/run/clamd.sock"
+EOF
+
+ setup_unix_listen
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/40.vsftpd.sh b/ctdb/tests/UNIT/eventscripts/scripts/40.vsftpd.sh
new file mode 100644
index 0000000..236d130
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/40.vsftpd.sh
@@ -0,0 +1,14 @@
+setup()
+{
+ debug "Setting up VSFTPD environment: service $1, not managed by CTDB"
+
+ _service_name="vsftpd"
+
+ if [ "$1" != "down" ]; then
+ service "$_service_name" start
+ setup_tcp_listen 21
+ else
+ service "$_service_name" force-stopped
+ setup_tcp_listen ""
+ fi
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/41.httpd.sh b/ctdb/tests/UNIT/eventscripts/scripts/41.httpd.sh
new file mode 100644
index 0000000..3fac4f0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/41.httpd.sh
@@ -0,0 +1,14 @@
+setup()
+{
+ debug "Setting up HTTPD environment: service $1, not managed by CTDB"
+
+ if [ "$1" != "down" ]; then
+ for _service_name in "apache2" "httpd"; do
+ service "$_service_name" start
+ done
+ else
+ for _service_name in "apache2" "httpd"; do
+ service "$_service_name" force-stopped
+ done
+ fi
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/48.netbios.sh b/ctdb/tests/UNIT/eventscripts/scripts/48.netbios.sh
new file mode 100644
index 0000000..6efcd8a
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/48.netbios.sh
@@ -0,0 +1,23 @@
+setup()
+{
+ # shellcheck disable=SC2034
+ # Used in expected output
+ service_name="netbios"
+
+ if [ "$1" != "down" ]; then
+
+ debug "Marking Netbios name services as up, listening and managed by CTDB"
+
+ # All possible service names for all known distros.
+ for i in "nmb" "nmbd"; do
+ service "$i" force-started
+ done
+ else
+ debug "Marking Netbios name services as down, not listening and not managed by CTDB"
+
+ # All possible service names for all known distros.
+ for i in "nmb" "nmbd"; do
+ service "$i" force-stopped
+ done
+ fi
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/49.winbind.sh b/ctdb/tests/UNIT/eventscripts/scripts/49.winbind.sh
new file mode 100644
index 0000000..bbe1de2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/49.winbind.sh
@@ -0,0 +1,28 @@
+setup()
+{
+ # shellcheck disable=SC2034
+ # Used in expected output
+ service_name="winbind"
+
+ if [ "$1" != "down" ]; then
+
+ debug "Marking Winbind service as up and managed by CTDB"
+
+ service "winbind" force-started
+
+ export FAKE_WBINFO_FAIL="no"
+
+ else
+ debug "Marking Winbind service as down and not managed by CTDB"
+
+ service "winbind" force-stopped
+
+ export FAKE_WBINFO_FAIL="yes"
+ fi
+}
+
+wbinfo_down()
+{
+ debug "Making wbinfo commands fail"
+ export FAKE_WBINFO_FAIL="yes" # exported, as in setup(), so child processes see it
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/50.samba.sh b/ctdb/tests/UNIT/eventscripts/scripts/50.samba.sh
new file mode 100644
index 0000000..88af69b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/50.samba.sh
@@ -0,0 +1,58 @@
+setup()
+{
+ # shellcheck disable=SC2034
+ # Used in expected output
+ service_name="samba"
+
+ if [ "$1" != "down" ]; then
+
+ debug "Marking Samba services as up, listening and managed by CTDB"
+
+ # All possible service names for all known distros.
+ for i in "smb" "samba" "smbd"; do
+ service "$i" force-started
+ done
+
+ setup_tcp_listen 445 139
+
+ # Some things in 50.samba are backgrounded and waited
+ # for. If we don't sleep at all then timeouts can
+ # happen. This avoids that... :-)
+ export FAKE_SLEEP_FORCE=0.1
+ else
+ debug "Marking Samba services as down, not listening and not managed by CTDB"
+
+ # All possible service names for all known distros.
+ for i in "smb" "samba" "smbd"; do
+ service "$i" force-stopped
+ done
+
+ setup_tcp_listen
+ fi
+
+ setup_script_options <<EOF
+CTDB_SAMBA_SKIP_SHARE_CHECK="no"
+EOF
+
+ setup_shares
+
+}
+
+samba_setup_fake_threads()
+{
+ export FAKE_SMBD_THREAD_PIDS="$*"
+
+ _nl="
+"
+ _out=""
+ _count=0
+ for _pid; do
+ [ "$_count" -lt 5 ] || break
+ _t=$(program_stack_trace "smbd" "$_pid")
+ _out="${_out:+${_out}${_nl}}${_t}"
+ _count=$((_count + 1))
+ done
+ # shellcheck disable=SC2034
+ # Used in expected output
+ SAMBA_STACK_TRACES="$_out"
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/60.nfs.sh b/ctdb/tests/UNIT/eventscripts/scripts/60.nfs.sh
new file mode 100644
index 0000000..9c614c7
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/60.nfs.sh
@@ -0,0 +1,435 @@
+setup()
+{
+ setup_public_addresses
+ setup_shares
+
+ # shellcheck disable=SC2034
+ # Used in expected output
+ service_name="nfs"
+
+ if [ -z "$CTDB_NFS_DISTRO_STYLE" ]; then
+ # Currently supported: sysvinit-redhat, systemd-redhat
+ CTDB_NFS_DISTRO_STYLE="systemd-redhat"
+ fi
+
+ export FAKE_RPCINFO_SERVICES=""
+
+ setup_script_options <<EOF
+CTDB_NFS_SKIP_SHARE_CHECK="no"
+# This doesn't even need to exist
+CTDB_NFS_EXPORTS_FILE="${CTDB_TEST_TMP_DIR}/etc-exports"
+EOF
+
+ export RPCNFSDCOUNT
+ # $1 = "down": mark services stopped; anything else: mark them started
+ if [ "$1" != "down" ]; then
+ debug <<EOF
+Setting up NFS environment: all RPC services up, NFS managed by CTDB
+EOF
+
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ sysvinit-*)
+ service "nfs" force-started
+ service "nfslock" force-started
+ ;;
+ systemd-*)
+ service "nfs-server" force-started # same name the "down" branch stops
+ service "nfs-mountd" force-started
+ service "rpc-rquotad" force-started
+ service "rpc-statd" force-started
+ ;;
+ esac
+
+ rpc_services_up \
+ "portmapper" "nfs" "mountd" "rquotad" \
+ "nlockmgr" "status"
+
+ nfs_setup_fake_threads "nfsd"
+ nfs_setup_fake_threads "rpc.foobar" # Set the variable to empty
+ else
+ debug <<EOF
+Setting up NFS environment: all RPC services down, NFS not managed by CTDB
+EOF
+
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ sysvinit-*)
+ service "nfs" force-stopped
+ service "nfslock" force-stopped
+ service "nfs-kernel-server" force-stopped
+ ;;
+ systemd-*)
+ service "nfs-server" force-stopped
+ service "nfs-mountd" force-stopped
+ service "rpc-rquotad" force-stopped # same name the "up" branch starts
+ service "rpc-statd" force-stopped
+ ;;
+ esac
+ fi
+
+ # This is really nasty. However, when we test NFS we don't
+ # actually test statd-callout. If we leave it there then left
+ # over, backgrounded instances of statd-callout will do
+ # horrible things with the "ctdb ip" stub and cause the actual
+ # statd-callout tests that follow to fail.
+ rm "${CTDB_BASE}/statd-callout"
+}
+
+rpc_services_down()
+{
+ _out=""
+ for _s in $FAKE_RPCINFO_SERVICES; do # entries are "name:versions"
+ for _i; do
+ if [ "$_i" = "${_s%%:*}" ]; then
+ debug "Marking RPC service \"${_i}\" as UNAVAILABLE"
+ continue 2 # drop this entry: skips the append below
+ fi
+ done
+ _out="${_out}${_out:+ }${_s}"
+ done
+ export FAKE_RPCINFO_SERVICES="$_out" # exported, as rpc_services_up() does
+}
+
+rpc_services_up()
+{
+ _out="$FAKE_RPCINFO_SERVICES"
+ for _i; do
+ debug "Marking RPC service \"${_i}\" as available"
+ case "$_i" in
+ portmapper) _t="2:4" ;;
+ nfs) _t="2:3" ;;
+ mountd) _t="1:3" ;;
+ rquotad) _t="1:2" ;;
+ nlockmgr) _t="3:4" ;;
+ status) _t="1:1" ;;
+ *) die "Internal error - unsupported RPC service \"${_i}\"" ;;
+ esac
+
+ _out="${_out}${_out:+ }${_i}:${_t}"
+ done
+ export FAKE_RPCINFO_SERVICES="$_out"
+}
+
+nfs_setup_fake_threads()
+{
+ _prog="$1"
+ shift
+
+ case "$_prog" in
+ nfsd)
+ export PROCFS_PATH="${CTDB_TEST_TMP_DIR}/proc"
+ _threads="${PROCFS_PATH}/fs/nfsd/threads"
+ mkdir -p "$(dirname "$_threads")"
+ echo $# >"$_threads"
+ export FAKE_NFSD_THREAD_PIDS="$*"
+ ;;
+ *)
+ export FAKE_RPC_THREAD_PIDS="$*"
+ ;;
+ esac
+}
+
+guess_output()
+{
+ case "$1" in
+ "${CTDB_NFS_CALLOUT} start nlockmgr")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ sysvinit-redhat)
+ echo "&Starting nfslock: OK"
+ ;;
+ sysvinit-debian)
+ cat <<EOF
+&Starting nfs-kernel-server: OK
+EOF
+ ;;
+ systemd-*)
+ echo "&Starting rpc-statd: OK"
+ ;;
+ esac
+ ;;
+ "${CTDB_NFS_CALLOUT} start nfs")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ sysvinit-redhat)
+ cat <<EOF
+&Starting nfslock: OK
+&Starting nfs: OK
+EOF
+ ;;
+ sysvinit-debian)
+ cat <<EOF
+&Starting nfs-kernel-server: OK
+EOF
+ ;;
+ systemd-redhat)
+ cat <<EOF
+&Starting rpc-statd: OK
+&Starting nfs-server: OK
+&Starting rpc-rquotad: OK
+EOF
+ ;;
+ systemd-debian)
+ cat <<EOF
+&Starting rpc-statd: OK
+&Starting nfs-server: OK
+&Starting quotarpc: OK
+EOF
+ ;;
+ esac
+ ;;
+ "${CTDB_NFS_CALLOUT} stop mountd")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ systemd-*)
+ echo "Stopping nfs-mountd: OK"
+ ;;
+ esac
+ ;;
+ "${CTDB_NFS_CALLOUT} stop rquotad")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ systemd-redhat)
+ echo "Stopping rpc-rquotad: OK"
+ ;;
+ systemd-debian)
+ if service "quotarpc" status >/dev/null; then
+ echo "Stopping quotarpc: OK"
+ else
+ echo "service: can't stop quotarpc - not running"
+ fi
+ ;;
+ esac
+ ;;
+ "${CTDB_NFS_CALLOUT} stop status")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ systemd-*)
+ echo "Stopping rpc-statd: OK"
+ ;;
+ esac
+ ;;
+ "${CTDB_NFS_CALLOUT} start mountd")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ systemd-*)
+ echo "&Starting nfs-mountd: OK"
+ ;;
+ esac
+ ;;
+ "${CTDB_NFS_CALLOUT} start rquotad")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ systemd-redhat)
+ echo "&Starting rpc-rquotad: OK"
+ ;;
+ systemd-debian)
+ echo "&Starting quotarpc: OK"
+ ;;
+ esac
+ ;;
+ "${CTDB_NFS_CALLOUT} start status")
+ case "$CTDB_NFS_DISTRO_STYLE" in
+ systemd-*)
+ echo "&Starting rpc-statd: OK"
+ ;;
+ esac
+ ;;
+ *)
+ : # Nothing
+ ;;
+ esac
+}
+
+# Set the required result for RPC program $1 having failed its check
+# for $2 (default 1) consecutive iterations.  0 failures keeps the
+# ok_null default.  Thresholds and restart commands are read from the
+# matching NN.<service>.check file.  Note that we could hook
+# nfs_check_rpc_service() to implement this but we're better off
+# testing it using independent code... even if that code is
+# incomplete and hacky.  So, if the 60.nfs eventscript changes and
+# tests start to fail then this function may be incomplete.
+rpc_set_service_failure_response()
+{
+ _rpc_service="$1"
+ _numfails="${2:-1}" # default 1
+
+ # Default: the ok_null result, kept when there were no failures
+ ok_null
+ if [ "$_numfails" -eq 0 ]; then
+ return
+ fi
+
+ nfs_load_config
+
+ # A handy newline. :-)
+ _nl="
+"
+
+ _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"
+
+ _file=$(ls "$_dir"/[0-9][0-9]."${_rpc_service}.check") # glob must match 1 file
+ [ -r "$_file" ] ||
+ die "RPC check file \"$_file\" does not exist or is not unique"
+
+ _out="${CTDB_TEST_TMP_DIR}/rpc_failure_output"
+ : >"$_out" # truncate: the expected output starts empty
+ _rc_file="${CTDB_TEST_TMP_DIR}/rpc_result"
+
+ (
+ # Subshell to restrict the scope of variables set by the check file;
+
+ # Defaults
+ # shellcheck disable=SC2034
+ # Unused, but for completeness, possible future use
+ family="tcp"
+ version=""
+ unhealthy_after=1
+ restart_every=0
+ service_stop_cmd=""
+ service_start_cmd=""
+ # shellcheck disable=SC2034
+ # Unused, but for completeness, possible future use
+ service_check_cmd=""
+ service_debug_cmd=""
+
+ # Don't bother syntax checking, eventscript does that...
+ . "$_file"
+
+ # Just use the first version, or use default. This is
+ # dumb but handles all the cases that we care about
+ # now...
+ if [ -n "$version" ]; then
+ _ver="${version%% *}"
+ else
+ case "$_rpc_service" in
+ portmapper) _ver="" ;;
+ *) _ver=1 ;;
+ esac
+ fi
+ _rpc_check_out="\
+$_rpc_service failed RPC check:
+rpcinfo: RPC: Program not registered
+program $_rpc_service${_ver:+ version }${_ver} is not available"
+
+ if [ $unhealthy_after -gt 0 ] &&
+ [ "$_numfails" -ge $unhealthy_after ]; then
+ _unhealthy=true
+ echo 1 >"$_rc_file"
+ echo "ERROR: ${_rpc_check_out}" >>"$_out"
+ else
+ _unhealthy=false
+ echo 0 >"$_rc_file"
+ fi
+
+ if [ $restart_every -gt 0 ] &&
+ [ $((_numfails % restart_every)) -eq 0 ]; then
+ if ! $_unhealthy; then
+ echo "WARNING: ${_rpc_check_out}" >>"$_out"
+ fi
+
+ echo "Trying to restart service \"${_rpc_service}\"..." \
+ >>"$_out"
+
+ guess_output "$service_stop_cmd" >>"$_out"
+
+ if [ -n "$service_debug_cmd" ]; then
+ $service_debug_cmd >>"$_out" 2>&1
+ fi
+
+ guess_output "$service_start_cmd" >>"$_out"
+ fi
+ )
+
+ read -r _rc <"$_rc_file" # status chosen inside the subshell, passed via file
+ required_result "$_rc" <"$_out"
+
+ rm -f "$_out" "$_rc_file"
+}
+
+program_stack_traces()
+{
+ # Emit fake stack traces for at most $2 (default 1) threads of $1
+ _prog="$1"
+ _max="${2:-1}"
+ _count=1
+ case "$_prog" in
+ nfsd) _pids="$FAKE_NFSD_THREAD_PIDS" ;;
+ *) _pids="$FAKE_RPC_THREAD_PIDS" ;;
+ esac
+ for _pid in $_pids; do
+ if ! [ $_count -le "$_max" ]; then
+ break
+ fi
+ program_stack_trace "$_prog" "$_pid"
+ _count=$((_count + 1))
+ done
+}
+
+# Run an NFS eventscript iteratively.
+#
+# - 1st argument is the number of iterations.
+#
+# - 2nd argument is the NFS/RPC service being tested
+#
+# rpcinfo is used on each iteration to test the availability of the
+# service
+#
+# If this is not set or null then no RPC service is checked and the
+# required output is not reset on each iteration. This is useful in
+# baseline tests to confirm that the eventscript and test
+# infrastructure is working correctly.
+#
+# - Subsequent arguments come in pairs: an iteration number and
+# something to eval before that iteration. Each time an iteration
+# number is matched the associated argument is given to eval after
+# the default setup is done. The iteration numbers need to be given
+# in ascending order.
+#
+# These arguments can allow a service to be started or stopped
+# before a particular iteration.
+#
+nfs_iterate_test()
+{
+ _repeats="$1" # number of iterations to run
+ _rpc_service="$2" # RPC service to check; unset or null means none
+ if [ $# -ge 2 ]; then # also drop a null service, so the pairs stay aligned
+ shift 2
+ else
+ shift
+ fi
+
+ # shellcheck disable=SC2154
+ # Variables defined in define_test()
+ echo "Running $_repeats iterations of \"$script $event\" $args"
+
+ _iterate_failcount=0
+ for _iteration in $(seq 1 "$_repeats"); do
+ # This is not a numerical comparison because $1 will
+ # often not be set.
+ if [ "$_iteration" = "$1" ]; then
+ debug <<EOF
+##################################################
+EOF
+ eval "$2"
+ debug <<EOF
+##################################################
+EOF
+ shift 2
+ fi
+ if [ -n "$_rpc_service" ]; then
+ if rpcinfo -T tcp localhost "$_rpc_service" \
+ >/dev/null 2>&1 ; then
+ _iterate_failcount=0
+ else
+ _iterate_failcount=$((_iterate_failcount + 1))
+ fi
+ rpc_set_service_failure_response \
+ "$_rpc_service" $_iterate_failcount
+ fi
+ _out=$(simple_test 2>&1)
+ _ret=$?
+ if "$CTDB_TEST_VERBOSE" || [ $_ret -ne 0 ]; then
+ cat <<EOF
+##################################################
+Iteration ${_iteration}:
+$_out
+EOF
+ fi
+ if [ $_ret -ne 0 ]; then
+ exit $_ret
+ fi
+ done
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/91.lvs.sh b/ctdb/tests/UNIT/eventscripts/scripts/91.lvs.sh
new file mode 100644
index 0000000..1e307c5
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/91.lvs.sh
@@ -0,0 +1,76 @@
+setup()
+{
+ # $1 = LVS public IP, $2 = interface for it; both are optional
+ _ip="$1"
+ _iface="$2"
+
+ export FAKE_LVS_STATE_DIR="${FAKE_NETWORK_STATE}/lvs"
+ mkdir -p "$FAKE_LVS_STATE_DIR"
+
+ # Remember the ipvsadm output at setup time for check_ipvsadm()
+ lvs_header=$(ipvsadm -l -n)
+
+ # Without both an IP and an interface there is nothing to configure
+ if [ -z "$_ip" ] || [ -z "$_iface" ]; then
+ return 0
+ fi
+
+ setup_script_options <<EOF
+CTDB_LVS_NODES="${CTDB_BASE}/lvs_nodes"
+CTDB_LVS_PUBLIC_IP="$_ip"
+CTDB_LVS_PUBLIC_IFACE="$_iface"
+EOF
+
+ export FAKE_CTDB_LVS_LEADER=""
+
+ # Build the nodes file from "IP [leader|follower-only]" lines on
+ # stdin; the 0-based line index of the leader is recorded as its PNN
+ _pnn=0
+
+ while read -r _ip _opts; do
+ if [ "$_opts" = "follower-only" ]; then
+ printf "%s\tfollower-only\n" "$_ip"
+ else
+ [ "$_opts" != "leader" ] || FAKE_CTDB_LVS_LEADER="$_pnn"
+ echo "$_ip"
+ fi
+ _pnn=$((_pnn + 1))
+ done >"$CTDB_LVS_NODES"
+}
+
+check_ipvsadm()
+{
+ # Expect the setup-time header, followed by stdin unless $1 is "NULL"
+ if [ "$1" != "NULL" ]; then
+ required_result 0 <<EOF
+$lvs_header
+$(cat)
+EOF
+ else
+ required_result 0 <<EOF
+$lvs_header
+EOF
+ fi
+ simple_test_command ipvsadm -l -n
+}
+
+check_lvs_ip()
+{
+ # $1 is the expected scope of the LVS address on lo, or "NULL" for none
+ _scope="$1"
+ if [ "$_scope" != "NULL" ]; then
+ required_result 0 <<EOF
+1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
+ link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
+ inet ${CTDB_LVS_PUBLIC_IP}/32 scope ${_scope} lo
+ valid_lft forever preferred_lft forever
+EOF
+ else
+ required_result 0 <<EOF
+1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
+ link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
+EOF
+ fi
+
+ simple_test_command ip addr show dev lo
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/debug_locks.sh b/ctdb/tests/UNIT/eventscripts/scripts/debug_locks.sh
new file mode 100644
index 0000000..b0cd039
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/debug_locks.sh
@@ -0,0 +1,272 @@
+setup()
+{
+ setup_dbdir # create the fake database directories used by tdb_path()
+}
+
+result_filter()
+{
+ sed -e 's|\( of debug locks PID=\)[0-9]*|\1PID|' # mask the varying PID
+}
+
+tdb_path()
+{
+ echo "${CTDB_DBDIR}/${1}.${FAKE_CTDB_PNN}" # <dbdir>/<name>.<pnn>
+}
+
+fake_file_id()
+{
+ _path="$1" # path to look up in FAKE_FILE_ID_MAP ("path id" lines)
+
+ echo "$FAKE_FILE_ID_MAP" |
+ awk -v path="$_path" '$1 == path { print $2 }'
+}
+
+fake_stack_trace()
+{
+ _pid="$1"
+ _command="${2:-smbd}" # command name shown in the trace, default smbd
+ _state="$3" # process state; D* selects the kernel-stack form
+
+ echo "----- Stack trace for PID=${_pid} -----"
+
+ case "$_state" in
+ D*)
+ cat <<EOF
+----- Process in D state, printing kernel stack only
+[<ffffffff87654321>] fake_stack_trace_for_pid_${_pid}/stack+0x0/0xff
+EOF
+ ;;
+ *)
+ cat <<EOF
+Thread 1 (Thread 0x7f688fbfb180 (LWP ${_pid}) "${_command}"):
+#0 0x00007f688ff7a076 in open (FAKE ARGS...) at FAKE PLACE
+....
+#3 0x000055cd368ead72 in main (argc=<optimized out>, argv=<optimized out>) at ${_command}.c
+EOF
+ ;;
+ esac
+}
+
+do_test()
+{
+ _holder_scope="$1" # DB or RECORD: lock held by the fake smbd(s); else none
+ _holder_state="$2" # process state listed for PID 4131931 in FAKE_PS_MAP
+ _helper_scope="$3" # DB or RECORD: lock the lock helper is blocked on
+ _lock_type="${4:-FCNTL}" # FCNTL (default) or MUTEX
+
+ _lock_helper_pid="4132032"
+
+ FAKE_PS_MAP=$(
+ cat <<EOF
+1234567 ctdbd S
+2345678 smbd S
+4131931 smbd ${_holder_state}
+${_lock_helper_pid} ctdb_lock_helpe S+
+EOF
+ )
+ export FAKE_PS_MAP
+
+ FAKE_FILE_ID_MAP=""
+ _tdbs="locking.tdb brlock.tdb test.tdb foo.tdb"
+ _n=1
+ for _t in $_tdbs; do
+ _path=$(tdb_path "$_t")
+ _inode=$((19690818 + _n))
+ FAKE_FILE_ID_MAP=$(
+ cat <<EOF
+${FAKE_FILE_ID_MAP}
+${_path} 103:04:${_inode}
+EOF
+ )
+ rm -f "$_path"
+ touch "$_path"
+ _n=$((_n + 1))
+ done
+ export FAKE_FILE_ID_MAP
+
+ _path=$(tdb_path "locking.tdb")
+ _locking_tdb_id=$(fake_file_id "$_path")
+
+ _t=$(
+ cat <<EOF
+POSIX ADVISORY WRITE 3769740 103:04:24380821 1073741826 1073742335
+FLOCK ADVISORY WRITE 3632524 103:02:1059266 0 EOF
+FLOCK ADVISORY WRITE 4060231 00:17:17184 0 EOF
+POSIX ADVISORY READ 1234567 ${_locking_tdb_id} 4 4
+POSIX ADVISORY WRITE 59178 103:04:24380821 1073741826 1073742335
+POSIX ADVISORY READ 4427 103:04:22152234 1073741826 1073742335
+POSIX ADVISORY WRITE 4427 103:04:22152494 0 EOF
+POSIX ADVISORY READ 4427 103:04:22152702 1073741826 1073742335
+EOF
+ )
+
+ _holder_lock=""
+ if [ "$_holder_scope" = "DB" ]; then
+ if [ "$_lock_type" = "FCNTL" ]; then
+ _holder_lock=$(
+ cat <<EOF
+POSIX ADVISORY WRITE 4131931 ${_locking_tdb_id} 168 EOF
+EOF
+ )
+ elif [ "$_lock_type" = "MUTEX" ]; then
+ _holder_lock=$(
+ cat <<EOF
+POSIX ADVISORY WRITE 4131931 ${_locking_tdb_id} 400172 EOF
+EOF
+ )
+ fi
+ elif [ "$_holder_scope" = "RECORD" ] &&
+ [ "$_lock_type" = "FCNTL" ]; then
+ _holder_lock=$(
+ cat <<EOF
+POSIX ADVISORY WRITE 2345678 ${_locking_tdb_id} 112736 112736
+POSIX ADVISORY WRITE 4131931 ${_locking_tdb_id} 225472 225472
+EOF
+ )
+ fi
+
+ _t=$(
+ cat <<EOF
+$_t
+$_holder_lock
+EOF
+ )
+
+ _helper_lock=""
+ if [ "$_helper_scope" = "DB" ] &&
+ [ "$_lock_type" = "FCNTL" ]; then
+ _helper_lock=$(
+ cat <<EOF
+-> POSIX ADVISORY WRITE ${_lock_helper_pid} ${_locking_tdb_id} 168 170
+EOF
+ )
+ elif [ "$_helper_scope" = "RECORD" ] &&
+ [ "$_lock_type" = "FCNTL" ]; then
+ _helper_lock=$(
+ cat <<EOF
+-> POSIX ADVISORY WRITE ${_lock_helper_pid} ${_locking_tdb_id} 112736 112736
+EOF
+ )
+ fi
+ _t=$(
+ cat <<EOF
+$_t
+$_helper_lock
+EOF
+ )
+
+ if [ "$_holder_scope" = "DB" ]; then
+ _t=$(
+ cat <<EOF
+$_t
+POSIX ADVISORY READ 4131931 ${_locking_tdb_id} 4 4
+EOF
+ )
+ elif [ "$_holder_scope" = "RECORD" ] &&
+ [ "$_lock_type" = "FCNTL" ]; then
+ _t=$(
+ cat <<EOF
+$_t
+POSIX ADVISORY READ 2345678 ${_locking_tdb_id} 4 4
+POSIX ADVISORY READ 4131931 ${_locking_tdb_id} 4 4
+EOF
+ )
+ fi
+
+ _t=$(
+ cat <<EOF
+$_t
+POSIX ADVISORY READ 3769740 103:04:24390149 1073741826 1073742335
+POSIX ADVISORY WRITE 3769740 103:04:24380839 1073741826 1073742335
+FLOCK ADVISORY WRITE 3769302 103:02:1180313 0 EOF
+FLOCK ADVISORY WRITE 3769302 103:02:1177487 0 EOF
+FLOCK ADVISORY WRITE 3769302 103:02:1180308 0 EOF
+OFDLCK ADVISORY READ -1 00:05:6 0 EOF
+EOF
+ )
+
+ FAKE_PROC_LOCKS=$(echo "$_t" | awk '{ printf "%d: %s\n", NR, $0 }')
+ export FAKE_PROC_LOCKS
+
+ _holder_mutex_lock=""
+ if [ "$_lock_type" = "MUTEX" ]; then
+ if [ "$_holder_scope" = "RECORD" ]; then
+ _holder_mutex_lock=$(
+ cat <<EOF
+2345678 28142
+4131931 56284
+EOF
+ )
+ fi
+ fi
+
+ FAKE_TDB_MUTEX_CHECK="$_holder_mutex_lock"
+ export FAKE_TDB_MUTEX_CHECK
+
+ _out=''
+ _nl='
+'
+ _db="locking.tdb.${FAKE_CTDB_PNN}"
+
+ if [ -n "$_helper_lock" ]; then
+ read -r _ _ _ _ _pid _ _start _end <<EOF
+$_helper_lock
+EOF
+ _out="Waiter:${_nl}"
+ _out="${_out}${_pid} ctdb_lock_helpe ${_db} ${_start} ${_end}"
+ fi
+
+ # fake lock info: build the expected "Lock holders:" section
+ _pids=''
+ _out="${_out:+${_out}${_nl}}Lock holders:"
+ if [ -n "$_holder_mutex_lock" ]; then
+ while read -r _pid _chain; do
+ _comm="smbd"
+ _out="${_out}${_nl}"
+ _out="${_out}${_pid} smbd ${_db} ${_chain}"
+ _pids="${_pids:+${_pids} }${_pid}"
+ done <<EOF
+$_holder_mutex_lock
+EOF
+ else
+ while read -r _ _ _ _pid _ _start _end; do
+ _comm="smbd"
+ _out="${_out}${_nl}"
+ _out="${_out}${_pid} smbd ${_db} ${_start} ${_end}"
+ _pids="${_pids:+${_pids} }${_pid}"
+ done <<EOF
+$_holder_lock
+EOF
+ fi
+
+ # fake stack traces: one expected trace per lock holder PID
+ for _pid in $_pids; do
+ _comm="smbd"
+ if [ "$_pid" = "4131931" ]; then
+ _state="$_holder_state"
+ else
+ _state="S"
+ fi
+ _out=$(
+ cat <<EOF
+$_out
+$(fake_stack_trace "$_pid" "$_comm" "$_state")
+EOF
+ )
+ done
+
+ ok <<EOF
+===== Start of debug locks PID=PID =====
+$_out
+===== End of debug locks PID=PID =====
+EOF
+
+ # shellcheck disable=SC2154
+ # script_dir and script set in define_test()
+ script_test "${script_dir}/${script}" \
+ "$_lock_helper_pid" \
+ "$_helper_scope" \
+ "$_path" \
+ "$_lock_type"
+
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/local.sh b/ctdb/tests/UNIT/eventscripts/scripts/local.sh
new file mode 100644
index 0000000..3c28181
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/local.sh
@@ -0,0 +1,568 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+#
+# Augment PATH with relevant stubs/ directories.
+#
+
+stubs_dir="${CTDB_TEST_SUITE_DIR}/stubs"
+[ -d "${stubs_dir}" ] || die "Failed to locate stubs/ subdirectory"
+
+# Make the path absolute for tests that change directory
+case "$stubs_dir" in
+/*) : ;;
+*) stubs_dir="${PWD}/${stubs_dir}" ;;
+esac
+
+# Use stubs as helpers
+export CTDB_HELPER_BINDIR="$stubs_dir"
+
+PATH="${stubs_dir}:${PATH}" # stubs are found before real commands
+
+export CTDB="ctdb"
+
+# Force this to be absolute - event scripts can change directory
+CTDB_TEST_TMP_DIR=$(cd "$CTDB_TEST_TMP_DIR" && echo "$PWD")
+
+export CTDB_LOGGING="file:"
+
+if [ -d "${CTDB_TEST_SUITE_DIR}/etc" ]; then
+ cp -a "${CTDB_TEST_SUITE_DIR}/etc" "$CTDB_TEST_TMP_DIR" # work on a copy
+ export CTDB_SYS_ETCDIR="${CTDB_TEST_TMP_DIR}/etc"
+else
+ die "Unable to setup \$CTDB_SYS_ETCDIR"
+fi
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "etc-ctdb" \
+ debug_locks.sh \
+ functions \
+ nfs-checks.d \
+ nfs-linux-kernel-callout \
+ statd-callout
+
+export FAKE_CTDB_STATE="${CTDB_TEST_TMP_DIR}/fake-ctdb"
+mkdir -p "$FAKE_CTDB_STATE"
+
+export FAKE_NETWORK_STATE="${CTDB_TEST_TMP_DIR}/fake-network-state"
+mkdir -p "$FAKE_NETWORK_STATE"
+
+######################################################################
+
+if "$CTDB_TEST_VERBOSE"; then # the value is run as a command (true/false)
+ debug()
+ {
+ if [ -n "$1" ]; then
+ echo "$@" >&2 # message given as arguments
+ else
+ cat >&2 # no message argument: copy stdin to stderr
+ fi
+ }
+else
+ debug()
+ {
+ : # not verbose: ignore arguments and leave stdin unread
+ }
+fi
+
+######################################################################
+
+# General setup fakery
+
+# Default is to use script name with ".options" appended. With
+# arguments, this can specify an alternate script name (and
+# component).
+setup_script_options()
+{
+ # Takes either no arguments or "component script" (only the script
+ # name is used); anything else is a usage error
+ case $# in
+ 0) _script="" ;;
+ 2) _script="$2" ;;
+ *) die "usage: setup_script_options [ component script ]" ;;
+ esac
+ if [ -z "$_script" ]; then
+ _options="${script_dir}/${script%.script}.options"
+ else
+ _options="${CTDB_BASE}/events/legacy/${_script}.options"
+ fi
+
+ # Append the options given on stdin to the file...
+ cat >>"$_options"
+
+ # ...then source it so that tests can use the variables too
+ . "$_options"
+}
+
+setup_dbdir()
+{
+ export CTDB_DBDIR_BASE="${CTDB_TEST_TMP_DIR}/db"
+ CTDB_DBDIR="${CTDB_DBDIR_BASE}/volatile" # these 3 are set but not exported
+ CTDB_DBDIR_PERSISTENT="${CTDB_DBDIR_BASE}/persistent"
+ CTDB_DBDIR_STATE="${CTDB_DBDIR_BASE}/state"
+ cat >>"${CTDB_BASE}/ctdb.conf" <<EOF
+[database]
+ volatile database directory = ${CTDB_DBDIR}
+ persistent database directory = ${CTDB_DBDIR_PERSISTENT}
+ state database directory = ${CTDB_DBDIR_STATE}
+EOF
+ mkdir -p "$CTDB_DBDIR"
+ mkdir -p "$CTDB_DBDIR_PERSISTENT"
+ mkdir -p "$CTDB_DBDIR_STATE"
+}
+
+setup_date()
+{
+ export FAKE_DATE_OUTPUT="$1" # presumably printed by the date stub - confirm
+}
+
+setup_tcp_listen()
+{
+ export FAKE_TCP_LISTEN="$*" # replace the list with the given arguments
+}
+
+tcp_port_listening()
+{
+ for _i in "$@"; do # append each given port to the fake listening list
+ FAKE_TCP_LISTEN="${FAKE_TCP_LISTEN} ${_i}"
+ done
+}
+
+tcp_port_down()
+{
+ # Remove every occurrence of port $1 from the fake listening list
+ _port="$1"
+ debug "Marking TCP port \"${_port}\" as not listening"
+
+ # Rebuild the space-separated list, keeping all other ports
+ _t=""
+ for _i in $FAKE_TCP_LISTEN; do
+ if [ "$_i" != "$_port" ]; then
+ _t="${_t} ${_i}"
+ fi
+ done
+ FAKE_TCP_LISTEN="$_t"
+}
+
+setup_unix_listen()
+{
+ export FAKE_NETSTAT_UNIX_LISTEN="$*" # replace the list with the arguments
+}
+
+unix_socket_listening()
+{
+ _s="$1" # socket to append; only the first argument is used
+
+ FAKE_NETSTAT_UNIX_LISTEN="${FAKE_NETSTAT_UNIX_LISTEN} ${_s}"
+}
+
+setup_shares()
+{
+ debug "Setting up shares (3 existing shares)"
+ # Create 3 fake shares/exports and list their paths in FAKE_SHARES.
+ export FAKE_SHARES=""
+ for _i in $(seq 1 3); do # _i, not i: don't clobber a caller's variable
+ _s="${CTDB_TEST_TMP_DIR}/shares/share${_i}"
+ mkdir -p "$_s"
+ FAKE_SHARES="${FAKE_SHARES}${FAKE_SHARES:+ }${_s}"
+ done
+}
+
+shares_missing()
+{
+ # Mark some shares as non-existent and print the matching error lines
+ _type="$1" # label used in the error text; the rest are share numbers
+ shift
+
+ _out=""
+ _nl="
+"
+
+ _n=1
+ for _i in $FAKE_SHARES; do
+ for _j; do # remaining arguments: 1-based positions in FAKE_SHARES
+ if [ $_n -ne "$_j" ]; then
+ continue
+ fi
+
+ debug "Mark share $_n as missing share \"$_i\""
+ rmdir "$_i"
+ _t=$(printf "ERROR: %s directory \"%s\" not available" \
+ "$_type" "${_i}")
+ _out="${_out}${_out:+${_nl}}${_t}"
+ done
+ _n=$((_n + 1))
+ done
+
+ echo "$_out"
+}
+
+_ethtool_setup()
+{
+ FAKE_ETHTOOL_LINK_DOWN="${FAKE_NETWORK_STATE}/ethtool-link-down"
+ export FAKE_ETHTOOL_LINK_DOWN
+ mkdir -p "$FAKE_ETHTOOL_LINK_DOWN" # holds one marker file per down interface
+}
+
+ethtool_interfaces_down()
+{
+ _ethtool_setup
+ # Create a marker file in the link-down directory for each interface
+ for _i in "$@"; do
+ echo "Marking interface $_i DOWN for ethtool"
+ touch "${FAKE_ETHTOOL_LINK_DOWN}/${_i}"
+ done
+}
+
+ethtool_interfaces_up()
+{
+ _ethtool_setup
+ # Remove the marker file in the link-down directory for each interface
+ for _i in "$@"; do
+ echo "Marking interface $_i UP for ethtool"
+ rm -f "${FAKE_ETHTOOL_LINK_DOWN}/${_i}"
+ done
+}
+
+dump_routes()
+{
+ echo "# ip rule show"
+ ip rule show
+
+ ip rule show |
+ while read -r _p _ _i _ _t; do
+ # Remove trailing colon after priority/preference.
+ _p="${_p%:}"
+ # Only show routes for rules that match our priority/preference.
+ [ "$CTDB_PER_IP_ROUTING_RULE_PREF" = "$_p" ] || continue
+
+ echo "# ip route show table $_t"
+ ip route show table "$_t"
+ done
+}
+
+# Copied from 13.per_ip_routing for now... so this is lazy testing :-(
+ipv4_host_addr_to_net()
+{
+ _host="$1" # dotted-quad IPv4 host address
+ _maskbits="$2" # prefix length, echoed back after the "/"
+
+ # Convert the host address to an unsigned long by splitting out
+ # the octets and doing the math.
+ _host_ul=0
+ for _o in $(
+ export IFS="."
+ # shellcheck disable=SC2086
+ # Intentional word splitting
+ echo $_host
+ ); do
+ _host_ul=$(((_host_ul << 8) + _o)) # work around Emacs color bug
+ done
+
+ # Calculate the mask and apply it.
+ _mask_ul=$((0xffffffff << (32 - _maskbits)))
+ _net_ul=$((_host_ul & _mask_ul))
+
+ # Now convert to a network address one byte at a time.
+ _net=""
+ for _o in $(seq 1 4); do
+ _net="$((_net_ul & 255))${_net:+.}${_net}"
+ _net_ul=$((_net_ul >> 8))
+ done
+
+ echo "${_net}/${_maskbits}" # e.g. 10.0.1.2 24 -> 10.0.1.0/24
+}
+
+######################################################################
+
+# CTDB fakery
+
+# shellcheck disable=SC2120
+# Argument can be used in testcases
+setup_numnodes()
+{
+ export FAKE_CTDB_NUMNODES="${1:-3}" # node count, default 3
+ echo "Setting up CTDB with ${FAKE_CTDB_NUMNODES} fake nodes"
+}
+
+# For now this creates the same public addresses each time. However,
+# it could be made more flexible.
+setup_public_addresses()
+{
+ _f="${CTDB_BASE}/public_addresses"
+
+ echo "Setting up public addresses in ${_f}"
+ cat >"$_f" <<EOF
+# This is a comment
+10.0.0.1/24 dev123
+10.0.0.2/24 dev123
+10.0.0.3/24 dev123
+10.0.0.4/24 dev123
+10.0.0.5/24 dev123
+10.0.0.6/24 dev123
+10.0.1.1/24 dev456
+10.0.1.2/24 dev456
+10.0.1.3/24 dev456
+EOF
+
+ # Needed for IP allocation; uses the default node count
+ setup_numnodes
+}
+
+# Need to cope with ctdb_get_pnn(). If a test changes PNN then it
+# needs to be using a different state directory, otherwise the wrong
+# PNN can already be cached in the state directory.
+ctdb_set_pnn()
+{
+ export FAKE_CTDB_PNN="$1" # no default: empty if called without argument
+ echo "Setting up PNN ${FAKE_CTDB_PNN}"
+
+ CTDB_SCRIPT_VARDIR="${CTDB_TEST_TMP_DIR}/scripts/${FAKE_CTDB_PNN}"
+ export CTDB_SCRIPT_VARDIR
+ mkdir -p "$CTDB_SCRIPT_VARDIR" # separate script state directory per PNN
+}
+
+ctdb_get_interfaces()
+{
+ ctdb ifaces -X | awk -F'|' 'FNR > 1 {print $2}' | xargs # names on one line
+}
+
+ctdb_get_1_interface()
+{
+ _t=$(ctdb_get_interfaces)
+ echo "${_t%% *}" # keep only the first space-separated name
+}
+
+# Print public addresses on this node as: interface IP maskbits
+# Each line is suitable for passing to takeip/releaseip
+ctdb_get_my_public_addresses()
+{
+ ctdb ip -v -X | {
+ read -r _ # skip header line
+
+ while IFS="|" read -r _ _ip _ _iface _; do
+ [ -n "$_iface" ] || continue # skip lines with an empty 4th field
+ while IFS="/$IFS" read -r _i _maskbits _; do # split on "/" too
+ if [ "$_ip" = "$_i" ]; then
+ echo "$_iface $_ip $_maskbits"
+ break
+ fi
+ done <"${CTDB_BASE}/public_addresses"
+ done
+ }
+}
+
+# Prints the 1st public address as: interface IP maskbits
+# This is suitable for passing to takeip/releaseip
+ctdb_get_1_public_address()
+{
+ ctdb_get_my_public_addresses | {
+ head -n 1
+ cat >/dev/null # discard the rest; presumably so the writer isn't cut off
+ }
+}
+
+# Check the routes against those that are expected. $1 is the number
+# of assigned IPs to use (<num>, all), defaulting to 1. If $2 is
+# "default" then expect default routes to have been added.
+check_routes()
+{
+ _num_ips="${1:-1}" # number of addresses to expect routes for, or "all"
+ _should_add_default="$2" # "default": also expect a default route per IP
+
+ _policy_rules=""
+ _policy_routes=""
+
+ ctdb_get_my_public_addresses |
+ if [ "$_num_ips" = "all" ]; then
+ cat
+ else
+ {
+ head -n "$_num_ips"
+ cat >/dev/null
+ }
+ fi | {
+ while read -r _dev _ip _bits; do
+ _net=$(ipv4_host_addr_to_net "$_ip" "$_bits")
+ _gw="${_net%.*}.254" # a dumb, calculated default
+
+ _policy_rules="${_policy_rules}
+${CTDB_PER_IP_ROUTING_RULE_PREF}: from $_ip lookup ctdb.$_ip "
+ _policy_routes="${_policy_routes}
+# ip route show table ctdb.$_ip
+$_net dev $_dev scope link "
+
+ if [ "$_should_add_default" = "default" ]; then
+ _policy_routes="${_policy_routes}
+default via $_gw dev $_dev "
+ fi
+ done
+
+ ok <<EOF
+# ip rule show
+0: from all lookup local ${_policy_rules}
+32766: from all lookup main
+32767: from all lookup default ${_policy_routes}
+EOF
+
+ simple_test_command dump_routes
+ } || test_fail
+}
+
+######################################################################
+
+nfs_load_config()
+{
+ # Source the first readable NFS configuration file, if there is one
+ _etc="$CTDB_SYS_ETCDIR"
+ for _c in "$_etc/sysconfig/nfs" "$_etc/default/nfs" "$_etc/ctdb/sysconfig/nfs"; do
+ [ -r "$_c" ] || continue
+ . "$_c"
+ break
+ done
+}
+
+setup_nfs_callout()
+{
+ export CTDB_NFS_CALLOUT="${CTDB_HELPER_BINDIR}/nfs-fake-callout"
+ export NFS_FAKE_CALLOUT_MAGIC="$1" # presumably read by the fake callout - confirm
+}
+
+program_stack_trace()
+{
+ _prog="$1" # program name shown in the heading
+ _pid="$2" # PID shown in the heading and in the fake frame
+
+ cat <<EOF
+Stack trace for ${_prog}[${_pid}]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_${_pid}/stack+0x0/0xff
+EOF
+}
+
+######################################################################
+
+# Result and test functions
+
+############################################################
+
+setup()
+{
+ die "setup() is not defined" # placeholder until a per-script file redefines it
+}
+
+# Set some globals and print the summary.
+define_test()
+{
+ desc="$1" # one-line description printed in the summary
+
+ _f=$(basename "$0" ".sh")
+
+ # Remaining format should be NN.script.event.NUM or
+ # NN.script.NUM or script.NUM:
+ _num="${_f##*.}"
+ _f="${_f%.*}"
+
+ case "$_f" in
+ [0-9][0-9].*)
+ case "$_f" in
+ [0-9][0-9].*.*)
+ script="${_f%.*}.script"
+ event="${_f##*.}"
+ ;;
+ [0-9][0-9].*)
+ script="${_f}.script"
+ unset event
+ ;;
+ esac
+ # "Enable" the script
+ _subdir="events/legacy"
+ script_dir="${CTDB_BASE}/${_subdir}"
+ # Symlink target needs to be absolute
+ case "$CTDB_SCRIPTS_DATA_DIR" in
+ /*) _data_dir="${CTDB_SCRIPTS_DATA_DIR}/${_subdir}" ;;
+ *) _data_dir="${PWD}/${CTDB_SCRIPTS_DATA_DIR}/${_subdir}" ;;
+ esac
+ mkdir -p "$script_dir"
+ ln -s "${_data_dir}/${script}" "$script_dir"
+ ;;
+ *)
+ # No NN. prefix: what is left of the name is the script itself
+ script="$_f"
+ unset event
+ script_dir="${CTDB_BASE}"
+ ;;
+ esac
+
+ _s="${script_dir}/${script}"
+ [ -r "$_s" ] ||
+ die "Internal error - unable to find script \"${_s}\""
+
+ case "$script" in
+ *.script) script_short="${script%.script}" ;;
+ *.sh) script_short="${script%.sh}" ;;
+ *) script_short="$script" ;;
+ esac
+
+ printf "%-17s %-10s %-4s - %s\n\n" \
+ "$script_short" "$event" "$_num" "$desc"
+
+ _f="${CTDB_TEST_SUITE_DIR}/scripts/${script_short}.sh"
+ if [ -r "$_f" ]; then
+ . "$_f" # per-script helpers, e.g. a real setup()
+ fi
+
+ ctdb_set_pnn 0
+}
+
+# Run an eventscript once. The test passes if the return code and
+# output match those required.
+
+# Any args are passed to the eventscript.
+
+simple_test()
+{
+ [ -n "$event" ] || die 'simple_test: event not set'
+
+ args="$*" # kept for test_header() and nfs_iterate_test() messages
+
+ # shellcheck disable=SC2317
+ # used in unit_test(), etc.
+ test_header()
+ {
+ echo "Running script \"$script $event${args:+ }$args\""
+ }
+
+ # shellcheck disable=SC2317
+ # used in unit_test(), etc.
+ extra_header()
+ {
+ cat <<EOF
+
+##################################################
+CTDB_BASE="$CTDB_BASE"
+CTDB_SYS_ETCDIR="$CTDB_SYS_ETCDIR"
+ctdb client is "$(which ctdb)"
+ip command is "$(which ip)"
+EOF
+ }
+
+ script_test "${script_dir}/${script}" "$event" "$@"
+
+ reset_test_header # presumably restores the default headers - confirm
+ reset_extra_header
+}
+
+simple_test_event()
+{
+ # If something has previously failed then don't continue.
+ : "${_passed:=true}"
+ $_passed || return 1
+
+ event="$1" # event to run; remaining arguments go to the script
+ shift
+ echo "=================================================="
+ simple_test "$@"
+}
+
+simple_test_command()
+{
+ unit_test_notrace "$@" # thin wrapper: pass the command through unchanged
+}
diff --git a/ctdb/tests/UNIT/eventscripts/scripts/statd-callout.sh b/ctdb/tests/UNIT/eventscripts/scripts/statd-callout.sh
new file mode 100644
index 0000000..e966cb4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/scripts/statd-callout.sh
@@ -0,0 +1,65 @@
+setup()
+{
+ ctdb_set_pnn # NOTE(review): no argument, so FAKE_CTDB_PNN is set empty - confirm
+ setup_public_addresses
+ setup_date "123456789"
+}
+
+ctdb_catdb_format_pairs()
+{
+ _count=0
+
+ while read -r _k _v; do # stdin: one "key value" pair per line
+ _kn=$(printf '%s' "$_k" | wc -c) # key length in bytes
+ _vn=$(printf '%s' "$_v" | wc -c) # value length in bytes
+ cat <<EOF
+key(${_kn}) = "${_k}"
+dmaster: 0
+rsn: 1
+data(${_vn}) = "${_v}"
+
+EOF
+ _count=$((_count + 1))
+ done
+
+ echo "Dumped ${_count} records"
+}
+
+check_ctdb_tdb_statd_state()
+{
+ ctdb_get_my_public_addresses |
+ while read -r _ _sip _; do # each public IP reported for this node
+ for _cip; do # each client IP given as an argument
+ cat <<EOF
+statd-state@${_sip}@${_cip} $(date)
+EOF
+ done
+ done |
+ ctdb_catdb_format_pairs | {
+ ok
+ simple_test_command ctdb catdb ctdb.tdb
+ } || exit $?
+}
+
+check_statd_callout_smnotify()
+{
+ _state_even=$(( $(date '+%s') / 2 * 2)) # date output rounded down to even
+ _state_odd=$((_state_even + 1))
+
+ nfs_load_config
+
+ ctdb_get_my_public_addresses |
+ while read -r _ _sip _; do # each public IP reported for this node
+ for _cip; do # each client IP given as an argument
+ cat <<EOF
+SM_NOTIFY: ${_sip} -> ${_cip}, MON_NAME=${_sip}, STATE=${_state_even}
+SM_NOTIFY: ${_sip} -> ${_cip}, MON_NAME=${NFS_HOSTNAME}, STATE=${_state_even}
+SM_NOTIFY: ${_sip} -> ${_cip}, MON_NAME=${_sip}, STATE=${_state_odd}
+SM_NOTIFY: ${_sip} -> ${_cip}, MON_NAME=${NFS_HOSTNAME}, STATE=${_state_odd}
+EOF
+ done
+ done | {
+ ok
+ simple_test_event "notify"
+ } || exit $?
+}
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.001.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.001.sh
new file mode 100755
index 0000000..7293390
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/statd-callout.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "single add-client"
+
+setup
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
+
+check_ctdb_tdb_statd_state "192.168.123.45" # 1 record per public IP expected
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.002.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.002.sh
new file mode 100755
index 0000000..ce9f139
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/statd-callout.002.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "2 x add-client, update"
+
+setup
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "add-client" "192.168.123.46"
+simple_test_event "update"
+
+check_ctdb_tdb_statd_state "192.168.123.45" "192.168.123.46" # both clients
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.003.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.003.sh
new file mode 100755
index 0000000..25bec29
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/statd-callout.003.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "add-client, update, del-client, update"
+
+setup
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
+
+simple_test_event "del-client" "192.168.123.45"
+simple_test_event "update"
+
+check_ctdb_tdb_statd_state # no client arguments: expect 0 records
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.004.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.004.sh
new file mode 100755
index 0000000..dc2156b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/statd-callout.004.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "single add-client, notify"
+
+setup
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
+
+check_ctdb_tdb_statd_state "192.168.123.45"
+
+check_statd_callout_smnotify "192.168.123.45" # runs the "notify" event
+
+check_ctdb_tdb_statd_state # expect 0 records after the notify
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.005.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.005.sh
new file mode 100755
index 0000000..1f802a2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/statd-callout.005.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "2 x add-client to different nodes, notify on 1"
+
+setup
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
+
+ctdb_set_pnn 1 # act as node 1, with its own script state directory
+
+ok_null
+simple_test_event "add-client" "192.168.123.46"
+simple_test_event "update"
+
+ctdb_set_pnn 0
+
+check_statd_callout_smnotify "192.168.123.45" # notify as node 0 only
+
+ctdb_set_pnn 1
+
+check_ctdb_tdb_statd_state "192.168.123.46" # node 1 records must remain
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.006.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.006.sh
new file mode 100755
index 0000000..8ecba5c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/statd-callout.006.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "2 x add-client to different nodes, notify on both"
+
+setup
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
+
+ctdb_set_pnn 1 # act as node 1, with its own script state directory
+
+ok_null
+simple_test_event "add-client" "192.168.123.46"
+simple_test_event "update"
+
+ctdb_set_pnn 0
+
+check_statd_callout_smnotify "192.168.123.45"
+
+ctdb_set_pnn 1
+
+check_statd_callout_smnotify "192.168.123.46"
+
+check_ctdb_tdb_statd_state # no client arguments: expect 0 records
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.007.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.007.sh
new file mode 100755
index 0000000..4445fff
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/statd-callout.007.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "add-client, del-client, update"
+
+setup
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "del-client" "192.168.123.45"
+simple_test_event "update"
+
+check_ctdb_tdb_statd_state # no client arguments: expect 0 records
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ctdb b/ctdb/tests/UNIT/eventscripts/stubs/ctdb
new file mode 100755
index 0000000..20135eb
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ctdb
@@ -0,0 +1,481 @@
+#!/bin/sh
+
+prog="ctdb"
+
+# Write an error message to stderr and terminate the stub.
+#   $1 - message text
+#   $2 - exit status (optional, defaults to 1)
+die()
+{
+    echo "$1" 1>&2
+    exit "${2:-1}"
+}
+
+# Exit status used for commands that the stub does not emulate.
+not_implemented_exit_code=1
+
+# Print a short synopsis to stderr and exit with status 1.
+usage()
+{
+    cat 1>&2 <<EOF
+Usage: $prog [-X] cmd
+
+A fake CTDB stub that prints items depending on the variables
+FAKE_CTDB_PNN (default 0) depending on command-line options.
+EOF
+    exit 1
+}
+
+# Report on stderr that command $1 is missing from the stub, then exit
+# with $not_implemented_exit_code.
+not_implemented()
+{
+    echo "${prog}: command \"$1\" not implemented in stub" 1>&2
+    exit "$not_implemented_exit_code"
+}
+
+# Defaults for the command-line options
+verbose=false
+machine_readable=false
+nodespec=""
+
+# Non-option arguments, accumulated as a space-separated list
+args=""
+
+# Options and command arguments can appear in any order, so when
+# getopts thinks it is done, collect the next non-option argument and
+# go around again.
+while [ $# -gt 0 ]; do
+    while getopts "Xvhn:?" opt; do
+        case "$opt" in
+        X) machine_readable=true ;;
+        v) verbose=true ;;
+        n) nodespec="$OPTARG" ;;
+        \? | *) usage ;;
+        esac
+    done
+    shift $((OPTIND - 1))
+    # The positional parameters have just changed, so getopts has to be
+    # restarted explicitly.  With a stale OPTIND the next pass would
+    # skip options that follow a non-option argument (e.g. the -v in
+    # "ctdb -X ip -v") and the shift above would then discard them.
+    OPTIND=1
+
+    # Anything left over must be a non-option arg
+    if [ $# -gt 0 ]; then
+        args="${args}${args:+ }${1}"
+        shift
+    fi
+done
+
+[ -n "$args" ] || usage
+# Want word splitting
+# shellcheck disable=SC2086
+set -- $args
+
+# Ensure the tickle list file and its directory exist.  Sets
+# $tickles_file for the callers.
+setup_tickles()
+{
+    _tickles_dir="${CTDB_TEST_TMP_DIR}/fake-ctdb"
+    tickles_file="${_tickles_dir}/tickles"
+    mkdir -p "$_tickles_dir"
+    touch "$tickles_file"
+}
+
+# List recorded tickles in machine-readable form.
+#   $1 - destination IP to filter on (optional)
+#   $2 - destination port to filter on (optional)
+ctdb_gettickles()
+{
+    _ip="$1"
+    _port="$2"
+
+    setup_tickles
+
+    echo "|source ip|port|destination ip|port|"
+    while read -r _src _dst; do
+        # Skip entries whose destination fails a requested filter
+        if [ -n "$_ip" ] && [ "$_ip" != "${_dst%:*}" ]; then
+            continue
+        fi
+        if [ -n "$_port" ] && [ "$_port" != "${_dst##*:}" ]; then
+            continue
+        fi
+        echo "|${_src%:*}|${_src##*:}|${_dst%:*}|${_dst##*:}|"
+    done <"$tickles_file"
+}
+
+# Record tickles.  With two arguments (source and destination) a single
+# entry is appended; otherwise entries are copied from stdin.
+ctdb_addtickle()
+{
+    _src="$1"
+    _dst="$2"
+
+    setup_tickles
+
+    if [ -z "$_dst" ]; then
+        # No destination given: take the whole list from stdin
+        cat >>"$tickles_file"
+    else
+        echo "${_src} ${_dst}" >>"$tickles_file"
+    fi
+}
+
+# Remove tickles.  With two arguments (source and destination) that
+# single entry is removed; otherwise "source destination" pairs to
+# remove are read from stdin.
+ctdb_deltickle()
+{
+    _src="$1"
+    _dst="$2"
+
+    setup_tickles
+
+    if [ -n "$_dst" ]; then
+        # The pattern must be "<src> <dst>".  A doubled '$' here would
+        # expand to the shell's PID followed by a literal "{_dst}" and
+        # never match anything.
+        _t=$(grep -F -v "${_src} ${_dst}" "$tickles_file")
+    else
+        _t=$(cat "$tickles_file")
+        while read -r _src _dst; do
+            _t=$(echo "$_t" | grep -F -v "${_src} ${_dst}")
+        done
+    fi
+
+    # Avoid leaving a lone blank line behind when every entry has been
+    # removed: readers of the file would see it as an empty tickle.
+    if [ -n "$_t" ]; then
+        echo "$_t" >"$tickles_file"
+    else
+        : >"$tickles_file"
+    fi
+}
+
+# Translate the -n option value in $nodespec into a space-separated
+# list of node numbers in $nodes: "all" selects every node below
+# $FAKE_CTDB_NUMNODES, an empty value selects the current node and
+# anything else is treated as a comma-separated list.
+parse_nodespec()
+{
+    case "$nodespec" in
+    all) nodes="$(seq 0 $((FAKE_CTDB_NUMNODES - 1)))" ;;
+    "") nodes=$(ctdb_pnn) ;;
+    *) nodes="$(echo "$nodespec" | sed -e 's@,@ @g')" ;;
+    esac
+}
+
+# For testing backward compatibility: any command listed in
+# $CTDB_NOT_IMPLEMENTED is rejected as if the stub did not know it.
+for i in $CTDB_NOT_IMPLEMENTED; do
+    [ "$i" != "$1" ] || not_implemented "$i"
+done
+
+# Print the PNN of the fake current node.
+ctdb_pnn()
+{
+    # $FAKE_CTDB_PNN wins, otherwise node 0 is assumed
+    echo "${FAKE_CTDB_PNN:-0}"
+}
+
+######################################################################
+
+# Node flags are stored under node-state/<flag>/<pnn>; the directory
+# for flag 0x4 is the one maintained by ctdb_enable and ctdb_disable.
+FAKE_CTDB_NODE_STATE="$FAKE_CTDB_STATE/node-state"
+FAKE_CTDB_NODES_DISABLED="$FAKE_CTDB_NODE_STATE/0x4"
+
+######################################################################
+
+# NOTE: all nodes share public addresses file
+
+# File holding the current IP layout as "<ip> <pnn>" lines.
+FAKE_CTDB_IP_LAYOUT="$FAKE_CTDB_STATE/ip-layout"
+
+# Recompute the IP layout file.  The layout is seeded from the public
+# addresses file when empty, a per-node flags list is built from the
+# node-state directory, and both are passed to
+# "ctdb_takeover_tests ipalloc", whose sorted output replaces the
+# layout file.
+ip_reallocate()
+{
+ touch "$FAKE_CTDB_IP_LAYOUT"
+
+ # ShellCheck doesn't understand this flock pattern
+ # shellcheck disable=SC2094
+ (
+ # The layout file is this subshell's stdin (fd 0), so this locks it
+ flock 0
+
+ _pa="${CTDB_BASE}/public_addresses"
+
+ # Empty layout: start with every non-comment address on PNN -1
+ if [ ! -s "$FAKE_CTDB_IP_LAYOUT" ]; then
+ sed -n -e 's@^\([^#][^/]*\)/.*@\1 -1@p' \
+ "$_pa" >"$FAKE_CTDB_IP_LAYOUT"
+ fi
+
+ _t="${FAKE_CTDB_IP_LAYOUT}.new"
+
+ # Build a comma-separated list with one flags value per node
+ _flags=""
+ for _i in $(seq 0 $((FAKE_CTDB_NUMNODES - 1))); do
+ if ls "$FAKE_CTDB_STATE/node-state/"*"/$_i" >/dev/null 2>&1; then
+ # Have non-zero flags
+ _this=0
+ # Each flag directory containing a file for this node
+ # contributes its name (e.g. 0x4) to the OR-ed value
+ for _j in "$FAKE_CTDB_STATE/node-state/"*"/$_i"; do
+ _tf="${_j%/*}" # dirname
+ _f="${_tf##*/}" # basename
+ _this=$((_this | _f))
+ done
+ else
+ _this="0"
+ fi
+ _flags="${_flags}${_flags:+,}${_this}"
+ done
+ CTDB_TEST_LOGLEVEL=NOTICE \
+ "ctdb_takeover_tests" \
+ "ipalloc" "$_flags" <"$FAKE_CTDB_IP_LAYOUT" |
+ sort >"$_t"
+ mv "$_t" "$FAKE_CTDB_IP_LAYOUT"
+ ) <"$FAKE_CTDB_IP_LAYOUT"
+}
+
+# Print the public IP layout.  Output format depends on the -X
+# ($machine_readable) and -v ($verbose) options.
+ctdb_ip()
+{
+ # If nobody has done any IP-fu then generate a layout.
+ [ -f "$FAKE_CTDB_IP_LAYOUT" ] || ip_reallocate
+
+ _mypnn=$(ctdb_pnn)
+
+ if $machine_readable; then
+ if $verbose; then
+ echo "|Public IP|Node|ActiveInterface|AvailableInterfaces|ConfiguredInterfaces|"
+ else
+ echo "|Public IP|Node|"
+ fi
+ else
+ echo "Public IPs on node ${_mypnn}"
+ fi
+
+ # Join public addresses file with $FAKE_CTDB_IP_LAYOUT, and
+ # process output line by line...
+ # After the sed and join each line is "<ip> <maskbits> <ifaces> <pnn>";
+ # the mask bits are read into "_" and ignored.
+ _pa="${CTDB_BASE}/public_addresses"
+ sed -e 's@/@ @' "$_pa" | sort | join - "$FAKE_CTDB_IP_LAYOUT" |
+ while read -r _ip _ _ifaces _pnn; do
+ if $verbose; then
+ # If more than 1 interface, assume all addresses are on the 1st.
+ _first_iface="${_ifaces%%,*}"
+ # Only show interface if address is on this node.
+ _my_iface=""
+ if [ "$_pnn" = "$_mypnn" ]; then
+ _my_iface="$_first_iface"
+ fi
+ if $machine_readable; then
+ echo "|${_ip}|${_pnn}|${_my_iface}|${_first_iface}|${_ifaces}|"
+ else
+ # NOTE(review): "configured[[" has a doubled bracket, unlike the
+ # other fields - possibly a typo; confirm against real ctdb output.
+ echo "${_ip} node[${_pnn}] active[${_my_iface}] available[${_first_iface}] configured[[${_ifaces}]"
+ fi
+ else
+ if $machine_readable; then
+ echo "|${_ip}|${_pnn}|"
+ else
+ echo "${_ip} ${_pnn}"
+ fi
+ fi
+ done
+}
+
+# Reassign a public address in the layout file.
+#   $1 - IP address to move
+#   $2 - PNN of the node that should host it
+ctdb_moveip()
+{
+    _ip="$1"
+    _target="$2"
+
+    ip_reallocate # should be harmless and ensures we have good state
+
+    # ShellCheck doesn't understand this flock pattern
+    # shellcheck disable=SC2094
+    (
+        flock 0
+
+        _new_layout="${FAKE_CTDB_IP_LAYOUT}.new"
+
+        while read -r _addr _owner; do
+            # Only the requested address changes hands
+            [ "$_ip" != "$_addr" ] || _owner="$_target"
+            echo "$_addr $_owner"
+        done | sort >"$_new_layout"
+        mv "$_new_layout" "$FAKE_CTDB_IP_LAYOUT"
+    ) <"$FAKE_CTDB_IP_LAYOUT"
+}
+
+######################################################################
+
+# Clear the "disabled" marker of every node selected by -n (default:
+# the current node), then recompute the IP layout.
+ctdb_enable()
+{
+    parse_nodespec
+
+    for _node in $nodes; do
+        rm -f "${FAKE_CTDB_NODES_DISABLED}/${_node}"
+    done
+
+    ip_reallocate
+}
+
+# Create the "disabled" marker of every node selected by -n (default:
+# the current node), then recompute the IP layout.
+ctdb_disable()
+{
+    parse_nodespec
+
+    for _node in $nodes; do
+        mkdir -p "$FAKE_CTDB_NODES_DISABLED"
+        touch "${FAKE_CTDB_NODES_DISABLED}/${_node}"
+    done
+
+    ip_reallocate
+}
+
+######################################################################
+
+# Pretend to shut down: only prints a fixed message.
+ctdb_shutdown()
+{
+    printf '%s\n' "CTDB says BYE!"
+}
+
+######################################################################
+
+# This is only used by the NAT and LVS gateway code at the moment, so
+# use a hack.  Assume that $CTDB_NATGW_NODES or $CTDB_LVS_NODES
+# contains all nodes in the cluster (which is what current tests
+# assume).  Use the PNN to find the address from this file.  The NAT
+# gateway code only used the address, so just mark the node healthy.
+ctdb_nodestatus()
+{
+    echo '|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|'
+    # Use the same default (0) as every other command when
+    # $FAKE_CTDB_PNN is unset
+    _pnn=$(ctdb_pnn)
+    # Nodes are numbered from 0, lines from 1
+    _line=$((_pnn + 1))
+    # -n is required: without it sed echoes every line of the file and
+    # prints the selected line a second time, instead of printing the
+    # selected line only.
+    _ip=$(sed -n -e "${_line}p" "${CTDB_NATGW_NODES:-${CTDB_LVS_NODES}}")
+    echo "|${_pnn}|${_ip}|0|0|0|0|0|0|0|Y|"
+}
+
+######################################################################
+
+# Helpers implementing a fake TDB database as a directory containing
+# one file per key, with the value as the file content.
+
+# Select (and create if needed) the directory for database $1.  Sets
+# $_t_dir, which the other helpers rely on.
+_t_setup()
+{
+ _t_dir="${CTDB_TEST_TMP_DIR}/fake-ctdb/fake-tdb/$1"
+ mkdir -p "$_t_dir"
+}
+
+# Store value $2 under key $1.
+_t_put()
+{
+ echo "$2" >"${_t_dir}/$1"
+}
+
+# Print the value stored under key $1; cat fails if the key is missing.
+_t_get()
+{
+ cat "${_t_dir}/$1"
+}
+
+# Remove key $1; a missing key is not an error.
+_t_del()
+{
+ rm -f "${_t_dir}/$1"
+}
+
+# ctdb pstore <db> <key> <value>: store a value in the fake database.
+ctdb_pstore()
+{
+ _t_setup "$1"
+ _t_put "$2" "$3"
+}
+
+# ctdb pdelete <db> <key>: delete a key from the fake database.
+ctdb_pdelete()
+{
+ _t_setup "$1"
+ _t_del "$2"
+}
+
+# ctdb pfetch <db> <key> <file>: write the value of a key to <file>.
+# Errors from a missing key are silenced, but <file> is still created
+# (empty) and the failure status of the read is returned.
+ctdb_pfetch()
+{
+ _t_setup "$1"
+ _t_get "$2" >"$3" 2>/dev/null
+}
+
+# ctdb ptrans <db>: apply updates read from stdin.  Each line must have
+# the form "<key>" "<value>" (both double-quoted); an empty value
+# deletes the key.  A line that does not match this form terminates the
+# stub via die.
+ctdb_ptrans()
+{
+ _t_setup "$1"
+
+ while IFS="" read -r _line; do
+ # With -n ... p the key is empty unless the whole line matches
+ _k=$(echo "$_line" | sed -n -e 's@^"\([^"]*\)" "[^"]*"$@\1@p')
+ # No -n here: a non-matching line would come back unchanged, but the
+ # key check below rejects such lines before the value is used
+ _v=$(echo "$_line" | sed -e 's@^"[^"]*" "\([^"]*\)"$@\1@')
+ [ -n "$_k" ] || die "ctdb ptrans: bad line \"${_line}\""
+ if [ -n "$_v" ]; then
+ _t_put "$_k" "$_v"
+ else
+ _t_del "$_k"
+ fi
+ done
+}
+
+# ctdb catdb <db>: dump every readable record of the fake database,
+# followed by a line giving the number of records dumped.
+ctdb_catdb()
+{
+ _t_setup "$1"
+
+ # This will break on keys with spaces but we don't have any of
+ # those yet.
+ _count=0
+ for _i in "${_t_dir}/"*; do
+ # Also skips the unexpanded pattern when the directory is empty
+ [ -r "$_i" ] || continue
+ _k="${_i##*/}" # basename
+ _v=$(_t_get "$_k")
+ # Key and value lengths in bytes
+ _kn=$(printf '%s' "$_k" | wc -c)
+ _vn=$(printf '%s' "$_v" | wc -c)
+ cat <<EOF
+key(${_kn}) = "${_k}"
+dmaster: 0
+rsn: 1
+data(${_vn}) = "${_v}"
+
+EOF
+ _count=$((_count + 1))
+ done
+
+ echo "Dumped ${_count} records"
+}
+
+######################################################################
+
+# Directory of marker files: one file per interface that is considered
+# down (see ctdb_setifacelink).
+FAKE_CTDB_IFACES_DOWN="${FAKE_CTDB_STATE}/ifaces-down"
+# NOTE(review): this is top-level code, so it runs on every invocation
+# of the stub and removes all markers, including ones created by an
+# earlier "setifacelink ... down" - confirm that this is intended.
+rm -f "${FAKE_CTDB_IFACES_DOWN}"/*
+
+# ctdb ifaces: list the interfaces named in the public addresses file
+# in machine-readable form, one unique line per interface.  The link
+# status is 0 when a marker file for the interface exists in
+# $FAKE_CTDB_IFACES_DOWN and 1 otherwise.
+ctdb_ifaces()
+{
+    _f="${CTDB_BASE}/public_addresses"
+
+    if [ ! -f "$_f" ]; then
+        die "Public addresses file \"${_f}\" not found"
+    fi
+
+    # Assume -Y.
+    echo "|Name|LinkStatus|References|"
+    while read -r _ip _iface; do
+        case "$_ip" in
+        \#*) : ;;
+        *)
+            _status=1
+            # For now assume _iface contains only 1.
+            # The variable must be expanded here: a literal
+            # "{FAKE_CTDB_IFACES_DOWN}/..." path never exists, which
+            # made every interface look up regardless of its marker.
+            if [ -f "${FAKE_CTDB_IFACES_DOWN}/${_iface}" ]; then
+                _status=0
+            fi
+            # Nobody looks at references
+            echo "|${_iface}|${_status}|0|"
+            ;;
+        esac
+    done <"$_f" |
+        sort -u
+}
+
+# ctdb setifacelink <iface> up|down: record the link state of an
+# interface.  Any other state terminates the stub via die.
+ctdb_setifacelink()
+{
+    _iface="$1"
+    _state="$2"
+
+    mkdir -p "$FAKE_CTDB_IFACES_DOWN"
+
+    # Existence of file means CTDB thinks interface is down.
+    _f="${FAKE_CTDB_IFACES_DOWN}/${_iface}"
+
+    case "$_state" in
+    down)
+        touch "$_f"
+        ;;
+    up)
+        rm -f "$_f"
+        ;;
+    *)
+        die "ctdb setifacelink: unsupported interface status ${_state}"
+        ;;
+    esac
+}
+
+######################################################################
+
+# ctdb checktcpport <port>: exit with status 98 when the port appears
+# in $FAKE_TCP_LISTEN, otherwise exit with status 0.
+ctdb_checktcpport()
+{
+    _port="$1"
+
+    for _i in $FAKE_TCP_LISTEN; do
+        [ "$_port" != "$_i" ] || exit 98
+    done
+
+    exit 0
+}
+
+# ctdb gratarp: accepted but ignored.
+ctdb_gratarp()
+{
+    # Do nothing for now
+    :
+}
+
+######################################################################
+
+# The first remaining argument is the command; the rest are passed on
+cmd="$1"
+shift
+
+func="ctdb_${cmd}"
+
+# This could inadvertently run an external function instead of a local
+# function.  However, this can only happen if testing a script
+# containing a new ctdb command that is not implemented, so this is
+# unlikely to do harm.
+if ! type "$func" >/dev/null 2>&1; then
+    not_implemented "$cmd"
+fi
+"$func" "$@"
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ctdb-config b/ctdb/tests/UNIT/eventscripts/stubs/ctdb-config
new file mode 100755
index 0000000..818e3db
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ctdb-config
@@ -0,0 +1,2 @@
+#!/bin/sh
+# Run the ctdb-config helper from $CTDB_SCRIPTS_HELPER_BINDIR, passing
+# all arguments through.  $VALGRIND is deliberately unquoted so that an
+# empty value disappears and a multi-word value is split into words.
+exec $VALGRIND "${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config" "$@"
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ctdb_killtcp b/ctdb/tests/UNIT/eventscripts/stubs/ctdb_killtcp
new file mode 100755
index 0000000..2a4bac4
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ctdb_killtcp
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# Fake ctdb_killtcp: removes connections from the file named by
+# $FAKE_NETSTAT_TCP_ESTABLISHED_FILE.
+
+# Only supports reading from stdin
+
+# shellcheck disable=SC2034
+iface="$1" # ignored
+
+# Each input line is "<src> <dst>"; the matching line in the file has
+# the two fields in the opposite order.  The addresses are inserted
+# into the sed pattern unescaped, so "." matches any character.
+while read -r src dst; do
+ sed -i -e "/^${dst} ${src}\$/d" "$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"
+done
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ctdb_lvs b/ctdb/tests/UNIT/eventscripts/stubs/ctdb_lvs
new file mode 100755
index 0000000..31f56e8
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ctdb_lvs
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+# Name used in messages
+prog="ctdb_lvs"
+
+# Print a message and exit.
+#   $1 - message, written to stderr
+#   $2 - exit status (optional, defaults to 1)
+die()
+{
+ echo "$1" >&2
+ exit "${2:-1}"
+}
+
+# Exit status used for commands that the stub does not emulate
+not_implemented_exit_code=1
+
+# Print a synopsis to stderr and exit with status 1.  Not called from
+# anywhere in this stub as it stands.
+usage()
+{
+ cat >&2 <<EOF
+Usage: $prog { leader | list }
+EOF
+ exit 1
+}
+
+# Report on stderr that command $1 is not emulated, then exit with
+# $not_implemented_exit_code.
+not_implemented()
+{
+ echo "${prog}: command \"$1\" not implemented in stub" >&2
+ exit $not_implemented_exit_code
+}
+
+# Print the value of $FAKE_CTDB_LVS_LEADER and return 0, or return 255
+# without output when it is empty.
+ctdb_lvs_leader()
+{
+    [ -n "$FAKE_CTDB_LVS_LEADER" ] || return 255
+
+    echo "$FAKE_CTDB_LVS_LEADER"
+    return 0
+}
+
+# Print "<pnn> <ip>" for every line of $CTDB_LVS_NODES, numbering the
+# lines from 0.  Anything after the first field of a line is ignored.
+ctdb_lvs_list()
+{
+    _n=0
+    while read -r _addr _; do
+        echo "${_n} ${_addr}"
+        _n=$((_n + 1))
+    done <"$CTDB_LVS_NODES"
+}
+
+######################################################################
+
+# Dispatch on the sub-command.  The handlers receive the full argument
+# list (including the sub-command itself) but do not look at it.
+case "$1" in
+leader) ctdb_lvs_leader "$@" ;;
+list) ctdb_lvs_list "$@" ;;
+*) not_implemented "$1" ;;
+esac
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ctdb_natgw b/ctdb/tests/UNIT/eventscripts/stubs/ctdb_natgw
new file mode 100755
index 0000000..22a2191
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ctdb_natgw
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+# Name used in messages
+prog="ctdb_natgw"
+
+# Exit status used for commands that the stub does not emulate
+not_implemented_exit_code=1
+
+# Report on stderr that command $1 is not emulated, then exit with
+# $not_implemented_exit_code.
+not_implemented()
+{
+ echo "${prog}: command \"$1\" not implemented in stub" >&2
+ exit $not_implemented_exit_code
+}
+
+# Print "<pnn> <ip>" for the leader node: the line of
+# $CTDB_NATGW_NODES that equals $FAKE_CTDB_NATGW_LEADER, with lines
+# numbered from 0.  Prints "-1 0.0.0.0" when no line matches.  Exits
+# with status 1 if the nodes file is not readable.
+ctdb_natgw_leader()
+{
+    # Report the error inline: this stub does not define a die()
+    # helper, so calling one would only produce a "not found" message
+    # and execution would carry on with an unreadable nodes file.
+    if [ ! -r "$CTDB_NATGW_NODES" ]; then
+        echo "error: missing CTDB_NATGW_NODES=${CTDB_NATGW_NODES}" >&2
+        exit 1
+    fi
+
+    # Determine the leader node
+    _leader="-1 0.0.0.0"
+    _pnn=0
+    while read -r _ip; do
+        if [ "$FAKE_CTDB_NATGW_LEADER" = "$_ip" ]; then
+            _leader="${_pnn} ${_ip}"
+            break
+        fi
+        _pnn=$((_pnn + 1))
+    done <"$CTDB_NATGW_NODES"
+    echo "$_leader"
+}
+
+# Dispatch on the sub-command; only "leader" is emulated.
+case "$1" in
+leader) ctdb_natgw_leader "$@" ;;
+*) not_implemented "$1" ;;
+esac
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/date b/ctdb/tests/UNIT/eventscripts/stubs/date
new file mode 100755
index 0000000..8319c9c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/date
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# Fake date: canned output from $FAKE_DATE_OUTPUT wins, otherwise the
+# real /bin/date is run with the original arguments.
+if [ -z "$FAKE_DATE_OUTPUT" ]; then
+    /bin/date "$@"
+else
+    echo "$FAKE_DATE_OUTPUT"
+fi
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/df b/ctdb/tests/UNIT/eventscripts/stubs/df
new file mode 100755
index 0000000..858f0ef
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/df
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# Fake df: reports one filesystem of 1000000 blocks whose usage
+# percentage comes from $FAKE_FS_USE.
+
+usage()
+{
+    echo "usage: df [-kP] [<mount-point>]"
+    exit 1
+}
+
+# A leading -kP is accepted and ignored...
+[ "$1" != "-kP" ] || shift
+
+# ... any other option is rejected
+case "$1" in
+-*) usage ;;
+esac
+
+fs="${1:-/}"
+
+# Anything starting with CTDB_DBDIR_BASE gets canonicalised to
+# CTDB_DBDIR_BASE.  This helps with the setting of defaults for the
+# filesystem checks.
+if [ "${fs#"${CTDB_DBDIR_BASE}"}" != "$fs" ]; then
+    fs="$CTDB_DBDIR_BASE"
+fi
+
+# A default, for tests that don't initialise this...
+: "${FAKE_FS_USE:=10}"
+
+echo "Filesystem 1024-blocks Used Available Capacity Mounted on"
+
+blocks="1000000"
+used=$((blocks * FAKE_FS_USE / 100))
+available=$((blocks - used))
+
+printf "%-36s %10d %10d %10d %10d%% %s\n" \
+    "/dev/sda1" "$blocks" "$used" "$available" "$FAKE_FS_USE" "$fs"
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ethtool b/ctdb/tests/UNIT/eventscripts/stubs/ethtool
new file mode 100755
index 0000000..3d4b889
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ethtool
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# Fake ethtool: the link of interface $1 is reported as down when a
+# file named after it exists in $FAKE_ETHTOOL_LINK_DOWN.
+if [ -f "${FAKE_ETHTOOL_LINK_DOWN}/${1}" ]; then
+    link="no"
+else
+    link="yes"
+fi
+
+# Expect to add more fields later.
+cat <<EOF
+ Link detected: ${link}
+EOF
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/exportfs b/ctdb/tests/UNIT/eventscripts/stubs/exportfs
new file mode 100755
index 0000000..e0970c5
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/exportfs
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+# Fake exportfs: prints one entry, with fixed client options, for each
+# directory listed in $FAKE_SHARES.
+
+opts="10.0.0.0/16(rw,async,insecure,no_root_squash,no_subtree_check)"
+
+for i in $FAKE_SHARES; do
+ # Directories of 15 or more characters are printed on their own
+ # line.
+ if [ ${#i} -ge 15 ]; then
+ printf '%s\n\t\t%s\n' "$i" "$opts"
+ else
+ printf '%s\t%s\n' "$i" "$opts"
+ fi
+done
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/gstack b/ctdb/tests/UNIT/eventscripts/stubs/gstack
new file mode 100755
index 0000000..1dec235
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/gstack
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+# Fake gstack: prints a canned stack trace for PID $1.
+
+pid="$1"
+
+# Look the PID up in $FAKE_PS_MAP, when provided: the first field of a
+# line is matched against the PID and the second is the command name
+if [ -n "$FAKE_PS_MAP" ]; then
+    command=$(echo "$FAKE_PS_MAP" |
+        awk -v pid="$pid" '$1 == pid { print $2 }')
+fi
+
+# No command name available: fall back to smbd
+[ -n "$command" ] || command="smbd"
+
+cat <<EOF
+Thread 1 (Thread 0x7f688fbfb180 (LWP ${pid}) "${command}"):
+#0 0x00007f688ff7a076 in open (FAKE ARGS...) at FAKE PLACE
+....
+#3 0x000055cd368ead72 in main (argc=<optimized out>, argv=<optimized out>) at ${command}.c
+EOF
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/id b/ctdb/tests/UNIT/eventscripts/stubs/id
new file mode 100755
index 0000000..1ecd2f8
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/id
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Make statd-callout happy
+# Always prints 0, whatever the arguments.
+echo 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ip b/ctdb/tests/UNIT/eventscripts/stubs/ip
new file mode 100755
index 0000000..090afae
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ip
@@ -0,0 +1,833 @@
+#!/bin/sh
+
+# All state of the fake "ip" command lives below this directory
+FAKE_IP_STATE="${FAKE_NETWORK_STATE}/ip-state"
+mkdir -p "$FAKE_IP_STATE"
+
+# When true, deleting a primary address promotes the first secondary
+# address instead of removing all of them (see ip_addr_del)
+promote_secondaries=true
+
+# Report an unsupported feature on stdout and exit with status 127.
+not_implemented()
+{
+ echo "ip stub command: \"$1\" not implemented"
+ exit 127
+}
+
+######################################################################
+
+# Handle "ip link ...": dispatch to the set/show/add/delete handlers.
+# Anything else is reported via not_implemented.
+ip_link()
+{
+    case "$1" in
+    set)
+        shift
+        # iface="$1"
+        case "$2" in
+        up) ip_link_set_up "$1" ;;
+        # This has to call ip_link_set_down: no function named
+        # ip_link_down_up exists, so "ip link set <iface> down" used
+        # to fail with "not found" and leave the state untouched.
+        down) ip_link_set_down "$1" ;;
+        *) not_implemented "\"$2\" in \"$orig_args\"" ;;
+        esac
+        ;;
+    show)
+        shift
+        ip_link_show "$@"
+        ;;
+    add*)
+        shift
+        ip_link_add "$@"
+        ;;
+    del*)
+        shift
+        ip_link_delete "$@"
+        ;;
+    *) not_implemented "$*" ;;
+    esac
+}
+
+# Handle "ip link add link <link> name <name> type vlan id <id>".
+# Only VLAN interfaces are supported.  The parent link is recorded in
+# interfaces-vlan/<name> and the new interface is marked down.  The
+# VLAN id is accepted but ignored.
+ip_link_add()
+{
+    _link=""
+    _name=""
+    _type=""
+
+    while [ -n "$1" ]; do
+        case "$1" in
+        link)
+            _link="$2"
+            shift 2
+            ;;
+        name)
+            _name="$2"
+            shift 2
+            ;;
+        type)
+            if [ "$2" != "vlan" ]; then
+                # Report the offending type ($2), not the keyword
+                # "type" itself ($1)
+                not_implemented "link type $2"
+            fi
+            _type="$2"
+            shift 2
+            ;;
+        id) shift 2 ;;
+        *) not_implemented "$1" ;;
+        esac
+    done
+
+    case "$_type" in
+    vlan)
+        if [ -z "$_name" ] || [ -z "$_link" ]; then
+            not_implemented "ip link add with null name or link"
+        fi
+
+        mkdir -p "${FAKE_IP_STATE}/interfaces-vlan"
+        echo "$_link" >"${FAKE_IP_STATE}/interfaces-vlan/${_name}"
+        ip_link_set_down "$_name"
+        ;;
+    esac
+}
+
+# Interface state is kept as marker files named after the interface in
+# the interfaces-deleted, interfaces-down and interfaces-vlan
+# directories below $FAKE_IP_STATE.
+
+# Mark interface $1 as deleted and forget any VLAN record for it.
+ip_link_delete()
+{
+ mkdir -p "${FAKE_IP_STATE}/interfaces-deleted"
+ touch "${FAKE_IP_STATE}/interfaces-deleted/$1"
+ rm -f "${FAKE_IP_STATE}/interfaces-vlan/$1"
+}
+
+# Bring interface $1 up; this also clears its "deleted" marker.
+ip_link_set_up()
+{
+ rm -f "${FAKE_IP_STATE}/interfaces-down/$1"
+ rm -f "${FAKE_IP_STATE}/interfaces-deleted/$1"
+}
+
+# Take interface $1 down; this also clears its "deleted" marker.
+ip_link_set_down()
+{
+ rm -f "${FAKE_IP_STATE}/interfaces-deleted/$1"
+ mkdir -p "${FAKE_IP_STATE}/interfaces-down"
+ touch "${FAKE_IP_STATE}/interfaces-down/$1"
+}
+
+# Handle "ip link show [dev] <iface>": print a fake description of the
+# interface, in brief form when $brief is true.  Exits with status 255
+# if the interface has been deleted.  Uses $n, when set, as the
+# interface index (42 otherwise).
+ip_link_show()
+{
+ dev="$1"
+ if [ "$dev" = "dev" ] && [ -n "$2" ]; then
+ dev="$2"
+ fi
+
+ if [ -e "${FAKE_IP_STATE}/interfaces-deleted/$dev" ]; then
+ echo "Device \"${dev}\" does not exist." >&2
+ exit 255
+ fi
+
+ # VLAN interfaces are shown as <name>@<parent link>
+ if [ -r "${FAKE_IP_STATE}/interfaces-vlan/${dev}" ]; then
+ read -r _link <"${FAKE_IP_STATE}/interfaces-vlan/${dev}"
+ dev="${dev}@${_link}"
+ fi
+
+ _state="UP"
+ _flags=",UP,LOWER_UP"
+ # NOTE(review): for a VLAN interface $dev already has the "@<link>"
+ # suffix here, whereas ip_link_set_down creates the marker under the
+ # plain name, so a VLAN interface never appears to be down - confirm
+ # whether this is intended.
+ if [ -e "${FAKE_IP_STATE}/interfaces-down/$dev" ]; then
+ _state="DOWN"
+ _flags=""
+ fi
+ case "$dev" in
+ lo)
+ _mac="00:00:00:00:00:00"
+ _brd="00:00:00:00:00:00"
+ _type="loopback"
+ _state="UNKNOWN"
+ _status="<LOOPBACK${_flags}>"
+ _opts="mtu 65536 qdisc noqueue state ${_state}"
+ ;;
+ *)
+ # Derive a stable fake MAC address from a checksum of the name
+ _mac=$(echo "$dev" | cksum | sed -r -e 's@(..)(..)(..).*@fe:fe:fe:\1:\2:\3@')
+ _brd="ff:ff:ff:ff:ff:ff"
+ _type="ether"
+ _status="<BROADCAST,MULTICAST${_flags}>"
+ _opts="mtu 1500 qdisc pfifo_fast state ${_state} qlen 1000"
+ ;;
+ esac
+
+ if $brief; then
+ # NOTE(review): the second column repeats $_status (the flags); it
+ # looks like $_state may have been intended - confirm against real
+ # "ip -br link" output before changing.
+ printf '%-16s %-14s %-17s %s\n' \
+ "$dev" "$_status" "$_mac" "$_status"
+ else
+ echo "${n:-42}: ${dev}: ${_status} ${_opts}"
+ echo " link/${_type} ${_mac} brd ${_brd}"
+ fi
+}
+
+# This is incomplete because it doesn't actually look up table ids in
+# /etc/iproute2/rt_tables. The rules/routes are actually associated
+# with the name instead of the number. However, we include a variable
+# to fake a bad table id.
+[ -n "$IP_ROUTE_BAD_TABLE_ID" ] || IP_ROUTE_BAD_TABLE_ID=false
+
+# Validate the routing table name in $_table for command $1 ("rule" or
+# "route").  For "route" an empty $_table defaults to "main".  Exits
+# with status 255 after printing an error when
+# $IP_ROUTE_BAD_TABLE_ID is true; unsupported table names are reported
+# via not_implemented.
+ip_check_table()
+{
+ _cmd="$1"
+
+ if [ "$_cmd" = "route" ] && [ -z "$_table" ]; then
+ _table="main"
+ fi
+
+ [ -n "$_table" ] || not_implemented "ip rule/route without \"table\""
+
+ # Only allow tables names from 13.per_ip_routing and "main". This
+ # is a cheap way of avoiding implementing the default/local
+ # tables.
+ case "$_table" in
+ ctdb.* | main)
+ if $IP_ROUTE_BAD_TABLE_ID; then
+ # Ouch. Simulate inconsistent errors from ip. :-(
+ case "$_cmd" in
+ route)
+ echo "Error: argument \"${_table}\" is wrong: table id value is invalid" >&2
+
+ ;;
+ *)
+ echo "Error: argument \"${_table}\" is wrong: invalid table ID" >&2
+ ;;
+ esac
+ exit 255
+ fi
+ ;;
+ *) not_implemented "table=${_table} ${orig_args}" ;;
+ esac
+}
+
+######################################################################
+
+# Handle "ip addr ...": dispatch to the show/add/del handlers.  A
+# missing sub-command means "show".
+ip_addr()
+{
+ case "$1" in
+ show | list | "")
+ shift
+ ip_addr_show "$@"
+ ;;
+ add*)
+ shift
+ ip_addr_add "$@"
+ ;;
+ del*)
+ shift
+ ip_addr_del "$@"
+ ;;
+ *) not_implemented "\"$1\" in \"$orig_args\"" ;;
+ esac
+}
+
+# Handle "ip addr show [dev <iface>] [primary|secondary] [to <addr>]".
+# Addresses are read from the files
+# addresses/<dev>-<net>-{primary,secondary} below $FAKE_IP_STATE, each
+# line being "<address>/<bits> <scope>".  Brief mode is not supported.
+ip_addr_show()
+{
+ dev=""
+ primary=true
+ secondary=true
+ _to=""
+
+ if $brief; then
+ not_implemented "ip -br addr show in \"$orig_args\""
+ fi
+
+ while [ -n "$1" ]; do
+ case "$1" in
+ dev)
+ dev="$2"
+ shift 2
+ ;;
+ # Do stupid things and stupid things will happen!
+ primary)
+ primary=true
+ secondary=false
+ shift
+ ;;
+ secondary)
+ secondary=true
+ primary=false
+ shift
+ ;;
+ to)
+ _to="$2"
+ shift 2
+ ;;
+ *)
+ # Assume an interface name
+ dev="$1"
+ shift 1
+ ;;
+ esac
+ done
+ devices="$dev"
+ if [ -z "$devices" ]; then
+ # No device specified? Get all the primaries...
+ devices=$(find "${FAKE_IP_STATE}/addresses" -name "*-primary" |
+ sed -e 's@.*/@@' -e 's@-.*-primary$@@' |
+ sort -u)
+ fi
+ # Set $brd to the broadcast address for $local: only /24 and /32
+ # (no broadcast address) are supported
+ calc_brd()
+ {
+ case "${local#*/}" in
+ 24) brd="${local%.*}.255" ;;
+ 32) brd="" ;;
+ *) not_implemented "list ... fake bits other than 24/32: ${local#*/}" ;;
+ esac
+ }
+ # Print the link description of $dev followed by its addresses
+ show_iface()
+ {
+ ip_link_show "$dev"
+
+ # Networks are taken from the names of the primary address files
+ nets=$(find "${FAKE_IP_STATE}/addresses" -name "${dev}-*-primary" |
+ sed -e 's@.*/@@' -e "s@${dev}-\(.*\)-primary\$@\1@")
+
+ for net in $nets; do
+ pf="${FAKE_IP_STATE}/addresses/${dev}-${net}-primary"
+ sf="${FAKE_IP_STATE}/addresses/${dev}-${net}-secondary"
+ if $primary && [ -r "$pf" ]; then
+ read -r local scope <"$pf"
+ if [ -z "$_to" ] || [ "${_to%/*}" = "${local%/*}" ]; then
+ calc_brd
+ echo " inet ${local} ${brd:+brd ${brd} }scope ${scope} ${dev}"
+ fi
+ fi
+ if $secondary && [ -r "$sf" ]; then
+ while read -r local scope; do
+ if [ -z "$_to" ] || [ "${_to%/*}" = "${local%/*}" ]; then
+ calc_brd
+ # NOTE(review): unlike the primary case above, an empty $brd
+ # leaves an extra space before "scope" here - confirm intended.
+ echo " inet ${local} ${brd:+brd }${brd} scope ${scope} secondary ${dev}"
+ fi
+ done <"$sf"
+ fi
+ if [ -z "$_to" ]; then
+ echo " valid_lft forever preferred_lft forever"
+ fi
+ done
+ }
+ # $n is the interface index printed by ip_link_show
+ n=1
+ for dev in $devices; do
+ # With "to", only show interfaces that have the requested address
+ if [ -z "$_to" ] ||
+ grep -F "${_to%/*}/" "${FAKE_IP_STATE}/addresses/${dev}-"* >/dev/null; then
+ show_iface
+ fi
+ n=$((n + 1))
+ done
+}
+
+# Copied from 13.per_ip_routing for now... so this is lazy testing :-(
+#
+# Print the network address, in <network>/<bits> form, of the IPv4
+# host address $1 given in <address>/<bits> form.
+ipv4_host_addr_to_net()
+{
+ _addr="$1"
+
+ _host="${_addr%/*}"
+ _maskbits="${_addr#*/}"
+
+ # Convert the host address to an unsigned long by splitting out
+ # the octets and doing the math.
+ _host_ul=0
+ # Want word splitting here
+ # shellcheck disable=SC2086
+ for _o in $(
+ export IFS="."
+ echo $_host
+ ); do
+ _host_ul=$(((_host_ul << 8) + _o)) # work around Emacs color bug
+ done
+
+ # Calculate the mask and apply it.
+ _mask_ul=$((0xffffffff << (32 - _maskbits)))
+ _net_ul=$((_host_ul & _mask_ul))
+
+ # Now convert to a network address one byte at a time.
+ # Each pass prepends the lowest remaining byte, so the octets end up
+ # in the usual order.
+ _net=""
+ for _o in $(seq 1 4); do
+ _net="$((_net_ul & 255))${_net:+.}${_net}"
+ _net_ul=$((_net_ul >> 8))
+ done
+
+ echo "${_net}/${_maskbits}"
+}
+
+# Handle "ip addr add <addr>/<bits> [brd +] dev <iface> [scope <s>]".
+# The first address added on a network for an interface becomes the
+# primary address, later ones are appended to the secondary file.
+# Adding an address that is already present exits with status 254.
+ip_addr_add()
+{
+ local=""
+ dev=""
+ brd=""
+ scope="global"
+ while [ -n "$1" ]; do
+ case "$1" in
+ *.*.*.*/*)
+ local="$1"
+ shift
+ ;;
+ local)
+ local="$2"
+ shift 2
+ ;;
+ broadcast | brd)
+ # For now assume this is always '+'.
+ if [ "$2" != "+" ]; then
+ not_implemented "addr add ... brd $2 ..."
+ fi
+ shift 2
+ ;;
+ dev)
+ dev="$2"
+ shift 2
+ ;;
+ scope)
+ scope="$2"
+ shift 2
+ ;;
+ *)
+ not_implemented "$@"
+ ;;
+ esac
+ done
+ if [ -z "$dev" ]; then
+ not_implemented "addr add (without dev)"
+ fi
+ mkdir -p "${FAKE_IP_STATE}/addresses"
+ # File names use the network address with "/" replaced by "_"
+ net_str=$(ipv4_host_addr_to_net "$local")
+ net_str=$(echo "$net_str" | sed -e 's@/@_@')
+ pf="${FAKE_IP_STATE}/addresses/${dev}-${net_str}-primary"
+ sf="${FAKE_IP_STATE}/addresses/${dev}-${net_str}-secondary"
+ # We could lock here... but we should be the only ones playing
+ # around here with these stubs.
+ if [ ! -f "$pf" ]; then
+ echo "$local $scope" >"$pf"
+ elif grep -Fq "$local" "$pf"; then
+ echo "RTNETLINK answers: File exists" >&2
+ exit 254
+ elif [ -f "$sf" ] && grep -Fq "$local" "$sf"; then
+ echo "RTNETLINK answers: File exists" >&2
+ exit 254
+ else
+ echo "$local $scope" >>"$sf"
+ fi
+}
+
+# Handle "ip addr del <addr>/<bits> dev <iface>".  Deleting a primary
+# address promotes the first secondary address when
+# $promote_secondaries is true and one exists; otherwise the primary
+# and all secondaries are removed.  Deleting an unknown address exits
+# with status 254.
+ip_addr_del()
+{
+ local=""
+ dev=""
+ while [ -n "$1" ]; do
+ case "$1" in
+ *.*.*.*/*)
+ local="$1"
+ shift
+ ;;
+ local)
+ local="$2"
+ shift 2
+ ;;
+ dev)
+ dev="$2"
+ shift 2
+ ;;
+ *)
+ not_implemented "addr del ... $1 ..."
+ ;;
+ esac
+ done
+ if [ -z "$dev" ]; then
+ not_implemented "addr del (without dev)"
+ fi
+ mkdir -p "${FAKE_IP_STATE}/addresses"
+ # File names use the network address with "/" replaced by "_"
+ net_str=$(ipv4_host_addr_to_net "$local")
+ net_str=$(echo "$net_str" | sed -e 's@/@_@')
+ pf="${FAKE_IP_STATE}/addresses/${dev}-${net_str}-primary"
+ sf="${FAKE_IP_STATE}/addresses/${dev}-${net_str}-secondary"
+ # We could lock here... but we should be the only ones playing
+ # around here with these stubs.
+ if [ ! -f "$pf" ]; then
+ echo "RTNETLINK answers: Cannot assign requested address" >&2
+ exit 254
+ elif grep -Fq "$local" "$pf"; then
+ if $promote_secondaries && [ -s "$sf" ]; then
+ # The first secondary address becomes the new primary
+ head -n 1 "$sf" >"$pf"
+ sed -i -e '1d' "$sf"
+ else
+ # Remove primaries AND SECONDARIES.
+ rm -f "$pf" "$sf"
+ fi
+ elif [ -f "$sf" ] && grep -Fq "$local" "$sf"; then
+ grep -Fv "$local" "$sf" >"${sf}.new"
+ mv "${sf}.new" "$sf"
+ else
+ echo "RTNETLINK answers: Cannot assign requested address" >&2
+ exit 254
+ fi
+}
+
+######################################################################
+
+# Handle "ip rule ...": dispatch to the show/add/del handlers.  A
+# missing sub-command means "show".
+ip_rule()
+{
+ case "$1" in
+ show | list | "")
+ shift
+ ip_rule_show "$@"
+ ;;
+ add)
+ shift
+ ip_rule_add "$@"
+ ;;
+ del*)
+ shift
+ ip_rule_del "$@"
+ ;;
+ *) not_implemented "$1 in \"$orig_args\"" ;;
+ esac
+
+}
+
+# All non-default rules are in $FAKE_IP_STATE/rules, one per line in
+# the form "<pref> <table> <selectors>". As with the real version,
+# rules can be repeated. Deleting just deletes the 1st match.
+
+# Handle "ip rule show": print the three built-in rules (0, 32766 and
+# 32767) with the stored rules placed around them according to their
+# preference.  Brief mode is not supported.
+ip_rule_show()
+{
+ if $brief; then
+ not_implemented "ip -br rule show in \"$orig_args\""
+ fi
+
+ # Print one rule: $1 preference, $2 table, $3 selectors
+ ip_rule_show_1()
+ {
+ _pre="$1"
+ _table="$2"
+ _selectors="$3"
+ # potentially more options
+
+ printf "%d:\t%s lookup %s \n" "$_pre" "$_selectors" "$_table"
+ }
+
+ # Print the stored rules with preference between $1 and $2 inclusive,
+ # in file order
+ ip_rule_show_some()
+ {
+ _min="$1"
+ _max="$2"
+
+ [ -f "${FAKE_IP_STATE}/rules" ] || return
+
+ while read -r _pre _table _selectors; do
+ # Only print those in range
+ if [ "$_min" -le "$_pre" ] &&
+ [ "$_pre" -le "$_max" ]; then
+ ip_rule_show_1 "$_pre" "$_table" "$_selectors"
+ fi
+ done <"${FAKE_IP_STATE}/rules"
+ }
+
+ ip_rule_show_1 0 "local" "from all"
+
+ ip_rule_show_some 1 32765
+
+ ip_rule_show_1 32766 "main" "from all"
+ ip_rule_show_1 32767 "default" "from all"
+
+ ip_rule_show_some 32768 2147483648
+}
+
+# Parse the arguments shared by "ip rule add" and "ip rule del" into
+# $_from, $_pre and $_table.  All three of "from", "pref" and "table"
+# are required; anything else is reported via not_implemented.
+ip_rule_common()
+{
+ _from=""
+ _pre=""
+ _table=""
+ while [ -n "$1" ]; do
+ case "$1" in
+ from)
+ _from="$2"
+ shift 2
+ ;;
+ pref)
+ _pre="$2"
+ shift 2
+ ;;
+ table)
+ _table="$2"
+ shift 2
+ ;;
+ *) not_implemented "$1 in \"$orig_args\"" ;;
+ esac
+ done
+
+ [ -n "$_pre" ] || not_implemented "ip rule without \"pref\""
+ ip_check_table "rule"
+ # Relax this if more selectors added later...
+ [ -n "$_from" ] || not_implemented "ip rule without \"from\""
+}
+
+# Handle "ip rule add from <prefix> pref <n> table <name>": append the
+# rule to the rules file while holding a lock on that file.
+ip_rule_add()
+{
+ ip_rule_common "$@"
+
+ _f="${FAKE_IP_STATE}/rules"
+ touch "$_f"
+ (
+ # The rules file is this subshell's stdin (fd 0), so this locks it
+ flock 0
+ # Filter order must be consistent with the comparison in ip_rule_del()
+ echo "$_pre $_table${_from:+ from }$_from" >>"$_f"
+ ) <"$_f"
+}
+
+# Handle "ip rule del from <prefix> pref <n> table <name>": remove the
+# first matching rule from the rules file.  Exits with status 2 after
+# printing an error when no rule matches.
+ip_rule_del()
+{
+ ip_rule_common "$@"
+
+ _f="${FAKE_IP_STATE}/rules"
+ touch "$_f"
+ # ShellCheck doesn't understand this flock pattern
+ # shellcheck disable=SC2094
+ (
+ flock 0
+ _tmp="${_f}.new"
+ : >"$_tmp"
+ _found=false
+ # Copy every rule except the first match into the new file
+ while read -r _p _t _s; do
+ if ! $_found &&
+ [ "$_p" = "$_pre" ] && [ "$_t" = "$_table" ] &&
+ [ "$_s" = "${_from:+from }$_from" ]; then
+ # Found. Skip this one but not future ones.
+ _found=true
+ else
+ echo "$_p $_t $_s" >>"$_tmp"
+ fi
+ done
+ if cmp -s "$_tmp" "$_f"; then
+ # No changes, must not have found what we wanted to delete
+ echo "RTNETLINK answers: No such file or directory" >&2
+ rm -f "$_tmp"
+ exit 2
+ else
+ mv "$_tmp" "$_f"
+ fi
+ # The exit above only leaves the subshell, so pass its status on
+ ) <"$_f" || exit $?
+}
+
+######################################################################
+
+# Handle "ip route ...": dispatch to the show/flush/add/del handlers.
+# Unlike "ip addr" and "ip rule", a missing sub-command is not
+# accepted.
+ip_route()
+{
+ case "$1" in
+ show | list)
+ shift
+ ip_route_show "$@"
+ ;;
+ flush)
+ shift
+ ip_route_flush "$@"
+ ;;
+ add)
+ shift
+ ip_route_add "$@"
+ ;;
+ del*)
+ shift
+ ip_route_del "$@"
+ ;;
+ *) not_implemented "$1 in \"ip route\"" ;;
+ esac
+}
+
+# Parse the optional leading "table <name>" arguments of "ip route
+# show" and "ip route flush" into $_table, then validate it.  $_table
+# is not reset here, so without a "table" argument ip_check_table
+# falls back to "main" only if $_table is still empty.
+ip_route_common()
+{
+ if [ "$1" = table ]; then
+ _table="$2"
+ shift 2
+ fi
+
+ ip_check_table "route"
+}
+
+# Routes are in a file per table in the directory
+# $FAKE_IP_STATE/routes. These routes just use the table ID
+# that is passed and don't do any lookup. This could be "improved" if
+# necessary.
+
+# Handle "ip route show [table <name>]": print the routes of the table
+# in sorted order.
+ip_route_show()
+{
+ ip_route_common "$@"
+
+ # Missing file is just an empty table
+ sort "$FAKE_IP_STATE/routes/${_table}" 2>/dev/null || true
+}
+
+# Handle "ip route flush [table <name>]": drop all routes of the table.
+ip_route_flush()
+{
+ ip_route_common "$@"
+
+ rm -f "$FAKE_IP_STATE/routes/${_table}"
+}
+
+# Handle "ip route add <prefix> [dev <iface>] [via <gw>]
+# [table <name>] [metric <n>]": append a route to the file of the
+# table.  Exits with status 1 after printing an error when a route
+# with the same prefix (and metric, if given) already exists.
+ip_route_add()
+{
+ _prefix=""
+ _dev=""
+ _gw=""
+ _table=""
+ _metric=""
+
+ while [ -n "$1" ]; do
+ case "$1" in
+ *.*.*.*/* | *.*.*.*)
+ _prefix="$1"
+ shift 1
+ ;;
+ local)
+ _prefix="$2"
+ shift 2
+ ;;
+ dev)
+ _dev="$2"
+ shift 2
+ ;;
+ via)
+ _gw="$2"
+ shift 2
+ ;;
+ table)
+ _table="$2"
+ shift 2
+ ;;
+ metric)
+ _metric="$2"
+ shift 2
+ ;;
+ *) not_implemented "$1 in \"$orig_args\"" ;;
+ esac
+ done
+
+ ip_check_table "route"
+ [ -n "$_prefix" ] || not_implemented "ip route without inet prefix in \"$orig_args\""
+ # This can't be easily deduced, so print some garbage.
+ [ -n "$_dev" ] || _dev="ethXXX"
+
+ # Alias or add missing bits
+ case "$_prefix" in
+ 0.0.0.0/0) _prefix="default" ;;
+ */*) : ;;
+ *) _prefix="${_prefix}/32" ;;
+ esac
+
+ _f="$FAKE_IP_STATE/routes/${_table}"
+ mkdir -p "$FAKE_IP_STATE/routes"
+ touch "$_f"
+
+ # Check for duplicate
+ # The dots of the prefix are escaped so they only match literally
+ _prefix_regexp=$(echo "^${_prefix}" | sed -e 's@\.@\\.@g')
+ if [ -n "$_metric" ]; then
+ _prefix_regexp="${_prefix_regexp} .*metric ${_metric} "
+ fi
+ if grep -q "$_prefix_regexp" "$_f"; then
+ echo "RTNETLINK answers: File exists" >&2
+ exit 1
+ fi
+
+ (
+ # The routes file is this subshell's stdin (fd 0), so this locks it
+ flock 0
+
+ # Assemble the route line from whichever parts were given
+ _out="${_prefix} "
+ [ -z "$_gw" ] || _out="${_out}via ${_gw} "
+ [ -z "$_dev" ] || _out="${_out}dev ${_dev} "
+ [ -n "$_gw" ] || _out="${_out} scope link "
+ [ -z "$_metric" ] || _out="${_out} metric ${_metric} "
+ echo "$_out" >>"$_f"
+ ) <"$_f"
+}
+
+# Handle "ip route del <prefix> [dev <iface>] [via <gw>]
+# [table <name>] [metric <n>]": delete every matching route from the
+# file of the table.  No error is reported when nothing matches.
+ip_route_del()
+{
+ _prefix=""
+ _dev=""
+ _gw=""
+ _table=""
+ _metric=""
+
+ while [ -n "$1" ]; do
+ case "$1" in
+ *.*.*.*/* | *.*.*.*)
+ _prefix="$1"
+ shift 1
+ ;;
+ local)
+ _prefix="$2"
+ shift 2
+ ;;
+ dev)
+ _dev="$2"
+ shift 2
+ ;;
+ via)
+ _gw="$2"
+ shift 2
+ ;;
+ table)
+ _table="$2"
+ shift 2
+ ;;
+ metric)
+ _metric="$2"
+ shift 2
+ ;;
+ *) not_implemented "$1 in \"$orig_args\"" ;;
+ esac
+ done
+
+ ip_check_table "route"
+ [ -n "$_prefix" ] || not_implemented "ip route without inet prefix in \"$orig_args\""
+ # This can't be easily deduced, so print some garbage.
+ # (ip_route_add applies the same default, so routes added without
+ # "dev" can be deleted without "dev".)
+ [ -n "$_dev" ] || _dev="ethXXX"
+
+ # Alias or add missing bits
+ case "$_prefix" in
+ 0.0.0.0/0) _prefix="default" ;;
+ */*) : ;;
+ *) _prefix="${_prefix}/32" ;;
+ esac
+
+ _f="$FAKE_IP_STATE/routes/${_table}"
+ mkdir -p "$FAKE_IP_STATE/routes"
+ touch "$_f"
+
+ # ShellCheck doesn't understand this flock pattern
+ # shellcheck disable=SC2094
+ (
+ flock 0
+
+ # Escape some dots
+ # (and the "/" of the prefix, which is the sed address delimiter)
+ [ -z "$_gw" ] || _gw=$(echo "$_gw" | sed -e 's@\.@\\.@g')
+ _prefix=$(echo "$_prefix" | sed -e 's@\.@\\.@g' -e 's@/@\\/@')
+
+ # Build a pattern from the parts that were given, in the order in
+ # which ip_route_add writes them
+ _re="^${_prefix}\>.*"
+ [ -z "$_gw" ] || _re="${_re}\<via ${_gw}\>.*"
+ [ -z "$_dev" ] || _re="${_re}\<dev ${_dev}\>.*"
+ [ -z "$_metric" ] || _re="${_re}.*\<metric ${_metric}\>.*"
+ sed -i -e "/${_re}/d" "$_f"
+ ) <"$_f"
+}
+
+######################################################################
+
+# Keep the complete command line for use in error messages
+orig_args="$*"
+
+# An option starting with -br selects brief output; it is only
+# recognised as the first argument
+brief=false
+case "$1" in
+-br*)
+ brief=true
+ shift
+ ;;
+esac
+
+# Dispatch on the object type
+case "$1" in
+link)
+ shift
+ ip_link "$@"
+ ;;
+addr*)
+ shift
+ ip_addr "$@"
+ ;;
+rule)
+ shift
+ ip_rule "$@"
+ ;;
+route)
+ shift
+ ip_route "$@"
+ ;;
+*) not_implemented "$1" ;;
+esac
+
+# Handlers that fail call exit themselves; anything that returns here
+# is reported as success
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ip6tables b/ctdb/tests/UNIT/eventscripts/stubs/ip6tables
new file mode 100755
index 0000000..2c65f7b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ip6tables
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# Fake ip6tables: all arguments are ignored.
+# Always succeed.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/iptables b/ctdb/tests/UNIT/eventscripts/stubs/iptables
new file mode 100755
index 0000000..2c65f7b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/iptables
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# Always succeed.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ipvsadm b/ctdb/tests/UNIT/eventscripts/stubs/ipvsadm
new file mode 100755
index 0000000..31bdf2c
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ipvsadm
@@ -0,0 +1,154 @@
+#!/bin/sh
+
+die()
+{
+ echo "$1" >&2
+ exit "${2:-1}"
+}
+
+[ -n "$FAKE_LVS_STATE_DIR" ] || die "FAKE_LVS_STATE_DIR not set"
+
+service_address=""
+scheduling_method="wlc"
+persistent_timeout=""
+real_server=""
+forwarding_method="Route"
+
+set_service_address()
+{
+ [ -z "$service_address" ] ||
+ die "multiple 'service-address' options specified" 2
+ case "$2" in
+ *:*) service_address="${1} ${2}" ;;
+ *) service_address="${1} ${2}:0" ;;
+ esac
+}
+
+set_real_server()
+{
+ [ -z "$real_server" ] ||
+ die "multiple 'real-server' options specified" 2
+ case "$1" in
+ *\]:*) real_server="${1}" ;;
+ *\]) real_server="${1}:0" ;;
+ *:*) real_server="${1}" ;;
+ *) real_server="${1}:0" ;;
+ esac
+
+ case "$real_server" in
+ 127.0.0.1:* | \[::1\]:*) forwarding_method="Local" ;;
+ esac
+}
+
+case "$1" in
+-A)
+ shift
+ while [ -n "$1" ]; do
+ case "$1" in
+ -t)
+ set_service_address "TCP" "$2"
+ shift 2
+ ;;
+ -u)
+ set_service_address "UDP" "$2"
+ shift 2
+ ;;
+ -s)
+ scheduling_method="$2"
+ shift 2
+ ;;
+ -p)
+ persistent_timeout="persistent $2"
+ shift 2
+ ;;
+ *) die "Unsupported -A option $1" ;;
+ esac
+ done
+ [ -n "$service_address" ] ||
+ die "You need to supply the 'service-address' option for the 'add-service' command" 2
+ d="${FAKE_LVS_STATE_DIR}/${service_address}"
+ mkdir "$d" 2>/dev/null || die "Service already exists" 255
+ t="${scheduling_method}${persistent_timeout:+ }${persistent_timeout}"
+ echo "$t" >"${d}/.info"
+ ;;
+-D)
+ shift
+ while [ -n "$1" ]; do
+ case "$1" in
+ -t)
+ set_service_address "TCP" "$2"
+ shift 2
+ ;;
+ -u)
+ set_service_address "UDP" "$2"
+ shift 2
+ ;;
+ *) die "Unsupported -D option $1" ;;
+ esac
+ done
+ [ -n "$service_address" ] ||
+ die "You need to supply the 'service-address' option for the 'delete-service' command" 2
+ d="${FAKE_LVS_STATE_DIR}/${service_address}"
+ rm -f "${d}/"*
+ rm -f "${d}/.info"
+ rmdir "$d" 2>/dev/null || die "No such service" 255
+ ;;
+-a) # add-server: record a real server under an already-defined service
+ shift
+ while [ -n "$1" ]; do
+ case "$1" in
+ -t)
+ set_service_address "TCP" "$2"
+ shift 2
+ ;;
+ -u)
+ set_service_address "UDP" "$2"
+ shift 2
+ ;;
+ -r)
+ set_real_server "$2"
+ shift 2
+ ;;
+ -g)
+ forwarding_method="Route"
+ shift 1
+ ;;
+ *) die "Unsupported -a option $1" ;;
+ esac
+ done
+ [ -n "$service_address" ] ||
+ die "You need to supply the 'service-address' option for the 'add-server' command" 2
+ d="${FAKE_LVS_STATE_DIR}/${service_address}"
+ [ -d "$d" ] || die "Service not defined" 255
+ [ -n "$real_server" ] ||
+ die "You need to supply the 'real-server' option for the 'add-server' command" 2
+ f="${d}/${real_server}" # one file per real server, holding its forwarding method
+ echo "$forwarding_method" >"$f"
+ ;;
+-l)
+ cat <<EOF
+IP Virtual Server version 1.2.1 (size=4096)
+Prot LocalAddress:Port Scheduler Flags
+ -> RemoteAddress:Port Forward Weight ActiveConn InActConn
+EOF
+ cd "$FAKE_LVS_STATE_DIR" || exit 0
+ (
+ for d in *; do
+ [ -d "$d" ] || continue
+ printf '%s ' "$d"
+ cat "${d}/.info"
+ for f in "${d}/"*; do
+ [ -f "$f" ] || continue
+ read -r forwarding_method <"$f"
+ printf " -> %-28s %-7s %-6s %-10s %-10s\n" \
+ "${f##*/}" "$forwarding_method" 1 0 0
+ done
+ done
+ )
+ ;;
+*)
+ die "Unknown option $1"
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/kill b/ctdb/tests/UNIT/eventscripts/stubs/kill
new file mode 100755
index 0000000..b69e3e6
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/kill
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# Always succeed. This means that kill -0 will always find a
+# process and anything else will successfully kill. This should
+# exercise a good variety of code paths.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/killall b/ctdb/tests/UNIT/eventscripts/stubs/killall
new file mode 100755
index 0000000..1e182e1
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/killall
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# Always succeed. This means that killall -0 will always find a
+# process and anything else will successfully kill. This should
+# exercise a good variety of code paths.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/multipath b/ctdb/tests/UNIT/eventscripts/stubs/multipath
new file mode 100755
index 0000000..319b734
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/multipath
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+usage()
+{
+ echo "usage: ${0} -ll device" >&2 # die() is not defined in this stub
+ exit 1
+}
+[ "$1" = "-ll" ] || usage
+shift
+[ $# -eq 1 ] || usage
+
+device="$1"
+
+if [ -n "$FAKE_MULTIPATH_HANG" ]; then
+ FAKE_SLEEP_REALLY="yes" sleep 999
+fi
+
+path1_state="active"
+path2_state="enabled"
+
+for i in $FAKE_MULTIPATH_FAILURES; do
+ if [ "$device" = "$i" ]; then
+ path1_state="inactive"
+ path2_state="inactive"
+ break
+ fi
+done
+
+cat <<EOF
+${device} (AUTO-01234567) dm-0 ,
+size=10G features='0' hwhandler='0' wp=rw
+|-+- policy='round-robin 0' prio=1 status=${path1_state}
+| \`- #:#:#:# vda 252:0 active ready running
+\`-+- policy='round-robin 0' prio=1 status=${path2_state}
+ \`- #:#:#:# vdb 252:16 active ready running
+EOF
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/net b/ctdb/tests/UNIT/eventscripts/stubs/net
new file mode 100755
index 0000000..3f96413
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/net
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# Always succeed for now...
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/nfs-fake-callout b/ctdb/tests/UNIT/eventscripts/stubs/nfs-fake-callout
new file mode 100755
index 0000000..a4d43d0
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/nfs-fake-callout
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+case "$1" in
+register)
+ echo "ALL"
+ exit
+ ;;
+esac
+
+if [ "$NFS_FAKE_CALLOUT_MAGIC" = "$1" ]; then
+ echo "$1"
+ exit 1
+fi
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/nfsconf b/ctdb/tests/UNIT/eventscripts/stubs/nfsconf
new file mode 100755
index 0000000..84dd9ea
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/nfsconf
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# This always fails for now, since there are no tests that expect to
+# use it.
+exit 1
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/pidof b/ctdb/tests/UNIT/eventscripts/stubs/pidof
new file mode 100755
index 0000000..6a25395
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/pidof
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+case "$1" in
+nfsd)
+ echo "$FAKE_NFSD_THREAD_PIDS"
+ ;;
+rpc.statd | rpc.rquotad | rpc.mountd)
+ echo "$FAKE_RPC_THREAD_PIDS"
+ ;;
+smbd)
+ echo "$FAKE_SMBD_THREAD_PIDS"
+ ;;
+*)
+ echo "pidof: \"$1\" not implemented"
+ exit 1
+ ;;
+esac
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/pkill b/ctdb/tests/UNIT/eventscripts/stubs/pkill
new file mode 100755
index 0000000..b3f1de5
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/pkill
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# Always succeed. This means that pkill -0 will always find a
+# process and anything else will successfully kill. This should
+# exercise a good variety of code paths.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ps b/ctdb/tests/UNIT/eventscripts/stubs/ps
new file mode 100755
index 0000000..0d33203
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ps
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+usage()
+{
+ echo "ps [ -p PID | -o FORMAT | auxfww ]" # "auxfww" is the only positional form accepted
+ exit 1
+}
+
+while getopts "o:p:h:?" opt; do
+ case "$opt" in
+ o) format="$OPTARG" ;;
+ p) pid="$OPTARG" ;;
+ \? | h) usage ;;
+ esac
+done
+shift $((OPTIND - 1))
+
+if [ -n "$pid" ] && [ -n "$FAKE_PS_MAP" ]; then
+ # shellcheck disable=SC1001
+ case "$format" in
+ comm\=)
+ echo "$FAKE_PS_MAP" |
+ awk -v pid="$pid" '$1 == pid { print $2 }'
+ ;;
+ state\=)
+ echo "$FAKE_PS_MAP" |
+ awk -v pid="$pid" '$1 == pid { print $3 }'
+ ;;
+ esac
+
+ exit
+fi
+
+if [ "$1" != "auxfww" ]; then
+ echo "option $1 not supported"
+ usage
+fi
+
+cat <<EOF
+USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
+root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
+root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
+...
+root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
+root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
+...
+[MORE FAKE ps OUTPUT]
+EOF
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/rm b/ctdb/tests/UNIT/eventscripts/stubs/rm
new file mode 100755
index 0000000..6034d75
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/rm
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Make statd-callout happy
+case "$*" in
+*/var/lib/nfs/statd/sm*) : ;;
+*) exec /bin/rm "$@" ;;
+esac
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/rpc.lockd b/ctdb/tests/UNIT/eventscripts/stubs/rpc.lockd
new file mode 100755
index 0000000..e71f6cd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/rpc.lockd
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Restart always "works". However, the test infrastructure may
+# continue to mark the service as down, so that's what matters.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/rpc.mountd b/ctdb/tests/UNIT/eventscripts/stubs/rpc.mountd
new file mode 100755
index 0000000..e71f6cd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/rpc.mountd
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Restart always "works". However, the test infrastructure may
+# continue to mark the service as down, so that's what matters.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/rpc.rquotad b/ctdb/tests/UNIT/eventscripts/stubs/rpc.rquotad
new file mode 100755
index 0000000..e71f6cd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/rpc.rquotad
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Restart always "works". However, the test infrastructure may
+# continue to mark the service as down, so that's what matters.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/rpc.statd b/ctdb/tests/UNIT/eventscripts/stubs/rpc.statd
new file mode 100755
index 0000000..e71f6cd
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/rpc.statd
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Restart always "works". However, the test infrastructure may
+# continue to mark the service as down, so that's what matters.
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/rpcinfo b/ctdb/tests/UNIT/eventscripts/stubs/rpcinfo
new file mode 100755
index 0000000..8732751
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/rpcinfo
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+prog="rpcinfo"
+
+usage()
+{
+ cat >&2 <<EOF
+Usage: $prog -T tcp host program [version]
+
+A fake rpcinfo stub that succeeds for items in FAKE_RPCINFO_SERVICES,
+depending on command-line options.
+
+EOF
+ exit 1
+}
+
+parse_options()
+{
+ while getopts "T:h?" opt; do
+ case "$opt" in
+ T) netid="$OPTARG" ;;
+ \? | h) usage ;;
+ esac
+ done
+ shift $((OPTIND - 1))
+
+ [ "$netid" = "tcp" ] || usage
+
+ host="$1"
+ shift
+ [ "$host" = "localhost" ] || [ "$host" = "127.0.0.1" ] || usage
+
+ if [ $# -lt 1 ] || [ $# -gt 2 ]; then
+ usage
+ fi
+
+ p="$1"
+ v="$2"
+}
+
+parse_options "$@"
+
+for i in ${FAKE_RPCINFO_SERVICES}; do
+ # This is stupidly cumulative, but needs to happen after the
+ # initial split of the list above.
+ IFS="${IFS}:"
+ # Want glob expansion
+ # shellcheck disable=SC2086
+ set -- $i
+ # $1 = program, $2 = low version, $3 = high version
+
+ if [ "$1" = "$p" ]; then
+ if [ -n "$v" ]; then
+ if [ "$2" -le "$v" ] && [ "$v" -le "$3" ]; then
+ echo "program ${p} version ${v} ready and waiting"
+ exit 0
+ else
+ echo "rpcinfo: RPC: Program/version mismatch; low version = ${2}, high version = ${3}" >&2
+ echo "program ${p} version ${v} is not available"
+ exit 1
+ fi
+ else
+ for j in $(seq "$2" "$3"); do
+ echo "program ${p} version ${j} ready and waiting"
+ done
+ exit 0
+ fi
+ fi
+done
+
+echo "rpcinfo: RPC: Program not registered" >&2
+if [ -n "$v" ]; then
+ echo "program ${p} version ${v} is not available"
+else
+ echo "program ${p} is not available"
+fi
+
+exit 1
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/service b/ctdb/tests/UNIT/eventscripts/stubs/service
new file mode 100755
index 0000000..d706280
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/service
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+service_status_dir="${CTDB_TEST_TMP_DIR}/service_fake_status"
+mkdir -p "$service_status_dir"
+
+service="$1"
+flag="${service_status_dir}/${service}"
+
+start()
+{
+ # The flag file marks the service as running, so refuse to start twice
+ if [ -f "$flag" ]; then
+ echo "service: can't start ${service} - already running"
+ exit 1
+ fi
+ touch "$flag"
+ echo "Starting ${service}: OK"
+}
+
+stop()
+{
+ # Without the flag file the service is not running: nothing to stop
+ if [ ! -f "$flag" ]; then
+ echo "service: can't stop ${service} - not running"
+ exit 1
+ fi
+ echo "Stopping ${service}: OK"
+ rm -f "$flag"
+}
+
+case "$2" in
+start)
+ start
+ ;;
+stop)
+ stop
+ ;;
+restart | reload)
+ stop
+ start
+ ;;
+status)
+ if [ -f "$flag" ]; then
+ echo "$service running"
+ exit 0
+ else
+ echo "$service not running"
+ exit 3
+ fi
+ ;;
+force-started)
+ # For test setup...
+ touch "$flag"
+ ;;
+force-stopped)
+ # For test setup...
+ rm -f "$flag"
+ ;;
+*)
+ echo "service $service $2 not supported"
+ exit 1
+ ;;
+esac
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/sleep b/ctdb/tests/UNIT/eventscripts/stubs/sleep
new file mode 100755
index 0000000..0d0e82b
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/sleep
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+if [ "$FAKE_SLEEP_REALLY" = "yes" ]; then
+ /bin/sleep "$@"
+elif [ -n "$FAKE_SLEEP_FORCE" ]; then
+ /bin/sleep "$FAKE_SLEEP_FORCE"
+else
+ :
+fi
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/smnotify b/ctdb/tests/UNIT/eventscripts/stubs/smnotify
new file mode 100755
index 0000000..5606b3d
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/smnotify
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+usage()
+{
+ _prog="${0##*/}" # basename
+ cat <<EOF
+Usage: ${_prog} --client=CLIENT --ip=IP --server=SERVER --stateval=STATEVAL
+EOF
+ exit 1
+}
+
+cip=""
+sip=""
+mon_name=""
+state=""
+
+while [ $# -gt 0 ]; do
+ case "$1" in
+ --client)
+ cip="$2"
+ shift 2
+ ;;
+ --client=*)
+ cip="${1#*=}"
+ shift
+ ;;
+ --ip)
+ sip="$2"
+ shift 2
+ ;;
+ --ip=*)
+ sip="${1#*=}"
+ shift
+ ;;
+ --server)
+ mon_name="$2"
+ shift 2
+ ;;
+ --server=*)
+ mon_name="${1#*=}"
+ shift
+ ;;
+ --stateval)
+ state="$2"
+ shift 2
+ ;;
+ --stateval=*)
+ state="${1#*=}"
+ shift
+ ;;
+ --)
+ shift
+ break
+ ;;
+ -*) usage ;;
+ *) break ;;
+ esac
+done
+[ $# -eq 0 ] || usage
+
+if [ -z "$cip" ] || [ -z "$sip" ] || [ -z "$mon_name" ] || [ -z "$state" ]; then
+ usage
+fi
+
+echo "SM_NOTIFY: ${sip} -> ${cip}, MON_NAME=${mon_name}, STATE=${state}"
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/ss b/ctdb/tests/UNIT/eventscripts/stubs/ss
new file mode 100755
index 0000000..c1199fe
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/ss
@@ -0,0 +1,206 @@
+#!/bin/sh
+
+prog="ss"
+
+usage()
+{
+ cat >&2 <<EOF
+Usage: $prog { -t|--tcp | -x|--unix } [options] [ FILTER ]
+
+A fake ss stub that prints items depending on the variables
+FAKE_NETSTAT_TCP_ESTABLISHED, FAKE_TCP_LISTEN,
+FAKE_NETSTAT_UNIX_LISTEN, depending on command-line options.
+
+Note that -n is ignored.
+
+EOF
+ exit 1
+}
+
+not_supported()
+{
+ echo "Options not supported in stub: $*" >&2
+ usage
+}
+
+############################################################
+
+#
+parse_filter()
+{
+ # Very limited implementation:
+ # We only expect to find || inside parentheses
+ # We don't expect to see && - it is implied by juxtaposition
+ # Operator for port comparison is ignored and assumed to be ==
+
+ # Build lists of source ports and source IP addresses where
+ # each entry is surrounded by '|' characters. These lists can
+ # be easily "searched" using the POSIX prefix and suffix
+ # removal operators.
+ in_parens=false
+ sports="|"
+ srcs="|"
+
+ while [ -n "$1" ]; do
+ case "$1" in
+ \()
+ in_parens=true
+ shift
+ ;;
+ \))
+ in_parens=false
+ shift
+ ;;
+ \|\|)
+ if ! $in_parens; then
+ not_supported "|| in parentheses"
+ fi
+ shift
+ ;;
+ sport)
+ p="${3#:}"
+ sports="${sports}${p}|"
+ shift 3
+ ;;
+ src)
+ ip="${2#\[}"
+ ip="${ip%\]}"
+ srcs="${srcs}${ip}|"
+ shift 2
+ ;;
+ *)
+ usage
+ ;;
+ esac
+ done
+}
+
+# Check if socket has matches in both ok_ips and ok_ports
+filter_socket()
+{
+ ok_ips="$1"
+ ok_ports="$2"
+ socket="$3"
+ ip="${socket%:*}"
+ port="${socket##*:}"
+
+ # A list consisting of just "|" is empty, which means "accept anything"
+ case "$ok_ports" in
+ "|" | *"|${port}|"*) : ;;
+ *) return 1 ;;
+ esac
+ case "$ok_ips" in
+ "|" | *"|${ip}|"*) : ;;
+ *) return 1 ;;
+ esac
+ return 0
+}
+
+ss_tcp_established()
+{
+ if $header; then
+ echo "Recv-Q Send-Q Local Address:Port Peer Address:Port"
+ fi
+
+ # Yes, lose the quoting so we can do a hacky parsing job
+ # shellcheck disable=SC2048,SC2086
+ parse_filter $*
+
+ for i in $FAKE_NETSTAT_TCP_ESTABLISHED; do
+ src="${i%|*}"
+ dst="${i#*|}"
+ if filter_socket "$srcs" "$sports" "$src"; then
+ echo 0 0 "$src" "$dst"
+ fi
+ done
+
+ if [ -z "$FAKE_NETSTAT_TCP_ESTABLISHED_FILE" ]; then
+ return
+ fi
+ while read -r src dst; do
+ if filter_socket "$srcs" "$sports" "$src"; then
+ echo 0 0 "$src" "$dst"
+ fi
+ done <"$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"
+}
+
+############################################################
+
+unix_listen()
+{
+ if $header; then
+ cat <<EOF
+Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port"
+EOF
+ fi
+
+ # Yes, lose the quoting so we can do a hacky parsing job
+ # shellcheck disable=SC2048,SC2086
+ parse_filter $*
+
+ _n=12345
+ for _s in $FAKE_NETSTAT_UNIX_LISTEN; do
+ # ss matches Unix domain sockets as either src or
+ # sport.
+ if filter_socket "$srcs" "$sports" "${_s}:" ||
+ filter_socket "$srcs" "$sports" ":${_s}"; then
+ printf "u_str LISTEN 0 128 %s %d * 0\n" "$_s" "$_n"
+ _n=$((_n + 1))
+ fi
+ done
+}
+
+############################################################
+
+# Defaults.
+tcp=false
+unix=false
+all=false
+listen=false
+header=true
+
+orig="$*"
+
+while getopts "txnalHh?" opt; do
+ case "$opt" in
+ t) tcp=true ;;
+ x) unix=true ;;
+ l) listen=true ;;
+ a) all=true ;;
+ H) header=false ;;
+ n) : ;;
+ \? | h) usage ;;
+ esac
+done
+shift $((OPTIND - 1))
+
+$tcp || $unix || not_supported "$*"
+if [ -z "$all" ]; then # NOTE(review): $all is always "true"/"false", so this never fires
+ not_supported "$*"
+fi
+
+if $tcp; then
+ if [ "$1" != "state" ] || [ "$2" != "established" ] || $listen; then
+ usage
+ fi
+
+ shift 2
+
+ # Yes, lose the quoting so we can do a hacky parsing job
+ # shellcheck disable=SC2048,SC2086
+ ss_tcp_established $*
+
+ exit
+fi
+
+if $unix; then
+ if ! $listen; then
+ not_supported "$orig"
+ fi
+
+ # Yes, lose the quoting so we can do a hacky parsing job
+ # shellcheck disable=SC2048,SC2086
+ unix_listen $*
+
+ exit
+fi
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/stat b/ctdb/tests/UNIT/eventscripts/stubs/stat
new file mode 100755
index 0000000..840265f
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/stat
@@ -0,0 +1,71 @@
+#!/bin/sh
+
+usage()
+{
+ echo "stat -c FMT FILE ..."
+ exit 1
+}
+
+format=""
+
+while getopts "c:h:?" opt; do
+ case "$opt" in
+ c) format="$OPTARG" ;;
+ \? | h) usage ;;
+ esac
+done
+shift $((OPTIND - 1))
+
+fake_device_id()
+{
+ _path="$1"
+
+ _t=$(echo "$FAKE_FILE_ID_MAP" |
+ awk -v path="${_path}" '$1 == path { print $2 }')
+ _major_minor="${_t%:*}"
+ _major="0x${_major_minor%:*}"
+ _minor="0x${_major_minor#*:}"
+ _device_id=$((_major * 256 + _minor))
+ echo "$_device_id"
+}
+
+fake_inode()
+{
+ _path="$1"
+
+ _t=$(echo "$FAKE_FILE_ID_MAP" |
+ awk -v path="${_path}" '$1 == path { print $2 }')
+ echo "${_t##*:}"
+}
+
+if [ -n "$format" ]; then
+ for f; do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ case "$f" in
+ /*) path="$f" ;;
+ *) path="${PWD}/${f}" ;;
+ esac
+
+ case "$format" in
+ "s#[0-9a-f]*:[0-9a-f]*:%i #%n #")
+ inode=$(fake_inode "$path")
+ echo "s#[0-9a-f]*:[0-9a-f]*:${inode} #${f} #"
+ ;;
+ "%d:%i")
+ device_id=$(fake_device_id "$path")
+ inode=$(fake_inode "$path")
+ echo "${device_id}:${inode}"
+ ;;
+ *)
+ echo "Unsupported format \"${format}\""
+ usage
+ ;;
+ esac
+ done
+
+ exit
+fi
+
+usage
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/tdb_mutex_check b/ctdb/tests/UNIT/eventscripts/stubs/tdb_mutex_check
new file mode 100755
index 0000000..6cc7572
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/tdb_mutex_check
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+if [ -z "$FAKE_TDB_MUTEX_CHECK" ]; then
+ exit
+fi
+
+echo "$FAKE_TDB_MUTEX_CHECK" |
+ while read -r pid chain; do
+ echo "[${chain}] pid=${pid}"
+ done
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/tdbdump b/ctdb/tests/UNIT/eventscripts/stubs/tdbdump
new file mode 100755
index 0000000..92dcb8e
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/tdbdump
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+if [ "$FAKE_TDB_IS_OK" = "yes" ]; then
+ echo "TDB good"
+ exit 0
+else
+ echo "TDB busted"
+ exit 1
+fi
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/tdbtool b/ctdb/tests/UNIT/eventscripts/stubs/tdbtool
new file mode 100755
index 0000000..df83160
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/tdbtool
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+do_help()
+{
+ if [ "$FAKE_TDBTOOL_SUPPORTS_CHECK" = "yes" ]; then
+ echo "check"
+ fi
+ exit 0
+}
+
+do_check()
+{
+ if [ "$FAKE_TDB_IS_OK" = "yes" ]; then
+ echo "Database integrity is OK"
+ else
+ echo "Database is busted"
+ fi
+ exit 0
+}
+
+do_cmd()
+{
+ case "$*" in
+ *check) do_check ;;
+ help) do_help ;;
+ "") read -r tdb_cmd && [ -n "$tdb_cmd" ] && do_cmd "$tdb_cmd" ;;
+ *)
+ echo "$0: Not implemented: $*"
+ exit 1
+ ;;
+ esac
+}
+
+do_cmd "$@"
+
+exit 0
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/testparm b/ctdb/tests/UNIT/eventscripts/stubs/testparm
new file mode 100755
index 0000000..3a97e91
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/testparm
@@ -0,0 +1,84 @@
+#!/bin/sh
+
+not_implemented()
+{
+ echo "testparm: option \"$1\" not implemented in stub" >&2
+ exit 2
+}
+
+error()
+{
+ cat >&2 <<EOF
+Load smb config files from ${CTDB_SYS_ETCDIR}/samba/smb.conf
+rlimit_max: increasing rlimit_max (2048) to minimum Windows limit (16384)
+EOF
+
+ for i in $FAKE_SHARES; do
+ bi=$(basename "$i")
+ echo "Processing section \"[${bi}]\""
+ done >&2
+
+ cat >&2 <<EOF
+Loaded services file OK.
+WARNING: 'workgroup' and 'netbios name' must differ.
+
+EOF
+
+ exit 1
+}
+
+timeout()
+{
+ echo "$0: INTERNAL ERROR - timeout stub should avoid this" >&2
+}
+
+if [ -n "$FAKE_TESTPARM_FAIL" ]; then
+ error
+fi
+
+if [ -n "$FAKE_TIMEOUT" ]; then
+ timeout
+fi
+
+# Ensure that testparm always uses our canned configuration instead of
+# the global one, unless some other file is specified.
+
+file=""
+param=""
+for i; do
+ case "$i" in
+ --parameter-name=*) param="${i#--parameter-name=}" ;;
+ -*) : ;;
+ *) file="$i" ;;
+ esac
+done
+
+# Parse out parameter request
+if [ -n "$param" ]; then
+ sed -n \
+ -e "s|^[[:space:]]*${param}[[:space:]]*=[[:space:]]\(..*\)|\1|p" \
+ "${file:-"${CTDB_SYS_ETCDIR}/samba/smb.conf"}"
+ exit 0
+fi
+
+if [ -n "$file" ]; then
+ # This should include the shares, since this is used when the
+ # samba eventscript caches the output.
+ cat "$file"
+else
+ # We force our own smb.conf and add the shares.
+ cat "${CTDB_SYS_ETCDIR}/samba/smb.conf"
+
+ for i in $FAKE_SHARES; do
+ bi=$(basename "$i")
+ cat <<EOF
+
+[${bi}]
+ path = $i
+ comment = fake share $bi
+ guest ok = no
+ read only = no
+ browsable = yes
+EOF
+ done
+fi
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/timeout b/ctdb/tests/UNIT/eventscripts/stubs/timeout
new file mode 100755
index 0000000..26132ee
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/timeout
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+if [ -n "$FAKE_TIMEOUT" ]; then
+ exit 124
+else
+ shift 1
+ exec "$@"
+fi
diff --git a/ctdb/tests/UNIT/eventscripts/stubs/wbinfo b/ctdb/tests/UNIT/eventscripts/stubs/wbinfo
new file mode 100755
index 0000000..b4bd9f2
--- /dev/null
+++ b/ctdb/tests/UNIT/eventscripts/stubs/wbinfo
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+if [ "$FAKE_WBINFO_FAIL" = "yes" ]; then
+ exit 1
+fi
+
+exit 0
diff --git a/ctdb/tests/UNIT/onnode/0001.sh b/ctdb/tests/UNIT/onnode/0001.sh
new file mode 100755
index 0000000..2853374
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0001.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE all hostname"
+
+define_test "$cmd" "all nodes OK"
+
+required_result <<EOF
+
+>> NODE: 192.168.1.101 <<
+-n 192.168.1.101 hostname
+
+>> NODE: 192.168.1.102 <<
+-n 192.168.1.102 hostname
+
+>> NODE: 192.168.1.103 <<
+-n 192.168.1.103 hostname
+
+>> NODE: 192.168.1.104 <<
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0002.sh b/ctdb/tests/UNIT/onnode/0002.sh
new file mode 100755
index 0000000..c3c8c77
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0002.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE -q all hostname"
+
+define_test "$cmd" "all nodes OK"
+
+required_result <<EOF
+-n 192.168.1.101 hostname
+-n 192.168.1.102 hostname
+-n 192.168.1.103 hostname
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0003.sh b/ctdb/tests/UNIT/onnode/0003.sh
new file mode 100755
index 0000000..d79bca2
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0003.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE -p all hostname"
+
+define_test "$cmd" "all nodes OK"
+
+required_result <<EOF
+[192.168.1.101] -n 192.168.1.101 hostname
+[192.168.1.102] -n 192.168.1.102 hostname
+[192.168.1.103] -n 192.168.1.103 hostname
+[192.168.1.104] -n 192.168.1.104 hostname
+EOF
+
+simple_test -s $cmd
diff --git a/ctdb/tests/UNIT/onnode/0004.sh b/ctdb/tests/UNIT/onnode/0004.sh
new file mode 100755
index 0000000..d0986b2
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0004.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE -pq all hostname"
+
+define_test "$cmd" "all nodes OK"
+
+required_result <<EOF
+-n 192.168.1.101 hostname
+-n 192.168.1.102 hostname
+-n 192.168.1.103 hostname
+-n 192.168.1.104 hostname
+EOF
+
+simple_test -s $cmd
diff --git a/ctdb/tests/UNIT/onnode/0005.sh b/ctdb/tests/UNIT/onnode/0005.sh
new file mode 100755
index 0000000..0eccbb0
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0005.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE 3 hostname"
+
+define_test "$cmd" "all nodes OK"
+
+required_result <<EOF
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0006.sh b/ctdb/tests/UNIT/onnode/0006.sh
new file mode 100755
index 0000000..b027850
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0006.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE -v 3 hostname"
+
+define_test "$cmd" "all nodes OK"
+
+required_result <<EOF
+
+>> NODE: 192.168.1.104 <<
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0010.sh b/ctdb/tests/UNIT/onnode/0010.sh
new file mode 100755
index 0000000..241cf58
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0010.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE 4 hostname"
+
+define_test "$cmd" "invalid pnn 4"
+
+required_result 1 <<EOF
+onnode: "node 4" does not exist
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0011.sh b/ctdb/tests/UNIT/onnode/0011.sh
new file mode 100755
index 0000000..4604533
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0011.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE 99 hostname"
+
+define_test "$cmd" "invalid pnn 99"
+
+required_result 1 <<EOF
+onnode: "node 99" does not exist
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0070.sh b/ctdb/tests/UNIT/onnode/0070.sh
new file mode 100755
index 0000000..d649f82
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0070.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE ok hostname"
+
+define_test "$cmd" "all nodes OK"
+
+ctdb_set_output <<EOF
+|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.1.101|0|0|0|0|0|0|0|Y|
+|1|192.168.1.102|0|0|0|0|0|0|0|N|
+|2|192.168.1.103|0|0|0|0|0|0|0|N|
+|3|192.168.1.104|0|0|0|0|0|0|0|N|
+EOF
+
+required_result <<EOF
+
+>> NODE: 192.168.1.101 <<
+-n 192.168.1.101 hostname
+
+>> NODE: 192.168.1.102 <<
+-n 192.168.1.102 hostname
+
+>> NODE: 192.168.1.103 <<
+-n 192.168.1.103 hostname
+
+>> NODE: 192.168.1.104 <<
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0071.sh b/ctdb/tests/UNIT/onnode/0071.sh
new file mode 100755
index 0000000..4f945ac
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0071.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE ok hostname"
+
+define_test "$cmd" "2nd node disconnected"
+
+ctdb_set_output <<EOF
+|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.1.101|0|0|0|0|0|0|0|Y|
+|1|192.168.1.102|1|0|0|0|0|0|0|N|
+|2|192.168.1.103|0|0|0|0|0|0|0|N|
+|3|192.168.1.104|0|0|0|0|0|0|0|N|
+EOF
+
+required_result <<EOF
+
+>> NODE: 192.168.1.101 <<
+-n 192.168.1.101 hostname
+
+>> NODE: 192.168.1.103 <<
+-n 192.168.1.103 hostname
+
+>> NODE: 192.168.1.104 <<
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0072.sh b/ctdb/tests/UNIT/onnode/0072.sh
new file mode 100755
index 0000000..51a4c46
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0072.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE ok hostname"
+
+define_test "$cmd" "2nd node disconnected, extra status columns"
+
+ctdb_set_output <<EOF
+|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|X1|X2|X3|X4|
+|0|192.168.1.101|0|0|0|0|0|0|0|0|0|0|
+|1|192.168.1.102|1|0|0|0|0|0|0|0|0|0|
+|2|192.168.1.103|0|0|0|0|0|0|0|0|0|0|
+|3|192.168.1.104|0|0|0|0|0|0|0|0|0|0|
+EOF
+
+required_result <<EOF
+
+>> NODE: 192.168.1.101 <<
+-n 192.168.1.101 hostname
+
+>> NODE: 192.168.1.103 <<
+-n 192.168.1.103 hostname
+
+>> NODE: 192.168.1.104 <<
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/0075.sh b/ctdb/tests/UNIT/onnode/0075.sh
new file mode 100755
index 0000000..92fe220
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/0075.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+cmd="$ONNODE con hostname"
+
+define_test "$cmd" "1st node disconnected"
+
+ctdb_set_output <<EOF
+|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.1.101|1|0|0|0|0|0|0|N|
+|1|192.168.1.102|0|0|0|0|0|0|0|Y|
+|2|192.168.1.103|0|0|0|0|0|0|0|N|
+|3|192.168.1.104|0|0|0|0|0|0|0|N|
+EOF
+
+required_result <<EOF
+
+>> NODE: 192.168.1.102 <<
+-n 192.168.1.102 hostname
+
+>> NODE: 192.168.1.103 <<
+-n 192.168.1.103 hostname
+
+>> NODE: 192.168.1.104 <<
+-n 192.168.1.104 hostname
+EOF
+
+simple_test $cmd
diff --git a/ctdb/tests/UNIT/onnode/etc-ctdb/nodes b/ctdb/tests/UNIT/onnode/etc-ctdb/nodes
new file mode 100644
index 0000000..e2fe268
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/etc-ctdb/nodes
@@ -0,0 +1,4 @@
+192.168.1.101
+192.168.1.102
+192.168.1.103
+192.168.1.104
diff --git a/ctdb/tests/UNIT/onnode/scripts/local.sh b/ctdb/tests/UNIT/onnode/scripts/local.sh
new file mode 100644
index 0000000..5b830c8
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/scripts/local.sh
@@ -0,0 +1,64 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+# Default to just "onnode".
+: "${ONNODE:=onnode}"
+
+# Augment PATH with relevant stubs/ directory
+stubs_dir="${CTDB_TEST_SUITE_DIR}/stubs"
+[ -d "${stubs_dir}" ] || die "Failed to locate stubs/ subdirectory"
+PATH="${stubs_dir}:${PATH}"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "etc-ctdb" \
+ functions
+
+define_test ()
+{
+ _f=$(basename "$0")
+
+ echo "$_f $1 - $2"
+}
+
+# Set output for ctdb command. Optional 1st argument is the return code.
+ctdb_set_output ()
+{
+ _out="${CTDB_TEST_TMP_DIR}/ctdb.out"
+ cat >"$_out"
+
+ _rc="${CTDB_TEST_TMP_DIR}/ctdb.rc"
+ echo "${1:-0}" >"$_rc"
+
+ test_cleanup "rm -f $_out $_rc"
+}
+
+extra_footer ()
+{
+ cat <<EOF
+--------------------------------------------------
+CTDB_BASE="$CTDB_BASE"
+ctdb client is $(which ctdb)
+--------------------------------------------------
+EOF
+}
+
+simple_test ()
+{
+ _sort="cat"
+ if [ "$1" = "-s" ] ; then
+ shift
+ _sort="sort"
+ fi
+
+ if $CTDB_TEST_COMMAND_TRACE ; then
+ _onnode=$(which "$1") ; shift
+ _out=$(bash -x "$_onnode" "$@" 2>&1)
+ else
+ _out=$("$@" 2>&1)
+ fi
+ _rc=$?
+ _out=$(echo "$_out" | $_sort )
+
+ # Get the return code back into $?
+ (exit $_rc)
+
+ result_check
+}
diff --git a/ctdb/tests/UNIT/onnode/stubs/ctdb b/ctdb/tests/UNIT/onnode/stubs/ctdb
new file mode 100755
index 0000000..cca34c5
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/stubs/ctdb
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+# Fake ctdb client for onnode tests: replays a stored output file and exit code.
+
+out="${CTDB_TEST_TMP_DIR}/ctdb.out"
+if [ -r "$out" ] ; then
+ cat "$out"
+ # Exit with the code stored in ctdb.rc when that file is readable, else 0.
+ rc="${CTDB_TEST_TMP_DIR}/ctdb.rc"
+ if [ -r "$rc" ] ; then
+ exit $(cat "$rc")
+ fi
+
+ exit 0
+fi
+# No stored output is readable: report the unhandled arguments and fail.
+echo "fake ctdb: no implementation for \"$*\""
+
+exit 1
diff --git a/ctdb/tests/UNIT/onnode/stubs/ssh b/ctdb/tests/UNIT/onnode/stubs/ssh
new file mode 100755
index 0000000..7be778f
--- /dev/null
+++ b/ctdb/tests/UNIT/onnode/stubs/ssh
@@ -0,0 +1,2 @@
+#!/bin/sh
+echo "$*" # Fake ssh stub: print all arguments on one line instead of connecting
diff --git a/ctdb/tests/UNIT/shellcheck/base_scripts.sh b/ctdb/tests/UNIT/shellcheck/base_scripts.sh
new file mode 100755
index 0000000..cbb8502
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/base_scripts.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "base scripts"
+
+shellcheck_test \
+ "${CTDB_SCRIPTS_BASE}/ctdb-crash-cleanup.sh" \
+ "${CTDB_SCRIPTS_BASE}/debug-hung-script.sh" \
+ "${CTDB_SCRIPTS_BASE}/debug_locks.sh" \
+ "${CTDB_SCRIPTS_BASE}/nfs-linux-kernel-callout" \
+ "${CTDB_SCRIPTS_BASE}/statd-callout"
diff --git a/ctdb/tests/UNIT/shellcheck/ctdb_helpers.sh b/ctdb/tests/UNIT/shellcheck/ctdb_helpers.sh
new file mode 100755
index 0000000..f6c7e31
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/ctdb_helpers.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "ctdb helpers"
+
+shellcheck_test \
+ "${CTDB_SCRIPTS_TOOLS_HELPER_DIR}/ctdb_lvs" \
+ "${CTDB_SCRIPTS_TOOLS_HELPER_DIR}/ctdb_natgw"
diff --git a/ctdb/tests/UNIT/shellcheck/event_scripts.sh b/ctdb/tests/UNIT/shellcheck/event_scripts.sh
new file mode 100755
index 0000000..dfb5ede
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/event_scripts.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "event scripts"
+
+shellcheck_test "${CTDB_SCRIPTS_DATA_DIR}/events/"*/[0-9][0-9].*
diff --git a/ctdb/tests/UNIT/shellcheck/functions.sh b/ctdb/tests/UNIT/shellcheck/functions.sh
new file mode 100755
index 0000000..7ce206d
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/functions.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "functions file"
+
+shellcheck_test -s sh "${CTDB_SCRIPTS_BASE}/functions"
diff --git a/ctdb/tests/UNIT/shellcheck/init_script.sh b/ctdb/tests/UNIT/shellcheck/init_script.sh
new file mode 100755
index 0000000..1e1d54c
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/init_script.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "init script"
+
+script="$CTDB_SCRIPTS_INIT_SCRIPT"
+# No configured path: try /etc/init.d, then /usr/local/etc/init.d, else skip.
+if [ -z "$script" ] ; then
+ script="/etc/init.d/ctdb"
+ if [ ! -r "$script" ] ; then
+ script="/usr/local/etc/init.d/ctdb"
+ fi
+ if [ ! -r "$script" ] ; then
+ ctdb_test_skip "Unable to find ctdb init script"
+ fi
+fi
+
+shellcheck_test "$script"
diff --git a/ctdb/tests/UNIT/shellcheck/scripts/local.sh b/ctdb/tests/UNIT/shellcheck/scripts/local.sh
new file mode 100644
index 0000000..07e72c3
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/scripts/local.sh
@@ -0,0 +1,33 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+. "${TEST_SCRIPTS_DIR}/script_install_paths.sh"
+
+define_test ()
+{
+ # Print "<script basename without .sh> - <$1>", name padded to 28 columns.
+ _f=$(basename "$0" ".sh")
+ printf "%-28s - %s\n" "$_f" "$1"
+}
+shellcheck_test ()
+{
+ ok_null # defined elsewhere; presumably expects success with no output - TODO confirm
+ if type shellcheck >/dev/null 2>&1 ; then # only run when shellcheck is available
+ # Skip some recent checks:
+ #
+ # SC1090: Can't follow non-constant source. Use a
+ # directive to specify location.
+ # SC1091: Not following: FILE was not specified as
+ # input (see shellcheck -x).
+ # - Shellcheck doesn't handle our includes
+ # very well. Adding directives to handle
+ # include for both in-tree and installed
+ # cases just isn't going to be possible.
+ # SC2162: read without -r will mangle backslashes.
+ # - We never read things with backslashes,
+ # unnecessary churn.
+ _excludes="SC1090,SC1091,SC2162"
+ unit_test shellcheck --exclude="$_excludes" "$@" # "$@" may start with shellcheck options such as "-s sh"
+ else
+ ctdb_test_skip "shellcheck not installed"
+ fi
+}
diff --git a/ctdb/tests/UNIT/shellcheck/tests.sh b/ctdb/tests/UNIT/shellcheck/tests.sh
new file mode 100755
index 0000000..fe55381
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/tests.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "tests"
+
+if "$CTDB_TESTS_ARE_INSTALLED" ; then
+ run_tests="${CTDB_SCRIPTS_TESTS_BIN_DIR}/ctdb_run_tests"
+ local_daemons="${CTDB_SCRIPTS_TESTS_BIN_DIR}/ctdb_local_daemons"
+else
+ run_tests="${CTDB_TEST_DIR}/run_tests.sh"
+ local_daemons="${CTDB_TEST_DIR}/local_daemons.sh"
+fi
+
+# Scripts
+shellcheck_test \
+ "$run_tests" \
+ "$local_daemons" \
+ "${TEST_SCRIPTS_DIR}/test_wrap"
+
+# Includes
+shellcheck_test -s sh \
+ "${TEST_SCRIPTS_DIR}/common.sh" \
+ "${TEST_SCRIPTS_DIR}/script_install_paths.sh" \
+ "${TEST_SCRIPTS_DIR}/unit.sh"
+
+shellcheck_test -s bash \
+ "${TEST_SCRIPTS_DIR}/cluster.bash" \
+ "${TEST_SCRIPTS_DIR}/integration.bash" \
+ "${TEST_SCRIPTS_DIR}/integration_local_daemons.bash" \
+ "${TEST_SCRIPTS_DIR}/integration_real_cluster.bash"
+
+# Test scripts and stubs
+shellcheck_test -s sh \
+ "${CTDB_TEST_DIR}/UNIT/eventscripts/scripts/"* \
+ "${CTDB_TEST_DIR}/UNIT/eventscripts/stubs/"*
diff --git a/ctdb/tests/UNIT/shellcheck/tools.sh b/ctdb/tests/UNIT/shellcheck/tools.sh
new file mode 100755
index 0000000..2cd322c
--- /dev/null
+++ b/ctdb/tests/UNIT/shellcheck/tools.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "tools"
+
+shellcheck_test \
+ "${CTDB_SCRIPTS_TOOLS_BIN_DIR}/onnode" \
+ "${CTDB_SCRIPTS_TOOLS_BIN_DIR}/ctdb_diagnostics"
diff --git a/ctdb/tests/UNIT/takeover/README b/ctdb/tests/UNIT/takeover/README
new file mode 100644
index 0000000..764f389
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/README
@@ -0,0 +1,5 @@
+Unit tests for the CTDB IP allocation algorithm(s).
+
+Test case filenames look like <algorithm>.NNN.sh, where <algorithm>
+indicates the IP allocation algorithm to use. These use the
+ctdb_takeover_test test program.
diff --git a/ctdb/tests/UNIT/takeover/det.001.sh b/ctdb/tests/UNIT/takeover/det.001.sh
new file mode 100755
index 0000000..ad50287
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/det.001.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+define_test "3 nodes, 1 healthy"
+
+required_result <<EOF
+${TEST_DATE_STAMP}Deterministic IPs enabled. Resetting all ip allocations
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.254 from 0
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.254 from 0
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.251 from 0
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.250 from 1
+192.168.21.254 2
+192.168.21.253 2
+192.168.21.252 2
+192.168.20.254 2
+192.168.20.253 2
+192.168.20.252 2
+192.168.20.251 2
+192.168.20.250 2
+192.168.20.249 2
+EOF
+
+simple_test 2,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/det.002.sh b/ctdb/tests/UNIT/takeover/det.002.sh
new file mode 100755
index 0000000..b54edea
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/det.002.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+define_test "3 nodes, 2 healthy"
+
+required_result <<EOF
+${TEST_DATE_STAMP}Deterministic IPs enabled. Resetting all ip allocations
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.250 from 1
+192.168.21.254 0
+192.168.21.253 0
+192.168.21.252 2
+192.168.20.254 0
+192.168.20.253 2
+192.168.20.252 2
+192.168.20.251 0
+192.168.20.250 0
+192.168.20.249 2
+EOF
+
+simple_test 0,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/det.003.sh b/ctdb/tests/UNIT/takeover/det.003.sh
new file mode 100755
index 0000000..931c498
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/det.003.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+define_test "3 nodes, 1 -> all healthy"
+
+required_result <<EOF
+${TEST_DATE_STAMP}Deterministic IPs enabled. Resetting all ip allocations
+192.168.21.254 0
+192.168.21.253 1
+192.168.21.252 2
+192.168.20.254 0
+192.168.20.253 1
+192.168.20.252 2
+192.168.20.251 0
+192.168.20.250 1
+192.168.20.249 2
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 1
+192.168.20.250 1
+192.168.20.251 1
+192.168.20.252 1
+192.168.20.253 1
+192.168.20.254 1
+192.168.21.252 1
+192.168.21.253 1
+192.168.21.254 1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/det.004.sh b/ctdb/tests/UNIT/takeover/det.004.sh
new file mode 100755
index 0000000..3673cc1
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/det.004.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+define_test "3 nodes, all healthy with home_nodes"
+
+home_nodes="$CTDB_BASE"/home_nodes
+
+cat > "$home_nodes" <<EOF
+192.168.21.254 2
+192.168.20.251 1
+EOF
+
+required_result <<EOF
+${TEST_DATE_STAMP}Deterministic IPs enabled. Resetting all ip allocations
+192.168.21.254 2
+192.168.21.253 1
+192.168.21.252 2
+192.168.20.254 0
+192.168.20.253 1
+192.168.20.252 2
+192.168.20.251 1
+192.168.20.250 1
+192.168.20.249 2
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 1
+192.168.20.250 1
+192.168.20.251 1
+192.168.20.252 1
+192.168.20.253 1
+192.168.20.254 1
+192.168.21.252 1
+192.168.21.253 1
+192.168.21.254 1
+EOF
+
+rm "$home_nodes"
diff --git a/ctdb/tests/UNIT/takeover/det.005.sh b/ctdb/tests/UNIT/takeover/det.005.sh
new file mode 100755
index 0000000..aaa5e0f
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/det.005.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+define_test "3 nodes, 2 healthy with home_nodes"
+
+home_nodes="$CTDB_BASE"/home_nodes
+
+cat > "$home_nodes" <<EOF
+192.168.21.254 2
+192.168.20.251 1
+EOF
+
+required_result <<EOF
+${TEST_DATE_STAMP}Deterministic IPs enabled. Resetting all ip allocations
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.251 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.250 from 1
+192.168.21.254 2
+192.168.21.253 0
+192.168.21.252 2
+192.168.20.254 0
+192.168.20.253 0
+192.168.20.252 2
+192.168.20.251 0
+192.168.20.250 0
+192.168.20.249 2
+EOF
+
+simple_test 0,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
+
+rm "$home_nodes"
diff --git a/ctdb/tests/UNIT/takeover/det.006.sh b/ctdb/tests/UNIT/takeover/det.006.sh
new file mode 100755
index 0000000..504c430
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/det.006.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+define_test "3 nodes, 1 healthy with home_nodes"
+
+home_nodes="$CTDB_BASE"/home_nodes
+
+cat > "$home_nodes" <<EOF
+192.168.21.254 2
+192.168.20.251 1
+EOF
+
+required_result <<EOF
+${TEST_DATE_STAMP}Deterministic IPs enabled. Resetting all ip allocations
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.254 from 0
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.251 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.250 from 1
+192.168.21.254 2
+192.168.21.253 2
+192.168.21.252 2
+192.168.20.254 2
+192.168.20.253 2
+192.168.20.252 2
+192.168.20.251 2
+192.168.20.250 2
+192.168.20.249 2
+EOF
+
+simple_test 2,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
+
+rm "$home_nodes"
diff --git a/ctdb/tests/UNIT/takeover/lcp2.001.sh b/ctdb/tests/UNIT/takeover/lcp2.001.sh
new file mode 100755
index 0000000..ee5b795
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.001.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 3 -> 1 healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 2
+192.168.21.253 2
+192.168.21.252 2
+192.168.20.254 2
+192.168.20.253 2
+192.168.20.252 2
+192.168.20.251 2
+192.168.20.250 2
+192.168.20.249 2
+EOF
+
+simple_test 2,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.002.sh b/ctdb/tests/UNIT/takeover/lcp2.002.sh
new file mode 100755
index 0000000..6489388
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.002.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 3 -> 2 healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 2
+192.168.21.253 2
+192.168.21.252 0
+192.168.20.254 2
+192.168.20.253 2
+192.168.20.252 0
+192.168.20.251 2
+192.168.20.250 0
+192.168.20.249 0
+EOF
+
+simple_test 0,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.003.sh b/ctdb/tests/UNIT/takeover/lcp2.003.sh
new file mode 100755
index 0000000..bdf2699
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.003.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 -> all healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 2
+192.168.21.253 0
+192.168.21.252 1
+192.168.20.254 2
+192.168.20.253 0
+192.168.20.252 1
+192.168.20.251 2
+192.168.20.250 0
+192.168.20.249 1
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 1
+192.168.20.250 1
+192.168.20.251 1
+192.168.20.252 1
+192.168.20.253 1
+192.168.20.254 1
+192.168.21.252 1
+192.168.21.253 1
+192.168.21.254 1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.004.sh b/ctdb/tests/UNIT/takeover/lcp2.004.sh
new file mode 100755
index 0000000..7ce97c3
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.004.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 -> all healthy, info logging"
+
+export CTDB_TEST_LOGLEVEL=INFO
+
+required_result <<EOF
+${TEST_DATE_STAMP}1 [-121363] -> 192.168.20.253 -> 0 [+0]
+${TEST_DATE_STAMP}1 [-105738] -> 192.168.20.251 -> 2 [+0]
+${TEST_DATE_STAMP}1 [-88649] -> 192.168.21.253 -> 0 [+14161]
+${TEST_DATE_STAMP}1 [-75448] -> 192.168.20.254 -> 2 [+15625]
+${TEST_DATE_STAMP}1 [-59823] -> 192.168.20.250 -> 0 [+29786]
+${TEST_DATE_STAMP}1 [-44198] -> 192.168.21.254 -> 2 [+28322]
+192.168.21.254 2
+192.168.21.253 0
+192.168.21.252 1
+192.168.20.254 2
+192.168.20.253 0
+192.168.20.252 1
+192.168.20.251 2
+192.168.20.250 0
+192.168.20.249 1
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 1
+192.168.20.250 1
+192.168.20.251 1
+192.168.20.252 1
+192.168.20.253 1
+192.168.20.254 1
+192.168.21.252 1
+192.168.21.253 1
+192.168.21.254 1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.005.sh b/ctdb/tests/UNIT/takeover/lcp2.005.sh
new file mode 100755
index 0000000..f579a94
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.005.sh
@@ -0,0 +1,198 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 -> all healthy, debug logging"
+
+export CTDB_TEST_LOGLEVEL=DEBUG
+
+required_result <<EOF
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES (UNASSIGNED)
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [0]
+${TEST_DATE_STAMP} 1 [539166]
+${TEST_DATE_STAMP} 2 [0]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [539166]
+${TEST_DATE_STAMP} 1 [-116718] -> 192.168.21.254 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-116718] -> 192.168.21.254 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-116971] -> 192.168.21.253 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-116971] -> 192.168.21.253 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-116971] -> 192.168.21.252 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-116971] -> 192.168.21.252 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-121110] -> 192.168.20.254 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-121110] -> 192.168.20.254 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.253 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.253 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.252 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.252 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.251 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.251 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.250 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-121363] -> 192.168.20.250 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-121110] -> 192.168.20.249 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-121110] -> 192.168.20.249 -> 2 [+0]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-121363] -> 192.168.20.253 -> 0 [+0]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [0]
+${TEST_DATE_STAMP} 1 [417803]
+${TEST_DATE_STAMP} 2 [0]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [417803]
+${TEST_DATE_STAMP} 1 [-102557] -> 192.168.21.254 -> 0 [+14161]
+${TEST_DATE_STAMP} 1 [-102557] -> 192.168.21.254 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-102810] -> 192.168.21.253 -> 0 [+14161]
+${TEST_DATE_STAMP} 1 [-102810] -> 192.168.21.253 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-102810] -> 192.168.21.252 -> 0 [+14161]
+${TEST_DATE_STAMP} 1 [-102810] -> 192.168.21.252 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-105234] -> 192.168.20.254 -> 0 [+15876]
+${TEST_DATE_STAMP} 1 [-105234] -> 192.168.20.254 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-105234] -> 192.168.20.252 -> 0 [+16129]
+${TEST_DATE_STAMP} 1 [-105234] -> 192.168.20.252 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-105738] -> 192.168.20.251 -> 0 [+15625]
+${TEST_DATE_STAMP} 1 [-105738] -> 192.168.20.251 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-105738] -> 192.168.20.250 -> 0 [+15625]
+${TEST_DATE_STAMP} 1 [-105738] -> 192.168.20.250 -> 2 [+0]
+${TEST_DATE_STAMP} 1 [-105485] -> 192.168.20.249 -> 0 [+15625]
+${TEST_DATE_STAMP} 1 [-105485] -> 192.168.20.249 -> 2 [+0]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-105738] -> 192.168.20.251 -> 2 [+0]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [0]
+${TEST_DATE_STAMP} 1 [312065]
+${TEST_DATE_STAMP} 2 [0]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [312065]
+${TEST_DATE_STAMP} 1 [-88396] -> 192.168.21.254 -> 0 [+14161]
+${TEST_DATE_STAMP} 1 [-88396] -> 192.168.21.254 -> 2 [+14161]
+${TEST_DATE_STAMP} 1 [-88649] -> 192.168.21.253 -> 0 [+14161]
+${TEST_DATE_STAMP} 1 [-88649] -> 192.168.21.253 -> 2 [+14161]
+${TEST_DATE_STAMP} 1 [-88649] -> 192.168.21.252 -> 0 [+14161]
+${TEST_DATE_STAMP} 1 [-88649] -> 192.168.21.252 -> 2 [+14161]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.254 -> 0 [+15876]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.254 -> 2 [+15625]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.252 -> 0 [+16129]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.252 -> 2 [+15625]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.250 -> 0 [+15625]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.250 -> 2 [+16129]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.249 -> 0 [+15625]
+${TEST_DATE_STAMP} 1 [-89609] -> 192.168.20.249 -> 2 [+15876]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-88649] -> 192.168.21.253 -> 0 [+14161]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [14161]
+${TEST_DATE_STAMP} 1 [223416]
+${TEST_DATE_STAMP} 2 [0]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [223416]
+${TEST_DATE_STAMP} 1 [-72520] -> 192.168.21.254 -> 0 [+30037]
+${TEST_DATE_STAMP} 1 [-72520] -> 192.168.21.254 -> 2 [+14161]
+${TEST_DATE_STAMP} 1 [-72520] -> 192.168.21.252 -> 0 [+30290]
+${TEST_DATE_STAMP} 1 [-72520] -> 192.168.21.252 -> 2 [+14161]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.254 -> 0 [+30037]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.254 -> 2 [+15625]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.252 -> 0 [+30290]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.252 -> 2 [+15625]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.250 -> 0 [+29786]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.250 -> 2 [+16129]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.249 -> 0 [+29786]
+${TEST_DATE_STAMP} 1 [-75448] -> 192.168.20.249 -> 2 [+15876]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-75448] -> 192.168.20.254 -> 2 [+15625]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [14161]
+${TEST_DATE_STAMP} 1 [147968]
+${TEST_DATE_STAMP} 2 [15625]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [147968]
+${TEST_DATE_STAMP} 1 [-58359] -> 192.168.21.254 -> 0 [+30037]
+${TEST_DATE_STAMP} 1 [-58359] -> 192.168.21.254 -> 2 [+28322]
+${TEST_DATE_STAMP} 1 [-58359] -> 192.168.21.252 -> 0 [+30290]
+${TEST_DATE_STAMP} 1 [-58359] -> 192.168.21.252 -> 2 [+28322]
+${TEST_DATE_STAMP} 1 [-59572] -> 192.168.20.252 -> 0 [+30290]
+${TEST_DATE_STAMP} 1 [-59572] -> 192.168.20.252 -> 2 [+31501]
+${TEST_DATE_STAMP} 1 [-59823] -> 192.168.20.250 -> 0 [+29786]
+${TEST_DATE_STAMP} 1 [-59823] -> 192.168.20.250 -> 2 [+31754]
+${TEST_DATE_STAMP} 1 [-59823] -> 192.168.20.249 -> 0 [+29786]
+${TEST_DATE_STAMP} 1 [-59823] -> 192.168.20.249 -> 2 [+31501]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-59823] -> 192.168.20.250 -> 0 [+29786]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [43947]
+${TEST_DATE_STAMP} 1 [88145]
+${TEST_DATE_STAMP} 2 [15625]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [88145]
+${TEST_DATE_STAMP} 1 [-44198] -> 192.168.21.254 -> 0 [+44198]
+${TEST_DATE_STAMP} 1 [-44198] -> 192.168.21.254 -> 2 [+28322]
+${TEST_DATE_STAMP} 1 [-44198] -> 192.168.21.252 -> 0 [+44451]
+${TEST_DATE_STAMP} 1 [-44198] -> 192.168.21.252 -> 2 [+28322]
+${TEST_DATE_STAMP} 1 [-43947] -> 192.168.20.252 -> 0 [+45915]
+${TEST_DATE_STAMP} 1 [-43947] -> 192.168.20.252 -> 2 [+31501]
+${TEST_DATE_STAMP} 1 [-43947] -> 192.168.20.249 -> 0 [+45662]
+${TEST_DATE_STAMP} 1 [-43947] -> 192.168.20.249 -> 2 [+31501]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-44198] -> 192.168.21.254 -> 2 [+28322]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [43947]
+${TEST_DATE_STAMP} 1 [43947]
+${TEST_DATE_STAMP} 2 [43947]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 0 [43947]
+${TEST_DATE_STAMP} 0 [-28322] -> 192.168.21.253 -> 0 [+28322]
+${TEST_DATE_STAMP} 0 [-28322] -> 192.168.21.253 -> 2 [+44198]
+${TEST_DATE_STAMP} 0 [-29786] -> 192.168.20.253 -> 0 [+29786]
+${TEST_DATE_STAMP} 0 [-29786] -> 192.168.20.253 -> 2 [+45662]
+${TEST_DATE_STAMP} 0 [-29786] -> 192.168.20.250 -> 0 [+29786]
+${TEST_DATE_STAMP} 0 [-29786] -> 192.168.20.250 -> 2 [+45915]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [43947]
+${TEST_DATE_STAMP} 1 [-28322] -> 192.168.21.252 -> 0 [+44451]
+${TEST_DATE_STAMP} 1 [-28322] -> 192.168.21.252 -> 2 [+44198]
+${TEST_DATE_STAMP} 1 [-29786] -> 192.168.20.252 -> 0 [+45915]
+${TEST_DATE_STAMP} 1 [-29786] -> 192.168.20.252 -> 2 [+45662]
+${TEST_DATE_STAMP} 1 [-29786] -> 192.168.20.249 -> 0 [+45662]
+${TEST_DATE_STAMP} 1 [-29786] -> 192.168.20.249 -> 2 [+45662]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 2 [43947]
+${TEST_DATE_STAMP} 2 [-28322] -> 192.168.21.254 -> 0 [+44198]
+${TEST_DATE_STAMP} 2 [-28322] -> 192.168.21.254 -> 2 [+28322]
+${TEST_DATE_STAMP} 2 [-29786] -> 192.168.20.254 -> 0 [+45662]
+${TEST_DATE_STAMP} 2 [-29786] -> 192.168.20.254 -> 2 [+29786]
+${TEST_DATE_STAMP} 2 [-29786] -> 192.168.20.251 -> 0 [+45915]
+${TEST_DATE_STAMP} 2 [-29786] -> 192.168.20.251 -> 2 [+29786]
+${TEST_DATE_STAMP} ----------------------------------------
+192.168.21.254 2
+192.168.21.253 0
+192.168.21.252 1
+192.168.20.254 2
+192.168.20.253 0
+192.168.20.252 1
+192.168.20.251 2
+192.168.20.250 0
+192.168.20.249 1
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 1
+192.168.20.250 1
+192.168.20.251 1
+192.168.20.252 1
+192.168.20.253 1
+192.168.20.254 1
+192.168.21.252 1
+192.168.21.253 1
+192.168.21.254 1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.006.sh b/ctdb/tests/UNIT/takeover/lcp2.006.sh
new file mode 100755
index 0000000..c527992
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.006.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 0 -> 1 healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 1
+192.168.21.253 1
+192.168.21.252 1
+192.168.20.254 1
+192.168.20.253 1
+192.168.20.252 1
+192.168.20.251 1
+192.168.20.250 1
+192.168.20.249 1
+EOF
+
+simple_test 2,0,2 <<EOF
+192.168.20.249 -1
+192.168.20.250 -1
+192.168.20.251 -1
+192.168.20.252 -1
+192.168.20.253 -1
+192.168.20.254 -1
+192.168.21.252 -1
+192.168.21.253 -1
+192.168.21.254 -1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.007.sh b/ctdb/tests/UNIT/takeover/lcp2.007.sh
new file mode 100755
index 0000000..a514025
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.007.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 0 -> 2 healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 1
+192.168.21.253 2
+192.168.21.252 1
+192.168.20.254 1
+192.168.20.253 2
+192.168.20.252 1
+192.168.20.251 1
+192.168.20.250 2
+192.168.20.249 2
+EOF
+
+simple_test 2,0,0 <<EOF
+192.168.20.249 -1
+192.168.20.250 -1
+192.168.20.251 -1
+192.168.20.252 -1
+192.168.20.253 -1
+192.168.20.254 -1
+192.168.21.252 -1
+192.168.21.253 -1
+192.168.21.254 -1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.008.sh b/ctdb/tests/UNIT/takeover/lcp2.008.sh
new file mode 100755
index 0000000..6387223
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.008.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 0 -> all healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 0
+192.168.21.253 1
+192.168.21.252 2
+192.168.20.254 0
+192.168.20.253 1
+192.168.20.252 2
+192.168.20.251 0
+192.168.20.250 1
+192.168.20.249 2
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 -1
+192.168.20.250 -1
+192.168.20.251 -1
+192.168.20.252 -1
+192.168.20.253 -1
+192.168.20.254 -1
+192.168.21.252 -1
+192.168.21.253 -1
+192.168.21.254 -1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.009.sh b/ctdb/tests/UNIT/takeover/lcp2.009.sh
new file mode 100755
index 0000000..1b0c350
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.009.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 3 healthy -> all disconnected"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
+
+simple_test 1,1,1 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.010.sh b/ctdb/tests/UNIT/takeover/lcp2.010.sh
new file mode 100755
index 0000000..f7dabdd
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.010.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "2 disjoint groups of nodes/addresses, a node becomes healthy"
+
+# This illustrates a bug in LCP2 when the only candidate for a
+# source node is chosen to be the "most imbalanced" node. This means
+# that nodes in the smaller group aren't necessarily (depends on sort
+# order and addresses used) considered as candidates. If the larger
+# group has 6 addresses then the "necessarily" goes away and the
+# smaller group won't be rebalanced.
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.209.102 3
+192.168.209.101 2
+192.168.140.4 1
+192.168.140.3 1
+192.168.140.2 0
+192.168.140.1 0
+EOF
+
+simple_test 0,0,0,0 <<EOF
+192.168.140.1 0 0,1
+192.168.140.2 0 0,1
+192.168.140.3 1 0,1
+192.168.140.4 1 0,1
+192.168.209.101 2 2,3
+192.168.209.102 2 2,3
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.011.sh b/ctdb/tests/UNIT/takeover/lcp2.011.sh
new file mode 100755
index 0000000..1f10bd1
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.011.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "2 disjoint groups of nodes/addresses, continue a stopped node"
+
+# Another LCP2 1.0 bug
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+10.11.19.46 3
+10.11.19.45 3
+10.11.19.44 1
+10.11.18.46 1
+10.11.18.45 3
+10.11.18.44 1
+10.11.17.46 3
+10.11.17.45 3
+10.11.17.44 1
+10.11.16.46 1
+10.11.16.45 3
+10.11.16.44 1
+9.11.136.46 2
+9.11.136.45 0
+9.11.136.44 2
+EOF
+
+simple_test 0,0,0,0 <<EOF
+9.11.136.44 2 0,2
+9.11.136.45 2 0,2
+9.11.136.46 2 0,2
+10.11.16.44 1 1,3
+10.11.16.45 3 1,3
+10.11.16.46 1 1,3
+10.11.17.44 1 1,3
+10.11.17.45 3 1,3
+10.11.17.46 3 1,3
+10.11.18.44 1 1,3
+10.11.18.45 3 1,3
+10.11.18.46 1 1,3
+10.11.19.44 1 1,3
+10.11.19.45 3 1,3
+10.11.19.46 3 1,3
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.012.sh b/ctdb/tests/UNIT/takeover/lcp2.012.sh
new file mode 100755
index 0000000..074cdcc
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.012.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "NoIPTakeover - nodes don't gain IPs"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 1
+192.168.21.253 1
+192.168.21.252 1
+192.168.20.254 1
+192.168.20.253 1
+192.168.20.252 1
+192.168.20.251 1
+192.168.20.250 1
+192.168.20.249 1
+EOF
+
+export CTDB_SET_NoIPTakeover=1
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 1
+192.168.20.250 1
+192.168.20.251 1
+192.168.20.252 1
+192.168.20.253 1
+192.168.20.254 1
+192.168.21.252 1
+192.168.21.253 1
+192.168.21.254 1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.013.sh b/ctdb/tests/UNIT/takeover/lcp2.013.sh
new file mode 100755
index 0000000..091a235
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.013.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "NoIPTakeover: nodes don't lose IPs"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 2
+192.168.21.253 1
+192.168.21.252 0
+192.168.20.254 2
+192.168.20.253 1
+192.168.20.252 0
+192.168.20.251 2
+192.168.20.250 1
+192.168.20.249 0
+EOF
+
+export CTDB_SET_NoIPTakeover=1
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.014.sh b/ctdb/tests/UNIT/takeover/lcp2.014.sh
new file mode 100755
index 0000000..25482c0
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.014.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, no IPs assigned, all unhealthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
+
+simple_test 2,2,2 <<EOF
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.015.sh b/ctdb/tests/UNIT/takeover/lcp2.015.sh
new file mode 100755
index 0000000..63c87c6
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.015.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all IPs assigned, all unhealthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
+
+simple_test 2,2,2 <<EOF
+192.168.21.254 2
+192.168.21.253 2
+192.168.21.252 2
+192.168.20.254 1
+192.168.20.253 1
+192.168.20.252 1
+192.168.20.251 0
+192.168.20.250 0
+192.168.20.249 0
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.016.sh b/ctdb/tests/UNIT/takeover/lcp2.016.sh
new file mode 100755
index 0000000..da2f4b0
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.016.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all IPs assigned, 2->3 unhealthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
+
+simple_test 2,2,2 <<EOF
+192.168.21.254 2
+192.168.21.253 2
+192.168.21.252 2
+192.168.20.254 2
+192.168.20.253 2
+192.168.20.252 2
+192.168.20.251 2
+192.168.20.250 2
+192.168.20.249 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.024.sh b/ctdb/tests/UNIT/takeover/lcp2.024.sh
new file mode 100755
index 0000000..d297084
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.024.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, no IPs assigned, all healthy, all in STARTUP runstate"
+
+export CTDB_TEST_LOGLEVEL=NOTICE
+
+required_result <<EOF
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.21.254
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.21.253
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.21.252
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.20.254
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.20.253
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.20.252
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.20.251
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.20.250
+${TEST_DATE_STAMP}Failed to find node to cover ip 192.168.20.249
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
+
+export CTDB_TEST_RUNSTATE=4,4,4
+
+simple_test 0,0,0 <<EOF
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.025.sh b/ctdb/tests/UNIT/takeover/lcp2.025.sh
new file mode 100755
index 0000000..f52282e
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.025.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, no IPs assigned, all healthy, 1 in STARTUP runstate"
+
+export CTDB_TEST_LOGLEVEL=NOTICE
+
+required_result <<EOF
+192.168.21.254 1
+192.168.21.253 2
+192.168.21.252 1
+192.168.20.254 1
+192.168.20.253 2
+192.168.20.252 1
+192.168.20.251 1
+192.168.20.250 2
+192.168.20.249 2
+EOF
+
+export CTDB_TEST_RUNSTATE=4,5,5
+
+simple_test 0,0,0 <<EOF
+192.168.21.254 -1
+192.168.21.253 -1
+192.168.21.252 -1
+192.168.20.254 -1
+192.168.20.253 -1
+192.168.20.252 -1
+192.168.20.251 -1
+192.168.20.250 -1
+192.168.20.249 -1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.027.sh b/ctdb/tests/UNIT/takeover/lcp2.027.sh
new file mode 100755
index 0000000..f572b47
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.027.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "4 nodes, all IPs assigned, 3->4 unhealthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 3
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 0
+130.216.30.172 3
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 0
+10.19.99.250 3
+EOF
+
+simple_test 0,0,2,0 <<EOF
+130.216.30.170 3
+130.216.30.171 2
+130.216.30.172 3
+130.216.30.173 2
+130.216.30.174 1
+130.216.30.175 0
+130.216.30.176 1
+130.216.30.177 0
+130.216.30.178 3
+130.216.30.179 2
+130.216.30.180 1
+130.216.30.181 0
+10.19.99.250 3
+10.19.99.251 2
+10.19.99.252 1
+10.19.99.253 0
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.028.sh b/ctdb/tests/UNIT/takeover/lcp2.028.sh
new file mode 100755
index 0000000..b0a8ef5
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.028.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "4 nodes, all healthy/assigned, stays unbalanced"
+
+export CTDB_TEST_LOGLEVEL=INFO
+
+required_result <<EOF
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 2
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 0
+130.216.30.172 3
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 0
+10.19.99.250 3
+EOF
+
+simple_test 0,0,0,0 <<EOF
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 2
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 0
+130.216.30.172 3
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 0
+10.19.99.250 3
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.029.sh b/ctdb/tests/UNIT/takeover/lcp2.029.sh
new file mode 100755
index 0000000..5354963
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.029.sh
@@ -0,0 +1,111 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "4 nodes, some IPs unassigned on target nodes"
+
+export CTDB_TEST_LOGLEVEL=INFO
+
+required_result <<EOF
+${TEST_DATE_STAMP} 10.19.99.251 -> 2 [+9216]
+${TEST_DATE_STAMP} 130.216.30.173 -> 2 [+24345]
+${TEST_DATE_STAMP} 130.216.30.171 -> 2 [+39970]
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 2
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 2
+130.216.30.172 3
+130.216.30.171 2
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 2
+10.19.99.250 3
+EOF
+
+# In this example there were 4 releases from node 2 in a previous iteration
+#
+# Release of IP 130.216.30.179/27 on interface ethX1 node:3
+# Release of IP 130.216.30.173/27 on interface ethX1 node:0
+# Release of IP 130.216.30.171/27 on interface ethX1 node:1
+# Release of IP 10.19.99.251/22 on interface ethX2 node:0
+#
+# However, one release failed so no takeovers were done. This means
+# that the target node for each IP still thinks that the IPs are held
+# by node 2. The release of 130.216.30.179 was so late that node 2
+# still thought that it held that address.
+
+simple_test 0,0,0,0 multi <<EOF
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 3
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 2
+130.216.30.172 3
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 2
+10.19.99.250 3
+
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 3
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 0
+130.216.30.172 3
+130.216.30.171 2
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 0
+10.19.99.250 3
+
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 2
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 0
+130.216.30.172 3
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 0
+10.19.99.250 3
+
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 2
+130.216.30.178 3
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 0
+130.216.30.172 3
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 1
+10.19.99.251 0
+10.19.99.250 3
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.030.sh b/ctdb/tests/UNIT/takeover/lcp2.030.sh
new file mode 100755
index 0000000..87a7f58
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.030.sh
@@ -0,0 +1,1813 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "900 IPs, 5 nodes, 0 -> 5 healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.10.90 0
+192.168.10.89 1
+192.168.10.88 2
+192.168.10.87 3
+192.168.10.86 4
+192.168.10.85 0
+192.168.10.84 1
+192.168.10.83 2
+192.168.10.82 3
+192.168.10.81 4
+192.168.10.80 0
+192.168.10.79 0
+192.168.10.78 1
+192.168.10.77 2
+192.168.10.76 3
+192.168.10.75 4
+192.168.10.74 1
+192.168.10.73 2
+192.168.10.72 3
+192.168.10.71 3
+192.168.10.70 4
+192.168.10.69 0
+192.168.10.68 1
+192.168.10.67 2
+192.168.10.66 4
+192.168.10.65 0
+192.168.10.64 1
+192.168.10.63 0
+192.168.10.62 1
+192.168.10.61 2
+192.168.10.60 3
+192.168.10.59 4
+192.168.10.58 2
+192.168.10.57 3
+192.168.10.56 0
+192.168.10.55 0
+192.168.10.54 1
+192.168.10.53 2
+192.168.10.52 3
+192.168.10.51 4
+192.168.10.50 1
+192.168.10.49 4
+192.168.10.48 2
+192.168.10.47 0
+192.168.10.46 1
+192.168.10.45 2
+192.168.10.44 3
+192.168.10.43 4
+192.168.10.42 2
+192.168.10.41 3
+192.168.10.40 1
+192.168.10.39 3
+192.168.10.38 4
+192.168.10.37 0
+192.168.10.36 1
+192.168.10.35 2
+192.168.10.34 4
+192.168.10.33 0
+192.168.10.32 3
+192.168.10.31 0
+192.168.10.30 1
+192.168.10.29 2
+192.168.10.28 3
+192.168.10.27 4
+192.168.10.26 3
+192.168.10.25 2
+192.168.10.24 0
+192.168.10.23 3
+192.168.10.22 4
+192.168.10.21 0
+192.168.10.20 1
+192.168.10.19 2
+192.168.10.18 4
+192.168.10.17 1
+192.168.10.16 4
+192.168.10.15 0
+192.168.10.14 1
+192.168.10.13 2
+192.168.10.12 3
+192.168.10.11 4
+192.168.10.10 2
+192.168.10.9 3
+192.168.10.8 4
+192.168.10.7 0
+192.168.10.6 1
+192.168.10.5 2
+192.168.10.4 3
+192.168.10.3 4
+192.168.10.2 0
+192.168.10.1 1
+192.168.9.90 0
+192.168.9.89 1
+192.168.9.88 2
+192.168.9.87 3
+192.168.9.86 4
+192.168.9.85 0
+192.168.9.84 1
+192.168.9.83 2
+192.168.9.82 3
+192.168.9.81 4
+192.168.9.80 0
+192.168.9.79 0
+192.168.9.78 1
+192.168.9.77 2
+192.168.9.76 3
+192.168.9.75 4
+192.168.9.74 1
+192.168.9.73 2
+192.168.9.72 3
+192.168.9.71 3
+192.168.9.70 4
+192.168.9.69 0
+192.168.9.68 1
+192.168.9.67 2
+192.168.9.66 4
+192.168.9.65 0
+192.168.9.64 1
+192.168.9.63 0
+192.168.9.62 1
+192.168.9.61 2
+192.168.9.60 3
+192.168.9.59 4
+192.168.9.58 2
+192.168.9.57 3
+192.168.9.56 4
+192.168.9.55 0
+192.168.9.54 1
+192.168.9.53 2
+192.168.9.52 3
+192.168.9.51 4
+192.168.9.50 0
+192.168.9.49 1
+192.168.9.48 2
+192.168.9.47 0
+192.168.9.46 1
+192.168.9.45 2
+192.168.9.44 3
+192.168.9.43 4
+192.168.9.42 2
+192.168.9.41 4
+192.168.9.40 3
+192.168.9.39 0
+192.168.9.38 1
+192.168.9.37 2
+192.168.9.36 3
+192.168.9.35 4
+192.168.9.34 0
+192.168.9.33 1
+192.168.9.32 4
+192.168.9.31 0
+192.168.9.30 1
+192.168.9.29 2
+192.168.9.28 3
+192.168.9.27 4
+192.168.9.26 2
+192.168.9.25 3
+192.168.9.24 0
+192.168.9.23 3
+192.168.9.22 4
+192.168.9.21 0
+192.168.9.20 1
+192.168.9.19 2
+192.168.9.18 4
+192.168.9.17 1
+192.168.9.16 3
+192.168.9.15 0
+192.168.9.14 1
+192.168.9.13 2
+192.168.9.12 3
+192.168.9.11 4
+192.168.9.10 2
+192.168.9.9 4
+192.168.9.8 3
+192.168.9.7 0
+192.168.9.6 1
+192.168.9.5 2
+192.168.9.4 3
+192.168.9.3 4
+192.168.9.2 0
+192.168.9.1 1
+192.168.8.90 0
+192.168.8.89 1
+192.168.8.88 2
+192.168.8.87 3
+192.168.8.86 4
+192.168.8.85 0
+192.168.8.84 1
+192.168.8.83 2
+192.168.8.82 3
+192.168.8.81 4
+192.168.8.80 0
+192.168.8.79 0
+192.168.8.78 1
+192.168.8.77 2
+192.168.8.76 3
+192.168.8.75 4
+192.168.8.74 1
+192.168.8.73 2
+192.168.8.72 3
+192.168.8.71 3
+192.168.8.70 4
+192.168.8.69 0
+192.168.8.68 1
+192.168.8.67 2
+192.168.8.66 4
+192.168.8.65 3
+192.168.8.64 0
+192.168.8.63 0
+192.168.8.62 1
+192.168.8.61 2
+192.168.8.60 3
+192.168.8.59 4
+192.168.8.58 1
+192.168.8.57 2
+192.168.8.56 3
+192.168.8.55 0
+192.168.8.54 1
+192.168.8.53 2
+192.168.8.52 3
+192.168.8.51 4
+192.168.8.50 0
+192.168.8.49 4
+192.168.8.48 1
+192.168.8.47 0
+192.168.8.46 1
+192.168.8.45 2
+192.168.8.44 3
+192.168.8.43 4
+192.168.8.42 2
+192.168.8.41 1
+192.168.8.40 4
+192.168.8.39 0
+192.168.8.38 1
+192.168.8.37 2
+192.168.8.36 3
+192.168.8.35 4
+192.168.8.34 3
+192.168.8.33 0
+192.168.8.32 2
+192.168.8.31 0
+192.168.8.30 1
+192.168.8.29 2
+192.168.8.28 3
+192.168.8.27 4
+192.168.8.26 2
+192.168.8.25 1
+192.168.8.24 3
+192.168.8.23 3
+192.168.8.22 4
+192.168.8.21 0
+192.168.8.20 1
+192.168.8.19 2
+192.168.8.18 4
+192.168.8.17 0
+192.168.8.16 4
+192.168.8.15 0
+192.168.8.14 1
+192.168.8.13 2
+192.168.8.12 3
+192.168.8.11 4
+192.168.8.10 1
+192.168.8.9 2
+192.168.8.8 4
+192.168.8.7 0
+192.168.8.6 1
+192.168.8.5 2
+192.168.8.4 3
+192.168.8.3 4
+192.168.8.2 3
+192.168.8.1 0
+192.168.7.90 0
+192.168.7.89 1
+192.168.7.88 2
+192.168.7.87 3
+192.168.7.86 4
+192.168.7.85 0
+192.168.7.84 1
+192.168.7.83 2
+192.168.7.82 3
+192.168.7.81 4
+192.168.7.80 1
+192.168.7.79 0
+192.168.7.78 1
+192.168.7.77 2
+192.168.7.76 3
+192.168.7.75 4
+192.168.7.74 2
+192.168.7.73 3
+192.168.7.72 0
+192.168.7.71 3
+192.168.7.70 4
+192.168.7.69 0
+192.168.7.68 1
+192.168.7.67 2
+192.168.7.66 4
+192.168.7.65 1
+192.168.7.64 3
+192.168.7.63 0
+192.168.7.62 1
+192.168.7.61 2
+192.168.7.60 3
+192.168.7.59 4
+192.168.7.58 2
+192.168.7.57 0
+192.168.7.56 1
+192.168.7.55 0
+192.168.7.54 1
+192.168.7.53 2
+192.168.7.52 3
+192.168.7.51 4
+192.168.7.50 3
+192.168.7.49 4
+192.168.7.48 2
+192.168.7.47 0
+192.168.7.46 1
+192.168.7.45 2
+192.168.7.44 3
+192.168.7.43 4
+192.168.7.42 2
+192.168.7.41 0
+192.168.7.40 1
+192.168.7.39 4
+192.168.7.38 0
+192.168.7.37 1
+192.168.7.36 2
+192.168.7.35 3
+192.168.7.34 4
+192.168.7.33 3
+192.168.7.32 0
+192.168.7.31 0
+192.168.7.30 1
+192.168.7.29 2
+192.168.7.28 3
+192.168.7.27 4
+192.168.7.26 2
+192.168.7.25 0
+192.168.7.24 1
+192.168.7.23 3
+192.168.7.22 4
+192.168.7.21 0
+192.168.7.20 1
+192.168.7.19 2
+192.168.7.18 4
+192.168.7.17 3
+192.168.7.16 4
+192.168.7.15 0
+192.168.7.14 1
+192.168.7.13 2
+192.168.7.12 3
+192.168.7.11 4
+192.168.7.10 3
+192.168.7.9 2
+192.168.7.8 0
+192.168.7.7 2
+192.168.7.6 4
+192.168.7.5 0
+192.168.7.4 1
+192.168.7.3 3
+192.168.7.2 4
+192.168.7.1 1
+192.168.6.90 0
+192.168.6.89 1
+192.168.6.88 2
+192.168.6.87 3
+192.168.6.86 4
+192.168.6.85 0
+192.168.6.84 1
+192.168.6.83 2
+192.168.6.82 4
+192.168.6.81 3
+192.168.6.80 0
+192.168.6.79 0
+192.168.6.78 1
+192.168.6.77 2
+192.168.6.76 3
+192.168.6.75 4
+192.168.6.74 2
+192.168.6.73 3
+192.168.6.72 1
+192.168.6.71 3
+192.168.6.70 4
+192.168.6.69 0
+192.168.6.68 1
+192.168.6.67 2
+192.168.6.66 4
+192.168.6.65 0
+192.168.6.64 1
+192.168.6.63 0
+192.168.6.62 1
+192.168.6.61 2
+192.168.6.60 3
+192.168.6.59 4
+192.168.6.58 2
+192.168.6.57 3
+192.168.6.56 0
+192.168.6.55 3
+192.168.6.54 4
+192.168.6.53 1
+192.168.6.52 2
+192.168.6.51 0
+192.168.6.50 4
+192.168.6.49 1
+192.168.6.48 2
+192.168.6.47 0
+192.168.6.46 1
+192.168.6.45 2
+192.168.6.44 3
+192.168.6.43 4
+192.168.6.42 2
+192.168.6.41 4
+192.168.6.40 3
+192.168.6.39 0
+192.168.6.38 1
+192.168.6.37 2
+192.168.6.36 3
+192.168.6.35 4
+192.168.6.34 0
+192.168.6.33 1
+192.168.6.32 4
+192.168.6.31 0
+192.168.6.30 1
+192.168.6.29 2
+192.168.6.28 3
+192.168.6.27 4
+192.168.6.26 2
+192.168.6.25 3
+192.168.6.24 0
+192.168.6.23 3
+192.168.6.22 4
+192.168.6.21 0
+192.168.6.20 1
+192.168.6.19 2
+192.168.6.18 4
+192.168.6.17 1
+192.168.6.16 3
+192.168.6.15 0
+192.168.6.14 1
+192.168.6.13 2
+192.168.6.12 3
+192.168.6.11 4
+192.168.6.10 2
+192.168.6.9 3
+192.168.6.8 4
+192.168.6.7 0
+192.168.6.6 1
+192.168.6.5 2
+192.168.6.4 3
+192.168.6.3 4
+192.168.6.2 0
+192.168.6.1 1
+192.168.5.90 0
+192.168.5.89 1
+192.168.5.88 2
+192.168.5.87 3
+192.168.5.86 4
+192.168.5.85 0
+192.168.5.84 1
+192.168.5.83 2
+192.168.5.82 4
+192.168.5.81 3
+192.168.5.80 0
+192.168.5.79 0
+192.168.5.78 1
+192.168.5.77 2
+192.168.5.76 3
+192.168.5.75 4
+192.168.5.74 2
+192.168.5.73 3
+192.168.5.72 1
+192.168.5.71 3
+192.168.5.70 4
+192.168.5.69 2
+192.168.5.68 0
+192.168.5.67 1
+192.168.5.66 4
+192.168.5.65 2
+192.168.5.64 0
+192.168.5.63 0
+192.168.5.62 1
+192.168.5.61 2
+192.168.5.60 3
+192.168.5.59 4
+192.168.5.58 1
+192.168.5.57 3
+192.168.5.56 2
+192.168.5.55 0
+192.168.5.54 1
+192.168.5.53 2
+192.168.5.52 3
+192.168.5.51 4
+192.168.5.50 0
+192.168.5.49 4
+192.168.5.48 1
+192.168.5.47 0
+192.168.5.46 1
+192.168.5.45 2
+192.168.5.44 3
+192.168.5.43 4
+192.168.5.42 1
+192.168.5.41 3
+192.168.5.40 2
+192.168.5.39 2
+192.168.5.38 3
+192.168.5.37 4
+192.168.5.36 0
+192.168.5.35 1
+192.168.5.34 4
+192.168.5.33 0
+192.168.5.32 4
+192.168.5.31 0
+192.168.5.30 1
+192.168.5.29 2
+192.168.5.28 3
+192.168.5.27 4
+192.168.5.26 1
+192.168.5.25 3
+192.168.5.24 2
+192.168.5.23 3
+192.168.5.22 4
+192.168.5.21 2
+192.168.5.20 0
+192.168.5.19 1
+192.168.5.18 4
+192.168.5.17 0
+192.168.5.16 3
+192.168.5.15 0
+192.168.5.14 1
+192.168.5.13 2
+192.168.5.12 3
+192.168.5.11 4
+192.168.5.10 1
+192.168.5.9 4
+192.168.5.8 3
+192.168.5.7 0
+192.168.5.6 1
+192.168.5.5 2
+192.168.5.4 3
+192.168.5.3 4
+192.168.5.2 2
+192.168.5.1 0
+192.168.4.90 0
+192.168.4.89 1
+192.168.4.88 2
+192.168.4.87 3
+192.168.4.86 4
+192.168.4.85 0
+192.168.4.84 1
+192.168.4.83 2
+192.168.4.82 3
+192.168.4.81 4
+192.168.4.80 0
+192.168.4.79 0
+192.168.4.78 1
+192.168.4.77 2
+192.168.4.76 3
+192.168.4.75 4
+192.168.4.74 1
+192.168.4.73 2
+192.168.4.72 3
+192.168.4.71 3
+192.168.4.70 4
+192.168.4.69 0
+192.168.4.68 1
+192.168.4.67 2
+192.168.4.66 4
+192.168.4.65 1
+192.168.4.64 3
+192.168.4.63 0
+192.168.4.62 1
+192.168.4.61 2
+192.168.4.60 3
+192.168.4.59 4
+192.168.4.58 0
+192.168.4.57 2
+192.168.4.56 1
+192.168.4.55 0
+192.168.4.54 1
+192.168.4.53 2
+192.168.4.52 3
+192.168.4.51 4
+192.168.4.50 3
+192.168.4.49 4
+192.168.4.48 0
+192.168.4.47 0
+192.168.4.46 1
+192.168.4.45 2
+192.168.4.44 3
+192.168.4.43 4
+192.168.4.42 2
+192.168.4.41 0
+192.168.4.40 1
+192.168.4.39 4
+192.168.4.38 0
+192.168.4.37 1
+192.168.4.36 2
+192.168.4.35 3
+192.168.4.34 4
+192.168.4.33 3
+192.168.4.32 2
+192.168.4.31 0
+192.168.4.30 1
+192.168.4.29 2
+192.168.4.28 3
+192.168.4.27 4
+192.168.4.26 0
+192.168.4.25 2
+192.168.4.24 1
+192.168.4.23 3
+192.168.4.22 4
+192.168.4.21 0
+192.168.4.20 1
+192.168.4.19 2
+192.168.4.18 4
+192.168.4.17 3
+192.168.4.16 1
+192.168.4.15 0
+192.168.4.14 1
+192.168.4.13 2
+192.168.4.12 3
+192.168.4.11 4
+192.168.4.10 3
+192.168.4.9 0
+192.168.4.8 2
+192.168.4.7 2
+192.168.4.6 3
+192.168.4.5 4
+192.168.4.4 0
+192.168.4.3 1
+192.168.4.2 4
+192.168.4.1 4
+192.168.3.90 0
+192.168.3.89 1
+192.168.3.88 2
+192.168.3.87 3
+192.168.3.86 4
+192.168.3.85 0
+192.168.3.84 1
+192.168.3.83 2
+192.168.3.82 3
+192.168.3.81 4
+192.168.3.80 0
+192.168.3.79 0
+192.168.3.78 1
+192.168.3.77 2
+192.168.3.76 3
+192.168.3.75 4
+192.168.3.74 1
+192.168.3.73 2
+192.168.3.72 3
+192.168.3.71 3
+192.168.3.70 4
+192.168.3.69 0
+192.168.3.68 1
+192.168.3.67 2
+192.168.3.66 4
+192.168.3.65 0
+192.168.3.64 3
+192.168.3.63 0
+192.168.3.62 1
+192.168.3.61 2
+192.168.3.60 3
+192.168.3.59 4
+192.168.3.58 2
+192.168.3.57 1
+192.168.3.56 3
+192.168.3.55 0
+192.168.3.54 1
+192.168.3.53 2
+192.168.3.52 3
+192.168.3.51 4
+192.168.3.50 0
+192.168.3.49 4
+192.168.3.48 2
+192.168.3.47 0
+192.168.3.46 1
+192.168.3.45 2
+192.168.3.44 3
+192.168.3.43 4
+192.168.3.42 2
+192.168.3.41 1
+192.168.3.40 0
+192.168.3.39 1
+192.168.3.38 2
+192.168.3.37 3
+192.168.3.36 4
+192.168.3.35 0
+192.168.3.34 4
+192.168.3.33 3
+192.168.3.32 4
+192.168.3.31 0
+192.168.3.30 1
+192.168.3.29 2
+192.168.3.28 3
+192.168.3.27 4
+192.168.3.26 2
+192.168.3.25 1
+192.168.3.24 0
+192.168.3.23 3
+192.168.3.22 4
+192.168.3.21 0
+192.168.3.20 1
+192.168.3.19 2
+192.168.3.18 4
+192.168.3.17 3
+192.168.3.16 1
+192.168.3.15 0
+192.168.3.14 1
+192.168.3.13 2
+192.168.3.12 3
+192.168.3.11 4
+192.168.3.10 2
+192.168.3.9 1
+192.168.3.8 0
+192.168.3.7 4
+192.168.3.6 0
+192.168.3.5 1
+192.168.3.4 2
+192.168.3.3 3
+192.168.3.2 4
+192.168.3.1 3
+192.168.2.90 0
+192.168.2.89 1
+192.168.2.88 2
+192.168.2.87 3
+192.168.2.86 4
+192.168.2.85 0
+192.168.2.84 1
+192.168.2.83 2
+192.168.2.82 3
+192.168.2.81 4
+192.168.2.80 1
+192.168.2.79 0
+192.168.2.78 1
+192.168.2.77 2
+192.168.2.76 3
+192.168.2.75 4
+192.168.2.74 2
+192.168.2.73 3
+192.168.2.72 0
+192.168.2.71 3
+192.168.2.70 4
+192.168.2.69 0
+192.168.2.68 1
+192.168.2.67 2
+192.168.2.66 4
+192.168.2.65 1
+192.168.2.64 3
+192.168.2.63 0
+192.168.2.62 1
+192.168.2.61 2
+192.168.2.60 3
+192.168.2.59 4
+192.168.2.58 0
+192.168.2.57 2
+192.168.2.56 1
+192.168.2.55 0
+192.168.2.54 1
+192.168.2.53 2
+192.168.2.52 3
+192.168.2.51 4
+192.168.2.50 3
+192.168.2.49 4
+192.168.2.48 0
+192.168.2.47 0
+192.168.2.46 1
+192.168.2.45 2
+192.168.2.44 3
+192.168.2.43 4
+192.168.2.42 2
+192.168.2.41 0
+192.168.2.40 1
+192.168.2.39 0
+192.168.2.38 1
+192.168.2.37 2
+192.168.2.36 3
+192.168.2.35 4
+192.168.2.34 3
+192.168.2.33 4
+192.168.2.32 2
+192.168.2.31 0
+192.168.2.30 1
+192.168.2.29 2
+192.168.2.28 3
+192.168.2.27 4
+192.168.2.26 2
+192.168.2.25 0
+192.168.2.24 1
+192.168.2.23 3
+192.168.2.22 4
+192.168.2.21 0
+192.168.2.20 1
+192.168.2.19 2
+192.168.2.18 4
+192.168.2.17 3
+192.168.2.16 4
+192.168.2.15 0
+192.168.2.14 1
+192.168.2.13 2
+192.168.2.12 3
+192.168.2.11 4
+192.168.2.10 0
+192.168.2.9 2
+192.168.2.8 3
+192.168.2.7 2
+192.168.2.6 4
+192.168.2.5 0
+192.168.2.4 1
+192.168.2.3 3
+192.168.2.2 4
+192.168.2.1 1
+192.168.1.90 0
+192.168.1.89 1
+192.168.1.88 2
+192.168.1.87 3
+192.168.1.86 4
+192.168.1.85 0
+192.168.1.84 1
+192.168.1.83 2
+192.168.1.82 3
+192.168.1.81 4
+192.168.1.80 0
+192.168.1.79 0
+192.168.1.78 1
+192.168.1.77 2
+192.168.1.76 3
+192.168.1.75 4
+192.168.1.74 1
+192.168.1.73 2
+192.168.1.72 3
+192.168.1.71 3
+192.168.1.70 4
+192.168.1.69 0
+192.168.1.68 1
+192.168.1.67 2
+192.168.1.66 4
+192.168.1.65 0
+192.168.1.64 1
+192.168.1.63 0
+192.168.1.62 1
+192.168.1.61 2
+192.168.1.60 3
+192.168.1.59 4
+192.168.1.58 2
+192.168.1.57 3
+192.168.1.56 1
+192.168.1.55 0
+192.168.1.54 1
+192.168.1.53 2
+192.168.1.52 3
+192.168.1.51 4
+192.168.1.50 0
+192.168.1.49 4
+192.168.1.48 2
+192.168.1.47 0
+192.168.1.46 1
+192.168.1.45 2
+192.168.1.44 3
+192.168.1.43 4
+192.168.1.42 2
+192.168.1.41 3
+192.168.1.40 0
+192.168.1.39 3
+192.168.1.38 4
+192.168.1.37 0
+192.168.1.36 1
+192.168.1.35 2
+192.168.1.34 4
+192.168.1.33 1
+192.168.1.32 3
+192.168.1.31 0
+192.168.1.30 1
+192.168.1.29 2
+192.168.1.28 3
+192.168.1.27 4
+192.168.1.26 2
+192.168.1.25 3
+192.168.1.24 0
+192.168.1.23 3
+192.168.1.22 4
+192.168.1.21 0
+192.168.1.20 1
+192.168.1.19 2
+192.168.1.18 4
+192.168.1.17 1
+192.168.1.16 4
+192.168.1.15 0
+192.168.1.14 1
+192.168.1.13 2
+192.168.1.12 3
+192.168.1.11 4
+192.168.1.10 2
+192.168.1.9 3
+192.168.1.8 0
+192.168.1.7 3
+192.168.1.6 4
+192.168.1.5 0
+192.168.1.4 1
+192.168.1.3 2
+192.168.1.2 4
+192.168.1.1 1
+EOF
+
+simple_test 0,0,0,0,0 <<EOF
+192.168.1.1 -1
+192.168.1.2 -1
+192.168.1.3 -1
+192.168.1.4 -1
+192.168.1.5 -1
+192.168.1.6 -1
+192.168.1.7 -1
+192.168.1.8 -1
+192.168.1.9 -1
+192.168.1.10 -1
+192.168.1.11 -1
+192.168.1.12 -1
+192.168.1.13 -1
+192.168.1.14 -1
+192.168.1.15 -1
+192.168.1.16 -1
+192.168.1.17 -1
+192.168.1.18 -1
+192.168.1.19 -1
+192.168.1.20 -1
+192.168.1.21 -1
+192.168.1.22 -1
+192.168.1.23 -1
+192.168.1.24 -1
+192.168.1.25 -1
+192.168.1.26 -1
+192.168.1.27 -1
+192.168.1.28 -1
+192.168.1.29 -1
+192.168.1.30 -1
+192.168.1.31 -1
+192.168.1.32 -1
+192.168.1.33 -1
+192.168.1.34 -1
+192.168.1.35 -1
+192.168.1.36 -1
+192.168.1.37 -1
+192.168.1.38 -1
+192.168.1.39 -1
+192.168.1.40 -1
+192.168.1.41 -1
+192.168.1.42 -1
+192.168.1.43 -1
+192.168.1.44 -1
+192.168.1.45 -1
+192.168.1.46 -1
+192.168.1.47 -1
+192.168.1.48 -1
+192.168.1.49 -1
+192.168.1.50 -1
+192.168.1.51 -1
+192.168.1.52 -1
+192.168.1.53 -1
+192.168.1.54 -1
+192.168.1.55 -1
+192.168.1.56 -1
+192.168.1.57 -1
+192.168.1.58 -1
+192.168.1.59 -1
+192.168.1.60 -1
+192.168.1.61 -1
+192.168.1.62 -1
+192.168.1.63 -1
+192.168.1.64 -1
+192.168.1.65 -1
+192.168.1.66 -1
+192.168.1.67 -1
+192.168.1.68 -1
+192.168.1.69 -1
+192.168.1.70 -1
+192.168.1.71 -1
+192.168.1.72 -1
+192.168.1.73 -1
+192.168.1.74 -1
+192.168.1.75 -1
+192.168.1.76 -1
+192.168.1.77 -1
+192.168.1.78 -1
+192.168.1.79 -1
+192.168.1.80 -1
+192.168.1.81 -1
+192.168.1.82 -1
+192.168.1.83 -1
+192.168.1.84 -1
+192.168.1.85 -1
+192.168.1.86 -1
+192.168.1.87 -1
+192.168.1.88 -1
+192.168.1.89 -1
+192.168.1.90 -1
+192.168.2.1 -1
+192.168.2.2 -1
+192.168.2.3 -1
+192.168.2.4 -1
+192.168.2.5 -1
+192.168.2.6 -1
+192.168.2.7 -1
+192.168.2.8 -1
+192.168.2.9 -1
+192.168.2.10 -1
+192.168.2.11 -1
+192.168.2.12 -1
+192.168.2.13 -1
+192.168.2.14 -1
+192.168.2.15 -1
+192.168.2.16 -1
+192.168.2.17 -1
+192.168.2.18 -1
+192.168.2.19 -1
+192.168.2.20 -1
+192.168.2.21 -1
+192.168.2.22 -1
+192.168.2.23 -1
+192.168.2.24 -1
+192.168.2.25 -1
+192.168.2.26 -1
+192.168.2.27 -1
+192.168.2.28 -1
+192.168.2.29 -1
+192.168.2.30 -1
+192.168.2.31 -1
+192.168.2.32 -1
+192.168.2.33 -1
+192.168.2.34 -1
+192.168.2.35 -1
+192.168.2.36 -1
+192.168.2.37 -1
+192.168.2.38 -1
+192.168.2.39 -1
+192.168.2.40 -1
+192.168.2.41 -1
+192.168.2.42 -1
+192.168.2.43 -1
+192.168.2.44 -1
+192.168.2.45 -1
+192.168.2.46 -1
+192.168.2.47 -1
+192.168.2.48 -1
+192.168.2.49 -1
+192.168.2.50 -1
+192.168.2.51 -1
+192.168.2.52 -1
+192.168.2.53 -1
+192.168.2.54 -1
+192.168.2.55 -1
+192.168.2.56 -1
+192.168.2.57 -1
+192.168.2.58 -1
+192.168.2.59 -1
+192.168.2.60 -1
+192.168.2.61 -1
+192.168.2.62 -1
+192.168.2.63 -1
+192.168.2.64 -1
+192.168.2.65 -1
+192.168.2.66 -1
+192.168.2.67 -1
+192.168.2.68 -1
+192.168.2.69 -1
+192.168.2.70 -1
+192.168.2.71 -1
+192.168.2.72 -1
+192.168.2.73 -1
+192.168.2.74 -1
+192.168.2.75 -1
+192.168.2.76 -1
+192.168.2.77 -1
+192.168.2.78 -1
+192.168.2.79 -1
+192.168.2.80 -1
+192.168.2.81 -1
+192.168.2.82 -1
+192.168.2.83 -1
+192.168.2.84 -1
+192.168.2.85 -1
+192.168.2.86 -1
+192.168.2.87 -1
+192.168.2.88 -1
+192.168.2.89 -1
+192.168.2.90 -1
+192.168.3.1 -1
+192.168.3.2 -1
+192.168.3.3 -1
+192.168.3.4 -1
+192.168.3.5 -1
+192.168.3.6 -1
+192.168.3.7 -1
+192.168.3.8 -1
+192.168.3.9 -1
+192.168.3.10 -1
+192.168.3.11 -1
+192.168.3.12 -1
+192.168.3.13 -1
+192.168.3.14 -1
+192.168.3.15 -1
+192.168.3.16 -1
+192.168.3.17 -1
+192.168.3.18 -1
+192.168.3.19 -1
+192.168.3.20 -1
+192.168.3.21 -1
+192.168.3.22 -1
+192.168.3.23 -1
+192.168.3.24 -1
+192.168.3.25 -1
+192.168.3.26 -1
+192.168.3.27 -1
+192.168.3.28 -1
+192.168.3.29 -1
+192.168.3.30 -1
+192.168.3.31 -1
+192.168.3.32 -1
+192.168.3.33 -1
+192.168.3.34 -1
+192.168.3.35 -1
+192.168.3.36 -1
+192.168.3.37 -1
+192.168.3.38 -1
+192.168.3.39 -1
+192.168.3.40 -1
+192.168.3.41 -1
+192.168.3.42 -1
+192.168.3.43 -1
+192.168.3.44 -1
+192.168.3.45 -1
+192.168.3.46 -1
+192.168.3.47 -1
+192.168.3.48 -1
+192.168.3.49 -1
+192.168.3.50 -1
+192.168.3.51 -1
+192.168.3.52 -1
+192.168.3.53 -1
+192.168.3.54 -1
+192.168.3.55 -1
+192.168.3.56 -1
+192.168.3.57 -1
+192.168.3.58 -1
+192.168.3.59 -1
+192.168.3.60 -1
+192.168.3.61 -1
+192.168.3.62 -1
+192.168.3.63 -1
+192.168.3.64 -1
+192.168.3.65 -1
+192.168.3.66 -1
+192.168.3.67 -1
+192.168.3.68 -1
+192.168.3.69 -1
+192.168.3.70 -1
+192.168.3.71 -1
+192.168.3.72 -1
+192.168.3.73 -1
+192.168.3.74 -1
+192.168.3.75 -1
+192.168.3.76 -1
+192.168.3.77 -1
+192.168.3.78 -1
+192.168.3.79 -1
+192.168.3.80 -1
+192.168.3.81 -1
+192.168.3.82 -1
+192.168.3.83 -1
+192.168.3.84 -1
+192.168.3.85 -1
+192.168.3.86 -1
+192.168.3.87 -1
+192.168.3.88 -1
+192.168.3.89 -1
+192.168.3.90 -1
+192.168.4.1 -1
+192.168.4.2 -1
+192.168.4.3 -1
+192.168.4.4 -1
+192.168.4.5 -1
+192.168.4.6 -1
+192.168.4.7 -1
+192.168.4.8 -1
+192.168.4.9 -1
+192.168.4.10 -1
+192.168.4.11 -1
+192.168.4.12 -1
+192.168.4.13 -1
+192.168.4.14 -1
+192.168.4.15 -1
+192.168.4.16 -1
+192.168.4.17 -1
+192.168.4.18 -1
+192.168.4.19 -1
+192.168.4.20 -1
+192.168.4.21 -1
+192.168.4.22 -1
+192.168.4.23 -1
+192.168.4.24 -1
+192.168.4.25 -1
+192.168.4.26 -1
+192.168.4.27 -1
+192.168.4.28 -1
+192.168.4.29 -1
+192.168.4.30 -1
+192.168.4.31 -1
+192.168.4.32 -1
+192.168.4.33 -1
+192.168.4.34 -1
+192.168.4.35 -1
+192.168.4.36 -1
+192.168.4.37 -1
+192.168.4.38 -1
+192.168.4.39 -1
+192.168.4.40 -1
+192.168.4.41 -1
+192.168.4.42 -1
+192.168.4.43 -1
+192.168.4.44 -1
+192.168.4.45 -1
+192.168.4.46 -1
+192.168.4.47 -1
+192.168.4.48 -1
+192.168.4.49 -1
+192.168.4.50 -1
+192.168.4.51 -1
+192.168.4.52 -1
+192.168.4.53 -1
+192.168.4.54 -1
+192.168.4.55 -1
+192.168.4.56 -1
+192.168.4.57 -1
+192.168.4.58 -1
+192.168.4.59 -1
+192.168.4.60 -1
+192.168.4.61 -1
+192.168.4.62 -1
+192.168.4.63 -1
+192.168.4.64 -1
+192.168.4.65 -1
+192.168.4.66 -1
+192.168.4.67 -1
+192.168.4.68 -1
+192.168.4.69 -1
+192.168.4.70 -1
+192.168.4.71 -1
+192.168.4.72 -1
+192.168.4.73 -1
+192.168.4.74 -1
+192.168.4.75 -1
+192.168.4.76 -1
+192.168.4.77 -1
+192.168.4.78 -1
+192.168.4.79 -1
+192.168.4.80 -1
+192.168.4.81 -1
+192.168.4.82 -1
+192.168.4.83 -1
+192.168.4.84 -1
+192.168.4.85 -1
+192.168.4.86 -1
+192.168.4.87 -1
+192.168.4.88 -1
+192.168.4.89 -1
+192.168.4.90 -1
+192.168.5.1 -1
+192.168.5.2 -1
+192.168.5.3 -1
+192.168.5.4 -1
+192.168.5.5 -1
+192.168.5.6 -1
+192.168.5.7 -1
+192.168.5.8 -1
+192.168.5.9 -1
+192.168.5.10 -1
+192.168.5.11 -1
+192.168.5.12 -1
+192.168.5.13 -1
+192.168.5.14 -1
+192.168.5.15 -1
+192.168.5.16 -1
+192.168.5.17 -1
+192.168.5.18 -1
+192.168.5.19 -1
+192.168.5.20 -1
+192.168.5.21 -1
+192.168.5.22 -1
+192.168.5.23 -1
+192.168.5.24 -1
+192.168.5.25 -1
+192.168.5.26 -1
+192.168.5.27 -1
+192.168.5.28 -1
+192.168.5.29 -1
+192.168.5.30 -1
+192.168.5.31 -1
+192.168.5.32 -1
+192.168.5.33 -1
+192.168.5.34 -1
+192.168.5.35 -1
+192.168.5.36 -1
+192.168.5.37 -1
+192.168.5.38 -1
+192.168.5.39 -1
+192.168.5.40 -1
+192.168.5.41 -1
+192.168.5.42 -1
+192.168.5.43 -1
+192.168.5.44 -1
+192.168.5.45 -1
+192.168.5.46 -1
+192.168.5.47 -1
+192.168.5.48 -1
+192.168.5.49 -1
+192.168.5.50 -1
+192.168.5.51 -1
+192.168.5.52 -1
+192.168.5.53 -1
+192.168.5.54 -1
+192.168.5.55 -1
+192.168.5.56 -1
+192.168.5.57 -1
+192.168.5.58 -1
+192.168.5.59 -1
+192.168.5.60 -1
+192.168.5.61 -1
+192.168.5.62 -1
+192.168.5.63 -1
+192.168.5.64 -1
+192.168.5.65 -1
+192.168.5.66 -1
+192.168.5.67 -1
+192.168.5.68 -1
+192.168.5.69 -1
+192.168.5.70 -1
+192.168.5.71 -1
+192.168.5.72 -1
+192.168.5.73 -1
+192.168.5.74 -1
+192.168.5.75 -1
+192.168.5.76 -1
+192.168.5.77 -1
+192.168.5.78 -1
+192.168.5.79 -1
+192.168.5.80 -1
+192.168.5.81 -1
+192.168.5.82 -1
+192.168.5.83 -1
+192.168.5.84 -1
+192.168.5.85 -1
+192.168.5.86 -1
+192.168.5.87 -1
+192.168.5.88 -1
+192.168.5.89 -1
+192.168.5.90 -1
+192.168.6.1 -1
+192.168.6.2 -1
+192.168.6.3 -1
+192.168.6.4 -1
+192.168.6.5 -1
+192.168.6.6 -1
+192.168.6.7 -1
+192.168.6.8 -1
+192.168.6.9 -1
+192.168.6.10 -1
+192.168.6.11 -1
+192.168.6.12 -1
+192.168.6.13 -1
+192.168.6.14 -1
+192.168.6.15 -1
+192.168.6.16 -1
+192.168.6.17 -1
+192.168.6.18 -1
+192.168.6.19 -1
+192.168.6.20 -1
+192.168.6.21 -1
+192.168.6.22 -1
+192.168.6.23 -1
+192.168.6.24 -1
+192.168.6.25 -1
+192.168.6.26 -1
+192.168.6.27 -1
+192.168.6.28 -1
+192.168.6.29 -1
+192.168.6.30 -1
+192.168.6.31 -1
+192.168.6.32 -1
+192.168.6.33 -1
+192.168.6.34 -1
+192.168.6.35 -1
+192.168.6.36 -1
+192.168.6.37 -1
+192.168.6.38 -1
+192.168.6.39 -1
+192.168.6.40 -1
+192.168.6.41 -1
+192.168.6.42 -1
+192.168.6.43 -1
+192.168.6.44 -1
+192.168.6.45 -1
+192.168.6.46 -1
+192.168.6.47 -1
+192.168.6.48 -1
+192.168.6.49 -1
+192.168.6.50 -1
+192.168.6.51 -1
+192.168.6.52 -1
+192.168.6.53 -1
+192.168.6.54 -1
+192.168.6.55 -1
+192.168.6.56 -1
+192.168.6.57 -1
+192.168.6.58 -1
+192.168.6.59 -1
+192.168.6.60 -1
+192.168.6.61 -1
+192.168.6.62 -1
+192.168.6.63 -1
+192.168.6.64 -1
+192.168.6.65 -1
+192.168.6.66 -1
+192.168.6.67 -1
+192.168.6.68 -1
+192.168.6.69 -1
+192.168.6.70 -1
+192.168.6.71 -1
+192.168.6.72 -1
+192.168.6.73 -1
+192.168.6.74 -1
+192.168.6.75 -1
+192.168.6.76 -1
+192.168.6.77 -1
+192.168.6.78 -1
+192.168.6.79 -1
+192.168.6.80 -1
+192.168.6.81 -1
+192.168.6.82 -1
+192.168.6.83 -1
+192.168.6.84 -1
+192.168.6.85 -1
+192.168.6.86 -1
+192.168.6.87 -1
+192.168.6.88 -1
+192.168.6.89 -1
+192.168.6.90 -1
+192.168.7.1 -1
+192.168.7.2 -1
+192.168.7.3 -1
+192.168.7.4 -1
+192.168.7.5 -1
+192.168.7.6 -1
+192.168.7.7 -1
+192.168.7.8 -1
+192.168.7.9 -1
+192.168.7.10 -1
+192.168.7.11 -1
+192.168.7.12 -1
+192.168.7.13 -1
+192.168.7.14 -1
+192.168.7.15 -1
+192.168.7.16 -1
+192.168.7.17 -1
+192.168.7.18 -1
+192.168.7.19 -1
+192.168.7.20 -1
+192.168.7.21 -1
+192.168.7.22 -1
+192.168.7.23 -1
+192.168.7.24 -1
+192.168.7.25 -1
+192.168.7.26 -1
+192.168.7.27 -1
+192.168.7.28 -1
+192.168.7.29 -1
+192.168.7.30 -1
+192.168.7.31 -1
+192.168.7.32 -1
+192.168.7.33 -1
+192.168.7.34 -1
+192.168.7.35 -1
+192.168.7.36 -1
+192.168.7.37 -1
+192.168.7.38 -1
+192.168.7.39 -1
+192.168.7.40 -1
+192.168.7.41 -1
+192.168.7.42 -1
+192.168.7.43 -1
+192.168.7.44 -1
+192.168.7.45 -1
+192.168.7.46 -1
+192.168.7.47 -1
+192.168.7.48 -1
+192.168.7.49 -1
+192.168.7.50 -1
+192.168.7.51 -1
+192.168.7.52 -1
+192.168.7.53 -1
+192.168.7.54 -1
+192.168.7.55 -1
+192.168.7.56 -1
+192.168.7.57 -1
+192.168.7.58 -1
+192.168.7.59 -1
+192.168.7.60 -1
+192.168.7.61 -1
+192.168.7.62 -1
+192.168.7.63 -1
+192.168.7.64 -1
+192.168.7.65 -1
+192.168.7.66 -1
+192.168.7.67 -1
+192.168.7.68 -1
+192.168.7.69 -1
+192.168.7.70 -1
+192.168.7.71 -1
+192.168.7.72 -1
+192.168.7.73 -1
+192.168.7.74 -1
+192.168.7.75 -1
+192.168.7.76 -1
+192.168.7.77 -1
+192.168.7.78 -1
+192.168.7.79 -1
+192.168.7.80 -1
+192.168.7.81 -1
+192.168.7.82 -1
+192.168.7.83 -1
+192.168.7.84 -1
+192.168.7.85 -1
+192.168.7.86 -1
+192.168.7.87 -1
+192.168.7.88 -1
+192.168.7.89 -1
+192.168.7.90 -1
+192.168.8.1 -1
+192.168.8.2 -1
+192.168.8.3 -1
+192.168.8.4 -1
+192.168.8.5 -1
+192.168.8.6 -1
+192.168.8.7 -1
+192.168.8.8 -1
+192.168.8.9 -1
+192.168.8.10 -1
+192.168.8.11 -1
+192.168.8.12 -1
+192.168.8.13 -1
+192.168.8.14 -1
+192.168.8.15 -1
+192.168.8.16 -1
+192.168.8.17 -1
+192.168.8.18 -1
+192.168.8.19 -1
+192.168.8.20 -1
+192.168.8.21 -1
+192.168.8.22 -1
+192.168.8.23 -1
+192.168.8.24 -1
+192.168.8.25 -1
+192.168.8.26 -1
+192.168.8.27 -1
+192.168.8.28 -1
+192.168.8.29 -1
+192.168.8.30 -1
+192.168.8.31 -1
+192.168.8.32 -1
+192.168.8.33 -1
+192.168.8.34 -1
+192.168.8.35 -1
+192.168.8.36 -1
+192.168.8.37 -1
+192.168.8.38 -1
+192.168.8.39 -1
+192.168.8.40 -1
+192.168.8.41 -1
+192.168.8.42 -1
+192.168.8.43 -1
+192.168.8.44 -1
+192.168.8.45 -1
+192.168.8.46 -1
+192.168.8.47 -1
+192.168.8.48 -1
+192.168.8.49 -1
+192.168.8.50 -1
+192.168.8.51 -1
+192.168.8.52 -1
+192.168.8.53 -1
+192.168.8.54 -1
+192.168.8.55 -1
+192.168.8.56 -1
+192.168.8.57 -1
+192.168.8.58 -1
+192.168.8.59 -1
+192.168.8.60 -1
+192.168.8.61 -1
+192.168.8.62 -1
+192.168.8.63 -1
+192.168.8.64 -1
+192.168.8.65 -1
+192.168.8.66 -1
+192.168.8.67 -1
+192.168.8.68 -1
+192.168.8.69 -1
+192.168.8.70 -1
+192.168.8.71 -1
+192.168.8.72 -1
+192.168.8.73 -1
+192.168.8.74 -1
+192.168.8.75 -1
+192.168.8.76 -1
+192.168.8.77 -1
+192.168.8.78 -1
+192.168.8.79 -1
+192.168.8.80 -1
+192.168.8.81 -1
+192.168.8.82 -1
+192.168.8.83 -1
+192.168.8.84 -1
+192.168.8.85 -1
+192.168.8.86 -1
+192.168.8.87 -1
+192.168.8.88 -1
+192.168.8.89 -1
+192.168.8.90 -1
+192.168.9.1 -1
+192.168.9.2 -1
+192.168.9.3 -1
+192.168.9.4 -1
+192.168.9.5 -1
+192.168.9.6 -1
+192.168.9.7 -1
+192.168.9.8 -1
+192.168.9.9 -1
+192.168.9.10 -1
+192.168.9.11 -1
+192.168.9.12 -1
+192.168.9.13 -1
+192.168.9.14 -1
+192.168.9.15 -1
+192.168.9.16 -1
+192.168.9.17 -1
+192.168.9.18 -1
+192.168.9.19 -1
+192.168.9.20 -1
+192.168.9.21 -1
+192.168.9.22 -1
+192.168.9.23 -1
+192.168.9.24 -1
+192.168.9.25 -1
+192.168.9.26 -1
+192.168.9.27 -1
+192.168.9.28 -1
+192.168.9.29 -1
+192.168.9.30 -1
+192.168.9.31 -1
+192.168.9.32 -1
+192.168.9.33 -1
+192.168.9.34 -1
+192.168.9.35 -1
+192.168.9.36 -1
+192.168.9.37 -1
+192.168.9.38 -1
+192.168.9.39 -1
+192.168.9.40 -1
+192.168.9.41 -1
+192.168.9.42 -1
+192.168.9.43 -1
+192.168.9.44 -1
+192.168.9.45 -1
+192.168.9.46 -1
+192.168.9.47 -1
+192.168.9.48 -1
+192.168.9.49 -1
+192.168.9.50 -1
+192.168.9.51 -1
+192.168.9.52 -1
+192.168.9.53 -1
+192.168.9.54 -1
+192.168.9.55 -1
+192.168.9.56 -1
+192.168.9.57 -1
+192.168.9.58 -1
+192.168.9.59 -1
+192.168.9.60 -1
+192.168.9.61 -1
+192.168.9.62 -1
+192.168.9.63 -1
+192.168.9.64 -1
+192.168.9.65 -1
+192.168.9.66 -1
+192.168.9.67 -1
+192.168.9.68 -1
+192.168.9.69 -1
+192.168.9.70 -1
+192.168.9.71 -1
+192.168.9.72 -1
+192.168.9.73 -1
+192.168.9.74 -1
+192.168.9.75 -1
+192.168.9.76 -1
+192.168.9.77 -1
+192.168.9.78 -1
+192.168.9.79 -1
+192.168.9.80 -1
+192.168.9.81 -1
+192.168.9.82 -1
+192.168.9.83 -1
+192.168.9.84 -1
+192.168.9.85 -1
+192.168.9.86 -1
+192.168.9.87 -1
+192.168.9.88 -1
+192.168.9.89 -1
+192.168.9.90 -1
+192.168.10.1 -1
+192.168.10.2 -1
+192.168.10.3 -1
+192.168.10.4 -1
+192.168.10.5 -1
+192.168.10.6 -1
+192.168.10.7 -1
+192.168.10.8 -1
+192.168.10.9 -1
+192.168.10.10 -1
+192.168.10.11 -1
+192.168.10.12 -1
+192.168.10.13 -1
+192.168.10.14 -1
+192.168.10.15 -1
+192.168.10.16 -1
+192.168.10.17 -1
+192.168.10.18 -1
+192.168.10.19 -1
+192.168.10.20 -1
+192.168.10.21 -1
+192.168.10.22 -1
+192.168.10.23 -1
+192.168.10.24 -1
+192.168.10.25 -1
+192.168.10.26 -1
+192.168.10.27 -1
+192.168.10.28 -1
+192.168.10.29 -1
+192.168.10.30 -1
+192.168.10.31 -1
+192.168.10.32 -1
+192.168.10.33 -1
+192.168.10.34 -1
+192.168.10.35 -1
+192.168.10.36 -1
+192.168.10.37 -1
+192.168.10.38 -1
+192.168.10.39 -1
+192.168.10.40 -1
+192.168.10.41 -1
+192.168.10.42 -1
+192.168.10.43 -1
+192.168.10.44 -1
+192.168.10.45 -1
+192.168.10.46 -1
+192.168.10.47 -1
+192.168.10.48 -1
+192.168.10.49 -1
+192.168.10.50 -1
+192.168.10.51 -1
+192.168.10.52 -1
+192.168.10.53 -1
+192.168.10.54 -1
+192.168.10.55 -1
+192.168.10.56 -1
+192.168.10.57 -1
+192.168.10.58 -1
+192.168.10.59 -1
+192.168.10.60 -1
+192.168.10.61 -1
+192.168.10.62 -1
+192.168.10.63 -1
+192.168.10.64 -1
+192.168.10.65 -1
+192.168.10.66 -1
+192.168.10.67 -1
+192.168.10.68 -1
+192.168.10.69 -1
+192.168.10.70 -1
+192.168.10.71 -1
+192.168.10.72 -1
+192.168.10.73 -1
+192.168.10.74 -1
+192.168.10.75 -1
+192.168.10.76 -1
+192.168.10.77 -1
+192.168.10.78 -1
+192.168.10.79 -1
+192.168.10.80 -1
+192.168.10.81 -1
+192.168.10.82 -1
+192.168.10.83 -1
+192.168.10.84 -1
+192.168.10.85 -1
+192.168.10.86 -1
+192.168.10.87 -1
+192.168.10.88 -1
+192.168.10.89 -1
+192.168.10.90 -1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.031.sh b/ctdb/tests/UNIT/takeover/lcp2.031.sh
new file mode 100755
index 0000000..3a2cb79
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.031.sh
@@ -0,0 +1,143 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "12+4 IPs, 4 nodes, 3 -> 4 healthy"
+
+export CTDB_TEST_LOGLEVEL=DEBUG
+
+required_result <<EOF
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES (UNASSIGNED)
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [0]
+${TEST_DATE_STAMP} 1 [181370]
+${TEST_DATE_STAMP} 2 [128630]
+${TEST_DATE_STAMP} 3 [128881]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [181370]
+${TEST_DATE_STAMP} 1 [-64566] -> 130.216.30.178 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-64566] -> 130.216.30.176 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-64315] -> 130.216.30.175 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-64315] -> 130.216.30.171 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-52489] -> 10.19.99.253 -> 0 [+0]
+${TEST_DATE_STAMP} 1 [-52489] -> 10.19.99.250 -> 0 [+0]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-64566] -> 130.216.30.178 -> 0 [+0]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [0]
+${TEST_DATE_STAMP} 1 [116804]
+${TEST_DATE_STAMP} 2 [128630]
+${TEST_DATE_STAMP} 3 [128881]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 3 [128881]
+${TEST_DATE_STAMP} 3 [-55099] -> 130.216.30.180 -> 0 [+15625]
+${TEST_DATE_STAMP} 3 [-55099] -> 130.216.30.177 -> 0 [+15876]
+${TEST_DATE_STAMP} 3 [-55350] -> 130.216.30.174 -> 0 [+15129]
+${TEST_DATE_STAMP} 3 [-55350] -> 130.216.30.173 -> 0 [+15129]
+${TEST_DATE_STAMP} 3 [-36864] -> 10.19.99.252 -> 0 [+9216]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}3 [-55350] -> 130.216.30.174 -> 0 [+15129]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [15129]
+${TEST_DATE_STAMP} 1 [116804]
+${TEST_DATE_STAMP} 2 [128630]
+${TEST_DATE_STAMP} 3 [73531]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 2 [128630]
+${TEST_DATE_STAMP} 2 [-55099] -> 130.216.30.181 -> 0 [+30754]
+${TEST_DATE_STAMP} 2 [-55099] -> 130.216.30.179 -> 0 [+31258]
+${TEST_DATE_STAMP} 2 [-55099] -> 130.216.30.172 -> 0 [+31005]
+${TEST_DATE_STAMP} 2 [-55099] -> 130.216.30.170 -> 0 [+30754]
+${TEST_DATE_STAMP} 2 [-36864] -> 10.19.99.251 -> 0 [+18432]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}2 [-55099] -> 130.216.30.181 -> 0 [+30754]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [45883]
+${TEST_DATE_STAMP} 1 [116804]
+${TEST_DATE_STAMP} 2 [73531]
+${TEST_DATE_STAMP} 3 [73531]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [116804]
+${TEST_DATE_STAMP} 1 [-48690] -> 130.216.30.176 -> 0 [+46630]
+${TEST_DATE_STAMP} 1 [-49186] -> 130.216.30.175 -> 0 [+46387]
+${TEST_DATE_STAMP} 1 [-49186] -> 130.216.30.171 -> 0 [+45883]
+${TEST_DATE_STAMP} 1 [-43273] -> 10.19.99.253 -> 0 [+27648]
+${TEST_DATE_STAMP} 1 [-43273] -> 10.19.99.250 -> 0 [+27648]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP}1 [-43273] -> 10.19.99.253 -> 0 [+27648]
+${TEST_DATE_STAMP}+++++++++++++++++++++++++++++++++++++++++
+${TEST_DATE_STAMP}Selecting most imbalanced node from:
+${TEST_DATE_STAMP} 0 [73531]
+${TEST_DATE_STAMP} 1 [73531]
+${TEST_DATE_STAMP} 2 [73531]
+${TEST_DATE_STAMP} 3 [73531]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 0 [73531]
+${TEST_DATE_STAMP} 0 [-39970] -> 130.216.30.181 -> 0 [+39970]
+${TEST_DATE_STAMP} 0 [-39970] -> 130.216.30.178 -> 0 [+39970]
+${TEST_DATE_STAMP} 0 [-39474] -> 130.216.30.174 -> 0 [+39474]
+${TEST_DATE_STAMP} 0 [-27648] -> 10.19.99.253 -> 0 [+27648]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 1 [73531]
+${TEST_DATE_STAMP} 1 [-39474] -> 130.216.30.176 -> 0 [+55846]
+${TEST_DATE_STAMP} 1 [-39970] -> 130.216.30.175 -> 0 [+55603]
+${TEST_DATE_STAMP} 1 [-39970] -> 130.216.30.171 -> 0 [+55099]
+${TEST_DATE_STAMP} 1 [-27648] -> 10.19.99.250 -> 0 [+43273]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 2 [73531]
+${TEST_DATE_STAMP} 2 [-39474] -> 130.216.30.179 -> 0 [+56099]
+${TEST_DATE_STAMP} 2 [-39970] -> 130.216.30.172 -> 0 [+55350]
+${TEST_DATE_STAMP} 2 [-39970] -> 130.216.30.170 -> 0 [+55099]
+${TEST_DATE_STAMP} 2 [-27648] -> 10.19.99.251 -> 0 [+43273]
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} ----------------------------------------
+${TEST_DATE_STAMP} CONSIDERING MOVES FROM 3 [73531]
+${TEST_DATE_STAMP} 3 [-39970] -> 130.216.30.180 -> 0 [+56099]
+${TEST_DATE_STAMP} 3 [-39970] -> 130.216.30.177 -> 0 [+55846]
+${TEST_DATE_STAMP} 3 [-39474] -> 130.216.30.173 -> 0 [+55350]
+${TEST_DATE_STAMP} 3 [-27648] -> 10.19.99.252 -> 0 [+43777]
+${TEST_DATE_STAMP} ----------------------------------------
+130.216.30.181 0
+130.216.30.180 3
+130.216.30.179 2
+130.216.30.178 0
+130.216.30.177 3
+130.216.30.176 1
+130.216.30.175 1
+130.216.30.174 0
+130.216.30.173 3
+130.216.30.172 2
+130.216.30.171 1
+130.216.30.170 2
+10.19.99.253 0
+10.19.99.252 3
+10.19.99.251 2
+10.19.99.250 1
+EOF
+
+simple_test 0,0,0,0 <<EOF
+10.19.99.250 1
+10.19.99.251 2
+10.19.99.252 3
+10.19.99.253 1
+130.216.30.170 2
+130.216.30.171 1
+130.216.30.172 2
+130.216.30.173 3
+130.216.30.174 3
+130.216.30.175 1
+130.216.30.176 1
+130.216.30.177 3
+130.216.30.178 1
+130.216.30.179 2
+130.216.30.180 3
+130.216.30.181 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.032.sh b/ctdb/tests/UNIT/takeover/lcp2.032.sh
new file mode 100755
index 0000000..fa032f4
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.032.sh
@@ -0,0 +1,450 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "12+4 IPs, 4 nodes, multiple transitions"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+set -e
+
+echo "Node 3 stopped -> continue node 3, all healthy"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 3
+130.216.30.179 2
+130.216.30.178 1
+130.216.30.177 3
+130.216.30.176 0
+130.216.30.175 1
+130.216.30.174 0
+130.216.30.173 3
+130.216.30.172 2
+130.216.30.171 1
+130.216.30.170 0
+10.19.99.253 1
+10.19.99.252 3
+10.19.99.251 2
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0,0 <<EOF
+10.19.99.250 0
+10.19.99.251 2
+10.19.99.252 0
+10.19.99.253 1
+130.216.30.170 0
+130.216.30.171 1
+130.216.30.172 2
+130.216.30.173 2
+130.216.30.174 0
+130.216.30.175 1
+130.216.30.176 0
+130.216.30.177 0
+130.216.30.178 1
+130.216.30.179 2
+130.216.30.180 1
+130.216.30.181 2
+EOF
+
+echo "All healthy -> stop node 0"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 3
+130.216.30.179 2
+130.216.30.178 1
+130.216.30.177 3
+130.216.30.176 1
+130.216.30.175 1
+130.216.30.174 3
+130.216.30.173 3
+130.216.30.172 2
+130.216.30.171 1
+130.216.30.170 2
+10.19.99.253 1
+10.19.99.252 3
+10.19.99.251 2
+10.19.99.250 1
+EOF
+
+simple_test 0x20,0,0,0 <<EOF
+$_out
+EOF
+
+echo "Continue node 0, all healthy"
+
+required_result <<EOF
+130.216.30.181 0
+130.216.30.180 3
+130.216.30.179 2
+130.216.30.178 0
+130.216.30.177 3
+130.216.30.176 1
+130.216.30.175 1
+130.216.30.174 0
+130.216.30.173 3
+130.216.30.172 2
+130.216.30.171 1
+130.216.30.170 2
+10.19.99.253 0
+10.19.99.252 3
+10.19.99.251 2
+10.19.99.250 1
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
+echo "All healthy -> stop node 1"
+
+required_result <<EOF
+130.216.30.181 0
+130.216.30.180 3
+130.216.30.179 2
+130.216.30.178 0
+130.216.30.177 3
+130.216.30.176 2
+130.216.30.175 0
+130.216.30.174 0
+130.216.30.173 3
+130.216.30.172 2
+130.216.30.171 3
+130.216.30.170 2
+10.19.99.253 0
+10.19.99.252 3
+10.19.99.251 2
+10.19.99.250 0
+EOF
+
+simple_test 0,0x20,0,0 <<EOF
+$_out
+EOF
+
+echo "Continue node 1, all healthy"
+
+required_result <<EOF
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 1
+130.216.30.178 0
+130.216.30.177 3
+130.216.30.176 2
+130.216.30.175 1
+130.216.30.174 0
+130.216.30.173 3
+130.216.30.172 2
+130.216.30.171 3
+130.216.30.170 2
+10.19.99.253 1
+10.19.99.252 3
+10.19.99.251 2
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
+echo "All healthy -> Stop node 2"
+
+required_result <<EOF
+130.216.30.181 0
+130.216.30.180 1
+130.216.30.179 1
+130.216.30.178 0
+130.216.30.177 3
+130.216.30.176 3
+130.216.30.175 1
+130.216.30.174 0
+130.216.30.173 3
+130.216.30.172 1
+130.216.30.171 3
+130.216.30.170 0
+10.19.99.253 1
+10.19.99.252 3
+10.19.99.251 1
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0x20,0 <<EOF
+$_out
+EOF
+
+echo "Continue node 2, all healthy"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 1
+130.216.30.179 1
+130.216.30.178 0
+130.216.30.177 2
+130.216.30.176 3
+130.216.30.175 2
+130.216.30.174 0
+130.216.30.173 3
+130.216.30.172 1
+130.216.30.171 3
+130.216.30.170 0
+10.19.99.253 2
+10.19.99.252 3
+10.19.99.251 1
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
+echo "All healthy -> stop node 3"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 1
+130.216.30.179 1
+130.216.30.178 0
+130.216.30.177 2
+130.216.30.176 0
+130.216.30.175 2
+130.216.30.174 0
+130.216.30.173 2
+130.216.30.172 1
+130.216.30.171 1
+130.216.30.170 0
+10.19.99.253 2
+10.19.99.252 0
+10.19.99.251 1
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0,0x20 <<EOF
+$_out
+EOF
+
+echo "Continue node 3, all healthy"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 3
+130.216.30.179 1
+130.216.30.178 3
+130.216.30.177 2
+130.216.30.176 0
+130.216.30.175 3
+130.216.30.174 0
+130.216.30.173 2
+130.216.30.172 1
+130.216.30.171 1
+130.216.30.170 0
+10.19.99.253 2
+10.19.99.252 3
+10.19.99.251 1
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
+echo "All healthy -> node 0 stopped"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 3
+130.216.30.179 1
+130.216.30.178 3
+130.216.30.177 2
+130.216.30.176 1
+130.216.30.175 3
+130.216.30.174 2
+130.216.30.173 2
+130.216.30.172 1
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 2
+10.19.99.252 3
+10.19.99.251 1
+10.19.99.250 2
+EOF
+
+simple_test 0x20,0,0,0 <<EOF
+$_out
+EOF
+
+echo "Continue node 0, all healthy"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 0
+130.216.30.179 0
+130.216.30.178 3
+130.216.30.177 2
+130.216.30.176 1
+130.216.30.175 3
+130.216.30.174 0
+130.216.30.173 2
+130.216.30.172 1
+130.216.30.171 1
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 3
+10.19.99.251 1
+10.19.99.250 2
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
+echo "All healthy -> node 1 stopped"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 0
+130.216.30.179 0
+130.216.30.178 3
+130.216.30.177 2
+130.216.30.176 3
+130.216.30.175 3
+130.216.30.174 0
+130.216.30.173 2
+130.216.30.172 0
+130.216.30.171 2
+130.216.30.170 3
+10.19.99.253 0
+10.19.99.252 3
+10.19.99.251 0
+10.19.99.250 2
+EOF
+
+simple_test 0,0x20,0,0 <<EOF
+$_out
+EOF
+
+echo "Continue node 1, all healthy"
+
+required_result <<EOF
+130.216.30.181 1
+130.216.30.180 0
+130.216.30.179 0
+130.216.30.178 1
+130.216.30.177 2
+130.216.30.176 3
+130.216.30.175 3
+130.216.30.174 1
+130.216.30.173 2
+130.216.30.172 0
+130.216.30.171 2
+130.216.30.170 3
+10.19.99.253 1
+10.19.99.252 3
+10.19.99.251 0
+10.19.99.250 2
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
+echo "All healthy -> node 2 stopped"
+
+required_result <<EOF
+130.216.30.181 1
+130.216.30.180 0
+130.216.30.179 0
+130.216.30.178 1
+130.216.30.177 3
+130.216.30.176 3
+130.216.30.175 3
+130.216.30.174 1
+130.216.30.173 1
+130.216.30.172 0
+130.216.30.171 0
+130.216.30.170 3
+10.19.99.253 1
+10.19.99.252 3
+10.19.99.251 0
+10.19.99.250 1
+EOF
+
+simple_test 0,0,0x20,0 <<EOF
+$_out
+EOF
+
+echo "Continue node 2, all healthy"
+
+required_result <<EOF
+130.216.30.181 1
+130.216.30.180 2
+130.216.30.179 0
+130.216.30.178 1
+130.216.30.177 2
+130.216.30.176 3
+130.216.30.175 3
+130.216.30.174 2
+130.216.30.173 1
+130.216.30.172 0
+130.216.30.171 0
+130.216.30.170 3
+10.19.99.253 2
+10.19.99.252 3
+10.19.99.251 0
+10.19.99.250 1
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
+echo "All healthy -> node 3 stopped"
+
+required_result <<EOF
+130.216.30.181 1
+130.216.30.180 2
+130.216.30.179 0
+130.216.30.178 1
+130.216.30.177 2
+130.216.30.176 0
+130.216.30.175 2
+130.216.30.174 2
+130.216.30.173 1
+130.216.30.172 0
+130.216.30.171 0
+130.216.30.170 1
+10.19.99.253 2
+10.19.99.252 0
+10.19.99.251 0
+10.19.99.250 1
+EOF
+
+simple_test 0,0,0,0x20 <<EOF
+$_out
+EOF
+
+echo "Continue node 3, all healthy"
+
+required_result <<EOF
+130.216.30.181 3
+130.216.30.180 2
+130.216.30.179 3
+130.216.30.178 1
+130.216.30.177 2
+130.216.30.176 0
+130.216.30.175 3
+130.216.30.174 2
+130.216.30.173 1
+130.216.30.172 0
+130.216.30.171 0
+130.216.30.170 1
+10.19.99.253 2
+10.19.99.252 3
+10.19.99.251 0
+10.19.99.250 1
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
+
diff --git a/ctdb/tests/UNIT/takeover/lcp2.033.sh b/ctdb/tests/UNIT/takeover/lcp2.033.sh
new file mode 100755
index 0000000..206699a
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.033.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "12+4 IPs, 4 nodes, 2 -> 3 -> 4 healthy"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+set -e
+
+echo "Nodes 2, 3 disconnected -> node 2 attaches"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 0
+130.216.30.179 2
+130.216.30.178 1
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 2
+130.216.30.173 1
+130.216.30.172 2
+130.216.30.171 1
+130.216.30.170 0
+10.19.99.253 2
+10.19.99.252 0
+10.19.99.251 1
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0,1 <<EOF
+10.19.99.253 1
+10.19.99.252 0
+10.19.99.251 1
+10.19.99.250 0
+130.216.30.181 1
+130.216.30.180 0
+130.216.30.179 0
+130.216.30.178 1
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 1
+130.216.30.173 1
+130.216.30.172 0
+130.216.30.171 1
+130.216.30.170 0
+EOF
+
+echo "Node 3 attaches"
+
+required_result <<EOF
+130.216.30.181 2
+130.216.30.180 3
+130.216.30.179 3
+130.216.30.178 1
+130.216.30.177 0
+130.216.30.176 1
+130.216.30.175 0
+130.216.30.174 2
+130.216.30.173 3
+130.216.30.172 2
+130.216.30.171 1
+130.216.30.170 0
+10.19.99.253 2
+10.19.99.252 3
+10.19.99.251 1
+10.19.99.250 0
+EOF
+
+simple_test 0,0,0,0 <<EOF
+$_out
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.034.sh b/ctdb/tests/UNIT/takeover/lcp2.034.sh
new file mode 100755
index 0000000..6cea2d5
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.034.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 without IP addresses"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.140.4 0
+192.168.140.3 1
+192.168.140.2 0
+192.168.140.1 1
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.140.1 -1 0,1
+192.168.140.2 -1 0,1
+192.168.140.3 -1 0,1
+192.168.140.4 -1 0,1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/lcp2.035.sh b/ctdb/tests/UNIT/takeover/lcp2.035.sh
new file mode 100755
index 0000000..2bb58f5
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/lcp2.035.sh
@@ -0,0 +1,1813 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "900 IPs, all 5 nodes healthy, all assigned, no-op"
+
+export CTDB_TEST_LOGLEVEL=ERR
+
+required_result <<EOF
+192.168.10.90 0
+192.168.10.89 1
+192.168.10.88 2
+192.168.10.87 3
+192.168.10.86 4
+192.168.10.85 0
+192.168.10.84 1
+192.168.10.83 2
+192.168.10.82 3
+192.168.10.81 4
+192.168.10.80 0
+192.168.10.79 0
+192.168.10.78 1
+192.168.10.77 2
+192.168.10.76 3
+192.168.10.75 4
+192.168.10.74 1
+192.168.10.73 2
+192.168.10.72 3
+192.168.10.71 3
+192.168.10.70 4
+192.168.10.69 0
+192.168.10.68 1
+192.168.10.67 2
+192.168.10.66 4
+192.168.10.65 0
+192.168.10.64 1
+192.168.10.63 0
+192.168.10.62 1
+192.168.10.61 2
+192.168.10.60 3
+192.168.10.59 4
+192.168.10.58 2
+192.168.10.57 3
+192.168.10.56 0
+192.168.10.55 0
+192.168.10.54 1
+192.168.10.53 2
+192.168.10.52 3
+192.168.10.51 4
+192.168.10.50 1
+192.168.10.49 4
+192.168.10.48 2
+192.168.10.47 0
+192.168.10.46 1
+192.168.10.45 2
+192.168.10.44 3
+192.168.10.43 4
+192.168.10.42 2
+192.168.10.41 3
+192.168.10.40 1
+192.168.10.39 3
+192.168.10.38 4
+192.168.10.37 0
+192.168.10.36 1
+192.168.10.35 2
+192.168.10.34 4
+192.168.10.33 0
+192.168.10.32 3
+192.168.10.31 0
+192.168.10.30 1
+192.168.10.29 2
+192.168.10.28 3
+192.168.10.27 4
+192.168.10.26 3
+192.168.10.25 2
+192.168.10.24 0
+192.168.10.23 3
+192.168.10.22 4
+192.168.10.21 0
+192.168.10.20 1
+192.168.10.19 2
+192.168.10.18 4
+192.168.10.17 1
+192.168.10.16 4
+192.168.10.15 0
+192.168.10.14 1
+192.168.10.13 2
+192.168.10.12 3
+192.168.10.11 4
+192.168.10.10 2
+192.168.10.9 3
+192.168.10.8 4
+192.168.10.7 0
+192.168.10.6 1
+192.168.10.5 2
+192.168.10.4 3
+192.168.10.3 4
+192.168.10.2 0
+192.168.10.1 1
+192.168.9.90 0
+192.168.9.89 1
+192.168.9.88 2
+192.168.9.87 3
+192.168.9.86 4
+192.168.9.85 0
+192.168.9.84 1
+192.168.9.83 2
+192.168.9.82 3
+192.168.9.81 4
+192.168.9.80 0
+192.168.9.79 0
+192.168.9.78 1
+192.168.9.77 2
+192.168.9.76 3
+192.168.9.75 4
+192.168.9.74 1
+192.168.9.73 2
+192.168.9.72 3
+192.168.9.71 3
+192.168.9.70 4
+192.168.9.69 0
+192.168.9.68 1
+192.168.9.67 2
+192.168.9.66 4
+192.168.9.65 0
+192.168.9.64 1
+192.168.9.63 0
+192.168.9.62 1
+192.168.9.61 2
+192.168.9.60 3
+192.168.9.59 4
+192.168.9.58 2
+192.168.9.57 3
+192.168.9.56 4
+192.168.9.55 0
+192.168.9.54 1
+192.168.9.53 2
+192.168.9.52 3
+192.168.9.51 4
+192.168.9.50 0
+192.168.9.49 1
+192.168.9.48 2
+192.168.9.47 0
+192.168.9.46 1
+192.168.9.45 2
+192.168.9.44 3
+192.168.9.43 4
+192.168.9.42 2
+192.168.9.41 4
+192.168.9.40 3
+192.168.9.39 0
+192.168.9.38 1
+192.168.9.37 2
+192.168.9.36 3
+192.168.9.35 4
+192.168.9.34 0
+192.168.9.33 1
+192.168.9.32 4
+192.168.9.31 0
+192.168.9.30 1
+192.168.9.29 2
+192.168.9.28 3
+192.168.9.27 4
+192.168.9.26 2
+192.168.9.25 3
+192.168.9.24 0
+192.168.9.23 3
+192.168.9.22 4
+192.168.9.21 0
+192.168.9.20 1
+192.168.9.19 2
+192.168.9.18 4
+192.168.9.17 1
+192.168.9.16 3
+192.168.9.15 0
+192.168.9.14 1
+192.168.9.13 2
+192.168.9.12 3
+192.168.9.11 4
+192.168.9.10 2
+192.168.9.9 4
+192.168.9.8 3
+192.168.9.7 0
+192.168.9.6 1
+192.168.9.5 2
+192.168.9.4 3
+192.168.9.3 4
+192.168.9.2 0
+192.168.9.1 1
+192.168.8.90 0
+192.168.8.89 1
+192.168.8.88 2
+192.168.8.87 3
+192.168.8.86 4
+192.168.8.85 0
+192.168.8.84 1
+192.168.8.83 2
+192.168.8.82 3
+192.168.8.81 4
+192.168.8.80 0
+192.168.8.79 0
+192.168.8.78 1
+192.168.8.77 2
+192.168.8.76 3
+192.168.8.75 4
+192.168.8.74 1
+192.168.8.73 2
+192.168.8.72 3
+192.168.8.71 3
+192.168.8.70 4
+192.168.8.69 0
+192.168.8.68 1
+192.168.8.67 2
+192.168.8.66 4
+192.168.8.65 3
+192.168.8.64 0
+192.168.8.63 0
+192.168.8.62 1
+192.168.8.61 2
+192.168.8.60 3
+192.168.8.59 4
+192.168.8.58 1
+192.168.8.57 2
+192.168.8.56 3
+192.168.8.55 0
+192.168.8.54 1
+192.168.8.53 2
+192.168.8.52 3
+192.168.8.51 4
+192.168.8.50 0
+192.168.8.49 4
+192.168.8.48 1
+192.168.8.47 0
+192.168.8.46 1
+192.168.8.45 2
+192.168.8.44 3
+192.168.8.43 4
+192.168.8.42 2
+192.168.8.41 1
+192.168.8.40 4
+192.168.8.39 0
+192.168.8.38 1
+192.168.8.37 2
+192.168.8.36 3
+192.168.8.35 4
+192.168.8.34 3
+192.168.8.33 0
+192.168.8.32 2
+192.168.8.31 0
+192.168.8.30 1
+192.168.8.29 2
+192.168.8.28 3
+192.168.8.27 4
+192.168.8.26 2
+192.168.8.25 1
+192.168.8.24 3
+192.168.8.23 3
+192.168.8.22 4
+192.168.8.21 0
+192.168.8.20 1
+192.168.8.19 2
+192.168.8.18 4
+192.168.8.17 0
+192.168.8.16 4
+192.168.8.15 0
+192.168.8.14 1
+192.168.8.13 2
+192.168.8.12 3
+192.168.8.11 4
+192.168.8.10 1
+192.168.8.9 2
+192.168.8.8 4
+192.168.8.7 0
+192.168.8.6 1
+192.168.8.5 2
+192.168.8.4 3
+192.168.8.3 4
+192.168.8.2 3
+192.168.8.1 0
+192.168.7.90 0
+192.168.7.89 1
+192.168.7.88 2
+192.168.7.87 3
+192.168.7.86 4
+192.168.7.85 0
+192.168.7.84 1
+192.168.7.83 2
+192.168.7.82 3
+192.168.7.81 4
+192.168.7.80 1
+192.168.7.79 0
+192.168.7.78 1
+192.168.7.77 2
+192.168.7.76 3
+192.168.7.75 4
+192.168.7.74 2
+192.168.7.73 3
+192.168.7.72 0
+192.168.7.71 3
+192.168.7.70 4
+192.168.7.69 0
+192.168.7.68 1
+192.168.7.67 2
+192.168.7.66 4
+192.168.7.65 1
+192.168.7.64 3
+192.168.7.63 0
+192.168.7.62 1
+192.168.7.61 2
+192.168.7.60 3
+192.168.7.59 4
+192.168.7.58 2
+192.168.7.57 0
+192.168.7.56 1
+192.168.7.55 0
+192.168.7.54 1
+192.168.7.53 2
+192.168.7.52 3
+192.168.7.51 4
+192.168.7.50 3
+192.168.7.49 4
+192.168.7.48 2
+192.168.7.47 0
+192.168.7.46 1
+192.168.7.45 2
+192.168.7.44 3
+192.168.7.43 4
+192.168.7.42 2
+192.168.7.41 0
+192.168.7.40 1
+192.168.7.39 4
+192.168.7.38 0
+192.168.7.37 1
+192.168.7.36 2
+192.168.7.35 3
+192.168.7.34 4
+192.168.7.33 3
+192.168.7.32 0
+192.168.7.31 0
+192.168.7.30 1
+192.168.7.29 2
+192.168.7.28 3
+192.168.7.27 4
+192.168.7.26 2
+192.168.7.25 0
+192.168.7.24 1
+192.168.7.23 3
+192.168.7.22 4
+192.168.7.21 0
+192.168.7.20 1
+192.168.7.19 2
+192.168.7.18 4
+192.168.7.17 3
+192.168.7.16 4
+192.168.7.15 0
+192.168.7.14 1
+192.168.7.13 2
+192.168.7.12 3
+192.168.7.11 4
+192.168.7.10 3
+192.168.7.9 2
+192.168.7.8 0
+192.168.7.7 2
+192.168.7.6 4
+192.168.7.5 0
+192.168.7.4 1
+192.168.7.3 3
+192.168.7.2 4
+192.168.7.1 1
+192.168.6.90 0
+192.168.6.89 1
+192.168.6.88 2
+192.168.6.87 3
+192.168.6.86 4
+192.168.6.85 0
+192.168.6.84 1
+192.168.6.83 2
+192.168.6.82 4
+192.168.6.81 3
+192.168.6.80 0
+192.168.6.79 0
+192.168.6.78 1
+192.168.6.77 2
+192.168.6.76 3
+192.168.6.75 4
+192.168.6.74 2
+192.168.6.73 3
+192.168.6.72 1
+192.168.6.71 3
+192.168.6.70 4
+192.168.6.69 0
+192.168.6.68 1
+192.168.6.67 2
+192.168.6.66 4
+192.168.6.65 0
+192.168.6.64 1
+192.168.6.63 0
+192.168.6.62 1
+192.168.6.61 2
+192.168.6.60 3
+192.168.6.59 4
+192.168.6.58 2
+192.168.6.57 3
+192.168.6.56 0
+192.168.6.55 3
+192.168.6.54 4
+192.168.6.53 1
+192.168.6.52 2
+192.168.6.51 0
+192.168.6.50 4
+192.168.6.49 1
+192.168.6.48 2
+192.168.6.47 0
+192.168.6.46 1
+192.168.6.45 2
+192.168.6.44 3
+192.168.6.43 4
+192.168.6.42 2
+192.168.6.41 4
+192.168.6.40 3
+192.168.6.39 0
+192.168.6.38 1
+192.168.6.37 2
+192.168.6.36 3
+192.168.6.35 4
+192.168.6.34 0
+192.168.6.33 1
+192.168.6.32 4
+192.168.6.31 0
+192.168.6.30 1
+192.168.6.29 2
+192.168.6.28 3
+192.168.6.27 4
+192.168.6.26 2
+192.168.6.25 3
+192.168.6.24 0
+192.168.6.23 3
+192.168.6.22 4
+192.168.6.21 0
+192.168.6.20 1
+192.168.6.19 2
+192.168.6.18 4
+192.168.6.17 1
+192.168.6.16 3
+192.168.6.15 0
+192.168.6.14 1
+192.168.6.13 2
+192.168.6.12 3
+192.168.6.11 4
+192.168.6.10 2
+192.168.6.9 3
+192.168.6.8 4
+192.168.6.7 0
+192.168.6.6 1
+192.168.6.5 2
+192.168.6.4 3
+192.168.6.3 4
+192.168.6.2 0
+192.168.6.1 1
+192.168.5.90 0
+192.168.5.89 1
+192.168.5.88 2
+192.168.5.87 3
+192.168.5.86 4
+192.168.5.85 0
+192.168.5.84 1
+192.168.5.83 2
+192.168.5.82 4
+192.168.5.81 3
+192.168.5.80 0
+192.168.5.79 0
+192.168.5.78 1
+192.168.5.77 2
+192.168.5.76 3
+192.168.5.75 4
+192.168.5.74 2
+192.168.5.73 3
+192.168.5.72 1
+192.168.5.71 3
+192.168.5.70 4
+192.168.5.69 2
+192.168.5.68 0
+192.168.5.67 1
+192.168.5.66 4
+192.168.5.65 2
+192.168.5.64 0
+192.168.5.63 0
+192.168.5.62 1
+192.168.5.61 2
+192.168.5.60 3
+192.168.5.59 4
+192.168.5.58 1
+192.168.5.57 3
+192.168.5.56 2
+192.168.5.55 0
+192.168.5.54 1
+192.168.5.53 2
+192.168.5.52 3
+192.168.5.51 4
+192.168.5.50 0
+192.168.5.49 4
+192.168.5.48 1
+192.168.5.47 0
+192.168.5.46 1
+192.168.5.45 2
+192.168.5.44 3
+192.168.5.43 4
+192.168.5.42 1
+192.168.5.41 3
+192.168.5.40 2
+192.168.5.39 2
+192.168.5.38 3
+192.168.5.37 4
+192.168.5.36 0
+192.168.5.35 1
+192.168.5.34 4
+192.168.5.33 0
+192.168.5.32 4
+192.168.5.31 0
+192.168.5.30 1
+192.168.5.29 2
+192.168.5.28 3
+192.168.5.27 4
+192.168.5.26 1
+192.168.5.25 3
+192.168.5.24 2
+192.168.5.23 3
+192.168.5.22 4
+192.168.5.21 2
+192.168.5.20 0
+192.168.5.19 1
+192.168.5.18 4
+192.168.5.17 0
+192.168.5.16 3
+192.168.5.15 0
+192.168.5.14 1
+192.168.5.13 2
+192.168.5.12 3
+192.168.5.11 4
+192.168.5.10 1
+192.168.5.9 4
+192.168.5.8 3
+192.168.5.7 0
+192.168.5.6 1
+192.168.5.5 2
+192.168.5.4 3
+192.168.5.3 4
+192.168.5.2 2
+192.168.5.1 0
+192.168.4.90 0
+192.168.4.89 1
+192.168.4.88 2
+192.168.4.87 3
+192.168.4.86 4
+192.168.4.85 0
+192.168.4.84 1
+192.168.4.83 2
+192.168.4.82 3
+192.168.4.81 4
+192.168.4.80 0
+192.168.4.79 0
+192.168.4.78 1
+192.168.4.77 2
+192.168.4.76 3
+192.168.4.75 4
+192.168.4.74 1
+192.168.4.73 2
+192.168.4.72 3
+192.168.4.71 3
+192.168.4.70 4
+192.168.4.69 0
+192.168.4.68 1
+192.168.4.67 2
+192.168.4.66 4
+192.168.4.65 1
+192.168.4.64 3
+192.168.4.63 0
+192.168.4.62 1
+192.168.4.61 2
+192.168.4.60 3
+192.168.4.59 4
+192.168.4.58 0
+192.168.4.57 2
+192.168.4.56 1
+192.168.4.55 0
+192.168.4.54 1
+192.168.4.53 2
+192.168.4.52 3
+192.168.4.51 4
+192.168.4.50 3
+192.168.4.49 4
+192.168.4.48 0
+192.168.4.47 0
+192.168.4.46 1
+192.168.4.45 2
+192.168.4.44 3
+192.168.4.43 4
+192.168.4.42 2
+192.168.4.41 0
+192.168.4.40 1
+192.168.4.39 4
+192.168.4.38 0
+192.168.4.37 1
+192.168.4.36 2
+192.168.4.35 3
+192.168.4.34 4
+192.168.4.33 3
+192.168.4.32 2
+192.168.4.31 0
+192.168.4.30 1
+192.168.4.29 2
+192.168.4.28 3
+192.168.4.27 4
+192.168.4.26 0
+192.168.4.25 2
+192.168.4.24 1
+192.168.4.23 3
+192.168.4.22 4
+192.168.4.21 0
+192.168.4.20 1
+192.168.4.19 2
+192.168.4.18 4
+192.168.4.17 3
+192.168.4.16 1
+192.168.4.15 0
+192.168.4.14 1
+192.168.4.13 2
+192.168.4.12 3
+192.168.4.11 4
+192.168.4.10 3
+192.168.4.9 0
+192.168.4.8 2
+192.168.4.7 2
+192.168.4.6 3
+192.168.4.5 4
+192.168.4.4 0
+192.168.4.3 1
+192.168.4.2 4
+192.168.4.1 4
+192.168.3.90 0
+192.168.3.89 1
+192.168.3.88 2
+192.168.3.87 3
+192.168.3.86 4
+192.168.3.85 0
+192.168.3.84 1
+192.168.3.83 2
+192.168.3.82 3
+192.168.3.81 4
+192.168.3.80 0
+192.168.3.79 0
+192.168.3.78 1
+192.168.3.77 2
+192.168.3.76 3
+192.168.3.75 4
+192.168.3.74 1
+192.168.3.73 2
+192.168.3.72 3
+192.168.3.71 3
+192.168.3.70 4
+192.168.3.69 0
+192.168.3.68 1
+192.168.3.67 2
+192.168.3.66 4
+192.168.3.65 0
+192.168.3.64 3
+192.168.3.63 0
+192.168.3.62 1
+192.168.3.61 2
+192.168.3.60 3
+192.168.3.59 4
+192.168.3.58 2
+192.168.3.57 1
+192.168.3.56 3
+192.168.3.55 0
+192.168.3.54 1
+192.168.3.53 2
+192.168.3.52 3
+192.168.3.51 4
+192.168.3.50 0
+192.168.3.49 4
+192.168.3.48 2
+192.168.3.47 0
+192.168.3.46 1
+192.168.3.45 2
+192.168.3.44 3
+192.168.3.43 4
+192.168.3.42 2
+192.168.3.41 1
+192.168.3.40 0
+192.168.3.39 1
+192.168.3.38 2
+192.168.3.37 3
+192.168.3.36 4
+192.168.3.35 0
+192.168.3.34 4
+192.168.3.33 3
+192.168.3.32 4
+192.168.3.31 0
+192.168.3.30 1
+192.168.3.29 2
+192.168.3.28 3
+192.168.3.27 4
+192.168.3.26 2
+192.168.3.25 1
+192.168.3.24 0
+192.168.3.23 3
+192.168.3.22 4
+192.168.3.21 0
+192.168.3.20 1
+192.168.3.19 2
+192.168.3.18 4
+192.168.3.17 3
+192.168.3.16 1
+192.168.3.15 0
+192.168.3.14 1
+192.168.3.13 2
+192.168.3.12 3
+192.168.3.11 4
+192.168.3.10 2
+192.168.3.9 1
+192.168.3.8 0
+192.168.3.7 4
+192.168.3.6 0
+192.168.3.5 1
+192.168.3.4 2
+192.168.3.3 3
+192.168.3.2 4
+192.168.3.1 3
+192.168.2.90 0
+192.168.2.89 1
+192.168.2.88 2
+192.168.2.87 3
+192.168.2.86 4
+192.168.2.85 0
+192.168.2.84 1
+192.168.2.83 2
+192.168.2.82 3
+192.168.2.81 4
+192.168.2.80 1
+192.168.2.79 0
+192.168.2.78 1
+192.168.2.77 2
+192.168.2.76 3
+192.168.2.75 4
+192.168.2.74 2
+192.168.2.73 3
+192.168.2.72 0
+192.168.2.71 3
+192.168.2.70 4
+192.168.2.69 0
+192.168.2.68 1
+192.168.2.67 2
+192.168.2.66 4
+192.168.2.65 1
+192.168.2.64 3
+192.168.2.63 0
+192.168.2.62 1
+192.168.2.61 2
+192.168.2.60 3
+192.168.2.59 4
+192.168.2.58 0
+192.168.2.57 2
+192.168.2.56 1
+192.168.2.55 0
+192.168.2.54 1
+192.168.2.53 2
+192.168.2.52 3
+192.168.2.51 4
+192.168.2.50 3
+192.168.2.49 4
+192.168.2.48 0
+192.168.2.47 0
+192.168.2.46 1
+192.168.2.45 2
+192.168.2.44 3
+192.168.2.43 4
+192.168.2.42 2
+192.168.2.41 0
+192.168.2.40 1
+192.168.2.39 0
+192.168.2.38 1
+192.168.2.37 2
+192.168.2.36 3
+192.168.2.35 4
+192.168.2.34 3
+192.168.2.33 4
+192.168.2.32 2
+192.168.2.31 0
+192.168.2.30 1
+192.168.2.29 2
+192.168.2.28 3
+192.168.2.27 4
+192.168.2.26 2
+192.168.2.25 0
+192.168.2.24 1
+192.168.2.23 3
+192.168.2.22 4
+192.168.2.21 0
+192.168.2.20 1
+192.168.2.19 2
+192.168.2.18 4
+192.168.2.17 3
+192.168.2.16 4
+192.168.2.15 0
+192.168.2.14 1
+192.168.2.13 2
+192.168.2.12 3
+192.168.2.11 4
+192.168.2.10 0
+192.168.2.9 2
+192.168.2.8 3
+192.168.2.7 2
+192.168.2.6 4
+192.168.2.5 0
+192.168.2.4 1
+192.168.2.3 3
+192.168.2.2 4
+192.168.2.1 1
+192.168.1.90 0
+192.168.1.89 1
+192.168.1.88 2
+192.168.1.87 3
+192.168.1.86 4
+192.168.1.85 0
+192.168.1.84 1
+192.168.1.83 2
+192.168.1.82 3
+192.168.1.81 4
+192.168.1.80 0
+192.168.1.79 0
+192.168.1.78 1
+192.168.1.77 2
+192.168.1.76 3
+192.168.1.75 4
+192.168.1.74 1
+192.168.1.73 2
+192.168.1.72 3
+192.168.1.71 3
+192.168.1.70 4
+192.168.1.69 0
+192.168.1.68 1
+192.168.1.67 2
+192.168.1.66 4
+192.168.1.65 0
+192.168.1.64 1
+192.168.1.63 0
+192.168.1.62 1
+192.168.1.61 2
+192.168.1.60 3
+192.168.1.59 4
+192.168.1.58 2
+192.168.1.57 3
+192.168.1.56 1
+192.168.1.55 0
+192.168.1.54 1
+192.168.1.53 2
+192.168.1.52 3
+192.168.1.51 4
+192.168.1.50 0
+192.168.1.49 4
+192.168.1.48 2
+192.168.1.47 0
+192.168.1.46 1
+192.168.1.45 2
+192.168.1.44 3
+192.168.1.43 4
+192.168.1.42 2
+192.168.1.41 3
+192.168.1.40 0
+192.168.1.39 3
+192.168.1.38 4
+192.168.1.37 0
+192.168.1.36 1
+192.168.1.35 2
+192.168.1.34 4
+192.168.1.33 1
+192.168.1.32 3
+192.168.1.31 0
+192.168.1.30 1
+192.168.1.29 2
+192.168.1.28 3
+192.168.1.27 4
+192.168.1.26 2
+192.168.1.25 3
+192.168.1.24 0
+192.168.1.23 3
+192.168.1.22 4
+192.168.1.21 0
+192.168.1.20 1
+192.168.1.19 2
+192.168.1.18 4
+192.168.1.17 1
+192.168.1.16 4
+192.168.1.15 0
+192.168.1.14 1
+192.168.1.13 2
+192.168.1.12 3
+192.168.1.11 4
+192.168.1.10 2
+192.168.1.9 3
+192.168.1.8 0
+192.168.1.7 3
+192.168.1.6 4
+192.168.1.5 0
+192.168.1.4 1
+192.168.1.3 2
+192.168.1.2 4
+192.168.1.1 1
+EOF
+
+simple_test 0,0,0,0,0 <<EOF
+192.168.10.90 0
+192.168.10.89 1
+192.168.10.88 2
+192.168.10.87 3
+192.168.10.86 4
+192.168.10.85 0
+192.168.10.84 1
+192.168.10.83 2
+192.168.10.82 3
+192.168.10.81 4
+192.168.10.80 0
+192.168.10.79 0
+192.168.10.78 1
+192.168.10.77 2
+192.168.10.76 3
+192.168.10.75 4
+192.168.10.74 1
+192.168.10.73 2
+192.168.10.72 3
+192.168.10.71 3
+192.168.10.70 4
+192.168.10.69 0
+192.168.10.68 1
+192.168.10.67 2
+192.168.10.66 4
+192.168.10.65 0
+192.168.10.64 1
+192.168.10.63 0
+192.168.10.62 1
+192.168.10.61 2
+192.168.10.60 3
+192.168.10.59 4
+192.168.10.58 2
+192.168.10.57 3
+192.168.10.56 0
+192.168.10.55 0
+192.168.10.54 1
+192.168.10.53 2
+192.168.10.52 3
+192.168.10.51 4
+192.168.10.50 1
+192.168.10.49 4
+192.168.10.48 2
+192.168.10.47 0
+192.168.10.46 1
+192.168.10.45 2
+192.168.10.44 3
+192.168.10.43 4
+192.168.10.42 2
+192.168.10.41 3
+192.168.10.40 1
+192.168.10.39 3
+192.168.10.38 4
+192.168.10.37 0
+192.168.10.36 1
+192.168.10.35 2
+192.168.10.34 4
+192.168.10.33 0
+192.168.10.32 3
+192.168.10.31 0
+192.168.10.30 1
+192.168.10.29 2
+192.168.10.28 3
+192.168.10.27 4
+192.168.10.26 3
+192.168.10.25 2
+192.168.10.24 0
+192.168.10.23 3
+192.168.10.22 4
+192.168.10.21 0
+192.168.10.20 1
+192.168.10.19 2
+192.168.10.18 4
+192.168.10.17 1
+192.168.10.16 4
+192.168.10.15 0
+192.168.10.14 1
+192.168.10.13 2
+192.168.10.12 3
+192.168.10.11 4
+192.168.10.10 2
+192.168.10.9 3
+192.168.10.8 4
+192.168.10.7 0
+192.168.10.6 1
+192.168.10.5 2
+192.168.10.4 3
+192.168.10.3 4
+192.168.10.2 0
+192.168.10.1 1
+192.168.9.90 0
+192.168.9.89 1
+192.168.9.88 2
+192.168.9.87 3
+192.168.9.86 4
+192.168.9.85 0
+192.168.9.84 1
+192.168.9.83 2
+192.168.9.82 3
+192.168.9.81 4
+192.168.9.80 0
+192.168.9.79 0
+192.168.9.78 1
+192.168.9.77 2
+192.168.9.76 3
+192.168.9.75 4
+192.168.9.74 1
+192.168.9.73 2
+192.168.9.72 3
+192.168.9.71 3
+192.168.9.70 4
+192.168.9.69 0
+192.168.9.68 1
+192.168.9.67 2
+192.168.9.66 4
+192.168.9.65 0
+192.168.9.64 1
+192.168.9.63 0
+192.168.9.62 1
+192.168.9.61 2
+192.168.9.60 3
+192.168.9.59 4
+192.168.9.58 2
+192.168.9.57 3
+192.168.9.56 4
+192.168.9.55 0
+192.168.9.54 1
+192.168.9.53 2
+192.168.9.52 3
+192.168.9.51 4
+192.168.9.50 0
+192.168.9.49 1
+192.168.9.48 2
+192.168.9.47 0
+192.168.9.46 1
+192.168.9.45 2
+192.168.9.44 3
+192.168.9.43 4
+192.168.9.42 2
+192.168.9.41 4
+192.168.9.40 3
+192.168.9.39 0
+192.168.9.38 1
+192.168.9.37 2
+192.168.9.36 3
+192.168.9.35 4
+192.168.9.34 0
+192.168.9.33 1
+192.168.9.32 4
+192.168.9.31 0
+192.168.9.30 1
+192.168.9.29 2
+192.168.9.28 3
+192.168.9.27 4
+192.168.9.26 2
+192.168.9.25 3
+192.168.9.24 0
+192.168.9.23 3
+192.168.9.22 4
+192.168.9.21 0
+192.168.9.20 1
+192.168.9.19 2
+192.168.9.18 4
+192.168.9.17 1
+192.168.9.16 3
+192.168.9.15 0
+192.168.9.14 1
+192.168.9.13 2
+192.168.9.12 3
+192.168.9.11 4
+192.168.9.10 2
+192.168.9.9 4
+192.168.9.8 3
+192.168.9.7 0
+192.168.9.6 1
+192.168.9.5 2
+192.168.9.4 3
+192.168.9.3 4
+192.168.9.2 0
+192.168.9.1 1
+192.168.8.90 0
+192.168.8.89 1
+192.168.8.88 2
+192.168.8.87 3
+192.168.8.86 4
+192.168.8.85 0
+192.168.8.84 1
+192.168.8.83 2
+192.168.8.82 3
+192.168.8.81 4
+192.168.8.80 0
+192.168.8.79 0
+192.168.8.78 1
+192.168.8.77 2
+192.168.8.76 3
+192.168.8.75 4
+192.168.8.74 1
+192.168.8.73 2
+192.168.8.72 3
+192.168.8.71 3
+192.168.8.70 4
+192.168.8.69 0
+192.168.8.68 1
+192.168.8.67 2
+192.168.8.66 4
+192.168.8.65 3
+192.168.8.64 0
+192.168.8.63 0
+192.168.8.62 1
+192.168.8.61 2
+192.168.8.60 3
+192.168.8.59 4
+192.168.8.58 1
+192.168.8.57 2
+192.168.8.56 3
+192.168.8.55 0
+192.168.8.54 1
+192.168.8.53 2
+192.168.8.52 3
+192.168.8.51 4
+192.168.8.50 0
+192.168.8.49 4
+192.168.8.48 1
+192.168.8.47 0
+192.168.8.46 1
+192.168.8.45 2
+192.168.8.44 3
+192.168.8.43 4
+192.168.8.42 2
+192.168.8.41 1
+192.168.8.40 4
+192.168.8.39 0
+192.168.8.38 1
+192.168.8.37 2
+192.168.8.36 3
+192.168.8.35 4
+192.168.8.34 3
+192.168.8.33 0
+192.168.8.32 2
+192.168.8.31 0
+192.168.8.30 1
+192.168.8.29 2
+192.168.8.28 3
+192.168.8.27 4
+192.168.8.26 2
+192.168.8.25 1
+192.168.8.24 3
+192.168.8.23 3
+192.168.8.22 4
+192.168.8.21 0
+192.168.8.20 1
+192.168.8.19 2
+192.168.8.18 4
+192.168.8.17 0
+192.168.8.16 4
+192.168.8.15 0
+192.168.8.14 1
+192.168.8.13 2
+192.168.8.12 3
+192.168.8.11 4
+192.168.8.10 1
+192.168.8.9 2
+192.168.8.8 4
+192.168.8.7 0
+192.168.8.6 1
+192.168.8.5 2
+192.168.8.4 3
+192.168.8.3 4
+192.168.8.2 3
+192.168.8.1 0
+192.168.7.90 0
+192.168.7.89 1
+192.168.7.88 2
+192.168.7.87 3
+192.168.7.86 4
+192.168.7.85 0
+192.168.7.84 1
+192.168.7.83 2
+192.168.7.82 3
+192.168.7.81 4
+192.168.7.80 1
+192.168.7.79 0
+192.168.7.78 1
+192.168.7.77 2
+192.168.7.76 3
+192.168.7.75 4
+192.168.7.74 2
+192.168.7.73 3
+192.168.7.72 0
+192.168.7.71 3
+192.168.7.70 4
+192.168.7.69 0
+192.168.7.68 1
+192.168.7.67 2
+192.168.7.66 4
+192.168.7.65 1
+192.168.7.64 3
+192.168.7.63 0
+192.168.7.62 1
+192.168.7.61 2
+192.168.7.60 3
+192.168.7.59 4
+192.168.7.58 2
+192.168.7.57 0
+192.168.7.56 1
+192.168.7.55 0
+192.168.7.54 1
+192.168.7.53 2
+192.168.7.52 3
+192.168.7.51 4
+192.168.7.50 3
+192.168.7.49 4
+192.168.7.48 2
+192.168.7.47 0
+192.168.7.46 1
+192.168.7.45 2
+192.168.7.44 3
+192.168.7.43 4
+192.168.7.42 2
+192.168.7.41 0
+192.168.7.40 1
+192.168.7.39 4
+192.168.7.38 0
+192.168.7.37 1
+192.168.7.36 2
+192.168.7.35 3
+192.168.7.34 4
+192.168.7.33 3
+192.168.7.32 0
+192.168.7.31 0
+192.168.7.30 1
+192.168.7.29 2
+192.168.7.28 3
+192.168.7.27 4
+192.168.7.26 2
+192.168.7.25 0
+192.168.7.24 1
+192.168.7.23 3
+192.168.7.22 4
+192.168.7.21 0
+192.168.7.20 1
+192.168.7.19 2
+192.168.7.18 4
+192.168.7.17 3
+192.168.7.16 4
+192.168.7.15 0
+192.168.7.14 1
+192.168.7.13 2
+192.168.7.12 3
+192.168.7.11 4
+192.168.7.10 3
+192.168.7.9 2
+192.168.7.8 0
+192.168.7.7 2
+192.168.7.6 4
+192.168.7.5 0
+192.168.7.4 1
+192.168.7.3 3
+192.168.7.2 4
+192.168.7.1 1
+192.168.6.90 0
+192.168.6.89 1
+192.168.6.88 2
+192.168.6.87 3
+192.168.6.86 4
+192.168.6.85 0
+192.168.6.84 1
+192.168.6.83 2
+192.168.6.82 4
+192.168.6.81 3
+192.168.6.80 0
+192.168.6.79 0
+192.168.6.78 1
+192.168.6.77 2
+192.168.6.76 3
+192.168.6.75 4
+192.168.6.74 2
+192.168.6.73 3
+192.168.6.72 1
+192.168.6.71 3
+192.168.6.70 4
+192.168.6.69 0
+192.168.6.68 1
+192.168.6.67 2
+192.168.6.66 4
+192.168.6.65 0
+192.168.6.64 1
+192.168.6.63 0
+192.168.6.62 1
+192.168.6.61 2
+192.168.6.60 3
+192.168.6.59 4
+192.168.6.58 2
+192.168.6.57 3
+192.168.6.56 0
+192.168.6.55 3
+192.168.6.54 4
+192.168.6.53 1
+192.168.6.52 2
+192.168.6.51 0
+192.168.6.50 4
+192.168.6.49 1
+192.168.6.48 2
+192.168.6.47 0
+192.168.6.46 1
+192.168.6.45 2
+192.168.6.44 3
+192.168.6.43 4
+192.168.6.42 2
+192.168.6.41 4
+192.168.6.40 3
+192.168.6.39 0
+192.168.6.38 1
+192.168.6.37 2
+192.168.6.36 3
+192.168.6.35 4
+192.168.6.34 0
+192.168.6.33 1
+192.168.6.32 4
+192.168.6.31 0
+192.168.6.30 1
+192.168.6.29 2
+192.168.6.28 3
+192.168.6.27 4
+192.168.6.26 2
+192.168.6.25 3
+192.168.6.24 0
+192.168.6.23 3
+192.168.6.22 4
+192.168.6.21 0
+192.168.6.20 1
+192.168.6.19 2
+192.168.6.18 4
+192.168.6.17 1
+192.168.6.16 3
+192.168.6.15 0
+192.168.6.14 1
+192.168.6.13 2
+192.168.6.12 3
+192.168.6.11 4
+192.168.6.10 2
+192.168.6.9 3
+192.168.6.8 4
+192.168.6.7 0
+192.168.6.6 1
+192.168.6.5 2
+192.168.6.4 3
+192.168.6.3 4
+192.168.6.2 0
+192.168.6.1 1
+192.168.5.90 0
+192.168.5.89 1
+192.168.5.88 2
+192.168.5.87 3
+192.168.5.86 4
+192.168.5.85 0
+192.168.5.84 1
+192.168.5.83 2
+192.168.5.82 4
+192.168.5.81 3
+192.168.5.80 0
+192.168.5.79 0
+192.168.5.78 1
+192.168.5.77 2
+192.168.5.76 3
+192.168.5.75 4
+192.168.5.74 2
+192.168.5.73 3
+192.168.5.72 1
+192.168.5.71 3
+192.168.5.70 4
+192.168.5.69 2
+192.168.5.68 0
+192.168.5.67 1
+192.168.5.66 4
+192.168.5.65 2
+192.168.5.64 0
+192.168.5.63 0
+192.168.5.62 1
+192.168.5.61 2
+192.168.5.60 3
+192.168.5.59 4
+192.168.5.58 1
+192.168.5.57 3
+192.168.5.56 2
+192.168.5.55 0
+192.168.5.54 1
+192.168.5.53 2
+192.168.5.52 3
+192.168.5.51 4
+192.168.5.50 0
+192.168.5.49 4
+192.168.5.48 1
+192.168.5.47 0
+192.168.5.46 1
+192.168.5.45 2
+192.168.5.44 3
+192.168.5.43 4
+192.168.5.42 1
+192.168.5.41 3
+192.168.5.40 2
+192.168.5.39 2
+192.168.5.38 3
+192.168.5.37 4
+192.168.5.36 0
+192.168.5.35 1
+192.168.5.34 4
+192.168.5.33 0
+192.168.5.32 4
+192.168.5.31 0
+192.168.5.30 1
+192.168.5.29 2
+192.168.5.28 3
+192.168.5.27 4
+192.168.5.26 1
+192.168.5.25 3
+192.168.5.24 2
+192.168.5.23 3
+192.168.5.22 4
+192.168.5.21 2
+192.168.5.20 0
+192.168.5.19 1
+192.168.5.18 4
+192.168.5.17 0
+192.168.5.16 3
+192.168.5.15 0
+192.168.5.14 1
+192.168.5.13 2
+192.168.5.12 3
+192.168.5.11 4
+192.168.5.10 1
+192.168.5.9 4
+192.168.5.8 3
+192.168.5.7 0
+192.168.5.6 1
+192.168.5.5 2
+192.168.5.4 3
+192.168.5.3 4
+192.168.5.2 2
+192.168.5.1 0
+192.168.4.90 0
+192.168.4.89 1
+192.168.4.88 2
+192.168.4.87 3
+192.168.4.86 4
+192.168.4.85 0
+192.168.4.84 1
+192.168.4.83 2
+192.168.4.82 3
+192.168.4.81 4
+192.168.4.80 0
+192.168.4.79 0
+192.168.4.78 1
+192.168.4.77 2
+192.168.4.76 3
+192.168.4.75 4
+192.168.4.74 1
+192.168.4.73 2
+192.168.4.72 3
+192.168.4.71 3
+192.168.4.70 4
+192.168.4.69 0
+192.168.4.68 1
+192.168.4.67 2
+192.168.4.66 4
+192.168.4.65 1
+192.168.4.64 3
+192.168.4.63 0
+192.168.4.62 1
+192.168.4.61 2
+192.168.4.60 3
+192.168.4.59 4
+192.168.4.58 0
+192.168.4.57 2
+192.168.4.56 1
+192.168.4.55 0
+192.168.4.54 1
+192.168.4.53 2
+192.168.4.52 3
+192.168.4.51 4
+192.168.4.50 3
+192.168.4.49 4
+192.168.4.48 0
+192.168.4.47 0
+192.168.4.46 1
+192.168.4.45 2
+192.168.4.44 3
+192.168.4.43 4
+192.168.4.42 2
+192.168.4.41 0
+192.168.4.40 1
+192.168.4.39 4
+192.168.4.38 0
+192.168.4.37 1
+192.168.4.36 2
+192.168.4.35 3
+192.168.4.34 4
+192.168.4.33 3
+192.168.4.32 2
+192.168.4.31 0
+192.168.4.30 1
+192.168.4.29 2
+192.168.4.28 3
+192.168.4.27 4
+192.168.4.26 0
+192.168.4.25 2
+192.168.4.24 1
+192.168.4.23 3
+192.168.4.22 4
+192.168.4.21 0
+192.168.4.20 1
+192.168.4.19 2
+192.168.4.18 4
+192.168.4.17 3
+192.168.4.16 1
+192.168.4.15 0
+192.168.4.14 1
+192.168.4.13 2
+192.168.4.12 3
+192.168.4.11 4
+192.168.4.10 3
+192.168.4.9 0
+192.168.4.8 2
+192.168.4.7 2
+192.168.4.6 3
+192.168.4.5 4
+192.168.4.4 0
+192.168.4.3 1
+192.168.4.2 4
+192.168.4.1 4
+192.168.3.90 0
+192.168.3.89 1
+192.168.3.88 2
+192.168.3.87 3
+192.168.3.86 4
+192.168.3.85 0
+192.168.3.84 1
+192.168.3.83 2
+192.168.3.82 3
+192.168.3.81 4
+192.168.3.80 0
+192.168.3.79 0
+192.168.3.78 1
+192.168.3.77 2
+192.168.3.76 3
+192.168.3.75 4
+192.168.3.74 1
+192.168.3.73 2
+192.168.3.72 3
+192.168.3.71 3
+192.168.3.70 4
+192.168.3.69 0
+192.168.3.68 1
+192.168.3.67 2
+192.168.3.66 4
+192.168.3.65 0
+192.168.3.64 3
+192.168.3.63 0
+192.168.3.62 1
+192.168.3.61 2
+192.168.3.60 3
+192.168.3.59 4
+192.168.3.58 2
+192.168.3.57 1
+192.168.3.56 3
+192.168.3.55 0
+192.168.3.54 1
+192.168.3.53 2
+192.168.3.52 3
+192.168.3.51 4
+192.168.3.50 0
+192.168.3.49 4
+192.168.3.48 2
+192.168.3.47 0
+192.168.3.46 1
+192.168.3.45 2
+192.168.3.44 3
+192.168.3.43 4
+192.168.3.42 2
+192.168.3.41 1
+192.168.3.40 0
+192.168.3.39 1
+192.168.3.38 2
+192.168.3.37 3
+192.168.3.36 4
+192.168.3.35 0
+192.168.3.34 4
+192.168.3.33 3
+192.168.3.32 4
+192.168.3.31 0
+192.168.3.30 1
+192.168.3.29 2
+192.168.3.28 3
+192.168.3.27 4
+192.168.3.26 2
+192.168.3.25 1
+192.168.3.24 0
+192.168.3.23 3
+192.168.3.22 4
+192.168.3.21 0
+192.168.3.20 1
+192.168.3.19 2
+192.168.3.18 4
+192.168.3.17 3
+192.168.3.16 1
+192.168.3.15 0
+192.168.3.14 1
+192.168.3.13 2
+192.168.3.12 3
+192.168.3.11 4
+192.168.3.10 2
+192.168.3.9 1
+192.168.3.8 0
+192.168.3.7 4
+192.168.3.6 0
+192.168.3.5 1
+192.168.3.4 2
+192.168.3.3 3
+192.168.3.2 4
+192.168.3.1 3
+192.168.2.90 0
+192.168.2.89 1
+192.168.2.88 2
+192.168.2.87 3
+192.168.2.86 4
+192.168.2.85 0
+192.168.2.84 1
+192.168.2.83 2
+192.168.2.82 3
+192.168.2.81 4
+192.168.2.80 1
+192.168.2.79 0
+192.168.2.78 1
+192.168.2.77 2
+192.168.2.76 3
+192.168.2.75 4
+192.168.2.74 2
+192.168.2.73 3
+192.168.2.72 0
+192.168.2.71 3
+192.168.2.70 4
+192.168.2.69 0
+192.168.2.68 1
+192.168.2.67 2
+192.168.2.66 4
+192.168.2.65 1
+192.168.2.64 3
+192.168.2.63 0
+192.168.2.62 1
+192.168.2.61 2
+192.168.2.60 3
+192.168.2.59 4
+192.168.2.58 0
+192.168.2.57 2
+192.168.2.56 1
+192.168.2.55 0
+192.168.2.54 1
+192.168.2.53 2
+192.168.2.52 3
+192.168.2.51 4
+192.168.2.50 3
+192.168.2.49 4
+192.168.2.48 0
+192.168.2.47 0
+192.168.2.46 1
+192.168.2.45 2
+192.168.2.44 3
+192.168.2.43 4
+192.168.2.42 2
+192.168.2.41 0
+192.168.2.40 1
+192.168.2.39 0
+192.168.2.38 1
+192.168.2.37 2
+192.168.2.36 3
+192.168.2.35 4
+192.168.2.34 3
+192.168.2.33 4
+192.168.2.32 2
+192.168.2.31 0
+192.168.2.30 1
+192.168.2.29 2
+192.168.2.28 3
+192.168.2.27 4
+192.168.2.26 2
+192.168.2.25 0
+192.168.2.24 1
+192.168.2.23 3
+192.168.2.22 4
+192.168.2.21 0
+192.168.2.20 1
+192.168.2.19 2
+192.168.2.18 4
+192.168.2.17 3
+192.168.2.16 4
+192.168.2.15 0
+192.168.2.14 1
+192.168.2.13 2
+192.168.2.12 3
+192.168.2.11 4
+192.168.2.10 0
+192.168.2.9 2
+192.168.2.8 3
+192.168.2.7 2
+192.168.2.6 4
+192.168.2.5 0
+192.168.2.4 1
+192.168.2.3 3
+192.168.2.2 4
+192.168.2.1 1
+192.168.1.90 0
+192.168.1.89 1
+192.168.1.88 2
+192.168.1.87 3
+192.168.1.86 4
+192.168.1.85 0
+192.168.1.84 1
+192.168.1.83 2
+192.168.1.82 3
+192.168.1.81 4
+192.168.1.80 0
+192.168.1.79 0
+192.168.1.78 1
+192.168.1.77 2
+192.168.1.76 3
+192.168.1.75 4
+192.168.1.74 1
+192.168.1.73 2
+192.168.1.72 3
+192.168.1.71 3
+192.168.1.70 4
+192.168.1.69 0
+192.168.1.68 1
+192.168.1.67 2
+192.168.1.66 4
+192.168.1.65 0
+192.168.1.64 1
+192.168.1.63 0
+192.168.1.62 1
+192.168.1.61 2
+192.168.1.60 3
+192.168.1.59 4
+192.168.1.58 2
+192.168.1.57 3
+192.168.1.56 1
+192.168.1.55 0
+192.168.1.54 1
+192.168.1.53 2
+192.168.1.52 3
+192.168.1.51 4
+192.168.1.50 0
+192.168.1.49 4
+192.168.1.48 2
+192.168.1.47 0
+192.168.1.46 1
+192.168.1.45 2
+192.168.1.44 3
+192.168.1.43 4
+192.168.1.42 2
+192.168.1.41 3
+192.168.1.40 0
+192.168.1.39 3
+192.168.1.38 4
+192.168.1.37 0
+192.168.1.36 1
+192.168.1.35 2
+192.168.1.34 4
+192.168.1.33 1
+192.168.1.32 3
+192.168.1.31 0
+192.168.1.30 1
+192.168.1.29 2
+192.168.1.28 3
+192.168.1.27 4
+192.168.1.26 2
+192.168.1.25 3
+192.168.1.24 0
+192.168.1.23 3
+192.168.1.22 4
+192.168.1.21 0
+192.168.1.20 1
+192.168.1.19 2
+192.168.1.18 4
+192.168.1.17 1
+192.168.1.16 4
+192.168.1.15 0
+192.168.1.14 1
+192.168.1.13 2
+192.168.1.12 3
+192.168.1.11 4
+192.168.1.10 2
+192.168.1.9 3
+192.168.1.8 0
+192.168.1.7 3
+192.168.1.6 4
+192.168.1.5 0
+192.168.1.4 1
+192.168.1.3 2
+192.168.1.2 4
+192.168.1.1 1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/nondet.001.sh b/ctdb/tests/UNIT/takeover/nondet.001.sh
new file mode 100755
index 0000000..5f838ee
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/nondet.001.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 healthy"
+
+required_result <<EOF
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.252 from 0
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.252 from 0
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.250 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.249 from 0
+192.168.21.254 2
+192.168.21.253 2
+192.168.21.252 2
+192.168.20.254 2
+192.168.20.253 2
+192.168.20.252 2
+192.168.20.251 2
+192.168.20.250 2
+192.168.20.249 2
+EOF
+
+simple_test 2,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/nondet.002.sh b/ctdb/tests/UNIT/takeover/nondet.002.sh
new file mode 100755
index 0000000..bc80f5c
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/nondet.002.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 2 healthy"
+
+required_result <<EOF
+${TEST_DATE_STAMP}Unassign IP: 192.168.21.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.253 from 1
+${TEST_DATE_STAMP}Unassign IP: 192.168.20.250 from 1
+192.168.21.254 2
+192.168.21.253 0
+192.168.21.252 0
+192.168.20.254 2
+192.168.20.253 2
+192.168.20.252 0
+192.168.20.251 2
+192.168.20.250 0
+192.168.20.249 0
+EOF
+
+simple_test 0,2,0 <<EOF
+192.168.20.249 0
+192.168.20.250 1
+192.168.20.251 2
+192.168.20.252 0
+192.168.20.253 1
+192.168.20.254 2
+192.168.21.252 0
+192.168.21.253 1
+192.168.21.254 2
+EOF
diff --git a/ctdb/tests/UNIT/takeover/nondet.003.sh b/ctdb/tests/UNIT/takeover/nondet.003.sh
new file mode 100755
index 0000000..2a9dfb4
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/nondet.003.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 -> all healthy"
+
+required_result <<EOF
+192.168.21.254 0
+192.168.21.253 2
+192.168.21.252 0
+192.168.20.254 2
+192.168.20.253 0
+192.168.20.252 2
+192.168.20.251 1
+192.168.20.250 1
+192.168.20.249 1
+EOF
+
+simple_test 0,0,0 <<EOF
+192.168.20.249 1
+192.168.20.250 1
+192.168.20.251 1
+192.168.20.252 1
+192.168.20.253 1
+192.168.20.254 1
+192.168.21.252 1
+192.168.21.253 1
+192.168.21.254 1
+EOF
diff --git a/ctdb/tests/UNIT/takeover/scripts/local.sh b/ctdb/tests/UNIT/takeover/scripts/local.sh
new file mode 100644
index 0000000..0db3d90
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover/scripts/local.sh
@@ -0,0 +1,30 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+test_prog="ctdb_takeover_tests ipalloc"
+
+define_test ()
+{
+ _f=$(basename "$0" ".sh")
+
+ export CTDB_IP_ALGORITHM="${_f%%.*}"
+ case "$CTDB_IP_ALGORITHM" in
+ lcp2|nondet|det) : ;;
+ *) die "Unknown algorithm for testcase \"$_f\"" ;;
+ esac
+
+ printf "%-12s - %s\n" "$_f" "$1"
+}
+
+extra_footer ()
+{
+ cat <<EOF
+--------------------------------------------------
+Algorithm: $CTDB_IP_ALGORITHM
+--------------------------------------------------
+EOF
+}
+
+simple_test ()
+{
+ unit_test $VALGRIND $test_prog "$@"
+}
diff --git a/ctdb/tests/UNIT/takeover_helper/000.sh b/ctdb/tests/UNIT/takeover_helper/000.sh
new file mode 100755
index 0000000..3cb9635
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/000.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, no IPs"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+No nodes available to host public IPs yet
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/010.sh b/ctdb/tests/UNIT/takeover_helper/010.sh
new file mode 100755
index 0000000..1275156
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/010.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+
+ok_null
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/011.sh b/ctdb/tests/UNIT/takeover_helper/011.sh
new file mode 100755
index 0000000..12a2a1a
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/011.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 ok, IPs all unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2 CURRENT RECMASTER
+1 192.168.20.42 0x2
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+
+ok_null
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 2
+10.0.0.33 2
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/012.sh b/ctdb/tests/UNIT/takeover_helper/012.sh
new file mode 100755
index 0000000..04e4508
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/012.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, 1 IP unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 2
+10.0.0.33 1
+EOF
+
+ok_null
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 2
+10.0.0.33 1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/013.sh b/ctdb/tests/UNIT/takeover_helper/013.sh
new file mode 100755
index 0000000..ad55564
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/013.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 unhealthy, IPs all assigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x2
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 2
+10.0.0.33 1
+EOF
+
+ok_null
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 0
+10.0.0.33 1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/014.sh b/ctdb/tests/UNIT/takeover_helper/014.sh
new file mode 100755
index 0000000..e3d8515
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/014.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all unhealthy, all IPs assigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2 CURRENT RECMASTER
+1 192.168.20.42 0x2
+2 192.168.20.43 0x2
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 2
+10.0.0.33 1
+EOF
+
+ok <<EOF
+Failed to find node to cover ip 10.0.0.33
+Failed to find node to cover ip 10.0.0.32
+Failed to find node to cover ip 10.0.0.31
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/016.sh b/ctdb/tests/UNIT/takeover_helper/016.sh
new file mode 100755
index 0000000..7fbed7e
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/016.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all healthy, IPs all unassigned, IP failover disabled"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+
+export CTDB_DISABLE_IP_FAILOVER=1
+
+ok <<EOF
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/017.sh b/ctdb/tests/UNIT/takeover_helper/017.sh
new file mode 100755
index 0000000..e5bcd20
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/017.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all healthy, IPs unbalanced, NoIPFailback"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 1
+EOF
+
+ctdb_cmd setvar NoIPFailback 1
+
+ok <<EOF
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/018.sh b/ctdb/tests/UNIT/takeover_helper/018.sh
new file mode 100755
index 0000000..61a26dd
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/018.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all healthy, IPs unbalanced"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 1
+EOF
+
+ok <<EOF
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/019.sh b/ctdb/tests/UNIT/takeover_helper/019.sh
new file mode 100755
index 0000000..0802611
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/019.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 node unhealthy, IPs all assigned, NoIPTakeover"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x2
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+EOF
+
+ctdb_cmd setvar NoIPTakeover 1
+
+ok <<EOF
+Failed to find node to cover ip 10.0.0.32
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 -1
+10.0.0.33 2
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/021.sh b/ctdb/tests/UNIT/takeover_helper/021.sh
new file mode 100755
index 0000000..ad8e59f
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/021.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all healthy, IPs all assigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 -1
+EOF
+
+ctdb_cmd setvar NoIPTakeover 1
+
+ok <<EOF
+Failed to find node to cover ip 10.0.0.34
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 -1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/022.sh b/ctdb/tests/UNIT/takeover_helper/022.sh
new file mode 100755
index 0000000..e8c5a96
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/022.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all healthy, IPs very unbalanced, no force rebalance"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 2
+10.0.0.36 2
+EOF
+
+ok <<EOF
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 2
+10.0.0.36 2
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/023.sh b/ctdb/tests/UNIT/takeover_helper/023.sh
new file mode 100755
index 0000000..a76afef
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/023.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all healthy, IPs very unbalanced, force rebalance 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 2
+10.0.0.36 2
+EOF
+
+ok <<EOF
+Forcing rebalancing of IPs to node 1
+EOF
+test_takeover_helper 1
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 1
+10.0.0.36 2
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/024.sh b/ctdb/tests/UNIT/takeover_helper/024.sh
new file mode 100755
index 0000000..af7480c
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/024.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all healthy, IPs very unbalanced, force rebalance all"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 2
+10.0.0.36 2
+EOF
+
+ok <<EOF
+Forcing rebalancing of IPs to node 1
+Forcing rebalancing of IPs to node 0
+Forcing rebalancing of IPs to node 2
+EOF
+test_takeover_helper 1,0,2
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 0
+10.0.0.36 1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/025.sh b/ctdb/tests/UNIT/takeover_helper/025.sh
new file mode 100755
index 0000000..28db486
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/025.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all assigned randomly, deterministic IPs"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 1
+10.0.0.32 0
+10.0.0.33 2
+EOF
+
+ctdb_cmd setvar IPAllocAlgorithm 0
+
+ok <<EOF
+Deterministic IPs enabled. Resetting all ip allocations
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/026.sh b/ctdb/tests/UNIT/takeover_helper/026.sh
new file mode 100755
index 0000000..08a7b6d
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/026.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs assigned, unbalanced, non-deterministic IPs"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 2
+EOF
+
+ctdb_cmd setvar IPAllocAlgorithm 1
+
+ok_null
+test_takeover_helper
+
+# This is non-deterministic - LCP2 would not rebalance without
+# force-rebalance-nodes
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+10.0.0.34 2
+10.0.0.35 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/027.sh b/ctdb/tests/UNIT/takeover_helper/027.sh
new file mode 100755
index 0000000..1c36d87
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/027.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 2 banned, IPs all unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x8
+2 192.168.20.43 0x8
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+
+ok_null
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 0
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/028.sh b/ctdb/tests/UNIT/takeover_helper/028.sh
new file mode 100755
index 0000000..a69cd47
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/028.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 2 disconnected, IPs all unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x1
+2 192.168.20.43 0x1
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+
+ok_null
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 0
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/030.sh b/ctdb/tests/UNIT/takeover_helper/030.sh
new file mode 100755
index 0000000..e6411c5
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/030.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs defined on 2, IPs all unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1 0,2
+10.0.0.32 -1 0,2
+10.0.0.33 -1 0,2
+10.0.0.34 -1 0,2
+EOF
+
+ok_null
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 2
+10.0.0.33 2
+10.0.0.34 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/031.sh b/ctdb/tests/UNIT/takeover_helper/031.sh
new file mode 100755
index 0000000..13005ee
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/031.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs defined on 2, IPs all unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1 0,2
+10.0.0.32 -1 0,2
+10.0.0.33 -1 0,2
+10.0.0.34 -1 0,2
+EOF
+
+HELPER_DEBUGLEVEL=INFO
+ok <<EOF
+Fetched public IPs from node 0
+Fetched public IPs from node 1
+Fetched public IPs from node 2
+Fetched public IPs from node 0
+Fetched public IPs from node 2
+ 10.0.0.34 -> 0 [+0]
+ 10.0.0.33 -> 2 [+0]
+ 10.0.0.31 -> 0 [+14884]
+ 10.0.0.32 -> 2 [+16129]
+RELEASE_IP 10.0.0.34 succeeded on 1 nodes
+RELEASE_IP 10.0.0.33 succeeded on 1 nodes
+RELEASE_IP 10.0.0.32 succeeded on 1 nodes
+RELEASE_IP 10.0.0.31 succeeded on 1 nodes
+TAKEOVER_IP 10.0.0.34 succeeded on node 0
+TAKEOVER_IP 10.0.0.33 succeeded on node 2
+TAKEOVER_IP 10.0.0.32 succeeded on node 2
+TAKEOVER_IP 10.0.0.31 succeeded on node 0
+IPREALLOCATED succeeded on 3 nodes
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 2
+10.0.0.33 2
+10.0.0.34 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/110.sh b/ctdb/tests/UNIT/takeover_helper/110.sh
new file mode 100755
index 0000000..56dc16c
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/110.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, no IPs, IPREALLOCATED error"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+CONTROLFAILS
+137 1 ERROR CTDB_CONTROL_IPREALLOCATED fake failure
+
+EOF
+
+required_result 255 <<EOF
+No nodes available to host public IPs yet
+IPREALLOCATED failed on node 1, ret=-1
+Assigning banning credits to node 1
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/111.sh b/ctdb/tests/UNIT/takeover_helper/111.sh
new file mode 100755
index 0000000..d14868b
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/111.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all unassigned, IPREALLOCATED error"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+
+CONTROLFAILS
+137 1 ERROR CTDB_CONTROL_IPREALLOCATED fake failure
+EOF
+
+required_result 255 <<EOF
+IPREALLOCATED failed on node 1, ret=-1
+Assigning banning credits to node 1
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/120.sh b/ctdb/tests/UNIT/takeover_helper/120.sh
new file mode 100755
index 0000000..af780d6
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/120.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all unassigned, TAKEOVER_IP error"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+
+CONTROLFAILS
+89 1 ERROR CTDB_CONTROL_TAKEOVER_IP fake failure
+EOF
+
+required_result 255 <<EOF
+TAKEOVER_IP 10.0.0.32 failed on node 1, ret=-1
+Assigning banning credits to node 1
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 -1
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/121.sh b/ctdb/tests/UNIT/takeover_helper/121.sh
new file mode 100755
index 0000000..cc113da
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/121.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, 2/3 IPs assigned, TAKEOVER_IP error (redundant)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 -1
+
+CONTROLFAILS
+89 1 ERROR CTDB_CONTROL_TAKEOVER_IP fake failure
+EOF
+
+required_result 255 <<EOF
+TAKEOVER_IP 10.0.0.32 failed on node 1, ret=-1
+Assigning banning credits to node 1
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/122.sh b/ctdb/tests/UNIT/takeover_helper/122.sh
new file mode 100755
index 0000000..d823b09
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/122.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, 2/3 IPs assigned, TAKEOVER_IP error (target)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 -1
+
+CONTROLFAILS
+89 0 ERROR CTDB_CONTROL_TAKEOVER_IP fake failure
+EOF
+
+required_result 255 <<EOF
+TAKEOVER_IP 10.0.0.33 failed on node 0, ret=-1
+Assigning banning credits to node 0
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 -1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/130.sh b/ctdb/tests/UNIT/takeover_helper/130.sh
new file mode 100755
index 0000000..83735d4
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/130.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all unassigned, RELEASE_IP error"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+
+CONTROLFAILS
+88 2 ERROR CTDB_CONTROL_RELEASE_IP fake failure
+EOF
+
+required_result 255 <<EOF
+RELEASE_IP 10.0.0.33 failed on node 2, ret=-1
+RELEASE_IP 10.0.0.32 failed on node 2, ret=-1
+Assigning banning credits to node 2
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/131.sh b/ctdb/tests/UNIT/takeover_helper/131.sh
new file mode 100755
index 0000000..4e0cd46
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/131.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, all IPs assigned, RELEASE_IP error (redundant)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x2
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+
+CONTROLFAILS
+88 0 ERROR CTDB_CONTROL_RELEASE_IP fake failure
+EOF
+
+required_result 255 <<EOF
+RELEASE_IP 10.0.0.33 failed on node 0, ret=-1
+Assigning banning credits to node 0
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 -1
+10.0.0.33 2
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/132.sh b/ctdb/tests/UNIT/takeover_helper/132.sh
new file mode 100755
index 0000000..a1a4ce5
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/132.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, all IPs assigned, RELEASE_IP error (target)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x2
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+
+CONTROLFAILS
+88 1 ERROR CTDB_CONTROL_RELEASE_IP fake failure
+EOF
+
+required_result 255 <<EOF
+RELEASE_IP 10.0.0.33 failed on node 1, ret=-1
+RELEASE_IP 10.0.0.32 failed on node 1, ret=-1
+RELEASE_IP 10.0.0.31 failed on node 1, ret=-1
+Assigning banning credits to node 1
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/140.sh b/ctdb/tests/UNIT/takeover_helper/140.sh
new file mode 100755
index 0000000..844a35a
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/140.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_PUBLIC_IPS error"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 1
+10.0.0.32 1
+10.0.0.33 1
+
+CONTROLFAILS
+90 2 ERROR CTDB_CONTROL_GET_PUBLIC_IPS fake failure
+EOF
+
+required_result 255 <<EOF
+control GET_PUBLIC_IPS failed on node 2, ret=-1
+Failed to fetch known public IPs
+Assigning banning credits to node 2
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
diff --git a/ctdb/tests/UNIT/takeover_helper/150.sh b/ctdb/tests/UNIT/takeover_helper/150.sh
new file mode 100755
index 0000000..56042b4
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/150.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_NODEMAP error"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 1
+10.0.0.32 1
+10.0.0.33 1
+
+CONTROLFAILS
+91 0 ERROR CTDB_CONTROL_GET_NODEMAP fake failure
+EOF
+
+required_result 255 <<EOF
+control GET_NODEMAP failed, ret=-1
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
diff --git a/ctdb/tests/UNIT/takeover_helper/160.sh b/ctdb/tests/UNIT/takeover_helper/160.sh
new file mode 100755
index 0000000..c09f649
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/160.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_ALL_TUNABLES error"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 1
+10.0.0.32 1
+10.0.0.33 1
+
+CONTROLFAILS
+53 0 ERROR CTDB_CONTROL_GET_ALL_TUNABLES fake failure
+EOF
+
+required_result 255 <<EOF
+control GET_ALL_TUNABLES failed, ret=-1
+takeover run failed, ret=-1
+EOF
+test_takeover_helper
diff --git a/ctdb/tests/UNIT/takeover_helper/210.sh b/ctdb/tests/UNIT/takeover_helper/210.sh
new file mode 100755
index 0000000..eacf024
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/210.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, no IPs, IPREALLOCATED timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+CONTROLFAILS
+137 1 TIMEOUT CTDB_CONTROL_IPREALLOCATED fake timeout
+
+EOF
+
+required_error ETIMEDOUT <<EOF
+No nodes available to host public IPs yet
+IPREALLOCATED failed on node 1, ret=$(errcode ETIMEDOUT)
+Assigning banning credits to node 1
+takeover run failed, ret=$(errcode ETIMEDOUT)
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/211.sh b/ctdb/tests/UNIT/takeover_helper/211.sh
new file mode 100755
index 0000000..27eebe3
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/211.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all unassigned, IPREALLOCATED timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+
+CONTROLFAILS
+137 1 TIMEOUT CTDB_CONTROL_IPREALLOCATED fake timeout
+EOF
+
+required_error ETIMEDOUT <<EOF
+IPREALLOCATED failed on node 1, ret=$(errcode ETIMEDOUT)
+Assigning banning credits to node 1
+takeover run failed, ret=$(errcode ETIMEDOUT)
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 1
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/220.sh b/ctdb/tests/UNIT/takeover_helper/220.sh
new file mode 100755
index 0000000..84fc1d7
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/220.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all unassigned, TAKEOVER_IP timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+
+CONTROLFAILS
+89 1 TIMEOUT CTDB_CONTROL_TAKEOVER_IP fake timeout
+EOF
+
+required_error ETIMEDOUT <<EOF
+TAKEOVER_IP 10.0.0.32 failed to node 1, ret=$(errcode ETIMEDOUT)
+Assigning banning credits to node 1
+takeover run failed, ret=$(errcode ETIMEDOUT)
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 2
+10.0.0.32 -1
+10.0.0.33 0
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/230.sh b/ctdb/tests/UNIT/takeover_helper/230.sh
new file mode 100755
index 0000000..13ed08b
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/230.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IPs all unassigned, RELEASE_IP timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+
+CONTROLFAILS
+88 2 TIMEOUT CTDB_CONTROL_RELEASE_IP fake timeout
+EOF
+
+required_error ETIMEDOUT <<EOF
+RELEASE_IP 10.0.0.33 failed on node 2, ret=$(errcode ETIMEDOUT)
+RELEASE_IP 10.0.0.32 failed on node 2, ret=$(errcode ETIMEDOUT)
+Assigning banning credits to node 2
+takeover run failed, ret=$(errcode ETIMEDOUT)
+EOF
+test_takeover_helper
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 -1
+10.0.0.32 -1
+10.0.0.33 -1
+EOF
+test_ctdb_ip_all
diff --git a/ctdb/tests/UNIT/takeover_helper/240.sh b/ctdb/tests/UNIT/takeover_helper/240.sh
new file mode 100755
index 0000000..7afb2fc
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/240.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_PUBLIC_IPS timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 1
+10.0.0.32 1
+10.0.0.33 1
+
+CONTROLFAILS
+90 2 TIMEOUT CTDB_CONTROL_GET_PUBLIC_IPS fake timeout
+EOF
+
+required_error ETIMEDOUT <<EOF
+control GET_PUBLIC_IPS failed on node 2, ret=$(errcode ETIMEDOUT)
+Failed to fetch known public IPs
+Assigning banning credits to node 2
+takeover run failed, ret=$(errcode ETIMEDOUT)
+EOF
+test_takeover_helper
diff --git a/ctdb/tests/UNIT/takeover_helper/250.sh b/ctdb/tests/UNIT/takeover_helper/250.sh
new file mode 100755
index 0000000..91c6766
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/250.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_NODEMAP timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 1
+10.0.0.32 1
+10.0.0.33 1
+
+CONTROLFAILS
+91 0 TIMEOUT CTDB_CONTROL_GET_NODEMAP fake timeout
+EOF
+
+required_error ETIMEDOUT <<EOF
+control GET_NODEMAP failed to node 0, ret=$(errcode ETIMEDOUT)
+takeover run failed, ret=$(errcode ETIMEDOUT)
+EOF
+test_takeover_helper
diff --git a/ctdb/tests/UNIT/takeover_helper/260.sh b/ctdb/tests/UNIT/takeover_helper/260.sh
new file mode 100755
index 0000000..7e24e32
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/260.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_ALL_TUNABLES timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 1
+10.0.0.32 1
+10.0.0.33 1
+
+CONTROLFAILS
+53 0 TIMEOUT CTDB_CONTROL_GET_ALL_TUNABLES fake timeout
+EOF
+
+required_error ETIMEDOUT <<EOF
+control GET_ALL_TUNABLES failed, ret=$(errcode ETIMEDOUT)
+takeover run failed, ret=$(errcode ETIMEDOUT)
+EOF
+test_takeover_helper
diff --git a/ctdb/tests/UNIT/takeover_helper/scripts/local.sh b/ctdb/tests/UNIT/takeover_helper/scripts/local.sh
new file mode 100644
index 0000000..d36d4e4
--- /dev/null
+++ b/ctdb/tests/UNIT/takeover_helper/scripts/local.sh
@@ -0,0 +1,108 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+if "$CTDB_TEST_VERBOSE" ; then
+ debug () { echo "$@" ; }
+else
+ debug () { : ; }
+fi
+
+. "${TEST_SCRIPTS_DIR}/script_install_paths.sh"
+
+PATH="${PATH}:${CTDB_SCRIPTS_TOOLS_HELPER_DIR}"
+PATH="${PATH}:${CTDB_SCRIPTS_HELPER_BINDIR}"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc"
+
+ctdbd_socket=$(ctdb-path socket "ctdbd")
+ctdbd_pidfile=$(ctdb-path pidfile "ctdbd")
+ctdbd_dbdir=$(ctdb-path vardir append "db")
+
+define_test ()
+{
+ _f=$(basename "$0" ".sh")
+
+ printf "%-28s - %s\n" "$_f" "$1"
+
+ if [ -z "$FAKE_CTDBD_DEBUGLEVEL" ] ; then
+ FAKE_CTDBD_DEBUGLEVEL="ERR"
+ fi
+ if [ -z "$HELPER_DEBUGLEVEL" ] ; then
+ HELPER_DEBUGLEVEL="NOTICE"
+ fi
+ if [ -z "$CTDB_DEBUGLEVEL" ] ; then
+ CTDB_DEBUGLEVEL="ERR"
+ fi
+}
+
+cleanup_ctdbd ()
+{
+ debug "Cleaning up fake ctdbd"
+
+ pid=$(cat "$ctdbd_pidfile" 2>/dev/null || echo)
+ if [ -n "$pid" ] ; then
+ kill $pid || true
+ rm -f "$ctdbd_pidfile"
+ fi
+ rm -f "$ctdbd_socket"
+ rm -rf "$ctdbd_dbdir"
+}
+
+setup_ctdbd ()
+{
+ debug "Setting up fake ctdbd"
+
+ mkdir -p "$ctdbd_dbdir"
+ $VALGRIND fake_ctdbd -d "$FAKE_CTDBD_DEBUGLEVEL" \
+ -s "$ctdbd_socket" -p "$ctdbd_pidfile" \
+ -D "$ctdbd_dbdir"
+ # This currently translates to a 6 second timeout for the
+ # important controls
+ ctdb setvar TakeoverTimeout 2
+ test_cleanup cleanup_ctdbd
+}
+
+# Strip the "ctdb-takeover[N]: " prefix from the filtered output. The
+# helper's binary status is rendered by takeover_helper_format_outfd.
+result_filter ()
+{
+ sed -e 's|ctdb-takeover\[[0-9]*\]: ||'
+}
+
+ctdb_cmd ()
+{
+ echo Running: ctdb -d "$CTDB_DEBUGLEVEL" "$@"
+ ctdb -d "$CTDB_DEBUGLEVEL" "$@"
+}
+
+test_ctdb_ip_all ()
+{
+ unit_test ctdb -d "$CTDB_DEBUGLEVEL" ip all || exit $?
+}
+
+takeover_helper_out="${CTDB_TEST_TMP_DIR}/takover_helper.out"
+
+takeover_helper_format_outfd ()
+{
+ od -A n -t d4 "$takeover_helper_out" | sed -e 's|[[:space:]]*||g'
+}
+
+test_takeover_helper ()
+{
+ (
+ export CTDB_DEBUGLEVEL="$HELPER_DEBUGLEVEL"
+ export CTDB_LOGGING="file:"
+ unit_test ctdb_takeover_helper 3 "$ctdbd_socket" "$@" \
+ 3>"$takeover_helper_out"
+ ) || exit $?
+
+ case "$required_rc" in
+ 255) _t="-1" ;;
+ *) _t="$required_rc" ;;
+ esac
+ ok "$_t"
+
+ unit_test_notrace takeover_helper_format_outfd
+ _ret=$?
+ rm "$takeover_helper_out"
+ [ $_ret -eq 0 ] || exit $_ret
+}
diff --git a/ctdb/tests/UNIT/tool/README b/ctdb/tests/UNIT/tool/README
new file mode 100644
index 0000000..8160528
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/README
@@ -0,0 +1,17 @@
+Unit tests for the ctdb tool (i.e. tools/ctdb).
+
+Test case filenames can take 2 forms:
+
+* func.<some_function>.NNN.sh
+
+ Run <some_function> in the ctdb tool code using the
+ ctdb_tool_functest test program. This test program uses test stubs
+ for CTDB client functions.
+
+* stubby.<command>.NNN.sh
+
+ Run the ctdb_tool_stubby test program with <command> as the 1st
+ argument - subsequent arguments are passed to simple_test(). ctdb_tool_stubby
+ is linked against the test stubs for CTDB client functions.
+
+To add tests here you may need to add appropriate test stubs.
diff --git a/ctdb/tests/UNIT/tool/ctdb.attach.001.sh b/ctdb/tests/UNIT/tool/ctdb.attach.001.sh
new file mode 100755
index 0000000..82c3332
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.attach.001.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "attach volatile database"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test "volatile.tdb"
+
+ok <<EOF
+Number of databases:1
+dbid:0x211bf47b name:volatile.tdb path:${ctdbd_dbdir}/volatile.tdb
+EOF
+
+simple_test_other getdbmap
+
+ok <<EOF
+dbid: 0x211bf47b
+name: volatile.tdb
+path: ${ctdbd_dbdir}/volatile.tdb
+PERSISTENT: no
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+
+simple_test_other getdbstatus "volatile.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.attach.002.sh b/ctdb/tests/UNIT/tool/ctdb.attach.002.sh
new file mode 100755
index 0000000..a4719bf
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.attach.002.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "attach persistent database"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test "persistent.tdb" persistent
+
+ok <<EOF
+Number of databases:1
+dbid:0x54ef7d5e name:persistent.tdb path:${ctdbd_dbdir}/persistent.tdb PERSISTENT
+EOF
+
+simple_test_other getdbmap
+
+ok <<EOF
+dbid: 0x54ef7d5e
+name: persistent.tdb
+path: ${ctdbd_dbdir}/persistent.tdb
+PERSISTENT: yes
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+
+simple_test_other getdbstatus "persistent.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.attach.003.sh b/ctdb/tests/UNIT/tool/ctdb.attach.003.sh
new file mode 100755
index 0000000..1a4cdeb
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.attach.003.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "attach replicated database"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test "replicated.tdb" replicated
+
+ok <<EOF
+Number of databases:1
+dbid:0x84241f7c name:replicated.tdb path:${ctdbd_dbdir}/replicated.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
+
+ok <<EOF
+dbid: 0x84241f7c
+name: replicated.tdb
+path: ${ctdbd_dbdir}/replicated.tdb
+PERSISTENT: no
+REPLICATED: yes
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+
+simple_test_other getdbstatus "replicated.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.ban.001.sh b/ctdb/tests/UNIT/tool/ctdb.ban.001.sh
new file mode 100755
index 0000000..3c17f75
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ban.001.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "ban default (0), wait for timeout"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test 4
+
+required_result 8 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 BANNED|INACTIVE (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
+
+echo
+echo "Waiting 5 seconds for ban to expire..."
+sleep 5
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.ban.002.sh b/ctdb/tests/UNIT/tool/ctdb.ban.002.sh
new file mode 100755
index 0000000..47a9995
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ban.002.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "ban node 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test 60 -n 1
+
+required_result 8 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 BANNED|INACTIVE
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.ban.003.sh b/ctdb/tests/UNIT/tool/ctdb.ban.003.sh
new file mode 100755
index 0000000..95acf50
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ban.003.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "already banned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x8
+2 192.168.20.43 0x0
+EOF
+
+ok "Node 1 is already banned"
+simple_test 60 -n 1
+
+required_result 8 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 BANNED|INACTIVE
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.catdb.001.sh b/ctdb/tests/UNIT/tool/ctdb.catdb.001.sh
new file mode 100755
index 0000000..7fef1f1
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.catdb.001.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "volatile traverse"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "volatile.tdb"
+
+for i in $(seq 1 9) ; do
+ ok_null
+ simple_test_other writekey "volatile.tdb" "key$i" "value$i"
+done
+
+ok <<EOF
+key(4) = "key2"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value2"
+
+key(4) = "key4"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value4"
+
+key(4) = "key9"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value9"
+
+key(4) = "key8"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value8"
+
+key(4) = "key6"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value6"
+
+key(4) = "key3"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value3"
+
+key(4) = "key7"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value7"
+
+key(4) = "key5"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value5"
+
+key(4) = "key1"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value1"
+
+Dumped 9 records
+EOF
+
+simple_test "volatile.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.catdb.002.sh b/ctdb/tests/UNIT/tool/ctdb.catdb.002.sh
new file mode 100755
index 0000000..5258308
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.catdb.002.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "persistent traverse"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "persistent.tdb" persistent
+
+for i in $(seq 1 9) ; do
+ ok_null
+ simple_test_other pstore "persistent.tdb" "key$i" "value$i"
+done
+
+ok <<EOF
+key(23) = "__db_sequence_number__\00"
+dmaster: 0
+rsn: 9
+flags: 0x00000000
+data(8) = "\09\00\00\00\00\00\00\00"
+
+key(4) = "key9"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value9"
+
+key(4) = "key8"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value8"
+
+key(4) = "key7"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value7"
+
+key(4) = "key6"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value6"
+
+key(4) = "key5"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value5"
+
+key(4) = "key4"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value4"
+
+key(4) = "key3"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value3"
+
+key(4) = "key2"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value2"
+
+key(4) = "key1"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value1"
+
+Dumped 10 records
+EOF
+
+simple_test "persistent.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.cattdb.001.sh b/ctdb/tests/UNIT/tool/ctdb.cattdb.001.sh
new file mode 100755
index 0000000..be549e2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.cattdb.001.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "volatile traverse"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "volatile.tdb"
+
+for i in $(seq 1 9) ; do
+ ok_null
+ simple_test_other writekey "volatile.tdb" "key$i" "value$i"
+done
+
+ok <<EOF
+key(4) = "key2"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value2"
+
+key(4) = "key4"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value4"
+
+key(4) = "key9"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value9"
+
+key(4) = "key8"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value8"
+
+key(4) = "key6"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value6"
+
+key(4) = "key3"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value3"
+
+key(4) = "key7"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value7"
+
+key(4) = "key5"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value5"
+
+key(4) = "key1"
+dmaster: 0
+rsn: 0
+flags: 0x00000000
+data(6) = "value1"
+
+Dumped 9 record(s)
+EOF
+
+simple_test "volatile.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.cattdb.002.sh b/ctdb/tests/UNIT/tool/ctdb.cattdb.002.sh
new file mode 100755
index 0000000..03c5e7f
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.cattdb.002.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "persistent traverse"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "persistent.tdb" persistent
+
+for i in $(seq 1 9) ; do
+ ok_null
+ simple_test_other pstore "persistent.tdb" "key$i" "value$i"
+done
+
+ok <<EOF
+key(23) = "__db_sequence_number__\00"
+dmaster: 0
+rsn: 9
+flags: 0x00000000
+data(8) = "\09\00\00\00\00\00\00\00"
+
+key(4) = "key9"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value9"
+
+key(4) = "key8"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value8"
+
+key(4) = "key7"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value7"
+
+key(4) = "key6"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value6"
+
+key(4) = "key5"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value5"
+
+key(4) = "key4"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value4"
+
+key(4) = "key3"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value3"
+
+key(4) = "key2"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value2"
+
+key(4) = "key1"
+dmaster: 0
+rsn: 1
+flags: 0x00000000
+data(6) = "value1"
+
+Dumped 10 record(s)
+EOF
+
+simple_test "persistent.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.continue.001.sh b/ctdb/tests/UNIT/tool/ctdb.continue.001.sh
new file mode 100755
index 0000000..fef1e00
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.continue.001.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "continue default (0)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x20 CURRENT
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 RECMASTER
+EOF
+
+ok_null
+simple_test
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.continue.002.sh b/ctdb/tests/UNIT/tool/ctdb.continue.002.sh
new file mode 100755
index 0000000..55ce7f5
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.continue.002.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "continue 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT
+1 192.168.20.42 0x20
+2 192.168.20.43 0x0 RECMASTER
+EOF
+
+ok_null
+simple_test -n 1
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.continue.003.sh b/ctdb/tests/UNIT/tool/ctdb.continue.003.sh
new file mode 100755
index 0000000..7280125
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.continue.003.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "node is not stopped"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok "Node 2 is not stopped"
+simple_test -n 2
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.deletekey.001.sh b/ctdb/tests/UNIT/tool/ctdb.deletekey.001.sh
new file mode 100755
index 0000000..f530801
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.deletekey.001.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "volatile delete"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "volatile.tdb"
+
+ok_null
+simple_test "volatile.tdb" "key1"
+
+ok_null
+simple_test_other writekey "volatile.tdb" "key1" "value1"
+
+ok <<EOF
+Data: size:6 ptr:[value1]
+EOF
+simple_test_other readkey "volatile.tdb" "key1"
+
+ok_null
+simple_test "volatile.tdb" "key1"
+
+ok <<EOF
+Data: size:0 ptr:[]
+EOF
+simple_test_other readkey "volatile.tdb" "key1"
diff --git a/ctdb/tests/UNIT/tool/ctdb.disable.001.sh b/ctdb/tests/UNIT/tool/ctdb.disable.001.sh
new file mode 100755
index 0000000..b2e419b
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.disable.001.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "disable default (0)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test
+
+required_result 4 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 DISABLED (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.disable.002.sh b/ctdb/tests/UNIT/tool/ctdb.disable.002.sh
new file mode 100755
index 0000000..ac90c75
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.disable.002.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "disable node 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test -n 1
+
+required_result 4 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 DISABLED
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.disable.003.sh b/ctdb/tests/UNIT/tool/ctdb.disable.003.sh
new file mode 100755
index 0000000..ef02ba0
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.disable.003.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "already disabled"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x4
+2 192.168.20.43 0x0
+EOF
+
+ok "Node 1 is already disabled"
+simple_test -n 1
+
+required_result 4 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 DISABLED
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.disable.004.sh b/ctdb/tests/UNIT/tool/ctdb.disable.004.sh
new file mode 100755
index 0000000..da39d67
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.disable.004.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "invalid node"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 "Node 4 does not exist"
+simple_test -n 4
diff --git a/ctdb/tests/UNIT/tool/ctdb.enable.001.sh b/ctdb/tests/UNIT/tool/ctdb.enable.001.sh
new file mode 100755
index 0000000..9234f19
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.enable.001.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "enable default (0)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x4 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.enable.002.sh b/ctdb/tests/UNIT/tool/ctdb.enable.002.sh
new file mode 100755
index 0000000..ee9b210
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.enable.002.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "enable node 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x4
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test -n 1
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.enable.003.sh b/ctdb/tests/UNIT/tool/ctdb.enable.003.sh
new file mode 100755
index 0000000..37656c2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.enable.003.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "not disabled"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok "Node 1 is not disabled"
+simple_test -n 1
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.getcapabilities.001.sh b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.001.sh
new file mode 100755
index 0000000..da71f22
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.001.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+LEADER: YES
+LMASTER: YES
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.getcapabilities.002.sh b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.002.sh
new file mode 100755
index 0000000..221ae81
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.002.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 disconnected"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x1
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+LEADER: YES
+LMASTER: YES
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.getcapabilities.003.sh b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.003.sh
new file mode 100755
index 0000000..74702d5
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.003.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, current disconnected"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+# Don't setup ctdbd - disconnected on current node
+#setup_ctdbd <<EOF
+#NODEMAP
+#0 192.168.20.41 0x1 CURRENT RECMASTER
+#1 192.168.20.42 0x0
+#2 192.168.20.43 0x0
+#EOF
+
+required_result 1 <<EOF
+connect() failed, errno=2
+Failed to connect to CTDB daemon ($ctdbd_socket)
+Failed to detect PNN of the current node.
+Is this node part of CTDB cluster?
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.getcapabilities.004.sh b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.004.sh
new file mode 100755
index 0000000..8662ed3
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getcapabilities.004.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, non-default capabilities"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0 -CTDB_CAP_LMASTER
+2 192.168.20.43 0x0 -CTDB_CAP_RECMASTER
+EOF
+
+# node 0
+
+required_result 0 <<EOF
+LEADER: YES
+LMASTER: YES
+EOF
+
+simple_test -n 0
+
+# node 1
+
+required_result 0 <<EOF
+LEADER: YES
+LMASTER: NO
+EOF
+
+simple_test -n 1
+
+# node 2
+
+required_result 0 <<EOF
+LEADER: NO
+LMASTER: YES
+EOF
+
+simple_test -n 2
diff --git a/ctdb/tests/UNIT/tool/ctdb.getdbmap.001.sh b/ctdb/tests/UNIT/tool/ctdb.getdbmap.001.sh
new file mode 100755
index 0000000..f766e9c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getdbmap.001.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "getdbmap from default (0)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb READONLY
+0x4e66c2b2 brlock.tdb STICKY
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb READONLY
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb STICKY
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.getdbseqnum.001.sh b/ctdb/tests/UNIT/tool/ctdb.getdbseqnum.001.sh
new file mode 100755
index 0000000..95ef244
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getdbseqnum.001.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "by ID"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 0x42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 0x23
+EOF
+
+# locking.tdb
+ok "0x0"
+simple_test 0x7a19d84d
+
+# secrets.tdb
+ok "0x0"
+simple_test 0x7132c184
+
+# registry.tdb
+ok "0x42"
+simple_test 0x6cf2837d
+
+# ctdb-ip.tdb
+ok "0x0"
+simple_test 0xbc57b384
+
+# ctdb-conn.tdb
+ok "0x23"
+simple_test 0xbec75f0b
diff --git a/ctdb/tests/UNIT/tool/ctdb.getdbseqnum.002.sh b/ctdb/tests/UNIT/tool/ctdb.getdbseqnum.002.sh
new file mode 100755
index 0000000..e0274f3
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getdbseqnum.002.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 0x42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 0x23
+EOF
+
+ok "0x0"
+simple_test locking.tdb
+
+ok "0x0"
+simple_test secrets.tdb
+
+ok "0x42"
+simple_test registry.tdb
+
+ok "0x0"
+simple_test ctdb-ip.tdb
+
+ok "0x23"
+simple_test ctdb-conn.tdb
diff --git a/ctdb/tests/UNIT/tool/ctdb.getdbstatus.001.sh b/ctdb/tests/UNIT/tool/ctdb.getdbstatus.001.sh
new file mode 100755
index 0000000..5a2b79e
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getdbstatus.001.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "by ID"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb READONLY
+0x4e66c2b2 brlock.tdb STICKY
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok <<EOF
+dbid: 0x7a19d84d
+name: locking.tdb
+path: ${ctdbd_dbdir}/locking.tdb
+PERSISTENT: no
+REPLICATED: no
+STICKY: no
+READONLY: yes
+HEALTH: OK
+EOF
+simple_test 0x7a19d84d
+
+ok <<EOF
+dbid: 0x4e66c2b2
+name: brlock.tdb
+path: ${ctdbd_dbdir}/brlock.tdb
+PERSISTENT: no
+REPLICATED: no
+STICKY: yes
+READONLY: no
+HEALTH: OK
+EOF
+simple_test 0x4e66c2b2
+
+ok <<EOF
+dbid: 0x4d2a432b
+name: g_lock.tdb
+path: ${ctdbd_dbdir}/g_lock.tdb
+PERSISTENT: no
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test 0x4d2a432b
+
+ok <<EOF
+dbid: 0x7132c184
+name: secrets.tdb
+path: ${ctdbd_dbdir}/secrets.tdb
+PERSISTENT: yes
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test 0x7132c184
+
+ok <<EOF
+dbid: 0x6cf2837d
+name: registry.tdb
+path: ${ctdbd_dbdir}/registry.tdb
+PERSISTENT: yes
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test 0x6cf2837d
+
+ok <<EOF
+dbid: 0xbc57b384
+name: ctdb-ip.tdb
+path: ${ctdbd_dbdir}/ctdb-ip.tdb
+PERSISTENT: no
+REPLICATED: yes
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test 0xbc57b384
+
+ok <<EOF
+dbid: 0xbec75f0b
+name: ctdb-conn.tdb
+path: ${ctdbd_dbdir}/ctdb-conn.tdb
+PERSISTENT: no
+REPLICATED: yes
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test 0xbec75f0b
+
+required_result 1 "No database matching '0xdeadc0de' found"
+simple_test 0xdeadc0de
diff --git a/ctdb/tests/UNIT/tool/ctdb.getdbstatus.002.sh b/ctdb/tests/UNIT/tool/ctdb.getdbstatus.002.sh
new file mode 100755
index 0000000..2ff6e7b
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getdbstatus.002.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "by name, node 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb READONLY
+0x4e66c2b2 brlock.tdb STICKY
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok <<EOF
+dbid: 0x7a19d84d
+name: locking.tdb
+path: ${ctdbd_dbdir}/locking.tdb
+PERSISTENT: no
+REPLICATED: no
+STICKY: no
+READONLY: yes
+HEALTH: OK
+EOF
+simple_test locking.tdb -n 1
+
+ok <<EOF
+dbid: 0x4e66c2b2
+name: brlock.tdb
+path: ${ctdbd_dbdir}/brlock.tdb
+PERSISTENT: no
+REPLICATED: no
+STICKY: yes
+READONLY: no
+HEALTH: OK
+EOF
+simple_test brlock.tdb -n 1
+
+ok <<EOF
+dbid: 0x4d2a432b
+name: g_lock.tdb
+path: ${ctdbd_dbdir}/g_lock.tdb
+PERSISTENT: no
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test g_lock.tdb -n 1
+
+ok <<EOF
+dbid: 0x7132c184
+name: secrets.tdb
+path: ${ctdbd_dbdir}/secrets.tdb
+PERSISTENT: yes
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test secrets.tdb -n 1
+
+ok <<EOF
+dbid: 0x6cf2837d
+name: registry.tdb
+path: ${ctdbd_dbdir}/registry.tdb
+PERSISTENT: yes
+REPLICATED: no
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test registry.tdb -n 1
+
+ok <<EOF
+dbid: 0xbc57b384
+name: ctdb-ip.tdb
+path: ${ctdbd_dbdir}/ctdb-ip.tdb
+PERSISTENT: no
+REPLICATED: yes
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test ctdb-ip.tdb -n 1
+
+ok <<EOF
+dbid: 0xbec75f0b
+name: ctdb-conn.tdb
+path: ${ctdbd_dbdir}/ctdb-conn.tdb
+PERSISTENT: no
+REPLICATED: yes
+STICKY: no
+READONLY: no
+HEALTH: OK
+EOF
+simple_test ctdb-conn.tdb -n 1
+
+required_result 1 "No database matching 'ctdb.tdb' found"
+simple_test ctdb.tdb -n 1
diff --git a/ctdb/tests/UNIT/tool/ctdb.getpid.001.sh b/ctdb/tests/UNIT/tool/ctdb.getpid.001.sh
new file mode 100755
index 0000000..5714102
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getpid.001.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "simple getpid"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+pid=$(ctdbd_getpid)
+ok "$pid"
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.getpid.010.sh b/ctdb/tests/UNIT/tool/ctdb.getpid.010.sh
new file mode 100755
index 0000000..6e220a2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getpid.010.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_PID control times out"
+
+setup_lvs <<EOF
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+CONTROLFAILS
+30 0 TIMEOUT # Make "ctdb getpid" time out
+EOF
+
+#####
+
+required_result 1 <<EOF
+Maximum runtime exceeded - exiting
+EOF
+simple_test -T 3
diff --git a/ctdb/tests/UNIT/tool/ctdb.getreclock.001.sh b/ctdb/tests/UNIT/tool/ctdb.getreclock.001.sh
new file mode 100755
index 0000000..bfa08d0
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getreclock.001.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "No reclock set"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.getreclock.002.sh b/ctdb/tests/UNIT/tool/ctdb.getreclock.002.sh
new file mode 100755
index 0000000..6543f8f
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getreclock.002.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Reclock set"
+
+reclock="/some/place/on/shared/storage"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+RECLOCK
+${reclock}
+EOF
+
+ok "$reclock"
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.getvar.001.sh b/ctdb/tests/UNIT/tool/ctdb.getvar.001.sh
new file mode 100755
index 0000000..480788a
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getvar.001.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "confirm that getvar matches listvar"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+# Squash whitespace for predictable output
+result_filter ()
+{
+ sed -e 's|[[:space:]][[:space:]]*| |g'
+}
+
+$CTDB -d "$CTDB_DEBUGLEVEL" listvars |
+ while read -r variable _ value ; do
+ # Variable, as per listvars ("_" discards the "=" field)
+ ok "${variable} = ${value}"
+ simple_test "$variable"
+
+ # Uppercase variable (character classes keep tr locale-independent)
+ v_upper=$(echo "$variable" | tr '[:lower:]' '[:upper:]')
+ ok "${v_upper} = ${value}"
+ simple_test "$v_upper"
+
+ # Lowercase variable
+ v_lower=$(echo "$variable" | tr '[:upper:]' '[:lower:]')
+ ok "${v_lower} = ${value}"
+ simple_test "$v_lower"
+ done
diff --git a/ctdb/tests/UNIT/tool/ctdb.getvar.002.sh b/ctdb/tests/UNIT/tool/ctdb.getvar.002.sh
new file mode 100755
index 0000000..c8aa302
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.getvar.002.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "invalid variable"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 <<EOF
+No such tunable TheQuickBrownFoxJumpsOverTheLazyDog
+EOF
+simple_test "TheQuickBrownFoxJumpsOverTheLazyDog"
diff --git a/ctdb/tests/UNIT/tool/ctdb.ifaces.001.sh b/ctdb/tests/UNIT/tool/ctdb.ifaces.001.sh
new file mode 100755
index 0000000..5b92787
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ifaces.001.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "basic interface listing test"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+ok <<EOF
+Interfaces on node 0
+name:eth2 link:up references:2
+name:eth1 link:up references:4
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.ip.001.sh b/ctdb/tests/UNIT/tool/ctdb.ip.001.sh
new file mode 100755
index 0000000..df0d141
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ip.001.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, no ips"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+Public IPs on node 0
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.ip.002.sh b/ctdb/tests/UNIT/tool/ctdb.ip.002.sh
new file mode 100755
index 0000000..98a821f
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ip.002.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, no ips, get all"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+EOF
+simple_test all
diff --git a/ctdb/tests/UNIT/tool/ctdb.ip.003.sh b/ctdb/tests/UNIT/tool/ctdb.ip.003.sh
new file mode 100755
index 0000000..eec4634
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ip.003.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, same ips on all nodes"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+EOF
+
+required_result 0 <<EOF
+Public IPs on node 0
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.ip.004.sh b/ctdb/tests/UNIT/tool/ctdb.ip.004.sh
new file mode 100755
index 0000000..53f090c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ip.004.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IP missing on node 0"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0 0,1,2
+10.0.0.32 1 0,1,2
+10.0.0.33 2 1,2
+EOF
+
+required_result 0 <<EOF
+Public IPs on node 0
+10.0.0.31 0
+10.0.0.32 1
+EOF
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.ip.005.sh b/ctdb/tests/UNIT/tool/ctdb.ip.005.sh
new file mode 100755
index 0000000..f84ac29
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ip.005.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, IP missing on node 0, get all"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0 0,1,2
+10.0.0.32 1 0,1,2
+10.0.0.33 2 1,2
+EOF
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+EOF
+simple_test all
diff --git a/ctdb/tests/UNIT/tool/ctdb.ip.006.sh b/ctdb/tests/UNIT/tool/ctdb.ip.006.sh
new file mode 100755
index 0000000..975a98c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ip.006.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, same ips on all nodes, 1 unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 -1
+10.0.0.33 2
+EOF
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 node[0] active[eth2] available[eth2,eth1] configured[eth2,eth1]
+10.0.0.32 node[-1] active[] available[] configured[]
+10.0.0.33 node[2] active[eth2] available[eth2,eth1] configured[eth2,eth1]
+EOF
+simple_test -v all
diff --git a/ctdb/tests/UNIT/tool/ctdb.ip.007.sh b/ctdb/tests/UNIT/tool/ctdb.ip.007.sh
new file mode 100755
index 0000000..cb7939d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ip.007.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, same ips on all nodes, IPv6, 1 unassigned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+fd00::5357:5f01 2
+10.0.0.32 -1
+fd00::5357:5f02 1
+10.0.0.33 2
+fd00::5357:5f03 0
+EOF
+
+required_result 0 <<EOF
+Public IPs on ALL nodes
+10.0.0.31 node[0] active[eth2] available[eth2,eth1] configured[eth2,eth1]
+10.0.0.32 node[-1] active[] available[] configured[]
+10.0.0.33 node[2] active[eth2] available[eth2,eth1] configured[eth2,eth1]
+fd00::5357:5f01 node[2] active[eth2] available[eth2,eth1] configured[eth2,eth1]
+fd00::5357:5f02 node[1] active[eth2] available[eth2,eth1] configured[eth2,eth1]
+fd00::5357:5f03 node[0] active[eth2] available[eth2,eth1] configured[eth2,eth1]
+EOF
+simple_test -v all
diff --git a/ctdb/tests/UNIT/tool/ctdb.ipinfo.001.sh b/ctdb/tests/UNIT/tool/ctdb.ipinfo.001.sh
new file mode 100755
index 0000000..60f9462
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ipinfo.001.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, no ips"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 <<EOF
+Control GET_PUBLIC_IP_INFO failed, ret=-1
+Node 0 does not know about IP 10.0.0.31
+EOF
+simple_test 10.0.0.31
diff --git a/ctdb/tests/UNIT/tool/ctdb.ipinfo.002.sh b/ctdb/tests/UNIT/tool/ctdb.ipinfo.002.sh
new file mode 100755
index 0000000..366cfd6
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ipinfo.002.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, same ips on all nodes"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+EOF
+
+required_result 0 <<EOF
+Public IP[10.0.0.32] info on node 0
+IP:10.0.0.32
+CurrentNode:1
+NumInterfaces:2
+Interface[1]: Name:eth2 Link:up References:2 (active)
+Interface[2]: Name:eth1 Link:up References:4
+EOF
+simple_test 10.0.0.32
diff --git a/ctdb/tests/UNIT/tool/ctdb.ipinfo.003.sh b/ctdb/tests/UNIT/tool/ctdb.ipinfo.003.sh
new file mode 100755
index 0000000..383f1c7
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ipinfo.003.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, same ips on all nodes, IPv6"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+PUBLICIPS
+10.0.0.31 0
+10.0.0.32 1
+10.0.0.33 2
+fd00::5357:5f01 2
+fd00::5357:5f02 1
+fd00::5357:5f03 0
+EOF
+
+required_result 0 <<EOF
+Public IP[fd00::5357:5f02] info on node 0
+IP:fd00::5357:5f02
+CurrentNode:1
+NumInterfaces:2
+Interface[1]: Name:eth2 Link:up References:2 (active)
+Interface[2]: Name:eth1 Link:up References:4
+EOF
+simple_test fd00::5357:5f02
diff --git a/ctdb/tests/UNIT/tool/ctdb.leader.001.sh b/ctdb/tests/UNIT/tool/ctdb.leader.001.sh
new file mode 100755
index 0000000..2855304
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.leader.001.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "node 0"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok 0
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.leader.002.sh b/ctdb/tests/UNIT/tool/ctdb.leader.002.sh
new file mode 100755
index 0000000..93a9daf
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.leader.002.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "node 2"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 RECMASTER
+EOF
+
+ok 2
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.listnodes.001.sh b/ctdb/tests/UNIT/tool/ctdb.listnodes.001.sh
new file mode 100755
index 0000000..5a494ee
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.listnodes.001.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "missing nodes file"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+f="${CTDB_BASE}/nodes"
+rm -f "$f"
+
+required_result 1 <<EOF
+${TEST_DATE_STAMP}Failed to read nodes file "${f}"
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.listnodes.002.sh b/ctdb/tests/UNIT/tool/ctdb.listnodes.002.sh
new file mode 100755
index 0000000..95315d7
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.listnodes.002.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, nodes file present"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+required_result 0 <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.listvars.001.sh b/ctdb/tests/UNIT/tool/ctdb.listvars.001.sh
new file mode 100755
index 0000000..88f0fa4
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.listvars.001.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "exact check of output"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok << EOF
+SeqnumInterval = 1000
+ControlTimeout = 60
+TraverseTimeout = 20
+KeepaliveInterval = 5
+KeepaliveLimit = 5
+RecoverTimeout = 30
+RecoverInterval = 1
+ElectionTimeout = 3
+TakeoverTimeout = 9
+MonitorInterval = 15
+TickleUpdateInterval = 20
+EventScriptTimeout = 30
+MonitorTimeoutCount = 20
+RecoveryGracePeriod = 120
+RecoveryBanPeriod = 300
+DatabaseHashSize = 100001
+DatabaseMaxDead = 5
+RerecoveryTimeout = 10
+EnableBans = 1
+NoIPFailback = 0
+VerboseMemoryNames = 0
+RecdPingTimeout = 60
+RecdFailCount = 10
+LogLatencyMs = 0
+RecLockLatencyMs = 1000
+RecoveryDropAllIPs = 120
+VacuumInterval = 10
+VacuumMaxRunTime = 120
+RepackLimit = 10000
+VacuumFastPathCount = 60
+MaxQueueDropMsg = 1000000
+AllowUnhealthyDBRead = 0
+StatHistoryInterval = 1
+DeferredAttachTO = 120
+AllowClientDBAttach = 1
+FetchCollapse = 1
+HopcountMakeSticky = 50
+StickyDuration = 600
+StickyPindown = 200
+NoIPTakeover = 0
+DBRecordCountWarn = 100000
+DBRecordSizeWarn = 10000000
+DBSizeWarn = 100000000
+PullDBPreallocation = 10485760
+LockProcessesPerDB = 200
+RecBufferSizeLimit = 1000000
+QueueBufferSize = 1024
+IPAllocAlgorithm = 2
+AllowMixedVersions = 0
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.001.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.001.sh
new file mode 100755
index 0000000..70c726c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.001.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, no LVS, all ok"
+
+setup_lvs <<EOF
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 255 <<EOF
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.002.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.002.sh
new file mode 100755
index 0000000..edde656
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.002.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all LVS, all ok"
+
+setup_lvs <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 0 <<EOF
+0
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+0 192.168.20.41
+1 192.168.20.42
+2 192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.003.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.003.sh
new file mode 100755
index 0000000..0045ae4
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.003.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, some LVS, all ok"
+
+setup_lvs <<EOF
+192.168.20.41
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 0 <<EOF
+0
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+0 192.168.20.41
+2 192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:2 192.168.20.43 OK
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.004.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.004.sh
new file mode 100755
index 0000000..255966d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.004.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all LVS, node 0 unhealthy"
+
+setup_lvs <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 0 <<EOF
+1
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+1 192.168.20.42
+2 192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 UNHEALTHY (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.005.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.005.sh
new file mode 100755
index 0000000..73fcd80
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.005.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all LVS, all unhealthy"
+
+setup_lvs <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2 CURRENT RECMASTER
+1 192.168.20.42 0x2
+2 192.168.20.43 0x2
+EOF
+
+#####
+
+required_result 0 <<EOF
+0
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+0 192.168.20.41
+1 192.168.20.42
+2 192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 UNHEALTHY (THIS NODE)
+pnn:1 192.168.20.42 UNHEALTHY
+pnn:2 192.168.20.43 UNHEALTHY
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.006.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.006.sh
new file mode 100755
index 0000000..55b4310
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.006.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all LVS, nodes 0,1 disabled, node 2 unhealthy"
+
+setup_lvs <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x4 CURRENT RECMASTER
+1 192.168.20.42 0x4
+2 192.168.20.43 0x2
+EOF
+
+#####
+
+required_result 0 <<EOF
+2
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+2 192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 DISABLED (THIS NODE)
+pnn:1 192.168.20.42 DISABLED
+pnn:2 192.168.20.43 UNHEALTHY
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.007.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.007.sh
new file mode 100755
index 0000000..3dd1104
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.007.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all LVS, all nodes disabled"
+
+setup_lvs <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x4 CURRENT RECMASTER
+1 192.168.20.42 0x4
+2 192.168.20.43 0x4
+EOF
+
+#####
+
+required_result 255 <<EOF
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 DISABLED (THIS NODE)
+pnn:1 192.168.20.42 DISABLED
+pnn:2 192.168.20.43 DISABLED
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.008.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.008.sh
new file mode 100755
index 0000000..1997f4c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.008.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, no LVS, current disconnected"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_lvs <<EOF
+EOF
+
+# Don't setup ctdbd - disconnected on current node
+#setup_ctdbd <<EOF
+#NODEMAP
+#0 192.168.20.41 0x1 CURRENT RECMASTER
+#1 192.168.20.42 0x0
+#2 192.168.20.43 0x0
+#EOF
+
+#####
+
+required_result 1 <<EOF
+connect() failed, errno=2
+Failed to connect to CTDB daemon ($ctdbd_socket)
+Failed to detect PNN of the current node.
+Is this node part of CTDB cluster?
+EOF
+
+simple_test list
+
+#####
+
+required_result 1 <<EOF
+connect() failed, errno=2
+Failed to connect to CTDB daemon ($ctdbd_socket)
+Failed to detect PNN of the current node.
+Is this node part of CTDB cluster?
+EOF
+
+simple_test leader
+
+#####
+
+required_result 1 <<EOF
+connect() failed, errno=2
+Failed to connect to CTDB daemon ($ctdbd_socket)
+Failed to detect PNN of the current node.
+Is this node part of CTDB cluster?
+EOF
+
+simple_test list
+
+#####
+
+required_result 1 <<EOF
+connect() failed, errno=2
+Failed to connect to CTDB daemon ($ctdbd_socket)
+Failed to detect PNN of the current node.
+Is this node part of CTDB cluster?
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.lvs.010.sh b/ctdb/tests/UNIT/tool/ctdb.lvs.010.sh
new file mode 100755
index 0000000..d433939
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.lvs.010.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all ok, GET_NODEMAP control times out"
+
+setup_lvs <<EOF
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+CONTROLFAILS
+91 0 TIMEOUT # Make "ctdb nodestatus" time out in ctdb_lvs helper
+EOF
+
+#####
+
+required_result 1 <<EOF
+Maximum runtime exceeded - exiting
+EOF
+simple_test status -T 3
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.001.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.001.sh
new file mode 100755
index 0000000..ad18f9d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.001.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all in natgw group, all ok"
+
+setup_natgw <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 0 <<EOF
+0 192.168.20.41
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41 LEADER
+192.168.20.42
+192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.002.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.002.sh
new file mode 100755
index 0000000..424189f
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.002.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all in natgw group, 1 unhealthy"
+
+setup_natgw <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0 CURRENT RECMASTER
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 0 <<EOF
+1 192.168.20.42
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41
+192.168.20.42 LEADER
+192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 UNHEALTHY
+pnn:1 192.168.20.42 OK (THIS NODE)
+pnn:2 192.168.20.43 OK
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.003.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.003.sh
new file mode 100755
index 0000000..93522d0
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.003.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 2 in natgw group, 1 unhealthy"
+
+setup_natgw <<EOF
+192.168.20.41
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0 CURRENT RECMASTER
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 0 <<EOF
+2 192.168.20.43
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41
+192.168.20.43 LEADER
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 UNHEALTHY
+pnn:2 192.168.20.43 OK
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.004.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.004.sh
new file mode 100755
index 0000000..af8ea22
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.004.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all unhealthy, all but 1 stopped"
+
+setup_natgw <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x22
+1 192.168.20.42 0x22 CURRENT RECMASTER
+2 192.168.20.43 0x2
+EOF
+
+#####
+
+required_result 0 <<EOF
+2 192.168.20.43
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43 LEADER
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 UNHEALTHY|STOPPED|INACTIVE
+pnn:1 192.168.20.42 UNHEALTHY|STOPPED|INACTIVE (THIS NODE)
+pnn:2 192.168.20.43 UNHEALTHY
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.005.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.005.sh
new file mode 100755
index 0000000..6a6bbde
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.005.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all stopped"
+
+setup_natgw <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x20
+1 192.168.20.42 0x20 CURRENT RECMASTER
+2 192.168.20.43 0x20
+EOF
+
+#####
+
+required_result 0 <<EOF
+0 192.168.20.41
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41 LEADER
+192.168.20.42
+192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 STOPPED|INACTIVE
+pnn:1 192.168.20.42 STOPPED|INACTIVE (THIS NODE)
+pnn:2 192.168.20.43 STOPPED|INACTIVE
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.006.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.006.sh
new file mode 100755
index 0000000..8080f4e
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.006.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, node 0 is follower-only, all stopped"
+
+setup_natgw <<EOF
+192.168.20.41 follower-only
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x20
+1 192.168.20.42 0x20 CURRENT RECMASTER
+2 192.168.20.43 0x20
+EOF
+
+#####
+
+required_result 0 <<EOF
+1 192.168.20.42
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41 follower-only
+192.168.20.42 LEADER
+192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 STOPPED|INACTIVE
+pnn:1 192.168.20.42 STOPPED|INACTIVE (THIS NODE)
+pnn:2 192.168.20.43 STOPPED|INACTIVE
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.007.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.007.sh
new file mode 100755
index 0000000..ca8ea35
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.007.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all nodes are follower-only, all stopped"
+
+setup_natgw <<EOF
+192.168.20.41 follower-only
+192.168.20.42 follower-only
+192.168.20.43 follower-only
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x20
+1 192.168.20.42 0x20 CURRENT RECMASTER
+2 192.168.20.43 0x20
+EOF
+
+#####
+
+required_result 2 <<EOF
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41 follower-only
+192.168.20.42 follower-only
+192.168.20.43 follower-only
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 STOPPED|INACTIVE
+pnn:1 192.168.20.42 STOPPED|INACTIVE (THIS NODE)
+pnn:2 192.168.20.43 STOPPED|INACTIVE
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.008.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.008.sh
new file mode 100755
index 0000000..3e485f8
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.008.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all in natgw group, 1 disconnected"
+
+setup_natgw <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x1
+1 192.168.20.42 0x0 CURRENT RECMASTER
+2 192.168.20.43 0x0
+EOF
+
+#####
+
+required_result 0 <<EOF
+1 192.168.20.42
+EOF
+
+simple_test leader
+
+#####
+
+required_result 0 <<EOF
+192.168.20.41
+192.168.20.42 LEADER
+192.168.20.43
+EOF
+
+simple_test list
+
+#####
+
+required_result 0 <<EOF
+pnn:0 192.168.20.41 DISCONNECTED|INACTIVE
+pnn:1 192.168.20.42 OK (THIS NODE)
+pnn:2 192.168.20.43 OK
+EOF
+
+simple_test status
diff --git a/ctdb/tests/UNIT/tool/ctdb.natgw.010.sh b/ctdb/tests/UNIT/tool/ctdb.natgw.010.sh
new file mode 100755
index 0000000..a3a0e9d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.natgw.010.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, all OK, GET_NODEMAP control times out"
+
+setup_natgw <<EOF
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+CONTROLFAILS
+91 0 TIMEOUT # Make "ctdb nodestatus" time out in ctdb_natgw helper
+EOF
+
+#####
+
+required_result 1 <<EOF
+Maximum runtime exceeded - exiting
+EOF
+simple_test status -T 3
diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh
new file mode 100755
index 0000000..3c754e2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all, 3 nodes, all OK"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+required_result 0 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK (THIS NODE)
+EOF
+simple_test all
+
+required_result 0 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|0|0|0|0|0|0|0|N|
+|1|192.168.20.42|0|0|0|0|0|0|0|0|N|
+|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
+EOF
+simple_test -X all
diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.002.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.002.sh
new file mode 100755
index 0000000..a5981df
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.002.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all, 3 nodes, 1 disconnected"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0
+1 192.168.20.42 0x1
+2 192.168.20.43 0x0 CURRENT RECMASTER
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+required_result 1 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK
+pnn:1 192.168.20.42 DISCONNECTED|INACTIVE
+pnn:2 192.168.20.43 OK (THIS NODE)
+EOF
+simple_test all
+
+required_result 1 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|0|0|0|0|0|0|0|N|
+|1|192.168.20.42|1|0|0|0|0|0|1|0|N|
+|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
+EOF
+simple_test -X all
diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.003.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.003.sh
new file mode 100755
index 0000000..52c2691
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.003.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all, 3 nodes, 1 unhealthy"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+required_result 2 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 UNHEALTHY
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK (THIS NODE)
+EOF
+simple_test all
+
+required_result 2 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|0|0|0|1|0|0|0|N|
+|1|192.168.20.42|0|0|0|0|0|0|0|0|N|
+|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
+EOF
+simple_test -X all
diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.004.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.004.sh
new file mode 100755
index 0000000..c060fb9
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.004.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "current, 3 nodes, node 0 unhealthy"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+required_result 0 <<EOF
+pnn:2 192.168.20.43 OK (THIS NODE)
+EOF
+simple_test
+
+required_result 0 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
+EOF
+simple_test -X
diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.005.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.005.sh
new file mode 100755
index 0000000..59f6905
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.005.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "current, 3 nodes, node 0 unhealthy, query node 0"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+required_result 2 <<EOF
+pnn:0 192.168.20.41 UNHEALTHY
+EOF
+simple_test 0
+
+required_result 2 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|0|0|0|1|0|0|0|N|
+EOF
+simple_test -X 0
diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.006.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.006.sh
new file mode 100755
index 0000000..7d74451
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.006.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "current, 3 nodes, node 0 disabled+stopped, various queries"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x24
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+required_result 36 <<EOF
+pnn:0 192.168.20.41 DISABLED|STOPPED|INACTIVE
+EOF
+simple_test 0
+
+required_result 36 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|0|0|1|0|1|1|0|N|
+EOF
+simple_test -X 0
+
+required_result 36 <<EOF
+pnn:0 192.168.20.41 DISABLED|STOPPED|INACTIVE
+pnn:1 192.168.20.42 OK
+EOF
+simple_test 0,1
+
+required_result 0 <<EOF
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK (THIS NODE)
+EOF
+simple_test 1,2
diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.007.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.007.sh
new file mode 100755
index 0000000..c96df4d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.007.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all, 3 nodes, 1 unhealthy, runstate init"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+RUNSTATE
+INIT
+EOF
+
+required_result 64 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 UNKNOWN
+pnn:1 192.168.20.42 UNKNOWN
+pnn:2 192.168.20.43 OK (THIS NODE)
+EOF
+simple_test all
+
+required_result 64 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|1|0|0|0|0|0|0|N|
+|1|192.168.20.42|0|1|0|0|0|0|0|0|N|
+|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
+EOF
+simple_test -X all
diff --git a/ctdb/tests/UNIT/tool/ctdb.pdelete.001.sh b/ctdb/tests/UNIT/tool/ctdb.pdelete.001.sh
new file mode 100755
index 0000000..c0b7c17
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.pdelete.001.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "persistent delete"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "persistent.tdb" persistent
+
+ok_null
+simple_test_other pstore "persistent.tdb" "key1" "value1"
+
+ok_null
+simple_test "persistent.tdb" "key1"
+
+ok_null
+simple_test_other pfetch "persistent.tdb" "key1"
+
+ok "0x2"
+simple_test_other getdbseqnum "persistent.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.ping.001.sh b/ctdb/tests/UNIT/tool/ctdb.ping.001.sh
new file mode 100755
index 0000000..1e6d7c1
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ping.001.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "simple ping"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+result_filter ()
+{
+ sed -e "s@=[.0-9]* sec@=NUM sec@" # mask the variable "time=<n> sec" value as NUM so output matches the fixed expected text
+}
+
+
+ok <<EOF
+response from 0 time=NUM sec (1 clients)
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.pnn.001.sh b/ctdb/tests/UNIT/tool/ctdb.pnn.001.sh
new file mode 100755
index 0000000..a492071
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.pnn.001.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "local and remote nodes"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok "0"
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.process-exists.001.sh b/ctdb/tests/UNIT/tool/ctdb.process-exists.001.sh
new file mode 100755
index 0000000..d7dc3b2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.process-exists.001.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "ctdbd process on node 0"
+
+ctdb_test_check_supported_OS "Linux"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+dummy_client -s "$ctdbd_socket" &
+pid=$!
+
+wait_until 10 $CTDB process-exists "$pid"
+
+ok "PID $pid exists"
+simple_test "$pid"
+
+kill -9 "$pid"
+
+pid=$(ctdbd_getpid)
+required_result 1 "PID $pid does not exist"
+simple_test "$pid"
diff --git a/ctdb/tests/UNIT/tool/ctdb.process-exists.002.sh b/ctdb/tests/UNIT/tool/ctdb.process-exists.002.sh
new file mode 100755
index 0000000..e432e21
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.process-exists.002.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "ctdbd process with SRVID on node 0"
+
+ctdb_test_check_supported_OS "Linux"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+srvid="0xaebbccdd12345678"
+
+dummy_client -d INFO -s "$ctdbd_socket" -S "$srvid" &
+pid=$!
+
+wait_until 10 $CTDB process-exists "$pid"
+
+srvid2="0x1234567812345678"
+required_result 1 "PID $pid with SRVID $srvid2 does not exist"
+simple_test "$pid" "$srvid2"
+
+ok "PID $pid with SRVID $srvid exists"
+simple_test "$pid" "$srvid"
+
+kill -9 "$pid"
diff --git a/ctdb/tests/UNIT/tool/ctdb.process-exists.003.sh b/ctdb/tests/UNIT/tool/ctdb.process-exists.003.sh
new file mode 100755
index 0000000..6307026
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.process-exists.003.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "ctdbd process with multiple connections on node 0"
+
+ctdb_test_check_supported_OS "Linux"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+srvid="0xaebbccdd12345678"
+
+dummy_client -d INFO -s "$ctdbd_socket" -n 10 -S "$srvid" &
+pid=$!
+
+wait_until 10 $CTDB process-exists "$pid" "$srvid"
+
+srvid2="0x1234567812345678"
+required_result 1 "PID $pid with SRVID $srvid2 does not exist"
+simple_test "$pid" "$srvid2"
+
+ok "PID $pid with SRVID $srvid exists"
+simple_test "$pid" "$srvid"
+
+kill -9 "$pid"
diff --git a/ctdb/tests/UNIT/tool/ctdb.pstore.001.sh b/ctdb/tests/UNIT/tool/ctdb.pstore.001.sh
new file mode 100755
index 0000000..393b5a9
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.pstore.001.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "persistent store"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "persistent.tdb" persistent
+
+ok_null
+simple_test "persistent.tdb" "key1" "value1"
+
+ok "value1"
+simple_test_other pfetch "persistent.tdb" "key1"
+
+ok "0x1"
+simple_test_other getdbseqnum "persistent.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.ptrans.001.sh b/ctdb/tests/UNIT/tool/ctdb.ptrans.001.sh
new file mode 100755
index 0000000..40ef1a2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.ptrans.001.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "persistent transactions"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "persistent.tdb" persistent
+
+ok_null
+simple_test_other pstore "persistent.tdb" "key0" "value0"
+
+ok_null
+simple_test "persistent.tdb" <<EOF
+"key1" "value1"
+"key2" "value2"
+"key1" ""
+"key2" "value3"
+EOF
+
+ok "value0"
+simple_test_other pfetch "persistent.tdb" "key0"
+
+ok_null
+simple_test_other pfetch "persistent.tdb" "key1"
+
+ok "value3"
+simple_test_other pfetch "persistent.tdb" "key2"
+
+ok "0x2"
+simple_test_other getdbseqnum "persistent.tdb"
+
+ok_null
+simple_test "persistent.tdb" <<EOF
+"key0" "value0"
+EOF
+
+ok "value0"
+simple_test_other pfetch "persistent.tdb" "key0"
+
+ok "0x2"
+simple_test_other getdbseqnum "persistent.tdb"
diff --git a/ctdb/tests/UNIT/tool/ctdb.readkey.001.sh b/ctdb/tests/UNIT/tool/ctdb.readkey.001.sh
new file mode 100755
index 0000000..e2c58fd
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.readkey.001.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "volatile read"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "volatile.tdb"
+
+ok <<EOF
+Data: size:0 ptr:[]
+EOF
+simple_test "volatile.tdb" "key1"
diff --git a/ctdb/tests/UNIT/tool/ctdb.recover.001.sh b/ctdb/tests/UNIT/tool/ctdb.recover.001.sh
new file mode 100755
index 0000000..15e05ca
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.recover.001.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Just a recovery"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT
+1 192.168.20.42 0x0 RECMASTER
+2 192.168.20.43 0x0
+
+VNNMAP
+654321
+0
+1
+2
+EOF
+
+ok_null
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.001.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.001.sh
new file mode 100755
index 0000000..68d6cfb
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.001.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, no change"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok <<EOF
+No change in nodes file, skipping unnecessary reload
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.002.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.002.sh
new file mode 100755
index 0000000..570786d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.002.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, no change, inconsistent file on 1"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_nodes 1 <<EOF
+192.168.20.41
+#192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 <<EOF
+ERROR: Nodes file on node 1 differs from current node (0)
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.003.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.003.sh
new file mode 100755
index 0000000..99974d0
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.003.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, missing file on 1"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+# fake_ctdbd returns error for empty file
+setup_nodes 1 <<EOF
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 <<EOF
+Control GET_NODES_FILE failed, ret=-1
+ERROR: Failed to get nodes file from node 1
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.011.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.011.sh
new file mode 100755
index 0000000..261962e
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.011.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, add a node"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+192.168.20.44
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+Node 3 is NEW
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.012.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.012.sh
new file mode 100755
index 0000000..c3ca0fe
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.012.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete last node"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+#192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x1
+EOF
+
+required_result 0 <<EOF
+Node 2 is DELETED
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.013.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.013.sh
new file mode 100755
index 0000000..1402b9d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.013.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete connected last node"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+#192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 <<EOF
+Node 2 is DELETED
+ERROR: Node 2 is still connected
+ERROR: Nodes will not be reloaded due to previous error
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.014.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.014.sh
new file mode 100755
index 0000000..30e5148
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.014.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete first node"
+
+setup_nodes <<EOF
+#192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x1
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+EOF
+
+required_result 0 <<EOF
+Node 0 is DELETED
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.015.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.015.sh
new file mode 100755
index 0000000..5fad9de
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.015.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete connected first node"
+
+setup_nodes <<EOF
+#192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 CURRENT RECMASTER
+EOF
+
+required_result 1 <<EOF
+Node 0 is DELETED
+ERROR: Node 0 is still connected
+ERROR: Nodes will not be reloaded due to previous error
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.016.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.016.sh
new file mode 100755
index 0000000..d444a46
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.016.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete middle node"
+
+setup_nodes <<EOF
+192.168.20.41
+#192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x1
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+Node 1 is DELETED
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.017.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.017.sh
new file mode 100755
index 0000000..b9a9694
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.017.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete connected middle node"
+
+setup_nodes <<EOF
+192.168.20.41
+#192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 <<EOF
+Node 1 is DELETED
+ERROR: Node 1 is still connected
+ERROR: Nodes will not be reloaded due to previous error
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.018.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.018.sh
new file mode 100755
index 0000000..30be596
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.018.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, add a 3 nodes"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+192.168.20.44
+192.168.20.45
+192.168.20.46
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+Node 3 is NEW
+Node 4 is NEW
+Node 5 is NEW
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.019.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.019.sh
new file mode 100755
index 0000000..5069485
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.019.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete middle, add 2 nodes"
+
+setup_nodes <<EOF
+192.168.20.41
+#192.168.20.42
+192.168.20.43
+192.168.20.44
+192.168.20.45
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x1
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+Node 1 is DELETED
+Node 3 is NEW
+Node 4 is NEW
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.020.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.020.sh
new file mode 100755
index 0000000..66384c9
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.020.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, delete last, add 2 nodes"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+#192.168.20.43
+192.168.20.44
+192.168.20.45
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x1
+EOF
+
+required_result 0 <<EOF
+Node 2 is DELETED
+Node 3 is NEW
+Node 4 is NEW
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.021.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.021.sh
new file mode 100755
index 0000000..0f5f0d5
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.021.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, 1 disconnected, add a node"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+192.168.20.44
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x1
+2 192.168.20.43 0x0
+EOF
+
+required_result 0 <<EOF
+WARNING: Node 1 is disconnected. You MUST fix this node manually!
+Node 3 is NEW
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.023.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.023.sh
new file mode 100755
index 0000000..b3823d3
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.023.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, undelete middle"
+
+setup_nodes <<EOF
+192.168.20.41
+192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x11
+2 192.168.20.43 0x0
+EOF
+
+ok <<EOF
+Node 1 is UNDELETED
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.reloadnodes.024.sh b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.024.sh
new file mode 100755
index 0000000..9aa0d42
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.reloadnodes.024.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "3 nodes, middle node remains deleted"
+
+setup_nodes <<EOF
+192.168.20.41
+#192.168.20.42
+192.168.20.43
+EOF
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x11
+2 192.168.20.43 0x0
+EOF
+
+ok <<EOF
+No change in nodes file, skipping unnecessary reload
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.runstate.001.sh b/ctdb/tests/UNIT/tool/ctdb.runstate.001.sh
new file mode 100755
index 0000000..d9559bd
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.runstate.001.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "get runstate, should be RUNNING"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok "RUNNING"
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.runstate.002.sh b/ctdb/tests/UNIT/tool/ctdb.runstate.002.sh
new file mode 100755
index 0000000..b75b2ec
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.runstate.002.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "check if RUNNING"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok "RUNNING"
+simple_test "RUNNING"
diff --git a/ctdb/tests/UNIT/tool/ctdb.runstate.003.sh b/ctdb/tests/UNIT/tool/ctdb.runstate.003.sh
new file mode 100755
index 0000000..eba41f8
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.runstate.003.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "check non-RUNNING states"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+for i in "INIT" "SETUP" "FIRST_RECOVERY" "STARTUP" "SHUTDOWN" ; do # every listed state differs from RUNNING
+ required_result 1 "CTDB not in required run state (got RUNNING)" # expectation is set afresh on each iteration; presumably status 1 plus this exact message - confirm in unit.sh
+ simple_test "$i" # the state name is the only argument passed for this iteration
+done
diff --git a/ctdb/tests/UNIT/tool/ctdb.runstate.004.sh b/ctdb/tests/UNIT/tool/ctdb.runstate.004.sh
new file mode 100755
index 0000000..666e84d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.runstate.004.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "check invalid state"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 "Invalid run state (foobar)"
+simple_test "foobar"
diff --git a/ctdb/tests/UNIT/tool/ctdb.runstate.005.sh b/ctdb/tests/UNIT/tool/ctdb.runstate.005.sh
new file mode 100755
index 0000000..972783c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.runstate.005.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "check from multiple states"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok "RUNNING"
+simple_test "STARTUP" "RUNNING"
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.001.sh b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.001.sh
new file mode 100755
index 0000000..0a0cfe2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.001.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set volatile non-read-only to read-only by ID"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok_null
+simple_test 0x7a19d84d
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb READONLY
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
+
+ok_null
+simple_test 0x7a19d84d
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb READONLY
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.002.sh b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.002.sh
new file mode 100755
index 0000000..246fb60
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.002.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set volatile non-read-only to read-only by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok_null
+simple_test locking.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb READONLY
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.003.sh b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.003.sh
new file mode 100755
index 0000000..3a11c79
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.003.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set persistent read-only by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+required_result 1 <<EOF
+READONLY can be set only on volatile DB
+EOF
+simple_test secrets.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.004.sh b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.004.sh
new file mode 100755
index 0000000..5d6561d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.004.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set volatile sticky to sticky and read-only by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb STICKY
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok_null
+simple_test locking.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb STICKY READONLY
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.005.sh b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.005.sh
new file mode 100755
index 0000000..ae336dd
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbreadonly.005.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set replicated read-only by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+required_result 1 <<EOF
+READONLY can be set only on volatile DB
+EOF
+simple_test ctdb-ip.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbsticky.001.sh b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.001.sh
new file mode 100755
index 0000000..28cbfd7
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.001.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set volatile non-sticky to sticky by ID"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok_null
+simple_test 0x4e66c2b2
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb STICKY
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
+
+ok_null
+simple_test 0x4e66c2b2
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb STICKY
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbsticky.002.sh b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.002.sh
new file mode 100755
index 0000000..1c39f54
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.002.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set volatile non-sticky to sticky by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok_null
+simple_test brlock.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb STICKY
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbsticky.003.sh b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.003.sh
new file mode 100755
index 0000000..206fed9
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.003.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set persistent sticky by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+required_result 1 <<EOF
+STICKY can be set only on volatile DB
+EOF
+simple_test secrets.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbsticky.004.sh b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.004.sh
new file mode 100755
index 0000000..a322a57
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.004.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set volatile read-only to read-only and sticky by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb READONLY
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+ok_null
+simple_test brlock.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb STICKY READONLY
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdbsticky.005.sh b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.005.sh
new file mode 100755
index 0000000..9a9bec1
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdbsticky.005.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "set replicated sticky by name"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+DBMAP
+0x7a19d84d locking.tdb
+0x4e66c2b2 brlock.tdb
+0x4d2a432b g_lock.tdb
+0x7132c184 secrets.tdb PERSISTENT
+0x6cf2837d registry.tdb PERSISTENT 42
+0xbc57b384 ctdb-ip.tdb REPLICATED
+0xbec75f0b ctdb-conn.tdb REPLICATED 23
+EOF
+
+required_result 1 <<EOF
+STICKY can be set only on volatile DB
+EOF
+simple_test ctdb-ip.tdb
+
+ok <<EOF
+Number of databases:7
+dbid:0x7a19d84d name:locking.tdb path:${ctdbd_dbdir}/locking.tdb
+dbid:0x4e66c2b2 name:brlock.tdb path:${ctdbd_dbdir}/brlock.tdb
+dbid:0x4d2a432b name:g_lock.tdb path:${ctdbd_dbdir}/g_lock.tdb
+dbid:0x7132c184 name:secrets.tdb path:${ctdbd_dbdir}/secrets.tdb PERSISTENT
+dbid:0x6cf2837d name:registry.tdb path:${ctdbd_dbdir}/registry.tdb PERSISTENT
+dbid:0xbc57b384 name:ctdb-ip.tdb path:${ctdbd_dbdir}/ctdb-ip.tdb REPLICATED
+dbid:0xbec75f0b name:ctdb-conn.tdb path:${ctdbd_dbdir}/ctdb-conn.tdb REPLICATED
+EOF
+
+simple_test_other getdbmap
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdebug.001.sh b/ctdb/tests/UNIT/tool/ctdb.setdebug.001.sh
new file mode 100755
index 0000000..bec32a3
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdebug.001.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "bogus debug level string, ensure no change"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+orig=$($CTDB -d $CTDB_DEBUGLEVEL getdebug) # remember what getdebug prints before the invalid level is tried
+
+required_result 1 <<EOF
+Invalid debug level 'foobar'. Valid levels are:
+ ERROR | WARNING | NOTICE | INFO | DEBUG
+EOF
+simple_test foobar # "foobar" is the bogus level string under test
+
+ok "$orig" # getdebug is expected to print the same text captured in orig, i.e. no change
+simple_test_other getdebug
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdebug.002.sh b/ctdb/tests/UNIT/tool/ctdb.setdebug.002.sh
new file mode 100755
index 0000000..7819b0b
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdebug.002.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "bogus debug level integer, ensure no change"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+orig=$($CTDB -d $CTDB_DEBUGLEVEL getdebug) # remember what getdebug prints before the invalid level is tried
+
+required_result 1 <<EOF
+Invalid debug level '42'. Valid levels are:
+ ERROR | WARNING | NOTICE | INFO | DEBUG
+EOF
+simple_test 42 # 42 is the bogus integer level under test
+
+ok "$orig" # getdebug is expected to print the same text captured in orig, i.e. no change
+simple_test_other getdebug
diff --git a/ctdb/tests/UNIT/tool/ctdb.setdebug.003.sh b/ctdb/tests/UNIT/tool/ctdb.setdebug.003.sh
new file mode 100755
index 0000000..2a8be18
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setdebug.003.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all possible legal levels, including some abbreviations"
+
+debug_set_result ()
+{ # $1: a level given as a name, an abbreviation or an integer; registers the level name expected next
+ case "$1" in
+ 0|ERR*) ok "ERROR" ;; # 0 or anything starting with ERR
+ 1|2|WARN*) ok "WARNING" ;; # 1, 2 or anything starting with WARN
+ 3|4|NOTICE) ok "NOTICE" ;; # exact name only, no abbreviation pattern
+ 5|6|7|8|9|INFO) ok "INFO" ;; # 5 through 9 all expect INFO
+ 10|DEBUG) ok "DEBUG" ;;
+ *) required_result 42 "foo" ;; # unmatched input; NOTE(review): looks like a deliberately unsatisfiable expectation (status 42, output "foo") - confirm
+ esac
+}
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+for i in "ERROR" "WARNING" "NOTICE" "INFO" "DEBUG" $(seq 0 10) "ERR" "WARN" ; do # full names, integers 0 to 10, then two abbreviations
+ ok_null # presumably: expect success with no output - confirm in unit.sh
+ simple_test "$i" # pass the level under test
+
+ debug_set_result "$i" # register the level name that the getdebug call below should print
+ simple_test_other getdebug
+done
diff --git a/ctdb/tests/UNIT/tool/ctdb.setifacelink.001.sh b/ctdb/tests/UNIT/tool/ctdb.setifacelink.001.sh
new file mode 100755
index 0000000..53104cf
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setifacelink.001.sh
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "toggle state of 2 interfaces"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:0:4:
+EOF
+
+# eth1: down -> down
+
+ok_null
+simple_test eth1 down
+
+ok <<EOF
+Interfaces on node 0
+name:eth2 link:up references:2
+name:eth1 link:down references:4
+EOF
+simple_test_other ifaces
+
+# eth1: down -> up
+
+ok_null
+simple_test eth1 up
+
+ok <<EOF
+Interfaces on node 0
+name:eth2 link:up references:2
+name:eth1 link:up references:4
+EOF
+simple_test_other ifaces
+
+# eth1: up -> down
+ok_null
+simple_test eth1 down
+
+ok <<EOF
+Interfaces on node 0
+name:eth2 link:up references:2
+name:eth1 link:down references:4
+EOF
+simple_test_other ifaces
+
+# eth2: up -> down
+
+ok_null
+simple_test eth2 down
+
+ok <<EOF
+Interfaces on node 0
+name:eth2 link:down references:2
+name:eth1 link:down references:4
+EOF
+simple_test_other ifaces
+
+# eth1: down -> up
+
+ok_null
+simple_test eth1 up
+
+ok <<EOF
+Interfaces on node 0
+name:eth2 link:down references:2
+name:eth1 link:up references:4
+EOF
+simple_test_other ifaces
diff --git a/ctdb/tests/UNIT/tool/ctdb.setifacelink.002.sh b/ctdb/tests/UNIT/tool/ctdb.setifacelink.002.sh
new file mode 100755
index 0000000..a27062e
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setifacelink.002.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "invalid interface"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:0:4:
+EOF
+
+required_result 1 <<EOF
+Interface eth0 not configured on node 0
+EOF
+simple_test eth0 down
diff --git a/ctdb/tests/UNIT/tool/ctdb.setvar.001.sh b/ctdb/tests/UNIT/tool/ctdb.setvar.001.sh
new file mode 100755
index 0000000..e11ff9c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setvar.001.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "get a variable, change its value"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+# Squash whitespace for predictable output
+result_filter ()
+{ # filters stdin to stdout; NOTE(review): presumably invoked by the test harness on command output - confirm in unit.sh
+ sed -e 's|[[:space:]][[:space:]]*| |g' # replace every run of one or more whitespace characters with a single space
+}
+
+$CTDB -d $CTDB_DEBUGLEVEL listvars |
+ tail -n 1 |
+ { # the group consumes only the last line of the listvars output
+ read variable equals value # three whitespace-separated fields, presumably "NAME = VALUE"; 'equals' is never used again
+
+ # Increment original variable
+ newvalue=$((value + 1))
+ ok_null
+ simple_test "$variable" "$newvalue"
+
+ ok "${variable} = ${newvalue}" # getvar is expected to print the name, " = " and the new value
+ simple_test_other getvar "$variable"
+
+ # Increment uppercase variable
+ v_upper=$(echo "$variable" | tr "a-z" "A-Z") # the name is set in upper case while the getvar expectation below keeps the original spelling
+ newvalue=$((newvalue + 1))
+ ok_null
+ simple_test "$v_upper" "$newvalue"
+
+ ok "${variable} = ${newvalue}"
+ simple_test_other getvar "$variable"
+
+ # Put back original, lowercase
+ v_lower=$(echo "$variable" | tr "A-Z" "a-z") # lower-case spelling of the same name
+ ok_null
+ simple_test "$v_lower" "$value" # writes back the value read from listvars at the top
+
+ ok "${variable} = ${value}"
+ simple_test_other getvar "$variable"
+ }
diff --git a/ctdb/tests/UNIT/tool/ctdb.setvar.002.sh b/ctdb/tests/UNIT/tool/ctdb.setvar.002.sh
new file mode 100755
index 0000000..bf788a2
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.setvar.002.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "invalid variable"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+required_result 1 <<EOF
+No such tunable TheQuickBrownFoxJumpsOverTheLazyDog
+EOF
+simple_test "TheQuickBrownFoxJumpsOverTheLazyDog" 42
diff --git a/ctdb/tests/UNIT/tool/ctdb.status.001.sh b/ctdb/tests/UNIT/tool/ctdb.status.001.sh
new file mode 100755
index 0000000..62c1dc7
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.status.001.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all, 3 nodes, all ok"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+VNNMAP
+654321
+0
+1
+2
+EOF
+
+required_result 0 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+Generation:654321
+Size:3
+hash:0 lmaster:0
+hash:1 lmaster:1
+hash:2 lmaster:2
+Recovery mode:NORMAL (0)
+Leader:0
+EOF
+simple_test
+
+required_result 0 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|0|0|0|0|0|0|0|Y|
+|1|192.168.20.42|0|0|0|0|0|0|0|0|N|
+|2|192.168.20.43|0|0|0|0|0|0|0|0|N|
+EOF
+simple_test -X
diff --git a/ctdb/tests/UNIT/tool/ctdb.status.002.sh b/ctdb/tests/UNIT/tool/ctdb.status.002.sh
new file mode 100755
index 0000000..0cce443
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.status.002.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all, 3 nodes, 1 unhealthy"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0 CURRENT RECMASTER
+2 192.168.20.43 0x0
+
+VNNMAP
+654321
+0
+1
+2
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+EOF
+
+required_result 0 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 UNHEALTHY
+pnn:1 192.168.20.42 OK (THIS NODE)
+pnn:2 192.168.20.43 OK
+Generation:654321
+Size:3
+hash:0 lmaster:0
+hash:1 lmaster:1
+hash:2 lmaster:2
+Recovery mode:NORMAL (0)
+Leader:1
+EOF
+simple_test
+
+required_result 0 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|0|0|0|1|0|0|0|N|
+|1|192.168.20.42|0|0|0|0|0|0|0|0|Y|
+|2|192.168.20.43|0|0|0|0|0|0|0|0|N|
+EOF
+simple_test -X
diff --git a/ctdb/tests/UNIT/tool/ctdb.status.003.sh b/ctdb/tests/UNIT/tool/ctdb.status.003.sh
new file mode 100755
index 0000000..67a2966
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.status.003.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all, 3 nodes, 1 unhealthy, runstate FIRST_RECOVERY"
+
+# Same NODEMAP as ctdb.status.002.sh (node 0 has flags 0x2) but with
+# RUNSTATE set to FIRST_RECOVERY: the expected output reports every
+# node except the current one as UNKNOWN.
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x2
+1 192.168.20.42 0x0 CURRENT RECMASTER
+2 192.168.20.43 0x0
+
+VNNMAP
+654321
+0
+1
+2
+
+IFACES
+:Name:LinkStatus:References:
+:eth2:1:2:
+:eth1:1:4:
+
+RUNSTATE
+FIRST_RECOVERY
+EOF
+
+required_result 0 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 UNKNOWN
+pnn:1 192.168.20.42 OK (THIS NODE)
+pnn:2 192.168.20.43 UNKNOWN
+Generation:654321
+Size:3
+hash:0 lmaster:0
+hash:1 lmaster:1
+hash:2 lmaster:2
+Recovery mode:NORMAL (0)
+Leader:1
+EOF
+simple_test
+
+required_result 0 <<EOF
+|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
+|0|192.168.20.41|0|1|0|0|0|0|0|0|N|
+|1|192.168.20.42|0|0|0|0|0|0|0|0|Y|
+|2|192.168.20.43|0|1|0|0|0|0|0|0|N|
+EOF
+simple_test -X
diff --git a/ctdb/tests/UNIT/tool/ctdb.stop.001.sh b/ctdb/tests/UNIT/tool/ctdb.stop.001.sh
new file mode 100755
index 0000000..d374ebf
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.stop.001.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "stop default (0)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test
+
+required_result 32 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 STOPPED|INACTIVE (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.stop.002.sh b/ctdb/tests/UNIT/tool/ctdb.stop.002.sh
new file mode 100755
index 0000000..f8cc792
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.stop.002.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "stop 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test -n 1
+
+required_result 32 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 STOPPED|INACTIVE
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.stop.003.sh b/ctdb/tests/UNIT/tool/ctdb.stop.003.sh
new file mode 100755
index 0000000..3e4981c
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.stop.003.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "node is already stopped"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x20
+EOF
+
+ok "Node 2 is already stopped"
+simple_test -n 2
+
+required_result 32 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 STOPPED|INACTIVE
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.unban.001.sh b/ctdb/tests/UNIT/tool/ctdb.unban.001.sh
new file mode 100755
index 0000000..c771fb4
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.unban.001.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "unban default (0)"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x8 CURRENT
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0 RECMASTER
+EOF
+
+ok_null
+simple_test
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.unban.002.sh b/ctdb/tests/UNIT/tool/ctdb.unban.002.sh
new file mode 100755
index 0000000..b65143d
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.unban.002.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "ban, unban node 1"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other ban 60 -n 1
+
+required_result 8 <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 BANNED|INACTIVE
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
+
+ok_null
+simple_test_other unban -n 1
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.unban.003.sh b/ctdb/tests/UNIT/tool/ctdb.unban.003.sh
new file mode 100755
index 0000000..8b94f30
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.unban.003.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "node not banned"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok "Node 0 is not banned"
+simple_test
+
+ok <<EOF
+Number of nodes:3
+pnn:0 192.168.20.41 OK (THIS NODE)
+pnn:1 192.168.20.42 OK
+pnn:2 192.168.20.43 OK
+EOF
+simple_test_other nodestatus all
diff --git a/ctdb/tests/UNIT/tool/ctdb.uptime.001.sh b/ctdb/tests/UNIT/tool/ctdb.uptime.001.sh
new file mode 100755
index 0000000..34fd1f4
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.uptime.001.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "simple uptime"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+# Replace dates/times, the "(000 00:00:SS)" durations and the recovery
+# time in seconds with fixed tokens so the output can be compared.
+result_filter ()
+{
+ _weekday="[A-Z][a-z][a-z]"
+ _month="[A-Z][a-z][a-z]"
+ _date="[0-9][0-9]*"
+ _time="[0-9][0-9]:[0-9][0-9]:[0-9][0-9]"
+ _year="[0-9][0-9]*"
+ _date_time="${_weekday} ${_month} *${_date} ${_time} ${_year}"
+ _duration="(000 00:00:[0-9][0-9])"
+ sed -e "s|${_date_time}\$|DATE/TIME|" \
+ -e "s|[.0-9]* seconds|SEC seconds|" \
+ -e "s|${_duration}|(DURATION)|"
+}
+
+
+ok <<EOF
+Current time of node 0 : DATE/TIME
+Ctdbd start time : (DURATION) DATE/TIME
+Time of last recovery/failover: (DURATION) DATE/TIME
+Duration of last recovery/failover: SEC seconds
+EOF
+
+simple_test
diff --git a/ctdb/tests/UNIT/tool/ctdb.writekey.001.sh b/ctdb/tests/UNIT/tool/ctdb.writekey.001.sh
new file mode 100755
index 0000000..7adee9f
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/ctdb.writekey.001.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "volatile write"
+
+setup_ctdbd <<EOF
+NODEMAP
+0 192.168.20.41 0x0 CURRENT RECMASTER
+1 192.168.20.42 0x0
+2 192.168.20.43 0x0
+EOF
+
+ok_null
+simple_test_other attach "volatile.tdb"
+
+ok_null
+simple_test "volatile.tdb" "key1" "value1"
+
+ok <<EOF
+Data: size:6 ptr:[value1]
+EOF
+simple_test_other readkey "volatile.tdb" "key1"
+
+ok_null
+simple_test "volatile.tdb" "key1" "a new value"
+
+ok <<EOF
+Data: size:11 ptr:[a new value]
+EOF
+simple_test_other readkey "volatile.tdb" "key1"
diff --git a/ctdb/tests/UNIT/tool/scripts/local.sh b/ctdb/tests/UNIT/tool/scripts/local.sh
new file mode 100644
index 0000000..618fa36
--- /dev/null
+++ b/ctdb/tests/UNIT/tool/scripts/local.sh
@@ -0,0 +1,112 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+PATH="${PATH}:${CTDB_SCRIPTS_TOOLS_HELPER_DIR}"
+PATH="${PATH}:${CTDB_SCRIPTS_HELPER_BINDIR}"
+
+setup_ctdb_base "$CTDB_TEST_TMP_DIR" "ctdb-etc" \
+ functions
+
+if "$CTDB_TEST_VERBOSE" ; then
+ debug () { echo "$@" ; }
+else
+ debug () { : ; }
+fi
+
+ctdbd_socket=$(ctdb-path socket "ctdbd")
+ctdbd_pidfile=$(ctdb-path pidfile "ctdbd")
+ctdbd_dbdir=$(ctdb-path vardir append "db")
+
+define_test ()
+{
+ _f=$(basename "$0" ".sh")
+
+ case "$_f" in
+ ctdb.*)
+ _cmd="${_f#ctdb.}"
+ _cmd="${_cmd%.*}" # Strip test number
+ export CTDB="ctdb"
+ export CTDB_DEBUGLEVEL=NOTICE
+ if [ -z "$FAKE_CTDBD_DEBUGLEVEL" ] ; then
+ FAKE_CTDBD_DEBUGLEVEL="ERR"
+ fi
+ export FAKE_CTDBD_DEBUGLEVEL
+ test_args="$_cmd"
+ ;;
+ *)
+ die "Unknown pattern for testcase \"$_f\""
+ esac
+
+ printf "%-28s - %s\n" "$_f" "$1"
+}
+
+cleanup_ctdbd ()
+{
+ debug "Cleaning up fake ctdbd"
+
+ pid=$(cat "$ctdbd_pidfile" 2>/dev/null || echo)
+ if [ -n "$pid" ] ; then
+ kill $pid || true
+ rm -f "$ctdbd_pidfile"
+ fi
+ rm -f "$ctdbd_socket"
+ rm -rf "$ctdbd_dbdir"
+}
+
+setup_ctdbd ()
+{
+ echo "Setting up fake ctdbd"
+
+ mkdir -p "$ctdbd_dbdir"
+ $VALGRIND fake_ctdbd -d "$FAKE_CTDBD_DEBUGLEVEL" \
+ -s "$ctdbd_socket" -p "$ctdbd_pidfile" \
+ -D "$ctdbd_dbdir"
+ # Wait till fake_ctdbd is running
+ wait_until 10 test -S "$ctdbd_socket" || \
+ die "fake_ctdbd failed to start"
+
+ test_cleanup cleanup_ctdbd
+}
+
+ctdbd_getpid ()
+{
+ cat "$ctdbd_pidfile"
+}
+
+setup_natgw ()
+{
+ debug "Setting up NAT gateway"
+
+ export CTDB_NATGW_HELPER="${CTDB_SCRIPTS_TOOLS_HELPER_DIR}/ctdb_natgw"
+ export CTDB_NATGW_NODES="${CTDB_BASE}/natgw_nodes"
+
+ cat >"$CTDB_NATGW_NODES"
+}
+
+setup_lvs ()
+{
+ debug "Setting up LVS"
+
+ export CTDB_LVS_HELPER="${CTDB_SCRIPTS_TOOLS_HELPER_DIR}/ctdb_lvs"
+ export CTDB_LVS_NODES="${CTDB_BASE}/lvs_nodes"
+
+ cat >"$CTDB_LVS_NODES"
+}
+
+setup_nodes ()
+{
+ _pnn="$1"
+
+ _f="${CTDB_BASE}/nodes${_pnn:+.}${_pnn}"
+
+ cat >"$_f"
+}
+
+simple_test_other ()
+{
+ unit_test $CTDB -d $CTDB_DEBUGLEVEL "$@"
+}
+
+simple_test ()
+{
+ simple_test_other $test_args "$@"
+}
diff --git a/ctdb/tests/etc-ctdb/events/legacy/00.test.script b/ctdb/tests/etc-ctdb/events/legacy/00.test.script
new file mode 100755
index 0000000..c6797da
--- /dev/null
+++ b/ctdb/tests/etc-ctdb/events/legacy/00.test.script
@@ -0,0 +1,30 @@
+#!/bin/sh
+# event script for 'make test'
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+ctdb_check_args "$@"
+
+event="$1"
+shift
+
+case "$event" in
+monitor)
+ if [ "$CTDB_RUN_TIMEOUT_MONITOR" = "yes" ] ; then
+ timeout=9999
+ echo "Sleeping for ${timeout} seconds..."
+ sleep $timeout
+ fi
+ ;;
+
+startup)
+ ifaces=$(ctdb ifaces -X | tail -n +2 | cut -d '|' -f2)
+ for i in $ifaces; do
+ ctdb setifacelink "$i" up
+ done
+ ;;
+esac
+
+echo "${event} event${*:+ for }$*"
diff --git a/ctdb/tests/local_daemons.sh b/ctdb/tests/local_daemons.sh
new file mode 100755
index 0000000..b474668
--- /dev/null
+++ b/ctdb/tests/local_daemons.sh
@@ -0,0 +1,506 @@
+#!/bin/sh
+
+set -u
+
+export CTDB_TEST_MODE="yes"
+
+# Following 2 lines may be modified by installation script
+CTDB_TESTS_ARE_INSTALLED=false
+CTDB_TEST_DIR=$(dirname "$0")
+export CTDB_TESTS_ARE_INSTALLED CTDB_TEST_DIR
+
+export TEST_SCRIPTS_DIR="${CTDB_TEST_DIR}/scripts"
+
+. "${TEST_SCRIPTS_DIR}/common.sh"
+
+if ! $CTDB_TESTS_ARE_INSTALLED ; then
+ hdir="$CTDB_SCRIPTS_HELPER_BINDIR"
+ export CTDB_EVENTD="${hdir}/ctdb-eventd"
+ export CTDB_EVENT_HELPER="${hdir}/ctdb-event"
+ export CTDB_LOCK_HELPER="${hdir}/ctdb_lock_helper"
+ export CTDB_RECOVERY_HELPER="${hdir}/ctdb_recovery_helper"
+ export CTDB_TAKEOVER_HELPER="${hdir}/ctdb_takeover_helper"
+ export CTDB_CLUSTER_MUTEX_HELPER="${hdir}/ctdb_mutex_fcntl_helper"
+fi
+
+########################################
+
+# Succeed if the given IP address is assigned to a local interface
+have_ip ()
+{
+ _addr="$1"
+
+ case "$_addr" in
+ *:*) _bits=128 ;;
+ *) _bits=32 ;;
+ esac
+
+ _t=$(ip addr show to "${_addr}/${_bits}")
+ [ -n "$_t" ]
+}
+
+setup_nodes ()
+{
+ _num_nodes="$1"
+ _use_ipv6="$2"
+
+ _have_all_ips=true
+ for _i in $(seq 0 $((_num_nodes - 1)) ) ; do
+ if $_use_ipv6 ; then
+ _j=$(printf "%04x" $((0x5f00 + 1 + _i)) )
+ _node_ip="fd00::5357:${_j}"
+ if have_ip "$_node_ip" ; then
+ echo "$_node_ip"
+ else
+ cat >&2 <<EOF
+ERROR: ${_node_ip} not on an interface, please add it
+EOF
+ _have_all_ips=false
+ fi
+ else
+ _c=$(( _i / 100 ))
+ _d=$(( 1 + (_i % 100) ))
+ echo "127.0.${_c}.${_d}"
+ fi
+ done
+
+ # Fail if we don't have all of the IPv6 addresses assigned
+ $_have_all_ips
+}
+
+setup_public_addresses ()
+{
+ _num_nodes="$1"
+ _node_no_ips="$2"
+ _use_ipv6="$3"
+
+ for _i in $(seq 0 $((_num_nodes - 1)) ) ; do
+ if [ "$_i" -eq "$_node_no_ips" ] ; then
+ continue
+ fi
+
+ # 2 public addresses on most nodes, just to make
+ # things interesting
+ if $_use_ipv6 ; then
+ printf 'fc00:10::1:%x/64 lo\n' $((1 + _i))
+ printf 'fc00:10::2:%x/64 lo\n' $((1 + _i))
+ else
+ _c1=$(( 100 + (_i / 100) ))
+ _c2=$(( 200 + (_i / 100) ))
+ _d=$(( 1 + (_i % 100) ))
+ printf '192.168.%d.%d/24 lo\n' "$_c1" "$_d"
+ printf '192.168.%d.%d/24 lo\n' "$_c2" "$_d"
+ fi
+ done
+}
+
+setup_socket_wrapper ()
+{
+ _socket_wrapper_so="$1"
+
+ _so="${directory}/libsocket-wrapper.so"
+ if [ ! -f "$_socket_wrapper_so" ] ; then
+ die "$0 setup: Unable to find ${_socket_wrapper_so}"
+ fi
+
+ # Find absolute path if only relative is given
+ case "$_socket_wrapper_so" in
+ /*) : ;;
+ *) _socket_wrapper_so="${PWD}/${_socket_wrapper_so}" ;;
+ esac
+
+ rm -f "$_so"
+ ln -s "$_socket_wrapper_so" "$_so"
+
+ _d="${directory}/sw"
+ rm -rf "$_d"
+ mkdir -p "$_d"
+}
+
+local_daemons_setup_usage ()
+{
+ cat >&2 <<EOF
+$0 <directory> setup [ <options>... ]
+
+Options:
+ -C <item> Comment out given config item (default: item uncommented)
+ -F Disable failover (default: failover enabled)
+ -N <file> Nodes file (default: automatically generated)
+ -n <num> Number of nodes (default: 3)
+ -P <file> Public addresses file (default: automatically generated)
+ -R Use a command for the cluster lock (default: use a file)
+ -r <time> Like -R and set recheck interval to <time> (default: use a file)
+ -S <library> Socket wrapper shared library to preload (default: none)
+ -6 Generate IPv6 IPs for nodes, public addresses (default: IPv4)
+EOF
+ # Shown for -h or an unrecognised option; always exits with failure
+ exit 1
+}
+
+local_daemons_setup ()
+{
+ _commented_config=""
+ _disable_failover=false
+ _nodes_file=""
+ _num_nodes=3
+ _public_addresses_file=""
+ _cluster_lock_use_command=false
+ _cluster_lock_recheck_interval=""
+ _socket_wrapper=""
+ _use_ipv6=false
+
+ set -e
+
+ while getopts "C:FN:n:P:Rr:S:6h?" _opt ; do
+ case "$_opt" in
+ C) _t="${_commented_config}${_commented_config:+|}"
+ _commented_config="${_t}${OPTARG}"
+ ;;
+ F) _disable_failover=true ;;
+ N) _nodes_file="$OPTARG" ;;
+ n) _num_nodes="$OPTARG" ;;
+ P) _public_addresses_file="$OPTARG" ;;
+ R) _cluster_lock_use_command=true ;;
+ r) _cluster_lock_use_command=true
+ _cluster_lock_recheck_interval="$OPTARG"
+ ;;
+ S) _socket_wrapper="$OPTARG" ;;
+ 6) _use_ipv6=true ;;
+ \?|h) local_daemons_setup_usage ;;
+ esac
+ done
+ shift $((OPTIND - 1))
+
+ mkdir -p "$directory"
+
+ _nodes_all="${directory}/nodes"
+ if [ -n "$_nodes_file" ] ; then
+ cp "$_nodes_file" "$_nodes_all"
+ else
+ setup_nodes "$_num_nodes" $_use_ipv6 >"$_nodes_all"
+ fi
+
+ # If there are (strictly) greater than 2 nodes then we'll
+ # "randomly" choose a node to have no public addresses
+ _node_no_ips=-1
+ if [ "$_num_nodes" -gt 2 ] ; then
+ _node_no_ips=$(($$ % _num_nodes))
+ fi
+
+ _public_addresses_all="${directory}/public_addresses"
+ if [ -n "$_public_addresses_file" ] ; then
+ cp "$_public_addresses_file" "$_public_addresses_all"
+ else
+ setup_public_addresses "$_num_nodes" \
+ $_node_no_ips \
+ "$_use_ipv6" >"$_public_addresses_all"
+ fi
+
+ _cluster_lock_dir="${directory}/shared/.ctdb"
+ mkdir -p "$_cluster_lock_dir"
+ _cluster_lock="${_cluster_lock_dir}/cluster.lock"
+ if $_cluster_lock_use_command ; then
+ _helper="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb_mutex_fcntl_helper"
+ _t="! ${_helper} ${_cluster_lock}"
+ if [ -n "$_cluster_lock_recheck_interval" ] ; then
+ _t="${_t} ${_cluster_lock_recheck_interval}"
+ fi
+ _cluster_lock="$_t"
+ fi
+
+ if [ -n "$_socket_wrapper" ] ; then
+ setup_socket_wrapper "$_socket_wrapper"
+ fi
+
+ for _n in $(seq 0 $((_num_nodes - 1))) ; do
+ # CTDB_TEST_SUITE_DIR needs to be correctly set so
+ # setup_ctdb_base() finds the etc-ctdb/ subdirectory
+ # and the test event script is correctly installed
+ # shellcheck disable=SC2034
+ CTDB_TEST_SUITE_DIR="$CTDB_TEST_DIR" \
+ setup_ctdb_base "$directory" "node.${_n}" \
+ functions notify.sh debug-hung-script.sh
+
+ cp "$_nodes_all" "${CTDB_BASE}/nodes"
+
+ _public_addresses="${CTDB_BASE}/public_addresses"
+
+ if [ -z "$_public_addresses_file" ] && \
+ [ "$_node_no_ips" -eq "$_n" ] ; then
+ echo "Node ${_n} will have no public IPs."
+ : >"$_public_addresses"
+ else
+ cp "$_public_addresses_all" "$_public_addresses"
+ fi
+
+ _node_ip=$(sed -n -e "$((_n + 1))p" "$_nodes_all")
+
+ _db_dir="${CTDB_BASE}/db"
+ for _d in "volatile" "persistent" "state" ; do
+ mkdir -p "${_db_dir}/${_d}"
+ done
+
+ cat >"${CTDB_BASE}/ctdb.conf" <<EOF
+[logging]
+ location = file:${CTDB_BASE}/log.ctdb
+ log level = INFO
+
+[cluster]
+ cluster lock = ${_cluster_lock}
+ node address = ${_node_ip}
+
+[database]
+ volatile database directory = ${_db_dir}/volatile
+ persistent database directory = ${_db_dir}/persistent
+ state database directory = ${_db_dir}/state
+
+[failover]
+ disabled = ${_disable_failover}
+
+[event]
+ debug script = debug-hung-script.sh
+EOF
+
+ (
+ IFS='|'
+ for _c in $_commented_config ; do
+ # Quote all backslashes due to double-quotes
+ sed -i -e "s|^\\t\\(${_c}\\) = |\\t# \\1 = |" \
+ "${CTDB_BASE}/ctdb.conf"
+ done
+ )
+ done
+}
+
+local_daemons_ssh_usage ()
+{
+ cat >&2 <<EOF
+usage: $0 <directory> ssh [ -n ] <ip> <command>
+EOF
+
+ exit 1
+}
+
+local_daemons_ssh ()
+{
+ if [ $# -lt 2 ] ; then
+ local_daemons_ssh_usage
+ fi
+
+ # Only try to respect ssh -n option, others can't be used so discard them
+ _close_stdin=false
+ while getopts "nh?" _opt ; do
+ case "$_opt" in
+ n) _close_stdin=true ;;
+ \?|h) local_daemons_ssh_usage ;;
+ *) : ;;
+ esac
+ done
+ shift $((OPTIND - 1))
+
+ if [ $# -lt 2 ] ; then
+ local_daemons_ssh_usage
+ fi
+
+ _nodes="${directory}/nodes"
+
+ # IP address of node. onnode can pass hostnames but not in these tests
+ _ip="$1" ; shift
+ # "$*" is command
+
+
+ # Determine the correct CTDB base directory
+ _num=$(awk -v ip="$_ip" '$1 == ip { print NR }' "$_nodes")
+ _node=$((_num - 1))
+ export CTDB_BASE="${directory}/node.${_node}"
+
+ if [ ! -d "$CTDB_BASE" ] ; then
+ die "$0 ssh: Unable to find base for node ${_ip}"
+ fi
+
+ if $_close_stdin ; then
+ exec sh -c "$*" </dev/null
+ else
+ exec sh -c "$*"
+ fi
+}
+
+onnode_common ()
+{
+ # onnode will execute this, which fakes ssh against local daemons
+ export ONNODE_SSH="${0} ${directory} ssh"
+
+ # onnode just needs the nodes file, so use the common one
+ export CTDB_BASE="$directory"
+}
+
+local_daemons_generic_usage ()
+{
+ cat >&2 <<EOF
+usage: $0 <directory> ${1} <nodes>
+
+<nodes> can be "all", a node number or any specification supported by onnode
+EOF
+
+ exit 1
+}
+
+local_daemons_start_socket_wrapper ()
+{
+ _so="${directory}/libsocket-wrapper.so"
+ _d="${directory}/sw"
+
+ if [ -d "$_d" ] && [ -f "$_so" ] ; then
+ export SOCKET_WRAPPER_DIR="$_d"
+ export LD_PRELOAD="$_so"
+ export SOCKET_WRAPPER_DIR_ALLOW_ORIG="1"
+ fi
+}
+
+local_daemons_start ()
+{
+ if [ $# -ne 1 ] || [ "$1" = "-h" ] ; then
+ local_daemons_generic_usage "start"
+ fi
+
+ local_daemons_start_socket_wrapper
+
+ _nodes="$1"
+
+ onnode_common
+
+ onnode -i "$_nodes" "${VALGRIND:-} ctdbd"
+}
+
+local_daemons_stop ()
+{
+ if [ $# -ne 1 ] || [ "$1" = "-h" ] ; then
+ local_daemons_generic_usage "stop"
+ fi
+
+ _nodes="$1"
+
+ onnode_common
+
+ onnode -p "$_nodes" \
+ "if [ -e \"\${CTDB_BASE}/run/ctdbd.pid\" ] ; then \
+ ${CTDB:-${VALGRIND:-} ctdb} shutdown ; \
+ fi"
+}
+
+local_daemons_onnode_usage ()
+{
+ cat >&2 <<EOF
+usage: $0 <directory> onnode <nodes> <command>...
+
+<nodes> can be "all", a node number or any specification supported by onnode
+EOF
+
+ exit 1
+}
+
+local_daemons_onnode ()
+{
+ if [ $# -lt 2 ] || [ "$1" = "-h" ] ; then
+ local_daemons_onnode_usage
+ fi
+
+ _nodes="$1"
+ shift
+
+ onnode_common
+
+ onnode "$_nodes" "$@"
+}
+
+local_daemons_print_socket ()
+{
+ if [ $# -ne 1 ] || [ "$1" = "-h" ] ; then
+ local_daemons_generic_usage "print-socket"
+ fi
+
+ _nodes="$1"
+ shift
+
+ onnode_common
+
+ _path="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-path"
+ onnode -q "$_nodes" "${VALGRIND:-} ${_path} socket ctdbd"
+}
+
+local_daemons_print_log ()
+{
+ if [ $# -ne 1 ] || [ "$1" = "-h" ] ; then
+ local_daemons_generic_usage "print-log"
+ fi
+
+ _nodes="$1"
+ shift
+
+ onnode_common
+
+ # shellcheck disable=SC2016
+ # $CTDB_BASE must only be expanded under onnode, not in top-level shell
+ onnode -q "$_nodes" 'cat ${CTDB_BASE}/log.ctdb' |
+ sort
+
+}
+
+local_daemons_tail_log ()
+{
+ if [ $# -ne 1 ] || [ "$1" = "-h" ] ; then
+ local_daemons_generic_usage "tail-log"
+ fi
+
+ _nodes="$1"
+ shift
+
+ onnode_common
+
+ # shellcheck disable=SC2016,SC2046
+ # $CTDB_BASE must only be expanded under onnode, not in top-level shell
+ # Intentional word splitting to separate log filenames
+ tail -f $(onnode -q "$_nodes" 'echo ${CTDB_BASE}/log.ctdb')
+}
+
+usage ()
+{
+ cat >&2 <<EOF
+usage: $0 <directory> <command> [ <options>... ]
+
+Commands:
+ setup Set up daemon configuration according to given options
+ start Start specified daemon(s)
+ stop Stop specified daemon(s)
+ onnode Run a command in the environment of specified daemon(s)
+ print-socket Print the Unix domain socket used by specified daemon(s)
+ print-log Print logs for specified daemon(s) to stdout
+ tail-log Follow logs for specified daemon(s) to stdout
+
+All commands use <directory> for daemon configuration
+
+Run command with -h option to see per-command usage
+EOF
+ # Only reached for too few arguments or an unknown command, so fail
+ exit 1
+}
+
+if [ $# -lt 2 ] ; then
+ usage
+fi
+
+directory="$1"
+command="$2"
+shift 2
+
+case "$command" in
+setup) local_daemons_setup "$@" ;;
+ssh) local_daemons_ssh "$@" ;; # Internal, not shown by usage()
+start) local_daemons_start "$@" ;;
+stop) local_daemons_stop "$@" ;;
+onnode) local_daemons_onnode "$@" ;;
+print-socket) local_daemons_print_socket "$@" ;;
+print-log) local_daemons_print_log "$@" ;;
+tail-log) local_daemons_tail_log "$@" ;;
+*) usage ;;
+esac
diff --git a/ctdb/tests/run_cluster_tests.sh b/ctdb/tests/run_cluster_tests.sh
new file mode 120000
index 0000000..5236e32
--- /dev/null
+++ b/ctdb/tests/run_cluster_tests.sh
@@ -0,0 +1 @@
+run_tests.sh \ No newline at end of file
diff --git a/ctdb/tests/run_tests.sh b/ctdb/tests/run_tests.sh
new file mode 100755
index 0000000..dfe2a9a
--- /dev/null
+++ b/ctdb/tests/run_tests.sh
@@ -0,0 +1,402 @@
+#!/usr/bin/env bash
+
+# Print a summary of command-line options and exit with failure.
+usage() {
+ cat <<EOF
+Usage: $0 [OPTIONS] [TESTS]
+
+Options:
+ -A Use "cat -A" to print test output (only some tests)
+ -c Run integration tests on a cluster
+ -C Clean up when done by removing test state directory (see -V)
+ -D Show diff between failed/expected test output (some tests only)
+ -e Exit on the first test failure
+ -h Print this usage message
+ -H No headers - for running single test with other wrapper
+ -I <count> Iterate tests <count> times, exiting on failure (implies -e, -N)
+ -l <count> Use <count> daemons for local daemon integration tests
+ -L Print daemon logs on test failure (only some tests)
+ -N Don't print summary of tests results after running all tests
+ -q Quiet - don't show tests being run (still displays summary)
+ -S <lib> Use socket wrapper library <lib> for local integration tests
+ -T <limit> Time limit for each test, as passed to timeout(1) (default: 3600)
+ -v Verbose - print test output for non-failures (only some tests)
+ -V <dir> Use <dir> as test state directory
+ -x Trace this script with the -x option
+ -X Trace certain scripts run by tests using -x (only some tests)
+EOF
+ exit 1
+}
+
+# Print a message and exit.
+die ()
+{
+ echo "$1" >&2 ; exit "${2:-1}"
+}
+
+######################################################################
+
+with_summary=true
+quiet=false
+exit_on_fail=false
+max_iterations=1
+no_header=false
+test_state_dir=""
+cleanup=false
+test_time_limit=3600
+
+export CTDB_TEST_VERBOSE=false
+export CTDB_TEST_COMMAND_TRACE=false
+export CTDB_TEST_CAT_RESULTS_OPTS=""
+export CTDB_TEST_DIFF_RESULTS=false
+export CTDB_TEST_PRINT_LOGS_ON_ERROR=false
+export CTDB_TEST_LOCAL_DAEMONS=3
+export CTDB_TEST_SWRAP_SO_PATH=""
+
+while getopts "AcCDehHI:l:LNqS:T:vV:xX?" opt ; do
+ case "$opt" in
+ A) CTDB_TEST_CAT_RESULTS_OPTS="-A" ;;
+ c) CTDB_TEST_LOCAL_DAEMONS="" ;;
+ C) cleanup=true ;;
+ D) CTDB_TEST_DIFF_RESULTS=true ;;
+ e) exit_on_fail=true ;;
+ H) no_header=true ;;
+ I) max_iterations="$OPTARG" ; exit_on_fail=true ; with_summary=false ;;
+ l) CTDB_TEST_LOCAL_DAEMONS="$OPTARG" ;;
+ L) CTDB_TEST_PRINT_LOGS_ON_ERROR=true ;;
+ N) with_summary=false ;;
+ q) quiet=true ;;
+ S) CTDB_TEST_SWRAP_SO_PATH="$OPTARG" ;;
+ T) test_time_limit="$OPTARG" ;;
+ v) CTDB_TEST_VERBOSE=true ;;
+ V) test_state_dir="$OPTARG" ;;
+ x) set -x ;;
+ X) CTDB_TEST_COMMAND_TRACE=true ;;
+ \?|h) usage ;;
+ esac
+done
+shift $((OPTIND - 1))
+
+case $(basename "$0") in
+ *run_cluster_tests*)
+ # Running on a cluster... same as -c
+ CTDB_TEST_LOCAL_DAEMONS=""
+ ;;
+esac
+
+if $quiet ; then
+ show_progress() { cat >/dev/null ; }
+else
+ show_progress() { cat ; }
+fi
+
+######################################################################
+
+test_header ()
+{
+ local name="$1"
+
+ echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
+ echo "Running test $name ($(date '+%T'))"
+ echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
+}
+
+test_footer ()
+{
+ local f="$1"
+ local status="$2"
+ local interp="$3"
+ local duration="$4"
+
+ local statstr=""
+ if [ "$status" -eq 0 ] ; then
+ statstr=""
+ else
+ statstr=" (status $status)"
+ fi
+
+ echo "=========================================================================="
+ echo "TEST ${interp}: ${f}${statstr} (duration: ${duration}s)"
+ echo "=========================================================================="
+}
+
+ctdb_test_run ()
+{
+ local f="$1"
+
+ $no_header || test_header "$f"
+
+ local status=0
+ local start_time
+
+ start_time=$(date '+%s')
+
+ if [ -x "$f" ] ; then
+ timeout "$test_time_limit" "$f" </dev/null | show_progress
+ status=$?
+ else
+ echo "TEST IS NOT EXECUTABLE"
+ status=99
+ fi
+
+ local duration=$(($(date +%s) - start_time))
+
+ tests_total=$((tests_total + 1))
+
+ local interp
+ case "$status" in
+ 0)
+ interp="PASSED"
+ tests_passed=$((tests_passed + 1))
+ ;;
+ 77)
+ interp="SKIPPED"
+ tests_skipped=$((tests_skipped + 1))
+ ;;
+ 99)
+ interp="ERROR"
+ tests_failed=$((tests_failed + 1))
+ ;;
+ 124)
+ interp="TIMEDOUT"
+ tests_failed=$((tests_failed + 1))
+ ;;
+ *)
+ interp="FAILED"
+ tests_failed=$((tests_failed + 1))
+ ;;
+ esac
+
+ $no_header || test_footer "$f" "$status" "$interp" "$duration"
+
+ if $with_summary ; then
+ local t
+ if [ $status -eq 0 ] ; then
+ t=" ${interp}"
+ else
+ t="*${interp}*"
+ fi
+ printf '%-10s %s\n' "$t" "$f" >>"$summary_file"
+ fi
+
+ # Skipped tests should not cause failure
+ case "$status" in
+ 77)
+ status=0
+ ;;
+ esac
+
+ return $status
+}
+
+######################################################################
+
+tests_total=0
+tests_passed=0
+tests_skipped=0
+tests_failed=0
+
+if ! type mktemp >/dev/null 2>&1 ; then
+ # Not perfect, but it will do...
+ mktemp ()
+ {
+ local dir=false
+ if [ "$1" = "-d" ] ; then
+ dir=true
+ fi
+ local t="${TMPDIR:-/tmp}/tmp.$$.$RANDOM"
+ (
+ umask 077
+ if $dir ; then
+ mkdir "$t"
+ else
+ : >"$t"
+ fi
+ )
+ echo "$t"
+ }
+fi
+
+set -o pipefail
+
+run_one_test ()
+{
+ local f="$1"
+
+ CTDB_TEST_SUITE_DIR=$(dirname "$f")
+ export CTDB_TEST_SUITE_DIR
+ # This expands the most probable problem cases like "." and "..".
+ if [ "$(dirname "$CTDB_TEST_SUITE_DIR")" = "." ] ; then
+ CTDB_TEST_SUITE_DIR=$(cd "$CTDB_TEST_SUITE_DIR" && pwd)
+ fi
+
+ # Set CTDB_TEST_TMP_DIR
+ #
+ # Determine the relative test suite subdirectory. The top-level
+ # test directory needs to be a prefix of the test suite directory,
+ # so make absolute versions of both.
+ local test_dir test_suite_dir reldir
+ test_dir=$(cd "$CTDB_TEST_DIR" && pwd)
+ test_suite_dir=$(cd "$CTDB_TEST_SUITE_DIR" && pwd)
+ reldir="${test_suite_dir#"${test_dir}"/}"
+
+ export CTDB_TEST_TMP_DIR="${test_state_dir}/${reldir}"
+ rm -rf "$CTDB_TEST_TMP_DIR"
+ mkdir -p "$CTDB_TEST_TMP_DIR"
+
+ ctdb_test_run "$f"
+ status=$?
+}
+
+run_tests ()
+{
+ local f
+ # Run each argument: a test file, a suite directory or a collection
+ for f ; do
+ case "$f" in
+ */README|*/README.md)
+ continue
+ ;;
+ esac
+
+ if [ ! -e "$f" ] ; then
+ # Can't find it? Check relative to CTDB_TEST_DIR, stripping
+ # any leading current directory to keep paths friendly.
+ f="${CTDB_TEST_DIR#"${PWD}"/}/${f}"
+ fi
+
+ if [ -d "$f" ] ; then
+ local test_dir dir reldir subtests
+
+ test_dir=$(cd "$CTDB_TEST_DIR" && pwd)
+ dir=$(cd "$f" && pwd)
+ reldir="${dir#"${test_dir}"/}"
+
+ case "$reldir" in
+ */*/*)
+ die "test \"$f\" is not recognised"
+ ;;
+ */*)
+ # This is a test suite
+ subtests=$(echo "${f%/}/"*".sh")
+ if [ "$subtests" = "${f%/}/*.sh" ] ; then
+ # Probably empty directory
+ die "test \"$f\" is not recognised"
+ fi
+ ;;
+ CLUSTER|INTEGRATION|UNIT)
+ # A collection of test suites
+ subtests=$(echo "${f%/}/"*)
+ ;;
+ *)
+ die "test \"$f\" is not recognised"
+ esac
+
+ # Recurse - word-splitting wanted
+ # shellcheck disable=SC2086
+ run_tests $subtests
+ elif [ -f "$f" ] ; then
+ run_one_test "$f"
+ else
+ # Time to give up
+ die "test \"$f\" is not recognised"
+ fi
+ # run_one_test() leaves the latest result in the global $status
+ if $exit_on_fail && [ "$status" -ne 0 ] ; then
+ return "$status"
+ fi
+ done
+ return "${status:-0}" # an unmatched "if" would otherwise yield 0
+}
+
+export CTDB_TEST_MODE="yes"
+
+# Following 2 lines may be modified by installation script
+CTDB_TESTS_ARE_INSTALLED=false
+CTDB_TEST_DIR=$(dirname "$0")
+export CTDB_TESTS_ARE_INSTALLED CTDB_TEST_DIR
+
+if [ -z "$test_state_dir" ] ; then
+ if $CTDB_TESTS_ARE_INSTALLED ; then
+ test_state_dir=$(mktemp -d)
+ else
+ test_state_dir="${CTDB_TEST_DIR}/var"
+ fi
+fi
+mkdir -p "$test_state_dir"
+
+summary_file="${test_state_dir}/.summary"
+: >"$summary_file"
+
+export TEST_SCRIPTS_DIR="${CTDB_TEST_DIR}/scripts"
+
+# If no tests specified then run some defaults
+if [ -z "$1" ] ; then
+ if [ -n "$CTDB_TEST_LOCAL_DAEMONS" ] ; then
+ set -- UNIT INTEGRATION
+ else
+ set -- INTEGRATION CLUSTER
+ fi
+fi
+
+do_cleanup ()
+{
+ if $cleanup ; then
+ echo "Removing test state directory: ${test_state_dir}"
+ rm -rf "$test_state_dir"
+ else
+ echo "Not cleaning up test state directory: ${test_state_dir}"
+ fi
+}
+
+trap "do_cleanup ; exit 130" SIGINT
+trap "do_cleanup ; exit 143" SIGTERM
+
+iterations=0
+# Special case: -I 0 means iterate forever (until failure)
+while [ "$max_iterations" -eq 0 ] || [ $iterations -lt "$max_iterations" ] ; do
+ iterations=$((iterations + 1))
+
+ if [ "$max_iterations" -ne 1 ] ; then
+ echo
+ echo "##################################################"
+ echo "ITERATION ${iterations}"
+ echo "##################################################"
+ echo
+ fi
+
+ run_tests "$@"
+ status=$?
+
+ if [ $status -ne 0 ] ; then
+ break
+ fi
+done
+
+if $with_summary ; then
+ if [ "$status" -eq 0 ] || ! $exit_on_fail ; then
+ echo
+ cat "$summary_file"
+
+ echo
+ tests_run=$((tests_total - tests_skipped))
+ printf '%d/%d tests passed' $tests_passed $tests_run
+ if [ $tests_skipped -gt 0 ] ; then
+ printf ' (%d skipped)' $tests_skipped
+ fi
+ printf '\n'
+ fi
+fi
+rm -f "$summary_file"
+
+echo
+
+do_cleanup
+
+if $no_header || $exit_on_fail ; then
+ exit "$status"
+elif [ $tests_failed -gt 0 ] ; then
+ exit 1
+else
+ exit 0
+fi
diff --git a/ctdb/tests/scripts/cluster.bash b/ctdb/tests/scripts/cluster.bash
new file mode 100755
index 0000000..916fc84
--- /dev/null
+++ b/ctdb/tests/scripts/cluster.bash
@@ -0,0 +1,18 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+if ! ctdb_test_on_cluster ; then
+ # Do not run on local daemons
+ ctdb_test_error \
+ "ERROR: This test must be run against a real/virtual cluster"
+fi
+
+h=$(hostname)
+
+for i in $(onnode -q all hostname) ; do
+ if [ "$h" = "$i" ] ; then
+ ctdb_test_error \
+ "ERROR: This test must not be run from a cluster node"
+ fi
+done
diff --git a/ctdb/tests/scripts/common.sh b/ctdb/tests/scripts/common.sh
new file mode 100644
index 0000000..5bc5869
--- /dev/null
+++ b/ctdb/tests/scripts/common.sh
@@ -0,0 +1,146 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+# Common variables and functions for all CTDB tests.
+
+
+# Commands on different platforms may quote or sort things differently
+# without this
+export LANG=C
+
+# Print a message and exit.
+die ()
+{
+ echo "$1" >&2 ; exit "${2:-1}"
+}
+
+. "${TEST_SCRIPTS_DIR}/script_install_paths.sh"
+
+if [ -d "$CTDB_SCRIPTS_TOOLS_BIN_DIR" ] ; then
+ PATH="${CTDB_SCRIPTS_TOOLS_BIN_DIR}:${PATH}"
+fi
+
+if [ -d "$CTDB_SCRIPTS_TESTS_LIBEXEC_DIR" ] ; then
+ PATH="${CTDB_SCRIPTS_TESTS_LIBEXEC_DIR}:${PATH}"
+fi
+
+ctdb_test_error ()
+{
+ if [ $# -gt 0 ] ; then
+ echo "$*"
+ fi
+ exit 99
+}
+
+ctdb_test_fail ()
+{
+ if [ $# -gt 0 ] ; then
+ echo "$*"
+ fi
+ exit 1
+}
+
+ctdb_test_skip ()
+{
+ if [ $# -gt 0 ] ; then
+ echo "$*"
+ fi
+ exit 77
+}
+
+# "$@" is supported OSes
+ctdb_test_check_supported_OS ()
+{
+ _os=$(uname -s)
+ for _i ; do
+ if [ "$_os" = "$_i" ] ; then
+ return
+ fi
+ done
+
+ ctdb_test_skip "This test is not supported on ${_os}"
+}
+
+# Wait until either timeout expires or command succeeds. The command
+# will be tried once per second, unless timeout has format T/I, where
+# I is the recheck interval.
+wait_until ()
+{
+ _timeout="$1" ; shift # "$@" is the command...
+
+ _interval=1
+ case "$_timeout" in
+ */*)
+ _interval="${_timeout#*/}"
+ _timeout="${_timeout%/*}"
+ esac
+
+ _negate=false
+ if [ "$1" = "!" ] ; then
+ _negate=true
+ shift
+ fi
+
+ printf '<%d|' "$_timeout"
+ _t="$_timeout"
+ while [ "$_t" -gt 0 ] ; do
+ _rc=0
+ "$@" || _rc=$?
+ if { ! $_negate && [ $_rc -eq 0 ] ; } || \
+ { $_negate && [ $_rc -ne 0 ] ; } ; then
+ echo "|$((_timeout - _t))|"
+ echo "OK"
+ return 0
+ fi
+ for _i in $(seq 1 "$_interval") ; do
+ printf '.'
+ done
+ _t=$((_t - _interval))
+ sleep "$_interval"
+ done
+
+ echo "*TIMEOUT*"
+
+ return 1
+}
+
+# setup_ctdb_base <parent> <subdir> [item-to-copy]...
+setup_ctdb_base ()
+{
+ [ $# -ge 2 ] || die "usage: setup_ctdb_base <parent> <subdir> [item]..."
+ # If empty arguments are passed then we attempt to remove /
+ # (i.e. the root directory) below
+ if [ -z "$1" ] || [ -z "$2" ] ; then
+ die "usage: setup_ctdb_base <parent> <subdir> [item]..."
+ fi
+
+ _parent="$1"
+ _subdir="$2"
+
+ # Other arguments are files/directories to copy
+ shift 2
+
+ export CTDB_BASE="${_parent}/${_subdir}"
+ if [ -d "$CTDB_BASE" ] ; then
+ rm -r "$CTDB_BASE"
+ fi
+ mkdir -p "$CTDB_BASE" || die "Failed to create CTDB_BASE=$CTDB_BASE"
+ mkdir -p "${CTDB_BASE}/run" || die "Failed to create ${CTDB_BASE}/run"
+ mkdir -p "${CTDB_BASE}/var" || die "Failed to create ${CTDB_BASE}/var"
+
+ for _i ; do
+ cp -pr "${CTDB_SCRIPTS_BASE}/${_i}" "${CTDB_BASE}/"
+ done
+
+ mkdir -p "${CTDB_BASE}/events/legacy"
+
+ if [ -z "$CTDB_TEST_SUITE_DIR" ] ; then
+ return
+ fi
+
+ for _i in "${CTDB_TEST_SUITE_DIR}/etc-ctdb/"* ; do
+ # No/empty etc-ctdb directory
+ [ -e "$_i" ] || break
+
+ cp -pr "$_i" "${CTDB_BASE}/"
+ done
+}
diff --git a/ctdb/tests/scripts/integration.bash b/ctdb/tests/scripts/integration.bash
new file mode 100644
index 0000000..65e974e
--- /dev/null
+++ b/ctdb/tests/scripts/integration.bash
@@ -0,0 +1,864 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+. "${TEST_SCRIPTS_DIR}/common.sh"
+
+######################################################################
+
+export CTDB_TIMEOUT=60
+
+if [ -n "$CTDB_TEST_REMOTE_DIR" ] ; then
+ CTDB_TEST_WRAPPER="${CTDB_TEST_REMOTE_DIR}/test_wrap"
+else
+ _d=$(cd "$TEST_SCRIPTS_DIR" && echo "$PWD")
+ CTDB_TEST_WRAPPER="$_d/test_wrap"
+fi
+export CTDB_TEST_WRAPPER
+
+# If $VALGRIND is set then use it whenever ctdb is called, but only if
+# $CTDB is not already set.
+[ -n "$CTDB" ] || export CTDB="${VALGRIND}${VALGRIND:+ }ctdb"
+
+# why???
+PATH="${TEST_SCRIPTS_DIR}:${PATH}"
+
+######################################################################
+
+ctdb_test_on_cluster ()
+{
+ [ -z "$CTDB_TEST_LOCAL_DAEMONS" ]
+}
+
+ctdb_test_exit ()
+{
+ local status=$?
+
+ trap - 0
+
+ # run_tests.sh pipes stdout into tee. If the tee process is
+ # killed then any attempt to write to stdout (e.g. echo) will
+ # result in SIGPIPE, terminating the caller. Ignore SIGPIPE to
+ # ensure that all clean-up is run.
+ trap '' PIPE
+
+ # Avoid making a test fail from this point onwards. The test is
+ # now complete.
+ set +e
+
+ echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."
+
+ eval "$ctdb_test_exit_hook" || true
+ unset ctdb_test_exit_hook
+
+ echo "Stopping cluster..."
+ ctdb_nodes_stop || ctdb_test_error "Cluster shutdown failed"
+
+ exit $status
+}
+
+ctdb_test_exit_hook_add ()
+{
+ ctdb_test_exit_hook="${ctdb_test_exit_hook}${ctdb_test_exit_hook:+ ; }$*"
+}
+
+# If ctdb_test_cleanup_pid is <pid>@<node> then <pid> is killed on <node>
+# when the test completes. To cancel, use ctdb_test_cleanup_pid_clear.
+ctdb_test_cleanup_pid=""
+ctdb_test_cleanup_pid_exit_hook ()
+{
+ if [ -n "$ctdb_test_cleanup_pid" ] ; then
+ local pid="${ctdb_test_cleanup_pid%@*}"
+ local node="${ctdb_test_cleanup_pid#*@}"
+
+ try_command_on_node "$node" "kill ${pid}"
+ fi
+}
+
+ctdb_test_exit_hook_add ctdb_test_cleanup_pid_exit_hook
+
+ctdb_test_cleanup_pid_set ()
+{
+ local node="$1"
+ local pid="$2"
+
+ ctdb_test_cleanup_pid="${pid}@${node}"
+}
+
+ctdb_test_cleanup_pid_clear ()
+{
+ ctdb_test_cleanup_pid=""
+}
+
+# -n option means do not configure/start cluster
+ctdb_test_init ()
+{
+ trap "ctdb_test_exit" 0
+
+ ctdb_nodes_stop >/dev/null 2>&1 || true
+
+ if [ "$1" != "-n" ] ; then
+ echo "Configuring cluster..."
+ setup_ctdb || ctdb_test_error "Cluster configuration failed"
+
+ echo "Starting cluster..."
+ ctdb_init || ctdb_test_error "Cluster startup failed"
+ fi
+
+ echo "*** SETUP COMPLETE AT $(date '+%F %T'), RUNNING TEST..."
+}
+
+ctdb_nodes_start_custom ()
+{
+ if ctdb_test_on_cluster ; then
+ ctdb_test_error "ctdb_nodes_start_custom() on real cluster"
+ fi
+
+ ctdb_nodes_stop >/dev/null 2>&1 || true
+
+ echo "Configuring cluster..."
+ setup_ctdb "$@" || ctdb_test_error "Cluster configuration failed"
+
+ echo "Starting cluster..."
+ ctdb_init || ctdb_test_fail "Cluster startup failed"
+}
+
+ctdb_test_skip_on_cluster ()
+{
+ if ctdb_test_on_cluster ; then
+ ctdb_test_skip \
+ "SKIPPING this test - only runs against local daemons"
+ fi
+}
+
+
+ctdb_nodes_restart ()
+{
+ ctdb_nodes_stop "$@"
+ ctdb_nodes_start "$@"
+}
+
+########################################
+
+# Sets: $out, $outfile
+# * The first 1KB of output is put into $out
+# * Tests should use $outfile for handling large output
+# * $outfile is removed after each test
+out=""
+outfile="${CTDB_TEST_TMP_DIR}/try_command_on_node.out"
+
+outfile_cleanup ()
+{
+ rm -f "$outfile"
+}
+
+ctdb_test_exit_hook_add outfile_cleanup
+
+try_command_on_node ()
+{
+ local nodespec="$1" ; shift
+
+ local verbose=false
+ local onnode_opts=""
+
+ while [ "${nodespec#-}" != "$nodespec" ] ; do
+ if [ "$nodespec" = "-v" ] ; then
+ verbose=true
+ else
+ onnode_opts="${onnode_opts}${onnode_opts:+ }${nodespec}"
+ fi
+ nodespec="$1" ; shift
+ done
+
+ local cmd="$*"
+
+ local status=0
+ # Intentionally unquoted - might be empty
+ # shellcheck disable=SC2086
+ onnode -q $onnode_opts "$nodespec" "$cmd" >"$outfile" 2>&1 || status=$?
+ out=$(dd if="$outfile" bs=1k count=1 2>/dev/null)
+
+ if [ $status -ne 0 ] ; then
+ echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
+ cat "$outfile"
+ return $status
+ fi
+
+ if $verbose ; then
+ echo "Output of \"$cmd\":"
+ cat "$outfile" || true
+ fi
+}
+
+_run_onnode ()
+{
+ local thing="$1"
+ shift
+
+ local options nodespec
+
+ while : ; do
+ case "$1" in
+ -*)
+ options="${options}${options:+ }${1}"
+ shift
+ ;;
+ *)
+ nodespec="$1"
+ shift
+ break
+ esac
+ done
+
+ # shellcheck disable=SC2086
+ # $options can be multi-word
+ try_command_on_node $options "$nodespec" "${thing} $*"
+}
+
+ctdb_onnode ()
+{
+ _run_onnode "$CTDB" "$@"
+}
+
+testprog_onnode ()
+{
+ _run_onnode "${CTDB_TEST_WRAPPER} ${VALGRIND}" "$@"
+}
+
+function_onnode ()
+{
+ _run_onnode "${CTDB_TEST_WRAPPER}" "$@"
+}
+
+sanity_check_output ()
+{
+ local min_lines="$1"
+ local regexp="$2" # Should be anchored as necessary.
+
+ local ret=0
+
+ local num_lines
+ num_lines=$(wc -l <"$outfile" | tr -d '[:space:]')
+ echo "There are $num_lines lines of output"
+ if [ "$num_lines" -lt "$min_lines" ] ; then
+ ctdb_test_fail "BAD: that's less than the required number (${min_lines})"
+ fi
+
+ local status=0
+ local unexpected # local doesn't pass through status of command on RHS.
+ unexpected=$(grep -Ev "$regexp" "$outfile") || status=$?
+
+ # Note that this is reversed.
+ if [ $status -eq 0 ] ; then
+ echo "BAD: unexpected lines in output:"
+ echo "$unexpected" | cat -A
+ ret=1
+ else
+ echo "Output lines look OK"
+ fi
+
+ return $ret
+}
+
+select_test_node ()
+{
+ try_command_on_node any ctdb pnn || return 1
+
+ test_node="$out"
+ echo "Selected node ${test_node}"
+}
+
+# This returns a list of "ip node" lines in $outfile
+all_ips_on_node()
+{
+ local node="$1"
+ try_command_on_node "$node" \
+ "$CTDB ip -X | awk -F'|' 'NR > 1 { print \$2, \$3 }'"
+}
+
+_select_test_node_and_ips ()
+{
+ try_command_on_node any \
+ "$CTDB ip -X all | awk -F'|' 'NR > 1 { print \$2, \$3 }'"
+
+ test_node="" # this matches no PNN
+ test_node_ips=""
+ local ip pnn
+ while read -r ip pnn ; do
+ if [ -z "$test_node" ] && [ "$pnn" != "-1" ] ; then
+ test_node="$pnn"
+ fi
+ if [ "$pnn" = "$test_node" ] ; then
+ test_node_ips="${test_node_ips}${test_node_ips:+ }${ip}"
+ fi
+ done <"$outfile"
+
+ echo "Selected node ${test_node} with IPs: ${test_node_ips}."
+ test_ip="${test_node_ips%% *}"
+
+ # test_prefix used by caller
+ # shellcheck disable=SC2034
+ case "$test_ip" in
+ *:*) test_prefix="${test_ip}/128" ;;
+ *) test_prefix="${test_ip}/32" ;;
+ esac
+
+ [ -n "$test_node" ] || return 1
+}
+
+select_test_node_and_ips ()
+{
+ local timeout=10
+ while ! _select_test_node_and_ips ; do
+ echo "Unable to find a test node with IPs assigned"
+ if [ $timeout -le 0 ] ; then
+ ctdb_test_error "BAD: Too many attempts"
+ return 1
+ fi
+ sleep_for 1
+ timeout=$((timeout - 1))
+ done
+
+ return 0
+}
+
+# Sets: mask, iface
+get_test_ip_mask_and_iface ()
+{
+ # Find the interface
+ ctdb_onnode "$test_node" "ip -v -X"
+ iface=$(awk -F'|' -v ip="$test_ip" '$2 == ip { print $4 }' "$outfile")
+
+ if ctdb_test_on_cluster ; then
+ # Find the netmask
+ try_command_on_node "$test_node" ip addr show to "$test_ip"
+ mask="${out##*/}"
+ mask="${mask%% *}"
+ else
+ mask="24"
+ fi
+
+ echo "$test_ip/$mask is on $iface"
+}
+
+ctdb_get_all_pnns ()
+{
+ try_command_on_node -q all "$CTDB pnn"
+ all_pnns="$out"
+}
+
+# The subtlety is that "ctdb delip" will fail if the IP address isn't
+# configured on a node...
+delete_ip_from_all_nodes ()
+{
+ _ip="$1"
+
+ ctdb_get_all_pnns
+
+ _nodes=""
+
+ for _pnn in $all_pnns ; do
+ all_ips_on_node "$_pnn"
+ while read -r _i _ ; do
+ if [ "$_ip" = "$_i" ] ; then
+ _nodes="${_nodes}${_nodes:+,}${_pnn}"
+ fi
+ done <"$outfile"
+ done
+
+ try_command_on_node -pq "$_nodes" "$CTDB delip $_ip"
+}
+
+#######################################
+
+sleep_for ()
+{
+ echo -n "=${1}|" # $1: number of seconds to sleep
+ for _ in $(seq 1 "$1") ; do # "_" avoids clobbering a caller's $i
+ echo -n '.'
+ sleep 1
+ done
+ echo '|'
+}
+
+_cluster_is_healthy ()
+{
+ $CTDB nodestatus all >/dev/null
+}
+
+_cluster_is_recovered ()
+{
+ node_has_status 0 recovered
+}
+
+_cluster_is_ready ()
+{
+ _cluster_is_healthy && _cluster_is_recovered
+}
+
+cluster_is_healthy ()
+{
+ if onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_healthy ; then
+ echo "Cluster is HEALTHY"
+ if ! onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_recovered ; then
+ echo "WARNING: cluster in recovery mode!"
+ fi
+ return 0
+ fi
+
+ echo "Cluster is UNHEALTHY"
+
+ echo "DEBUG AT $(date '+%F %T'):"
+ local i
+ for i in "onnode -q 0 $CTDB status" \
+ "onnode -q 0 onnode all $CTDB scriptstatus" ; do
+ echo "$i"
+ $i || true
+ done
+
+ return 1
+}
+
+wait_until_ready ()
+{
+ local timeout="${1:-120}"
+
+ echo "Waiting for cluster to become ready..."
+
+ wait_until "$timeout" onnode -q any "$CTDB_TEST_WRAPPER" _cluster_is_ready
+}
+
+# This function is becoming nicely overloaded. Soon it will collapse! :-)
+node_has_status ()
+{
+ local pnn="$1"
+ local status="$2"
+
+ case "$status" in
+ recovered)
+ ! $CTDB status -n "$pnn" | \
+ grep -Eq '^Recovery mode:RECOVERY \(1\)$'
+ return
+ ;;
+ notlmaster)
+ ! $CTDB status | grep -Eq "^hash:.* lmaster:${pnn}\$"
+ return
+ ;;
+ esac
+
+ local bits
+ case "$status" in
+ unhealthy) bits="?|?|?|?|1|*" ;;
+ healthy) bits="?|?|?|?|0|*" ;;
+ disconnected) bits="1|*" ;;
+ connected) bits="0|*" ;;
+ banned) bits="?|?|1|*" ;;
+ unbanned) bits="?|?|0|*" ;;
+ disabled) bits="?|?|?|1|*" ;;
+ enabled) bits="?|?|?|0|*" ;;
+ stopped) bits="?|?|?|?|?|1|*" ;;
+ notstopped) bits="?|?|?|?|?|0|*" ;;
+ *)
+ echo "node_has_status: unknown status \"$status\""
+ return 1
+ esac
+ local out _ line
+
+ out=$($CTDB -X status 2>&1) || return 1
+
+ {
+ read -r _
+ while read -r line ; do
+ # This needs to be done in 2 steps to
+ # avoid false matches.
+ local line_bits="${line#|"${pnn}"|*|}"
+ [ "$line_bits" = "$line" ] && continue
+ # shellcheck disable=SC2295
+ # This depends on $bits being a pattern
+ [ "${line_bits#${bits}}" != "$line_bits" ] && \
+ return 0
+ done
+ return 1
+ } <<<"$out" # Yay bash!
+}
+
+wait_until_node_has_status ()
+{
+ local pnn="$1" # node whose status is checked
+ local status="$2" # status name understood by node_has_status
+ local timeout="${3:-30}" # passed to wait_until
+ local proxy_pnn="${4:-any}" # node on which the check is run
+ local i
+
+ echo "Waiting until node $pnn has status \"$status\"..."
+
+ if ! wait_until "$timeout" onnode "$proxy_pnn" \
+ "$CTDB_TEST_WRAPPER" node_has_status "$pnn" "$status" ; then
+ # Timed out: print cluster status and script status for debugging
+ for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
+ echo "$i"
+ $i || true
+ done
+
+ return 1
+ fi
+}
+
+# Useful for superficially testing IP failover.
+# IPs must be on the given node.
+# If the first argument is '!' then the IPs must not be on the given node.
+ips_are_on_node ()
+{
+ local negating=false
+ if [ "$1" = "!" ] ; then
+ negating=true ; shift
+ fi
+ local node="$1" ; shift
+ local ips="$*"
+
+ local out
+
+ all_ips_on_node "$node"
+
+ local check
+ for check in $ips ; do
+ local ip pnn
+ while read -r ip pnn ; do
+ if [ "$check" = "$ip" ] ; then
+ if [ "$pnn" = "$node" ] ; then
+ if $negating ; then return 1 ; fi
+ else
+ if ! $negating ; then return 1 ; fi
+ fi
+ ips="${ips/${ip}}" # Remove from list
+ break
+ fi
+ # If we're negating and we didn't see the address then it
+ # isn't hosted by anyone!
+ if $negating ; then
+ ips="${ips/${check}}"
+ fi
+ done <"$outfile"
+ done
+
+ ips="${ips// }" # Remove any spaces.
+ [ -z "$ips" ]
+}
+
+wait_until_ips_are_on_node ()
+{
+ # Go to some trouble to print a useful description of what is happening
+ local not=""
+ if [ "$1" == "!" ] ; then
+ not="no longer "
+ fi
+ local node=""
+ local ips=""
+ local i
+ for i ; do
+ [ "$i" != "!" ] || continue
+ if [ -z "$node" ] ; then
+ node="$i"
+ continue
+ fi
+ ips="${ips}${ips:+, }${i}"
+ done
+ echo "Waiting for ${ips} to ${not}be assigned to node ${node}"
+
+ wait_until 60 ips_are_on_node "$@"
+}
+
+node_has_some_ips ()
+{
+ local node="$1" # succeed if any listed IP is assigned to this node
+ local ip pnn
+ local out # keeps try_command_on_node from clobbering the caller's $out
+
+ all_ips_on_node "$node"
+
+ while read -r ip pnn ; do
+ if [ "$node" = "$pnn" ] ; then
+ return 0
+ fi
+ done <"$outfile"
+
+ return 1
+}
+
+wait_until_node_has_some_ips ()
+{
+ echo "Waiting for some IPs to be assigned to node ${1}"
+
+ wait_until 60 node_has_some_ips "$@" # $1: node to poll
+}
+
+wait_until_node_has_no_ips ()
+{
+ echo "Waiting until no IPs are assigned to node ${1}"
+
+ wait_until 60 ! node_has_some_ips "$@" # $1: node to poll
+}
+
+#######################################
+
+ctdb_init ()
+{
+ if ! ctdb_nodes_start ; then
+ echo "Cluster start failed"
+ return 1
+ fi
+
+ if ! wait_until_ready 120 ; then
+ echo "Cluster didn't become ready"
+ return 1
+ fi
+
+ echo "Setting RerecoveryTimeout to 1"
+ onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
+
+ echo "Forcing a recovery..."
+ onnode -q 0 "$CTDB recover"
+ sleep_for 2
+
+ if ! onnode -q all "$CTDB_TEST_WRAPPER _cluster_is_recovered" ; then
+ echo "Cluster has gone into recovery again, waiting..."
+ if ! wait_until 30/2 onnode -q all \
+ "$CTDB_TEST_WRAPPER _cluster_is_recovered" ; then
+ echo "Cluster did not come out of recovery"
+ return 1
+ fi
+ fi
+
+ if ! onnode 0 "$CTDB_TEST_WRAPPER _cluster_is_healthy" ; then
+ echo "Cluster became UNHEALTHY again [$(date)]"
+ return 1
+ fi
+
+ echo "Doing a sync..."
+ onnode -q 0 "$CTDB sync"
+
+ echo "ctdb is ready"
+ return 0
+}
+
+ctdb_base_show ()
+{
+ echo "${CTDB_BASE:-${CTDB_SCRIPTS_BASE}}"
+}
+
+#######################################
+
+# sets: leader
+_leader_get ()
+{
+ local node="$1"
+
+ ctdb_onnode "$node" leader
+ # shellcheck disable=SC2154
+ # $out set by ctdb_onnode() above
+ leader="$out"
+}
+
+leader_get ()
+{
+ local node="$1"
+
+ echo "Get leader"
+ _leader_get "$node"
+ echo "Leader is ${leader}"
+ echo
+}
+
+_leader_has_changed ()
+{
+ local node="$1"
+ local leader_old="$2"
+
+ _leader_get "$node"
+
+ [ "$leader" != "$leader_old" ]
+}
+
+# uses: leader
+wait_until_leader_has_changed ()
+{
+ local node="$1"
+
+ echo
+ echo "Wait until leader changes..."
+ wait_until 30 _leader_has_changed "$node" "$leader"
+ echo "Leader changed to ${leader}"
+}
+
+#######################################
+
+# sets: generation
+_generation_get ()
+{
+ local node="$1"
+
+ ctdb_onnode "$node" status
+ # shellcheck disable=SC2154
+ # $outfile set by ctdb_onnode() above
+ generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
+}
+
+generation_get ()
+{
+ local node="$1"
+
+ echo "Get generation"
+ _generation_get "$node"
+ echo "Generation is ${generation}"
+ echo
+}
+
+_generation_has_changed ()
+{
+ local node="$1"
+ local generation_old="$2"
+
+ _generation_get "$node"
+
+ [ "$generation" != "$generation_old" ]
+}
+
+# uses: generation
+wait_until_generation_has_changed ()
+{
+ local node="$1"
+
+ echo "Wait until generation changes..."
+ wait_until 30 _generation_has_changed "$node" "$generation"
+ echo "Generation changed to ${generation}"
+ echo
+}
+
+#######################################
+
+wait_for_monitor_event ()
+{
+ local pnn="$1" # also read by _ctdb_scriptstatus_changed
+ local timeout="${2:-120}" # optional $2, passed to wait_until
+
+ echo "Waiting for a monitor event on node ${pnn}..."
+
+ ctdb_onnode "$pnn" scriptstatus || {
+ echo "Unable to get scriptstatus from node $pnn"
+ return 1
+ }
+
+ mv "$outfile" "${outfile}.orig" # baseline for later comparison
+
+ wait_until "$timeout" _ctdb_scriptstatus_changed
+}
+
+_ctdb_scriptstatus_changed ()
+{
+ ctdb_onnode "$pnn" scriptstatus || {
+ echo "Unable to get scriptstatus from node $pnn"
+ return 1
+ }
+
+ ! diff "$outfile" "${outfile}.orig" >/dev/null
+}
+
+#######################################
+
+# If the given IP is hosted then print 2 items: maskbits and iface
+ip_maskbits_iface ()
+{
+ _addr="$1"
+
+ case "$_addr" in
+ *:*) _family="inet6" ; _bits=128 ;;
+ *) _family="inet" ; _bits=32 ;;
+ esac
+
+ # Literal backslashes in awk script
+ # shellcheck disable=SC1004
+ ip addr show to "${_addr}/${_bits}" 2>/dev/null | \
+ awk -v family="${_family}" \
+ 'NR == 1 { iface = $2; sub(":$", "", iface) } \
+ $1 ~ /inet/ { mask = $2; sub(".*/", "", mask); \
+ print mask, iface, family }'
+}
+
+drop_ip ()
+{
+ _addr="${1%/*}" # Remove optional maskbits
+
+ # Intentional word splitting: $1 = maskbits, $2 = iface (none if unhosted)
+ # shellcheck disable=SC2046,SC2086
+ set -- $(ip_maskbits_iface $_addr)
+ if [ -n "$1" ] ; then
+ _maskbits="$1"
+ _iface="$2"
+ echo "Removing public address $_addr/$_maskbits from device $_iface"
+ ip addr del "$_addr/$_maskbits" dev "$_iface" >/dev/null 2>&1 || true
+ fi
+}
+
+drop_ips ()
+{
+ for _ip ; do
+ drop_ip "$_ip"
+ done
+}
+
+#######################################
+
+# $1: pnn, $2: DB name
+db_get_path ()
+{
+ ctdb_onnode -v "$1" "getdbstatus $2" | sed -n -e "s@^path: @@p"
+}
+
+# $1: pnn, $2: DB name
+db_ctdb_cattdb_count_records ()
+{
+ # Count the number of keys, excluding any that begin with '_'.
+ # This excludes at least the sequence number record in
+ # persistent/replicated databases. The trailing "|| :" forces
+ # the command to succeed when no records are matched.
+ ctdb_onnode "$1" "cattdb $2 | grep -c '^key([0-9][0-9]*) = \"[^_]' || :"
+ echo "$out"
+}
+
+# $1: pnn, $2: DB name, $3: key string, $4: value string, $5: RSN (default 7)
+db_ctdb_tstore ()
+{
+ _tdb=$(db_get_path "$1" "$2")
+ _rsn="${5:-7}"
+ ctdb_onnode "$1" tstore "$_tdb" "$3" "$4" "$_rsn"
+}
+
+# $1: pnn, $2: DB name, $3: dbseqnum (must be <= 255!!!!!)
+db_ctdb_tstore_dbseqnum ()
+{
+ # "__db_sequence_number__" + trailing 0x00
+ _key='0x5f5f64625f73657175656e63655f6e756d6265725f5f00'
+
+ # Construct 8 byte (uint64_t) database sequence number: $3 as the
+ # first byte, then 7 zero bytes. Longer than 8 bytes if $3 > 255
+ _value=$(printf "0x%02x%014x" "$3" 0)
+
+ db_ctdb_tstore "$1" "$2" "$_key" "$_value"
+}
+
+########################################
+
+# Make sure that $CTDB is set.
+if [ -z "$CTDB" ] ; then
+ CTDB="ctdb"
+fi
+
+if ctdb_test_on_cluster ; then
+ . "${TEST_SCRIPTS_DIR}/integration_real_cluster.bash"
+else
+ . "${TEST_SCRIPTS_DIR}/integration_local_daemons.bash"
+fi
+
+
+local="${CTDB_TEST_SUITE_DIR}/scripts/local.bash"
+if [ -r "$local" ] ; then
+ . "$local"
+fi
diff --git a/ctdb/tests/scripts/integration_local_daemons.bash b/ctdb/tests/scripts/integration_local_daemons.bash
new file mode 100644
index 0000000..643fc5e
--- /dev/null
+++ b/ctdb/tests/scripts/integration_local_daemons.bash
@@ -0,0 +1,95 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+hdir="$CTDB_SCRIPTS_HELPER_BINDIR"
+export CTDB_EVENT_HELPER="${hdir}/ctdb-event"
+
+if $CTDB_TESTS_ARE_INSTALLED ; then
+ # Find it in $PATH
+ helper="ctdb_local_daemons"
+else
+ helper="${CTDB_TEST_DIR}/local_daemons.sh"
+fi
+
+ctdb_local_daemons="${helper} ${CTDB_TEST_TMP_DIR}"
+
+#######################################
+
+setup_ctdb ()
+{
+ local no_event_scripts=false
+
+ # All other options are passed through to local_daemons.sh setup
+ case "$1" in
+ --no-event-scripts) no_event_scripts=true ; shift ;;
+ esac
+
+ $ctdb_local_daemons setup "$@" \
+ -n "$CTDB_TEST_LOCAL_DAEMONS" \
+ ${CTDB_USE_IPV6:+-6} \
+ ${CTDB_TEST_SWRAP_SO_PATH:+-S ${CTDB_TEST_SWRAP_SO_PATH}}
+ # Burying the above in an if-statement condition reduces readability.
+ # shellcheck disable=SC2181
+ if [ $? -ne 0 ] ; then
+ exit 1
+ fi
+
+ if $no_event_scripts ; then
+ # Want CTDB_BASE expanded when executed under onnode
+ # shellcheck disable=SC2016
+ $ctdb_local_daemons onnode -q all \
+ 'rm "${CTDB_BASE}/events/legacy/"*'
+ fi
+
+ if $CTDB_TEST_PRINT_LOGS_ON_ERROR ; then
+ ctdb_test_exit_hook_add _print_logs_on_test_failure
+ fi
+}
+
+ctdb_nodes_start ()
+{
+ local nodespec="${1:-all}"
+
+ $ctdb_local_daemons start "$nodespec"
+}
+
+ctdb_nodes_stop ()
+{
+ local nodespec="${1:-all}"
+
+ if $ctdb_local_daemons stop "$nodespec" ; then
+ return 0
+ fi
+
+ # Failed, dump logs?
+ if $CTDB_TEST_PRINT_LOGS_ON_ERROR ; then
+ _print_logs
+ fi
+
+ # Next level up can log the error...
+ return 1
+}
+
+onnode ()
+{
+ $ctdb_local_daemons onnode "$@"
+}
+
+
+
+_print_logs ()
+{
+ echo "*** LOG START --------------------"
+ $ctdb_local_daemons print-log all | tail -n 500
+ echo "*** LOG END --------------------"
+}
+
+_print_logs_on_test_failure ()
+{
+ # This is called from ctdb_test_exit() where $status is available
+ # shellcheck disable=SC2154
+ if [ "$status" -eq 0 ] ; then
+ return
+ fi
+
+ _print_logs
+}
diff --git a/ctdb/tests/scripts/integration_real_cluster.bash b/ctdb/tests/scripts/integration_real_cluster.bash
new file mode 100644
index 0000000..8d3f68a
--- /dev/null
+++ b/ctdb/tests/scripts/integration_real_cluster.bash
@@ -0,0 +1,53 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+#######################################
+
+# Enables all of the event scripts used in cluster tests, except for
+# the mandatory scripts
+_ctdb_enable_cluster_test_event_scripts ()
+{
+ local scripts="
+ 06.nfs
+ 10.interface
+ 49.winbind
+ 50.samba
+ 60.nfs
+ "
+
+ local s
+ for s in $scripts ; do
+ try_command_on_node all ctdb event script enable legacy "$s"
+ done
+}
+
+setup_ctdb ()
+{
+ _ctdb_enable_cluster_test_event_scripts
+}
+
+#######################################
+
+_service_ctdb ()
+{
+ local cmd="$1" # action passed to the service/init script
+
+ if [ -e /etc/redhat-release ] ; then
+ service ctdb "$cmd"
+ else
+ /etc/init.d/ctdb "$cmd"
+ fi
+}
+
+# Stop/start CTDB on all nodes. Override for local daemons.
+ctdb_nodes_stop ()
+{
+ local nodespec="${1:-all}"
+
+ onnode -p "$nodespec" "$CTDB_TEST_WRAPPER" _service_ctdb stop
+}
+ctdb_nodes_start ()
+{
+ local nodespec="${1:-all}"
+
+ onnode -p "$nodespec" "$CTDB_TEST_WRAPPER" _service_ctdb start
+}
diff --git a/ctdb/tests/scripts/script_install_paths.sh b/ctdb/tests/scripts/script_install_paths.sh
new file mode 100644
index 0000000..6890cf8
--- /dev/null
+++ b/ctdb/tests/scripts/script_install_paths.sh
@@ -0,0 +1,67 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+# Sets $bin_dir to the first existing bin/ under: $PWD, $1, parent of $1
+find_bin_dir ()
+{
+ _ctdb_dir="$1"
+
+ bin_dir="$(pwd -P)/bin"
+ if [ -d "$bin_dir" ] ; then
+ return
+ fi
+
+ bin_dir="${_ctdb_dir}/bin"
+ if [ -d "$bin_dir" ] ; then
+ return
+ fi
+
+ bin_dir="$(dirname "${_ctdb_dir}")/bin"
+ if [ -d "$bin_dir" ] ; then
+ return
+ fi
+
+ die "Unable to locate bin/ subdirectory"
+}
+
+
+if ! $CTDB_TESTS_ARE_INSTALLED ; then
+ if [ ! -f "${CTDB_TEST_DIR}/run_tests.sh" ] ; then
+ die "Tests not installed but can't find run_tests.sh"
+ fi
+
+ ctdb_dir=$(cd -P "$(dirname "$CTDB_TEST_DIR")" && pwd) # real path
+
+ find_bin_dir "$ctdb_dir"
+
+ CTDB_SCRIPTS_BASE="${ctdb_dir}/config"
+ CTDB_SCRIPTS_INIT_SCRIPT="${ctdb_dir}/config/ctdb.init"
+ CTDB_SCRIPTS_SBIN_DIR="${ctdb_dir}/config"
+ CTDB_SCRIPTS_TOOLS_BIN_DIR="${ctdb_dir}/tools"
+ CTDB_SCRIPTS_TOOLS_HELPER_DIR="${ctdb_dir}/tools"
+ CTDB_SCRIPTS_HELPER_BINDIR="$bin_dir"
+ CTDB_SCRIPTS_DATA_DIR="${ctdb_dir}/config"
+ CTDB_SCRIPTS_TESTS_LIBEXEC_DIR="$bin_dir"
+ CTDB_SCRIPTS_TESTS_BIN_DIR="$CTDB_TEST_DIR"
+else
+ # Installed
+ CTDB_SCRIPTS_BASE="/usr/local/etc/ctdb"
+ CTDB_SCRIPTS_INIT_SCRIPT="" # No ideas here... this is a packaging choice
+ CTDB_SCRIPTS_SBIN_DIR="/usr/local/sbin"
+ CTDB_SCRIPTS_TOOLS_BIN_DIR="/usr/local/bin"
+ CTDB_SCRIPTS_TOOLS_HELPER_DIR="/usr/local/libexec/ctdb"
+ CTDB_SCRIPTS_HELPER_BINDIR="/usr/local/libexec/ctdb"
+ CTDB_SCRIPTS_DATA_DIR="/usr/local/share/ctdb"
+ CTDB_SCRIPTS_TESTS_LIBEXEC_DIR="/usr/local/libexec/ctdb/tests"
+ CTDB_SCRIPTS_TESTS_BIN_DIR="/usr/local/bin"
+fi
+
+export CTDB_SCRIPTS_BASE \
+ CTDB_SCRIPTS_BIN_DIR \
+ CTDB_SCRIPTS_INIT_SCRIPT \
+ CTDB_SCRIPTS_SBIN_DIR \
+ CTDB_SCRIPTS_TOOLS_BIN_DIR \
+ CTDB_SCRIPTS_TOOLS_HELPER_DIR \
+ CTDB_SCRIPTS_HELPER_BINDIR \
+ CTDB_SCRIPTS_DATA_DIR \
+ CTDB_SCRIPTS_TESTS_LIBEXEC_DIR \
+ CTDB_SCRIPTS_TESTS_BIN_DIR
diff --git a/ctdb/tests/scripts/test_wrap b/ctdb/tests/scripts/test_wrap
new file mode 100755
index 0000000..619ac7c
--- /dev/null
+++ b/ctdb/tests/scripts/test_wrap
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Execute the given command. The intention is that it is either
+# * a function from "${TEST_SCRIPTS_DIR}/integration.bash"; or
+# * a test helper binary
+
+TEST_SCRIPTS_DIR=$(dirname "$0")
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+"$@"
diff --git a/ctdb/tests/scripts/unit.sh b/ctdb/tests/scripts/unit.sh
new file mode 100644
index 0000000..403ee07
--- /dev/null
+++ b/ctdb/tests/scripts/unit.sh
@@ -0,0 +1,267 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+. "${TEST_SCRIPTS_DIR}/common.sh"
+
+# Common variables and functions for CTDB unit tests.
+
+trap -- '' PIPE
+
+# Set the required result for a test.
+# - Argument 1 is exit code.
+# - Argument 2, if present is the required test output but "--"
+# indicates empty output.
+# If argument 2 is not present or null then read required test output
+# from stdin.
+required_result()
+{
+ required_rc="${1:-0}"
+ if [ -n "$2" ]; then
+ if [ "$2" = "--" ]; then
+ required_output=""
+ else
+ # Use a sub-shell to strip trailing newlines.
+ # They can't be matched anyway because the
+ # test is run in a sub-shell, which strips
+ # trailing newlines.
+ # shellcheck disable=SC2116
+ required_output=$(echo "$2")
+ fi
+ else
+ if ! tty -s; then
+ required_output=$(cat)
+ else
+ required_output=""
+ fi
+ fi
+}
+
+required_error()
+{
+ rc=$(errcode "$1")
+ shift
+ required_result "$rc" "$@"
+}
+
+ok()
+{
+ required_result 0 "$@"
+}
+
+ok_null()
+{
+ ok --
+}
+
+reset_extra_header()
+{
+ # Re-define this function to output extra header information
+ extra_header()
+ {
+ :
+ }
+}
+
+reset_extra_footer()
+{
+ # Re-define this function to output extra footer information
+ extra_footer()
+ {
+ :
+ }
+}
+
+reset_extra_header
+reset_extra_footer
+
+result_print()
+{
+ _passed="$1"
+ _out="$2"
+ _rc="$3"
+
+ if "$CTDB_TEST_VERBOSE" || ! $_passed; then
+ extra_header
+
+ cat <<EOF
+--------------------------------------------------
+Output (Exit status: ${_rc}):
+--------------------------------------------------
+EOF
+ # Avoid echo, which might expand unintentional escapes
+ printf '%s\n' "$_out" |
+ result_filter |
+ cat "${CTDB_TEST_CAT_RESULTS_OPTS:--}"
+ fi
+
+ if ! $_passed; then
+ cat <<EOF
+--------------------------------------------------
+Required output (Exit status: ${required_rc}):
+--------------------------------------------------
+EOF
+ # Avoid echo, which might expand unintentional escapes
+ printf '%s\n' "$required_output" |
+ cat "${CTDB_TEST_CAT_RESULTS_OPTS:--}"
+
+ if $CTDB_TEST_DIFF_RESULTS; then
+ _outr=$(mktemp)
+ # Avoid echo, which might expand unintentional escapes
+ printf '%s\n' "$required_output" >"$_outr"
+
+ _outf=$(mktemp)
+ # Avoid echo, which might expand unintentional escapes
+ printf '%s\n' "$_fout" >"$_outf"
+
+ cat <<EOF
+--------------------------------------------------
+Diff:
+--------------------------------------------------
+EOF
+ diff -u "$_outr" "$_outf" | cat -A
+ rm "$_outr" "$_outf"
+ fi
+ fi
+}
+
+result_footer()
+{
+ _passed="$1"
+
+ if "$CTDB_TEST_VERBOSE" || ! $_passed; then
+ extra_footer
+ fi
+
+ if $_passed; then
+ echo "PASSED"
+ return 0
+ else
+ echo
+ echo "FAILED"
+ return 1
+ fi
+}
+
+# Result filtering is (usually) used to replace the date/time/PID
+# prefix on some CTDB tool/client log messages with the literal string
+# "DATE TIME [PID]". This allows tests to loosely match this output,
+# since it can't otherwise be matched.
+result_filter_default()
+{
+ _date_time_pid='[0-9/][0-9/]*\ [0-9:\.][0-9:\.]*\ \[[\ 0-9][\ 0-9]*\]'
+ sed -e "s@^${_date_time_pid}:@DATE\ TIME\ \[PID\]:@"
+}
+# Used in testcases
+# shellcheck disable=SC2034
+TEST_DATE_STAMP=""
+
+# Override this function to customise output filtering.
+result_filter()
+{
+ result_filter_default
+}
+
+result_check()
+{
+ _rc=$?
+
+ # Avoid echo, which might expand unintentional escapes
+ _fout=$(printf '%s\n' "$_out" | result_filter)
+
+ if [ "$_fout" = "$required_output" ] &&
+ [ "$_rc" = "$required_rc" ]; then
+ _passed=true
+ else
+ _passed=false
+ fi
+
+ result_print "$_passed" "$_out" "$_rc"
+ result_footer "$_passed"
+}
+
+test_fail()
+{
+ _passed=false
+ return 1
+}
+
+test_case_string=""
+test_case()
+{
+ test_case_string="$*"
+}
+
+test_header_default()
+{
+ echo "=================================================="
+ if [ -n "$test_case_string" ]; then
+ echo "Summary: ${test_case_string}"
+ test_case_string=""
+ fi
+ echo "Running: $*"
+}
+
+reset_test_header()
+{
+ # Re-define this function to get different header
+ test_header()
+ {
+ test_header_default "$@"
+ }
+}
+
+reset_test_header
+
+# Simple test harness for running binary unit tests
+unit_test()
+{
+ test_header "$@"
+
+ _wrapper="$VALGRIND"
+ if $CTDB_TEST_COMMAND_TRACE; then
+ _wrapper="strace"
+ fi
+ _out=$($_wrapper "$@" 2>&1)
+
+ result_check || exit $?
+}
+
+# Simple test harness for running shell script unit tests
+script_test()
+{
+ test_header "$@"
+
+ _shell=""
+ if ${CTDB_TEST_COMMAND_TRACE}; then
+ _shell="sh -x"
+ else
+ _shell="sh"
+ fi
+
+ _out=$($_shell "$@" 2>&1)
+
+ result_check || exit $?
+}
+
+# Simple test harness for running tests without tracing
+unit_test_notrace()
+{
+ test_header "$@"
+
+ _out=$("$@" 2>&1)
+
+ result_check || exit $?
+}
+
+test_cleanup_hooks=""
+
+test_cleanup()
+{
+ test_cleanup_hooks="${test_cleanup_hooks}${test_cleanup_hooks:+ ; }$*"
+}
+
+trap 'eval $test_cleanup_hooks' 0
+
+local="${CTDB_TEST_SUITE_DIR}/scripts/local.sh"
+if [ -r "$local" ]; then
+ . "$local"
+fi
diff --git a/ctdb/tests/src/cluster_mutex_test.c b/ctdb/tests/src/cluster_mutex_test.c
new file mode 100644
index 0000000..2576163
--- /dev/null
+++ b/ctdb/tests/src/cluster_mutex_test.c
@@ -0,0 +1,844 @@
+/*
+ CTDB cluster mutex test
+
+ Copyright (C) Martin Schwenke 2019
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <assert.h>
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/util.h"
+#include "lib/util/smb_strtox.h"
+
+#include "tests/src/test_backtrace.h"
+
+/*
+ * ctdb_cluster_mutex.c is included below. This requires a few hacks...
+ */
+
+/* Avoid inclusion of ctdb_private.h */
+#define _CTDB_PRIVATE_H
+
+/*
+ * Fake ctdb_context
+ *
+ * Stands in for the real structure, whose header is suppressed above.
+ * The only member this test program uses is the event context.
+ */
+struct ctdb_context {
+ struct tevent_context *ev;
+};
+
+/*
+ * ctdb_fork() and ctdb_kill() are used in ctdb_cluster_mutex.c for
+ * safer tracking of PIDs. Fake them here to avoid dragging in the
+ * world.
+ */
+
+/* Fake ctdb_fork(): a plain fork(), the ctdb argument is unused */
+static pid_t ctdb_fork(struct ctdb_context *ctdb)
+{
+ return fork();
+}
+
+/*
+ * Fake ctdb_kill()
+ *
+ * Tests need the child to have exited before they can be sure that the
+ * lock really has been released.  The PID is only accessible in
+ * ctdb_cluster_mutex.c, so this is the place where a best-effort,
+ * blocking attempt is made to wait for the child after it has been
+ * signalled.  Signal 0 is never followed by a wait.
+ *
+ * Returns the result of kill(); the result of waitpid() is ignored.
+ */
+static int ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum)
+{
+	int rc = kill(pid, signum);
+
+	if (signum != 0) {
+		waitpid(pid, NULL, 0);
+	}
+
+	return rc;
+}
+
+#include "server/ctdb_cluster_mutex.c"
+
+/*
+ * Mutex testing support
+ */
+
+/* State of one attempt to take the cluster mutex */
+struct mutex_handle {
+ /* Set once the attempt has completed or has been cancelled */
+ bool done;
+ /* True if the attempt completed with status '0' */
+ bool locked;
+ /* Handle returned by ctdb_cluster_mutex() */
+ struct ctdb_cluster_mutex_handle *h;
+};
+
+/* Per-locker context, passed as private data to the mutex callbacks */
+struct do_lock_context {
+ /* Current lock attempt, NULL when no lock is held or pending */
+ struct mutex_handle *mh;
+ /* Fake ctdb context that provides the event context */
+ struct ctdb_context *ctdb;
+};
+
+/*
+ * Completion callback for ctdb_cluster_mutex()
+ *
+ * Records whether the lock was taken (status '0'), prints one of
+ * LOCK, CONTENTION, TIMEOUT or ERROR according to the status and marks
+ * the attempt as done.  The latency argument is not used.
+ */
+static void do_lock_handler(char status, double latency, void *private_data)
+{
+	struct do_lock_context *ctx = talloc_get_type_abort(
+		private_data, struct do_lock_context);
+	struct mutex_handle *handle = ctx->mh;
+
+	assert(handle != NULL);
+
+	handle->locked = (status == '0');
+
+	/*
+	 * On failure free the cluster mutex handle straight away, so
+	 * that the process has exited and the file descriptor event
+	 * handler has been cancelled
+	 */
+	if (!handle->locked) {
+		TALLOC_FREE(handle->h);
+	}
+
+	if (status == '0') {
+		printf("LOCK\n");
+	} else if (status == '1') {
+		printf("CONTENTION\n");
+	} else if (status == '2') {
+		printf("TIMEOUT\n");
+	} else {
+		printf("ERROR\n");
+	}
+	fflush(stdout);
+
+	handle->done = true;
+}
+
+/*
+ * Lock-lost callback for ctdb_cluster_mutex()
+ *
+ * Prints LOST and frees the lock attempt, leaving the context's mh
+ * pointer NULL.
+ */
+static void do_lock_lost_handler(void *private_data)
+{
+	struct do_lock_context *ctx;
+
+	ctx = talloc_get_type_abort(private_data, struct do_lock_context);
+
+	printf("LOST\n");
+	fflush(stdout);
+
+	TALLOC_FREE(ctx->mh);
+}
+
+/*
+ * Start taking the cluster mutex described by mutex_string
+ *
+ * Allocates a new mutex_handle as dl->mh and starts the asynchronous
+ * lock attempt, with dl as the private data for both callbacks.  The
+ * cluster mutex handle is a talloc child of dl->mh.  Aborts on
+ * allocation or start-up failure.
+ */
+static void do_lock_take(struct do_lock_context *dl,
+			 const char *mutex_string)
+{
+	struct mutex_handle *attempt;
+
+	attempt = talloc_zero(dl, struct mutex_handle);
+	assert(attempt != NULL);
+	dl->mh = attempt;
+
+	/* NOTE(review): 120 is presumably a timeout in seconds - confirm */
+	attempt->h = ctdb_cluster_mutex(attempt,
+					dl->ctdb,
+					mutex_string,
+					120,
+					do_lock_handler,
+					dl,
+					do_lock_lost_handler,
+					dl);
+	assert(attempt->h != NULL);
+}
+
+/* Run the event loop until the pending lock attempt is marked done */
+static void do_lock_wait_done(struct do_lock_context *dl)
+{
+	assert(dl->mh != NULL);
+
+	for (;;) {
+		if (dl->mh->done) {
+			break;
+		}
+		tevent_loop_once(dl->ctdb->ev);
+	}
+}
+
+/*
+ * Check the outcome of a completed lock attempt
+ *
+ * If the lock was not taken then print NOLOCK and free the attempt,
+ * leaving dl->mh NULL.  A held lock is left untouched.
+ */
+static void do_lock_check(struct do_lock_context *dl)
+{
+	assert(dl->mh != NULL);
+
+	if (dl->mh->locked) {
+		return;
+	}
+
+	printf("NOLOCK\n");
+	fflush(stdout);
+	TALLOC_FREE(dl->mh);
+}
+
+/*
+ * Take the lock synchronously: start the attempt, wait for it to
+ * complete and check the result.  On return dl->mh is non-NULL only
+ * if the lock is held.
+ */
+static void do_lock(struct do_lock_context *dl,
+ const char *mutex_string)
+{
+ do_lock_take(dl, mutex_string);
+
+ do_lock_wait_done(dl);
+
+ do_lock_check(dl);
+}
+
+/*
+ * Release the lock or cancel a pending attempt
+ *
+ * - No attempt (dl->mh == NULL): nothing to do.
+ * - Attempt completed: print UNLOCK and free it, so dl->mh becomes NULL.
+ * - Attempt still in progress: print CANCEL, free only the cluster
+ *   mutex handle and mark the attempt as done but not locked.
+ */
+static void do_unlock(struct do_lock_context *dl)
+{
+	if (dl->mh == NULL) {
+		return;
+	}
+
+	if (dl->mh->done) {
+		printf("UNLOCK\n");
+		fflush(stdout);
+		TALLOC_FREE(dl->mh);
+		return;
+	}
+
+	/*
+	 * Taking of the lock is still in progress.  Free the cluster
+	 * mutex handle to release it, but keep the lock handle so that
+	 * the attempt can subsequently be seen to have failed.
+	 */
+	printf("CANCEL\n");
+	fflush(stdout);
+	TALLOC_FREE(dl->mh->h);
+	dl->mh->done = true;
+	dl->mh->locked = false;
+}
+
+/* Timer callback: set the bool that private_data points to */
+static void wait_handler(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t,
+ void *private_data)
+{
+ bool *done = (bool *)private_data;
+
+ *done = true;
+}
+
+/*
+ * Run the event loop for up to wait_time seconds
+ *
+ * Returns early if the lock is lost in the meantime, that is, if
+ * dl->mh becomes NULL.
+ */
+static void do_lock_wait_time(struct do_lock_context *dl,
+			      unsigned long wait_time)
+{
+	struct tevent_timer *tt;
+	bool done = false;
+
+	tt = tevent_add_timer(dl->ctdb->ev,
+			      dl,
+			      tevent_timeval_current_ofs(wait_time, 0),
+			      wait_handler,
+			      &done);
+	assert(tt != NULL);
+
+	while (!done && dl->mh != NULL) {
+		tevent_loop_once(dl->ctdb->ev);
+	}
+
+	/*
+	 * If the loop ended because the lock was lost then the timer is
+	 * still pending and its private data points at a local variable
+	 * of this function.  Cancel it so it can never fire after
+	 * return.  A timer that has already fired must not be touched:
+	 * tevent releases it after running the handler.
+	 */
+	if (!done) {
+		TALLOC_FREE(tt);
+	}
+}
+
+/*
+ * Testcases
+ */
+
+/* Take the lock and release it again: expect LOCK, UNLOCK */
+static void test_lock_unlock(TALLOC_CTX *mem_ctx,
+			     struct ctdb_context *ctdb,
+			     const char *mutex_string)
+{
+	struct do_lock_context *locker =
+		talloc_zero(mem_ctx, struct do_lock_context);
+
+	assert(locker != NULL);
+	locker->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(locker, mutex_string);
+	assert(locker->mh != NULL);
+
+	/* UNLOCK */
+	do_unlock(locker);
+	assert(locker->mh == NULL);
+}
+
+/*
+ * A second locker must not get the lock while the first holds it:
+ * the first attempt succeeds, the second one fails and the first
+ * lock is then released
+ */
+static void test_lock_lock_unlock(TALLOC_CTX *mem_ctx,
+				  struct ctdb_context *ctdb,
+				  const char *mutex_string)
+{
+	struct do_lock_context *holder;
+	struct do_lock_context *contender;
+
+	holder = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(holder != NULL);
+	holder->ctdb = ctdb;
+
+	contender = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(contender != NULL);
+	contender->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(holder, mutex_string);
+	assert(holder->mh != NULL);
+
+	/* CONTENTION */
+	do_lock(contender, mutex_string);
+	assert(contender->mh == NULL);
+
+	/* UNLOCK */
+	do_unlock(holder);
+	assert(holder->mh == NULL);
+}
+
+/*
+ * Once the first locker has released the lock a second locker can
+ * take and release it
+ */
+static void test_lock_unlock_lock_unlock(TALLOC_CTX *mem_ctx,
+					 struct ctdb_context *ctdb,
+					 const char *mutex_string)
+{
+	struct do_lock_context *first;
+	struct do_lock_context *second;
+
+	first = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(first != NULL);
+	first->ctdb = ctdb;
+
+	second = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(second != NULL);
+	second->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(first, mutex_string);
+	assert(first->mh != NULL);
+
+	/* UNLOCK */
+	do_unlock(first);
+	assert(first->mh == NULL);
+
+	/* LOCK */
+	do_lock(second, mutex_string);
+	assert(second->mh != NULL);
+
+	/* UNLOCK */
+	do_unlock(second);
+	assert(second->mh == NULL);
+}
+
+/*
+ * Cancel a lock attempt before it has completed and then check it:
+ * the check must report that the lock is not held
+ */
+static void test_lock_cancel_check(TALLOC_CTX *mem_ctx,
+				   struct ctdb_context *ctdb,
+				   const char *mutex_string)
+{
+	struct do_lock_context *locker =
+		talloc_zero(mem_ctx, struct do_lock_context);
+
+	assert(locker != NULL);
+	locker->ctdb = ctdb;
+
+	/* Start the attempt but do not wait for the result */
+	do_lock_take(locker, mutex_string);
+	assert(locker->mh != NULL);
+
+	/* CANCEL */
+	do_unlock(locker);
+	assert(locker->mh != NULL);
+
+	do_lock_wait_done(locker);
+
+	/* NOLOCK */
+	do_lock_check(locker);
+	assert(locker->mh == NULL);
+}
+
+/*
+ * Cancel a lock attempt before it has completed and then unlock: the
+ * second unlock sees a completed attempt and frees it
+ */
+static void test_lock_cancel_unlock(TALLOC_CTX *mem_ctx,
+				    struct ctdb_context *ctdb,
+				    const char *mutex_string)
+{
+	struct do_lock_context *locker =
+		talloc_zero(mem_ctx, struct do_lock_context);
+
+	assert(locker != NULL);
+	locker->ctdb = ctdb;
+
+	/* Start the attempt but do not wait for the result */
+	do_lock_take(locker, mutex_string);
+	assert(locker->mh != NULL);
+
+	/* CANCEL */
+	do_unlock(locker);
+	assert(locker->mh != NULL);
+
+	do_lock_wait_done(locker);
+
+	/* UNLOCK */
+	do_unlock(locker);
+	assert(locker->mh == NULL);
+}
+
+/* The lock must still be held after waiting for 10 seconds */
+static void test_lock_wait_unlock(TALLOC_CTX *mem_ctx,
+				  struct ctdb_context *ctdb,
+				  const char *mutex_string)
+{
+	struct do_lock_context *locker =
+		talloc_zero(mem_ctx, struct do_lock_context);
+
+	assert(locker != NULL);
+	locker->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(locker, mutex_string);
+	assert(locker->mh != NULL);
+
+	/* Wait for twice as long as the PPID timeout */
+	do_lock_wait_time(locker, 2 * 5);
+	assert(locker->mh != NULL);
+
+	/* UNLOCK */
+	do_unlock(locker);
+	assert(locker->mh == NULL);
+}
+
+/* File descriptor callback: set the bool that private_data points to */
+static void fd_done_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data)
+{
+ bool *done = (bool *)private_data;
+
+ *done = true;
+}
+
+/*
+ * The lock must become available again after the process that took it
+ * has been killed
+ *
+ * A child process takes the lock and reports back over a pipe.  The
+ * parent then kills the child, waits until the pipe signals that the
+ * lock helper has gone as well, and finally takes and releases the
+ * lock itself.
+ */
+static void test_lock_ppid_gone_lock_unlock(TALLOC_CTX *mem_ctx,
+					    struct ctdb_context *ctdb,
+					    const char *mutex_string)
+{
+	struct do_lock_context *dl;
+	struct tevent_fd *fde;
+	int pipefd[2];
+	int ret;
+	pid_t pid, pid2;
+	ssize_t nread;
+	bool done;
+
+	/*
+	 * Do this in the parent - debugging aborts of the child is
+	 * trickier
+	 */
+	dl = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(dl != NULL);
+	dl->ctdb = ctdb;
+
+	ret = pipe(pipefd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		ssize_t nwritten;
+
+		close(pipefd[0]);
+
+		/* LOCK */
+		do_lock(dl, mutex_string);
+		assert(dl->mh != NULL);
+
+		/*
+		 * Note that we never see corresponding LOST.  That
+		 * would come from this process, but it is killed
+		 * below.
+		 */
+
+		/* Tell the parent that the lock is held (ret is 0 here) */
+		nwritten = write(pipefd[1], &ret, sizeof(ret));
+		assert(nwritten == sizeof(ret));
+
+		sleep(999);
+		exit(1);
+	}
+
+	close(pipefd[1]);
+
+	/* Block until the child reports that it holds the lock */
+	nread = read(pipefd[0], &ret, sizeof(ret));
+	assert(nread == sizeof(ret));
+	assert(ret == 0);
+
+	/*
+	 * pipefd[1] is leaked into the helper, so there will be an
+	 * event generated when the helper exits
+	 */
+	done = false;
+	fde = tevent_add_fd(ctdb->ev,
+			    ctdb,
+			    pipefd[0],
+			    TEVENT_FD_READ,
+			    fd_done_handler,
+			    &done);
+	assert(fde != NULL);
+
+	ret = kill(pid, SIGKILL);
+	assert(ret == 0);
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+
+	while (! done) {
+		tevent_loop_once(ctdb->ev);
+	}
+
+	/*
+	 * The pipe has served its purpose.  Remove the event before
+	 * the event loop is run again below, otherwise the handler
+	 * keeps firing on the end-of-file condition, and close the
+	 * read end rather than leaking it.
+	 */
+	TALLOC_FREE(fde);
+	close(pipefd[0]);
+
+	/* LOCK */
+	do_lock(dl, mutex_string);
+	assert(dl->mh != NULL);
+
+	/* UNLOCK */
+	do_unlock(dl);
+	assert(dl->mh == NULL);
+}
+
+/*
+ * After the lock file has been removed a second locker is able to
+ * take the lock while the first locker still holds its own: both
+ * attempts succeed and both locks are released afterwards
+ */
+static void test_lock_file_removed_no_recheck(TALLOC_CTX *mem_ctx,
+					      struct ctdb_context *ctdb,
+					      const char *mutex_string,
+					      const char *lock_file)
+{
+	struct do_lock_context *first;
+	struct do_lock_context *second;
+	int rc;
+
+	first = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(first != NULL);
+	first->ctdb = ctdb;
+
+	second = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(second != NULL);
+	second->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(first, mutex_string);
+	assert(first->mh != NULL);
+
+	/* Remove the lock file from underneath the first locker */
+	rc = unlink(lock_file);
+	assert(rc == 0);
+
+	/* LOCK */
+	do_lock(second, mutex_string);
+	assert(second->mh != NULL);
+
+	/* UNLOCK */
+	do_unlock(second);
+	assert(second->mh == NULL);
+
+	/* UNLOCK */
+	do_unlock(first);
+	assert(first->mh == NULL);
+}
+
+/* The lock must still be held after waiting for wait_time seconds */
+static void test_lock_file_wait_recheck_unlock(TALLOC_CTX *mem_ctx,
+					       struct ctdb_context *ctdb,
+					       const char *mutex_string,
+					       unsigned long wait_time)
+{
+	struct do_lock_context *locker =
+		talloc_zero(mem_ctx, struct do_lock_context);
+
+	assert(locker != NULL);
+	locker->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(locker, mutex_string);
+	assert(locker->mh != NULL);
+
+	/* Give up waiting early only if the lock is lost */
+	do_lock_wait_time(locker, wait_time);
+	assert(locker->mh != NULL);
+
+	/* UNLOCK */
+	do_unlock(locker);
+	assert(locker->mh == NULL);
+}
+
+/*
+ * Take the lock, remove the lock file and then run the event loop
+ * until the lock is reported as lost
+ */
+static void test_lock_file_removed(TALLOC_CTX *mem_ctx,
+				   struct ctdb_context *ctdb,
+				   const char *mutex_string,
+				   const char *lock_file)
+{
+	struct do_lock_context *locker;
+	int rc;
+
+	locker = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(locker != NULL);
+	locker->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(locker, mutex_string);
+	assert(locker->mh != NULL);
+
+	rc = unlink(lock_file);
+	assert(rc == 0);
+
+	/* LOST: the lost handler frees the attempt, ending the loop */
+	while (locker->mh != NULL) {
+		tevent_loop_once(ctdb->ev);
+	}
+}
+
+/*
+ * Take the lock, replace the lock file with a newly created file and
+ * then run the event loop until the lock is reported as lost
+ */
+static void test_lock_file_changed(TALLOC_CTX *mem_ctx,
+				   struct ctdb_context *ctdb,
+				   const char *mutex_string,
+				   const char *lock_file)
+{
+	struct do_lock_context *locker;
+	char *new_file;
+	int new_fd;
+	int rc;
+
+	locker = talloc_zero(mem_ctx, struct do_lock_context);
+	assert(locker != NULL);
+	locker->ctdb = ctdb;
+
+	/* LOCK */
+	do_lock(locker, mutex_string);
+	assert(locker->mh != NULL);
+
+	/* Create the replacement next to the lock file... */
+	new_file = talloc_asprintf(ctdb, "%s.new", lock_file);
+	assert(new_file != NULL);
+
+	new_fd = open(new_file, O_RDWR|O_CREAT, 0600);
+	assert(new_fd != -1);
+	close(new_fd);
+
+	/* ... and rename it over the lock file */
+	rc = rename(new_file, lock_file);
+	assert(rc == 0);
+
+	/* LOST: the lost handler frees the attempt, ending the loop */
+	while (locker->mh != NULL) {
+		tevent_loop_once(ctdb->ev);
+	}
+}
+
+/*
+ * Check behaviour of a held lock while another process blocks access
+ * to the lock file
+ *
+ * A child process waits until the parent holds the mutex, sleeps for
+ * block_wait seconds, takes a write lock on 1 byte at offset 1 of
+ * lock_file and holds it for block_time seconds.  Meanwhile the parent
+ * runs the event loop for block_wait + 2 * block_time seconds, or
+ * until the mutex is lost, and releases the mutex if it is still held.
+ * The child is killed at the end.
+ */
+static void test_lock_io_timeout(TALLOC_CTX *mem_ctx,
+ struct ctdb_context *ctdb,
+ const char *mutex_string,
+ const char *lock_file,
+ unsigned long block_wait,
+ unsigned long block_time)
+{
+ struct do_lock_context *dl;
+ int pipefd[2];
+ int ret;
+ pid_t pid, pid2;
+ ssize_t nwritten;
+
+ dl = talloc_zero(mem_ctx, struct do_lock_context);
+ assert(dl != NULL);
+ dl->ctdb = ctdb;
+
+ ret = pipe(pipefd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) {
+ /* Write lock covering the single byte at offset 1 */
+ static struct flock lock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = 1,
+ .l_len = 1,
+ .l_pid = 0,
+ };
+ ssize_t nread;
+ int fd;
+
+ close(pipefd[1]);
+
+ /* Only continue when the parent is ready */
+ nread = read(pipefd[0], &ret, sizeof(ret));
+ assert(nread == sizeof(ret));
+ assert(ret == 0);
+
+ sleep(block_wait);
+
+ fd = open(lock_file, O_RDWR, 0600);
+ assert(fd != -1);
+
+ /* F_SETLKW blocks until the lock can be taken */
+ ret = fcntl(fd, F_SETLKW, &lock);
+ assert(ret == 0);
+
+ sleep(block_time);
+
+ /* Closing the file drops the lock taken above */
+ close(fd);
+
+ /* Stay around until killed by the parent */
+ sleep(999);
+
+ _exit(0);
+ }
+
+ close(pipefd[0]);
+
+ /* LOCK */
+ do_lock(dl, mutex_string);
+ assert(dl->mh != NULL);
+
+ /* Let the child proceed (ret is 0 here) */
+ nwritten = write(pipefd[1], &ret, sizeof(ret));
+ assert(nwritten == sizeof(ret));
+
+ do_lock_wait_time(dl, block_wait + block_time * 2);
+ if (dl->mh != NULL) {
+ /* UNLOCK */
+ do_unlock(dl);
+ assert(dl->mh == NULL);
+ }
+
+ ret = kill(pid, SIGKILL);
+ assert(ret == 0);
+ pid2 = waitpid(pid, &ret, 0);
+ assert(pid2 == pid);
+}
+
+/*
+ * Main
+ */
+
+/* Program name, set from argv[0] in main() and used by usage() */
+static const char *prog;
+
+/* Print a usage message to stderr and exit with status 1 */
+static void usage(void)
+{
+ fprintf(stderr, "usage: %s <test> <mutex-string> [<arg>...]\n", prog);
+ exit(1);
+}
+
+/* SIGALRM handler installed by main(): abort the test program */
+static void alarm_handler(int sig)
+{
+ abort();
+}
+
+/*
+ * Run the test case named by argv[1] against the cluster mutex
+ * described by argv[2].  Some test cases need further arguments: a
+ * lock file name, a wait time in seconds, or both a lock file name and
+ * two times in seconds.
+ *
+ * Wrong argument counts and invalid numbers produce the usage message.
+ * An unknown test name produces an error message and exit status 1.
+ * The whole program is aborted by SIGALRM after 60 seconds.
+ */
+int main(int argc, const char *argv[])
+{
+	TALLOC_CTX *mem_ctx;
+	struct ctdb_context *ctdb;
+	const char *mutex_string;
+	const char *test;
+	struct sigaction sa = { .sa_handler = NULL, };
+	int ret;
+	const char *lock_file;
+	/* Same type as returned by smb_strtoul() and taken by the test */
+	unsigned long wait_time;
+
+	prog = argv[0];
+
+	if (argc < 3) {
+		usage();
+	}
+
+	mem_ctx = talloc_new(NULL);
+	assert(mem_ctx != NULL);
+
+	ctdb = talloc_zero(mem_ctx, struct ctdb_context);
+	assert(ctdb != NULL);
+
+	ctdb->ev = tevent_context_init(ctdb);
+	assert(ctdb->ev != NULL);
+
+	/* Add a 60s timeout for the whole test */
+	sa.sa_handler = alarm_handler;
+	sigemptyset(&sa.sa_mask);
+	ret = sigaction(SIGALRM, &sa, NULL);
+	assert(ret == 0);
+	alarm(60);
+
+	test_backtrace_setup();
+
+	test = argv[1];
+	mutex_string = argv[2];
+
+	if (strcmp(test, "lock-unlock") == 0) {
+		test_lock_unlock(mem_ctx, ctdb, mutex_string);
+	} else if (strcmp(test, "lock-lock-unlock") == 0) {
+		test_lock_lock_unlock(mem_ctx, ctdb, mutex_string);
+	} else if (strcmp(test, "lock-unlock-lock-unlock") == 0) {
+		test_lock_unlock_lock_unlock(mem_ctx, ctdb, mutex_string);
+	} else if (strcmp(test, "lock-cancel-check") == 0) {
+		test_lock_cancel_check(mem_ctx, ctdb, mutex_string);
+	} else if (strcmp(test, "lock-cancel-unlock") == 0) {
+		test_lock_cancel_unlock(mem_ctx, ctdb, mutex_string);
+	} else if (strcmp(test, "lock-wait-unlock") == 0) {
+		test_lock_wait_unlock(mem_ctx, ctdb, mutex_string);
+	} else if (strcmp(test, "lock-ppid-gone-lock-unlock") == 0) {
+		test_lock_ppid_gone_lock_unlock(mem_ctx, ctdb, mutex_string);
+	} else if (strcmp(test, "lock-file-removed-no-recheck") == 0) {
+		if (argc != 4) {
+			usage();
+		}
+
+		lock_file = argv[3];
+
+		test_lock_file_removed_no_recheck(mem_ctx,
+						  ctdb,
+						  mutex_string,
+						  lock_file);
+	} else if (strcmp(test, "lock-file-wait-recheck-unlock") == 0) {
+		if (argc != 4) {
+			usage();
+		}
+
+		wait_time = smb_strtoul(argv[3],
+					NULL,
+					10,
+					&ret,
+					SMB_STR_STANDARD);
+		if (ret != 0) {
+			usage();
+		}
+
+		test_lock_file_wait_recheck_unlock(mem_ctx,
+						   ctdb,
+						   mutex_string,
+						   wait_time);
+	} else if (strcmp(test, "lock-file-removed") == 0) {
+		if (argc != 4) {
+			usage();
+		}
+
+		lock_file = argv[3];
+
+		test_lock_file_removed(mem_ctx,
+				       ctdb,
+				       mutex_string,
+				       lock_file);
+	} else if (strcmp(test, "lock-file-changed") == 0) {
+		if (argc != 4) {
+			usage();
+		}
+
+		lock_file = argv[3];
+
+		test_lock_file_changed(mem_ctx,
+				       ctdb,
+				       mutex_string,
+				       lock_file);
+	} else if (strcmp(test, "lock-io-timeout") == 0) {
+		unsigned long block_wait;
+		unsigned long block_time;
+
+		if (argc != 6) {
+			usage();
+		}
+
+		lock_file = argv[3];
+
+		/*
+		 * Parse the times the same way as the wait time above,
+		 * so that invalid values are rejected instead of being
+		 * silently converted
+		 */
+		block_wait = smb_strtoul(argv[4],
+					 NULL,
+					 10,
+					 &ret,
+					 SMB_STR_STANDARD);
+		if (ret != 0) {
+			usage();
+		}
+
+		block_time = smb_strtoul(argv[5],
+					 NULL,
+					 10,
+					 &ret,
+					 SMB_STR_STANDARD);
+		if (ret != 0) {
+			usage();
+		}
+
+		test_lock_io_timeout(mem_ctx,
+				     ctdb,
+				     mutex_string,
+				     lock_file,
+				     block_wait,
+				     block_time);
+	} else {
+		fprintf(stderr, "Unknown test\n");
+		exit(1);
+	}
+
+	talloc_free(mem_ctx);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/cluster_wait.c b/ctdb/tests/src/cluster_wait.c
new file mode 100644
index 0000000..d411591
--- /dev/null
+++ b/ctdb/tests/src/cluster_wait.c
@@ -0,0 +1,346 @@
+/*
+ Cluster wide synchronization
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+
+#include "tests/src/cluster_wait.h"
+
+#define MSG_ID_JOIN (CTDB_SRVID_TEST_RANGE | 0x1)
+#define MSG_ID_SYNC (CTDB_SRVID_TEST_RANGE | 0x2)
+
+/* Wait for all the clients to initialize */
+
+/* State of a cluster_wait_send() request */
+struct cluster_wait_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Number of nodes that have to join */
+ uint32_t num_nodes;
+ /* Per-node "has joined" flags, only allocated when the PNN is 0 */
+ bool *ready;
+ /* Set once removal of the join handler has been started */
+ bool join_done;
+};
+
+static void cluster_wait_join_registered(struct tevent_req *subreq);
+static void cluster_wait_sync_registered(struct tevent_req *subreq);
+static void cluster_wait_join(struct tevent_req *subreq);
+static void cluster_wait_join_sent(struct tevent_req *subreq);
+static void cluster_wait_join_handler(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+static void cluster_wait_join_unregistered(struct tevent_req *subreq);
+static void cluster_wait_sync_sent(struct tevent_req *subreq);
+static void cluster_wait_sync_handler(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+static void cluster_wait_sync_unregistered(struct tevent_req *subreq);
+
+/*
+ * Start waiting for all nodes of the cluster to become ready
+ *
+ * Every client registers a handler for MSG_ID_SYNC.  The client whose
+ * PNN is 0 additionally tracks which nodes have joined and registers a
+ * handler for MSG_ID_JOIN.  The request fails if it has not completed
+ * within 30 seconds.
+ *
+ * Returns the new request, or NULL if it could not be created.  The
+ * result is collected with cluster_wait_recv().
+ */
+struct tevent_req *cluster_wait_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t num_nodes)
+{
+ struct tevent_req *req, *subreq;
+ struct cluster_wait_state *state;
+ bool ok;
+
+ req = tevent_req_create(mem_ctx, &state, struct cluster_wait_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->client = client;
+ state->num_nodes = num_nodes;
+
+ state->join_done = false;
+
+ /* Only the client with PNN 0 collects the join messages */
+ if (ctdb_client_pnn(client) == 0) {
+ state->ready = talloc_zero_array(state, bool, num_nodes);
+ if (tevent_req_nomem(state->ready, req)) {
+ return tevent_req_post(req, ev);
+ }
+
+ subreq = ctdb_client_set_message_handler_send(
+ state, ev, client, MSG_ID_JOIN,
+ cluster_wait_join_handler, req);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, cluster_wait_join_registered,
+ req);
+ }
+
+ /* All clients wait for the sync message */
+ subreq = ctdb_client_set_message_handler_send(
+ state, ev, client, MSG_ID_SYNC,
+ cluster_wait_sync_handler, req);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, cluster_wait_sync_registered, req);
+
+ /* If cluster is not synchronized within 30 seconds, time out */
+ ok = tevent_req_set_endtime(
+ req,
+ ev,
+ tevent_timeval_current_ofs(30, 0));
+ if (!ok) {
+ return tevent_req_post(req, ev);
+ }
+
+ return req;
+}
+
+/*
+ * The MSG_ID_JOIN handler has been registered: announce that this
+ * client is now waiting for the cluster, or fail the request
+ */
+static void cluster_wait_join_registered(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool ok;
+
+	ok = ctdb_client_set_message_handler_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		printf("Waiting for cluster\n");
+		fflush(stdout);
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * The MSG_ID_SYNC handler has been registered: schedule the first join
+ * message for one second from now
+ */
+static void cluster_wait_sync_registered(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct cluster_wait_state *state = tevent_req_data(
+		req, struct cluster_wait_state);
+	struct tevent_req *wakeup;
+	int err;
+	bool ok;
+
+	ok = ctdb_client_set_message_handler_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	wakeup = tevent_wakeup_send(state, state->ev,
+				    tevent_timeval_current_ofs(1, 0));
+	if (tevent_req_nomem(wakeup, req)) {
+		return;
+	}
+	tevent_req_set_callback(wakeup, cluster_wait_join, req);
+}
+
+/*
+ * Wakeup timer expired: send a MSG_ID_JOIN message carrying this
+ * client's PNN
+ */
+static void cluster_wait_join(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct cluster_wait_state *state = tevent_req_data(
+		req, struct cluster_wait_state);
+	struct ctdb_req_message join_msg;
+	struct tevent_req *send_req;
+	uint32_t my_pnn;
+	bool ok;
+
+	ok = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	my_pnn = ctdb_client_pnn(state->client);
+
+	join_msg.srvid = MSG_ID_JOIN;
+	join_msg.data.data.dsize = sizeof(my_pnn);
+	join_msg.data.data.dptr = (uint8_t *)&my_pnn;
+
+	/* NOTE(review): the 0 is presumably the destination PNN - confirm */
+	send_req = ctdb_client_message_send(state, state->ev, state->client,
+					    0, &join_msg);
+	if (tevent_req_nomem(send_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(send_req, cluster_wait_join_sent, req);
+}
+
+/*
+ * A join message has been sent: schedule the next one for one second
+ * from now, so that join messages keep being sent
+ */
+static void cluster_wait_join_sent(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct cluster_wait_state *state = tevent_req_data(
+		req, struct cluster_wait_state);
+	struct tevent_req *wakeup;
+	int err;
+	bool ok;
+
+	ok = ctdb_client_message_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	wakeup = tevent_wakeup_send(state, state->ev,
+				    tevent_timeval_current_ofs(1, 0));
+	if (tevent_req_nomem(wakeup, req)) {
+		return;
+	}
+	tevent_req_set_callback(wakeup, cluster_wait_join, req);
+}
+
+/*
+ * Message handler for MSG_ID_JOIN
+ *
+ * The message payload is the PNN of the sending node.  The node is
+ * marked as ready and, the first time all nodes are found to be ready,
+ * removal of this handler is started.  Messages with an unexpected
+ * srvid, size or PNN are ignored.
+ */
+static void cluster_wait_join_handler(uint64_t srvid, TDB_DATA data,
+				      void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct cluster_wait_state *state = tevent_req_data(
+		req, struct cluster_wait_state);
+	struct tevent_req *subreq;
+	uint32_t pnn;
+	uint32_t i;
+
+	if (srvid != MSG_ID_JOIN) {
+		return;
+	}
+
+	if (data.dsize != sizeof(uint32_t)) {
+		return;
+	}
+
+	pnn = *(uint32_t *)data.dptr;
+
+	/*
+	 * ready[] has num_nodes entries, so the valid indexes are
+	 * 0 .. num_nodes - 1 and pnn == num_nodes is out of range too
+	 */
+	if (pnn >= state->num_nodes) {
+		return;
+	}
+
+	state->ready[pnn] = true;
+
+	for (i=0; i<state->num_nodes; i++) {
+		if (! state->ready[i]) {
+			return;
+		}
+	}
+
+	/* Join messages can keep arriving; only act on completion once */
+	if (state->join_done) {
+		return;
+	}
+
+	state->join_done = true;
+	subreq = ctdb_client_remove_message_handler_send(
+					state, state->ev, state->client,
+					MSG_ID_JOIN, req);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, cluster_wait_join_unregistered, req);
+}
+
+/*
+ * The MSG_ID_JOIN handler has been removed, so all nodes have joined:
+ * send an empty MSG_ID_SYNC message to CTDB_BROADCAST_CONNECTED
+ */
+static void cluster_wait_join_unregistered(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct cluster_wait_state *state = tevent_req_data(
+		req, struct cluster_wait_state);
+	struct ctdb_req_message msg;
+	bool status;
+	int ret;
+
+	status = ctdb_client_remove_message_handler_recv(subreq, &ret);
+	/* Release the completed subrequest, as all other callbacks do */
+	TALLOC_FREE(subreq);
+	if (! status) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	msg.srvid = MSG_ID_SYNC;
+	msg.data.data = tdb_null;
+
+	subreq = ctdb_client_message_send(state, state->ev, state->client,
+					  CTDB_BROADCAST_CONNECTED, &msg);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, cluster_wait_sync_sent, req);
+}
+
+/*
+ * The sync message has been sent: fail the request if sending failed,
+ * otherwise there is nothing more to do here
+ */
+static void cluster_wait_sync_sent(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool ok;
+
+	ok = ctdb_client_message_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (!ok) {
+		tevent_req_error(req, err);
+	}
+}
+
+/*
+ * Message handler for MSG_ID_SYNC
+ *
+ * Starts removal of this handler.  Messages with a different srvid are
+ * ignored; the message payload is not looked at.
+ */
+static void cluster_wait_sync_handler(uint64_t srvid, TDB_DATA data,
+				      void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct cluster_wait_state *state;
+	struct tevent_req *remove_req;
+
+	if (srvid != MSG_ID_SYNC) {
+		return;
+	}
+
+	state = tevent_req_data(req, struct cluster_wait_state);
+
+	remove_req = ctdb_client_remove_message_handler_send(
+					state, state->ev, state->client,
+					MSG_ID_SYNC, req);
+	if (tevent_req_nomem(remove_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(remove_req,
+				cluster_wait_sync_unregistered,
+				req);
+}
+
+/*
+ * The MSG_ID_SYNC handler has been removed: the request is complete,
+ * successfully unless the removal failed
+ */
+static void cluster_wait_sync_unregistered(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool ok;
+
+	ok = ctdb_client_remove_message_handler_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		tevent_req_done(req);
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of a cluster_wait_send() request
+ *
+ * Returns true on success.  On failure returns false and, if perr is
+ * not NULL, stores the error code in *perr.
+ */
+bool cluster_wait_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (!tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
diff --git a/ctdb/tests/src/cluster_wait.h b/ctdb/tests/src/cluster_wait.h
new file mode 100644
index 0000000..e0c64cc
--- /dev/null
+++ b/ctdb/tests/src/cluster_wait.h
@@ -0,0 +1,30 @@
+/*
+ Cluster wide synchronization
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CLUSTER_WAIT_H__
+#define __CLUSTER_WAIT_H__
+
+/*
+ * Start an asynchronous wait for num_nodes nodes to become ready.
+ * Returns a tevent request whose result is collected with
+ * cluster_wait_recv().
+ */
+struct tevent_req *cluster_wait_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t num_nodes);
+
+/*
+ * Collect the result of cluster_wait_send().  Returns true on success.
+ * On failure returns false and stores the error code in *perr if perr
+ * is not NULL.
+ */
+bool cluster_wait_recv(struct tevent_req *req, int *perr);
+
+#endif /* __CLUSTER_WAIT_H__ */
diff --git a/ctdb/tests/src/cmdline_test.c b/ctdb/tests/src/cmdline_test.c
new file mode 100644
index 0000000..916d820
--- /dev/null
+++ b/ctdb/tests/src/cmdline_test.c
@@ -0,0 +1,480 @@
+/*
+ Command line processing tests
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <popt.h>
+#include <talloc.h>
+
+#include <assert.h>
+
+#include "common/cmdline.c"
+
+/* Command handler that ignores its arguments and always returns 0. */
+static int dummy_func(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ return 0;
+}
+
+/* Empty option table: contains only the terminator. */
+static struct poptOption dummy_options[] = {
+ POPT_TABLEEND
+};
+
+/* Empty command table: contains only the terminator. */
+static struct cmdline_command dummy_commands[] = {
+ CMDLINE_TABLEEND
+};
+
+/*
+ * cmdline_init() must fail with EINVAL when the program name is NULL or
+ * when no command table is given, with or without an option table.
+ */
+static void test1(void)
+{
+ TALLOC_CTX *mem_ctx;
+ struct cmdline_context *cmdline;
+ int ret;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ /* No program name, no options, no commands */
+ ret = cmdline_init(mem_ctx, NULL, NULL, NULL, NULL, &cmdline);
+ assert(ret == EINVAL);
+
+ /* Program name only */
+ ret = cmdline_init(mem_ctx, "test1", NULL, NULL, NULL, &cmdline);
+ assert(ret == EINVAL);
+
+ /* Program name and options, but still no commands */
+ ret = cmdline_init(mem_ctx,
+ "test1",
+ dummy_options,
+ NULL,
+ NULL,
+ &cmdline);
+ assert(ret == EINVAL);
+
+ talloc_free(mem_ctx);
+}
+
+/* Command without a handler function. */
+static struct cmdline_command test2_nofunc[] = {
+ { "nofunc", NULL, NULL, NULL },
+ CMDLINE_TABLEEND
+};
+
+/* Command with a handler but without a help message. */
+static struct cmdline_command test2_nohelp[] = {
+ { "nohelp", dummy_func, NULL, NULL },
+ CMDLINE_TABLEEND
+};
+
+/* Command with a very long name and a long argument description. */
+static struct cmdline_command test2_long[] = {
+ { "really really long command with lots of words",
+ dummy_func, "long command help",
+ "<and lots of really long long arguments>" },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Command with a very long help message.  The two adjacent string
+ * literals are concatenated by the compiler without a separating space
+ * ("...messagewith...").
+ */
+static struct cmdline_command test2_longhelp[] = {
+ { "longhelp", dummy_func,
+ "this is a really really really long help message" \
+ "with lots of words and lots of description",
+ NULL },
+ CMDLINE_TABLEEND
+};
+
+/* Command whose name consists of two words. */
+static struct cmdline_command test2_twowords[] = {
+ { "multiple words", dummy_func, "multiple words help", NULL },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Feed each test2 command table to cmdline_init() and check the result:
+ * the tables with no function, no help, a long command name and a long
+ * help message must yield EINVAL, the two-word command must be accepted.
+ */
+static void test2(void)
+{
+	struct {
+		struct cmdline_command *commands;
+		int expected;
+	} cases[] = {
+		{ test2_nofunc, EINVAL },
+		{ test2_nohelp, EINVAL },
+		{ test2_long, EINVAL },
+		{ test2_longhelp, EINVAL },
+		{ test2_twowords, 0 },
+	};
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+	struct cmdline_context *ctx;
+	size_t i;
+
+	assert(tmp_ctx != NULL);
+
+	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+		int rc = cmdline_init(tmp_ctx,
+				      "test2",
+				      NULL,
+				      NULL,
+				      cases[i].commands,
+				      &ctx);
+
+		assert(rc == cases[i].expected);
+	}
+
+	talloc_free(tmp_ctx);
+}
+
+/* Storage referenced by the test3 option tables. */
+struct {
+ const char *str;
+} test3_data;
+
+/* Option with a short name only: the long name is NULL. */
+static struct poptOption test3_noname[] = {
+ { NULL, 'o', POPT_ARG_STRING, &test3_data.str, 0,
+ "Noname option", NULL },
+ POPT_TABLEEND
+};
+
+/* POPT_ARG_NONE option with no storage pointer. */
+static struct poptOption test3_notype[] = {
+ { "debug", 'd', POPT_ARG_NONE, NULL, 0,
+ "No argument option", NULL },
+ POPT_TABLEEND
+};
+
+/* String option with no storage pointer. */
+static struct poptOption test3_noarg[] = {
+ { "debug", 'd', POPT_ARG_STRING, NULL, 0,
+ "No argument option", NULL },
+ POPT_TABLEEND
+};
+
+/*
+ * Each of the test3 option tables must be rejected by cmdline_init()
+ * with EINVAL, even though the (empty) command table is valid.
+ */
+static void test3(void)
+{
+	struct poptOption *bad_options[] = {
+		test3_noname,
+		test3_notype,
+		test3_noarg,
+	};
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+	struct cmdline_context *ctx;
+	size_t i;
+
+	assert(tmp_ctx != NULL);
+
+	for (i = 0; i < sizeof(bad_options) / sizeof(bad_options[0]); i++) {
+		int rc = cmdline_init(tmp_ctx,
+				      "test3",
+				      bad_options[i],
+				      NULL,
+				      dummy_commands,
+				      &ctx);
+
+		assert(rc == EINVAL);
+	}
+
+	talloc_free(tmp_ctx);
+}
+
+/* Storage for the integer options defined in test4_options. */
+static int test4_count;
+static int test4_value;
+
+/* Two valid integer options, one without and one with an argument hint. */
+static struct poptOption test4_options[] = {
+ { "count", 'c', POPT_ARG_INT, &test4_count, 0,
+ "Option help of length thirty.", NULL },
+ { "value", 'v', POPT_ARG_INT, &test4_value, 0,
+ "Short description", "Value help of length 23" },
+ POPT_TABLEEND
+};
+
+/* Two valid commands with help and argument descriptions. */
+static struct cmdline_command test4_commands[] = {
+ { "A really really long command", dummy_func,
+ "This is a really long help message",
+ "<a long arguments message>" },
+ { "short command", dummy_func,
+ "short msg for short command", "<short arg msg>" },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Initialise a command line with valid options and commands, then emit
+ * the usage message twice: once with no command name and once for
+ * "short command".
+ */
+static void test4(void)
+{
+ TALLOC_CTX *mem_ctx;
+ struct cmdline_context *cmdline;
+ int ret;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ret = cmdline_init(mem_ctx,
+ "test4",
+ test4_options,
+ NULL,
+ test4_commands,
+ &cmdline);
+ assert(ret == 0);
+
+ cmdline_usage(cmdline, NULL);
+ cmdline_usage(cmdline, "short command");
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Command handler used to check argument passing: with exactly one
+ * argument it prints that argument and returns 200, with any other
+ * number of arguments it returns 100.
+ */
+static int action_func(TALLOC_CTX *mem_ctx,
+		       int argc,
+		       const char **argv,
+		       void *private_data)
+{
+	if (argc == 1) {
+		printf("%s\n", argv[0]);
+		return 200;
+	}
+
+	return 100;
+}
+
+/* Two two-word commands: "action one" is a no-op, "action two" runs action_func. */
+static struct cmdline_command action_commands[] = {
+ { "action one", dummy_func, "action one help", NULL },
+ { "action two", action_func, "action two help", NULL },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Check the return codes of cmdline_parse() for help requests, an
+ * incomplete command and a complete command.
+ *
+ * NOTE(review): the final boolean passed to cmdline_parse() is true here,
+ * where argv[0] is the program name, and false in test6/test7, where argv
+ * starts with the command - presumably it tells the parser to skip
+ * argv[0]; confirm against cmdline.h.
+ */
+static void test5(void)
+{
+ TALLOC_CTX *mem_ctx;
+ struct cmdline_context *cmdline;
+ const char *argv1[] = { "test5", "--help" };
+ const char *argv2[] = { "test5", "action" };
+ const char *argv3[] = { "test5", "action", "--help" };
+ const char *argv4[] = { "test5", "action", "one" };
+ int ret;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ret = cmdline_init(mem_ctx,
+ "test5",
+ NULL,
+ "Action",
+ action_commands,
+ &cmdline);
+ assert(ret == 0);
+
+ /* --help on its own */
+ ret = cmdline_parse(cmdline, 2, argv1, true);
+ assert(ret == EAGAIN);
+
+ /* "action" alone does not match any command */
+ ret = cmdline_parse(cmdline, 2, argv2, true);
+ assert(ret == ENOENT);
+
+ /* --help after a partial command */
+ ret = cmdline_parse(cmdline, 3, argv3, true);
+ assert(ret == EAGAIN);
+
+ /* Complete command "action one" */
+ ret = cmdline_parse(cmdline, 3, argv4, true);
+ assert(ret == 0);
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Parse and run "action two" with zero, one and two extra arguments and
+ * check the value returned by action_func through cmdline_run(): 200
+ * when exactly one argument is passed on, 100 otherwise.
+ */
+static void test6(void)
+{
+ TALLOC_CTX *mem_ctx;
+ struct cmdline_context *cmdline;
+ const char *argv1[] = { "action", "two" };
+ const char *argv2[] = { "action", "two", "arg1" };
+ const char *argv3[] = { "action", "two", "arg1", "arg2" };
+ int ret, result;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ret = cmdline_init(mem_ctx,
+ "test6",
+ NULL,
+ NULL,
+ action_commands,
+ &cmdline);
+ assert(ret == 0);
+
+ /* No extra arguments */
+ ret = cmdline_parse(cmdline, 2, argv1, false);
+ assert(ret == 0);
+
+ ret = cmdline_run(cmdline, NULL, &result);
+ assert(ret == 0);
+ assert(result == 100);
+
+ /* Exactly one extra argument */
+ ret = cmdline_parse(cmdline, 3, argv2, false);
+ assert(ret == 0);
+
+ ret = cmdline_run(cmdline, NULL, &result);
+ assert(ret == 0);
+ assert(result == 200);
+
+ /* Two extra arguments */
+ ret = cmdline_parse(cmdline, 4, argv3, false);
+ assert(ret == 0);
+
+ ret = cmdline_run(cmdline, NULL, &result);
+ assert(ret == 0);
+ assert(result == 100);
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Command handler for test7: insists on exactly one argument, prints it
+ * and returns 0.
+ */
+static int test7_func(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ assert(argc == 1);
+
+ printf("%s\n", argv[0]);
+
+ return 0;
+}
+
+/* Commands registered through cmdline_init() in test7. */
+static struct cmdline_command test7_basic_commands[] = {
+ { "cmd1", test7_func, "command one help", NULL },
+ { "cmd2", test7_func, "command two help", NULL },
+ CMDLINE_TABLEEND
+};
+
+/* First additional command table, added with cmdline_add() in test7. */
+static struct cmdline_command test7_advanced_commands[] = {
+ { "cmd3", test7_func, "command three help", NULL },
+ { "cmd4", test7_func, "command four help", NULL },
+ CMDLINE_TABLEEND
+};
+
+/* Second additional command table, added with cmdline_add() in test7. */
+static struct cmdline_command test7_ultimate_commands[] = {
+ { "cmd5", test7_func, "command five help", NULL },
+ { "cmd6", test7_func, "command six help", NULL },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Register three command tables (one via cmdline_init(), two via
+ * cmdline_add()), print the usage, then parse and run one command from
+ * each table.
+ */
+static void test7(void)
+{
+ TALLOC_CTX *mem_ctx;
+ struct cmdline_context *cmdline;
+ const char *argv1[] = { "cmd1", "one" };
+ const char *argv2[] = { "cmd3", "three" };
+ const char *argv3[] = { "cmd6", "six" };
+ int ret, result;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ret = cmdline_init(mem_ctx,
+ "test7",
+ NULL,
+ "Basic",
+ test7_basic_commands,
+ &cmdline);
+ assert(ret == 0);
+
+ ret = cmdline_add(cmdline, "Advanced", test7_advanced_commands);
+ assert(ret == 0);
+
+ ret = cmdline_add(cmdline, "Ultimate", test7_ultimate_commands);
+ assert(ret == 0);
+
+ cmdline_usage(cmdline, NULL);
+
+ printf("\n");
+
+ /* Command from the table passed to cmdline_init() */
+ ret = cmdline_parse(cmdline, 2, argv1, false);
+ assert(ret == 0);
+
+ ret = cmdline_run(cmdline, NULL, &result);
+ assert(ret == 0);
+ assert(result == 0);
+
+ /* Command from the "Advanced" table */
+ ret = cmdline_parse(cmdline, 2, argv2, false);
+ assert(ret == 0);
+
+ ret = cmdline_run(cmdline, NULL, &result);
+ assert(ret == 0);
+ assert(result == 0);
+
+ /* Command from the "Ultimate" table */
+ ret = cmdline_parse(cmdline, 2, argv3, false);
+ assert(ret == 0);
+
+ ret = cmdline_run(cmdline, NULL, &result);
+ assert(ret == 0);
+ assert(result == 0);
+
+ talloc_free(mem_ctx);
+}
+
+
+/*
+ * Run the test selected by the number given as the first argument.
+ *
+ * Exits with status 1 when no test number is given or when the number
+ * does not name a test, so that a mistyped test number cannot be
+ * mistaken for a passing test.
+ */
+int main(int argc, const char **argv)
+{
+	int num;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage %s <testnum>\n", argv[0]);
+		exit(1);
+	}
+
+	num = atoi(argv[1]);
+
+	switch (num) {
+	case 1:
+		test1();
+		break;
+
+	case 2:
+		test2();
+		break;
+
+	case 3:
+		test3();
+		break;
+
+	case 4:
+		test4();
+		break;
+
+	case 5:
+		test5();
+		break;
+
+	case 6:
+		test6();
+		break;
+
+	case 7:
+		test7();
+		break;
+
+	default:
+		/* Previously fell through silently and reported success */
+		fprintf(stderr, "Unknown test number %s\n", argv[1]);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/comm_client_test.c b/ctdb/tests/src/comm_client_test.c
new file mode 100644
index 0000000..41ed5f7
--- /dev/null
+++ b/ctdb/tests/src/comm_client_test.c
@@ -0,0 +1,217 @@
+/*
+ comm tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "common/pkt_read.c"
+#include "common/pkt_write.c"
+#include "common/comm.c"
+
+
+/* State of the writer request that drives the client side of the test. */
+struct writer_state {
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ uint8_t *buf; /* packet buffer, sized for the largest packet */
+ size_t *pkt_size; /* caller-owned array of packet sizes */
+ size_t count, id; /* number of packets; index of the current packet */
+};
+
+static void writer_done(struct tevent_req *subreq);
+static void read_handler(uint8_t *buf, size_t buflen, void *private_data);
+static void dead_handler(void *private_data);
+
+/*
+ * Start sending packets of the given sizes over fd, one at a time.
+ *
+ * Every packet begins with its own length stored as a uint32_t and is
+ * otherwise filled with the byte pattern i%256.  Only the first packet
+ * is queued here; read_handler() queues the next one each time a packet
+ * of the expected size is read from fd.
+ *
+ * pkt_size must hold count entries and stay valid for the lifetime of
+ * the request.  The request fails with EINVAL if count is 0 or if any
+ * packet is too small to hold the uint32_t length header, instead of
+ * reading or writing out of bounds.
+ *
+ * Returns NULL if the request or the packet buffer cannot be allocated.
+ */
+static struct tevent_req *writer_send(TALLOC_CTX *mem_ctx,
+				      struct tevent_context *ev,
+				      int fd, size_t *pkt_size,
+				      size_t count)
+{
+	struct tevent_req *req, *subreq;
+	struct writer_state *state;
+	size_t max_size = 0, buflen;
+	bool valid = (count > 0);
+	size_t i;
+	int ret;
+
+	for (i=0; i<count; i++) {
+		if (pkt_size[i] < sizeof(uint32_t)) {
+			/* No room for the length header */
+			valid = false;
+		}
+		if (pkt_size[i] > max_size) {
+			max_size = pkt_size[i];
+		}
+	}
+
+	req = tevent_req_create(mem_ctx, &state, struct writer_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	if (! valid) {
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
+	state->ev = ev;
+	state->pkt_size = pkt_size;
+	state->count = count;
+	state->id = 0;
+
+	ret = comm_setup(state, ev, fd, read_handler, req,
+			 dead_handler, req, &state->comm);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	state->buf = talloc_array(state, uint8_t, max_size);
+	if (state->buf == NULL) {
+		talloc_free(req);
+		return NULL;
+	}
+	for (i=0; i<max_size; i++) {
+		state->buf[i] = i%256;
+	}
+
+	/* The first four bytes of every packet carry the packet length */
+	buflen = state->pkt_size[state->id];
+	*(uint32_t *)state->buf = buflen;
+	subreq = comm_write_send(state, state->ev, state->comm,
+				 state->buf, buflen);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, writer_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for a single packet write.  A failed write fails
+ * the parent request; a successful write needs no further action here
+ * because the next packet is queued from read_handler().
+ */
+static void writer_done(struct tevent_req *subreq)
+{
+	struct tevent_req *parent;
+	int write_err;
+	bool ok;
+
+	parent = tevent_req_callback_data(subreq, struct tevent_req);
+
+	ok = comm_write_recv(subreq, &write_err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		return;
+	}
+
+	tevent_req_error(parent, write_err);
+}
+
+/*
+ * Called for every packet read from the socket.  The packet must have
+ * the size of the packet currently being sent, otherwise the request
+ * fails with EIO.  After the last packet the request is marked done;
+ * otherwise the next packet is queued for writing.
+ */
+static void read_handler(uint8_t *buf, size_t buflen, void *private_data)
+{
+ struct tevent_req *req = talloc_get_type_abort(
+ private_data, struct tevent_req);
+ struct writer_state *state = tevent_req_data(
+ req, struct writer_state);
+ struct tevent_req *subreq;
+
+ if (buflen != state->pkt_size[state->id]) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ state->id++;
+ if (state->id >= state->count) {
+ tevent_req_done(req);
+ return;
+ }
+
+ /* Reuse the buffer: only the length header changes between packets */
+ buflen = state->pkt_size[state->id];
+ *(uint32_t *)state->buf = buflen;
+ subreq = comm_write_send(state, state->ev, state->comm,
+ state->buf, buflen);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, writer_done, req);
+}
+
+/* Called when the connection is reported dead: fail the request with EPIPE. */
+static void dead_handler(void *private_data)
+{
+	struct tevent_req *writer_req;
+
+	writer_req = talloc_get_type_abort(private_data, struct tevent_req);
+	tevent_req_error(writer_req, EPIPE);
+}
+
+/*
+ * Store the outcome of the writer request in *perr: the unix error if
+ * the request failed, 0 otherwise.
+ */
+static void writer_recv(struct tevent_req *req, int *perr)
+{
+	if (! tevent_req_is_unix_error(req, perr)) {
+		*perr = 0;
+	}
+}
+
+/*
+ * Create a unix domain stream socket and connect it to sockpath.
+ *
+ * The connect is attempted up to five times with a one second pause
+ * after each failure.  Any failure, including a path that does not fit
+ * into sun_path, trips an assertion.  Returns the connected descriptor.
+ */
+static int socket_init(char *sockpath)
+{
+ struct sockaddr_un addr;
+ int fd, ret, i;
+ size_t len;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+
+ len = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+ assert(len < sizeof(addr.sun_path));
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ assert(fd != -1);
+
+ /* NOTE(review): presumably retries to give the server time to start listening */
+ for (i=0; i<5; i++) {
+ ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret == 0) {
+ break;
+ }
+ sleep(1);
+ }
+ assert(ret != -1);
+
+ return fd;
+}
+
+/*
+ * Client side of the comm test: connect to the socket given as the only
+ * argument, send 13 packets of varying sizes and assert that the writer
+ * request finished without error.
+ */
+int main(int argc, char *argv[])
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct tevent_req *req;
+ int fd;
+ size_t pkt_size[13] = { 100, 2048, 500, 4096, 1024, 8192,
+ 200, 16384, 300, 32768, 400, 65536,
+ 1024*1024 };
+ int err;
+
+ if (argc != 2) {
+ printf("Usage: %s <sockpath>\n", argv[0]);
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ fd = socket_init(argv[1]);
+
+ req = writer_send(mem_ctx, ev, fd, pkt_size, 13);
+ assert(req != NULL);
+
+ /* Run the event loop until the writer request completes */
+ tevent_req_poll(req, ev);
+
+ writer_recv(req, &err);
+ assert(err == 0);
+
+ exit(0);
+}
diff --git a/ctdb/tests/src/comm_server_test.c b/ctdb/tests/src/comm_server_test.c
new file mode 100644
index 0000000..86b5658
--- /dev/null
+++ b/ctdb/tests/src/comm_server_test.c
@@ -0,0 +1,292 @@
+/*
+ comm tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "lib/async_req/async_sock.h"
+
+#include "common/pkt_read.c"
+#include "common/pkt_write.c"
+#include "common/comm.c"
+
+/* State of a request that echoes packets back to one client. */
+struct echo_state {
+ struct tevent_context *ev;
+ int fd; /* client socket */
+ struct comm_context *comm;
+ uint8_t *data; /* copy of the packet currently being written back */
+};
+
+static void read_handler(uint8_t *buf, size_t buflen, void *private_data);
+static void read_failed(void *private_data);
+static void write_done(struct tevent_req *subreq);
+
+/*
+ * Start echoing packets on fd.  Incoming packets are handled by
+ * read_handler() and a read failure by read_failed().
+ *
+ * Returns NULL if the request cannot be allocated; a comm_setup()
+ * failure is reported through the returned request.
+ */
+static struct tevent_req *echo_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, int fd)
+{
+ struct tevent_req *req;
+ struct echo_state *state;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state, struct echo_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->fd = fd;
+
+ ret = comm_setup(state, ev, fd, read_handler, req,
+ read_failed, req, &state->comm);
+ if (ret != 0) {
+ tevent_req_error(req, ret);
+ return tevent_req_post(req, ev);
+ }
+
+ return req;
+}
+
+/*
+ * Called for every packet read from the client: copy the packet and
+ * queue the copy to be written back on the same connection.
+ *
+ * NOTE(review): state->data is a single slot that write_done() frees.
+ * If a second packet arrived before the previous write completed, the
+ * earlier copy would be overwritten here - looks like this relies on
+ * the client sending one packet at a time; confirm.
+ */
+static void read_handler(uint8_t *buf, size_t buflen, void *private_data)
+{
+ struct tevent_req *req = talloc_get_type_abort(
+ private_data, struct tevent_req);
+ struct echo_state *state = tevent_req_data(
+ req, struct echo_state);
+ struct tevent_req *subreq;
+
+ state->data = talloc_memdup(state, buf, buflen);
+ if (tevent_req_nomem(state->data, req)) {
+ return;
+ }
+
+ subreq = comm_write_send(state, state->ev, state->comm,
+ state->data, buflen);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, write_done, req);
+}
+
+/*
+ * Called when reading from the client fails.  This is how the echo
+ * request ends, so the request is completed successfully.
+ */
+static void read_failed(void *private_data)
+{
+	struct tevent_req *echo_req;
+
+	echo_req = talloc_get_type_abort(private_data, struct tevent_req);
+	tevent_req_done(echo_req);
+}
+
+/*
+ * Completion callback for an echoed packet: release the packet copy and
+ * fail the echo request if the write failed.
+ */
+static void write_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct echo_state *state = tevent_req_data(
+ req, struct echo_state);
+ bool ret;
+ int err;
+
+ /* The copy made in read_handler() is no longer needed */
+ TALLOC_FREE(state->data);
+
+ ret = comm_write_recv(subreq, &err);
+ TALLOC_FREE(subreq);
+ if (!ret) {
+ tevent_req_error(req, err);
+ return;
+ }
+}
+
+/*
+ * Collect the result of an echo request.
+ *
+ * On failure returns false and stores the error in *perr if perr is not
+ * NULL.  On success closes the client socket and returns true.  Note
+ * that the socket is not closed on the failure path.
+ */
+static bool echo_recv(struct tevent_req *req, int *perr)
+{
+ struct echo_state *state = tevent_req_data(
+ req, struct echo_state);
+ int err;
+
+ if (tevent_req_is_unix_error(req, &err)) {
+ if (perr != NULL) {
+ *perr = err;
+ }
+ return false;
+ }
+
+ close(state->fd);
+ return true;
+}
+
+
+/* State of the request that accepts clients on the listening socket. */
+struct socket_process_state {
+ struct tevent_context *ev;
+ int fd; /* listening socket */
+ int max_clients; /* stop accepting after this many accepts */
+ int num_clients; /* number of accept completions seen so far */
+};
+
+static void socket_process_client(struct tevent_req *subreq);
+static void socket_process_client_done(struct tevent_req *subreq);
+
+/*
+ * Start accepting clients on the listening socket fd.  Each accepted
+ * client is handled in socket_process_client().
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *socket_process_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd, int max_clients)
+{
+ struct tevent_req *req, *subreq;
+ struct socket_process_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct socket_process_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->fd = fd;
+ state->max_clients = max_clients;
+ state->num_clients = 0;
+
+ subreq = accept_send(state, ev, fd);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, socket_process_client, req);
+
+ return req;
+}
+
+/*
+ * Completion callback for accept: start an echo request for the new
+ * client and, unless max_clients accepts have completed, wait for the
+ * next client.  An accept failure fails the whole request.
+ */
+static void socket_process_client(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct socket_process_state *state = tevent_req_data(
+ req, struct socket_process_state);
+ int client_fd;
+ int err = 0;
+
+ client_fd = accept_recv(subreq, NULL, NULL, &err);
+ TALLOC_FREE(subreq);
+
+ /* Counted before the result is checked, so failed accepts count too */
+ state->num_clients++;
+
+ if (client_fd == -1) {
+ tevent_req_error(req, err);
+ return;
+ }
+
+ subreq = echo_send(state, state->ev, client_fd);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, socket_process_client_done, req);
+
+ if (state->num_clients == state->max_clients) {
+ /* Stop accepting any more clients */
+ return;
+ }
+
+ subreq = accept_send(state, state->ev, state->fd);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, socket_process_client, req);
+}
+
+/*
+ * Completion callback for one client's echo request.
+ *
+ * A failed echo fails the whole request with EIO; the error code
+ * returned by echo_recv() is not propagated.  Otherwise the request is
+ * completed once max_clients accepts have been seen.
+ *
+ * NOTE(review): the completion check counts accepted clients, not
+ * finished ones, so the request can complete while other echo requests
+ * are still running - confirm this is intended.
+ */
+static void socket_process_client_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct socket_process_state *state = tevent_req_data(
+ req, struct socket_process_state);
+ bool ret;
+ int err = 0;
+
+ ret = echo_recv(subreq, &err);
+ TALLOC_FREE(subreq);
+ if (!ret) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ if (state->num_clients == state->max_clients) {
+ tevent_req_done(req);
+ }
+}
+
+/*
+ * Collect the result of the accept request.  If it failed and perr is
+ * not NULL the error code is stored in *perr; on success *perr is left
+ * untouched.
+ */
+static void socket_process_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		return;
+	}
+
+	if (perr == NULL) {
+		return;
+	}
+
+	*perr = unix_err;
+}
+
+/*
+ * Create a unix domain stream socket, bind it to sockpath and start
+ * listening with a backlog of 10.  Any failure, including a path that
+ * does not fit into sun_path, trips an assertion.  Returns the
+ * listening descriptor.
+ */
+static int socket_init(char *sockpath)
+{
+ struct sockaddr_un addr;
+ int fd, ret;
+ size_t len;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+
+ len = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+ assert(len < sizeof(addr.sun_path));
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ assert(fd != -1);
+
+ ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ assert(ret != -1);
+
+ ret = listen(fd, 10);
+ assert(ret != -1);
+
+ return fd;
+}
+
+/*
+ * Server side of the comm test: listen on <sockpath>, echo packets for
+ * <num_clients> clients and exit with the error code of the accept
+ * request (0 if it did not fail).
+ */
+int main(int argc, char *argv[])
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct tevent_req *req;
+ int fd, err = 0;
+ int num_clients;
+
+ if (argc != 3) {
+ printf("Usage: %s <sockpath> <num_clients>\n", argv[0]);
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ fd = socket_init(argv[1]);
+ num_clients = atoi(argv[2]);
+ assert(num_clients > 0);
+
+ req = socket_process_send(mem_ctx, ev, fd, num_clients);
+ assert(req != NULL);
+
+ /* Run the event loop until the accept request completes */
+ tevent_req_poll(req, ev);
+
+ /* err stays 0 unless the request failed */
+ socket_process_recv(req, &err);
+ return err;
+}
diff --git a/ctdb/tests/src/comm_test.c b/ctdb/tests/src/comm_test.c
new file mode 100644
index 0000000..4595928
--- /dev/null
+++ b/ctdb/tests/src/comm_test.c
@@ -0,0 +1,501 @@
+/*
+ comm tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <assert.h>
+
+#include "common/pkt_read.c"
+#include "common/pkt_write.c"
+#include "common/comm.c"
+
+/*
+ * Test read_handler and dead_handler
+ */
+
+/* Read callback for test1: records that a packet was read by setting the result to -1. */
+static void test1_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ int *result = (int *)private_data;
+
+ *result = -1;
+}
+
+/* Dead callback for test1: records the event by setting the result to 1. */
+static void test1_dead_handler(void *private_data)
+{
+ int *result = (int *)private_data;
+
+ *result = 1;
+}
+
+/*
+ * Write one packet into a pipe and check that the read handler runs,
+ * then close the write end and check that the dead handler runs.
+ */
+static void test1(void)
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ int fd[2];
+ int result = 0;
+ uint32_t data[2];
+ int ret;
+ ssize_t n;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ ret = pipe(fd);
+ assert(ret == 0);
+
+ ret = comm_setup(ev, ev, fd[0], test1_read_handler, &result,
+ test1_dead_handler, &result, &comm);
+ assert(ret == 0);
+
+ /* An 8 byte packet whose first word is its own length */
+ data[0] = 2 * sizeof(uint32_t);
+ data[1] = 0;
+
+ n = write(fd[1], (void *)&data, data[0]);
+ assert(n == data[0]);
+
+ while (result == 0) {
+ tevent_loop_once(ev);
+ }
+
+ /* The read handler must have been called */
+ assert(result == -1);
+
+ result = 0;
+ close(fd[1]);
+
+ while (result == 0) {
+ tevent_loop_once(ev);
+ }
+
+ /* Closing the write end must trigger the dead handler */
+ assert(result == 1);
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Test that the tevent_req returned by comm_write_send() can be free'd.
+ */
+
+/* Shared state of test2. */
+struct test2_state {
+ TALLOC_CTX *mem_ctx; /* parent of the write request; freed by the read handler */
+ bool done; /* set by the timer to end the test */
+};
+
+/*
+ * Read callback for test2: frees the talloc context that owns the write
+ * request, and with it the request itself.
+ */
+static void test2_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ struct test2_state *state = (struct test2_state *)private_data;
+
+ TALLOC_FREE(state->mem_ctx);
+}
+
+/* Dead callback for test2: must never run, so abort the test if it does. */
+static void test2_dead_handler(void *private_data)
+{
+ abort();
+}
+
+/* State of the test2 write request. */
+struct test2_write_state {
+ int count; /* number of completed writes */
+};
+
+static void test2_write_done(struct tevent_req *subreq);
+
+/*
+ * Queue the same buffer for writing ten times.  The request completes
+ * in test2_write_done() once all ten writes have finished.
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *test2_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct comm_context *comm,
+ uint8_t *buf, size_t buflen)
+{
+ struct tevent_req *req, *subreq;
+ struct test2_write_state *state;
+ int i;
+
+ req = tevent_req_create(mem_ctx, &state, struct test2_write_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->count = 0;
+
+ /* All ten writes are children of state and so of the request */
+ for (i=0; i<10; i++) {
+ subreq = comm_write_send(state, ev, comm, buf, buflen);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, test2_write_done, req);
+ }
+
+ return req;
+}
+
+/*
+ * Completion callback for one of the ten queued writes.  A failed write
+ * fails the parent request; the tenth successful write completes it.
+ */
+static void test2_write_done(struct tevent_req *subreq)
+{
+	struct tevent_req *parent;
+	struct test2_write_state *wstate;
+	int err;
+	bool ok;
+
+	parent = tevent_req_callback_data(subreq, struct tevent_req);
+	wstate = tevent_req_data(parent, struct test2_write_state);
+
+	ok = comm_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(parent, err);
+		return;
+	}
+
+	wstate->count++;
+	if (wstate->count != 10) {
+		return;
+	}
+
+	tevent_req_done(parent);
+}
+
+/* Timer callback for test2: ends the event loop in test2() by setting done. */
+static void test2_timer_handler(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval cur_time,
+ void *private_data)
+{
+ struct test2_state *state = (struct test2_state *)private_data;
+
+ state->done = true;
+}
+
+/*
+ * Queue ten writes on one end of a pipe and free the owning talloc
+ * context from the read handler on the other end while writes may still
+ * be pending.  The event loop then keeps running until a five second
+ * timer fires; the test passes if nothing crashes and the dead handler
+ * is never called.
+ */
+static void test2(void)
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct comm_context *comm_reader, *comm_writer;
+ struct test2_state test2_state;
+ struct tevent_req *req;
+ struct tevent_timer *te;
+ int fd[2];
+ uint32_t data[2];
+ int ret;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ /* Separate context so the write request can be freed on its own */
+ test2_state.mem_ctx = talloc_new(mem_ctx);
+ assert(test2_state.mem_ctx != NULL);
+
+ test2_state.done = false;
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ ret = pipe(fd);
+ assert(ret == 0);
+
+ ret = comm_setup(ev, ev, fd[0], test2_read_handler, &test2_state,
+ test2_dead_handler, NULL, &comm_reader);
+ assert(ret == 0);
+
+ ret = comm_setup(ev, ev, fd[1], NULL, NULL, test2_dead_handler, NULL,
+ &comm_writer);
+ assert(ret == 0);
+
+ /* An 8 byte packet whose first word is its own length */
+ data[0] = 2 * sizeof(uint32_t);
+ data[1] = 0;
+
+ req = test2_write_send(test2_state.mem_ctx, ev, comm_writer,
+ (uint8_t *)data, data[0]);
+ assert(req != NULL);
+
+ te = tevent_add_timer(ev, ev, tevent_timeval_current_ofs(5,0),
+ test2_timer_handler, &test2_state);
+ assert(te != NULL);
+
+ while (! test2_state.done) {
+ tevent_loop_once(ev);
+ }
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Test that data is written and read correctly.
+ */
+
+/*
+ * Dead callback shared by both test3 processes.  private_data points to
+ * an int identifying the caller: 1 for the reader, 2 for the writer.
+ * Reports on stderr which side closed the pipe.
+ */
+static void test3_dead_handler(void *private_data)
+{
+	const int *side = (int *)private_data;
+
+	switch (*side) {
+	case 1:
+		/* reader */
+		fprintf(stderr, "writer closed pipe\n");
+		break;
+
+	case 2:
+		/* writer */
+		fprintf(stderr, "reader closed pipe\n");
+		break;
+
+	default:
+		assert(*side == 1 || *side == 2);
+		/* Only reached when assertions are compiled out */
+		fprintf(stderr, "reader closed pipe\n");
+		break;
+	}
+}
+
+/* State of the test3 writer request. */
+struct test3_writer_state {
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ uint8_t *buf; /* packet buffer, sized for the largest packet */
+ size_t *pkt_size; /* caller-owned array of packet sizes */
+ int count, id; /* number of packets; index of the current packet */
+};
+
+static void test3_writer_next(struct tevent_req *subreq);
+
+/*
+ * Start writing packets of the given sizes to comm, one after another.
+ *
+ * Every packet begins with its own length stored as a uint32_t and is
+ * otherwise filled with the byte pattern i%256.  Only the first packet
+ * is queued here; test3_writer_next() queues each following packet once
+ * the previous write has completed.
+ *
+ * pkt_size must hold count entries and stay valid for the lifetime of
+ * the request.  The request fails with EINVAL if count is 0 or if any
+ * packet is too small to hold the uint32_t length header, instead of
+ * reading or writing out of bounds.
+ *
+ * Returns NULL if the request or the packet buffer cannot be allocated.
+ */
+static struct tevent_req *test3_writer_send(TALLOC_CTX *mem_ctx,
+					    struct tevent_context *ev,
+					    struct comm_context *comm,
+					    size_t *pkt_size, size_t count)
+{
+	struct tevent_req *req, *subreq;
+	struct test3_writer_state *state;
+	size_t max_size = 0, buflen;
+	bool valid = (count > 0);
+	size_t i;
+
+	for (i=0; i<count; i++) {
+		if (pkt_size[i] < sizeof(uint32_t)) {
+			/* No room for the length header */
+			valid = false;
+		}
+		if (pkt_size[i] > max_size) {
+			max_size = pkt_size[i];
+		}
+	}
+
+	req = tevent_req_create(mem_ctx, &state, struct test3_writer_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	if (! valid) {
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
+	state->ev = ev;
+	state->comm = comm;
+	state->pkt_size = pkt_size;
+	state->count = count;
+	state->id = 0;
+
+	state->buf = talloc_array(state, uint8_t, max_size);
+	if (state->buf == NULL) {
+		talloc_free(req);
+		return NULL;
+	}
+	for (i=0; i<max_size; i++) {
+		state->buf[i] = i%256;
+	}
+
+	/* The first four bytes of every packet carry the packet length */
+	buflen = state->pkt_size[state->id];
+	*(uint32_t *)state->buf = buflen;
+	subreq = comm_write_send(state, state->ev, state->comm,
+				 state->buf, buflen);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_set_callback(subreq, test3_writer_next, req);
+	return req;
+}
+
+/*
+ * Completion callback for one packet write: fail the request on a write
+ * error, complete it after the last packet, otherwise queue the next
+ * packet.
+ */
+static void test3_writer_next(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct test3_writer_state *state = tevent_req_data(
+ req, struct test3_writer_state);
+ bool ret;
+ int err;
+ size_t buflen;
+
+ ret = comm_write_recv(subreq, &err);
+ TALLOC_FREE(subreq);
+ if (!ret) {
+ tevent_req_error(req, err);
+ return;
+ }
+
+ state->id++;
+ if (state->id >= state->count) {
+ tevent_req_done(req);
+ return;
+ }
+
+ /* Reuse the buffer: only the length header changes between packets */
+ buflen = state->pkt_size[state->id];
+ *(uint32_t *)state->buf = buflen;
+ subreq = comm_write_send(state, state->ev, state->comm,
+ state->buf, buflen);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+
+ tevent_req_set_callback(subreq, test3_writer_next, req);
+}
+
+/*
+ * Store the outcome of the test3 writer request in *perr: the unix
+ * error if the request failed, 0 otherwise.
+ */
+static void test3_writer_recv(struct tevent_req *req, int *perr)
+{
+	if (! tevent_req_is_unix_error(req, perr)) {
+		*perr = 0;
+	}
+}
+
+/*
+ * Writer side of test3: write count packets of the given sizes to fd
+ * and assert that the writer request finished without error.
+ */
+static void test3_writer(int fd, size_t *pkt_size, size_t count)
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ struct tevent_req *req;
+ int dead_data = 2;
+ int err;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ /* No read handler: this side only writes.  dead_data 2 marks the writer. */
+ err = comm_setup(mem_ctx, ev, fd, NULL, NULL,
+ test3_dead_handler, &dead_data, &comm);
+ assert(err == 0);
+ assert(comm != NULL);
+
+ req = test3_writer_send(mem_ctx, ev, comm, pkt_size, count);
+ assert(req != NULL);
+
+ tevent_req_poll(req, ev);
+
+ test3_writer_recv(req, &err);
+ assert(err == 0);
+
+ talloc_free(mem_ctx);
+}
+
+/* State of the test3 reader. */
+struct test3_reader_state {
+ size_t *pkt_size; /* expected packet sizes, in order */
+ int count, received; /* packets expected; packets received so far */
+ bool done; /* set once all packets have been received */
+};
+
+/*
+ * Read callback for test3: assert that the packet has the size expected
+ * for its position, print the size, and flag completion after the last
+ * expected packet.
+ */
+static void test3_reader_handler(uint8_t *buf, size_t buflen,
+				 void *private_data)
+{
+	struct test3_reader_state *state = talloc_get_type_abort(
+		private_data, struct test3_reader_state);
+
+	assert(buflen == state->pkt_size[state->received]);
+	/* buflen is a size_t, so %zu is the matching conversion (was %zi) */
+	printf("%zu ", buflen);
+	state->received++;
+
+	if (state->received == state->count) {
+		printf("\n");
+		state->done = true;
+	}
+}
+
+/*
+ * Reader side of test3: read packets from fd until count packets of the
+ * expected sizes have been received.
+ */
+static void test3_reader(int fd, size_t *pkt_size, int count)
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ struct test3_reader_state *state;
+ int dead_data = 1;
+ int err;
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ state = talloc_zero(mem_ctx, struct test3_reader_state);
+ assert(state != NULL);
+
+ state->pkt_size = pkt_size;
+ state->count = count;
+ state->received = 0;
+ state->done = false;
+
+ /* dead_data 1 marks the reader in test3_dead_handler() */
+ err = comm_setup(mem_ctx, ev, fd, test3_reader_handler, state,
+ test3_dead_handler, &dead_data, &comm);
+ assert(err == 0);
+ assert(comm != NULL);
+
+ while (!state->done) {
+ tevent_loop_once(ev);
+ }
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Test that data is written and read correctly across processes: a
+ * forked child writes 13 packets of varying sizes into a pipe and the
+ * parent reads them back.
+ *
+ * The parent reaps the child and asserts that it exited with status 0,
+ * so that an assertion failure in the writer fails the test instead of
+ * going unnoticed.
+ */
+static void test3(void)
+{
+	int fd[2];
+	int ret;
+	int status = 0;
+	pid_t pid, wpid;
+	size_t pkt_size[13] = { 100, 2048, 500, 4096, 1024, 8192,
+				200, 16384, 300, 32768, 400, 65536,
+				1024*1024 };
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		/* Child process */
+		close(fd[0]);
+		test3_writer(fd[1], pkt_size, 13);
+		close(fd[1]);
+		exit(0);
+	}
+
+	close(fd[1]);
+	test3_reader(fd[0], pkt_size, 13);
+	close(fd[0]);
+
+	/* Reap the writer, retrying if the wait is interrupted by a signal */
+	do {
+		wpid = waitpid(pid, &status, 0);
+	} while (wpid == -1 && errno == EINTR);
+	assert(wpid == pid);
+	assert(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+}
+
+
+/*
+ * Run the test selected by the number given as the only argument.
+ *
+ * Exits with status 1 when the argument is missing or does not name a
+ * test, so that a mistyped test number cannot be mistaken for a passing
+ * test.
+ */
+int main(int argc, const char **argv)
+{
+	int num;
+
+	if (argc != 2) {
+		fprintf(stderr, "%s <testnum>\n", argv[0]);
+		exit(1);
+	}
+
+	num = atoi(argv[1]);
+
+	switch (num) {
+	case 1:
+		test1();
+		break;
+
+	case 2:
+		test2();
+		break;
+
+	case 3:
+		test3();
+		break;
+
+	default:
+		fprintf(stderr, "Unknown test number %s\n", argv[1]);
+		/* Previously fell through and reported success */
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/conf_test.c b/ctdb/tests/src/conf_test.c
new file mode 100644
index 0000000..9b3bd8f
--- /dev/null
+++ b/ctdb/tests/src/conf_test.c
@@ -0,0 +1,513 @@
+/*
+ Configuration file handling on top of tini
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <assert.h>
+
+#include "common/conf.c"
+
+/*
+ * Test 1: section definition.
+ *
+ * Expects the context to stay valid after defining a named section and
+ * to become invalid after defining a section with a NULL name.
+ */
+static void test1(void)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	bool valid;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	/* Named section: expected to be accepted */
+	conf_define_section(ctx, "section1", NULL);
+	valid = conf_valid(ctx);
+	assert(valid);
+
+	/* NULL section name: expected to invalidate the context */
+	conf_define_section(ctx, NULL, NULL);
+	valid = conf_valid(ctx);
+	assert(!valid);
+
+	talloc_free(frame);
+}
+
+/*
+ * Test 2: defining a key in a section that was never defined is
+ * expected to leave the context invalid.
+ */
+static void test2(void)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	bool valid;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	/* "section1" has not been defined at this point */
+	conf_define_string(ctx, "section1", "key1", "default", NULL);
+	valid = conf_valid(ctx);
+	assert(!valid);
+
+	talloc_free(frame);
+}
+
+/*
+ * Test 3: defining the same string key twice in one section is
+ * expected to invalidate the context.
+ */
+static void test3(void)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	bool valid;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", NULL);
+	valid = conf_valid(ctx);
+	assert(valid);
+
+	/* First definition of key1, without a default value */
+	conf_define_string(ctx, "section1", "key1", NULL, NULL);
+	valid = conf_valid(ctx);
+	assert(valid);
+
+	/* Second definition of key1: expected to be rejected */
+	conf_define_string(ctx, "section1", "key1", "value1", NULL);
+	valid = conf_valid(ctx);
+	assert(!valid);
+
+	talloc_free(frame);
+}
+
+/*
+ * Test 4: redefining an existing string key as an integer key is
+ * expected to invalidate the context.
+ */
+static void test4(void)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	bool valid;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", NULL);
+	valid = conf_valid(ctx);
+	assert(valid);
+
+	/* key1 starts out as a string option */
+	conf_define_string(ctx, "section1", "key1", NULL, NULL);
+	valid = conf_valid(ctx);
+	assert(valid);
+
+	/* Same key again, now as an integer: expected to be rejected */
+	conf_define_integer(ctx, "section1", "key1", 10, NULL);
+	valid = conf_valid(ctx);
+	assert(!valid);
+
+	talloc_free(frame);
+}
+
+/*
+ * Test 5: typed keys, bound pointers and defaults.
+ *
+ * Defines one string, one integer and one boolean key, binds a caller
+ * variable to each, checks the type reported by conf_query() and
+ * checks that the bound variables hold the defaults both before and
+ * after conf_set_defaults().
+ */
+static void test5(void)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	enum conf_type key_type;
+	const char *bound_str;
+	int bound_int;
+	bool bound_bool;
+	bool ok;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", NULL);
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	/* One key of each supported type */
+	conf_define_string(ctx, "section1", "key1", "value1", NULL);
+	conf_define_integer(ctx, "section1", "key2", 10, NULL);
+	conf_define_boolean(ctx, "section1", "key3", true, NULL);
+
+	/* Bind local variables to the three keys */
+	conf_assign_string_pointer(ctx, "section1", "key1", &bound_str);
+	conf_assign_integer_pointer(ctx, "section1", "key2", &bound_int);
+	conf_assign_boolean_pointer(ctx, "section1", "key3", &bound_bool);
+
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	/* Each key must report the type it was defined with */
+	ok = conf_query(ctx, "section1", "key1", &key_type);
+	assert(ok);
+	assert(key_type == CONF_STRING);
+
+	ok = conf_query(ctx, "section1", "key2", &key_type);
+	assert(ok);
+	assert(key_type == CONF_INTEGER);
+
+	ok = conf_query(ctx, "section1", "key3", &key_type);
+	assert(ok);
+	assert(key_type == CONF_BOOLEAN);
+
+	/* Bound variables already carry the default values */
+	assert(strcmp(bound_str, "value1") == 0);
+	assert(bound_int == 10);
+	assert(bound_bool);
+
+	conf_set_defaults(ctx);
+
+	/* ... and still do after resetting to defaults */
+	assert(strcmp(bound_str, "value1") == 0);
+	assert(bound_int == 10);
+	assert(bound_bool);
+
+	talloc_free(frame);
+}
+
+/*
+ * Test 6: get/set round trip.
+ *
+ * Reads the defaults through conf_get_*() (expecting is_default to be
+ * reported as true), overrides all three keys through conf_set_*(),
+ * checks that both the bound variables and conf_get_*() see the new
+ * values (is_default false), dumps the configuration to stdout and
+ * finally checks that conf_set_defaults() restores the defaults.
+ */
+static void test6(void)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	const char *bound_str, *got_str;
+	int bound_int, got_int;
+	bool bound_bool, got_bool;
+	bool from_default;
+	bool ok;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", NULL);
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	conf_define_string(ctx, "section1", "key1", "default", NULL);
+	conf_define_integer(ctx, "section1", "key2", 10, NULL);
+	conf_define_boolean(ctx, "section1", "key3", true, NULL);
+
+	conf_assign_string_pointer(ctx, "section1", "key1", &bound_str);
+	conf_assign_integer_pointer(ctx, "section1", "key2", &bound_int);
+	conf_assign_boolean_pointer(ctx, "section1", "key3", &bound_bool);
+
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	/*
+	 * Defaults: the flag is pre-set to false so that the assertion
+	 * proves conf_get_*() actually wrote it.
+	 */
+	from_default = false;
+	rc = conf_get_string(ctx, "section1", "key1", &got_str,
+			     &from_default);
+	assert(rc == 0);
+	assert(strcmp(got_str, "default") == 0);
+	assert(from_default);
+
+	from_default = false;
+	rc = conf_get_integer(ctx, "section1", "key2", &got_int,
+			      &from_default);
+	assert(rc == 0);
+	assert(got_int == 10);
+	assert(from_default);
+
+	from_default = false;
+	rc = conf_get_boolean(ctx, "section1", "key3", &got_bool,
+			      &from_default);
+	assert(rc == 0);
+	assert(got_bool);
+	assert(from_default);
+
+	/* Override every key */
+	rc = conf_set_string(ctx, "section1", "key1", "foobar");
+	assert(rc == 0);
+
+	rc = conf_set_integer(ctx, "section1", "key2", 20);
+	assert(rc == 0);
+
+	rc = conf_set_boolean(ctx, "section1", "key3", false);
+	assert(rc == 0);
+
+	/* The bound variables follow the new values */
+	assert(strcmp(bound_str, "foobar") == 0);
+	assert(bound_int == 20);
+	assert(!bound_bool);
+
+	/* Overridden values: flag pre-set to true this time */
+	from_default = true;
+	rc = conf_get_string(ctx, "section1", "key1", &got_str,
+			     &from_default);
+	assert(rc == 0);
+	assert(strcmp(got_str, "foobar") == 0);
+	assert(!from_default);
+
+	from_default = true;
+	rc = conf_get_integer(ctx, "section1", "key2", &got_int,
+			      &from_default);
+	assert(rc == 0);
+	assert(got_int == 20);
+	assert(!from_default);
+
+	from_default = true;
+	rc = conf_get_boolean(ctx, "section1", "key3", &got_bool,
+			      &from_default);
+	assert(rc == 0);
+	assert(!got_bool);
+	assert(!from_default);
+
+	conf_dump(ctx, stdout);
+
+	conf_set_defaults(ctx);
+
+	/* Back to the defaults */
+	assert(strcmp(bound_str, "default") == 0);
+	assert(bound_int == 10);
+	assert(bound_bool);
+
+	talloc_free(frame);
+}
+
+/*
+ * String validation callback for test7(): ignores its arguments and
+ * always returns false.
+ */
+static bool test7_validate_string(const char *key,
+				  const char *old_value, const char *new_value,
+				  enum conf_update_mode mode)
+{
+	/* All arguments are deliberately unused */
+	(void)key;
+	(void)old_value;
+	(void)new_value;
+	(void)mode;
+
+	return false;
+}
+
+/*
+ * Integer validation callback for test7(): ignores its arguments and
+ * always returns false.
+ */
+static bool test7_validate_integer(const char *key,
+				   int old_value, int new_value,
+				   enum conf_update_mode mode)
+{
+	/* All arguments are deliberately unused */
+	(void)key;
+	(void)old_value;
+	(void)new_value;
+	(void)mode;
+
+	return false;
+}
+
+/*
+ * Boolean validation callback for test7(): ignores its arguments and
+ * always returns false.
+ */
+static bool test7_validate_boolean(const char *key,
+				   bool old_value, bool new_value,
+				   enum conf_update_mode mode)
+{
+	/* All arguments are deliberately unused */
+	(void)key;
+	(void)old_value;
+	(void)new_value;
+	(void)mode;
+
+	return false;
+}
+
+/*
+ * Test 7: validation callbacks and type mismatches.
+ *
+ * Every key is defined with a validation callback that always returns
+ * false.  Setting a key to its current value is expected to succeed,
+ * setting it to a different value is expected to fail with EINVAL and
+ * leave the bound variables untouched.  Reading a key through the
+ * getter of another type is expected to fail with EINVAL as well.
+ */
+static void test7(void)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	const char *bound_str, *got_str;
+	int bound_int, got_int;
+	bool bound_bool, got_bool;
+	bool ok;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", NULL);
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	conf_define_string(ctx, "section1", "key1", "default",
+			   test7_validate_string);
+	conf_define_integer(ctx, "section1", "key2", 10,
+			    test7_validate_integer);
+	conf_define_boolean(ctx, "section1", "key3", true,
+			    test7_validate_boolean);
+
+	conf_assign_string_pointer(ctx, "section1", "key1", &bound_str);
+	conf_assign_integer_pointer(ctx, "section1", "key2", &bound_int);
+	conf_assign_boolean_pointer(ctx, "section1", "key3", &bound_bool);
+
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	/* String key: unchanged value succeeds, changed value fails */
+	rc = conf_set_string(ctx, "section1", "key1", "default");
+	assert(rc == 0);
+
+	rc = conf_set_string(ctx, "section1", "key1", "foobar");
+	assert(rc == EINVAL);
+
+	/* Integer key */
+	rc = conf_set_integer(ctx, "section1", "key2", 10);
+	assert(rc == 0);
+
+	rc = conf_set_integer(ctx, "section1", "key2", 20);
+	assert(rc == EINVAL);
+
+	/* Boolean key */
+	rc = conf_set_boolean(ctx, "section1", "key3", true);
+	assert(rc == 0);
+
+	rc = conf_set_boolean(ctx, "section1", "key3", false);
+	assert(rc == EINVAL);
+
+	/* The failed updates must not have changed anything */
+	assert(strcmp(bound_str, "default") == 0);
+	assert(bound_int == 10);
+	assert(bound_bool);
+
+	/* Getters used on a key of a different type */
+	rc = conf_get_string(ctx, "section1", "key2", &got_str, NULL);
+	assert(rc == EINVAL);
+
+	rc = conf_get_integer(ctx, "section1", "key3", &got_int, NULL);
+	assert(rc == EINVAL);
+
+	rc = conf_get_boolean(ctx, "section1", "key1", &got_bool, NULL);
+	assert(rc == EINVAL);
+
+	talloc_free(frame);
+}
+
+/*
+ * Section validation callback for test8(): ignores its arguments and
+ * always returns false.
+ */
+static bool test8_validate(struct conf_context *conf,
+			   const char *section,
+			   enum conf_update_mode mode)
+{
+	/* All arguments are deliberately unused */
+	(void)conf;
+	(void)section;
+	(void)mode;
+
+	return false;
+}
+
+/*
+ * Test 8: load filename into a configuration whose only section uses
+ * test8_validate() as its validation callback.
+ *
+ * The configuration is dumped to stdout and the process exits with the
+ * return value of conf_load(); this function does not return.
+ */
+static void test8(const char *filename)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	bool ok;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", test8_validate);
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	conf_define_string(ctx, "section1", "key1", "default", NULL);
+
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	/* Third argument is passed as true here */
+	rc = conf_load(ctx, filename, true);
+	conf_dump(ctx, stdout);
+
+	talloc_free(frame);
+
+	/* The load result is the exit status of the test */
+	exit(rc);
+}
+
+/*
+ * Tests 9 and 10: load filename into a configuration with one section
+ * and three typed keys, after overriding key3 to false.
+ *
+ * ignore_unknown is handed through to conf_load().  The configuration
+ * is dumped to stdout and the process exits with the return value of
+ * conf_load(); this function does not return.
+ */
+static void test9(const char *filename, bool ignore_unknown)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	bool ok;
+	int rc;
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", NULL);
+
+	conf_define_string(ctx, "section1", "key1", "value1", NULL);
+	conf_define_integer(ctx, "section1", "key2", 10, NULL);
+	conf_define_boolean(ctx, "section1", "key3", true, NULL);
+
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	/* Change key3 away from its default before loading the file */
+	conf_set_boolean(ctx, "section1", "key3", false);
+
+	rc = conf_load(ctx, filename, ignore_unknown);
+	conf_dump(ctx, stdout);
+
+	talloc_free(frame);
+
+	/* The load result is the exit status of the test */
+	exit(rc);
+}
+
+/*
+ * Test 11: reload.
+ *
+ * Loads filename, then renames "<filename>.reload" over filename and
+ * calls conf_reload().  Both steps must succeed.  The resulting
+ * configuration is dumped to stdout and the process exits with the
+ * return value of conf_reload(); this function does not return.
+ */
+static void test11(const char *filename)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct conf_context *ctx;
+	char reload_path[PATH_MAX];
+	bool ok;
+	int rc;
+
+	/* Build "<filename>.reload" and make sure it was not truncated */
+	rc = snprintf(reload_path, sizeof(reload_path), "%s.reload",
+		      filename);
+	assert((size_t)rc < sizeof(reload_path));
+
+	rc = conf_init(frame, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	conf_define_section(ctx, "section1", NULL);
+
+	conf_define_string(ctx, "section1", "key1", "value1", NULL);
+	conf_define_integer(ctx, "section1", "key2", 10, NULL);
+	conf_define_boolean(ctx, "section1", "key3", true, NULL);
+
+	ok = conf_valid(ctx);
+	assert(ok);
+
+	rc = conf_load(ctx, filename, false);
+	assert(rc == 0);
+
+	/* Swap in the replacement file under the original name */
+	rc = rename(reload_path, filename);
+	assert(rc == 0);
+
+	rc = conf_reload(ctx);
+	assert(rc == 0);
+
+	conf_dump(ctx, stdout);
+
+	talloc_free(frame);
+	exit(rc);
+}
+
+/*
+ * Run the conf test selected by the first argument.
+ *
+ * Tests numbered above 7 additionally require a configuration file
+ * name as second argument.  Tests 8 to 11 terminate the process
+ * themselves via exit().  Returns 0 after one of tests 1 to 7 ran to
+ * completion and 1 for an unknown test number.
+ */
+int main(int argc, const char **argv)
+{
+	int num;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s <testnum> [<config>]\n", argv[0]);
+		exit(1);
+	}
+
+	num = atoi(argv[1]);
+	if (num > 7 && argc != 3) {
+		fprintf(stderr, "Usage: %s <testnum> [<config>]\n", argv[0]);
+		exit(1);
+	}
+
+	switch (num) {
+	case 1:
+		test1();
+		break;
+
+	case 2:
+		test2();
+		break;
+
+	case 3:
+		test3();
+		break;
+
+	case 4:
+		test4();
+		break;
+
+	case 5:
+		test5();
+		break;
+
+	case 6:
+		test6();
+		break;
+
+	case 7:
+		test7();
+		break;
+
+	case 8:
+		test8(argv[2]);
+		break;
+
+	case 9:
+		test9(argv[2], true);
+		break;
+
+	case 10:
+		/* Test 10 is test9() with ignore_unknown disabled */
+		test9(argv[2], false);
+		break;
+
+	case 11:
+		test11(argv[2]);
+		break;
+
+	default:
+		/* A test that does not exist must not be reported as passed */
+		fprintf(stderr, "Unknown test number %s\n", argv[1]);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/ctdb_io_test.c b/ctdb/tests/src/ctdb_io_test.c
new file mode 100644
index 0000000..b035342
--- /dev/null
+++ b/ctdb/tests/src/ctdb_io_test.c
@@ -0,0 +1,356 @@
+/*
+ ctdb_io tests
+
+ Copyright (C) Christof Schmitt 2019
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "common/ctdb_io.c"
+
+/*
+ * Test replacement for ctdb_set_error(): print the formatted message
+ * to stdout and then fail the test through assert(false).
+ *
+ * The ctdb argument is not used.
+ */
+void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vprintf(fmt, args);
+	va_end(args);
+
+	/* Any reported error is a test failure */
+	assert(false);
+}
+
+/*
+ * Common fixture: create a pipe, a zeroed ctdb context with its own
+ * event context, and a ctdb queue reading from the pipe.
+ *
+ * cb      callback handed to ctdb_queue_setup()
+ * pfd     receives the write end of the pipe
+ * pctdb   receives the new ctdb context (the queue is allocated on it)
+ * pqueue  if not NULL, receives the new queue
+ *
+ * Any failure aborts the test through assert().
+ */
+static void test_setup(ctdb_queue_cb_fn_t cb,
+		       int *pfd,
+		       struct ctdb_context **pctdb,
+		       struct ctdb_queue **pqueue)
+{
+	int pipefd[2], ret;
+	struct ctdb_context *ctdb;
+	struct ctdb_queue *queue;
+
+	ret = pipe(pipefd);
+	assert(ret == 0);
+
+	ctdb = talloc_zero(NULL, struct ctdb_context);
+	assert(ctdb != NULL);
+
+	ctdb->ev = tevent_context_init(NULL);
+	/* Fail here rather than inside the queue code on a NULL context */
+	assert(ctdb->ev != NULL);
+
+	/* The queue reads from the pipe's read end */
+	queue = ctdb_queue_setup(ctdb, ctdb, pipefd[0], 0, cb,
+				 NULL, "test queue");
+	assert(queue != NULL);
+
+	*pctdb = ctdb;
+	*pfd = pipefd[1];
+	if (pqueue != NULL) {
+		*pqueue = queue;
+	}
+}
+
+/* Payload written by test1() after the 4-byte length field */
+static const size_t test1_req_len = 8;
+static const char *test1_req = "abcdefgh";
+
+/*
+ * Queue callback for test1(): the packet must consist of a uint32_t
+ * length field holding the total packet size, followed by test1_req.
+ */
+static void test1_callback(uint8_t *data, size_t length, void *private_data)
+{
+	const size_t expected = sizeof(uint32_t) + test1_req_len;
+	uint32_t hdr_len = *(uint32_t *)data;
+
+	/* Length field and delivered length must both match */
+	assert(hdr_len == expected);
+	assert(length == expected);
+
+	/* Payload follows directly after the length field */
+	assert(memcmp(data + sizeof(hdr_len), test1_req, test1_req_len) == 0);
+}
+
+/*
+ * Test 1: write one complete packet (length field plus test1_req) to
+ * the queue's pipe and run one event loop iteration; the packet is
+ * checked by test1_callback().
+ */
+static void test1(void)
+{
+	struct ctdb_context *ctdb;
+	uint32_t total_len;
+	ssize_t nwritten;
+	int wfd;
+
+	test_setup(test1_callback, &wfd, &ctdb, NULL);
+
+	/* The length field counts itself plus the payload */
+	total_len = sizeof(uint32_t) + test1_req_len;
+
+	nwritten = write(wfd, &total_len, sizeof(total_len));
+	assert(nwritten != -1 && (size_t)nwritten == sizeof(total_len));
+
+	nwritten = write(wfd, test1_req, test1_req_len);
+	assert(nwritten != -1 && (size_t)nwritten == test1_req_len);
+
+	tevent_loop_once(ctdb->ev);
+
+	TALLOC_FREE(ctdb);
+}
+
+/* Payload sizes of the three requests written by test2(), in order */
+static const size_t test2_req_len[] = { 900, 24, 600 };
+
+/* Number of packets test2_callback() has seen so far */
+static int test2_cb_num = 0;
+
+/*
+ * Queue callback for test2(): the packet's length field and the
+ * delivered length must both match the next expected request size.
+ */
+static void test2_callback(uint8_t *data, size_t length, void *private_data)
+{
+	const size_t expected = sizeof(uint32_t) + test2_req_len[test2_cb_num];
+	uint32_t hdr_len = *(uint32_t *)data;
+
+	assert(hdr_len == expected);
+	assert(length == expected);
+
+	/* Move on to the next expected request */
+	test2_cb_num += 1;
+}
+
+/*
+ * Test 2: several packets, one of them delivered in two pieces.
+ *
+ * Request 0 is written completely, request 1 is written without its
+ * last byte, then the event loop runs once.  Afterwards the missing
+ * byte and the complete request 2 are written and the event loop runs
+ * twice more.  Each delivered packet is checked by test2_callback().
+ */
+static void test2(void)
+{
+ struct ctdb_context *ctdb;
+ int fd;
+ ssize_t ret;
+ size_t i;
+ uint32_t pkt_size;
+ char req[1024] = { 0 };
+
+ /* Fill the payload buffer with a repeating byte pattern */
+ for (i = 0; i < sizeof(req); i++) {
+ req[i] = i % CHAR_MAX;
+ }
+
+ test_setup(test2_callback, &fd, &ctdb, NULL);
+
+ /*
+ * request 0
+ */
+
+ pkt_size = sizeof(uint32_t) + test2_req_len[0];
+ ret = write(fd, &pkt_size, sizeof(pkt_size));
+ assert(ret != -1 && (size_t)ret == sizeof(pkt_size));
+
+ ret = write(fd, req, test2_req_len[0]);
+ assert(ret != -1 && (size_t)ret == test2_req_len[0]);
+
+ /*
+ * request 1
+ */
+ pkt_size = sizeof(uint32_t) + test2_req_len[1];
+ ret = write(fd, &pkt_size, sizeof(pkt_size));
+ assert(ret != -1 && (size_t)ret == sizeof(pkt_size));
+
+ /*
+ * Omit the last byte to avoid buffer processing.
+ */
+ ret = write(fd, req, test2_req_len[1] - 1);
+ assert(ret != -1 && (size_t)ret == test2_req_len[1] - 1);
+
+ tevent_loop_once(ctdb->ev);
+
+ /*
+ * Write the missing byte now.
+ */
+ ret = write(fd, &req[test2_req_len[1] - 1], 1);
+ assert(ret != -1 && (size_t)ret == 1);
+
+ /*
+ * request 2
+ */
+ pkt_size = sizeof(uint32_t) + test2_req_len[2];
+ ret = write(fd, &pkt_size, sizeof(pkt_size));
+ assert(ret != -1 && (size_t)ret == sizeof(pkt_size));
+
+ ret = write(fd, req, test2_req_len[2]);
+ assert(ret != -1 && (size_t)ret == test2_req_len[2]);
+
+ tevent_loop_once(ctdb->ev);
+ tevent_loop_once(ctdb->ev);
+
+ /*
+ * NOTE(review): three requests were written but only two callback
+ * invocations are expected at this point - presumably the third
+ * needs another event loop iteration; confirm against ctdb_io.c.
+ */
+ assert(test2_cb_num == 2);
+
+ TALLOC_FREE(ctdb);
+}
+
+/*
+ * Queue callback for test3() and test4(): performs no checks and only
+ * releases the delivered packet buffer.
+ */
+static void test_cb(uint8_t *data, size_t length, void *private_data)
+{
+	/* length and private_data are deliberately unused */
+	(void)length;
+	(void)private_data;
+
+	TALLOC_FREE(data);
+}
+
+/*
+ * Test 3: buffer offset handling with a trailing incomplete packet.
+ *
+ * Writes one complete packet followed by part of a second one, checks
+ * that queue->buffer.offset advanced past the first packet, then
+ * writes more (but not all) of the second packet and checks that the
+ * offset is back at 0.
+ */
+static void test3(void)
+{
+ struct ctdb_context *ctdb;
+ struct ctdb_queue *queue;
+ uint32_t pkt_size;
+ char *request;
+ size_t req_len;
+ int fd;
+ ssize_t ret;
+
+ test_setup(test_cb, &fd, &ctdb, &queue);
+ /* Zeroed payload source, as large as the queue's buffer size */
+ request = talloc_zero_size(queue, queue->buffer_size);
+
+ /*
+ * calculate a request length which will fit into the buffer
+ * but not twice. Because we need to write the size integer
+ * as well (4-bytes) we're guaranteed that no 2 packets will fit.
+ */
+ req_len = queue->buffer_size >> 1;
+
+ /* writing first packet */
+ pkt_size = sizeof(uint32_t) + req_len;
+
+ ret = write(fd, &pkt_size, sizeof(pkt_size));
+ assert(ret != -1 && (size_t)ret == sizeof(pkt_size));
+
+ ret = write(fd, request, req_len);
+ assert(ret != -1 && (size_t)ret == req_len);
+
+ /* writing second, incomplete packet */
+ pkt_size = sizeof(uint32_t) + req_len;
+
+ ret = write(fd, &pkt_size, sizeof(pkt_size));
+ assert(ret != -1 && (size_t)ret == sizeof(pkt_size));
+
+ /* only the first half of the second packet's payload */
+ ret = write(fd, request, req_len >> 1);
+ assert(ret != -1 && (size_t)ret == req_len >> 1);
+
+ /* process...only 1st packet can be processed */
+ tevent_loop_once(ctdb->ev);
+
+ /* we should see a progressed offset of req_len + sizeof(pkt_size) */
+ assert(queue->buffer.offset == req_len + sizeof(pkt_size));
+
+ /* writing another few bytes of the still incomplete packet */
+ ret = write(fd, request, (req_len >> 1) - 1);
+ assert(ret != -1 && (size_t)ret == (req_len >> 1) - 1);
+
+ /*
+ * the packet is still incomplete and cannot be processed
+ * but the packet data had to be moved in the buffer in order
+ * to fetch the newly written bytes -> offset must be 0 now.
+ */
+ tevent_loop_once(ctdb->ev);
+ /*
+ * needs to be called twice as an incomplete packet
+ * does not trigger a schedule_immediate
+ */
+ tevent_loop_once(ctdb->ev);
+
+ assert(queue->buffer.offset == 0);
+
+ TALLOC_FREE(ctdb);
+}
+
+/*
+ * Test 4: packet larger than the standard buffer size.
+ *
+ * Sends a packet whose payload is twice queue->buffer_size in two
+ * pieces and checks queue->buffer.size at three points: equal to the
+ * packet size while the packet is pending, 0 once it was processed,
+ * and equal to queue->buffer_size again after a following small
+ * packet.
+ */
+static void test4(void)
+{
+ struct ctdb_context *ctdb;
+ struct ctdb_queue *queue;
+ uint32_t pkt_size;
+ char *request;
+ size_t req_len, half_buf_size;
+ int fd;
+ ssize_t ret;
+
+ test_setup(test_cb, &fd, &ctdb, &queue);
+
+ req_len = queue->buffer_size << 1; /* double the buffer size */
+ request = talloc_zero_size(queue, req_len);
+
+ /* writing first part of packet exceeding standard buffer size */
+ pkt_size = sizeof(uint32_t) + req_len;
+
+ ret = write(fd, &pkt_size, sizeof(pkt_size));
+ assert(ret != -1 && (size_t)ret == sizeof(pkt_size));
+
+ half_buf_size = queue->buffer_size >> 1;
+
+ /* hold back the last half_buf_size bytes of the payload */
+ ret = write(fd, request, req_len - half_buf_size);
+ assert(ret != -1 && (size_t)ret == req_len - half_buf_size);
+
+ /*
+ * process...
+ * this needs to be done to have things changed
+ */
+ tevent_loop_once(ctdb->ev);
+ /*
+ * needs to be called twice as an initial incomplete packet
+ * does not trigger a schedule_immediate
+ */
+ tevent_loop_once(ctdb->ev);
+
+ /* the buffer should be resized to packet size now */
+ assert(queue->buffer.size == pkt_size);
+
+ /* writing remaining data */
+ ret = write(fd, request, half_buf_size);
+ assert(ret != -1 && (size_t)ret == half_buf_size);
+
+ /* process... */
+ tevent_loop_once(ctdb->ev);
+
+ /*
+ * the buffer was increased beyond its standard size.
+ * once packet got processed, the buffer has to be free'd
+ * and will be re-allocated with standard size on new request arrival.
+ */
+
+ assert(queue->buffer.size == 0);
+
+ /* writing new packet to verify standard buffer size */
+ pkt_size = sizeof(uint32_t) + half_buf_size;
+
+ ret = write(fd, &pkt_size, sizeof(pkt_size));
+ assert(ret != -1 && (size_t)ret == sizeof(pkt_size));
+
+ ret = write(fd, request, half_buf_size);
+ assert(ret != -1 && (size_t)ret == half_buf_size);
+
+ /* process... */
+ tevent_loop_once(ctdb->ev);
+
+ /* back to standard buffer size */
+ assert(queue->buffer.size == queue->buffer_size);
+
+ TALLOC_FREE(ctdb);
+}
+
+/*
+ * Run the ctdb_io test selected by the single numeric argument.
+ *
+ * Returns 0 once a known test has run to completion and 1 when the
+ * test number is not known.  A wrong argument count exits with 1.
+ */
+int main(int argc, const char **argv)
+{
+	int num;
+
+	if (argc != 2) {
+		fprintf(stderr, "%s <testnum>\n", argv[0]);
+		exit(1);
+	}
+
+	num = atoi(argv[1]);
+	switch (num) {
+	case 1:
+		test1();
+		break;
+
+	case 2:
+		test2();
+		break;
+
+	case 3:
+		test3();
+		break;
+
+	case 4:
+		test4();
+		break;
+
+	default:
+		fprintf(stderr, "Unknown test number %s\n", argv[1]);
+		/* A test that does not exist must not be reported as passed */
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/ctdb_packet_parse.c b/ctdb/tests/src/ctdb_packet_parse.c
new file mode 100644
index 0000000..0b99b34
--- /dev/null
+++ b/ctdb/tests/src/ctdb_packet_parse.c
@@ -0,0 +1,136 @@
+/*
+ CTDB protocol parser
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/locale.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+
+/*
+ * Convert one line of strace-style escaped text into raw bytes.
+ *
+ * Recognised escapes are a backslash followed by one to three digits
+ * (interpreted as an octal value) and \a \b \t \n \v \f \r.  Any other
+ * escape is reported on stderr and produces a 0 byte.  Literal newline
+ * characters are dropped; every other character is copied unchanged.
+ *
+ * buf      NUL-terminated input line
+ * mem_ctx  talloc context the output buffer is allocated on
+ *
+ * Returns the decoded bytes, or tdb_null if the allocation failed.
+ */
+static TDB_DATA strace_parser(char *buf, TALLOC_CTX *mem_ctx)
+{
+	TDB_DATA data;
+	/* Computed once; the input is not modified while parsing */
+	size_t len = strlen(buf);
+	size_t i = 0, j = 0;
+
+	/*
+	 * Every output byte consumes at least one input character, so
+	 * the output can never be longer than the input.
+	 */
+	data.dptr = talloc_size(mem_ctx, len);
+	if (data.dptr == NULL) {
+		return tdb_null;
+	}
+
+	while (i < len) {
+		unsigned char next;
+
+		if (buf[i] == '\n') {
+			i += 1;
+			continue;
+		}
+
+		if (buf[i] != '\\') {
+			data.dptr[j++] = buf[i];
+			i += 1;
+			continue;
+		}
+
+		/*
+		 * buf[i] is a backslash.  buf[i+1] is at worst the
+		 * terminating NUL, so reading it is always in bounds.
+		 * The ctype functions need an unsigned char value.
+		 */
+		next = (unsigned char)buf[i+1];
+
+		if (isdigit(next)) {
+			char tmp[4] = { '\0', '\0', '\0', '\0' };
+			size_t ndigits = 1;
+
+			/* Collect up to three digits, stopping at the NUL */
+			tmp[0] = buf[i+1];
+			if (isdigit((unsigned char)buf[i+2])) {
+				tmp[1] = buf[i+2];
+				ndigits = 2;
+				if (isdigit((unsigned char)buf[i+3])) {
+					tmp[2] = buf[i+3];
+					ndigits = 3;
+				}
+			}
+			data.dptr[j++] = strtol(tmp, NULL, 8);
+			i += 1 + ndigits;
+			continue;
+		}
+
+		switch (next) {
+		case 'a':
+			data.dptr[j++] = 7;
+			i += 2;
+			break;
+		case 'b':
+			data.dptr[j++] = 8;
+			i += 2;
+			break;
+		case 't':
+			data.dptr[j++] = 9;
+			i += 2;
+			break;
+		case 'n':
+			data.dptr[j++] = 10;
+			i += 2;
+			break;
+		case 'v':
+			data.dptr[j++] = 11;
+			i += 2;
+			break;
+		case 'f':
+			data.dptr[j++] = 12;
+			i += 2;
+			break;
+		case 'r':
+			data.dptr[j++] = 13;
+			i += 2;
+			break;
+		case '\0':
+			/* Backslash is the last character of the input */
+			fprintf(stderr, "Dangling backslash at end of input\n");
+			data.dptr[j++] = 0;
+			i += 1;
+			break;
+		default:
+			fprintf(stderr,
+				"Unknown escape \\%c\n",
+				buf[i+1]);
+			data.dptr[j++] = 0;
+			/*
+			 * Skip the escape.  Without this the same
+			 * backslash is parsed again forever and j runs
+			 * past the end of the output buffer.
+			 */
+			i += 2;
+			break;
+		}
+	}
+
+	data.dsize = j;
+
+	return data;
+}
+
+/*
+ * Read escaped packet dumps from stdin, one per line, decode each with
+ * the parser selected by the single argument (only "strace" is known)
+ * and print the decoded packet with ctdb_packet_print().
+ *
+ * Exits with status 1 on a wrong argument count or an unknown format.
+ */
+int main(int argc, char *argv[])
+{
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	TDB_DATA (*parser)(char *, TALLOC_CTX *);
+	char line[1024];
+
+	if (argc != 2) {
+		fprintf(stderr, "Usage: %s strace\n", argv[0]);
+		exit(1);
+	}
+
+	if (strcmp(argv[1], "strace") != 0) {
+		fprintf(stderr, "Unknown input format - %s\n", argv[1]);
+		exit(1);
+	}
+	parser = strace_parser;
+
+	for (;;) {
+		TDB_DATA data;
+		char *ptr = fgets(line, sizeof(line), stdin);
+
+		if (ptr == NULL) {
+			/* End of input or read error */
+			break;
+		}
+
+		data = parser(ptr, mem_ctx);
+		if (data.dptr != NULL) {
+			ctdb_packet_print(data.dptr, data.dsize, stdout);
+			TALLOC_FREE(data.dptr);
+		}
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/ctdb_takeover_tests.c b/ctdb/tests/src/ctdb_takeover_tests.c
new file mode 100644
index 0000000..ad7d7ee
--- /dev/null
+++ b/ctdb/tests/src/ctdb_takeover_tests.c
@@ -0,0 +1,281 @@
+/*
+ Tests for ctdb_takeover.c
+
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <assert.h>
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_util.h"
+#include "common/logging.h"
+#include "common/system.h"
+
+#include "server/ipalloc.h"
+
+#include "ipalloc_read_known_ips.h"
+
+/*
+ * Print every entry of the linked list ips to stdout as
+ * "<address> <pnn>", one entry per line.  The address strings are
+ * allocated on mem_ctx.
+ */
+static void print_ctdb_public_ip_list(TALLOC_CTX *mem_ctx,
+				      struct public_ip_list * ips)
+{
+	struct public_ip_list *cur;
+
+	for (cur = ips; cur != NULL; cur = cur->next) {
+		const char *addr_str =
+			ctdb_sock_addr_to_string(mem_ctx, &(cur->addr), false);
+
+		printf("%s %d\n", addr_str, cur->pnn);
+	}
+}
+
+static uint32_t *get_tunable_values(TALLOC_CTX *tmp_ctx,
+ int numnodes,
+ const char *tunable);
+static enum ctdb_runstate *get_runstate(TALLOC_CTX *tmp_ctx,
+ int numnodes);
+
+/*
+ * Read the known public IPs for numnodes nodes via
+ * ipalloc_read_known_ips() and derive the available IPs from them.
+ *
+ * A node's available list is a copy of its known list when its
+ * runstate (see get_runstate()) is CTDB_RUNSTATE_RUNNING and is left
+ * zeroed otherwise.  Both arrays are returned through known and avail;
+ * avail is allocated on ctx.
+ */
+static void read_ctdb_public_ip_info(TALLOC_CTX *ctx,
+				     int numnodes,
+				     bool multi,
+				     struct ctdb_public_ip_list ** known,
+				     struct ctdb_public_ip_list ** avail)
+{
+	struct ctdb_public_ip_list *known_ips;
+	struct ctdb_public_ip_list *avail_ips;
+	enum ctdb_runstate *states;
+	int node;
+
+	known_ips = ipalloc_read_known_ips(ctx, numnodes, multi);
+	*known = known_ips;
+	assert(known_ips != NULL);
+
+	avail_ips = talloc_zero_array(ctx, struct ctdb_public_ip_list,
+				      numnodes);
+	*avail = avail_ips;
+	assert(avail_ips != NULL);
+
+	states = get_runstate(ctx, numnodes);
+	for (node = 0; node < numnodes; node++) {
+		if (states[node] != CTDB_RUNSTATE_RUNNING) {
+			/* Entry stays zeroed: nothing available */
+			continue;
+		}
+		avail_ips[node] = known_ips[node];
+	}
+}
+
+/*
+ * Build a per-node array of values from the environment variable
+ * named by tunable.
+ *
+ * - variable unset:  all numnodes values are 0
+ * - variable is "1": all numnodes values are 1
+ * - otherwise:       the variable is a comma-separated list that must
+ *                    contain exactly numnodes numbers (parsed with
+ *                    strtol(), base auto-detected)
+ *
+ * The array is allocated on tmp_ctx.  A list with the wrong number of
+ * values prints an error and exits with status 1.
+ */
+static uint32_t *get_tunable_values(TALLOC_CTX *tmp_ctx,
+				    int numnodes,
+				    const char *tunable)
+{
+	int i;
+	char *tok;
+	char *copy;
+	uint32_t *tvals = talloc_zero_array(tmp_ctx, uint32_t, numnodes);
+	const char *t = getenv(tunable);
+
+	assert(tvals != NULL);
+
+	if (t == NULL) {
+		return tvals;
+	}
+
+	if (strcmp(t, "1") == 0) {
+		for (i = 0; i < numnodes; i++) {
+			tvals[i] = 1;
+		}
+		return tvals;
+	}
+
+	/*
+	 * strtok() writes into its argument and the string returned by
+	 * getenv() must not be modified, so tokenise a private copy.
+	 */
+	copy = talloc_strdup(tmp_ctx, t);
+	assert(copy != NULL);
+
+	i = 0;
+	for (tok = strtok(copy, ","); tok != NULL; tok = strtok(NULL, ",")) {
+		/*
+		 * Keep counting surplus tokens for the check below, but
+		 * never store beyond the end of the array.
+		 */
+		if (i < numnodes) {
+			tvals[i] = (uint32_t)strtol(tok, NULL, 0);
+		}
+		i++;
+	}
+	talloc_free(copy);
+
+	if (i != numnodes) {
+		fprintf(stderr,
+			"ERROR: Wrong number of values in %s\n",
+			tunable);
+		exit(1);
+	}
+
+	return tvals;
+}
+
+/*
+ * Build a per-node runstate array, allocated on tmp_ctx.
+ *
+ * Without CTDB_TEST_RUNSTATE in the environment every node is
+ * CTDB_RUNSTATE_RUNNING.  Otherwise the values come from
+ * get_tunable_values() for that variable, cast to enum ctdb_runstate.
+ */
+static enum ctdb_runstate *get_runstate(TALLOC_CTX *tmp_ctx,
+					int numnodes)
+{
+	enum ctdb_runstate *states =
+		talloc_zero_array(tmp_ctx, enum ctdb_runstate, numnodes);
+	uint32_t *raw;
+	int node;
+
+	if (getenv("CTDB_TEST_RUNSTATE") == NULL) {
+		/* Default: all nodes are running */
+		for (node = 0; node < numnodes; node++) {
+			states[node] = CTDB_RUNSTATE_RUNNING;
+		}
+		return states;
+	}
+
+	raw = get_tunable_values(tmp_ctx, numnodes, "CTDB_TEST_RUNSTATE");
+	for (node = 0; node < numnodes; node++) {
+		states[node] = (enum ctdb_runstate) raw[node];
+	}
+	/* The raw values are no longer needed */
+	talloc_free(raw);
+
+	return states;
+}
+
+/* Fake up enough CTDB state to be able to run the IP allocation
+ * algorithm.  Usually this sets up some standard state, sets the node
+ * states from the command-line and reads the current IP layout from
+ * stdin.
+ *
+ * However, if read_ips_for_multiple_nodes is true then each node's
+ * idea of the IP layout is read separately from stdin.  In this mode
+ * it doesn't make much sense to use read_ctdb_public_ip_info's
+ * optional ALLOWED_PNN,... list in the input, since each node is
+ * being handled separately anyway.  IPs for each node are separated
+ * by a blank line.  This mode is for testing weird behaviours where
+ * the IP layouts differ across nodes and we want to improve
+ * create_merged_ip_list(), so should only be used in tests of
+ * ipalloc().  Yes, it is a hack...  :-)
+ *
+ * nodestates is a comma-separated list with one flags value per node.
+ * The algorithm is taken from CTDB_IP_ALGORITHM ("lcp2", "nondet" or
+ * "det"; default lcp2) and the NoIPTakeover setting from
+ * CTDB_SET_NoIPTakeover.  The new state is returned through
+ * ipalloc_state; allocation failures abort through assert().
+ */
+static void ctdb_test_init(TALLOC_CTX *mem_ctx,
+			   const char nodestates[],
+			   struct ipalloc_state **ipalloc_state,
+			   bool read_ips_for_multiple_nodes)
+{
+	struct ctdb_public_ip_list *known;
+	struct ctdb_public_ip_list *avail;
+	char *tok, *ns;
+	const char *t;
+	struct ctdb_node_map *nodemap;
+	uint32_t noiptakeover;
+	ctdb_sock_addr sa_zero = { .ip = { 0 } };
+	enum ipalloc_algorithm algorithm;
+	uint32_t n;
+
+	/* Avoid that const: strtok() needs a writable copy */
+	ns = talloc_strdup(mem_ctx, nodestates);
+	assert(ns != NULL);
+
+	nodemap = talloc_zero(mem_ctx, struct ctdb_node_map);
+	assert(nodemap != NULL);
+	nodemap->num = 0;
+
+	/* One node per token; the token is that node's flags value */
+	tok = strtok(ns, ",");
+	while (tok != NULL) {
+		n = nodemap->num;
+		nodemap->node = talloc_realloc(nodemap, nodemap->node,
+					       struct ctdb_node_and_flags, n+1);
+		/* Do not write through a failed reallocation */
+		assert(nodemap->node != NULL);
+		nodemap->node[n].pnn = n;
+		nodemap->node[n].flags = (uint32_t) strtol(tok, NULL, 0);
+		nodemap->node[n].addr = sa_zero;
+		nodemap->num++;
+		tok = strtok(NULL, ",");
+	}
+
+	algorithm = IPALLOC_LCP2;
+	if ((t = getenv("CTDB_IP_ALGORITHM"))) {
+		if (strcmp(t, "lcp2") == 0) {
+			algorithm = IPALLOC_LCP2;
+		} else if (strcmp(t, "nondet") == 0) {
+			algorithm = IPALLOC_NONDETERMINISTIC;
+		} else if (strcmp(t, "det") == 0) {
+			algorithm = IPALLOC_DETERMINISTIC;
+		} else {
+			DEBUG(DEBUG_ERR,
+			      ("ERROR: unknown IP algorithm %s\n", t));
+			exit(1);
+		}
+	}
+
+	t = getenv("CTDB_SET_NoIPTakeover");
+	if (t != NULL) {
+		noiptakeover = (uint32_t) strtol(t, NULL, 0);
+	} else {
+		noiptakeover = 0;
+	}
+
+	*ipalloc_state = ipalloc_state_init(mem_ctx, nodemap->num,
+					    algorithm,
+					    (noiptakeover != 0),
+					    false,
+					    NULL);
+	assert(*ipalloc_state != NULL);
+
+	read_ctdb_public_ip_info(mem_ctx, nodemap->num,
+				 read_ips_for_multiple_nodes,
+				 &known, &avail);
+
+	/* Drop available IPs for INACTIVE/DISABLED nodes */
+	for (n = 0; n < nodemap->num; n++) {
+		uint32_t flags = nodemap->node[n].flags;
+		if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
+			avail[n].num = 0;
+		}
+	}
+
+	ipalloc_set_public_ips(*ipalloc_state, known, avail);
+}
+
+/* Run the IP allocation for the given node states and print the
+ * resulting IP list to stdout.
+ *
+ * The IP layout is read from stdin.  See the comment for
+ * ctdb_test_init() for an explanation of read_ips_for_multiple_nodes.
+ */
+static void ctdb_test_ipalloc(const char nodestates[],
+			      bool read_ips_for_multiple_nodes)
+{
+	TALLOC_CTX *frame = talloc_new(NULL);
+	struct ipalloc_state *state;
+	struct public_ip_list *result;
+
+	ctdb_test_init(frame, nodestates, &state,
+		       read_ips_for_multiple_nodes);
+
+	result = ipalloc(state);
+	print_ctdb_public_ip_list(frame, result);
+
+	talloc_free(frame);
+}
+
+/* Print the usage line to stderr and exit with status 1 */
+static void usage(void)
+{
+	fputs("usage: ctdb_takeover_tests <op>\n", stderr);
+	exit(1);
+}
+
+/*
+ * Entry point.  Accepted command lines:
+ *
+ *   ctdb_takeover_tests ipalloc <nodestates>
+ *   ctdb_takeover_tests ipalloc <nodestates> multi
+ *
+ * Anything else prints the usage line and exits with status 1.  The
+ * log level comes from CTDB_TEST_LOGLEVEL and falls back to
+ * DEBUG_DEBUG when debug_level_parse() rejects the value.
+ */
+int main(int argc, const char *argv[])
+{
+	const char *level_str = getenv("CTDB_TEST_LOGLEVEL");
+	bool is_ipalloc;
+	int level;
+
+	setup_logging("ctdb_takeover_tests", DEBUG_STDERR);
+
+	if (! debug_level_parse(level_str, &level)) {
+		level = DEBUG_DEBUG;
+	}
+	debuglevel_set(level);
+
+	/* Both accepted forms start with "ipalloc <nodestates>" */
+	is_ipalloc = (argc >= 3) && (strcmp(argv[1], "ipalloc") == 0);
+
+	if (is_ipalloc && argc == 3) {
+		ctdb_test_ipalloc(argv[2], false);
+	} else if (is_ipalloc && argc == 4 &&
+		   strcmp(argv[3], "multi") == 0) {
+		ctdb_test_ipalloc(argv[2], true);
+	} else {
+		usage();
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/db_hash_test.c b/ctdb/tests/src/db_hash_test.c
new file mode 100644
index 0000000..31aa501
--- /dev/null
+++ b/ctdb/tests/src/db_hash_test.c
@@ -0,0 +1,138 @@
+/*
+ db_hash tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <assert.h>
+
+#include "common/db_hash.c"
+
+/*
+ * Record callback that only counts its invocations: private_data points
+ * to an int that is incremented once per call.  The key and data
+ * buffers are ignored.  Always returns 0.
+ */
+static int record_parser(uint8_t *keybuf, size_t keylen,
+			 uint8_t *databuf, size_t datalen,
+			 void *private_data)
+{
+	int *counter = private_data;
+
+	++(*counter);
+	return 0;
+}
+
+/*
+ * Exercise the basic db_hash operations for one database type and
+ * check the return code of every step: calls on a NULL context, then
+ * init, insert, exists, fetch, delete and add on a real context.
+ */
+static void do_test(enum db_hash_type type)
+{
+ struct db_hash_context *dh = NULL;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ uint8_t key[] = "This is a long key";
+ uint8_t value[] = "This is a long value";
+ int ret;
+ int count = 0;
+
+ /* dh is still NULL here: every operation must fail with EINVAL */
+ ret = db_hash_insert(dh, key, sizeof(key), value, sizeof(value));
+ assert(ret == EINVAL);
+
+ ret = db_hash_add(dh, key, sizeof(key), value, sizeof(value));
+ assert(ret == EINVAL);
+
+ ret = db_hash_exists(dh, key, sizeof(key));
+ assert(ret == EINVAL);
+
+ ret = db_hash_delete(dh, key, sizeof(key));
+ assert(ret == EINVAL);
+
+ ret = db_hash_init(mem_ctx, "foobar", 1024, type, &dh);
+ assert(ret == 0);
+
+ ret = db_hash_insert(dh, key, sizeof(key), value, sizeof(value));
+ assert(ret == 0);
+
+ ret = db_hash_exists(dh, key, sizeof(key));
+ assert(ret == 0);
+
+ /* A fetch without a parser callback is rejected */
+ ret = db_hash_fetch(dh, key, sizeof(key), NULL, NULL);
+ assert(ret == EINVAL);
+
+ /* record_parser must be called exactly once for the one record */
+ ret = db_hash_fetch(dh, key, sizeof(key), record_parser, &count);
+ assert(ret == 0);
+ assert(count == 1);
+
+ /* Inserting a key that already exists fails with EEXIST */
+ ret = db_hash_insert(dh, key, sizeof(key), value, sizeof(value));
+ assert(ret == EEXIST);
+
+ ret = db_hash_delete(dh, key, sizeof(key));
+ assert(ret == 0);
+
+ /* After the delete the key is gone for both exists and delete */
+ ret = db_hash_exists(dh, key, sizeof(key));
+ assert(ret == ENOENT);
+
+ ret = db_hash_delete(dh, key, sizeof(key));
+ assert(ret == ENOENT);
+
+ /* Unlike insert, add succeeds both for a new and an existing key */
+ ret = db_hash_add(dh, key, sizeof(key), key, sizeof(key));
+ assert(ret == 0);
+
+ ret = db_hash_add(dh, key, sizeof(key), value, sizeof(value));
+ assert(ret == 0);
+
+ /* talloc_get_size(mem_ctx) is expected to be 0 once dh is freed */
+ talloc_free(dh);
+ ret = talloc_get_size(mem_ctx);
+ assert(ret == 0);
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Check db_hash_traverse() for one database type: it must reject a
+ * NULL context, report the number of stored records when called
+ * without a parser, and run the parser over the records otherwise.
+ */
+static void do_traverse_test(enum db_hash_type type)
+{
+	const int num_records = 2000;
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct db_hash_context *dh = NULL;
+	char keybuf[16] = "keyXXXX";
+	char payload[] = "This is some test value";
+	int nrecords, ret, n;
+
+	ret = db_hash_traverse(dh, NULL, NULL, &nrecords);
+	assert(ret == EINVAL);
+
+	ret = db_hash_init(mem_ctx, "foobar", 1024, type, &dh);
+	assert(ret == 0);
+
+	/* The key is the whole 16-byte buffer, not just the text in it */
+	n = 0;
+	while (n < num_records) {
+		sprintf(keybuf, "key%04d", n);
+		ret = db_hash_insert(dh,
+				     (uint8_t *)keybuf, sizeof(keybuf),
+				     (uint8_t *)payload, sizeof(payload));
+		assert(ret == 0);
+		n++;
+	}
+
+	/* Without a parser only the record count is reported */
+	ret = db_hash_traverse(dh, NULL, NULL, &nrecords);
+	assert(ret == 0);
+	assert(nrecords == num_records);
+
+	/* record_parser adds one per record on top of the previous count */
+	ret = db_hash_traverse(dh, record_parser, &nrecords, NULL);
+	assert(ret == 0);
+	assert(nrecords == 2 * num_records);
+
+	talloc_free(dh);
+	talloc_free(mem_ctx);
+}
+
+/*
+ * Run the basic tests and then the traverse tests, each for the
+ * DB_HASH_SIMPLE and DB_HASH_COMPLEX database types.
+ */
+int main(void)
+{
+	const enum db_hash_type types[] = { DB_HASH_SIMPLE, DB_HASH_COMPLEX };
+	const size_t num_types = sizeof(types) / sizeof(types[0]);
+	size_t i;
+
+	for (i = 0; i < num_types; i++) {
+		do_test(types[i]);
+	}
+	for (i = 0; i < num_types; i++) {
+		do_traverse_test(types[i]);
+	}
+	return 0;
+}
diff --git a/ctdb/tests/src/db_test_tool.c b/ctdb/tests/src/db_test_tool.c
new file mode 100644
index 0000000..e99da3c
--- /dev/null
+++ b/ctdb/tests/src/db_test_tool.c
@@ -0,0 +1,792 @@
+/*
+ CTDB DB test tool
+
+ Copyright (C) Martin Schwenke 2019
+
+ Parts based on ctdb.c, event_tool.c:
+
+ Copyright (C) Amitay Isaacs 2015, 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/time.h"
+
+#include <ctype.h>
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/util.h"
+#include "lib/util/smb_strtox.h"
+#include "lib/tdb_wrap/tdb_wrap.h"
+
+#include "common/cmdline.h"
+#include "common/logging.h"
+#include "common/path.h"
+#include "common/event_script.h"
+#include "common/system_socket.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "protocol/protocol_util.h"
+
+#include "client/client.h"
+#include "client/client_sync.h"
+
+/*
+ * Accessor for the tdb handle of an attached database.  It is declared
+ * locally here; presumably the included client headers do not export
+ * it - TODO confirm.
+ */
+struct tdb_context *client_db_tdb(struct ctdb_db_context *db);
+
+/*
+ * Timeout for a control: ctx->timelimit seconds from now.  Expands to
+ * an expression that needs a variable named "ctx" in scope.
+ */
+#define TIMEOUT() tevent_timeval_current_ofs(ctx->timelimit, 0)
+
+/*
+ * State shared by all commands of the tool:
+ *  cmdline   - command line parser, set up by db_test_tool_init()
+ *  ev        - tevent context, created by db_test_tool_run()
+ *  client    - connection to the CTDB daemon, opened by db_test_tool_run()
+ *  destnode  - node that per-database controls are sent to
+ *  timelimit - time limit used by TIMEOUT()
+ */
+struct db_test_tool_context {
+ struct cmdline_context *cmdline;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ uint32_t destnode;
+ uint32_t timelimit;
+};
+
+/*
+ * If this is ever consolidated into a larger test tool then these
+ * forward declarations can be moved to an include file
+ */
+int db_test_tool_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ int argc,
+ const char **argv,
+ bool parse_options,
+ struct db_test_tool_context **result);
+int db_test_tool_run(struct db_test_tool_context *ctx, int *result);
+
+/*
+ * Command "get-lmaster <key>": print the lmaster for a key.
+ *
+ * The key is hashed with tdb_jenkins_hash() and the hash, modulo the
+ * map size, is used as an index into the VNN map fetched from the
+ * current node.
+ *
+ * Returns 0 on success, 1 on a usage error, EINVAL if the VNN map is
+ * empty, or the error returned by the control.
+ */
+static int db_test_get_lmaster(TALLOC_CTX *mem_ctx,
+			       int argc,
+			       const char **argv,
+			       void *private_data)
+{
+	struct db_test_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct db_test_tool_context);
+	struct ctdb_vnn_map *vnnmap;
+	TDB_DATA key;
+	uint32_t idx, lmaster;
+	unsigned int hash;
+	int ret = 0;
+
+	if (argc != 1) {
+		cmdline_usage(ctx->cmdline, "get-lmaster");
+		return 1;
+	}
+
+	ret = ctdb_ctrl_getvnnmap(mem_ctx,
+				  ctx->ev,
+				  ctx->client,
+				  CTDB_CURRENT_NODE,
+				  TIMEOUT(),
+				  &vnnmap);
+	if (ret != 0) {
+		D_ERR("Control GETVNN_MAP failed, ret=%d\n", ret);
+		return ret;
+	}
+
+	/* An empty map would make the modulo below divide by zero */
+	if (vnnmap->size == 0) {
+		D_ERR("VNN map is empty\n");
+		return EINVAL;
+	}
+
+	key.dsize = strlen(argv[0]);
+	key.dptr = (uint8_t *)discard_const(argv[0]);
+
+	hash = tdb_jenkins_hash(&key);
+	idx = hash % vnnmap->size;
+	lmaster = vnnmap->map[idx];
+
+	/* lmaster is unsigned, so use the matching format macro */
+	printf("%"PRIu32"\n", lmaster);
+
+	return 0;
+}
+
+/*
+ * Look up a database by name in dbmap.
+ *
+ * The name of each database in the map is fetched from ctx->destnode
+ * and compared with db_name.
+ *
+ * Returns a pointer to the matching entry inside dbmap, or NULL if no
+ * database matches or fetching a name fails.
+ */
+static struct ctdb_dbid *db_find(TALLOC_CTX *mem_ctx,
+				 struct db_test_tool_context *ctx,
+				 struct ctdb_dbid_map *dbmap,
+				 const char *db_name)
+{
+	const char *name;
+	unsigned int i;
+	bool match;
+	int ret;
+
+	for (i = 0; i < dbmap->num; i++) {
+		ret = ctdb_ctrl_get_dbname(mem_ctx,
+					   ctx->ev,
+					   ctx->client,
+					   ctx->destnode,
+					   TIMEOUT(),
+					   dbmap->dbs[i].db_id,
+					   &name);
+		if (ret != 0) {
+			return NULL;
+		}
+
+		match = (strcmp(db_name, name) == 0);
+
+		/*
+		 * The fetched name is only needed for the comparison, so
+		 * release it on every iteration instead of leaving the
+		 * names of all non-matching databases on mem_ctx.
+		 */
+		talloc_free(discard_const(name));
+
+		if (match) {
+			return &dbmap->dbs[i];
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Resolve a database given by name or by ID and report its details.
+ *
+ * db_arg is treated as a database ID if it starts with "0x", otherwise
+ * as a database name.  The database map and the database names are
+ * fetched from ctx->destnode.
+ *
+ * On success true is returned and, for each non-NULL output pointer,
+ * *db_id, *db_name (a talloc copy on mem_ctx) and *db_flags are filled
+ * in.  On any failure false is returned and no output is modified.
+ */
+static bool db_exists(TALLOC_CTX *mem_ctx,
+		      struct db_test_tool_context *ctx,
+		      const char *db_arg,
+		      uint32_t *db_id,
+		      const char **db_name,
+		      uint8_t *db_flags)
+{
+	struct ctdb_dbid_map *dbmap;
+	struct ctdb_dbid *db = NULL;
+	uint32_t id = 0;
+	const char *name = NULL;
+	unsigned int i;
+	int ret = 0;
+
+	ret = ctdb_ctrl_get_dbmap(mem_ctx,
+				  ctx->ev,
+				  ctx->client,
+				  ctx->destnode,
+				  TIMEOUT(),
+				  &dbmap);
+	if (ret != 0) {
+		return false;
+	}
+
+	if (strncmp(db_arg, "0x", 2) == 0) {
+		id = smb_strtoul(db_arg, NULL, 0, &ret, SMB_STR_STANDARD);
+		if (ret != 0) {
+			return false;
+		}
+		for (i = 0; i < dbmap->num; i++) {
+			if (id == dbmap->dbs[i].db_id) {
+				db = &dbmap->dbs[i];
+				break;
+			}
+		}
+	} else {
+		name = db_arg;
+		db = db_find(mem_ctx, ctx, dbmap, name);
+	}
+
+	if (db == NULL) {
+		fprintf(stderr, "No database matching '%s' found\n", db_arg);
+		return false;
+	}
+
+	/* When looked up by ID the name still has to be fetched */
+	if (name == NULL) {
+		ret = ctdb_ctrl_get_dbname(mem_ctx,
+					   ctx->ev,
+					   ctx->client,
+					   ctx->destnode,
+					   TIMEOUT(),
+					   id,
+					   &name);
+		if (ret != 0) {
+			return false;
+		}
+	}
+
+	/*
+	 * Copy the name first: it is the only step that can still fail,
+	 * and success must never be reported with a NULL *db_name
+	 * because callers use the name unconditionally.
+	 */
+	if (db_name != NULL) {
+		const char *copy = talloc_strdup(mem_ctx, name);
+		if (copy == NULL) {
+			fprintf(stderr, "Memory allocation error\n");
+			return false;
+		}
+		*db_name = copy;
+	}
+	if (db_id != NULL) {
+		*db_id = db->db_id;
+	}
+	if (db_flags != NULL) {
+		*db_flags = db->flags;
+	}
+	return true;
+}
+
+/*
+ * Command "fetch-local-delete <dbname|dbid> <key>".
+ *
+ * Takes a fetch lock on the record in a volatile database and then
+ * overwrites the record in the local tdb with only its packed header,
+ * i.e. without any data.
+ *
+ * Returns 0 on success, 1 on a usage error, ENOENT if the database is
+ * not found, EINVAL if it is not volatile, ENOMEM on allocation
+ * failure, or the non-zero result of the failing attach, fetch lock or
+ * tdb_store() call.
+ */
+static int db_test_fetch_local_delete(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data)
+{
+ struct db_test_tool_context *ctx = talloc_get_type_abort(
+ private_data, struct db_test_tool_context);
+ struct ctdb_db_context *db = NULL;
+ struct ctdb_record_handle *h = NULL;
+ struct tdb_context *tdb;
+ struct ctdb_ltdb_header header;
+ const char *db_name;
+ TDB_DATA key, data;
+ uint32_t db_id;
+ uint8_t db_flags;
+ size_t len;
+ uint8_t *buf;
+ size_t np;
+ int ret;
+
+ if (argc != 2) {
+ cmdline_usage(ctx->cmdline, "fetch-local-delete");
+ return 1;
+ }
+
+ if (! db_exists(mem_ctx, ctx, argv[0], &db_id, &db_name, &db_flags)) {
+ return ENOENT;
+ }
+
+ /* Only databases that are neither persistent nor replicated */
+ if (db_flags & (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) {
+ D_ERR("DB %s is not a volatile database\n", db_name);
+ return EINVAL;
+ }
+
+ ret = ctdb_attach(ctx->ev,
+ ctx->client,
+ TIMEOUT(),
+ db_name,
+ db_flags,
+ &db);
+ if (ret != 0) {
+ D_ERR("Failed to attach to DB %s\n", db_name);
+ return ret;
+ }
+
+ /* The key is the argument string without its terminating NUL */
+ key.dsize = strlen(argv[1]);
+ key.dptr = (uint8_t *)discard_const(argv[1]);
+
+ /* Only the header is requested; NULL is passed for the data */
+ ret = ctdb_fetch_lock(mem_ctx,
+ ctx->ev,
+ ctx->client,
+ db,
+ key,
+ false,
+ &h,
+ &header,
+ NULL);
+ if (ret != 0) {
+ D_ERR("Failed to fetch record for key %s\n", argv[1]);
+ goto done;
+ }
+
+ len = ctdb_ltdb_header_len(&header);
+ buf = talloc_size(mem_ctx, len);
+ if (buf == NULL) {
+ D_ERR("Memory allocation error\n");
+ ret = ENOMEM;
+ goto done;
+ }
+
+ ctdb_ltdb_header_push(&header, buf, &np);
+
+ /* The new record consists of the packed header and nothing else */
+ data.dsize = np;
+ data.dptr = buf;
+
+ tdb = client_db_tdb(db);
+
+ ret = tdb_store(tdb, key, data, TDB_REPLACE);
+ TALLOC_FREE(buf);
+ if (ret != 0) {
+ D_ERR("fetch_lock delete: %s tdb_store failed, %s\n",
+ db_name,
+ tdb_errorstr(tdb));
+ }
+
+done:
+ /* h is still NULL if the fetch lock failed; freeing it is harmless */
+ TALLOC_FREE(h);
+
+ return ret;
+}
+
+/* True for printable characters other than '"' and '\' */
+#define ISASCII(x) (isprint(x) && ! strchr("\"\\", (x)))
+
+/*
+ * Print a buffer on stdout in the form  name(size) = "..."  where
+ * bytes accepted by ISASCII() are written verbatim and all other bytes
+ * as a backslash followed by two uppercase hex digits.
+ */
+static void dump(const char *name, uint8_t *dptr, size_t dsize)
+{
+	size_t pos;
+
+	fprintf(stdout, "%s(%zu) = \"", name, dsize);
+	for (pos = 0; pos < dsize; pos++) {
+		uint8_t byte = dptr[pos];
+
+		if (! ISASCII(byte)) {
+			fprintf(stdout, "\\%02X", byte);
+			continue;
+		}
+		fputc(byte, stdout);
+	}
+	fputs("\"\n", stdout);
+}
+
+/*
+ * Print the dmaster, rsn and flags of a record header on stdout.  The
+ * flags line shows the raw value followed by the name of each of the
+ * flags listed below that is set.
+ */
+static void dump_ltdb_header(struct ctdb_ltdb_header *header)
+{
+	/* Flag names are printed in exactly this order */
+	const struct {
+		uint32_t bit;
+		const char *label;
+	} known_flags[] = {
+		{ CTDB_REC_FLAG_MIGRATED_WITH_DATA, " MIGRATED_WITH_DATA" },
+		{ CTDB_REC_FLAG_VACUUM_MIGRATED, " VACUUM_MIGRATED" },
+		{ CTDB_REC_FLAG_AUTOMATIC, " AUTOMATIC" },
+		{ CTDB_REC_RO_HAVE_DELEGATIONS, " RO_HAVE_DELEGATIONS" },
+		{ CTDB_REC_RO_HAVE_READONLY, " RO_HAVE_READONLY" },
+		{ CTDB_REC_RO_REVOKING_READONLY, " RO_REVOKING_READONLY" },
+		{ CTDB_REC_RO_REVOKE_COMPLETE, " RO_REVOKE_COMPLETE" },
+	};
+	const size_t num_flags = sizeof(known_flags) / sizeof(known_flags[0]);
+	size_t i;
+
+	fprintf(stdout, "dmaster: %u\n", header->dmaster);
+	fprintf(stdout, "rsn: %" PRIu64 "\n", header->rsn);
+	fprintf(stdout, "flags: 0x%08x", header->flags);
+	for (i = 0; i < num_flags; i++) {
+		if ((header->flags & known_flags[i].bit) != 0) {
+			fputs(known_flags[i].label, stdout);
+		}
+	}
+	fprintf(stdout, "\n");
+}
+
+/*
+ * Command "local-lock <dbname|dbid> <key>": take the chain lock for a
+ * key in the local tdb of a volatile database and keep holding it in a
+ * child process.
+ *
+ * The child locks the chain, sends the result to the parent through a
+ * pipe, closes its standard streams and then sleeps while holding the
+ * lock.  The parent prints "OK <child pid>" or "FAIL <status>" and
+ * returns that status.
+ *
+ * Returns 1 for usage, lookup, attach, pipe and fork errors.
+ */
+static int db_test_local_lock(TALLOC_CTX *mem_ctx,
+			      int argc,
+			      const char **argv,
+			      void *private_data)
+{
+	struct db_test_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct db_test_tool_context);
+	struct ctdb_db_context *db;
+	const char *db_name;
+	int pipefd[2];
+	TDB_DATA key;
+	uint32_t db_id;
+	uint8_t db_flags;
+	pid_t pid;
+	int ret;
+
+	if (argc != 2) {
+		cmdline_usage(ctx->cmdline, "local-lock");
+		return 1;
+	}
+
+	if (! db_exists(mem_ctx, ctx, argv[0], &db_id, &db_name, &db_flags)) {
+		/*
+		 * db_name is only set when db_exists() succeeds, so the
+		 * message has to use the command line argument instead.
+		 */
+		D_ERR("DB %s not attached\n", argv[0]);
+		return 1;
+	}
+
+	if (db_flags & (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) {
+		D_ERR("DB %s is not a volatile database\n", db_name);
+		return 1;
+	}
+
+	ret = ctdb_attach(ctx->ev,
+			  ctx->client,
+			  TIMEOUT(),
+			  db_name,
+			  db_flags,
+			  &db);
+	if (ret != 0) {
+		D_ERR("Failed to attach to DB %s\n", db_name);
+		return 1;
+	}
+
+	ret = pipe(pipefd);
+	if (ret != 0) {
+		DBG_ERR("Failed to create pipe\n");
+		return 1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		DBG_ERR("Failed to fork()\n");
+		/* No child was created, so nobody else owns the pipe */
+		close(pipefd[0]);
+		close(pipefd[1]);
+		return 1;
+	}
+
+	if (pid != 0) {
+		/* Parent: wait for the child to report the lock result */
+		ssize_t nread;
+		int status;
+
+		close(pipefd[1]);
+
+		nread = sys_read(pipefd[0], &status, sizeof(status));
+		close(pipefd[0]);
+		if (nread < 0 || (size_t)nread != sizeof(status)) {
+			status = EINVAL;
+		}
+
+		if (status == 0) {
+			printf("OK %d\n", pid);
+		} else {
+			printf("FAIL %d\n", status);
+		}
+		fflush(stdout);
+
+		return status;
+	}
+
+	/* Child: take the lock and report the result to the parent */
+	close(pipefd[0]);
+
+	key.dsize = strlen(argv[1]);
+	key.dptr = (uint8_t *)discard_const(argv[1]);
+
+	ret = tdb_chainlock(client_db_tdb(db), key);
+	if (ret != 0) {
+		D_ERR("Failed to lock chain for key %s\n", argv[1]);
+		goto fail;
+	}
+
+	sys_write(pipefd[1], &ret, sizeof(ret));
+
+	fclose(stdin);
+	fclose(stdout);
+	fclose(stderr);
+
+	/* Hold the lock - the caller should SIGTERM to release the lock */
+	sleep(120);
+	exit(1);
+
+fail:
+	sys_write(pipefd[1], &ret, sizeof(ret));
+	return ret;
+}
+
+/*
+ * Command "local-read <dbname|dbid> <key>": fetch a record straight
+ * from the local tdb of a volatile database and print its header and
+ * data on stdout.
+ *
+ * Returns 0 on success, ENOENT if the database is not found, EINVAL if
+ * it is not volatile, the attach error, or 1 for a usage error, a
+ * missing record or a record whose header cannot be parsed.
+ */
+static int db_test_local_read(TALLOC_CTX *mem_ctx,
+			      int argc,
+			      const char **argv,
+			      void *private_data)
+{
+	struct db_test_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct db_test_tool_context);
+	struct ctdb_db_context *db;
+	struct ctdb_ltdb_header header;
+	const char *db_name;
+	TDB_DATA key, record;
+	uint32_t db_id;
+	uint8_t db_flags;
+	size_t hdr_len;
+	int status = 1;
+	int ret;
+
+	if (argc != 2) {
+		cmdline_usage(ctx->cmdline, "local-read");
+		return 1;
+	}
+
+	if (! db_exists(mem_ctx, ctx, argv[0], &db_id, &db_name, &db_flags)) {
+		return ENOENT;
+	}
+
+	if ((db_flags &
+	     (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) != 0) {
+		D_ERR("DB %s is not a volatile database\n", db_name);
+		return EINVAL;
+	}
+
+	ret = ctdb_attach(ctx->ev,
+			  ctx->client,
+			  TIMEOUT(),
+			  db_name,
+			  db_flags,
+			  &db);
+	if (ret != 0) {
+		D_ERR("Failed to attach to DB %s\n", db_name);
+		return ret;
+	}
+
+	key.dptr = (uint8_t *)discard_const(argv[1]);
+	key.dsize = strlen(argv[1]);
+
+	record = tdb_fetch(client_db_tdb(db), key);
+	if (record.dptr == NULL) {
+		D_ERR("No record for key %s\n", argv[1]);
+		return 1;
+	}
+
+	/* From here on the fetched buffer must be released with free() */
+	if (record.dsize < sizeof(struct ctdb_ltdb_header)) {
+		D_ERR("Invalid record for key %s\n", argv[1]);
+		goto done;
+	}
+
+	ret = ctdb_ltdb_header_pull(record.dptr,
+				    record.dsize,
+				    &header,
+				    &hdr_len);
+	if (ret != 0) {
+		D_ERR("Failed to parse header from data\n");
+		goto done;
+	}
+
+	dump_ltdb_header(&header);
+	dump("data", record.dptr + hdr_len, record.dsize - hdr_len);
+	status = 0;
+
+done:
+	free(record.dptr);
+
+	return status;
+}
+
+/*
+ * Command "vacuum <dbname|dbid> [full]": send a DB_VACUUM control for a
+ * volatile database to ctx->destnode.  The optional "full" argument
+ * sets full_vacuum_run in the request.
+ *
+ * Returns 0 on success, 1 on a usage error, ENOENT if the database is
+ * not found, EINVAL if it is not volatile, or the error from sending
+ * the control or from its reply.
+ */
+static int db_test_vacuum(TALLOC_CTX *mem_ctx,
+			  int argc,
+			  const char **argv,
+			  void *private_data)
+{
+	struct db_test_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct db_test_tool_context);
+	struct ctdb_db_vacuum db_vacuum;
+	struct ctdb_req_control request;
+	struct ctdb_reply_control *reply;
+	const char *db_name;
+	uint32_t db_id;
+	uint8_t db_flags;
+	bool full = false;
+	int ret;
+
+	if (argc < 1 || argc > 2) {
+		cmdline_usage(ctx->cmdline, "vacuum");
+		return 1;
+	}
+
+	if (argc == 2) {
+		/* The only accepted second argument is "full" */
+		if (strcmp(argv[1], "full") != 0) {
+			cmdline_usage(ctx->cmdline, "vacuum");
+			return 1;
+		}
+		full = true;
+	}
+
+	if (! db_exists(mem_ctx, ctx, argv[0], &db_id, &db_name, &db_flags)) {
+		return ENOENT;
+	}
+
+	if ((db_flags &
+	     (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) != 0) {
+		D_ERR("DB %s is not a volatile database\n", db_name);
+		return EINVAL;
+	}
+
+	db_vacuum.full_vacuum_run = full;
+	db_vacuum.db_id = db_id;
+
+	ctdb_req_control_db_vacuum(&request, &db_vacuum);
+
+	ret = ctdb_client_control(mem_ctx,
+				  ctx->ev,
+				  ctx->client,
+				  ctx->destnode,
+				  TIMEOUT(),
+				  &request,
+				  &reply);
+	if (ret != 0) {
+		D_ERR("Control DB_VACUUM failed to node %u, ret=%d\n",
+		      ctx->destnode,
+		      ret);
+		return ret;
+	}
+
+	ret = ctdb_reply_control_db_vacuum(reply);
+	if (ret != 0) {
+		D_ERR("Control DB_VACUUM failed, ret=%d\n", ret);
+	}
+
+	return ret;
+}
+
+/*
+ * Command table passed to cmdline_init() by db_test_tool_init().  Each
+ * entry gives the command name, the function implementing it, a
+ * one-line help text and a description of its arguments.
+ */
+struct cmdline_command db_test_commands[] = {
+ {
+ .name = "get-lmaster",
+ .fn = db_test_get_lmaster,
+ .msg_help = "Print lmaster for key",
+ .msg_args = "<key>"
+ },
+ {
+ .name = "fetch-local-delete",
+ .fn = db_test_fetch_local_delete,
+ .msg_help = "Fetch record and delete from local database",
+ .msg_args = "<dbname|dbid> <key>"
+ },
+ {
+ .name = "local-lock",
+ .fn = db_test_local_lock,
+ .msg_help = "Lock a record in a local database",
+ .msg_args = "<dbname|dbid> <key>"
+ },
+ {
+ .name = "local-read",
+ .fn = db_test_local_read,
+ .msg_help = "Read a record from local database",
+ .msg_args = "<dbname|dbid> <key>"
+ },
+ {
+ .name = "vacuum",
+ .fn = db_test_vacuum,
+ .msg_help = "Vacuum a database",
+ .msg_args = "<dbname|dbid> [full]"
+ },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Allocate the tool context on mem_ctx, set up the command line parser
+ * with the db_test_commands table and parse argc/argv.
+ *
+ * On success 0 is returned and *result points to the new context.  On
+ * failure the context is freed, *result is left untouched and ENOMEM
+ * or the error from cmdline_init()/cmdline_parse() is returned; a
+ * parse failure also prints the usage message.
+ */
+int db_test_tool_init(TALLOC_CTX *mem_ctx,
+		      const char *prog,
+		      struct poptOption *options,
+		      int argc,
+		      const char **argv,
+		      bool parse_options,
+		      struct db_test_tool_context **result)
+{
+	struct db_test_tool_context *ctx;
+	int ret;
+
+	ctx = talloc_zero(mem_ctx, struct db_test_tool_context);
+	if (ctx == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	ret = cmdline_init(mem_ctx,
+			   prog,
+			   options,
+			   NULL,
+			   db_test_commands,
+			   &ctx->cmdline);
+	if (ret != 0) {
+		D_ERR("Failed to initialize cmdline, ret=%d\n", ret);
+		goto fail;
+	}
+
+	ret = cmdline_parse(ctx->cmdline, argc, argv, parse_options);
+	if (ret != 0) {
+		cmdline_usage(ctx->cmdline, NULL);
+		goto fail;
+	}
+
+	*result = ctx;
+	return 0;
+
+fail:
+	talloc_free(ctx);
+	return ret;
+}
+
+/*
+ * Create the tevent context, connect to the "ctdbd" socket and run the
+ * command selected on the command line.
+ *
+ * Returns ENOMEM if the event context or socket path cannot be
+ * allocated, the ctdb_client_init() error if connecting fails, and
+ * otherwise whatever cmdline_run() returns; *result is passed through
+ * to cmdline_run().
+ */
+int db_test_tool_run(struct db_test_tool_context *ctx, int *result)
+{
+	char *sockpath;
+	int ret;
+
+	ctx->ev = tevent_context_init(ctx);
+	if (ctx->ev == NULL) {
+		D_ERR("Failed to initialize tevent\n");
+		return ENOMEM;
+	}
+
+	sockpath = path_socket(ctx, "ctdbd");
+	if (sockpath == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	ret = ctdb_client_init(ctx, ctx->ev, sockpath, &ctx->client);
+	if (ret != 0) {
+		D_ERR("Failed to connect to CTDB daemon (%s)\n", sockpath);
+		return ret;
+	}
+
+	return cmdline_run(ctx->cmdline, ctx, result);
+}
+
+#ifdef CTDB_DB_TEST_TOOL
+
+/*
+ * Values of the command line options, initialised to their defaults.
+ * popt stores parsed values here through db_test_options; main() then
+ * applies the debug level and copies destnode and timelimit into the
+ * tool context.
+ */
+static struct {
+ const char *debug;
+ int destnode;
+ int timelimit;
+} db_test_data = {
+ .debug = "ERROR",
+ .destnode = CTDB_CURRENT_NODE,
+ .timelimit = 60,
+};
+
+/*
+ * popt option table for the standalone tool: -d/--debug, -n/--node and
+ * -t/--timelimit, each storing its value in db_test_data.
+ */
+struct poptOption db_test_options[] = {
+ {
+ .longName = "debug",
+ .shortName = 'd',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &db_test_data.debug,
+ .val = 0,
+ .descrip = "debug level",
+ .argDescrip = "ERROR|WARNING|NOTICE|INFO|DEBUG"
+ },
+ {
+ .longName = "node",
+ .shortName = 'n',
+ .argInfo = POPT_ARG_INT,
+ .arg = &db_test_data.destnode,
+ .val = 0,
+ .descrip = "node number",
+ .argDescrip = "NUM"
+ },
+ {
+ .longName = "timelimit",
+ .shortName = 't',
+ .argInfo = POPT_ARG_INT,
+ .arg = &db_test_data.timelimit,
+ .val = 0,
+ .descrip = "control time limit",
+ .argDescrip = "SECONDS"
+ },
+ POPT_TABLEEND
+};
+
+/*
+ * Standalone entry point: parse the command line, set up logging at
+ * the requested level (DEBUG_ERR if it cannot be parsed), run the
+ * selected command and exit with its result.  If running the tool
+ * itself fails, that error is used as the exit status instead.
+ */
+int main(int argc, const char **argv)
+{
+	struct db_test_tool_context *ctx;
+	TALLOC_CTX *mem_ctx;
+	int result = 0;
+	int level;
+	int ret;
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ret = db_test_tool_init(mem_ctx,
+				"ctdb-db-test",
+				db_test_options,
+				argc,
+				argv,
+				true,
+				&ctx);
+	if (ret != 0) {
+		talloc_free(mem_ctx);
+		exit(1);
+	}
+
+	setup_logging("ctdb-db-test", DEBUG_STDERR);
+	if (! debug_level_parse(db_test_data.debug, &level)) {
+		level = DEBUG_ERR;
+	}
+	debuglevel_set(level);
+
+	ctx->destnode = db_test_data.destnode;
+	ctx->timelimit = db_test_data.timelimit;
+
+	ret = db_test_tool_run(ctx, &result);
+
+	talloc_free(mem_ctx);
+
+	/* A failure to run the tool takes precedence over the result */
+	exit(ret != 0 ? ret : result);
+}
+
+#endif /* CTDB_DB_TEST_TOOL */
diff --git a/ctdb/tests/src/dummy_client.c b/ctdb/tests/src/dummy_client.c
new file mode 100644
index 0000000..13e0691
--- /dev/null
+++ b/ctdb/tests/src/dummy_client.c
@@ -0,0 +1,163 @@
+/*
+ Dummy CTDB client for testing
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/logging.h"
+#include "common/path.h"
+
+#include "client/client.h"
+
+/*
+ * Command line option values.  The defaults are assigned at the start
+ * of main(); popt overwrites them through cmdline_options.
+ */
+static struct {
+ const char *sockpath;
+ const char *debuglevel;
+ int num_connections;
+ int timelimit;
+ const char *srvidstr;
+} options;
+
+/*
+ * popt option table: each entry stores its argument directly in the
+ * corresponding field of "options".
+ */
+static struct poptOption cmdline_options[] = {
+ POPT_AUTOHELP
+ { "socket", 's', POPT_ARG_STRING, &options.sockpath, 0,
+ "Unix domain socket path", "filename" },
+ { "debug", 'd', POPT_ARG_STRING, &options.debuglevel, 0,
+ "debug level", "ERR|WARNING|NOTICE|INFO|DEBUG" } ,
+ { "nconn", 'n', POPT_ARG_INT, &options.num_connections, 0,
+ "number of connections", "" },
+ { "timelimit", 't', POPT_ARG_INT, &options.timelimit, 0,
+ "time limit", "seconds" },
+ { "srvid", 'S', POPT_ARG_STRING, &options.srvidstr, 0,
+ "srvid to register", "srvid" },
+ POPT_TABLEEND
+};
+
+/*
+ * Message handler: private_data points to a bool which is set to true
+ * when a message is received.  The srvid and message data are ignored.
+ */
+static void dummy_handler(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	bool *received = private_data;
+
+	*received = true;
+}
+
+/*
+ * Dummy client: open the requested number of connections to the CTDB
+ * daemon socket, optionally register a message handler for a SRVID on
+ * the last connection, and then wait until that handler has run or the
+ * time limit has passed.
+ *
+ * Exits with status 1 on invalid options, allocation or connection
+ * failures, and when the wait reports ETIMEDOUT; otherwise with 0.
+ */
+int main(int argc, const char *argv[])
+{
+	TALLOC_CTX *mem_ctx;
+	struct tevent_context *ev;
+	struct ctdb_client_context **client;
+	struct ctdb_client_context *last_client;
+	poptContext pc;
+	int opt, ret, i;
+	int log_level;
+	bool status, done;
+
+	/* Set default options */
+	options.sockpath = NULL;
+	options.debuglevel = "ERR";
+	options.num_connections = 1;
+	options.timelimit = 60;
+	options.srvidstr = NULL;
+
+	pc = poptGetContext(argv[0], argc, argv, cmdline_options,
+			    POPT_CONTEXT_KEEP_FIRST);
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		fprintf(stderr, "Invalid option %s\n", poptBadOption(pc, 0));
+		exit(1);
+	}
+
+	/*
+	 * At least one connection is required: client[num_connections-1]
+	 * is used below and would otherwise index before the array.
+	 */
+	if (options.num_connections < 1) {
+		fprintf(stderr, "Invalid number of connections %d\n",
+			options.num_connections);
+		exit(1);
+	}
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ev = tevent_context_init(mem_ctx);
+	if (ev == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	status = debug_level_parse(options.debuglevel, &log_level);
+	if (! status) {
+		fprintf(stderr, "Invalid debug level\n");
+		poptPrintHelp(pc, stdout, 0);
+		exit(1);
+	}
+
+	setup_logging("dummy_client", DEBUG_STDERR);
+	debuglevel_set(log_level);
+
+	if (options.sockpath == NULL) {
+		options.sockpath = path_socket(mem_ctx, "ctdbd");
+		if (options.sockpath == NULL) {
+			D_ERR("Memory allocation error\n");
+			exit(1);
+		}
+	}
+
+	client = talloc_array(mem_ctx, struct ctdb_client_context *,
+			      options.num_connections);
+	if (client == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	for (i = 0; i < options.num_connections; i++) {
+		ret = ctdb_client_init(client, ev, options.sockpath,
+				       &client[i]);
+		if (ret != 0) {
+			D_ERR("Failed to initialize client %d, ret=%d\n",
+			      i, ret);
+			exit(1);
+		}
+	}
+
+	/* The message handler, if any, goes on the last connection */
+	last_client = client[options.num_connections-1];
+
+	done = false;
+	if (options.srvidstr != NULL) {
+		uint64_t srvid;
+
+		srvid = strtoull(options.srvidstr, NULL, 0);
+
+		ret = ctdb_client_set_message_handler(ev, last_client, srvid,
+						      dummy_handler, &done);
+		if (ret != 0) {
+			D_ERR("Failed to register srvid, ret=%d\n", ret);
+			talloc_free(client);
+			exit(1);
+		}
+
+		D_INFO("Registered SRVID 0x%"PRIx64"\n", srvid);
+	}
+
+	ret = ctdb_client_wait_timeout(ev, &done,
+			tevent_timeval_current_ofs(options.timelimit, 0));
+	/*
+	 * NOTE(review): "ret != 0" is implied by "ret == ETIMEDOUT", so
+	 * only a timeout is treated as a failure here - confirm that
+	 * this is the intended condition.
+	 */
+	if (ret != 0 && ret == ETIMEDOUT) {
+		D_ERR("client_wait_timeout() failed, ret=%d\n", ret);
+		talloc_free(client);
+		exit(1);
+	}
+
+	talloc_free(mem_ctx);
+	exit(0);
+}
diff --git a/ctdb/tests/src/errcode.c b/ctdb/tests/src/errcode.c
new file mode 100644
index 0000000..7343e81
--- /dev/null
+++ b/ctdb/tests/src/errcode.c
@@ -0,0 +1,189 @@
+/*
+ Portability layer for error codes
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * These errors are as listed in POSIX standard
+ * IEEE Std 1003.1-2017 (Revision of IEEE Std 1003.1-2008)
+ *
+ * Error codes marked obsolete are removed (ENODATA, ENOSR, ENOSTR, ETIME)
+ */
+
+#include "replace.h"
+
+/*
+ * Table pairing the name of each error with the value of the
+ * corresponding errno constant as defined when this file is compiled.
+ * dump(), match_label() and match_code() all scan it linearly.
+ */
+struct {
+ const char *label;
+ int code;
+} err_codes[] = {
+ { "E2BIG", E2BIG },
+
+ { "EACCES", EACCES },
+ { "EADDRINUSE", EADDRINUSE },
+ { "EADDRNOTAVAIL", EADDRNOTAVAIL },
+ { "EAFNOSUPPORT", EAFNOSUPPORT },
+ { "EAGAIN", EAGAIN },
+ { "EALREADY", EALREADY },
+
+ { "EBADF", EBADF },
+ { "EBADMSG", EBADMSG },
+ { "EBUSY", EBUSY },
+
+ { "ECANCELED", ECANCELED },
+ { "ECHILD", ECHILD },
+ { "ECONNABORTED", ECONNABORTED },
+ { "ECONNREFUSED", ECONNREFUSED },
+ { "ECONNRESET", ECONNRESET },
+
+ { "EDEADLK", EDEADLK },
+ { "EDESTADDRREQ", EDESTADDRREQ },
+ { "EDOM", EDOM },
+ { "EDQUOT", EDQUOT },
+
+ { "EEXIST", EEXIST },
+
+ { "EFAULT", EFAULT },
+ { "EFBIG", EFBIG },
+
+ { "EHOSTUNREACH", EHOSTUNREACH },
+
+ { "EIDRM", EIDRM },
+ { "EILSEQ", EILSEQ },
+ { "EINPROGRESS", EINPROGRESS },
+ { "EINTR", EINTR },
+ { "EINVAL", EINVAL },
+ { "EIO", EIO },
+ { "EISCONN", EISCONN },
+ { "EISDIR", EISDIR },
+
+ { "ELOOP", ELOOP },
+
+ { "EMFILE", EMFILE },
+ { "EMLINK", EMLINK },
+ { "EMSGSIZE", EMSGSIZE },
+ { "EMULTIHOP", EMULTIHOP },
+
+ { "ENAMETOOLONG", ENAMETOOLONG },
+ { "ENETDOWN", ENETDOWN },
+ { "ENETRESET", ENETRESET },
+ { "ENETUNREACH", ENETUNREACH },
+ { "ENFILE", ENFILE },
+ { "ENOBUFS", ENOBUFS },
+ { "ENODEV", ENODEV },
+ { "ENOENT", ENOENT },
+ { "ENOEXEC", ENOEXEC },
+ { "ENOLCK", ENOLCK },
+ { "ENOLINK", ENOLINK },
+ { "ENOMEM", ENOMEM },
+ { "ENOMSG", ENOMSG },
+ { "ENOPROTOOPT", ENOPROTOOPT },
+ { "ENOSPC", ENOSPC },
+ { "ENOSYS", ENOSYS },
+ { "ENOTCONN", ENOTCONN },
+ { "ENOTDIR", ENOTDIR },
+ { "ENOTEMPTY", ENOTEMPTY },
+ { "ENOTSOCK", ENOTSOCK },
+ { "ENOTSUP", ENOTSUP },
+ { "ENOTTY", ENOTTY },
+ { "ENXIO", ENXIO },
+
+ { "EOPNOTSUPP", EOPNOTSUPP },
+ { "EOVERFLOW", EOVERFLOW },
+
+ { "EPERM", EPERM },
+ { "EPIPE", EPIPE },
+ { "EPROTO", EPROTO },
+ { "EPROTONOSUPPORT", EPROTONOSUPPORT },
+ { "EPROTOTYPE", EPROTOTYPE },
+
+ { "ERANGE", ERANGE },
+ { "EROFS", EROFS },
+
+ { "ESPIPE", ESPIPE },
+ { "ESRCH", ESRCH },
+ { "ESTALE", ESTALE },
+
+ { "ETIMEDOUT", ETIMEDOUT },
+ { "ETXTBSY", ETXTBSY },
+
+ { "EWOULDBLOCK", EWOULDBLOCK },
+
+ { "EXDEV", EXDEV },
+};
+
+/* Print every table entry as "<label> <code>", one per line. */
+static void dump(void)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(err_codes)) {
+		printf("%s %d\n", err_codes[idx].label, err_codes[idx].code);
+		idx++;
+	}
+}
+
+/*
+ * Print the code of the first table entry whose label matches str,
+ * ignoring case, or -1 if no label matches.
+ */
+static void match_label(const char *str)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(err_codes); i++) {
+		if (strcasecmp(err_codes[i].label, str) != 0) {
+			continue;
+		}
+		printf("%d\n", err_codes[i].code);
+		return;
+	}
+
+	printf("%d\n", -1);
+}
+
+/*
+ * Print the label of the first table entry with the given code, or
+ * "UNKNOWN" if the code is not in the table.
+ */
+static void match_code(int code)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(err_codes); i++) {
+		if (err_codes[i].code != code) {
+			continue;
+		}
+		printf("%s\n", err_codes[i].label);
+		return;
+	}
+
+	printf("%s\n", "UNKNOWN");
+}
+
+/*
+ * Usage: <prog> dump|<errcode>
+ *
+ * "dump" prints the whole table.  An argument that is entirely a
+ * number is looked up as an error code and its label is printed;
+ * anything else, including an empty string, is looked up as a label
+ * and its code is printed.
+ */
+int main(int argc, const char **argv)
+{
+	long int code;
+	char *endptr;
+
+	if (argc != 2) {
+		fprintf(stderr, "Usage: %s dump|<errcode>\n", argv[0]);
+		exit(1);
+	}
+
+	if (strcmp(argv[1], "dump") == 0) {
+		dump();
+		exit(0);
+	}
+
+	code = strtol(argv[1], &endptr, 0);
+	if (endptr == argv[1] || *endptr != '\0') {
+		/*
+		 * Nothing was converted (e.g. an empty argument) or there
+		 * is trailing text: this is not a number, so treat it as
+		 * a label rather than as code 0.
+		 */
+		match_label(argv[1]);
+	} else if ((long int)(int)code != code) {
+		/*
+		 * The value does not fit in an int and so cannot be any
+		 * errno value; do not let it wrap to a valid code.
+		 */
+		printf("UNKNOWN\n");
+	} else {
+		match_code((int)code);
+	}
+
+	exit(0);
+}
diff --git a/ctdb/tests/src/event_script_test.c b/ctdb/tests/src/event_script_test.c
new file mode 100644
index 0000000..f06725a
--- /dev/null
+++ b/ctdb/tests/src/event_script_test.c
@@ -0,0 +1,120 @@
+/*
+ Low level event script handling tests
+
+ Copyright (C) Martin Schwenke 2018
+
+ Based on run_event_test.c:
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <popt.h>
+#include <talloc.h>
+
+#include <assert.h>
+
+#include "common/event_script.c"
+
+/*
+ * Print the supported invocations on stderr.  The commands listed here
+ * are the ones main() actually dispatches on: "list", "enable" and
+ * "disable" as the first argument.
+ */
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"Usage: %s list <scriptdir>\n",
+		prog);
+	fprintf(stderr,
+		" %s enable <scriptdir> <scriptname>\n",
+		prog);
+	fprintf(stderr,
+		" %s disable <scriptdir> <scriptname>\n",
+		prog);
+}
+
+/*
+ * "list <scriptdir>": print the name of every script returned by
+ * event_script_get_list() for the directory, one per line.  A failure
+ * or an empty list is reported on stdout instead.  Exits with status 1
+ * if the argument count is wrong.
+ */
+static void do_list(TALLOC_CTX *mem_ctx, int argc, const char **argv)
+{
+	struct event_script_list *scripts = NULL;
+	const char *script_dir;
+	unsigned int n;
+	int ret;
+
+	if (argc != 3) {
+		usage(argv[0]);
+		exit(1);
+	}
+	script_dir = argv[2];
+
+	ret = event_script_get_list(mem_ctx, script_dir, &scripts);
+	if (ret != 0) {
+		printf("Script list %s failed with result=%d\n",
+		       script_dir, ret);
+		return;
+	}
+
+	if (scripts == NULL || scripts->num_scripts == 0) {
+		printf("No scripts found\n");
+		return;
+	}
+
+	n = 0;
+	while (n < scripts->num_scripts) {
+		printf("%s\n", scripts->script[n]->name);
+		n++;
+	}
+}
+
+/*
+ * "enable|disable <scriptdir> <scriptname>": call event_script_chmod()
+ * for the script and print the result on stdout.  mem_ctx is unused.
+ * Exits with status 1 if the argument count is wrong.
+ */
+static void do_chmod(TALLOC_CTX *mem_ctx,
+		     int argc,
+		     const char **argv,
+		     bool enable)
+{
+	const char *action, *script_dir, *script_name;
+	int ret;
+
+	if (argc != 4) {
+		usage(argv[0]);
+		exit(1);
+	}
+	action = argv[1];
+	script_dir = argv[2];
+	script_name = argv[3];
+
+	ret = event_script_chmod(script_dir, script_name, enable);
+
+	printf("Script %s %s %s completed with result=%d\n",
+	       action, script_dir, script_name, ret);
+}
+
+/*
+ * Dispatch on the command in argv[1]: "list", "enable" or "disable".
+ * An unknown command is reported on stderr together with the usage
+ * message.
+ *
+ * NOTE(review): the exit status is 0 even for an unknown command; that
+ * behaviour is kept as it was - confirm whether it is intended.
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *mem_ctx;
+
+	if (argc < 3) {
+		usage(argv[0]);
+		exit(1);
+	}
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "talloc_new() failed\n");
+		exit(1);
+	}
+
+	if (strcmp(argv[1], "list") == 0) {
+		do_list(mem_ctx, argc, argv);
+	} else if (strcmp(argv[1], "enable") == 0) {
+		do_chmod(mem_ctx, argc, argv, true);
+	} else if (strcmp(argv[1], "disable") == 0) {
+		do_chmod(mem_ctx, argc, argv, false);
+	} else {
+		/* The command is argv[1]; argv[2] is the script directory */
+		fprintf(stderr, "Invalid command %s\n", argv[1]);
+		usage(argv[0]);
+	}
+
+	talloc_free(mem_ctx);
+	exit(0);
+}
diff --git a/ctdb/tests/src/fake_ctdbd.c b/ctdb/tests/src/fake_ctdbd.c
new file mode 100644
index 0000000..0d430a3
--- /dev/null
+++ b/ctdb/tests/src/fake_ctdbd.c
@@ -0,0 +1,4781 @@
+/*
+ Fake CTDB server for testing
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/time.h"
+#include "system/filesys.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/async_req/async_sock.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "protocol/protocol_util.h"
+#include "protocol/protocol_private.h"
+
+#include "common/comm.h"
+#include "common/logging.h"
+#include "common/tunable.h"
+#include "common/srvid.h"
+#include "common/system.h"
+
+#include "ipalloc_read_known_ips.h"
+
+
+#define CTDB_PORT 4379
+
+/* A fake flag that is only supported by some functions */
+#define NODE_FLAGS_FAKE_TIMEOUT 0x80000000
+
+struct node { /* One entry of the fake cluster's node map */
+ ctdb_sock_addr addr; /* node address; nodemap_parse() sets the port to CTDB_PORT */
+ uint32_t pnn; /* node number as given on the NODEMAP line */
+ uint32_t flags; /* flag bits from the NODEMAP line, optionally with NODE_FLAGS_FAKE_TIMEOUT */
+ uint32_t capabilities; /* CTDB_CAP_* bits; nodemap_parse() starts from RECMASTER|LMASTER */
+ bool recovery_disabled; /* recover_check() keeps waiting while any node has this set */
+ void *recovery_substate; /* opaque pointer; initialised to NULL by nodemap_parse() */
+};
+
+struct node_map { /* All nodes of the fake cluster */
+ uint32_t num_nodes; /* number of valid entries in node[] */
+ struct node *node; /* array grown by nodemap_parse() */
+ uint32_t pnn; /* PNN of the line flagged CURRENT; CTDB_UNKNOWN_PNN until then */
+ uint32_t recmaster; /* PNN of the line flagged RECMASTER; CTDB_UNKNOWN_PNN until then */
+};
+
+struct interface { /* One line of the IFACES section */
+ const char *name; /* interface name (first field) */
+ bool link_up; /* set from the numeric link state field */
+ uint32_t references; /* third field, stored as parsed */
+};
+
+struct interface_map { /* All interfaces read by interfaces_parse() */
+ int num; /* number of valid entries in iface[] */
+ struct interface *iface; /* array grown one entry per parsed line */
+};
+
+struct vnn_map { /* Fake VNN map plus recovery mode */
+ uint32_t recmode; /* CTDB_RECOVERY_ACTIVE after init, CTDB_RECOVERY_NORMAL after setup/recovery */
+ uint32_t generation; /* INVALID_GENERATION until parsed or generated */
+ uint32_t size; /* number of valid entries in map[] */
+ uint32_t *map; /* lmaster values, one per VNNMAP line after the generation */
+};
+
+struct database { /* One fake database, linked into database_map.db */
+ struct database *prev, *next; /* list links used by the DLIST_* macros */
+ const char *name; /* database name */
+ const char *path; /* "<dbdir>/<name>" */
+ struct tdb_context *tdb; /* opened by database_new(); left NULL by dbmap_parse() */
+ uint32_t id; /* parsed ID, or tdb_jenkins_hash() of the name in database_new() */
+ uint8_t flags; /* CTDB_DB_FLAGS_* bits */
+ uint64_t seq_num; /* value database_seqnum() reports when tdb is NULL */
+};
+
+struct database_map { /* Collection of fake databases */
+ struct database *db; /* head of the database list */
+ const char *dbdir; /* directory used to build each database path */
+};
+
+struct fake_control_failure { /* One line of the CONTROLFAILS section */
+ struct fake_control_failure *prev, *next; /* list links used by the DLIST_* macros */
+ enum ctdb_controls opcode; /* control opcode (numeric first field) */
+ uint32_t pnn; /* node number (second field) */
+ const char *error; /* "ERROR" or "TIMEOUT" */
+ const char *comment; /* remainder of the line */
+};
+
+struct ctdb_client { /* Registry entry created by client_add() */
+ struct ctdb_client *prev, *next; /* list links used by the DLIST_* macros */
+ struct ctdbd_context *ctdb; /* owner; the destructor unlinks from ctdb->client_list */
+ pid_t pid; /* key used by client_find() */
+ void *state; /* opaque client state; also the talloc parent of this entry */
+};
+
+struct ctdbd_context { /* Top-level state of the fake daemon, built by ctdbd_setup() */
+ struct node_map *node_map; /* filled from the NODEMAP section */
+ struct interface_map *iface_map; /* filled from the IFACES section */
+ struct vnn_map *vnn_map; /* filled from the VNNMAP section */
+ struct database_map *db_map; /* filled from the DBMAP section */
+ struct srvid_context *srv; /* created with srvid_init() */
+ int num_clients;
+ struct timeval start_time; /* set at the end of ctdbd_setup() */
+ struct timeval recovery_start_time; /* updated by recover_check() when recovery may proceed */
+ struct timeval recovery_end_time; /* updated by recover_done() */
+ bool takeover_disabled;
+ int log_level; /* ctdbd_setup() initialises this to DEBUG_ERR */
+ enum ctdb_runstate runstate; /* CTDB_RUNSTATE_RUNNING unless a RUNSTATE section overrides it */
+ struct ctdb_tunable_list tun_list; /* initialised with ctdb_tunable_set_defaults() */
+ char *reclock; /* string read from the RECLOCK section */
+ struct ctdb_public_ip_list *known_ips; /* result of ipalloc_read_known_ips() (PUBLICIPS section) */
+ struct fake_control_failure *control_failures; /* list head, CONTROLFAILS section */
+ struct ctdb_client *client_list; /* list head maintained by client_add() and the client destructor */
+};
+
+/*
+ * Parse routines
+ */
+
+/*
+ * Allocate an empty node map on mem_ctx.
+ *
+ * Both the current-node and recmaster PNNs start out as
+ * CTDB_UNKNOWN_PNN.  Returns NULL if allocation fails.
+ */
+static struct node_map *nodemap_init(TALLOC_CTX *mem_ctx)
+{
+	struct node_map *nm = talloc_zero(mem_ctx, struct node_map);
+
+	if (nm != NULL) {
+		nm->pnn = CTDB_UNKNOWN_PNN;
+		nm->recmaster = CTDB_UNKNOWN_PNN;
+	}
+
+	return nm;
+}
+
+/* Read a nodemap from stdin.  Each line looks like:
+ *  <PNN> <IP> <FLAGS> [RECMASTER] [CURRENT] [CAPABILITIES] [TIMEOUT]
+ * EOF or a blank line terminates input.
+ *
+ * By default, capabilities for each node are
+ * CTDB_CAP_RECMASTER|CTDB_CAP_LMASTER.  These 2
+ * capabilities can be faked off by adding, for example,
+ * -CTDB_CAP_RECMASTER.
+ *
+ * Lines with a missing or invalid field are reported on stderr and
+ * skipped.  A node whose flags include NODE_FLAGS_DELETED gets the
+ * address 0.0.0.0 (the IP on the line must still be valid).
+ *
+ * Returns true if at least one node was added, false on allocation
+ * failure or if no node was parsed.
+ */
+
+static bool nodemap_parse(struct node_map *node_map)
+{
+	char line[1024];
+
+	while ((fgets(line, sizeof(line), stdin) != NULL)) {
+		uint32_t pnn, flags, capabilities;
+		char *tok, *t;
+		ctdb_sock_addr saddr;
+		struct node *node;
+		int ret;
+
+		if (line[0] == '\n') {
+			break;
+		}
+
+		/* Get rid of pesky newline */
+		if ((t = strchr(line, '\n')) != NULL) {
+			*t = '\0';
+		}
+
+		/* Get PNN */
+		tok = strtok(line, " \t");
+		if (tok == NULL) {
+			fprintf(stderr, "bad line (%s) - missing PNN\n", line);
+			continue;
+		}
+		pnn = (uint32_t)strtoul(tok, NULL, 0);
+
+		/* Get IP */
+		tok = strtok(NULL, " \t");
+		if (tok == NULL) {
+			fprintf(stderr, "bad line (%s) - missing IP\n", line);
+			continue;
+		}
+		ret = ctdb_sock_addr_from_string(tok, &saddr, false);
+		if (ret != 0) {
+			fprintf(stderr, "bad line (%s) - invalid IP\n", line);
+			continue;
+		}
+
+		/* Get flags */
+		tok = strtok(NULL, " \t");
+		if (tok == NULL) {
+			fprintf(stderr, "bad line (%s) - missing flags\n",
+				line);
+			continue;
+		}
+		flags = (uint32_t)strtoul(tok, NULL, 0);
+		/* Handle deleted nodes */
+		if (flags & NODE_FLAGS_DELETED) {
+			ret = ctdb_sock_addr_from_string("0.0.0.0",
+							 &saddr,
+							 false);
+			if (ret != 0) {
+				goto fail;
+			}
+		}
+		ctdb_sock_addr_set_port(&saddr, CTDB_PORT);
+		capabilities = CTDB_CAP_RECMASTER|CTDB_CAP_LMASTER;
+
+		tok = strtok(NULL, " \t");
+		while (tok != NULL) {
+			if (strcmp(tok, "CURRENT") == 0) {
+				node_map->pnn = pnn;
+			} else if (strcmp(tok, "RECMASTER") == 0) {
+				node_map->recmaster = pnn;
+			} else if (strcmp(tok, "-CTDB_CAP_RECMASTER") == 0) {
+				capabilities &= ~CTDB_CAP_RECMASTER;
+			} else if (strcmp(tok, "-CTDB_CAP_LMASTER") == 0) {
+				capabilities &= ~CTDB_CAP_LMASTER;
+			} else if (strcmp(tok, "TIMEOUT") == 0) {
+				/* This can be done with just a flag
+				 * value but it is probably clearer
+				 * and less error-prone to fake this
+				 * with an explicit token */
+				flags |= NODE_FLAGS_FAKE_TIMEOUT;
+			}
+			tok = strtok(NULL, " \t");
+		}
+
+		/*
+		 * The address is fully parsed before the array is
+		 * grown, so no temporary copy of the IP string is
+		 * needed and a bad line never leaves a half-built
+		 * entry behind.
+		 */
+		node_map->node = talloc_realloc(node_map, node_map->node,
+						struct node,
+						node_map->num_nodes + 1);
+		if (node_map->node == NULL) {
+			goto fail;
+		}
+		node = &node_map->node[node_map->num_nodes];
+
+		node->addr = saddr;
+		node->pnn = pnn;
+		node->flags = flags;
+		node->capabilities = capabilities;
+		node->recovery_disabled = false;
+		node->recovery_substate = NULL;
+
+		node_map->num_nodes += 1;
+	}
+
+	if (node_map->num_nodes == 0) {
+		goto fail;
+	}
+
+	DEBUG(DEBUG_INFO, ("Parsing nodemap done\n"));
+	return true;
+
+fail:
+	DEBUG(DEBUG_INFO, ("Parsing nodemap failed\n"));
+	return false;
+
+}
+
+/*
+ * Append one node to nodemap.
+ *
+ * nstr is parsed as an IP address and given port CTDB_PORT; the new
+ * entry's PNN is its index in the array.  Returns false (after a
+ * message on stderr for a bad address) if the address is invalid or
+ * the array cannot be grown.
+ */
+static bool node_map_add(struct ctdb_node_map *nodemap,
+			 const char *nstr, uint32_t flags)
+{
+	ctdb_sock_addr parsed;
+	struct ctdb_node_and_flags *slot;
+	uint32_t idx = nodemap->num;
+
+	if (ctdb_sock_addr_from_string(nstr, &parsed, false) != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", nstr);
+		return false;
+	}
+	ctdb_sock_addr_set_port(&parsed, CTDB_PORT);
+
+	nodemap->node = talloc_realloc(nodemap, nodemap->node,
+				       struct ctdb_node_and_flags, idx+1);
+	if (nodemap->node == NULL) {
+		return false;
+	}
+
+	slot = &nodemap->node[idx];
+	slot->addr = parsed;
+	slot->pnn = idx;
+	slot->flags = flags;
+
+	nodemap->num = idx+1;
+	return true;
+}
+
+/*
+ * Read a nodes file into a node map allocated on mem_ctx.
+ *
+ * Each non-empty line (after stripping surrounding blanks) becomes one
+ * node via node_map_add().  A line starting with '#' becomes a node
+ * with NODE_FLAGS_DELETED and address 0.0.0.0 so that later nodes keep
+ * their PNNs.  Trailing empty lines are ignored.
+ *
+ * Returns NULL if the file cannot be loaded, on allocation failure or
+ * if any line fails to parse.
+ */
+static struct ctdb_node_map *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx,
+						  const char *nlist)
+{
+	char **lines;
+	int nlines;
+	int i;
+	struct ctdb_node_map *nodemap;
+
+	nodemap = talloc_zero(mem_ctx, struct ctdb_node_map);
+	if (nodemap == NULL) {
+		return NULL;
+	}
+
+	lines = file_lines_load(nlist, &nlines, 0, mem_ctx);
+	if (lines == NULL) {
+		/* Don't leave the unused node map hanging off mem_ctx */
+		talloc_free(nodemap);
+		return NULL;
+	}
+
+	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
+		nlines--;
+	}
+
+	for (i=0; i<nlines; i++) {
+		char *node;
+		uint32_t flags;
+		size_t len;
+
+		node = lines[i];
+		/* strip leading spaces */
+		while((*node == ' ') || (*node == '\t')) {
+			node++;
+		}
+
+		len = strlen(node);
+
+		/* strip trailing spaces */
+		while ((len > 1) &&
+		       ((node[len-1] == ' ') || (node[len-1] == '\t')))
+		{
+			node[len-1] = '\0';
+			len--;
+		}
+
+		if (len == 0) {
+			continue;
+		}
+		if (*node == '#') {
+			/* A "deleted" node is a node that is
+			   commented out in the nodes file.  This is
+			   used instead of removing a line, which
+			   would cause subsequent nodes to change
+			   their PNN. */
+			flags = NODE_FLAGS_DELETED;
+			node = discard_const("0.0.0.0");
+		} else {
+			flags = 0;
+		}
+		if (! node_map_add(nodemap, node, flags)) {
+			talloc_free(lines);
+			TALLOC_FREE(nodemap);
+			return NULL;
+		}
+	}
+
+	talloc_free(lines);
+	return nodemap;
+}
+
+/*
+ * Load the nodes file for node pnn from the directory named by the
+ * CTDB_BASE environment variable.
+ *
+ * "$CTDB_BASE/nodes.<pnn>" is tried first; if it loads but contains no
+ * nodes this is treated as a failure.  If it cannot be loaded at all,
+ * "$CTDB_BASE/nodes" is used instead.
+ *
+ * Returns the node map allocated on mem_ctx, or NULL on error.
+ */
+static struct ctdb_node_map *read_nodes_file(TALLOC_CTX *mem_ctx,
+					     uint32_t pnn)
+{
+	struct ctdb_node_map *nodemap;
+	char nodes_list[PATH_MAX];
+	const char *ctdb_base;
+	int num;
+
+	ctdb_base = getenv("CTDB_BASE");
+	if (ctdb_base == NULL) {
+		D_ERR("CTDB_BASE is not set\n");
+		return NULL;
+	}
+
+	/* read optional node-specific nodes file */
+	num = snprintf(nodes_list, sizeof(nodes_list),
+		       "%s/nodes.%u", ctdb_base, pnn);
+	/*
+	 * snprintf() returns the length that would have been written,
+	 * so any value >= the buffer size means truncation.
+	 */
+	if (num < 0 || (size_t)num >= sizeof(nodes_list)) {
+		D_ERR("nodes file path too long\n");
+		return NULL;
+	}
+	nodemap = ctdb_read_nodes_file(mem_ctx, nodes_list);
+	if (nodemap != NULL) {
+		/* Fake a load failure for an empty nodemap */
+		if (nodemap->num == 0) {
+			talloc_free(nodemap);
+
+			D_ERR("Failed to read nodes file \"%s\"\n", nodes_list);
+			return NULL;
+		}
+
+		return nodemap;
+	}
+
+	/* read normal nodes file */
+	num = snprintf(nodes_list, sizeof(nodes_list), "%s/nodes", ctdb_base);
+	if (num < 0 || (size_t)num >= sizeof(nodes_list)) {
+		D_ERR("nodes file path too long\n");
+		return NULL;
+	}
+	nodemap = ctdb_read_nodes_file(mem_ctx, nodes_list);
+	if (nodemap != NULL) {
+		return nodemap;
+	}
+
+	DBG_ERR("Failed to read nodes file \"%s\"\n", nodes_list);
+	return NULL;
+}
+
+/*
+ * Allocate an empty, zero-initialised interface map on mem_ctx.
+ * Returns NULL if allocation fails.
+ */
+static struct interface_map *interfaces_init(TALLOC_CTX *mem_ctx)
+{
+	return talloc_zero(mem_ctx, struct interface_map);
+}
+
+/* Read interfaces information. Same format as "ctdb ifaces -Y"
+ * output:
+ * :Name:LinkStatus:References:
+ * :eth2:1:4294967294
+ * :eth1:1:4294967292
+ *
+ * Lines are read from stdin until EOF or a blank line.  The header
+ * line shown above is skipped.  Lines with a missing field are
+ * reported on stderr and skipped.
+ *
+ * Returns true if at least one interface was added to iface_map,
+ * false on allocation failure or if nothing was parsed.
+ */
+
+static bool interfaces_parse(struct interface_map *iface_map)
+{
+ char line[1024];
+
+ while ((fgets(line, sizeof(line), stdin) != NULL)) {
+ uint16_t link_state;
+ uint32_t references;
+ char *tok, *t, *name;
+ struct interface *iface;
+
+ if (line[0] == '\n') {
+ break;
+ }
+
+ /* Get rid of pesky newline */
+ if ((t = strchr(line, '\n')) != NULL) {
+ *t = '\0';
+ }
+
+ if (strcmp(line, ":Name:LinkStatus:References:") == 0) {
+ continue;
+ }
+
+ /* Leading colon: no separate strtok() call is needed because
+ * strtok() skips delimiters that precede the first token. */
+
+ /* name */
+ tok = strtok(line, ":");
+ if (tok == NULL) {
+ fprintf(stderr, "bad line (%s) - missing name\n", line);
+ continue;
+ }
+ name = tok; /* points into line; duplicated below before line is reused */
+
+ /* link_state */
+ tok = strtok(NULL, ":");
+ if (tok == NULL) {
+ fprintf(stderr, "bad line (%s) - missing link state\n",
+ line);
+ continue;
+ }
+ link_state = (uint16_t)strtoul(tok, NULL, 0);
+
+ /* references... */
+ tok = strtok(NULL, ":");
+ if (tok == NULL) {
+ fprintf(stderr, "bad line (%s) - missing references\n",
+ line);
+ continue;
+ }
+ references = (uint32_t)strtoul(tok, NULL, 0);
+
+ iface_map->iface = talloc_realloc(iface_map, iface_map->iface,
+ struct interface,
+ iface_map->num + 1);
+ if (iface_map->iface == NULL) {
+ goto fail;
+ }
+
+ iface = &iface_map->iface[iface_map->num];
+
+ iface->name = talloc_strdup(iface_map, name);
+ if (iface->name == NULL) {
+ goto fail;
+ }
+ iface->link_up = link_state; /* any non-zero link state converts to true */
+ iface->references = references;
+
+ iface_map->num += 1;
+ }
+
+ if (iface_map->num == 0) {
+ goto fail;
+ }
+
+ DEBUG(DEBUG_INFO, ("Parsing interfaces done\n"));
+ return true;
+
+fail:
+ fprintf(stderr, "Parsing interfaces failed\n");
+ return false;
+}
+
+/*
+ * Allocate a VNN map on mem_ctx.
+ *
+ * The map starts in CTDB_RECOVERY_ACTIVE mode with generation
+ * INVALID_GENERATION.  Prints "Memory error" on stderr and returns
+ * NULL if allocation fails.
+ */
+static struct vnn_map *vnnmap_init(TALLOC_CTX *mem_ctx)
+{
+	struct vnn_map *vm = talloc_zero(mem_ctx, struct vnn_map);
+
+	if (vm == NULL) {
+		fprintf(stderr, "Memory error\n");
+		return NULL;
+	}
+
+	vm->generation = INVALID_GENERATION;
+	vm->recmode = CTDB_RECOVERY_ACTIVE;
+
+	return vm;
+}
+
+/* Read a vnn map from stdin.  Expected input, one number per line:
+ *   <GENERATION>
+ *   <LMASTER0>
+ *   <LMASTER1>
+ *   ...
+ * EOF or a blank line terminates input.
+ *
+ * Returns true if at least one lmaster entry was read.
+ */
+
+static bool vnnmap_parse(struct vnn_map *vnn_map)
+{
+	char buf[1024];
+
+	for (;;) {
+		uint32_t value;
+		char *nl;
+
+		if (fgets(buf, sizeof(buf), stdin) == NULL) {
+			break;
+		}
+		if (buf[0] == '\n') {
+			break;
+		}
+
+		/* Drop the trailing newline, if any */
+		nl = strchr(buf, '\n');
+		if (nl != NULL) {
+			*nl = '\0';
+		}
+
+		value = (uint32_t) strtol(buf, NULL, 0);
+
+		/* A number read while the generation is still unset
+		 * becomes the generation */
+		if (vnn_map->generation == INVALID_GENERATION) {
+			vnn_map->generation = value;
+			continue;
+		}
+
+		vnn_map->map = talloc_realloc(vnn_map, vnn_map->map, uint32_t,
+					      vnn_map->size + 1);
+		if (vnn_map->map == NULL) {
+			fprintf(stderr, "Memory error\n");
+			goto fail;
+		}
+
+		vnn_map->map[vnn_map->size] = value;
+		vnn_map->size += 1;
+	}
+
+	if (vnn_map->size != 0) {
+		DEBUG(DEBUG_INFO, ("Parsing vnnmap done\n"));
+		return true;
+	}
+
+fail:
+	fprintf(stderr, "Parsing vnnmap failed\n");
+	return false;
+}
+
+/*
+ * Read the recovery lock setting: one non-empty line from stdin, stored
+ * (without its newline) in ctdb->reclock.  One further line is then
+ * consumed and discarded.
+ *
+ * Returns false on EOF, on a blank first line or on allocation failure.
+ */
+static bool reclock_parse(struct ctdbd_context *ctdb)
+{
+	char buf[1024];
+	char *nl;
+
+	if (fgets(buf, sizeof(buf), stdin) == NULL) {
+		goto fail;
+	}
+	if (buf[0] == '\n') {
+		goto fail;
+	}
+
+	/* Drop the trailing newline, if any */
+	nl = strchr(buf, '\n');
+	if (nl != NULL) {
+		*nl = '\0';
+	}
+
+	ctdb->reclock = talloc_strdup(ctdb, buf);
+	if (ctdb->reclock == NULL) {
+		goto fail;
+	}
+
+	/* Swallow possible blank line following section.  The result is
+	 * tested only because picky compiler settings reject an ignored
+	 * return value.
+	 */
+	if (fgets(buf, sizeof(buf), stdin) == NULL) {
+		;
+	}
+	DEBUG(DEBUG_INFO, ("Parsing reclock done\n"));
+	return true;
+
+fail:
+	fprintf(stderr, "Parsing reclock failed\n");
+	return false;
+}
+
+/*
+ * Allocate an empty database map on mem_ctx that remembers a private
+ * copy of dbdir.  Returns NULL if either allocation fails.
+ */
+static struct database_map *dbmap_init(TALLOC_CTX *mem_ctx,
+				       const char *dbdir)
+{
+	struct database_map *map = talloc_zero(mem_ctx, struct database_map);
+
+	if (map == NULL) {
+		return NULL;
+	}
+
+	map->dbdir = talloc_strdup(map, dbdir);
+	if (map->dbdir != NULL) {
+		return map;
+	}
+
+	talloc_free(map);
+	return NULL;
+}
+
+/* Read a database map from stdin.  Each line looks like:
+ *  <ID> <NAME> [FLAGS] [SEQ_NUM]
+ * EOF or a blank line terminates input.
+ *
+ * By default, flags and seq_num are 0.  FLAGS are any of PERSISTENT,
+ * STICKY, READONLY and REPLICATED.  A token starting with a digit is
+ * taken as SEQ_NUM and is only accepted once PERSISTENT or REPLICATED
+ * has been seen on the line.
+ *
+ * Returns true if at least one database was added, false on
+ * allocation failure, on a seq_num for a volatile database, or if
+ * nothing was parsed.
+ */
+
+static bool dbmap_parse(struct database_map *db_map)
+{
+	char line[1024];
+
+	while ((fgets(line, sizeof(line), stdin) != NULL)) {
+		uint32_t id;
+		uint8_t flags = 0;
+		/* 64-bit to match struct database; a 32-bit local would
+		 * silently truncate large sequence numbers */
+		uint64_t seq_num = 0;
+		char *tok, *t;
+		char *name;
+		struct database *db;
+
+		if (line[0] == '\n') {
+			break;
+		}
+
+		/* Get rid of pesky newline */
+		if ((t = strchr(line, '\n')) != NULL) {
+			*t = '\0';
+		}
+
+		/* Get ID */
+		tok = strtok(line, " \t");
+		if (tok == NULL) {
+			fprintf(stderr, "bad line (%s) - missing ID\n", line);
+			continue;
+		}
+		id = (uint32_t)strtoul(tok, NULL, 0);
+
+		/* Get NAME */
+		tok = strtok(NULL, " \t");
+		if (tok == NULL) {
+			fprintf(stderr, "bad line (%s) - missing NAME\n", line);
+			continue;
+		}
+		name = talloc_strdup(db_map, tok);
+		if (name == NULL) {
+			goto fail;
+		}
+
+		/* Get flags */
+		tok = strtok(NULL, " \t");
+		while (tok != NULL) {
+			if (strcmp(tok, "PERSISTENT") == 0) {
+				flags |= CTDB_DB_FLAGS_PERSISTENT;
+			} else if (strcmp(tok, "STICKY") == 0) {
+				flags |= CTDB_DB_FLAGS_STICKY;
+			} else if (strcmp(tok, "READONLY") == 0) {
+				flags |= CTDB_DB_FLAGS_READONLY;
+			} else if (strcmp(tok, "REPLICATED") == 0) {
+				flags |= CTDB_DB_FLAGS_REPLICATED;
+			} else if (tok[0] >= '0'&& tok[0] <= '9') {
+				uint8_t nv = CTDB_DB_FLAGS_PERSISTENT |
+					     CTDB_DB_FLAGS_REPLICATED;
+
+				if ((flags & nv) == 0) {
+					fprintf(stderr,
+						"seq_num for volatile db\n");
+					goto fail;
+				}
+				seq_num = (uint64_t)strtoull(tok, NULL, 0);
+			}
+
+			tok = strtok(NULL, " \t");
+		}
+
+		db = talloc_zero(db_map, struct database);
+		if (db == NULL) {
+			goto fail;
+		}
+
+		db->id = id;
+		db->name = talloc_steal(db, name);
+		db->path = talloc_asprintf(db, "%s/%s", db_map->dbdir, name);
+		if (db->path == NULL) {
+			talloc_free(db);
+			goto fail;
+		}
+		db->flags = flags;
+		db->seq_num = seq_num;
+
+		DLIST_ADD_END(db_map->db, db);
+	}
+
+	if (db_map->db == NULL) {
+		goto fail;
+	}
+
+	DEBUG(DEBUG_INFO, ("Parsing dbmap done\n"));
+	return true;
+
+fail:
+	DEBUG(DEBUG_INFO, ("Parsing dbmap failed\n"));
+	return false;
+
+}
+
+/*
+ * Look up a database by ID.  Returns the first list entry whose id
+ * equals db_id, or NULL if there is none.
+ */
+static struct database *database_find(struct database_map *db_map,
+				      uint32_t db_id)
+{
+	struct database *cur = db_map->db;
+
+	while (cur != NULL && cur->id != db_id) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/* Return the number of databases linked into db_map. */
+static int database_count(struct database_map *db_map)
+{
+	struct database *cur = db_map->db;
+	int total = 0;
+
+	while (cur != NULL) {
+		total++;
+		cur = cur->next;
+	}
+
+	return total;
+}
+
+/*
+ * Translate CTDB database flags into tdb_open() flags.
+ *
+ * Persistent databases use TDB_DEFAULT; everything else (volatile and
+ * replicated) uses TDB_NOSYNC|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH.
+ * TDB_DISALLOW_NESTING is always added.
+ */
+static int database_flags(uint8_t db_flags)
+{
+	const int non_persistent = TDB_NOSYNC |
+				   TDB_CLEAR_IF_FIRST |
+				   TDB_INCOMPATIBLE_HASH;
+	int base;
+
+	base = (db_flags & CTDB_DB_FLAGS_PERSISTENT) ? TDB_DEFAULT
+						     : non_persistent;
+
+	return base | TDB_DISALLOW_NESTING;
+}
+
+/*
+ * Create a new database called name with the given CTDB_DB_FLAGS_*
+ * flags, open its tdb file at "<dbdir>/<name>" (creating it if
+ * necessary) and append it to db_map.
+ *
+ * The database ID is the tdb Jenkins hash of the name, including its
+ * terminating NUL.
+ *
+ * Returns the new database, or NULL on allocation or tdb_open()
+ * failure.
+ */
+static struct database *database_new(struct database_map *db_map,
+				     const char *name, uint8_t flags)
+{
+	struct database *db;
+	TDB_DATA key;
+	int tdb_flags;
+
+	db = talloc_zero(db_map, struct database);
+	if (db == NULL) {
+		return NULL;
+	}
+
+	db->name = talloc_strdup(db, name);
+	if (db->name == NULL) {
+		goto nomem;
+	}
+
+	db->path = talloc_asprintf(db, "%s/%s", db_map->dbdir, name);
+	if (db->path == NULL) {
+		goto nomem;
+	}
+
+	key.dsize = strlen(db->name) + 1;
+	key.dptr = discard_const(db->name);
+
+	db->id = tdb_jenkins_hash(&key);
+	db->flags = flags;
+
+	tdb_flags = database_flags(flags);
+
+	db->tdb = tdb_open(db->path, 8192, tdb_flags, O_CREAT|O_RDWR, 0644);
+	if (db->tdb == NULL) {
+		/* Not a memory problem, so don't report it as one */
+		DBG_ERR("tdb_open\n");
+		talloc_free(db);
+		return NULL;
+	}
+
+	DLIST_ADD_END(db_map->db, db);
+	return db;
+
+nomem:
+	DBG_ERR("Memory error\n");
+	talloc_free(db);
+	return NULL;
+
+}
+
+/*
+ * Store a record (header followed by data) under key.
+ *
+ * A record is written when it has data, or when the database is
+ * volatile (neither persistent nor replicated) and header->rsn is 0.
+ * Otherwise an empty record is deleted if its rsn is greater than 0 and
+ * silently ignored if not.
+ *
+ * Returns EINVAL if the database has no open tdb, otherwise the result
+ * of the tdb operation (0 when nothing had to be done).
+ */
+static int ltdb_store(struct database *db, TDB_DATA key,
+		      struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	bool is_volatile;
+	bool write_record;
+	TDB_DATA parts[2];
+
+	if (db->tdb == NULL) {
+		return EINVAL;
+	}
+
+	is_volatile = !((db->flags & CTDB_DB_FLAGS_PERSISTENT) ||
+			(db->flags & CTDB_DB_FLAGS_REPLICATED));
+
+	write_record = (data.dsize > 0) ||
+		       (is_volatile && header->rsn == 0);
+
+	if (!write_record) {
+		if (header->rsn > 0) {
+			return tdb_delete(db->tdb, key);
+		}
+		return 0;
+	}
+
+	/* Header and payload are written as two vectors */
+	parts[0].dptr = (uint8_t *)header;
+	parts[0].dsize = ctdb_ltdb_header_len(header);
+
+	parts[1].dptr = data.dptr;
+	parts[1].dsize = data.dsize;
+
+	return tdb_storev(db->tdb, key, parts, 2, TDB_REPLACE);
+}
+
+/*
+ * Fetch the record stored under key.
+ *
+ * On success *header holds the record header and *data a copy of the
+ * payload allocated on mem_ctx.  If no record with a valid header can
+ * be read, *header is reset to all zeros, passed to ltdb_store() with
+ * empty data, and *data is set to tdb_null.
+ *
+ * Returns 0 on success, EINVAL if the database has no open tdb,
+ * ENOMEM if the payload cannot be copied, or the ltdb_store() error.
+ */
+static int ltdb_fetch(struct database *db, TDB_DATA key,
+		      struct ctdb_ltdb_header *header,
+		      TALLOC_CTX *mem_ctx, TDB_DATA *data)
+{
+	TDB_DATA raw;
+	size_t np;
+	int ret;
+
+	if (db->tdb == NULL) {
+		return EINVAL;
+	}
+
+	raw = tdb_fetch(db->tdb, key);
+	ret = ctdb_ltdb_header_pull(raw.dptr, raw.dsize, header, &np);
+	if (ret == 0) {
+		/* Valid record: hand back a talloc copy of the payload */
+		data->dsize = raw.dsize - ctdb_ltdb_header_len(header);
+		data->dptr = talloc_memdup(
+			mem_ctx,
+			raw.dptr + ctdb_ltdb_header_len(header),
+			data->dsize);
+
+		free(raw.dptr);
+
+		return (data->dptr == NULL) ? ENOMEM : 0;
+	}
+
+	/* Missing or malformed record: start from an empty header */
+	if (raw.dptr != NULL) {
+		free(raw.dptr);
+	}
+
+	*header = (struct ctdb_ltdb_header) {
+		.rsn = 0,
+		.dmaster = 0,
+		.flags = 0,
+	};
+
+	ret = ltdb_store(db, key, header, tdb_null);
+	if (ret == 0) {
+		*data = tdb_null;
+	}
+
+	return ret;
+}
+
+/*
+ * Get the sequence number of a database.
+ *
+ * Without an open tdb the value stored in db->seq_num is returned.
+ * Otherwise the record stored under CTDB_DB_SEQNUM_KEY is fetched and
+ * decoded; an empty record yields 0.
+ *
+ * Returns 0 on success or the error from ltdb_fetch() or
+ * ctdb_uint64_pull(); on a decode error *seqnum is set to 0.
+ */
+static int database_seqnum(struct database *db, uint64_t *seqnum)
+{
+	const char *keyname = CTDB_DB_SEQNUM_KEY;
+	TDB_DATA key, data;
+	struct ctdb_ltdb_header header;
+	size_t np;
+	int ret;
+
+	if (db->tdb == NULL) {
+		*seqnum = db->seq_num;
+		return 0;
+	}
+
+	key.dptr = discard_const(keyname);
+	key.dsize = strlen(keyname) + 1;
+
+	ret = ltdb_fetch(db, key, &header, db, &data);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (data.dsize == 0) {
+		/*
+		 * ltdb_fetch() can return a zero-length allocation on
+		 * db for a header-only record; release it so repeated
+		 * calls don't accumulate memory on db.
+		 */
+		TALLOC_FREE(data.dptr);
+		*seqnum = 0;
+		return 0;
+	}
+
+	ret = ctdb_uint64_pull(data.dptr, data.dsize, seqnum, &np);
+	talloc_free(data.dptr);
+	if (ret != 0) {
+		*seqnum = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Per-record callback for ctdb_rec_buffer_traverse(), used by
+ * ltdb_transaction().
+ *
+ * The new header is extracted from the front of data.  The update is
+ * rejected with -1 if the stored record has data and either a higher
+ * RSN, or the same RSN but a different data size.  Otherwise the
+ * record is written with ltdb_store().
+ *
+ * private_data is the struct database being updated; reqid and
+ * no_header are not used.
+ *
+ * Returns 0 on success, EINVAL without an open tdb, ENOMEM on
+ * allocation failure, -1 for a rejected update, or the error from
+ * header extraction, fetch or store.
+ */
+static int ltdb_transaction_update(uint32_t reqid,
+				   struct ctdb_ltdb_header *no_header,
+				   TDB_DATA key, TDB_DATA data,
+				   void *private_data)
+{
+	struct database *db = (struct database *)private_data;
+	TALLOC_CTX *tmp_ctx;
+	struct ctdb_ltdb_header header = { 0 }, oldheader;
+	TDB_DATA olddata;
+	bool reject;
+	int ret;
+
+	if (db->tdb == NULL) {
+		return EINVAL;
+	}
+
+	ret = ctdb_ltdb_header_extract(&data, &header);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/*
+	 * Allocate the scratch context only once it is needed and free
+	 * it on every path, so failed updates don't leak onto db.
+	 */
+	tmp_ctx = talloc_new(db);
+	if (tmp_ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ltdb_fetch(db, key, &oldheader, tmp_ctx, &olddata);
+	if (ret != 0) {
+		talloc_free(tmp_ctx);
+		return ret;
+	}
+
+	/* Only sizes and headers are compared, so the old payload can
+	 * be released straight after this check */
+	reject = (olddata.dsize > 0) &&
+		 (oldheader.rsn > header.rsn ||
+		  (oldheader.rsn == header.rsn &&
+		   olddata.dsize != data.dsize));
+
+	talloc_free(tmp_ctx);
+
+	if (reject) {
+		return -1;
+	}
+
+	ret = ltdb_store(db, key, &header, data);
+	return ret;
+}
+
+/*
+ * Apply all records in recbuf to the database inside a single tdb
+ * transaction.
+ *
+ * If any record is rejected the transaction is cancelled and the
+ * error from the traverse is returned; the transaction is committed
+ * only when every record was applied.
+ *
+ * Returns 0 on success, EINVAL without an open tdb, or the failing
+ * step's error.
+ */
+static int ltdb_transaction(struct database *db,
+			    struct ctdb_rec_buffer *recbuf)
+{
+	int ret;
+
+	if (db->tdb == NULL) {
+		return EINVAL;
+	}
+
+	ret = tdb_transaction_start(db->tdb);
+	if (ret == -1) {
+		return ret;
+	}
+
+	ret = ctdb_rec_buffer_traverse(recbuf, ltdb_transaction_update, db);
+	if (ret != 0) {
+		/* Don't commit a cancelled transaction, and keep the
+		 * traverse error for the caller */
+		tdb_transaction_cancel(db->tdb);
+		return ret;
+	}
+
+	ret = tdb_transaction_commit(db->tdb);
+	return ret;
+}
+
+/*
+ * Read the known public IPs with ipalloc_read_known_ips() and store
+ * the result in ctdb->known_ips.
+ *
+ * numnodes must be non-zero, i.e. the node map has to be parsed first.
+ * Returns true only if a non-empty IP list was read.
+ */
+static bool public_ips_parse(struct ctdbd_context *ctdb,
+			     uint32_t numnodes)
+{
+	struct ctdb_public_ip_list *ips;
+
+	if (numnodes == 0) {
+		D_ERR("Must initialise nodemap before public IPs\n");
+		return false;
+	}
+
+	ips = ipalloc_read_known_ips(ctdb, numnodes, false);
+	ctdb->known_ips = ips;
+
+	if (ips == NULL || ips->num == 0) {
+		D_INFO("Parsing public IPs failed\n");
+		return false;
+	}
+
+	D_INFO("Parsing public IPs done\n");
+	return true;
+}
+
+/* Read information about controls to fail. Format is:
+ * <opcode> <pnn> {ERROR|TIMEOUT} <comment>
+ *
+ * Lines are read from stdin until EOF or a blank line.  Each valid
+ * line is added to ctdb->control_failures.  A line with a missing
+ * field is logged and skipped; an error token other than ERROR or
+ * TIMEOUT aborts parsing.
+ *
+ * Returns true if at least one failure was recorded, false on
+ * allocation failure, on an invalid error token, or if nothing was
+ * parsed.
+ */
+static bool control_failures_parse(struct ctdbd_context *ctdb)
+{
+ char line[1024];
+
+ while ((fgets(line, sizeof(line), stdin) != NULL)) {
+ char *tok, *t;
+ enum ctdb_controls opcode;
+ uint32_t pnn;
+ const char *error;
+ const char *comment;
+ struct fake_control_failure *failure = NULL;
+
+ if (line[0] == '\n') {
+ break;
+ }
+
+ /* Get rid of pesky newline */
+ if ((t = strchr(line, '\n')) != NULL) {
+ *t = '\0';
+ }
+
+ /* Get opcode */
+ tok = strtok(line, " \t");
+ if (tok == NULL) {
+ D_ERR("bad line (%s) - missing opcode\n", line);
+ continue;
+ }
+ opcode = (enum ctdb_controls)strtoul(tok, NULL, 0); /* numeric opcode, not a name */
+
+ /* Get PNN */
+ tok = strtok(NULL, " \t");
+ if (tok == NULL) {
+ D_ERR("bad line (%s) - missing PNN\n", line);
+ continue;
+ }
+ pnn = (uint32_t)strtoul(tok, NULL, 0);
+
+ /* Get error */
+ tok = strtok(NULL, " \t");
+ if (tok == NULL) {
+ D_ERR("bad line (%s) - missing errno\n", line);
+ continue;
+ }
+ error = talloc_strdup(ctdb, tok); /* allocated on ctdb; not freed on the skip/fail paths below */
+ if (error == NULL) {
+ goto fail;
+ }
+ if (strcmp(error, "ERROR") != 0 &&
+ strcmp(error, "TIMEOUT") != 0) {
+ D_ERR("bad line (%s) "
+ "- error must be \"ERROR\" or \"TIMEOUT\"\n",
+ line);
+ goto fail;
+ }
+
+ /* Get comment */
+ tok = strtok(NULL, "\n"); /* rest of line */
+ if (tok == NULL) {
+ D_ERR("bad line (%s) - missing comment\n", line);
+ continue;
+ }
+ comment = talloc_strdup(ctdb, tok);
+ if (comment == NULL) {
+ goto fail;
+ }
+
+ failure = talloc_zero(ctdb, struct fake_control_failure);
+ if (failure == NULL) {
+ goto fail;
+ }
+
+ failure->opcode = opcode;
+ failure->pnn = pnn;
+ failure->error = error;
+ failure->comment = comment;
+
+ DLIST_ADD(ctdb->control_failures, failure);
+ }
+
+ if (ctdb->control_failures == NULL) {
+ goto fail;
+ }
+
+ D_INFO("Parsing fake control failures done\n");
+ return true;
+
+fail:
+ D_INFO("Parsing fake control failures failed\n");
+ return false;
+}
+
+/*
+ * Read the run state: one non-empty line from stdin, converted with
+ * ctdb_runstate_from_string() and stored in ctdb->runstate.  One
+ * further line is then consumed and discarded.
+ *
+ * Returns false on EOF, on a blank first line or if the string maps
+ * to CTDB_RUNSTATE_UNKNOWN.
+ */
+static bool runstate_parse(struct ctdbd_context *ctdb)
+{
+	char buf[1024];
+	char *nl;
+
+	if (fgets(buf, sizeof(buf), stdin) == NULL) {
+		goto fail;
+	}
+	if (buf[0] == '\n') {
+		goto fail;
+	}
+
+	/* Drop the trailing newline, if any */
+	nl = strchr(buf, '\n');
+	if (nl != NULL) {
+		*nl = '\0';
+	}
+
+	ctdb->runstate = ctdb_runstate_from_string(buf);
+	if (ctdb->runstate == CTDB_RUNSTATE_UNKNOWN) {
+		goto fail;
+	}
+
+	/* Swallow possible blank line following section.  The result is
+	 * tested only because picky compiler settings reject an ignored
+	 * return value.
+	 */
+	if (fgets(buf, sizeof(buf), stdin) == NULL) {
+		;
+	}
+	D_INFO("Parsing runstate done\n");
+	return true;
+
+fail:
+	D_ERR("Parsing runstate failed\n");
+	return false;
+}
+
+/*
+ * Manage clients
+ */
+
+/*
+ * talloc destructor for a client entry: unlink it from the owning
+ * context's client list.  Always returns 0.
+ */
+static int ctdb_client_destructor(struct ctdb_client *client)
+{
+	struct ctdbd_context *owner = client->ctdb;
+
+	DLIST_REMOVE(owner->client_list, client);
+
+	return 0;
+}
+
+/*
+ * Register a client in ctdb->client_list.
+ *
+ * The entry is allocated as a talloc child of client_state and a
+ * destructor is installed that unlinks it from the list.
+ *
+ * Returns 0 on success or ENOMEM if allocation fails.
+ */
+static int client_add(struct ctdbd_context *ctdb, pid_t client_pid,
+		      void *client_state)
+{
+	struct ctdb_client *entry;
+
+	entry = talloc_zero(client_state, struct ctdb_client);
+	if (entry == NULL) {
+		return ENOMEM;
+	}
+
+	entry->state = client_state;
+	entry->pid = client_pid;
+	entry->ctdb = ctdb;
+
+	DLIST_ADD(ctdb->client_list, entry);
+	talloc_set_destructor(entry, ctdb_client_destructor);
+
+	return 0;
+}
+
+/*
+ * Return the state pointer registered for client_pid, or NULL if no
+ * client with that PID is in ctdb->client_list.
+ */
+static void *client_find(struct ctdbd_context *ctdb, pid_t client_pid)
+{
+	struct ctdb_client *cur = ctdb->client_list;
+
+	while (cur != NULL) {
+		if (cur->pid == client_pid) {
+			return cur->state;
+		}
+		cur = cur->next;
+	}
+
+	return NULL;
+}
+
+/*
+ * CTDB context setup
+ */
+
+/*
+ * Pick a random generation number that is neither INVALID_GENERATION
+ * nor equal to old_generation.
+ */
+static uint32_t new_generation(uint32_t old_generation)
+{
+	uint32_t candidate;
+
+	do {
+		candidate = random();
+	} while (candidate == INVALID_GENERATION ||
+		 candidate == old_generation);
+
+	return candidate;
+}
+
+static struct ctdbd_context *ctdbd_setup(TALLOC_CTX *mem_ctx,
+ const char *dbdir)
+{ /* Build the fake daemon context on mem_ctx from the description on stdin.
+ * Input is a sequence of sections, each introduced by a keyword line
+ * (NODEMAP, IFACES, VNNMAP, DBMAP, PUBLICIPS, RECLOCK, CONTROLFAILS,
+ * RUNSTATE) and consumed by the matching *_parse() function.
+ * Returns NULL if initialisation fails, a section fails to parse or
+ * an unknown keyword line is seen.
+ */
+ struct ctdbd_context *ctdb;
+ char line[1024];
+ bool status;
+ int ret;
+
+ ctdb = talloc_zero(mem_ctx, struct ctdbd_context);
+ if (ctdb == NULL) {
+ return NULL;
+ }
+
+ ctdb->node_map = nodemap_init(ctdb);
+ if (ctdb->node_map == NULL) {
+ goto fail;
+ }
+
+ ctdb->iface_map = interfaces_init(ctdb);
+ if (ctdb->iface_map == NULL) {
+ goto fail;
+ }
+
+ ctdb->vnn_map = vnnmap_init(ctdb);
+ if (ctdb->vnn_map == NULL) {
+ goto fail;
+ }
+
+ ctdb->db_map = dbmap_init(ctdb, dbdir);
+ if (ctdb->db_map == NULL) {
+ goto fail;
+ }
+
+ ret = srvid_init(ctdb, &ctdb->srv);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ ctdb->runstate = CTDB_RUNSTATE_RUNNING; /* default; a RUNSTATE section may override it */
+
+ while (fgets(line, sizeof(line), stdin) != NULL) { /* each iteration handles one section keyword */
+ char *t;
+
+ if ((t = strchr(line, '\n')) != NULL) {
+ *t = '\0';
+ }
+
+ if (strcmp(line, "NODEMAP") == 0) {
+ status = nodemap_parse(ctdb->node_map);
+ } else if (strcmp(line, "IFACES") == 0) {
+ status = interfaces_parse(ctdb->iface_map);
+ } else if (strcmp(line, "VNNMAP") == 0) {
+ status = vnnmap_parse(ctdb->vnn_map);
+ } else if (strcmp(line, "DBMAP") == 0) {
+ status = dbmap_parse(ctdb->db_map);
+ } else if (strcmp(line, "PUBLICIPS") == 0) {
+ status = public_ips_parse(ctdb,
+ ctdb->node_map->num_nodes); /* fails unless NODEMAP came first */
+ } else if (strcmp(line, "RECLOCK") == 0) {
+ status = reclock_parse(ctdb);
+ } else if (strcmp(line, "CONTROLFAILS") == 0) {
+ status = control_failures_parse(ctdb);
+ } else if (strcmp(line, "RUNSTATE") == 0) {
+ status = runstate_parse(ctdb);
+ } else {
+ fprintf(stderr, "Unknown line %s\n", line);
+ status = false;
+ }
+
+ if (! status) {
+ goto fail;
+ }
+ }
+
+ ctdb->start_time = tevent_timeval_current();
+ ctdb->recovery_start_time = tevent_timeval_current();
+ ctdb->vnn_map->recmode = CTDB_RECOVERY_NORMAL;
+ if (ctdb->vnn_map->generation == INVALID_GENERATION) { /* no generation supplied by a VNNMAP section */
+ ctdb->vnn_map->generation =
+ new_generation(ctdb->vnn_map->generation);
+ }
+ ctdb->recovery_end_time = tevent_timeval_current();
+
+ ctdb->log_level = DEBUG_ERR;
+
+ ctdb_tunable_set_defaults(&ctdb->tun_list);
+
+ return ctdb;
+
+fail:
+ TALLOC_FREE(ctdb); /* releases everything allocated on ctdb above */
+ return NULL;
+}
+
+/*
+ * Sanity-check the parsed configuration.
+ *
+ * An empty node map is accepted as is.  Otherwise node i must have
+ * PNN i, and the current node's PNN must refer to an entry of the
+ * node map.  If the current node is flagged NODE_FLAGS_DISCONNECTED
+ * the process exits with status 0.
+ *
+ * Returns true if the configuration is usable, false (after a message
+ * on stderr) if it is not.
+ */
+static bool ctdbd_verify(struct ctdbd_context *ctdb)
+{
+	struct node_map *nm = ctdb->node_map;
+	struct node *node;
+	unsigned int i;
+
+	if (nm->num_nodes == 0) {
+		return true;
+	}
+
+	/* Make sure all the nodes are in order */
+	for (i=0; i<nm->num_nodes; i++) {
+		node = &nm->node[i];
+		if (node->pnn != i) {
+			fprintf(stderr, "Expected node %u, found %u\n",
+				i, node->pnn);
+			return false;
+		}
+	}
+
+	/*
+	 * The current PNN stays CTDB_UNKNOWN_PNN unless a NODEMAP line
+	 * was flagged CURRENT, so it must be range-checked before it
+	 * is used as an array index.
+	 */
+	if (nm->pnn >= nm->num_nodes) {
+		fprintf(stderr, "No valid CURRENT node in node map\n");
+		return false;
+	}
+
+	node = &nm->node[nm->pnn];
+	if (node->flags & NODE_FLAGS_DISCONNECTED) {
+		DEBUG(DEBUG_INFO, ("Node disconnected, exiting\n"));
+		exit(0);
+	}
+
+	return true;
+}
+
+/*
+ * Doing a recovery
+ */
+
+struct recover_state { /* Per-request state of the recover_send()/recover_recv() request */
+ struct tevent_context *ev; /* event context used for the wakeup timers */
+ struct ctdbd_context *ctdb; /* daemon state updated when recovery completes */
+};
+
+static int recover_check(struct tevent_req *req);
+static void recover_wait_done(struct tevent_req *subreq);
+static void recover_done(struct tevent_req *subreq);
+
+/*
+ * Start a fake recovery.
+ *
+ * Creates the request and schedules the first wakeup through
+ * recover_check().  If that fails the request is marked with the
+ * error and posted to ev.
+ *
+ * Returns the request, or NULL if it cannot be created.
+ */
+static struct tevent_req *recover_send(TALLOC_CTX *mem_ctx,
+				       struct tevent_context *ev,
+				       struct ctdbd_context *ctdb)
+{
+	struct recover_state *state;
+	struct tevent_req *req;
+	int err;
+
+	req = tevent_req_create(mem_ctx, &state, struct recover_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ctdb = ctdb;
+	state->ev = ev;
+
+	err = recover_check(req);
+	if (err == 0) {
+		return req;
+	}
+
+	tevent_req_error(req, err);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * Schedule the next step of a fake recovery one second from now.
+ *
+ * If any node has recovery_disabled set, the wakeup leads to
+ * recover_wait_done(), which checks again.  Otherwise the recovery
+ * start time is recorded and the wakeup leads to recover_done().
+ *
+ * Returns 0 on success or ENOMEM if the timer cannot be created.
+ */
+static int recover_check(struct tevent_req *req)
+{
+	struct recover_state *state = tevent_req_data(
+		req, struct recover_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct tevent_req *wakeup;
+	bool blocked = false;
+	unsigned int n = 0;
+
+	while (n < ctdb->node_map->num_nodes && !blocked) {
+		blocked = ctdb->node_map->node[n].recovery_disabled;
+		n++;
+	}
+
+	wakeup = tevent_wakeup_send(state, state->ev,
+				    tevent_timeval_current_ofs(1, 0));
+	if (wakeup == NULL) {
+		return ENOMEM;
+	}
+
+	if (blocked) {
+		tevent_req_set_callback(wakeup, recover_wait_done, req);
+		return 0;
+	}
+
+	ctdb->recovery_start_time = tevent_timeval_current();
+	tevent_req_set_callback(wakeup, recover_done, req);
+
+	return 0;
+}
+
+/*
+ * Wakeup callback used while recovery is disabled on some node:
+ * collect the timer result and run recover_check() again.  The request
+ * fails with EIO if the wakeup failed, or with the recover_check()
+ * error.
+ */
+static void recover_wait_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	bool woke;
+	int err;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (!woke) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	err = recover_check(req);
+	if (err == 0) {
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Wakeup callback that completes a fake recovery: switch back to
+ * CTDB_RECOVERY_NORMAL, record the end time, pick a new generation
+ * and finish the request.  The request fails with EIO if the wakeup
+ * failed.
+ */
+static void recover_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct recover_state *state = tevent_req_data(
+		req, struct recover_state);
+	struct vnn_map *vnn_map;
+	bool woke;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (!woke) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	vnn_map = state->ctdb->vnn_map;
+
+	vnn_map->recmode = CTDB_RECOVERY_NORMAL;
+	state->ctdb->recovery_end_time = tevent_timeval_current();
+	vnn_map->generation = new_generation(vnn_map->generation);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a recovery request.
+ *
+ * Returns true on success.  On failure returns false and, if perr is
+ * not NULL, stores the error code in *perr.
+ */
+static bool recover_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (!tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return false;
+}
+
+/*
+ * Routines for ctdb_req_header
+ */
+
+/*
+ * Replace the CTDB_CURRENT_NODE placeholder in the source and
+ * destination fields of header with this daemon's own PNN.
+ */
+static void header_fix_pnn(struct ctdb_req_header *header,
+			   struct ctdbd_context *ctdb)
+{
+	uint32_t *fields[2] = { &header->srcnode, &header->destnode };
+	unsigned int n;
+
+	for (n = 0; n < 2; n++) {
+		if (*fields[n] == CTDB_CURRENT_NODE) {
+			*fields[n] = ctdb->node_map->pnn;
+		}
+	}
+}
+
+/*
+ * Build the header for a CTDB_REPLY_CALL answering header: source and
+ * destination are swapped, the request ID is copied and the current
+ * generation is filled in.
+ */
+static struct ctdb_req_header header_reply_call(
+					struct ctdb_req_header *header,
+					struct ctdbd_context *ctdb)
+{
+	return (struct ctdb_req_header) {
+		.ctdb_magic = CTDB_MAGIC,
+		.ctdb_version = CTDB_PROTOCOL,
+		.generation = ctdb->vnn_map->generation,
+		.operation = CTDB_REPLY_CALL,
+		.destnode = header->srcnode,
+		.srcnode = header->destnode,
+		.reqid = header->reqid,
+	};
+}
+
+/*
+ * Build the header for a CTDB_REPLY_CONTROL answering the given request:
+ * source and destination are swapped, the request id is echoed back and
+ * the current vnn map generation is stamped in.
+ */
+static struct ctdb_req_header header_reply_control(
+					struct ctdb_req_header *header,
+					struct ctdbd_context *ctdb)
+{
+	return (struct ctdb_req_header) {
+		.ctdb_magic = CTDB_MAGIC,
+		.ctdb_version = CTDB_PROTOCOL,
+		.generation = ctdb->vnn_map->generation,
+		.operation = CTDB_REPLY_CONTROL,
+		.destnode = header->srcnode,
+		.srcnode = header->destnode,
+		.reqid = header->reqid,
+	};
+}
+
+/*
+ * Build the header for a message sent back in response to the given
+ * request.  The operation is CTDB_REQ_MESSAGE, source and destination
+ * are swapped and the request id is always 0.
+ */
+static struct ctdb_req_header header_reply_message(
+					struct ctdb_req_header *header,
+					struct ctdbd_context *ctdb)
+{
+	return (struct ctdb_req_header) {
+		.ctdb_magic = CTDB_MAGIC,
+		.ctdb_version = CTDB_PROTOCOL,
+		.generation = ctdb->vnn_map->generation,
+		.operation = CTDB_REQ_MESSAGE,
+		.destnode = header->srcnode,
+		.srcnode = header->destnode,
+		.reqid = 0,
+	};
+}
+
+/*
+ * Client state
+ */
+
+/*
+ * State of one client connection.  This is the tevent_req state that the
+ * reply helpers and control handlers below fetch with tevent_req_data().
+ */
+struct client_state {
+ struct tevent_context *ev; /* event context used for writes and sub-requests */
+ int fd; /* presumably the client socket - not used in this part of the file */
+ struct ctdbd_context *ctdb; /* fake daemon context this client talks to */
+ int pnn; /* used as destnode for messages delivered by srvid_handler() */
+ pid_t pid; /* logged as the client pid by srvid_handler_done() */
+ struct comm_context *comm; /* channel passed to comm_write_send() */
+ struct srvid_register_state *rstate; /* NOTE(review): not referenced in this part of the file */
+ int status; /* set to 99 by control_shutdown() */
+};
+
+/*
+ * Send replies to call, controls and messages
+ */
+
+static void client_reply_done(struct tevent_req *subreq);
+
+/*
+ * Marshall a CTDB_REPLY_CALL for the given request header and queue it
+ * for writing on the client connection.  Allocation, marshalling and
+ * queueing failures fail req; completion of the write is handled by
+ * client_reply_done().
+ */
+static void client_send_call(struct tevent_req *req,
+			     struct ctdb_req_header *header,
+			     struct ctdb_reply_call *reply)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_req_header hdr;
+	struct tevent_req *subreq;
+	uint8_t *pkt;
+	size_t need, pkt_len;
+	int err;
+
+	hdr = header_reply_call(header, state->ctdb);
+
+	need = ctdb_reply_call_len(&hdr, reply);
+	err = ctdb_allocate_pkt(state, need, &pkt, &pkt_len);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = ctdb_reply_call_push(&hdr, reply, pkt, &pkt_len);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	subreq = comm_write_send(state, state->ev, state->comm, pkt, pkt_len);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, client_reply_done, req);
+
+	/* Tie the packet buffer's lifetime to the write request */
+	talloc_steal(subreq, pkt);
+}
+
+/*
+ * Marshall a message (CTDB_REQ_MESSAGE) addressed back to the sender of
+ * the given request header and queue it for writing on the client
+ * connection.  Allocation, marshalling and queueing failures fail req;
+ * completion of the write is handled by client_reply_done().
+ */
+static void client_send_message(struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_message_data *message)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_req_header hdr;
+	struct tevent_req *subreq;
+	uint8_t *pkt;
+	size_t need, pkt_len;
+	int err;
+
+	hdr = header_reply_message(header, state->ctdb);
+
+	need = ctdb_req_message_data_len(&hdr, message);
+	err = ctdb_allocate_pkt(state, need, &pkt, &pkt_len);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = ctdb_req_message_data_push(&hdr, message, pkt, &pkt_len);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	DEBUG(DEBUG_INFO, ("message srvid = 0x%"PRIx64"\n", message->srvid));
+
+	subreq = comm_write_send(state, state->ev, state->comm, pkt, pkt_len);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, client_reply_done, req);
+
+	/* Tie the packet buffer's lifetime to the write request */
+	talloc_steal(subreq, pkt);
+}
+
+/*
+ * Marshall a CTDB_REPLY_CONTROL for the given request header and queue
+ * it for writing on the client connection.  Allocation, marshalling and
+ * queueing failures fail req; completion of the write is handled by
+ * client_reply_done().
+ */
+static void client_send_control(struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_reply_control *reply)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_req_header hdr;
+	struct tevent_req *subreq;
+	uint8_t *pkt;
+	size_t need, pkt_len;
+	int err;
+
+	hdr = header_reply_control(header, state->ctdb);
+
+	need = ctdb_reply_control_len(&hdr, reply);
+	err = ctdb_allocate_pkt(state, need, &pkt, &pkt_len);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = ctdb_reply_control_push(&hdr, reply, pkt, &pkt_len);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	DEBUG(DEBUG_INFO, ("reply opcode = %u\n", reply->rdata.opcode));
+
+	subreq = comm_write_send(state, state->ev, state->comm, pkt, pkt_len);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, client_reply_done, req);
+
+	/* Tie the packet buffer's lifetime to the write request */
+	talloc_steal(subreq, pkt);
+}
+
+/*
+ * Completion callback for the writes queued by the client_send_*()
+ * helpers.  A failed write fails the client request with the reported
+ * error; a successful write needs no further action here.
+ */
+static void client_reply_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool ok;
+
+	ok = comm_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Handling protocol - controls
+ */
+
+/*
+ * PROCESS_EXISTS control: the reply status is -1 when client_find()
+ * knows no client with the requested PID, otherwise it is the result of
+ * kill(pid, 0).
+ */
+static void control_process_exists(TALLOC_CTX *mem_ctx,
+				   struct tevent_req *req,
+				   struct ctdb_req_header *header,
+				   struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	struct client_state *found;
+
+	reply.rdata.opcode = request->opcode;
+
+	found = client_find(state->ctdb, request->rdata.data.pid);
+	if (found != NULL) {
+		/* Signal 0 only probes whether the process can be signalled */
+		reply.status = kill(request->rdata.data.pid, 0);
+		reply.errmsg = NULL;
+	} else {
+		reply.status = -1;
+		reply.errmsg = "No client for PID";
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * PING control: the reply status carries the daemon's client count
+ * (ctdb->num_clients).
+ */
+static void control_ping(TALLOC_CTX *mem_ctx,
+			 struct tevent_req *req,
+			 struct ctdb_req_header *header,
+			 struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+
+	reply.errmsg = NULL;
+	reply.status = state->ctdb->num_clients;
+	reply.rdata.opcode = request->opcode;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GETDBPATH control: reply with a copy of the path of the database
+ * identified by the requested db_id.  Status is ENOENT when the database
+ * is unknown and ENOMEM when the path cannot be duplicated.
+ */
+static void control_getdbpath(TALLOC_CTX *mem_ctx,
+			      struct tevent_req *req,
+			      struct ctdb_req_header *header,
+			      struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	db = database_find(state->ctdb->db_map, request->rdata.data.db_id);
+	if (db == NULL) {
+		reply.status = ENOENT;
+		reply.errmsg = "Database not found";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	reply.rdata.data.db_path = talloc_strdup(mem_ctx, db->path);
+	if (reply.rdata.data.db_path == NULL) {
+		reply.status = ENOMEM;
+		reply.errmsg = "Memory error";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GETVNNMAP control: reply with the daemon's current vnn map
+ * (generation, size and map).  The map array itself is not copied; the
+ * reply points at the daemon's own array.
+ */
+static void control_getvnnmap(TALLOC_CTX *mem_ctx,
+			      struct tevent_req *req,
+			      struct ctdb_req_header *header,
+			      struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_vnn_map *out;
+
+	reply.rdata.opcode = request->opcode;
+
+	out = talloc_zero(mem_ctx, struct ctdb_vnn_map);
+	if (out == NULL) {
+		reply.status = ENOMEM;
+		reply.errmsg = "Memory error";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	out->generation = ctdb->vnn_map->generation;
+	out->size = ctdb->vnn_map->size;
+	out->map = ctdb->vnn_map->map;
+
+	reply.rdata.data.vnnmap = out;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_DEBUG control: reply with the daemon's current log level.
+ */
+static void control_get_debug(TALLOC_CTX *mem_ctx,
+			      struct tevent_req *req,
+			      struct ctdb_req_header *header,
+			      struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+
+	reply.status = 0;
+	reply.errmsg = NULL;
+	reply.rdata.opcode = request->opcode;
+	reply.rdata.data.loglevel = (uint32_t)state->ctdb->log_level;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * SET_DEBUG control: store the requested log level in the daemon
+ * context and acknowledge with status 0.
+ */
+static void control_set_debug(TALLOC_CTX *mem_ctx,
+			      struct tevent_req *req,
+			      struct ctdb_req_header *header,
+			      struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+
+	reply.rdata.opcode = request->opcode;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	state->ctdb->log_level = (int)request->rdata.data.loglevel;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_DBMAP control: reply with the id and flags of every database in
+ * the daemon's database list.  Allocation failures are reported with
+ * status -1.
+ */
+static void control_get_dbmap(TALLOC_CTX *mem_ctx,
+			      struct tevent_req *req,
+			      struct ctdb_req_header *header,
+			      struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_dbid_map *dbmap;
+	struct database *cur;
+	unsigned int idx;
+
+	reply.rdata.opcode = request->opcode;
+
+	dbmap = talloc_zero(mem_ctx, struct ctdb_dbid_map);
+	if (dbmap == NULL) {
+		goto nomem;
+	}
+
+	dbmap->num = database_count(ctdb->db_map);
+	dbmap->dbs = talloc_zero_array(dbmap, struct ctdb_dbid, dbmap->num);
+	if (dbmap->dbs == NULL) {
+		goto nomem;
+	}
+
+	/* Walk the list in step with the output array */
+	for (idx = 0, cur = ctdb->db_map->db;
+	     idx < dbmap->num;
+	     idx++, cur = cur->next) {
+		dbmap->dbs[idx] = (struct ctdb_dbid) {
+			.db_id = cur->id,
+			.flags = cur->flags,
+		};
+	}
+
+	reply.rdata.data.dbmap = dbmap;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+	return;
+
+nomem:
+	reply.status = -1;
+	reply.errmsg = "Memory error";
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_RECMODE control: the reply status carries the current recovery
+ * mode (ctdb->vnn_map->recmode).
+ */
+static void control_get_recmode(TALLOC_CTX *mem_ctx,
+				struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+
+	reply.errmsg = NULL;
+	reply.status = state->ctdb->vnn_map->recmode;
+	reply.rdata.opcode = request->opcode;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Context kept by control_set_recmode() while the recovery it started is
+ * in progress; consumed and freed by set_recmode_callback().
+ */
+struct set_recmode_state {
+ struct tevent_req *req; /* client request the deferred reply is sent on */
+ struct ctdbd_context *ctdb; /* daemon context */
+ struct ctdb_req_header header; /* copy of the header of the SET_RECMODE request */
+ struct ctdb_reply_control reply; /* opcode is set up front; status/errmsg are set in the callback */
+};
+
+/*
+ * Completion callback for the recovery started by control_set_recmode():
+ * send the deferred SET_RECMODE reply (status 0 on success, otherwise
+ * the recovery's error code) and free the saved state.
+ */
+static void set_recmode_callback(struct tevent_req *subreq)
+{
+	struct set_recmode_state *substate = tevent_req_callback_data(
+		subreq, struct set_recmode_state);
+	int err;
+	bool ok;
+
+	ok = recover_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		substate->reply.status = 0;
+		substate->reply.errmsg = NULL;
+	} else {
+		substate->reply.status = err;
+		substate->reply.errmsg = "recovery failed";
+	}
+
+	client_send_control(substate->req, &substate->header, &substate->reply);
+	talloc_free(substate);
+}
+
+/*
+ * SET_RECMODE control: start a recovery and defer the reply until it
+ * completes.
+ *
+ * A client may not set recmode to NORMAL; that request is rejected with
+ * status -1.  Otherwise a recover request is started, recmode is set to
+ * CTDB_RECOVERY_ACTIVE and the reply is sent from set_recmode_callback().
+ * If the recovery cannot be started, an error reply (status -1) is sent
+ * immediately.
+ */
+static void control_set_recmode(TALLOC_CTX *mem_ctx,
+				struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct tevent_req *subreq;
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct set_recmode_state *substate;
+	struct ctdb_reply_control reply;
+
+	reply.rdata.opcode = request->opcode;
+
+	if (request->rdata.data.recmode == CTDB_RECOVERY_NORMAL) {
+		reply.status = -1;
+		reply.errmsg = "Client cannot set recmode to NORMAL";
+		goto fail;
+	}
+
+	/*
+	 * Allocated on ctdb rather than mem_ctx because it has to outlive
+	 * this call; set_recmode_callback() frees it.
+	 */
+	substate = talloc_zero(ctdb, struct set_recmode_state);
+	if (substate == NULL) {
+		reply.status = -1;
+		reply.errmsg = "Memory error";
+		goto fail;
+	}
+
+	substate->req = req;
+	substate->ctdb = ctdb;
+	substate->header = *header;
+	substate->reply.rdata.opcode = request->opcode;
+
+	subreq = recover_send(substate, state->ev, state->ctdb);
+	if (subreq == NULL) {
+		talloc_free(substate);
+		/*
+		 * Fill in the reply before jumping to fail: otherwise
+		 * status and errmsg would be sent uninitialised.
+		 */
+		reply.status = -1;
+		reply.errmsg = "Memory error";
+		goto fail;
+	}
+	tevent_req_set_callback(subreq, set_recmode_callback, substate);
+
+	ctdb->vnn_map->recmode = CTDB_RECOVERY_ACTIVE;
+	return;
+
+fail:
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * DB_ATTACH control: reply with the id of the database with the
+ * requested name, creating it with flags 0 via database_new() when no
+ * database of that name exists yet.
+ */
+static void control_db_attach(TALLOC_CTX *mem_ctx,
+			      struct tevent_req *req,
+			      struct ctdb_req_header *header,
+			      struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	/* Leaves db == NULL when no existing database matches by name */
+	for (db = ctdb->db_map->db; db != NULL; db = db->next) {
+		if (strcmp(db->name, request->rdata.data.db_name) == 0) {
+			break;
+		}
+	}
+
+	if (db == NULL) {
+		db = database_new(ctdb->db_map,
+				  request->rdata.data.db_name, 0);
+		if (db == NULL) {
+			reply.status = -1;
+			reply.errmsg = "Failed to attach database";
+			client_send_control(req, header, &reply);
+			return;
+		}
+	}
+
+	reply.rdata.data.db_id = db->id;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+static void srvid_handler_done(struct tevent_req *subreq);
+
+/*
+ * Handler registered by control_register_srvid(): wrap the data received
+ * for srvid in a CTDB_REQ_MESSAGE packet and queue it for writing to the
+ * client whose state is passed as private_data.  Failures to build or
+ * queue the packet are silently dropped.
+ */
+static void srvid_handler(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	struct client_state *state = talloc_get_type_abort(
+		private_data, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_req_header hdr;
+	struct ctdb_req_message_data msg;
+	struct tevent_req *subreq;
+	uint8_t *pkt;
+	size_t need, pkt_len;
+	int err;
+
+	hdr = (struct ctdb_req_header) {
+		.ctdb_magic = CTDB_MAGIC,
+		.ctdb_version = CTDB_PROTOCOL,
+		.generation = ctdb->vnn_map->generation,
+		.operation = CTDB_REQ_MESSAGE,
+		.destnode = state->pnn,
+		.srcnode = ctdb->node_map->recmaster,
+		.reqid = 0,
+	};
+
+	msg = (struct ctdb_req_message_data) {
+		.srvid = srvid,
+		.data = data,
+	};
+
+	need = ctdb_req_message_data_len(&hdr, &msg);
+	err = ctdb_allocate_pkt(state, need, &pkt, &pkt_len);
+	if (err != 0) {
+		return;
+	}
+
+	err = ctdb_req_message_data_push(&hdr, &msg, pkt, &pkt_len);
+	if (err != 0) {
+		talloc_free(pkt);
+		return;
+	}
+
+	subreq = comm_write_send(state, state->ev, state->comm, pkt, pkt_len);
+	if (subreq == NULL) {
+		talloc_free(pkt);
+		return;
+	}
+	tevent_req_set_callback(subreq, srvid_handler_done, state);
+
+	/* Tie the packet buffer's lifetime to the write request */
+	talloc_steal(subreq, pkt);
+}
+
+/*
+ * Completion callback for the write queued by srvid_handler().  A failed
+ * write is only logged; nothing else is done with the result.
+ */
+static void srvid_handler_done(struct tevent_req *subreq)
+{
+	struct client_state *state = tevent_req_callback_data(
+		subreq, struct client_state);
+	bool written;
+	int err;
+
+	written = comm_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (written) {
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,
+	      ("Failed to dispatch message to client pid=%u, ret=%d\n",
+	       state->pid,
+	       err));
+}
+
+/*
+ * REGISTER_SRVID control: register srvid_handler() for the requested
+ * srvid on behalf of this client.  Status is -1 if registration fails.
+ */
+static void control_register_srvid(TALLOC_CTX *mem_ctx,
+				   struct tevent_req *req,
+				   struct ctdb_req_header *header,
+				   struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	int err;
+
+	reply.rdata.opcode = request->opcode;
+
+	err = srvid_register(state->ctdb->srv, state, request->srvid,
+			     srvid_handler, state);
+	if (err == 0) {
+		DEBUG(DEBUG_INFO,
+		      ("Register srvid 0x%"PRIx64"\n", request->srvid));
+
+		reply.status = 0;
+		reply.errmsg = NULL;
+	} else {
+		reply.status = -1;
+		reply.errmsg = "Memory error";
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * DEREGISTER_SRVID control: remove this client's registration for the
+ * requested srvid.  Status is -1 if srvid_deregister() fails.
+ */
+static void control_deregister_srvid(TALLOC_CTX *mem_ctx,
+				     struct tevent_req *req,
+				     struct ctdb_req_header *header,
+				     struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	int err;
+
+	reply.rdata.opcode = request->opcode;
+
+	err = srvid_deregister(state->ctdb->srv, request->srvid, state);
+	if (err == 0) {
+		DEBUG(DEBUG_INFO,
+		      ("Deregister srvid 0x%"PRIx64"\n", request->srvid));
+
+		reply.status = 0;
+		reply.errmsg = NULL;
+	} else {
+		reply.status = -1;
+		reply.errmsg = "srvid not registered";
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_DBNAME control: reply with a copy of the name of the database
+ * identified by the requested db_id.  Status is ENOENT when the database
+ * is unknown and ENOMEM when the name cannot be duplicated.
+ */
+static void control_get_dbname(TALLOC_CTX *mem_ctx,
+			       struct tevent_req *req,
+			       struct ctdb_req_header *header,
+			       struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	db = database_find(state->ctdb->db_map, request->rdata.data.db_id);
+	if (db == NULL) {
+		reply.status = ENOENT;
+		reply.errmsg = "Database not found";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	reply.rdata.data.db_name = talloc_strdup(mem_ctx, db->name);
+	if (reply.rdata.data.db_name == NULL) {
+		reply.status = ENOMEM;
+		reply.errmsg = "Memory error";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_PID control: the reply status carries the PID of this process.
+ */
+static void control_get_pid(TALLOC_CTX *mem_ctx,
+			    struct tevent_req *req,
+			    struct ctdb_req_header *header,
+			    struct ctdb_req_control *request)
+{
+	struct ctdb_reply_control reply;
+
+	reply.errmsg = NULL;
+	reply.status = getpid();
+	reply.rdata.opcode = request->opcode;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_PNN control: the reply status carries the destination node number
+ * taken from the request header.
+ */
+static void control_get_pnn(TALLOC_CTX *mem_ctx,
+			    struct tevent_req *req,
+			    struct ctdb_req_header *header,
+			    struct ctdb_req_control *request)
+{
+	struct ctdb_reply_control reply;
+
+	reply.errmsg = NULL;
+	reply.status = header->destnode;
+	reply.rdata.opcode = request->opcode;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * SHUTDOWN control: record status 99 in the client state.  No reply is
+ * sent to the client from here.
+ */
+static void control_shutdown(TALLOC_CTX *mem_ctx,
+			     struct tevent_req *req,
+			     struct ctdb_req_header *hdr,
+			     struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+
+	cstate->status = 99;
+}
+
+/*
+ * SET_TUNABLE control: set the named tunable to the requested value.
+ * Status is -1 if ctdb_tunable_set_value() fails, 1 if it succeeds but
+ * reports the tunable as obsolete, and 0 otherwise.
+ */
+static void control_set_tunable(TALLOC_CTX *mem_ctx,
+				struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	bool ok, obsolete;
+
+	reply.rdata.opcode = request->opcode;
+	reply.errmsg = NULL;
+
+	ok = ctdb_tunable_set_value(&state->ctdb->tun_list,
+				    request->rdata.data.tunable->name,
+				    request->rdata.data.tunable->value,
+				    &obsolete);
+	if (! ok) {
+		/* obsolete is only consulted after a successful set */
+		reply.status = -1;
+	} else {
+		reply.status = obsolete ? 1 : 0;
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_TUNABLE control: reply with the value of the named tunable, or
+ * status -1 if ctdb_tunable_get_value() fails.
+ */
+static void control_get_tunable(TALLOC_CTX *mem_ctx,
+				struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	uint32_t tun_value;
+	bool found;
+
+	reply.rdata.opcode = request->opcode;
+	reply.errmsg = NULL;
+
+	found = ctdb_tunable_get_value(&state->ctdb->tun_list,
+				       request->rdata.data.tun_var,
+				       &tun_value);
+	if (found) {
+		reply.rdata.data.tun_value = tun_value;
+		reply.status = 0;
+	} else {
+		reply.status = -1;
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * LIST_TUNABLES control: reply with the list returned by
+ * ctdb_tunable_names(), or status -1 if that returns NULL.
+ */
+static void control_list_tunables(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct ctdb_reply_control reply;
+	struct ctdb_var_list *names;
+
+	reply.rdata.opcode = request->opcode;
+	reply.errmsg = NULL;
+
+	names = ctdb_tunable_names(mem_ctx);
+	if (names != NULL) {
+		reply.rdata.data.tun_var_list = names;
+		reply.status = 0;
+	} else {
+		reply.status = -1;
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * MODIFY_FLAGS control: set or clear NODE_FLAGS_PERMANENTLY_DISABLED on
+ * the node named in the flag change.
+ *
+ * Only the PERMANENTLY_DISABLED bit may appear in old_flags/new_flags;
+ * any other bit is rejected with EINVAL.  A PNN outside the node map is
+ * also rejected with EINVAL.  Otherwise the reply status is 0, whether or
+ * not the node's flags actually changed.
+ */
+static void control_modify_flags(TALLOC_CTX *mem_ctx,
+				 struct tevent_req *req,
+				 struct ctdb_req_header *header,
+				 struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_node_flag_change *change = request->rdata.data.flag_change;
+	struct ctdb_reply_control reply;
+	struct node *node;
+
+	reply.rdata.opcode = request->opcode;
+
+	if ((change->old_flags & ~NODE_FLAGS_PERMANENTLY_DISABLED) != 0 ||
+	    (change->new_flags & ~NODE_FLAGS_PERMANENTLY_DISABLED) != 0) {
+		DEBUG(DEBUG_INFO,
+		      ("MODIFY_FLAGS control not for PERMANENTLY_DISABLED\n"));
+		reply.status = EINVAL;
+		reply.errmsg = "Failed to MODIFY_FLAGS";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	/*
+	 * The PNN comes from the client, so validate it before using it
+	 * as an index into the node array.
+	 */
+	if (change->pnn >= ctdb->node_map->num_nodes) {
+		DEBUG(DEBUG_INFO,
+		      ("MODIFY_FLAGS control for unknown node %u\n",
+		       (unsigned int)change->pnn));
+		reply.status = EINVAL;
+		reply.errmsg = "Failed to MODIFY_FLAGS";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	/* There's all sorts of broadcast weirdness here.  Only change
+	 * the specified node, not the destination node of the
+	 * control. */
+	node = &ctdb->node_map->node[change->pnn];
+
+	if ((node->flags &
+	     change->old_flags & NODE_FLAGS_PERMANENTLY_DISABLED) == 0 &&
+	    (change->new_flags & NODE_FLAGS_PERMANENTLY_DISABLED) != 0) {
+		DEBUG(DEBUG_INFO,("Disabling node %d\n", header->destnode));
+		node->flags |= NODE_FLAGS_PERMANENTLY_DISABLED;
+		goto done;
+	}
+
+	if ((node->flags &
+	     change->old_flags & NODE_FLAGS_PERMANENTLY_DISABLED) != 0 &&
+	    (change->new_flags & NODE_FLAGS_PERMANENTLY_DISABLED) == 0) {
+		DEBUG(DEBUG_INFO,("Enabling node %d\n", header->destnode));
+		node->flags &= ~NODE_FLAGS_PERMANENTLY_DISABLED;
+		goto done;
+	}
+
+	DEBUG(DEBUG_INFO, ("Flags unchanged for node %d\n", header->destnode));
+
+done:
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_ALL_TUNABLES control: reply with a pointer to the daemon's own
+ * tunable list (no copy is made).
+ */
+static void control_get_all_tunables(TALLOC_CTX *mem_ctx,
+				     struct tevent_req *req,
+				     struct ctdb_req_header *header,
+				     struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+
+	reply.status = 0;
+	reply.errmsg = NULL;
+	reply.rdata.opcode = request->opcode;
+	reply.rdata.data.tun_list = &state->ctdb->tun_list;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * DB_ATTACH_PERSISTENT control: reply with the id of the database with
+ * the requested name, creating it with CTDB_DB_FLAGS_PERSISTENT via
+ * database_new() when no database of that name exists yet.
+ */
+static void control_db_attach_persistent(TALLOC_CTX *mem_ctx,
+					 struct tevent_req *req,
+					 struct ctdb_req_header *header,
+					 struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	/* Leaves db == NULL when no existing database matches by name */
+	for (db = ctdb->db_map->db; db != NULL; db = db->next) {
+		if (strcmp(db->name, request->rdata.data.db_name) == 0) {
+			break;
+		}
+	}
+
+	if (db == NULL) {
+		db = database_new(ctdb->db_map,
+				  request->rdata.data.db_name,
+				  CTDB_DB_FLAGS_PERSISTENT);
+		if (db == NULL) {
+			reply.status = -1;
+			reply.errmsg = "Failed to attach database";
+			client_send_control(req, header, &reply);
+			return;
+		}
+	}
+
+	reply.rdata.data.db_id = db->id;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * UPTIME control: reply with the current time, the daemon start time
+ * and the start/end times of the last recovery as recorded in the daemon
+ * context.  Status is -1 if the reply structure cannot be allocated.
+ */
+static void control_uptime(TALLOC_CTX *mem_ctx,
+			   struct tevent_req *req,
+			   struct ctdb_req_header *header,
+			   struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_uptime *up;
+
+	reply.rdata.opcode = request->opcode;
+
+	up = talloc_zero(mem_ctx, struct ctdb_uptime);
+	if (up == NULL) {
+		reply.status = -1;
+		reply.errmsg = "Memory error";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	up->current_time = tevent_timeval_current();
+	up->ctdbd_start_time = ctdb->start_time;
+	up->last_recovery_started = ctdb->recovery_start_time;
+	up->last_recovery_finished = ctdb->recovery_end_time;
+
+	reply.rdata.data.uptime = up;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * RELOAD_NODES_FILE control: re-read the nodes file and merge it into
+ * the in-memory node map, entry by entry:
+ *
+ *  - an entry whose address matches the existing node at the same index
+ *    is left alone;
+ *  - an entry flagged DELETED marks the existing node at that index as
+ *    deleted and resets its address to 0.0.0.0;
+ *  - an entry that lands on an existing node currently flagged DELETED
+ *    revives that node with the new address;
+ *  - anything else is appended as a new node.
+ *
+ * Any failure is reported to the client with status -1.
+ */
+static void control_reload_nodes_file(TALLOC_CTX *mem_ctx,
+				      struct tevent_req *req,
+				      struct ctdb_req_header *header,
+				      struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_node_map *nodemap;
+	struct node_map *node_map = ctdb->node_map;
+	unsigned int i;
+
+	reply.rdata.opcode = request->opcode;
+
+	nodemap = read_nodes_file(mem_ctx, header->destnode);
+	if (nodemap == NULL) {
+		goto fail;
+	}
+
+	for (i=0; i<nodemap->num; i++) {
+		struct node *node;
+		struct node *tmp;
+		bool known = (i < node_map->num_nodes);
+		bool deleted =
+			(nodemap->node[i].flags & NODE_FLAGS_DELETED) != 0;
+
+		if (known &&
+		    ctdb_sock_addr_same(&nodemap->node[i].addr,
+					&node_map->node[i].addr)) {
+			continue;
+		}
+
+		/*
+		 * Only touch node_map->node[i] when it exists.  A deleted
+		 * entry beyond the end of the current map is appended
+		 * below as a deleted node instead of being written past
+		 * the end of the array.
+		 */
+		if (known && deleted) {
+			int ret;
+
+			node = &node_map->node[i];
+
+			node->flags |= NODE_FLAGS_DELETED;
+			ret = ctdb_sock_addr_from_string("0.0.0.0", &node->addr,
+							 false);
+			if (ret != 0) {
+				/* Can't happen, but Coverity... */
+				goto fail;
+			}
+
+			continue;
+		}
+
+		if (known &&
+		    node_map->node[i].flags & NODE_FLAGS_DELETED) {
+			node = &node_map->node[i];
+
+			node->flags &= ~NODE_FLAGS_DELETED;
+			node->addr = nodemap->node[i].addr;
+
+			continue;
+		}
+
+		/*
+		 * Grow via a temporary so that a failed realloc does not
+		 * overwrite (and lose) the pointer to the live node array.
+		 */
+		tmp = talloc_realloc(node_map, node_map->node,
+				     struct node,
+				     node_map->num_nodes+1);
+		if (tmp == NULL) {
+			goto fail;
+		}
+		node_map->node = tmp;
+		node = &node_map->node[node_map->num_nodes];
+
+		node->addr = nodemap->node[i].addr;
+		node->pnn = nodemap->node[i].pnn;
+		node->flags = deleted ? NODE_FLAGS_DELETED : 0;
+		node->capabilities = CTDB_CAP_DEFAULT;
+		node->recovery_disabled = false;
+		node->recovery_substate = NULL;
+
+		node_map->num_nodes += 1;
+	}
+
+	talloc_free(nodemap);
+
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+	return;
+
+fail:
+	/* nodemap, if allocated, is left for the owner of mem_ctx to free */
+	reply.status = -1;
+	reply.errmsg = "Memory error";
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_CAPABILITIES control: reply with the capabilities of the
+ * destination node.  When that node has NODE_FLAGS_FAKE_TIMEOUT set, no
+ * reply is sent at all.
+ */
+static void control_get_capabilities(TALLOC_CTX *mem_ctx,
+				     struct tevent_req *req,
+				     struct ctdb_req_header *header,
+				     struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+	struct node *dest;
+
+	dest = &state->ctdb->node_map->node[header->destnode];
+	if (dest->flags & NODE_FLAGS_FAKE_TIMEOUT) {
+		/* Don't send reply */
+		return;
+	}
+
+	reply.rdata.opcode = request->opcode;
+	reply.rdata.data.caps = dest->capabilities;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * RELEASE_IP control: record that the given public IP is now assigned
+ * to the node named in the request (ip->pnn) in the destination node's
+ * list of known IPs.
+ *
+ * An address that is not a known public IP is acknowledged with status
+ * 0.  Asking the node that is meant to take the IP to release it is an
+ * error (status -1).
+ */
+static void control_release_ip(TALLOC_CTX *mem_ctx,
+			       struct tevent_req *req,
+			       struct ctdb_req_header *header,
+			       struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_public_ip *ip = request->rdata.data.pubip;
+	struct ctdb_reply_control reply;
+	struct ctdb_public_ip_list *ips = NULL;
+	struct ctdb_public_ip *match = NULL;
+	unsigned int idx;
+
+	reply.rdata.opcode = request->opcode;
+
+	if (ctdb->known_ips == NULL) {
+		D_INFO("RELEASE_IP %s - not a public IP\n",
+		       ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false));
+		goto done;
+	}
+
+	ips = &ctdb->known_ips[header->destnode];
+
+	for (idx = 0; idx < ips->num; idx++) {
+		if (ctdb_sock_addr_same_ip(&ips->ip[idx].addr, &ip->addr)) {
+			match = &ips->ip[idx];
+			break;
+		}
+	}
+	if (match == NULL) {
+		D_INFO("RELEASE_IP %s - not a public IP\n",
+		       ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false));
+		goto done;
+	}
+
+	if (match->pnn != header->destnode && header->destnode == ip->pnn) {
+		D_ERR("error: RELEASE_IP %s - to TAKE_IP node %d\n",
+		      ctdb_sock_addr_to_string(mem_ctx,
+					       &ip->addr, false),
+		      ip->pnn);
+		reply.status = -1;
+		reply.errmsg = "RELEASE_IP to TAKE_IP node";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	if (match->pnn != header->destnode) {
+		/* Destination node does not hold the address */
+		D_INFO("RELEASE_IP %s - to node %d - redundant\n",
+		       ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false),
+		       ip->pnn);
+	} else {
+		D_NOTICE("RELEASE_IP %s - to node %d\n",
+			 ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false),
+			 ip->pnn);
+	}
+	match->pnn = ip->pnn;
+
+done:
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * TAKEOVER_IP control: record that the given public IP is now assigned
+ * to the node named in the request (ip->pnn) in the destination node's
+ * list of known IPs.  The reply status is always 0, including when the
+ * address is not a known public IP or the destination node already
+ * holds it.
+ */
+static void control_takeover_ip(TALLOC_CTX *mem_ctx,
+				struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_public_ip *ip = request->rdata.data.pubip;
+	struct ctdb_reply_control reply;
+	struct ctdb_public_ip_list *ips = NULL;
+	struct ctdb_public_ip *match = NULL;
+	unsigned int idx;
+
+	reply.rdata.opcode = request->opcode;
+
+	if (ctdb->known_ips == NULL) {
+		D_INFO("TAKEOVER_IP %s - not a public IP\n",
+		       ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false));
+		goto done;
+	}
+
+	ips = &ctdb->known_ips[header->destnode];
+
+	for (idx = 0; idx < ips->num; idx++) {
+		if (ctdb_sock_addr_same_ip(&ips->ip[idx].addr, &ip->addr)) {
+			match = &ips->ip[idx];
+			break;
+		}
+	}
+	if (match == NULL) {
+		D_INFO("TAKEOVER_IP %s - not a public IP\n",
+		       ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false));
+		goto done;
+	}
+
+	if (match->pnn != header->destnode) {
+		D_NOTICE("TAKEOVER_IP %s\n",
+			 ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false));
+		match->pnn = ip->pnn;
+	} else {
+		D_INFO("TAKEOVER_IP %s - redundant\n",
+		       ctdb_sock_addr_to_string(mem_ctx, &ip->addr, false));
+	}
+
+done:
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_PUBLIC_IPS control: reply with the destination node's list of
+ * known public IPs.
+ *
+ * An empty list is returned instead when no public IPs are defined at
+ * all, or when only available IPs were requested and either the daemon
+ * is not in RUNNING runstate or the destination node is inactive or
+ * disabled.  Status is ENOMEM if the empty list cannot be allocated.
+ */
+static void control_get_public_ips(TALLOC_CTX *mem_ctx,
+				   struct tevent_req *req,
+				   struct ctdb_req_header *header,
+				   struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_public_ip_list *ips = NULL;
+	bool send_empty = false;
+
+	reply.rdata.opcode = request->opcode;
+
+	if (ctdb->known_ips == NULL) {
+		/* No IPs defined at all */
+		send_empty = true;
+	} else if (request->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
+		/*
+		 * Interface states are not considered here - nothing is
+		 * faked down to that level.
+		 */
+		uint32_t flags = ctdb->node_map->node[header->destnode].flags;
+
+		if (ctdb->runstate != CTDB_RUNSTATE_RUNNING ||
+		    ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0)) {
+			send_empty = true;
+		}
+	}
+
+	if (send_empty) {
+		/* Ship a dummy empty struct */
+		ips = talloc_zero(mem_ctx, struct ctdb_public_ip_list);
+		if (ips == NULL) {
+			reply.status = ENOMEM;
+			reply.errmsg = "Memory error";
+			client_send_control(req, header, &reply);
+			return;
+		}
+	} else {
+		ips = &ctdb->known_ips[header->destnode];
+	}
+
+	reply.rdata.data.pubip_list = ips;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * GET_NODEMAP control: reply with the pnn, flags and address of every
+ * node in the daemon's node map.  Allocation failures are reported with
+ * status -1.
+ */
+static void control_get_nodemap(TALLOC_CTX *mem_ctx,
+				struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_node_map *out;
+	unsigned int idx;
+
+	reply.rdata.opcode = request->opcode;
+
+	out = talloc_zero(mem_ctx, struct ctdb_node_map);
+	if (out == NULL) {
+		goto nomem;
+	}
+
+	out->num = ctdb->node_map->num_nodes;
+	out->node = talloc_array(out, struct ctdb_node_and_flags,
+				 out->num);
+	if (out->node == NULL) {
+		goto nomem;
+	}
+
+	for (idx = 0; idx < out->num; idx++) {
+		struct node *src = &ctdb->node_map->node[idx];
+
+		out->node[idx] = (struct ctdb_node_and_flags) {
+			.pnn = src->pnn,
+			.flags = src->flags,
+			.addr = src->addr,
+		};
+	}
+
+	reply.rdata.data.nodemap = out;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+	return;
+
+nomem:
+	reply.status = -1;
+	reply.errmsg = "Memory error";
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: reply with the configured recovery lock string.
+ *
+ * If ctdb->reclock is set, a copy allocated on mem_ctx is returned;
+ * otherwise the reply carries a NULL reclock_file.  A failed copy is
+ * reported with status ENOMEM.
+ */
+static void control_get_reclock_file(TALLOC_CTX *mem_ctx,
+				     struct tevent_req *req,
+				     struct ctdb_req_header *header,
+				     struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+
+	reply.rdata.opcode = request->opcode;
+
+	/* Default: success with no reclock configured */
+	reply.rdata.data.reclock_file = NULL;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	if (ctdb->reclock != NULL) {
+		reply.rdata.data.reclock_file =
+			talloc_strdup(mem_ctx, ctdb->reclock);
+		if (reply.rdata.data.reclock_file == NULL) {
+			reply.status = ENOMEM;
+			reply.errmsg = "Memory error";
+		}
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: set NODE_FLAGS_STOPPED on the destination node.
+ * Always replies with status 0.
+ */
+static void control_stop_node(TALLOC_CTX *mem_ctx,
+			      struct tevent_req *req,
+			      struct ctdb_req_header *header,
+			      struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct node *target = &ctdb->node_map->node[header->destnode];
+	struct ctdb_reply_control reply;
+
+	DEBUG(DEBUG_INFO, ("Stopping node\n"));
+	target->flags |= NODE_FLAGS_STOPPED;
+
+	reply.rdata.opcode = request->opcode;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: clear NODE_FLAGS_STOPPED on the destination node.
+ * Always replies with status 0.
+ */
+static void control_continue_node(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct node *target = &ctdb->node_map->node[header->destnode];
+	struct ctdb_reply_control reply;
+
+	DEBUG(DEBUG_INFO, ("Continue node\n"));
+	target->flags &= ~NODE_FLAGS_STOPPED;
+
+	reply.rdata.opcode = request->opcode;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Wakeup callback armed by control_set_ban_state(): the ban period
+ * is over, so clear NODE_FLAGS_BANNED on the node passed as callback
+ * data.  The flag is cleared even if the wakeup reports failure.
+ */
+static void set_ban_state_callback(struct tevent_req *subreq)
+{
+	struct node *banned = tevent_req_callback_data(
+		subreq, struct node);
+	bool ok = tevent_wakeup_recv(subreq);
+
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_INFO, ("tevent_wakeup_recv failed\n"));
+	}
+
+	banned->flags &= ~NODE_FLAGS_BANNED;
+}
+
+/*
+ * Control handler: ban or unban the destination node.
+ *
+ * The request is rejected with EINVAL unless ban->pnn equals the
+ * destination node.  A ban time of 0 clears NODE_FLAGS_BANNED
+ * immediately.  Otherwise the flag is set, the VNN map generation is
+ * invalidated and a wakeup is scheduled that clears the flag again
+ * after ban->time seconds (see set_ban_state_callback()).
+ *
+ * A reply is sent on every path, including failures.
+ */
+static void control_set_ban_state(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct tevent_req *subreq;
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_ban_state *ban = request->rdata.data.ban_state;
+	struct ctdb_reply_control reply;
+	struct node *node;
+
+	reply.rdata.opcode = request->opcode;
+
+	if (ban->pnn != header->destnode) {
+		DEBUG(DEBUG_INFO,
+		      ("SET_BAN_STATE control for PNN %d rejected\n",
+		       ban->pnn));
+		reply.status = EINVAL;
+		goto fail;
+	}
+
+	node = &ctdb->node_map->node[header->destnode];
+
+	if (ban->time == 0) {
+		DEBUG(DEBUG_INFO,("Unbanning this node\n"));
+		node->flags &= ~NODE_FLAGS_BANNED;
+		goto done;
+	}
+
+	/* The timer hangs off node_map so it outlives this request */
+	subreq = tevent_wakeup_send(ctdb->node_map, state->ev,
+				    tevent_timeval_current_ofs(
+					    ban->time, 0));
+	if (subreq == NULL) {
+		reply.status = ENOMEM;
+		goto fail;
+	}
+	tevent_req_set_callback(subreq, set_ban_state_callback, node);
+
+	DEBUG(DEBUG_INFO, ("Banning this node for %d seconds\n", ban->time));
+	node->flags |= NODE_FLAGS_BANNED;
+	ctdb->vnn_map->generation = INVALID_GENERATION;
+
+done:
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	client_send_control(req, header, &reply);
+	return;
+
+fail:
+	/* reply.status was set by whoever jumped here */
+	reply.errmsg = "Failed to ban node";
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: apply a transaction record buffer to a database.
+ *
+ * Fails with status -1 if the database is unknown, if it is neither
+ * persistent nor replicated, or if ltdb_transaction() reports an
+ * error.  Replies with status 0 otherwise.
+ */
+static void control_trans3_commit(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+	const char *failure = NULL;
+
+	reply.rdata.opcode = request->opcode;
+
+	db = database_find(ctdb->db_map, request->rdata.data.recbuf->db_id);
+	if (db == NULL) {
+		failure = "Unknown database";
+	} else if ((db->flags & (CTDB_DB_FLAGS_PERSISTENT |
+				 CTDB_DB_FLAGS_REPLICATED)) == 0) {
+		failure = "Transactions on volatile database";
+	} else if (ltdb_transaction(db, request->rdata.data.recbuf) != 0) {
+		failure = "Transaction failed";
+	}
+
+	/* A NULL failure string means every step succeeded */
+	reply.status = (failure == NULL) ? 0 : -1;
+	reply.errmsg = failure;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: reply with the sequence number of a database.
+ *
+ * Replies ENOENT if the database id is unknown, or with the error
+ * returned by database_seqnum() if that call fails.
+ */
+static void control_get_db_seqnum(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+	uint64_t seqnum;
+	int ret;
+
+	reply.rdata.opcode = request->opcode;
+
+	db = database_find(ctdb->db_map, request->rdata.data.db_id);
+	if (db == NULL) {
+		reply.status = ENOENT;
+		reply.errmsg = "Database not found";
+		goto out;
+	}
+
+	ret = database_seqnum(db, &seqnum);
+	if (ret != 0) {
+		reply.status = ret;
+		reply.errmsg = "Failed to get seqnum";
+		goto out;
+	}
+
+	reply.rdata.data.seqnum = seqnum;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+out:
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: report database health.
+ *
+ * A known database is always reported with a NULL reason and status
+ * 0; an unknown database id yields ENOENT.
+ */
+static void control_db_get_health(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	bool known;
+
+	reply.rdata.opcode = request->opcode;
+
+	known = (database_find(ctdb->db_map,
+			       request->rdata.data.db_id) != NULL);
+	if (known) {
+		reply.rdata.data.reason = NULL;
+		reply.status = 0;
+		reply.errmsg = NULL;
+	} else {
+		reply.status = ENOENT;
+		reply.errmsg = "Database not found";
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Build a protocol interface list from the fake interface map.
+ *
+ * Name, link state and reference count are copied for each entry of
+ * ctdb->iface_map.  The list is allocated under mem_ctx.
+ *
+ * Returns the new list, or NULL if an allocation fails.
+ */
+static struct ctdb_iface_list *get_ctdb_iface_list(TALLOC_CTX *mem_ctx,
+						   struct ctdbd_context *ctdb)
+{
+	struct ctdb_iface_list *list;
+	unsigned int n;
+
+	list = talloc_zero(mem_ctx, struct ctdb_iface_list);
+	if (list == NULL) {
+		return NULL;
+	}
+
+	list->num = ctdb->iface_map->num;
+	list->iface = talloc_array(list, struct ctdb_iface, list->num);
+	if (list->iface == NULL) {
+		talloc_free(list);
+		return NULL;
+	}
+
+	for (n = 0; n < list->num; n++) {
+		struct interface *src = &ctdb->iface_map->iface[n];
+		struct ctdb_iface *dst = &list->iface[n];
+
+		*dst = (struct ctdb_iface) {
+			.link_state = src->link_up,
+			.references = src->references,
+		};
+		strlcpy(dst->name, src->name, sizeof(dst->name));
+	}
+
+	return list;
+}
+
+/*
+ * Control handler: reply with information about one public IP.
+ *
+ * The requested address is looked up in the destination node's known
+ * IPs.  On a match the reply carries that IP entry, an active_idx of
+ * 0 and the full interface list.  An address that is not known gives
+ * status -1; allocation failures give ENOMEM.
+ */
+static void control_get_public_ip_info(TALLOC_CTX *mem_ctx,
+				       struct tevent_req *req,
+				       struct ctdb_req_header *header,
+				       struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	ctdb_sock_addr *wanted = request->rdata.data.addr;
+	struct ctdb_public_ip_list *ip_list = NULL;
+	struct ctdb_public_ip_info *ipinfo = NULL;
+	bool matched = false;
+	unsigned idx;
+
+	reply.rdata.opcode = request->opcode;
+
+	/* All allocation failures below report the same error */
+	reply.status = ENOMEM;
+	reply.errmsg = "Memory error";
+
+	ipinfo = talloc_zero(mem_ctx, struct ctdb_public_ip_info);
+	if (ipinfo == NULL) {
+		goto out;
+	}
+
+	reply.rdata.data.ipinfo = ipinfo;
+
+	if (ctdb->known_ips == NULL) {
+		/* No IPs are defined: search a dummy empty list so
+		 * that the lookup below simply finds nothing.
+		 */
+		ip_list = talloc_zero(mem_ctx, struct ctdb_public_ip_list);
+		if (ip_list == NULL) {
+			goto out;
+		}
+	} else {
+		ip_list = &ctdb->known_ips[header->destnode];
+	}
+
+	for (idx = 0; idx < ip_list->num; idx++) {
+		if (ctdb_sock_addr_same_ip(&ip_list->ip[idx].addr, wanted)) {
+			matched = true;
+			break;
+		}
+	}
+
+	if (! matched) {
+		D_ERR("GET_PUBLIC_IP_INFO: not known public IP %s\n",
+		      ctdb_sock_addr_to_string(mem_ctx, wanted, false));
+		reply.status = -1;
+		reply.errmsg = "Unknown address";
+		goto out;
+	}
+
+	ipinfo->ip = ip_list->ip[idx];
+
+	/* The known_ips data carries no interface information, so
+	 * the active interface index is simply faked as 0.
+	 */
+	ipinfo->active_idx = 0;
+
+	ipinfo->ifaces = get_ctdb_iface_list(mem_ctx, ctdb);
+	if (ipinfo->ifaces == NULL) {
+		goto out;
+	}
+
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+out:
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: reply with the list of fake interfaces built by
+ * get_ctdb_iface_list().  A NULL list is reported as status -1 with
+ * "Memory error".
+ */
+static void control_get_ifaces(TALLOC_CTX *mem_ctx,
+			       struct tevent_req *req,
+			       struct ctdb_req_header *header,
+			       struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_iface_list *list;
+
+	reply.rdata.opcode = request->opcode;
+
+	list = get_ctdb_iface_list(mem_ctx, ctdb);
+	if (list != NULL) {
+		reply.rdata.data.iface_list = list;
+		reply.status = 0;
+		reply.errmsg = NULL;
+	} else {
+		reply.status = -1;
+		reply.errmsg = "Memory error";
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: set the link state of a named fake interface.
+ *
+ * The request is validated in this order: the name must have a NUL
+ * at index CTDB_IFACE_SIZE, link_state must be 0 or 1, references
+ * must be 0, and the name must match an entry in ctdb->iface_map.
+ * Any violation yields status -1 with a specific error message.
+ */
+static void control_set_iface_link_state(TALLOC_CTX *mem_ctx,
+					 struct tevent_req *req,
+					 struct ctdb_req_header *header,
+					 struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct ctdb_iface *wanted = request->rdata.data.iface;
+	struct interface *target = NULL;
+	const char *failure = NULL;
+	bool up = false;
+	int idx;
+
+	reply.rdata.opcode = request->opcode;
+
+	if (wanted->name[CTDB_IFACE_SIZE] != '\0') {
+		failure = "interface name not terminated";
+		goto out;
+	}
+
+	if (wanted->link_state == 0) {
+		up = false;
+	} else if (wanted->link_state == 1) {
+		up = true;
+	} else {
+		failure = "invalid link state";
+		goto out;
+	}
+
+	if (wanted->references != 0) {
+		failure = "references should be 0";
+		goto out;
+	}
+
+	for (idx = 0; idx < ctdb->iface_map->num; idx++) {
+		struct interface *candidate = &ctdb->iface_map->iface[idx];
+
+		if (strcmp(candidate->name, wanted->name) == 0) {
+			target = candidate;
+			break;
+		}
+	}
+
+	if (target == NULL) {
+		failure = "interface not found";
+		goto out;
+	}
+
+	target->link_up = up;
+
+out:
+	/* A NULL failure string means the request was applied */
+	reply.status = (failure == NULL) ? 0 : -1;
+	reply.errmsg = failure;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: set CTDB_DB_FLAGS_READONLY on a database.
+ *
+ * Replies ENOENT for an unknown database id and EINVAL for a
+ * persistent database.
+ */
+static void control_set_db_readonly(TALLOC_CTX *mem_ctx,
+				    struct tevent_req *req,
+				    struct ctdb_req_header *header,
+				    struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	db = database_find(ctdb->db_map, request->rdata.data.db_id);
+	if (db == NULL) {
+		reply.status = ENOENT;
+		reply.errmsg = "Database not found";
+	} else if (db->flags & CTDB_DB_FLAGS_PERSISTENT) {
+		reply.status = EINVAL;
+		reply.errmsg = "Can not set READONLY on persistent db";
+	} else {
+		db->flags |= CTDB_DB_FLAGS_READONLY;
+		reply.status = 0;
+		reply.errmsg = NULL;
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+struct traverse_start_ext_state { /* context handed to traverse_start_ext_handler() as private_data */
+ struct tevent_req *req; /* talloc parent for record buffers; passed to client_send_message() */
+ struct ctdb_req_header *header; /* header passed to client_send_message() for every record */
+ uint32_t reqid; /* copied into the reqid of every record sent */
+ uint64_t srvid; /* srvid that the record messages are sent with */
+ bool withemptyrecords; /* if false, records holding only an ltdb header are skipped */
+ int status; /* set to ENOMEM by the handler when a buffer allocation fails */
+};
+
+/*
+ * tdb traverse callback: send one database record to the client as
+ * a message.
+ *
+ * Records shorter than an ltdb header are skipped, as are records
+ * consisting of only the header unless withemptyrecords is set.
+ *
+ * Returns 0 to continue the traverse, or 1 (with state->status set
+ * to ENOMEM) if the message buffer can not be allocated.
+ */
+static int traverse_start_ext_handler(struct tdb_context *tdb,
+				      TDB_DATA key, TDB_DATA data,
+				      void *private_data)
+{
+	struct traverse_start_ext_state *ctx = private_data;
+	struct ctdb_rec_data rec;
+	struct ctdb_req_message_data msg;
+	uint8_t *wire;
+	size_t np;
+
+	if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+		return 0;
+	}
+
+	if (!ctx->withemptyrecords &&
+	    data.dsize == sizeof(struct ctdb_ltdb_header)) {
+		return 0;
+	}
+
+	rec = (struct ctdb_rec_data) {
+		.reqid = ctx->reqid,
+		.header = NULL,
+		.key = key,
+		.data = data,
+	};
+
+	msg.srvid = ctx->srvid;
+	msg.data.dsize = ctdb_rec_data_len(&rec);
+
+	wire = talloc_size(ctx->req, msg.data.dsize);
+	if (wire == NULL) {
+		ctx->status = ENOMEM;
+		return 1;
+	}
+	msg.data.dptr = wire;
+
+	ctdb_rec_data_push(&rec, wire, &np);
+	client_send_message(ctx->req, ctx->header, &msg);
+
+	/* The buffer is only needed until the message has been sent */
+	talloc_free(wire);
+
+	return 0;
+}
+
+/*
+ * Control handler: traverse a database, sending every record to the
+ * client as a message on the requested srvid.
+ *
+ * On success the control reply (status 0) is sent first, followed by
+ * a final message containing an empty record (null key and data).
+ *
+ * An unknown database or a failure inside the traverse callback is
+ * reported with a single error reply (status -1); no success reply
+ * or end marker follows it.
+ */
+static void control_traverse_start_ext(TALLOC_CTX *mem_ctx,
+				       struct tevent_req *req,
+				       struct ctdb_req_header *header,
+				       struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+	struct ctdb_traverse_start_ext *ext;
+	struct traverse_start_ext_state t_state;
+	struct ctdb_rec_data rec;
+	struct ctdb_req_message_data message;
+	uint8_t buffer[32];
+	size_t np;
+	int ret;
+
+	reply.rdata.opcode = request->opcode;
+
+	ext = request->rdata.data.traverse_start_ext;
+
+	db = database_find(ctdb->db_map, ext->db_id);
+	if (db == NULL) {
+		reply.status = -1;
+		reply.errmsg = "Unknown database";
+		client_send_control(req, header, &reply);
+		return;
+	}
+
+	/* Unnamed members (status) are zero-initialised */
+	t_state = (struct traverse_start_ext_state) {
+		.req = req,
+		.header = header,
+		.reqid = ext->reqid,
+		.srvid = ext->srvid,
+		.withemptyrecords = ext->withemptyrecords,
+	};
+
+	ret = tdb_traverse_read(db->tdb, traverse_start_ext_handler, &t_state);
+	DEBUG(DEBUG_INFO, ("traversed %d records\n", ret));
+	if (t_state.status != 0) {
+		reply.status = -1;
+		reply.errmsg = "Memory error";
+		client_send_control(req, header, &reply);
+		/* Do not follow the error with a success reply */
+		return;
+	}
+
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+
+	/* End-of-traverse marker: a record with null key and data */
+	rec = (struct ctdb_rec_data) {
+		.reqid = ext->reqid,
+		.header = NULL,
+		.key = tdb_null,
+		.data = tdb_null,
+	};
+
+	message.srvid = ext->srvid;
+	message.data.dsize = ctdb_rec_data_len(&rec);
+	ctdb_rec_data_push(&rec, buffer, &np);
+	message.data.dptr = buffer;
+	client_send_message(req, header, &message);
+}
+
+/*
+ * Control handler: set CTDB_DB_FLAGS_STICKY on a database.
+ *
+ * Replies ENOENT for an unknown database id and EINVAL for a
+ * persistent database.
+ */
+static void control_set_db_sticky(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	db = database_find(ctdb->db_map, request->rdata.data.db_id);
+	if (db == NULL) {
+		reply.status = ENOENT;
+		reply.errmsg = "Database not found";
+	} else if (db->flags & CTDB_DB_FLAGS_PERSISTENT) {
+		reply.status = EINVAL;
+		reply.errmsg = "Can not set STICKY on persistent db";
+	} else {
+		db->flags |= CTDB_DB_FLAGS_STICKY;
+		reply.status = 0;
+		reply.errmsg = NULL;
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: nothing is faked here, the reply is
+ * unconditionally a success.
+ */
+static void control_ipreallocated(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct ctdb_reply_control success;
+
+	success.rdata.opcode = request->opcode;
+	success.errmsg = NULL;
+	success.status = 0;
+
+	client_send_control(req, header, &success);
+}
+
+/*
+ * Control handler: reply with the fake daemon's current runstate.
+ * Always succeeds.
+ */
+static void control_get_runstate(TALLOC_CTX *mem_ctx,
+				 struct tevent_req *req,
+				 struct ctdb_req_header *header,
+				 struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdb_reply_control reply;
+
+	reply.rdata.opcode = request->opcode;
+	reply.rdata.data.runstate = cstate->ctdb->runstate;
+	reply.errmsg = NULL;
+	reply.status = 0;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: reply with the node map returned by
+ * read_nodes_file() for the destination node.  A NULL result is
+ * reported as status -1 with "Failed to read nodes file".
+ */
+static void control_get_nodes_file(TALLOC_CTX *mem_ctx,
+				   struct tevent_req *req,
+				   struct ctdb_req_header *header,
+				   struct ctdb_req_control *request)
+{
+	struct ctdb_reply_control reply;
+	struct ctdb_node_map *map;
+
+	reply.rdata.opcode = request->opcode;
+
+	map = read_nodes_file(mem_ctx, header->destnode);
+	if (map != NULL) {
+		reply.rdata.data.nodemap = map;
+		reply.status = 0;
+		reply.errmsg = NULL;
+	} else {
+		reply.status = -1;
+		reply.errmsg = "Failed to read nodes file";
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: reply with the tdb flags of a database, as
+ * computed by database_flags() from the database's ctdb flags.
+ * An unknown database id yields ENOENT.
+ */
+static void control_db_open_flags(TALLOC_CTX *mem_ctx,
+				  struct tevent_req *req,
+				  struct ctdb_req_header *header,
+				  struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	db = database_find(ctdb->db_map, request->rdata.data.db_id);
+	if (db == NULL) {
+		reply.status = ENOENT;
+		reply.errmsg = "Database not found";
+		goto out;
+	}
+
+	reply.rdata.data.tdb_flags = database_flags(db->flags);
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+out:
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: attach a replicated database by name.
+ *
+ * If a database with the given name already exists its id is
+ * returned; otherwise a new one is created with
+ * CTDB_DB_FLAGS_REPLICATED.  Creation failure yields status -1.
+ */
+static void control_db_attach_replicated(TALLOC_CTX *mem_ctx,
+					 struct tevent_req *req,
+					 struct ctdb_req_header *header,
+					 struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct database *db;
+
+	reply.rdata.opcode = request->opcode;
+
+	/* Look for an existing database of that name first */
+	db = ctdb->db_map->db;
+	while (db != NULL &&
+	       strcmp(db->name, request->rdata.data.db_name) != 0) {
+		db = db->next;
+	}
+
+	if (db == NULL) {
+		db = database_new(ctdb->db_map, request->rdata.data.db_name,
+				  CTDB_DB_FLAGS_REPLICATED);
+		if (db == NULL) {
+			reply.status = -1;
+			reply.errmsg = "Failed to attach database";
+			client_send_control(req, header, &reply);
+			return;
+		}
+	}
+
+	reply.rdata.data.db_id = db->id;
+	reply.status = 0;
+	reply.errmsg = NULL;
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: check that a client with the given PID has the
+ * given srvid registered and that its process can be signalled.
+ *
+ * Replies with status 0 if such a client exists and kill(pid, 0)
+ * succeeds, with the kill() result and strerror(errno) if it does
+ * not, and with status -1 if no client matches the PID or the
+ * PID/srvid pair.
+ */
+static void control_check_pid_srvid(TALLOC_CTX *mem_ctx,
+				    struct tevent_req *req,
+				    struct ctdb_req_header *header,
+				    struct ctdb_req_control *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_client *c;
+	struct client_state *owner;
+	struct ctdb_reply_control reply;
+	bool pid_found = false;
+	bool srvid_found = false;
+	int ret;
+
+	reply.rdata.opcode = request->opcode;
+
+	for (c = ctdb->client_list; c != NULL; c = c->next) {
+		if (c->pid != request->rdata.data.pid_srvid->pid) {
+			continue;
+		}
+		pid_found = true;
+
+		owner = (struct client_state *)c->state;
+		ret = srvid_exists(ctdb->srv,
+				   request->rdata.data.pid_srvid->srvid,
+				   owner);
+		if (ret != 0) {
+			continue;
+		}
+		srvid_found = true;
+
+		/* Signal 0 only probes whether the process exists */
+		ret = kill(owner->pid, 0);
+		if (ret == 0) {
+			reply.status = 0;
+			reply.errmsg = NULL;
+		} else {
+			reply.status = ret;
+			reply.errmsg = strerror(errno);
+		}
+	}
+
+	if (! pid_found) {
+		reply.status = -1;
+		reply.errmsg = "No client for PID";
+	} else if (! srvid_found) {
+		reply.status = -1;
+		reply.errmsg = "No client for PID and SRVID";
+	}
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: set NODE_FLAGS_PERMANENTLY_DISABLED on the
+ * destination node.  Always replies with status 0.
+ */
+static void control_disable_node(TALLOC_CTX *mem_ctx,
+				 struct tevent_req *req,
+				 struct ctdb_req_header *header,
+				 struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct node *target = &ctdb->node_map->node[header->destnode];
+	struct ctdb_reply_control reply;
+
+	DEBUG(DEBUG_INFO, ("Disabling node\n"));
+	target->flags |= NODE_FLAGS_PERMANENTLY_DISABLED;
+
+	reply.rdata.opcode = request->opcode;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Control handler: clear NODE_FLAGS_PERMANENTLY_DISABLED on the
+ * destination node.  Always replies with status 0.
+ */
+static void control_enable_node(TALLOC_CTX *mem_ctx,
+				struct tevent_req *req,
+				struct ctdb_req_header *header,
+				struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct node *target = &ctdb->node_map->node[header->destnode];
+	struct ctdb_reply_control reply;
+
+	DEBUG(DEBUG_INFO, ("Enable node\n"));
+	target->flags &= ~NODE_FLAGS_PERMANENTLY_DISABLED;
+
+	reply.rdata.opcode = request->opcode;
+	reply.status = 0;
+	reply.errmsg = NULL;
+
+	client_send_control(req, header, &reply);
+}
+
+/*
+ * Check whether a failure has been configured for this control.
+ *
+ * An entry of ctdb->control_failures applies if its opcode matches
+ * and its pnn is either the destination node or CTDB_UNKNOWN_PNN.
+ * An error of "TIMEOUT" suppresses the reply entirely; "ERROR" sends
+ * a status -1 reply with the entry's comment as the error message.
+ *
+ * Returns true if the control has been dealt with here, false if
+ * normal processing should continue.
+ */
+static bool fake_control_failure(TALLOC_CTX *mem_ctx,
+				 struct tevent_req *req,
+				 struct ctdb_req_header *header,
+				 struct ctdb_req_control *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_reply_control reply;
+	struct fake_control_failure *entry = NULL;
+
+	D_DEBUG("Checking fake control failure for control %u on node %u\n",
+		request->opcode, header->destnode);
+
+	for (entry = ctdb->control_failures;
+	     entry != NULL;
+	     entry = entry->next) {
+		bool pnn_matches;
+
+		if (entry->opcode != request->opcode) {
+			continue;
+		}
+
+		pnn_matches = (entry->pnn == header->destnode ||
+			       entry->pnn == CTDB_UNKNOWN_PNN);
+		if (! pnn_matches) {
+			continue;
+		}
+
+		reply.rdata.opcode = request->opcode;
+
+		if (strcmp(entry->error, "TIMEOUT") == 0) {
+			/* Causes no reply */
+			D_ERR("Control %u fake timeout on node %u\n",
+			      request->opcode, header->destnode);
+			return true;
+		}
+
+		if (strcmp(entry->error, "ERROR") == 0) {
+			D_ERR("Control %u fake error on node %u\n",
+			      request->opcode, header->destnode);
+			reply.status = -1;
+			reply.errmsg = entry->comment;
+			client_send_control(req, header, &reply);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Control handler for unsupported controls: reply with status -1
+ * and "Not implemented".
+ */
+static void control_error(TALLOC_CTX *mem_ctx,
+			  struct tevent_req *req,
+			  struct ctdb_req_header *header,
+			  struct ctdb_req_control *request)
+{
+	struct ctdb_reply_control failure;
+
+	D_DEBUG("Control %u not implemented\n", request->opcode);
+
+	failure.rdata.opcode = request->opcode;
+	failure.errmsg = "Not implemented";
+	failure.status = -1;
+
+	client_send_control(req, header, &failure);
+}
+
+/*
+ * Handling protocol - messages
+ */
+
+struct disable_recoveries_state { /* callback data for disable_recoveries_callback() */
+ struct node *node; /* node whose recovery_disabled flag is cleared when the timer fires */
+};
+
+/*
+ * Wakeup callback armed by message_disable_recoveries(): the
+ * disable period is over, so re-enable recoveries on the node and
+ * release the node's recovery substate.
+ */
+static void disable_recoveries_callback(struct tevent_req *subreq)
+{
+	struct disable_recoveries_state *pending = tevent_req_callback_data(
+		subreq, struct disable_recoveries_state);
+	bool ok = tevent_wakeup_recv(subreq);
+
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		DEBUG(DEBUG_INFO, ("tevent_wakeup_recv failed\n"));
+	}
+
+	pending->node->recovery_disabled = false;
+	TALLOC_FREE(pending->node->recovery_substate);
+}
+
+/*
+ * Message handler: disable recoveries on the destination node for a
+ * number of seconds, or re-enable them when the timeout is 0.
+ *
+ * While disabled, a wakeup is pending that re-enables recoveries
+ * (see disable_recoveries_callback()).  A new request replaces any
+ * wakeup still pending from an earlier one, so only the most recent
+ * timeout is in effect.
+ *
+ * The reply message carries an int: the destination node number on
+ * success, -1 on failure.
+ */
+static void message_disable_recoveries(TALLOC_CTX *mem_ctx,
+				       struct tevent_req *req,
+				       struct ctdb_req_header *header,
+				       struct ctdb_req_message *request)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct tevent_req *subreq;
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct disable_recoveries_state *substate;
+	struct ctdb_disable_message *disable = request->data.disable;
+	struct ctdb_req_message_data reply;
+	struct node *node;
+	int ret = -1;
+	TDB_DATA data;
+
+	node = &ctdb->node_map->node[header->destnode];
+
+	if (disable->timeout == 0) {
+		/* Freeing the substate also frees its pending wakeup */
+		TALLOC_FREE(node->recovery_substate);
+		node->recovery_disabled = false;
+		DEBUG(DEBUG_INFO, ("Enabled recoveries on node %u\n",
+				   header->destnode));
+		goto done;
+	}
+
+	substate = talloc_zero(ctdb->node_map,
+			       struct disable_recoveries_state);
+	if (substate == NULL) {
+		goto fail;
+	}
+
+	substate->node = node;
+
+	/* The wakeup is a talloc child of substate */
+	subreq = tevent_wakeup_send(substate, state->ev,
+				    tevent_timeval_current_ofs(
+					    disable->timeout, 0));
+	if (subreq == NULL) {
+		talloc_free(substate);
+		goto fail;
+	}
+	tevent_req_set_callback(subreq, disable_recoveries_callback, substate);
+
+	/* Drop any wakeup left over from an earlier request, only
+	 * now that its replacement is armed.  Otherwise the stale
+	 * wakeup would re-enable recoveries early and free the new
+	 * substate, while the old substate would leak.
+	 */
+	TALLOC_FREE(node->recovery_substate);
+
+	DEBUG(DEBUG_INFO, ("Disabled recoveries for %d seconds on node %u\n",
+			   disable->timeout, header->destnode));
+	node->recovery_substate = substate;
+	node->recovery_disabled = true;
+
+done:
+	ret = header->destnode;
+
+fail:
+	reply.srvid = disable->srvid;
+	data.dptr = (uint8_t *)&ret;
+	data.dsize = sizeof(int);
+	reply.data = data;
+
+	client_send_message(req, header, &reply);
+}
+
+/*
+ * Message handler: fake an IP takeover run.
+ *
+ * Only the recovery master answers: if the destination node is the
+ * recmaster, a message carrying its node number (as an int) is sent
+ * to the srvid given in the request.  Other nodes send nothing.
+ */
+static void message_takeover_run(TALLOC_CTX *mem_ctx,
+				 struct tevent_req *req,
+				 struct ctdb_req_header *header,
+				 struct ctdb_req_message *request)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	struct ctdb_srvid_message *srvid = request->data.msg;
+	struct ctdb_req_message_data reply;
+	int result = -1;
+
+	if (header->destnode != ctdb->node_map->recmaster) {
+		/* No reply! Only recmaster replies... */
+		return;
+	}
+
+	DEBUG(DEBUG_INFO, ("IP takover run on node %u\n",
+			   header->destnode));
+	result = header->destnode;
+
+	reply.srvid = srvid->srvid;
+	reply.data.dptr = (uint8_t *)&result;
+	reply.data.dsize = sizeof(int);
+
+	client_send_message(req, header, &reply);
+}
+
+/*
+ * Handle a single client
+ */
+
+static void client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data);
+static void client_dead_handler(void *private_data);
+static void client_process_packet(struct tevent_req *req,
+ uint8_t *buf, size_t buflen);
+static void client_process_call(struct tevent_req *req,
+ uint8_t *buf, size_t buflen);
+static void client_process_message(struct tevent_req *req,
+ uint8_t *buf, size_t buflen);
+static void client_process_control(struct tevent_req *req,
+ uint8_t *buf, size_t buflen);
+static void client_reply_done(struct tevent_req *subreq);
+
+/*
+ * Start handling a single client connection on fd.
+ *
+ * Sets up the comm channel with client_read_handler() and
+ * client_dead_handler(), then adds the client to the context with
+ * client_add().  The peer PID lookup is best effort: its result is
+ * deliberately ignored.
+ *
+ * Returns the new request, NULL if it can not be created, or an
+ * already-failed posted request if setup fails.
+ */
+static struct tevent_req *client_send(TALLOC_CTX *mem_ctx,
+				      struct tevent_context *ev,
+				      int fd, struct ctdbd_context *ctdb,
+				      int pnn)
+{
+	struct tevent_req *req;
+	struct client_state *cstate;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &cstate, struct client_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	cstate->ev = ev;
+	cstate->fd = fd;
+	cstate->ctdb = ctdb;
+	cstate->pnn = pnn;
+
+	(void) ctdb_get_peer_pid(fd, &cstate->pid);
+
+	ret = comm_setup(cstate, ev, fd, client_read_handler, req,
+			 client_dead_handler, req, &cstate->comm);
+	if (ret == 0) {
+		/* Only add the client once its channel is up */
+		ret = client_add(ctdb, cstate->pid, cstate);
+	}
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	DEBUG(DEBUG_INFO, ("New client fd=%d\n", fd));
+
+	return req;
+}
+
+/*
+ * Read handler for a client connection: validate the packet header
+ * and process the packet once per destination node.
+ *
+ * Packets whose header can not be parsed, whose length does not
+ * match the buffer, or which fail header verification are dropped
+ * silently.  For CTDB_BROADCAST_ALL the packet is processed for
+ * every node; for CTDB_BROADCAST_CONNECTED for every node not
+ * flagged NODE_FLAGS_DISCONNECTED.  Otherwise the destination must
+ * be a valid, connected node, or the packet is dropped with a
+ * message on stderr.
+ *
+ * The (possibly rewritten) header is pushed back into buf before
+ * each call to client_process_packet().
+ */
+static void client_read_handler(uint8_t *buf, size_t buflen,
+				void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	struct ctdb_req_header header;
+	size_t np;
+	unsigned int i;
+	int ret;
+
+	ret = ctdb_req_header_pull(buf, buflen, &header, &np);
+	if (ret != 0) {
+		return;
+	}
+
+	if (buflen != header.length) {
+		return;
+	}
+
+	ret = ctdb_req_header_verify(&header, 0);
+	if (ret != 0) {
+		return;
+	}
+
+	header_fix_pnn(&header, ctdb);
+
+	if (header.destnode == CTDB_BROADCAST_ALL) {
+		for (i=0; i<ctdb->node_map->num_nodes; i++) {
+			header.destnode = i;
+
+			ctdb_req_header_push(&header, buf, &np);
+			client_process_packet(req, buf, buflen);
+		}
+		return;
+	}
+
+	if (header.destnode == CTDB_BROADCAST_CONNECTED) {
+		for (i=0; i<ctdb->node_map->num_nodes; i++) {
+			if (ctdb->node_map->node[i].flags &
+			    NODE_FLAGS_DISCONNECTED) {
+				continue;
+			}
+
+			header.destnode = i;
+
+			ctdb_req_header_push(&header, buf, &np);
+			client_process_packet(req, buf, buflen);
+		}
+		return;
+	}
+
+	/* Valid node numbers are 0 .. num_nodes-1; reject anything
+	 * else before it is used to index node[] below.
+	 */
+	if (header.destnode >= ctdb->node_map->num_nodes) {
+		fprintf(stderr, "Invalid destination pnn 0x%x\n",
+			header.destnode);
+		return;
+	}
+
+	if (ctdb->node_map->node[header.destnode].flags & NODE_FLAGS_DISCONNECTED) {
+		fprintf(stderr, "Packet for disconnected node pnn %u\n",
+			header.destnode);
+		return;
+	}
+
+	ctdb_req_header_push(&header, buf, &np);
+	client_process_packet(req, buf, buflen);
+}
+
+/*
+ * Dead handler for a client connection: mark the client request,
+ * passed as private_data, as done.
+ */
+static void client_dead_handler(void *private_data)
+{
+	struct tevent_req *client_req;
+
+	client_req = talloc_get_type_abort(private_data, struct tevent_req);
+	tevent_req_done(client_req);
+}
+
+/*
+ * Dispatch a packet to the handler for its operation: call, message
+ * or control.  Packets with an unparsable header or any other
+ * operation are ignored.
+ */
+static void client_process_packet(struct tevent_req *req,
+				  uint8_t *buf, size_t buflen)
+{
+	struct ctdb_req_header hdr;
+	size_t np;
+
+	if (ctdb_req_header_pull(buf, buflen, &hdr, &np) != 0) {
+		return;
+	}
+
+	if (hdr.operation == CTDB_REQ_CALL) {
+		client_process_call(req, buf, buflen);
+	} else if (hdr.operation == CTDB_REQ_MESSAGE) {
+		client_process_message(req, buf, buflen);
+	} else if (hdr.operation == CTDB_REQ_CONTROL) {
+		client_process_control(req, buf, buflen);
+	}
+}
+
+/*
+ * Handle a call request.
+ *
+ * The record for the requested key is fetched and, if its dmaster
+ * is not this node, stored back with dmaster set to this node's PNN
+ * to fake a migration.  The reply never carries data: status is 0
+ * on success and -1 for an invalid node, an unknown database or a
+ * fetch/store failure.
+ *
+ * A request that can not be parsed fails the client request instead
+ * of producing a reply.
+ */
+static void client_process_call(struct tevent_req *req,
+				uint8_t *buf, size_t buflen)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	TALLOC_CTX *tmp_ctx;
+	struct ctdb_req_header header;
+	struct ctdb_req_call request;
+	struct ctdb_reply_call reply;
+	struct database *db;
+	struct ctdb_ltdb_header ltdb_hdr;
+	TDB_DATA data;
+	int status = -1;
+	int ret;
+
+	tmp_ctx = talloc_new(cstate);
+	if (tevent_req_nomem(tmp_ctx, req)) {
+		return;
+	}
+
+	ret = ctdb_req_call_pull(buf, buflen, &header, tmp_ctx, &request);
+	if (ret != 0) {
+		talloc_free(tmp_ctx);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	header_fix_pnn(&header, ctdb);
+
+	if (header.destnode >= ctdb->node_map->num_nodes) {
+		goto out;
+	}
+
+	DEBUG(DEBUG_INFO, ("call db_id = %u\n", request.db_id));
+
+	db = database_find(ctdb->db_map, request.db_id);
+	if (db == NULL) {
+		goto out;
+	}
+
+	ret = ltdb_fetch(db, request.key, &ltdb_hdr, tmp_ctx, &data);
+	if (ret != 0) {
+		goto out;
+	}
+
+	/* Fake migration */
+	if (ltdb_hdr.dmaster != ctdb->node_map->pnn) {
+		ltdb_hdr.dmaster = ctdb->node_map->pnn;
+
+		ret = ltdb_store(db, request.key, &ltdb_hdr, data);
+		if (ret != 0) {
+			goto out;
+		}
+	}
+
+	status = 0;
+
+out:
+	/* Success and failure share cleanup and reply shape */
+	talloc_free(tmp_ctx);
+	reply.status = status;
+	reply.data = tdb_null;
+
+	client_send_call(req, &header, &reply);
+}
+
+/*
+ * Handle a message request.
+ *
+ * Only CTDB_SRVID_DISABLE_RECOVERIES and CTDB_SRVID_TAKEOVER_RUN
+ * are implemented; any other srvid is just logged.  A message for
+ * an invalid node is dropped with a note on stderr.  A request that
+ * can not be parsed fails the client request.
+ */
+static void client_process_message(struct tevent_req *req,
+				   uint8_t *buf, size_t buflen)
+{
+	struct client_state *cstate = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = cstate->ctdb;
+	TALLOC_CTX *tmp_ctx;
+	struct ctdb_req_header header;
+	struct ctdb_req_message request;
+	uint64_t srvid;
+	int ret;
+
+	tmp_ctx = talloc_new(cstate);
+	if (tevent_req_nomem(tmp_ctx, req)) {
+		return;
+	}
+
+	ret = ctdb_req_message_pull(buf, buflen, &header, tmp_ctx, &request);
+	if (ret != 0) {
+		talloc_free(tmp_ctx);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	header_fix_pnn(&header, ctdb);
+
+	if (header.destnode >= ctdb->node_map->num_nodes) {
+		/* Many messages are not replied to, so just behave as
+		 * though this message was not received */
+		fprintf(stderr, "Invalid node %d\n", header.destnode);
+		goto out;
+	}
+
+	srvid = request.srvid;
+	DEBUG(DEBUG_INFO, ("request srvid = 0x%"PRIx64"\n", srvid));
+
+	if (srvid == CTDB_SRVID_DISABLE_RECOVERIES) {
+		message_disable_recoveries(tmp_ctx, req, &header, &request);
+	} else if (srvid == CTDB_SRVID_TAKEOVER_RUN) {
+		message_takeover_run(tmp_ctx, req, &header, &request);
+	} else {
+		D_DEBUG("Message id 0x%"PRIx64" not implemented\n", srvid);
+	}
+
+out:
+	talloc_free(tmp_ctx);
+}
+
+/*
+ * Handle one CTDB control packet received from a client.
+ *
+ * The packet in buf/buflen is unmarshalled with ctdb_req_control_pull()
+ * into a per-request talloc context and dispatched on request.opcode to
+ * the matching control_*() handler.
+ *
+ *  - A parse failure fails the client request with the pull error.
+ *  - An out-of-range destination node is answered with an "Invalid node"
+ *    error reply (status -1).
+ *  - If fake_control_failure() reports that it handled the request, no
+ *    handler is run.
+ *  - An unknown opcode gets control_error(), unless the request carries
+ *    CTDB_CTRL_FLAG_NOREPLY.
+ *
+ * Every path that allocated the per-request context releases it before
+ * returning.
+ */
+static void client_process_control(struct tevent_req *req,
+				   uint8_t *buf, size_t buflen)
+{
+	struct client_state *state = tevent_req_data(
+		req, struct client_state);
+	struct ctdbd_context *ctdb = state->ctdb;
+	TALLOC_CTX *mem_ctx;
+	struct ctdb_req_header header;
+	struct ctdb_req_control request;
+	int ret;
+
+	mem_ctx = talloc_new(state);
+	if (tevent_req_nomem(mem_ctx, req)) {
+		return;
+	}
+
+	ret = ctdb_req_control_pull(buf, buflen, &header, mem_ctx, &request);
+	if (ret != 0) {
+		talloc_free(mem_ctx);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	header_fix_pnn(&header, ctdb);
+
+	if (header.destnode >= ctdb->node_map->num_nodes) {
+		struct ctdb_reply_control reply;
+
+		reply.rdata.opcode = request.opcode;
+		reply.errmsg = "Invalid node";
+		reply.status = -1;
+		client_send_control(req, &header, &reply);
+		/*
+		 * Leave through "done" so mem_ctx is released; a plain
+		 * return here leaked it until the client state was freed.
+		 */
+		goto done;
+	}
+
+	DEBUG(DEBUG_INFO, ("request opcode = %u, reqid = %u\n",
+			   request.opcode, header.reqid));
+
+	if (fake_control_failure(mem_ctx, req, &header, &request)) {
+		goto done;
+	}
+
+	switch (request.opcode) {
+	case CTDB_CONTROL_PROCESS_EXISTS:
+		control_process_exists(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_PING:
+		control_ping(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GETDBPATH:
+		control_getdbpath(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GETVNNMAP:
+		control_getvnnmap(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_DEBUG:
+		control_get_debug(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SET_DEBUG:
+		control_set_debug(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_DBMAP:
+		control_get_dbmap(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_RECMODE:
+		control_get_recmode(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SET_RECMODE:
+		control_set_recmode(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_DB_ATTACH:
+		control_db_attach(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_REGISTER_SRVID:
+		control_register_srvid(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_DEREGISTER_SRVID:
+		control_deregister_srvid(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_DBNAME:
+		control_get_dbname(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_PID:
+		control_get_pid(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_PNN:
+		control_get_pnn(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SHUTDOWN:
+		control_shutdown(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SET_TUNABLE:
+		control_set_tunable(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_TUNABLE:
+		control_get_tunable(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_LIST_TUNABLES:
+		control_list_tunables(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_MODIFY_FLAGS:
+		control_modify_flags(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_ALL_TUNABLES:
+		control_get_all_tunables(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+		control_db_attach_persistent(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_UPTIME:
+		control_uptime(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_RELOAD_NODES_FILE:
+		control_reload_nodes_file(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_CAPABILITIES:
+		control_get_capabilities(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_RELEASE_IP:
+		control_release_ip(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_TAKEOVER_IP:
+		control_takeover_ip(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_PUBLIC_IPS:
+		control_get_public_ips(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_NODEMAP:
+		control_get_nodemap(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_RECLOCK_FILE:
+		control_get_reclock_file(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_STOP_NODE:
+		control_stop_node(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_CONTINUE_NODE:
+		control_continue_node(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SET_BAN_STATE:
+		control_set_ban_state(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_TRANS3_COMMIT:
+		control_trans3_commit(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_DB_SEQNUM:
+		control_get_db_seqnum(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_DB_GET_HEALTH:
+		control_db_get_health(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+		control_get_public_ip_info(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_IFACES:
+		control_get_ifaces(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+		control_set_iface_link_state(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SET_DB_READONLY:
+		control_set_db_readonly(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_START_EXT:
+		control_traverse_start_ext(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_SET_DB_STICKY:
+		control_set_db_sticky(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_IPREALLOCATED:
+		control_ipreallocated(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_RUNSTATE:
+		control_get_runstate(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_GET_NODES_FILE:
+		control_get_nodes_file(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_DB_OPEN_FLAGS:
+		control_db_open_flags(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+		control_db_attach_replicated(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_CHECK_PID_SRVID:
+		control_check_pid_srvid(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_DISABLE_NODE:
+		control_disable_node(mem_ctx, req, &header, &request);
+		break;
+
+	case CTDB_CONTROL_ENABLE_NODE:
+		control_enable_node(mem_ctx, req, &header, &request);
+		break;
+
+	default:
+		/* Unknown opcode: reply with an error unless none is wanted */
+		if (! (request.flags & CTDB_CTRL_FLAG_NOREPLY)) {
+			control_error(mem_ctx, req, &header, &request);
+		}
+		break;
+	}
+
+done:
+	talloc_free(mem_ctx);
+}
+
+/*
+ * Collect the result of a finished client request.
+ *
+ * Closes the client's socket, then returns -1 (storing the unix error
+ * in *perr when perr is not NULL) if the request failed, or the status
+ * recorded in the client state otherwise.
+ */
+static int client_recv(struct tevent_req *req, int *perr)
+{
+	struct client_state *cstate;
+	int unix_err;
+	bool failed;
+
+	cstate = tevent_req_data(req, struct client_state);
+
+	DEBUG(DEBUG_INFO, ("Client done fd=%d\n", cstate->fd));
+	close(cstate->fd);
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (!failed) {
+		return cstate->status;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return -1;
+}
+
+/*
+ * Fake CTDB server
+ */
+
+struct server_state {
+ struct tevent_context *ev; /* event context used for timers, accept and clients */
+ struct ctdbd_context *ctdb; /* fake daemon state handed to every client request */
+ struct tevent_timer *leader_broadcast_te; /* last scheduled leader broadcast timer; NULL if scheduling failed */
+ int fd; /* listening socket that new clients are accepted from */
+};
+
+static void server_leader_broadcast(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval current_time,
+ void *private_data);
+static void server_new_client(struct tevent_req *subreq);
+static void server_client_done(struct tevent_req *subreq);
+/*
+ * Start the fake CTDB server on listening socket fd.
+ *
+ * Schedules the leader broadcast timer and issues the first accept.
+ * Failing to schedule the timer is only logged; the server still starts.
+ * Returns the server request, or NULL if it could not be created.
+ */
+static struct tevent_req *server_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdbd_context *ctdb,
+ int fd)
+{
+ struct tevent_req *req, *subreq;
+ struct server_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct server_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->ctdb = ctdb;
+ state->fd = fd;
+/* Zero offset: first broadcast is due at once; the handler re-arms itself */
+ state->leader_broadcast_te = tevent_add_timer(state->ev,
+ state,
+ timeval_current_ofs(0, 0),
+ server_leader_broadcast,
+ state);
+ if (state->leader_broadcast_te == NULL) {
+ DBG_WARNING("Failed to set up leader broadcast\n");
+ }
+/* server_new_client() handles the connection and issues the next accept */
+ subreq = accept_send(state, ev, fd);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, server_new_client, req);
+
+ return req;
+}
+
+/*
+ * Timer handler: announce the current leader and re-arm for one second.
+ *
+ * When the node map's recmaster is known, its PNN is dispatched to
+ * CTDB_SRVID_LEADER; a dispatch failure is only logged.  The timer is
+ * re-scheduled in every case, and a scheduling failure is only logged.
+ */
+static void server_leader_broadcast(struct tevent_context *ev,
+				    struct tevent_timer *te,
+				    struct timeval current_time,
+				    void *private_data)
+{
+	struct server_state *srv_state = talloc_get_type_abort(
+		private_data, struct server_state);
+	struct ctdbd_context *daemon = srv_state->ctdb;
+	uint32_t leader_pnn = daemon->node_map->recmaster;
+
+	if (leader_pnn != CTDB_UNKNOWN_PNN) {
+		TDB_DATA payload;
+		int rc;
+
+		payload.dptr = (uint8_t *)&leader_pnn;
+		payload.dsize = sizeof(leader_pnn);
+
+		rc = srvid_dispatch(daemon->srv, CTDB_SRVID_LEADER, 0,
+				    payload);
+		if (rc != 0) {
+			DBG_WARNING("Failed to send leader broadcast, ret=%d\n", rc);
+		}
+	}
+
+	/* Re-arm whether or not a broadcast was sent this round */
+	srv_state->leader_broadcast_te = tevent_add_timer(
+		srv_state->ev,
+		srv_state,
+		timeval_current_ofs(1, 0),
+		server_leader_broadcast,
+		srv_state);
+	if (srv_state->leader_broadcast_te == NULL) {
+		DBG_WARNING("Failed to set up leader broadcast\n");
+	}
+}
+/*
+ * accept_send() completion callback.
+ *
+ * If the accept failed, the server request fails with that error.
+ * Otherwise a client request is started for the new fd, num_clients is
+ * incremented and another accept is issued on the listening socket.
+ */
+static void server_new_client(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct server_state *state = tevent_req_data(
+ req, struct server_state);
+ struct ctdbd_context *ctdb = state->ctdb;
+ int client_fd;
+ int ret = 0;
+
+ client_fd = accept_recv(subreq, NULL, NULL, &ret);
+ TALLOC_FREE(subreq);
+ if (client_fd == -1) {
+ tevent_req_error(req, ret);
+ return;
+ }
+/* NOTE(review): client_fd is not closed if client_send() fails below */
+ subreq = client_send(state, state->ev, client_fd,
+ ctdb, ctdb->node_map->pnn);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, server_client_done, req);
+
+ ctdb->num_clients += 1;
+/* Issue the next accept so further clients can connect */
+ subreq = accept_send(state, state->ev, state->fd);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, server_new_client, req);
+}
+/*
+ * client_send() completion callback.
+ *
+ * A negative status from client_recv() fails the whole server request
+ * with the reported error (num_clients is not decremented on that path).
+ * Otherwise num_clients is decremented, and the special status 99
+ * completes the server request; any other status leaves it running.
+ */
+static void server_client_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct server_state *state = tevent_req_data(
+ req, struct server_state);
+ struct ctdbd_context *ctdb = state->ctdb;
+ int ret = 0;
+ int status;
+
+ status = client_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (status < 0) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ctdb->num_clients -= 1;
+
+ if (status == 99) {
+ /* Special status, to shutdown server */
+ DEBUG(DEBUG_INFO, ("Shutting down server\n"));
+ tevent_req_done(req);
+ }
+}
+
+/*
+ * Collect the result of the server request.
+ *
+ * Returns true on success.  On failure returns false and, when perr is
+ * not NULL, stores the unix error in *perr.
+ */
+static bool server_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+	bool failed = tevent_req_is_unix_error(req, &unix_err);
+
+	if (failed && perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return !failed;
+}
+
+/*
+ * Main functions
+ */
+
+/*
+ * Create a Unix domain stream socket bound to sockpath and listening
+ * with a backlog of 10.
+ *
+ * Returns the listening fd, or -1 after printing a message to stderr
+ * if the path does not fit in sun_path or socket/bind/listen fails.
+ */
+static int socket_init(const char *sockpath)
+{
+	struct sockaddr_un sun;
+	size_t copied;
+	int sock;
+
+	memset(&sun, 0, sizeof(sun));
+	sun.sun_family = AF_UNIX;
+
+	/* strlcpy() returns the source length, so >= size means truncation */
+	copied = strlcpy(sun.sun_path, sockpath, sizeof(sun.sun_path));
+	if (copied >= sizeof(sun.sun_path)) {
+		fprintf(stderr, "path too long: %s\n", sockpath);
+		return -1;
+	}
+
+	sock = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sock == -1) {
+		fprintf(stderr, "socket failed - %s\n", sockpath);
+		return -1;
+	}
+
+	if (bind(sock, (struct sockaddr *)&sun, sizeof(sun)) != 0) {
+		fprintf(stderr, "bind failed - %s\n", sockpath);
+		close(sock);
+		return -1;
+	}
+
+	if (listen(sock, 10) != 0) {
+		fprintf(stderr, "listen failed\n");
+		close(sock);
+		return -1;
+	}
+
+	DEBUG(DEBUG_INFO, ("Socket init done\n"));
+
+	return sock;
+}
+
+static struct options {
+ const char *dbdir; /* --dbdir/-D: database directory (required by main) */
+ const char *sockpath; /* --socket/-s: Unix domain socket path (required by main) */
+ const char *pidfile; /* --pidfile/-p: pid file path (required by main) */
+ const char *debuglevel; /* --debug/-d: debug level string passed to logging_init() */
+} options;
+/*
+ * popt option table: each string option is stored directly into the
+ * matching field of the file-scope 'options' structure.
+ */
+static struct poptOption cmdline_options[] = {
+ POPT_AUTOHELP
+ { "dbdir", 'D', POPT_ARG_STRING, &options.dbdir, 0,
+ "Database directory", "directory" },
+ { "socket", 's', POPT_ARG_STRING, &options.sockpath, 0,
+ "Unix domain socket path", "filename" },
+ { "pidfile", 'p', POPT_ARG_STRING, &options.pidfile, 0,
+ "pid file", "filename" } ,
+ { "debug", 'd', POPT_ARG_STRING, &options.debuglevel, 0,
+ "debug level", "ERR|WARNING|NOTICE|INFO|DEBUG" } ,
+ POPT_TABLEEND
+};
+/*
+ * Remove the Unix socket and the pid file named on the command line.
+ * unlink() results are ignored, so a missing file is not an error.
+ */
+static void cleanup(void)
+{
+ unlink(options.sockpath);
+ unlink(options.pidfile);
+}
+
+/*
+ * SIGTERM handler: remove the socket and pid file, then terminate with
+ * status 0.
+ *
+ * _exit() is used rather than exit(): exit() is not async-signal-safe
+ * (it runs atexit handlers and flushes stdio, which can deadlock if the
+ * signal interrupted code holding a lock), and it would run the
+ * atexit-registered cleanup() a second time.  cleanup() only calls
+ * unlink(), which is async-signal-safe.
+ */
+static void signal_handler(int sig)
+{
+	(void)sig;
+
+	cleanup();
+	_exit(0);
+}
+/*
+ * Run the server in the forked child.
+ *
+ * Registers cleanup() for process exit and for SIGTERM, starts the server
+ * request on fd, writes an int 0 to pfd to tell the parent that start-up
+ * succeeded, then polls until the server request finishes.  Exits with
+ * status 1 if the request cannot be created, the parent cannot be
+ * notified, or the server finished with an error; returns otherwise.
+ */
+static void start_server(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdbd_context *ctdb, int fd, int pfd)
+{
+ struct tevent_req *req;
+ int ret = 0;
+ ssize_t len;
+
+ atexit(cleanup);
+ signal(SIGTERM, signal_handler);
+
+ req = server_send(mem_ctx, ev, ctdb, fd);
+ if (req == NULL) {
+ fprintf(stderr, "Memory error\n");
+ exit(1);
+ }
+/* ret is still 0 here: this is the "ready" message the parent waits for */
+ len = write(pfd, &ret, sizeof(ret));
+ if (len != sizeof(ret)) {
+ fprintf(stderr, "Failed to send message to parent\n");
+ exit(1);
+ }
+ close(pfd);
+
+ tevent_req_poll(req, ev);
+/* ret is only overwritten by server_recv() when the request failed */
+ server_recv(req, &ret);
+ if (ret != 0) {
+ exit(1);
+ }
+}
+/*
+ * Entry point of the fake CTDB daemon.
+ *
+ * Parses the command line, sets up logging, the fake daemon state and
+ * the listening socket, then forks.  The child runs the server via
+ * start_server().  The parent waits for the child's readiness message on
+ * a pipe, writes the child's pid to the pid file and returns 0; on
+ * failure it sends SIGTERM to the child and exits with status 1.
+ */
+int main(int argc, const char *argv[])
+{
+ TALLOC_CTX *mem_ctx;
+ struct ctdbd_context *ctdb;
+ struct tevent_context *ev;
+ poptContext pc;
+ int opt, fd, ret, pfd[2];
+ ssize_t len;
+ pid_t pid;
+ FILE *fp;
+
+ pc = poptGetContext(argv[0], argc, argv, cmdline_options,
+ POPT_CONTEXT_KEEP_FIRST);
+ while ((opt = poptGetNextOpt(pc)) != -1) {
+ fprintf(stderr, "Invalid option %s\n", poptBadOption(pc, 0));
+ exit(1);
+ }
+/* --dbdir, --socket and --pidfile are all mandatory */
+ if (options.dbdir == NULL) {
+ fprintf(stderr, "Please specify database directory\n");
+ poptPrintHelp(pc, stdout, 0);
+ exit(1);
+ }
+
+ if (options.sockpath == NULL) {
+ fprintf(stderr, "Please specify socket path\n");
+ poptPrintHelp(pc, stdout, 0);
+ exit(1);
+ }
+
+ if (options.pidfile == NULL) {
+ fprintf(stderr, "Please specify pid file\n");
+ poptPrintHelp(pc, stdout, 0);
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory error\n");
+ exit(1);
+ }
+
+ ret = logging_init(mem_ctx, "file:", options.debuglevel, "fake-ctdbd");
+ if (ret != 0) {
+ fprintf(stderr, "Invalid debug level\n");
+ poptPrintHelp(pc, stdout, 0);
+ exit(1);
+ }
+
+ ctdb = ctdbd_setup(mem_ctx, options.dbdir);
+ if (ctdb == NULL) {
+ exit(1);
+ }
+
+ if (! ctdbd_verify(ctdb)) {
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "Memory error\n");
+ exit(1);
+ }
+
+ fd = socket_init(options.sockpath);
+ if (fd == -1) {
+ exit(1);
+ }
+/* Pipe on which the child reports that the server has started */
+ ret = pipe(pfd);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to create pipe\n");
+ cleanup();
+ exit(1);
+ }
+
+ pid = fork();
+ if (pid == -1) {
+ fprintf(stderr, "Failed to fork\n");
+ cleanup();
+ exit(1);
+ }
+
+ if (pid == 0) {
+ /* Child */
+ close(pfd[0]);
+ start_server(mem_ctx, ev, ctdb, fd, pfd[1]);
+ exit(1);
+ }
+
+ /* Parent */
+ close(pfd[1]);
+/* Wait for the child's start-up message; a short read means it failed */
+ len = read(pfd[0], &ret, sizeof(ret));
+ close(pfd[0]);
+ if (len != sizeof(ret)) {
+ fprintf(stderr, "len = %zi\n", len);
+ fprintf(stderr, "Failed to get message from child\n");
+ kill(pid, SIGTERM);
+ exit(1);
+ }
+/* The pid file is written by the parent and holds the child's pid */
+ fp = fopen(options.pidfile, "w");
+ if (fp == NULL) {
+ fprintf(stderr, "Failed to open pid file %s\n",
+ options.pidfile);
+ kill(pid, SIGTERM);
+ exit(1);
+ }
+ fprintf(fp, "%d\n", pid);
+ fclose(fp);
+
+ return 0;
+}
diff --git a/ctdb/tests/src/fetch_loop.c b/ctdb/tests/src/fetch_loop.c
new file mode 100644
index 0000000..0e1d9da
--- /dev/null
+++ b/ctdb/tests/src/fetch_loop.c
@@ -0,0 +1,288 @@
+/*
+ simple ctdb benchmark
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+#define TESTDB "fetch_loop.tdb"
+#define TESTKEY "testkey"
+
+struct fetch_loop_state {
+ struct tevent_context *ev; /* event context used for all sub-requests */
+ struct ctdb_client_context *client; /* connection to the CTDB daemon */
+ struct ctdb_db_context *ctdb_db; /* database the test record lives in */
+ int num_nodes; /* node count passed to cluster_wait_send() */
+ int timelimit; /* seconds until fetch_loop_finish() fires */
+ TDB_DATA key; /* TESTKEY, length taken with strlen() so no trailing NUL */
+ int locks_count; /* counter last read from / written to the record */
+};
+
+static void fetch_loop_start(struct tevent_req *subreq);
+static void fetch_loop_next(struct tevent_req *subreq);
+static void fetch_loop_each_second(struct tevent_req *subreq);
+static void fetch_loop_finish(struct tevent_req *subreq);
+/*
+ * Start the fetch-lock benchmark.
+ *
+ * Records the parameters in the request state, sets the key to TESTKEY
+ * and waits for the cluster with cluster_wait_send(); the work continues
+ * in fetch_loop_start().  Returns NULL if the request cannot be created.
+ */
+static struct tevent_req *fetch_loop_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *ctdb_db,
+ int num_nodes, int timelimit)
+{
+ struct tevent_req *req, *subreq;
+ struct fetch_loop_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct fetch_loop_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->client = client;
+ state->ctdb_db = ctdb_db;
+ state->num_nodes = num_nodes;
+ state->timelimit = timelimit;
+ state->key.dptr = discard_const(TESTKEY);
+ state->key.dsize = strlen(TESTKEY);
+
+ subreq = cluster_wait_send(state, state->ev, state->client,
+ state->num_nodes);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, fetch_loop_start, req);
+
+ return req;
+}
+/*
+ * cluster_wait_send() completion callback.
+ *
+ * Starts the first fetch-lock (continued in fetch_loop_next()), a
+ * one-second progress timer on the client whose PNN is 0 only, and the
+ * overall time-limit timer that ends the test in fetch_loop_finish().
+ */
+static void fetch_loop_start(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct fetch_loop_state *state = tevent_req_data(
+ req, struct fetch_loop_state);
+ bool status;
+ int ret;
+
+ status = cluster_wait_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ subreq = ctdb_fetch_lock_send(state, state->ev, state->client,
+ state->ctdb_db, state->key, false);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, fetch_loop_next, req);
+/* Only the client with PNN 0 prints per-second progress */
+ if (ctdb_client_pnn(state->client) == 0) {
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(1, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, fetch_loop_each_second, req);
+ }
+/* Overall time limit for the test */
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(
+ state->timelimit, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, fetch_loop_finish, req);
+}
+
+/*
+ * ctdb_fetch_lock_send() completion callback: one benchmark iteration.
+ *
+ * Takes over the counter stored in the record when it is exactly
+ * sizeof(uint32_t) bytes, increments it, writes it back, releases the
+ * record handle and requests the lock again.  Any failure fails the
+ * request.
+ */
+static void fetch_loop_next(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_loop_state *state = tevent_req_data(
+		req, struct fetch_loop_state);
+	struct ctdb_record_handle *handle;
+	TDB_DATA fetched;
+	TDB_DATA updated;
+	int rc;
+
+	handle = ctdb_fetch_lock_recv(subreq, NULL, state, &fetched, &rc);
+	TALLOC_FREE(subreq);
+	if (handle == NULL) {
+		tevent_req_error(req, rc);
+		return;
+	}
+
+	/* Records of any other size are ignored and the old count kept */
+	if (fetched.dsize == sizeof(uint32_t)) {
+		state->locks_count = *(uint32_t *)fetched.dptr;
+	}
+	TALLOC_FREE(fetched.dptr);
+
+	state->locks_count += 1;
+	updated.dsize = sizeof(uint32_t);
+	updated.dptr = (uint8_t *)&state->locks_count;
+
+	/* The handle is released whether or not the store succeeded */
+	rc = ctdb_store_record(handle, updated);
+	talloc_free(handle);
+	if (rc != 0) {
+		tevent_req_error(req, rc);
+		return;
+	}
+
+	subreq = ctdb_fetch_lock_send(state, state->ev, state->client,
+				      state->ctdb_db, state->key, false);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, fetch_loop_next, req);
+}
+/*
+ * One-second progress timer callback: print the current lock count
+ * (ending in '\r', so the line is overwritten) and re-arm the timer.
+ * A failed wakeup fails the request with EIO.
+ */
+static void fetch_loop_each_second(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct fetch_loop_state *state = tevent_req_data(
+ req, struct fetch_loop_state);
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ printf("Locks:%d\r", state->locks_count);
+ fflush(stdout);
+
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(1, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, fetch_loop_each_second, req);
+}
+/*
+ * Time-limit timer callback: print the final lock count and complete
+ * the request.  A failed wakeup fails the request with EIO.
+ */
+static void fetch_loop_finish(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct fetch_loop_state *state = tevent_req_data(
+ req, struct fetch_loop_state);
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ printf("Locks:%d\n", state->locks_count);
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of the fetch loop request.
+ *
+ * Returns true on success.  On failure returns false and, when perr is
+ * not NULL, stores the unix error in *perr.
+ */
+static bool fetch_loop_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (!tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return false;
+}
+/*
+ * fetch_loop test driver: connect to CTDB on opts->socket, attach to
+ * TESTDB and run the fetch loop for opts->timelimit seconds.
+ * Exits with status 1 on any failure, returns 0 on success.
+ */
+int main(int argc, const char *argv[])
+{
+ const struct test_options *opts;
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct ctdb_db_context *ctdb_db;
+ struct tevent_req *req;
+ int ret;
+ bool status;
+
+ setup_logging("fetch_loop", DEBUG_STDERR);
+
+ status = process_options_basic(argc, argv, &opts);
+ if (! status) {
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to initialize client, ret=%d\n", ret);
+ exit(1);
+ }
+/* NOTE(review): every ctdb_recovery_wait() failure is reported as a memory error - confirm that is its only failure mode */
+ if (! ctdb_recovery_wait(ev, client)) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_attach(ev, client, tevent_timeval_zero(), TESTDB, 0,
+ &ctdb_db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to DB %s\n", TESTDB);
+ exit(1);
+ }
+
+ req = fetch_loop_send(mem_ctx, ev, client, ctdb_db,
+ opts->num_nodes, opts->timelimit);
+ if (req == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ tevent_req_poll(req, ev);
+
+ status = fetch_loop_recv(req, &ret);
+ if (! status) {
+ fprintf(stderr, "fetch loop test failed\n");
+ exit(1);
+ }
+
+ talloc_free(mem_ctx);
+ return 0;
+}
diff --git a/ctdb/tests/src/fetch_loop_key.c b/ctdb/tests/src/fetch_loop_key.c
new file mode 100644
index 0000000..3f41ca7
--- /dev/null
+++ b/ctdb/tests/src/fetch_loop_key.c
@@ -0,0 +1,217 @@
+/*
+ simple ctdb benchmark
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+struct fetch_loop_state {
+ struct tevent_context *ev; /* event context used for all sub-requests */
+ struct ctdb_client_context *client; /* connection to the CTDB daemon */
+ struct ctdb_db_context *ctdb_db; /* database the record lives in */
+ int timelimit; /* seconds after which alarm_handler() completes the request */
+ TDB_DATA key; /* points at the caller's key string (not copied), without trailing NUL */
+ int locks_count; /* counter last read from / written to the record */
+};
+
+static void fetch_loop_next(struct tevent_req *subreq);
+/*
+ * Start the fetch-lock loop on the record named by keystr.
+ *
+ * The key refers to keystr directly rather than to a copy, so keystr
+ * must stay valid while the request runs.  The first fetch-lock is
+ * issued immediately and continued in fetch_loop_next().  Returns NULL
+ * if the request cannot be created.
+ */
+static struct tevent_req *fetch_loop_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *ctdb_db,
+ const char *keystr,
+ int timelimit)
+{
+ struct tevent_req *req, *subreq;
+ struct fetch_loop_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct fetch_loop_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->client = client;
+ state->ctdb_db = ctdb_db;
+ state->timelimit = timelimit;
+ state->key.dptr = discard_const(keystr);
+ state->key.dsize = strlen(keystr);
+
+ subreq = ctdb_fetch_lock_send(state, ev, client, ctdb_db,
+ state->key, false);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, fetch_loop_next, req);
+
+ return req;
+}
+
+/*
+ * ctdb_fetch_lock_send() completion callback: one loop iteration.
+ *
+ * Adopts the counter held in the record when the record is exactly
+ * sizeof(uint32_t) bytes long, bumps it, stores it back, drops the
+ * record handle and asks for the lock again.  Any failure fails the
+ * request.
+ */
+static void fetch_loop_next(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_loop_state *state = tevent_req_data(
+		req, struct fetch_loop_state);
+	struct ctdb_record_handle *rec;
+	TDB_DATA old_value;
+	TDB_DATA new_value;
+	int err;
+
+	rec = ctdb_fetch_lock_recv(subreq, NULL, state, &old_value, &err);
+	TALLOC_FREE(subreq);
+	if (rec == NULL) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	/* A record of any other size leaves the previous count in place */
+	if (old_value.dsize == sizeof(uint32_t)) {
+		state->locks_count = *(uint32_t *)old_value.dptr;
+	}
+	TALLOC_FREE(old_value.dptr);
+
+	state->locks_count += 1;
+	new_value.dptr = (uint8_t *)&state->locks_count;
+	new_value.dsize = sizeof(uint32_t);
+
+	/* Drop the handle before looking at the store result */
+	err = ctdb_store_record(rec, new_value);
+	talloc_free(rec);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	subreq = ctdb_fetch_lock_send(state, state->ev, state->client,
+				      state->ctdb_db, state->key, false);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, fetch_loop_next, req);
+}
+
+/*
+ * Collect the result of the fetch loop request.
+ *
+ * Returns false and, when perr is not NULL, stores the unix error in
+ * *perr if the request failed; returns true otherwise.
+ */
+static bool fetch_loop_recv(struct tevent_req *req, int *perr)
+{
+	int code;
+	bool ok = !tevent_req_is_unix_error(req, &code);
+
+	if (!ok && perr != NULL) {
+		*perr = code;
+	}
+
+	return ok;
+}
+
+static struct tevent_req *global_req;
+/*
+ * SIGALRM handler, re-armed to fire every second: print the current lock
+ * count and, once 'timelimit' ticks have been counted, complete the
+ * request stored in global_req.
+ *
+ * NOTE(review): printf(), fflush() and tevent_req_done() are not
+ * async-signal-safe, and the alarm is re-armed even after the request
+ * has been completed - confirm this is acceptable for this benchmark.
+ */
+static void alarm_handler(int sig)
+{
+ struct fetch_loop_state *state = tevent_req_data(
+ global_req, struct fetch_loop_state);
+ static int time_passed = 0;
+
+ time_passed += 1;
+
+ printf("Locks:%d\n", state->locks_count);
+ fflush(stdout);
+
+ if (time_passed >= state->timelimit) {
+ tevent_req_done(global_req);
+ }
+
+ alarm(1);
+}
+/*
+ * fetch_loop_key test driver: connect to CTDB on opts->socket, attach to
+ * opts->dbname and run the fetch loop on opts->keystr.  Progress output
+ * and termination are driven by a one-second SIGALRM (alarm_handler()).
+ * Exits with status 1 on any failure, returns 0 on success.
+ */
+int main(int argc, const char *argv[])
+{
+ const struct test_options *opts;
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct ctdb_db_context *ctdb_db;
+ int ret;
+ bool status;
+
+ setup_logging("fetch_loop_key", DEBUG_STDERR);
+
+ status = process_options_database(argc, argv, &opts);
+ if (! status) {
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to initialize client, %s\n",
+ strerror(ret));
+ exit(1);
+ }
+/* NOTE(review): every ctdb_recovery_wait() failure is reported as a memory error - confirm that is its only failure mode */
+ if (! ctdb_recovery_wait(ev, client)) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_attach(ev, client, tevent_timeval_zero(), opts->dbname, 0,
+ &ctdb_db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to DB %s\n", opts->dbname);
+ exit(1);
+ }
+
+ global_req = fetch_loop_send(mem_ctx, ev, client, ctdb_db,
+ opts->keystr, opts->timelimit);
+ if (global_req == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+/* The handler reads global_req, so it is installed only after it is set */
+ signal(SIGALRM, alarm_handler);
+ alarm(1);
+
+ tevent_req_poll(global_req, ev);
+
+ status = fetch_loop_recv(global_req, &ret);
+ if (! status) {
+ fprintf(stderr, "fetch loop test failed\n");
+ exit(1);
+ }
+
+ talloc_free(mem_ctx);
+ return 0;
+}
diff --git a/ctdb/tests/src/fetch_readonly.c b/ctdb/tests/src/fetch_readonly.c
new file mode 100644
index 0000000..ff126bd
--- /dev/null
+++ b/ctdb/tests/src/fetch_readonly.c
@@ -0,0 +1,166 @@
+/*
+ Fetch a single record using readonly
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+
+struct fetch_readonly_state {
+ struct tevent_context *ev; /* event context given to fetch_readonly_send() */
+};
+
+static void fetch_readonly_done(struct tevent_req *subreq);
+/*
+ * Start a single fetch-lock of the record named by keystr, passing true
+ * as the last argument of ctdb_fetch_lock_send() (the fetch_loop tests
+ * pass false there).  Completion is handled in fetch_readonly_done().
+ * The timelimit argument is accepted but not used.  Returns NULL if the
+ * request cannot be created.
+ */
+static struct tevent_req *fetch_readonly_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ struct ctdb_db_context *db,
+ const char *keystr,
+ int timelimit)
+{
+ struct tevent_req *req, *subreq;
+ struct fetch_readonly_state *state;
+ TDB_DATA key;
+
+ req = tevent_req_create(mem_ctx, &state, struct fetch_readonly_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+
+ key.dptr = (uint8_t *)discard_const(keystr);
+ key.dsize = strlen(keystr);
+
+ subreq = ctdb_fetch_lock_send(state, ev, client, db, key, true);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, fetch_readonly_done, req);
+
+ return req;
+}
+/*
+ * ctdb_fetch_lock_send() completion callback: on success free the record
+ * handle straight away and complete the request; on failure fail the
+ * request with the reported error.  The record data is not requested.
+ */
+static void fetch_readonly_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct fetch_readonly_state *state = tevent_req_data(
+ req, struct fetch_readonly_state);
+ struct ctdb_record_handle *h;
+ int ret;
+
+ h = ctdb_fetch_lock_recv(subreq, NULL, state, NULL, &ret);
+ TALLOC_FREE(subreq);
+ if (h == NULL) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ talloc_free(h);
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of the readonly fetch request.
+ *
+ * Returns true on success.  On failure returns false and, when perr is
+ * not NULL, stores the unix error in *perr.
+ */
+static bool fetch_readonly_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+	bool failed;
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (failed && perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return failed ? false : true;
+}
+/*
+ * fetch_readonly test driver: connect to CTDB on opts->socket, attach to
+ * opts->dbname and perform one readonly fetch of opts->keystr.
+ * Exits with status 1 on any failure, returns 0 on success.
+ */
+int main(int argc, const char *argv[])
+{
+ const struct test_options *opts;
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct ctdb_db_context *ctdb_db;
+ struct tevent_req *req;
+ int ret;
+ bool status;
+
+ setup_logging("fetch_readonly", DEBUG_STDERR);
+
+ status = process_options_database(argc, argv, &opts);
+ if (! status) {
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to initialize client, %s\n",
+ strerror(ret));
+ exit(1);
+ }
+/* NOTE(review): every ctdb_recovery_wait() failure is reported as a memory error - confirm that is its only failure mode */
+ if (! ctdb_recovery_wait(ev, client)) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_attach(ev, client, tevent_timeval_zero(), opts->dbname, 0,
+ &ctdb_db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to DB %s\n", opts->dbname);
+ exit(1);
+ }
+
+ req = fetch_readonly_send(mem_ctx, ev, client, ctdb_db,
+ opts->keystr, opts->timelimit);
+ if (req == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ tevent_req_poll(req, ev);
+
+ status = fetch_readonly_recv(req, &ret);
+ if (! status) {
+ fprintf(stderr, "fetch readonly loop test failed\n");
+ exit(1);
+ }
+
+ talloc_free(mem_ctx);
+ return 0;
+}
diff --git a/ctdb/tests/src/fetch_readonly_loop.c b/ctdb/tests/src/fetch_readonly_loop.c
new file mode 100644
index 0000000..08cf476
--- /dev/null
+++ b/ctdb/tests/src/fetch_readonly_loop.c
@@ -0,0 +1,272 @@
+/*
+ simple ctdb benchmark
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+/* Database attached by main() and record key locked by the loop */
+#define TESTDB "fetch_readonly_loop.tdb"
+#define TESTKEY "testkey"
+
+/* State of one fetch_loop_send() request */
+struct fetch_loop_state {
+ struct tevent_context *ev; /* event context passed to fetch_loop_send() */
+ struct ctdb_client_context *client; /* client passed to fetch_loop_send() */
+ struct ctdb_db_context *ctdb_db; /* database the key is locked in */
+ int num_nodes; /* handed to cluster_wait_send() */
+ int timelimit; /* seconds until fetch_loop_finish() fires */
+ TDB_DATA key; /* TESTKEY, length excludes the trailing NUL */
+ int locks_count; /* bumped in fetch_loop_next() for every lock obtained */
+};
+
+/* Callbacks of the sub-requests created by this test, in call order */
+static void fetch_loop_start(struct tevent_req *subreq);
+static void fetch_loop_next(struct tevent_req *subreq);
+static void fetch_loop_each_second(struct tevent_req *subreq);
+static void fetch_loop_finish(struct tevent_req *subreq);
+
+/*
+ * Create the fetch loop request.
+ *
+ * Records the arguments in the request state, sets the key to TESTKEY
+ * and starts a cluster_wait sub-request; fetch_loop_start() runs when
+ * that completes.
+ *
+ * Returns NULL if the request itself cannot be created, otherwise the
+ * new request (already posted as failed if the sub-request could not
+ * be allocated).
+ */
+static struct tevent_req *fetch_loop_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct ctdb_client_context *client,
+					  struct ctdb_db_context *ctdb_db,
+					  int num_nodes, int timelimit)
+{
+	struct fetch_loop_state *state;
+	struct tevent_req *req;
+	struct tevent_req *wait_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct fetch_loop_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->ctdb_db = ctdb_db;
+	state->num_nodes = num_nodes;
+	state->timelimit = timelimit;
+	state->key = (TDB_DATA) {
+		.dptr = discard_const(TESTKEY),
+		.dsize = strlen(TESTKEY),
+	};
+
+	wait_req = cluster_wait_send(state, ev, client, num_nodes);
+	if (tevent_req_nomem(wait_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(wait_req, fetch_loop_start, req);
+
+	return req;
+}
+
+/*
+ * cluster_wait has completed: start the actual test.
+ *
+ * Issues the first fetch-lock request (completion handled by
+ * fetch_loop_next()), arms the end-of-test timer for timelimit
+ * seconds and, on the node with PNN 0 only, a one second timer that
+ * drives the progress output.
+ */
+static void fetch_loop_start(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_loop_state *state = tevent_req_data(
+		req, struct fetch_loop_state);
+	struct tevent_req *lock_req, *tick_req, *end_req;
+	bool ok;
+	int err;
+
+	ok = cluster_wait_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	lock_req = ctdb_fetch_lock_send(state, state->ev, state->client,
+					state->ctdb_db, state->key, true);
+	if (tevent_req_nomem(lock_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(lock_req, fetch_loop_next, req);
+
+	/* Only one node reports progress every second */
+	if (ctdb_client_pnn(state->client) == 0) {
+		tick_req = tevent_wakeup_send(
+				state, state->ev,
+				tevent_timeval_current_ofs(1, 0));
+		if (tevent_req_nomem(tick_req, req)) {
+			return;
+		}
+		tevent_req_set_callback(tick_req, fetch_loop_each_second,
+					req);
+	}
+
+	end_req = tevent_wakeup_send(
+			state, state->ev,
+			tevent_timeval_current_ofs(state->timelimit, 0));
+	if (tevent_req_nomem(end_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(end_req, fetch_loop_finish, req);
+}
+
+/*
+ * A fetch-lock request has completed.
+ *
+ * On success the lock is counted, the record handle is released and
+ * the next fetch-lock request for the same key is issued, so the loop
+ * keeps running until the overall request finishes.
+ */
+static void fetch_loop_next(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_loop_state *state = tevent_req_data(
+		req, struct fetch_loop_state);
+	struct ctdb_record_handle *handle;
+	struct tevent_req *lock_req;
+	int err;
+
+	handle = ctdb_fetch_lock_recv(subreq, NULL, state, NULL, &err);
+	TALLOC_FREE(subreq);
+	if (handle == NULL) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	state->locks_count++;
+	talloc_free(handle);
+
+	lock_req = ctdb_fetch_lock_send(state, state->ev, state->client,
+					state->ctdb_db, state->key, true);
+	if (tevent_req_nomem(lock_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(lock_req, fetch_loop_next, req);
+}
+
+/*
+ * One second timer: print the running lock count on a single,
+ * overwritten line ("\r") and re-arm the timer.
+ */
+static void fetch_loop_each_second(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_loop_state *state = tevent_req_data(
+		req, struct fetch_loop_state);
+	struct tevent_req *tick_req;
+	bool woke;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! woke) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	printf("Locks:%d\r", state->locks_count);
+	fflush(stdout);
+
+	tick_req = tevent_wakeup_send(state, state->ev,
+				      tevent_timeval_current_ofs(1, 0));
+	if (tevent_req_nomem(tick_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(tick_req, fetch_loop_each_second, req);
+}
+
+/*
+ * Time limit timer: print the final lock count and complete the
+ * request.
+ */
+static void fetch_loop_finish(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_loop_state *state = tevent_req_data(
+		req, struct fetch_loop_state);
+	bool woke;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! woke) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	printf("Locks:%d\n", state->locks_count);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a fetch_loop request.
+ *
+ * Returns true on success.  On failure returns false and, if perr is
+ * not NULL, sets *perr to the unix error of the request.
+ */
+static bool fetch_loop_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+	bool failed;
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (failed && perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return !failed;
+}
+
+/*
+ * Entry point of the fetch_readonly_loop test.
+ *
+ * Connects to the ctdb daemon socket from the options, attaches to
+ * TESTDB and runs the fetch loop for the configured number of nodes
+ * and time limit.  Exits with status 1 on any failure.
+ */
+int main(int argc, const char *argv[])
+{
+	const struct test_options *options;
+	struct ctdb_client_context *ctdb_client;
+	struct ctdb_db_context *db;
+	struct tevent_context *event_ctx;
+	struct tevent_req *loop_req;
+	TALLOC_CTX *top_ctx;
+	int err;
+
+	setup_logging("fetch_readonly_loop", DEBUG_STDERR);
+
+	if (! process_options_basic(argc, argv, &options)) {
+		exit(1);
+	}
+
+	top_ctx = talloc_new(NULL);
+	if (top_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	event_ctx = tevent_context_init(top_ctx);
+	if (event_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_client_init(top_ctx, event_ctx, options->socket,
+			       &ctdb_client);
+	if (err != 0) {
+		fprintf(stderr, "Failed to initialize client, ret=%d\n", err);
+		exit(1);
+	}
+
+	if (! ctdb_recovery_wait(event_ctx, ctdb_client)) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_attach(event_ctx, ctdb_client, tevent_timeval_zero(),
+			  TESTDB, 0, &db);
+	if (err != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", TESTDB);
+		exit(1);
+	}
+
+	loop_req = fetch_loop_send(top_ctx, event_ctx, ctdb_client, db,
+				   options->num_nodes, options->timelimit);
+	if (loop_req == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	tevent_req_poll(loop_req, event_ctx);
+
+	if (! fetch_loop_recv(loop_req, &err)) {
+		fprintf(stderr, "fetch readonly loop test failed\n");
+		exit(1);
+	}
+
+	talloc_free(top_ctx);
+	return 0;
+}
diff --git a/ctdb/tests/src/fetch_ring.c b/ctdb/tests/src/fetch_ring.c
new file mode 100644
index 0000000..f1786ef
--- /dev/null
+++ b/ctdb/tests/src/fetch_ring.c
@@ -0,0 +1,398 @@
+/*
+ simple ctdb benchmark
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+#define MSG_ID_FETCH 0
+
+/*
+ * Return the PNN following this client's own PNN, wrapping around to
+ * 0 after num_nodes - 1.
+ */
+static uint32_t next_node(struct ctdb_client_context *client, uint32_t num_nodes)
+{
+	uint32_t successor = ctdb_client_pnn(client) + 1;
+
+	return successor % num_nodes;
+}
+
+/* State of one fetch_ring_send() request */
+struct fetch_ring_state {
+ struct tevent_context *ev; /* event context passed to fetch_ring_send() */
+ struct ctdb_client_context *client; /* client passed to fetch_ring_send() */
+ struct ctdb_db_context *ctdb_db; /* database holding the test record */
+ uint32_t num_nodes; /* used for cluster_wait and for the ring modulo */
+ int timelimit; /* seconds until fetch_ring_finish() fires */
+ int interactive; /* 1: fetch_ring_final_read() prints the record */
+ TDB_DATA key; /* keystr, length excludes the trailing NUL */
+ int msg_count; /* messages received, bumped in fetch_ring_msg_handler() */
+ struct timeval start_time; /* set in fetch_ring_start(), used for the rate */
+};
+
+/* Message handler and sub-request callbacks used by this test */
+static void fetch_ring_msg_handler(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+static void fetch_ring_wait(struct tevent_req *subreq);
+static void fetch_ring_start(struct tevent_req *subreq);
+static void fetch_ring_update(struct tevent_req *subreq);
+static void fetch_ring_msg_sent(struct tevent_req *subreq);
+static void fetch_ring_finish(struct tevent_req *subreq);
+static void fetch_ring_final_read(struct tevent_req *subreq);
+
+/*
+ * Create the fetch ring request.
+ *
+ * Stores the arguments in the request state and registers
+ * fetch_ring_msg_handler() for MSG_ID_FETCH; fetch_ring_wait() runs
+ * once the registration has completed.
+ *
+ * Returns NULL if the request itself cannot be created, otherwise the
+ * new request (already posted as failed if the sub-request could not
+ * be allocated).
+ */
+static struct tevent_req *fetch_ring_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct ctdb_client_context *client,
+					  struct ctdb_db_context *ctdb_db,
+					  const char *keystr,
+					  uint32_t num_nodes,
+					  int timelimit,
+					  int interactive)
+{
+	struct fetch_ring_state *state;
+	struct tevent_req *req;
+	struct tevent_req *handler_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct fetch_ring_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->ctdb_db = ctdb_db;
+	state->num_nodes = num_nodes;
+	state->timelimit = timelimit;
+	state->interactive = interactive;
+	state->key = (TDB_DATA) {
+		.dptr = discard_const(keystr),
+		.dsize = strlen(keystr),
+	};
+
+	handler_req = ctdb_client_set_message_handler_send(
+					state, ev, client, MSG_ID_FETCH,
+					fetch_ring_msg_handler, req);
+	if (tevent_req_nomem(handler_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(handler_req, fetch_ring_wait, req);
+
+	return req;
+}
+
+/*
+ * Handler for MSG_ID_FETCH messages.
+ *
+ * Counts the message and requests the record lock; the record is then
+ * updated in fetch_ring_update().  srvid and data are not used;
+ * private_data is the fetch ring request.
+ */
+static void fetch_ring_msg_handler(uint64_t srvid, TDB_DATA data,
+				   void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct fetch_ring_state *state = tevent_req_data(
+		req, struct fetch_ring_state);
+	struct tevent_req *lock_req;
+
+	state->msg_count++;
+
+	lock_req = ctdb_fetch_lock_send(state, state->ev, state->client,
+					state->ctdb_db, state->key, false);
+	if (tevent_req_nomem(lock_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(lock_req, fetch_ring_update, req);
+}
+
+/*
+ * The message handler has been registered: wait for the cluster via
+ * cluster_wait before starting; fetch_ring_start() continues from
+ * there.
+ */
+static void fetch_ring_wait(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_ring_state *state = tevent_req_data(
+		req, struct fetch_ring_state);
+	struct tevent_req *wait_req;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_set_message_handler_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	wait_req = cluster_wait_send(state, state->ev, state->client,
+				     state->num_nodes);
+	if (tevent_req_nomem(wait_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(wait_req, fetch_ring_start, req);
+}
+
+/*
+ * cluster_wait has completed: record the start time and arm the
+ * time limit timer.  The node with the highest PNN (num_nodes - 1)
+ * additionally kicks off the ring by requesting the record lock.
+ */
+static void fetch_ring_start(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_ring_state *state = tevent_req_data(
+		req, struct fetch_ring_state);
+	struct tevent_req *lock_req, *end_req;
+	bool ok;
+	int err;
+
+	ok = cluster_wait_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	state->start_time = tevent_timeval_current();
+
+	if (ctdb_client_pnn(state->client) == state->num_nodes-1) {
+		lock_req = ctdb_fetch_lock_send(state, state->ev,
+						state->client,
+						state->ctdb_db, state->key,
+						false);
+		if (tevent_req_nomem(lock_req, req)) {
+			return;
+		}
+		tevent_req_set_callback(lock_req, fetch_ring_update, req);
+	}
+
+	end_req = tevent_wakeup_send(
+			state, state->ev,
+			tevent_timeval_current_ofs(state->timelimit, 0));
+	if (tevent_req_nomem(end_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(end_req, fetch_ring_finish, req);
+}
+
+/*
+ * The record lock has been obtained: append a line with this node's
+ * message count to the record, store it, release the record and send
+ * MSG_ID_FETCH to the next node in the ring.
+ *
+ * The record is restarted from "Test data\n" when it is empty or has
+ * grown beyond 1000 bytes.  The stored size includes the trailing
+ * NUL.
+ *
+ * On any failure after the lock was obtained the record handle is
+ * freed before the request is failed, so it is not left behind on
+ * the request state.
+ */
+static void fetch_ring_update(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_ring_state *state = tevent_req_data(
+		req, struct fetch_ring_state);
+	struct ctdb_record_handle *h;
+	struct ctdb_req_message msg;
+	TDB_DATA data;
+	char *str;
+	uint32_t pnn;
+	int ret;
+
+	h = ctdb_fetch_lock_recv(subreq, NULL, state, &data, &ret);
+	TALLOC_FREE(subreq);
+	if (h == NULL) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* Do not let the record grow without bound */
+	if (data.dsize > 1000) {
+		TALLOC_FREE(data.dptr);
+		data.dsize = 0;
+	}
+
+	if (data.dsize == 0) {
+		data.dptr = (uint8_t *)talloc_asprintf(state, "Test data\n");
+		if (data.dptr == NULL) {
+			talloc_free(h);
+			tevent_req_oom(req);
+			return;
+		}
+	}
+
+	/*
+	 * Use a temporary so the existing string is not lost if the
+	 * append fails.  ctdb_client_pnn() is used as uint32_t
+	 * elsewhere in this file, hence %u.
+	 */
+	str = talloc_asprintf_append((char *)data.dptr,
+				     "msg_count=%d on node %u\n",
+				     state->msg_count,
+				     ctdb_client_pnn(state->client));
+	if (str == NULL) {
+		talloc_free(h);
+		tevent_req_oom(req);
+		return;
+	}
+	data.dptr = (uint8_t *)str;
+
+	data.dsize = strlen((const char *)data.dptr) + 1;
+
+	ret = ctdb_store_record(h, data);
+	if (ret != 0) {
+		talloc_free(data.dptr);
+		talloc_free(h);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	talloc_free(data.dptr);
+	talloc_free(h);
+
+	msg.srvid = MSG_ID_FETCH;
+	msg.data.data = tdb_null;
+
+	pnn = next_node(state->client, state->num_nodes);
+
+	subreq = ctdb_client_message_send(state, state->ev, state->client,
+					  pnn, &msg);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, fetch_ring_msg_sent, req);
+}
+
+/*
+ * The message to the next node has been sent.  Nothing more to do on
+ * success; a send failure fails the whole request.
+ */
+static void fetch_ring_msg_sent(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	bool sent;
+	int err;
+
+	sent = ctdb_client_message_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (sent) {
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Time limit timer: print the message rate seen by this node since
+ * start_time and request the record lock one last time;
+ * fetch_ring_final_read() completes the request.
+ */
+static void fetch_ring_finish(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_ring_state *state = tevent_req_data(
+		req, struct fetch_ring_state);
+	struct tevent_req *lock_req;
+	double elapsed;
+	bool woke;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! woke) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	elapsed = timeval_elapsed(&state->start_time);
+
+	printf("Fetch[%u]: %.2f msgs/sec\n", ctdb_client_pnn(state->client),
+	       state->msg_count / elapsed);
+
+	lock_req = ctdb_fetch_lock_send(state, state->ev, state->client,
+					state->ctdb_db, state->key, false);
+	if (tevent_req_nomem(lock_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(lock_req, fetch_ring_final_read, req);
+}
+
+/*
+ * Final read of the record after the time limit has expired.
+ *
+ * In interactive mode (interactive == 1) the record contents are
+ * printed.  The output is bounded by the record size and an absent
+ * record is printed as an empty string, so a record that was never
+ * written, or one that is not NUL-terminated, cannot make printf()
+ * dereference NULL or read past the buffer.
+ */
+static void fetch_ring_final_read(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct fetch_ring_state *state = tevent_req_data(
+		req, struct fetch_ring_state);
+	struct ctdb_record_handle *h;
+	TDB_DATA data;
+	int err;
+
+	h = ctdb_fetch_lock_recv(subreq, NULL, state, &data, &err);
+	TALLOC_FREE(subreq);
+	if (h == NULL) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	if (state->interactive == 1) {
+		const char *text = "";
+		int len = 0;
+
+		if (data.dptr != NULL && data.dsize > 0) {
+			text = (const char *)data.dptr;
+			/* Printing stops at the first NUL, if any */
+			len = (data.dsize > (size_t)INT_MAX) ?
+				INT_MAX : (int)data.dsize;
+		}
+		printf("DATA:\n%.*s\n", len, text);
+	}
+	talloc_free(data.dptr);
+	talloc_free(h);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a fetch_ring request.
+ *
+ * Returns true on success.  On failure returns false and, if perr is
+ * not NULL, sets *perr to the unix error of the request.
+ */
+static bool fetch_ring_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return false;
+}
+
+/*
+ * Entry point of the fetch_ring test.
+ *
+ * Connects to the ctdb daemon socket from the options, attaches to
+ * the database named in the options and runs the fetch ring with the
+ * configured key, number of nodes, time limit and interactive flag.
+ * Exits with status 1 on any failure.
+ */
+int main(int argc, const char *argv[])
+{
+	const struct test_options *options;
+	struct ctdb_client_context *ctdb_client;
+	struct ctdb_db_context *db;
+	struct tevent_context *event_ctx;
+	struct tevent_req *ring_req;
+	TALLOC_CTX *top_ctx;
+	int err;
+
+	setup_logging("fetch_ring", DEBUG_STDERR);
+
+	if (! process_options_database(argc, argv, &options)) {
+		exit(1);
+	}
+
+	top_ctx = talloc_new(NULL);
+	if (top_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	event_ctx = tevent_context_init(top_ctx);
+	if (event_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_client_init(top_ctx, event_ctx, options->socket,
+			       &ctdb_client);
+	if (err != 0) {
+		fprintf(stderr, "Failed to initialize client, ret=%d\n", err);
+		exit(1);
+	}
+
+	if (! ctdb_recovery_wait(event_ctx, ctdb_client)) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_attach(event_ctx, ctdb_client, tevent_timeval_zero(),
+			  options->dbname, 0, &db);
+	if (err != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n",
+			options->dbname);
+		exit(1);
+	}
+
+	ring_req = fetch_ring_send(top_ctx, event_ctx, ctdb_client, db,
+				   options->keystr, options->num_nodes,
+				   options->timelimit, options->interactive);
+	if (ring_req == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	tevent_req_poll(ring_req, event_ctx);
+
+	if (! fetch_ring_recv(ring_req, NULL)) {
+		fprintf(stderr, "fetch ring test failed\n");
+		exit(1);
+	}
+
+	talloc_free(top_ctx);
+	return 0;
+}
diff --git a/ctdb/tests/src/g_lock_loop.c b/ctdb/tests/src/g_lock_loop.c
new file mode 100644
index 0000000..3b84241
--- /dev/null
+++ b/ctdb/tests/src/g_lock_loop.c
@@ -0,0 +1,270 @@
+/*
+ simple ctdb benchmark for g_lock operations
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/tevent_unix.h"
+#include "lib/util/debug.h"
+
+#include "protocol/protocol_api.h"
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+/* Key that every lock/unlock cycle of this test operates on */
+#define TESTKEY "testkey"
+
+/* State of one glock_loop_send() request */
+struct glock_loop_state {
+ struct tevent_context *ev; /* event context passed to glock_loop_send() */
+ struct ctdb_client_context *client; /* client passed to glock_loop_send() */
+ struct ctdb_db_context *db; /* database the g_lock calls operate on */
+ int num_nodes; /* handed to cluster_wait_send() */
+ int timelimit; /* seconds until glock_loop_finish() fires */
+ uint32_t pnn; /* ctdb_client_pnn() of this client, printed at the end */
+ uint32_t counter; /* bumped in glock_loop_locked() for every lock obtained */
+ struct ctdb_server_id sid; /* from ctdb_client_get_server_id(client, 1) */
+ const char *key; /* TESTKEY */
+};
+
+/* Callbacks of the sub-requests created by this test */
+static void glock_loop_start(struct tevent_req *subreq);
+static void glock_loop_locked(struct tevent_req *subreq);
+static void glock_loop_unlocked(struct tevent_req *subreq);
+static void glock_loop_finish(struct tevent_req *subreq);
+
+/*
+ * Create the g_lock loop request.
+ *
+ * Fills in the request state (including this client's PNN and a
+ * server id obtained with ctdb_client_get_server_id(client, 1)) and
+ * starts a cluster_wait sub-request; glock_loop_start() runs when it
+ * completes.
+ *
+ * Returns NULL if the request itself cannot be created, otherwise the
+ * new request (already posted as failed if the sub-request could not
+ * be allocated).
+ */
+static struct tevent_req *glock_loop_send(
+				TALLOC_CTX *mem_ctx,
+				struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				struct ctdb_db_context *db,
+				int num_nodes, int timelimit)
+{
+	struct glock_loop_state *state;
+	struct tevent_req *req;
+	struct tevent_req *wait_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct glock_loop_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->db = db;
+	state->num_nodes = num_nodes;
+	state->timelimit = timelimit;
+	state->key = TESTKEY;
+	state->counter = 0;
+	state->pnn = ctdb_client_pnn(client);
+	state->sid = ctdb_client_get_server_id(client, 1);
+
+	wait_req = cluster_wait_send(state, ev, client, num_nodes);
+	if (tevent_req_nomem(wait_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(wait_req, glock_loop_start, req);
+
+	return req;
+}
+
+/*
+ * cluster_wait has completed: request the first lock (completion
+ * handled by glock_loop_locked()) and arm the time limit timer.
+ */
+static void glock_loop_start(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct glock_loop_state *state = tevent_req_data(
+		req, struct glock_loop_state);
+	struct tevent_req *lock_req, *end_req;
+	bool ok;
+	int err;
+
+	ok = cluster_wait_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	lock_req = ctdb_g_lock_lock_send(state, state->ev, state->client,
+					 state->db, state->key, &state->sid,
+					 false);
+	if (tevent_req_nomem(lock_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(lock_req, glock_loop_locked, req);
+
+	end_req = tevent_wakeup_send(
+			state, state->ev,
+			tevent_timeval_current_ofs(state->timelimit, 0));
+	if (tevent_req_nomem(end_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(end_req, glock_loop_finish, req);
+}
+
+/*
+ * The lock has been obtained: count it and immediately request the
+ * unlock; glock_loop_unlocked() continues the cycle.
+ */
+static void glock_loop_locked(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct glock_loop_state *state = tevent_req_data(
+		req, struct glock_loop_state);
+	struct tevent_req *unlock_req;
+	bool locked;
+	int err;
+
+	locked = ctdb_g_lock_lock_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! locked) {
+		fprintf(stderr, "g_lock_lock failed\n");
+		tevent_req_error(req, err);
+		return;
+	}
+
+	state->counter++;
+
+	unlock_req = ctdb_g_lock_unlock_send(state, state->ev,
+					     state->client, state->db,
+					     state->key, state->sid);
+	if (tevent_req_nomem(unlock_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(unlock_req, glock_loop_unlocked, req);
+}
+
+/*
+ * The lock has been released: request it again, closing the
+ * lock/unlock cycle.
+ */
+static void glock_loop_unlocked(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct glock_loop_state *state = tevent_req_data(
+		req, struct glock_loop_state);
+	struct tevent_req *lock_req;
+	bool unlocked;
+	int err;
+
+	unlocked = ctdb_g_lock_unlock_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! unlocked) {
+		fprintf(stderr, "g_lock_unlock failed\n");
+		tevent_req_error(req, err);
+		return;
+	}
+
+	lock_req = ctdb_g_lock_lock_send(state, state->ev, state->client,
+					 state->db, state->key, &state->sid,
+					 false);
+	if (tevent_req_nomem(lock_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(lock_req, glock_loop_locked, req);
+}
+
+/*
+ * Time limit timer: print this node's PNN and lock count and
+ * complete the request.
+ */
+static void glock_loop_finish(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct glock_loop_state *state = tevent_req_data(
+		req, struct glock_loop_state);
+	bool woke;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! woke) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	printf("PNN:%u counter:%u\n", state->pnn, state->counter);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a glock_loop request.
+ *
+ * Returns true on success.  On failure returns false and, if perr is
+ * not NULL, sets *perr to the unix error of the request.
+ */
+static bool glock_loop_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+	bool failed;
+
+	failed = tevent_req_is_unix_error(req, &unix_err);
+	if (failed && perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return !failed;
+}
+
+/*
+ * Entry point of the g_lock loop test.
+ *
+ * Connects to the ctdb daemon socket from the options, attaches to
+ * g_lock.tdb and runs the lock/unlock loop for the configured number
+ * of nodes and time limit.  Exits with status 1 on any failure.
+ */
+int main(int argc, const char *argv[])
+{
+	const struct test_options *options;
+	struct ctdb_client_context *ctdb_client;
+	struct ctdb_db_context *db;
+	struct tevent_context *event_ctx;
+	struct tevent_req *loop_req;
+	TALLOC_CTX *top_ctx;
+	int err;
+
+	setup_logging("glock_loop", DEBUG_STDERR);
+
+	if (! process_options_basic(argc, argv, &options)) {
+		exit(1);
+	}
+
+	top_ctx = talloc_new(NULL);
+	if (top_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	event_ctx = tevent_context_init(top_ctx);
+	if (event_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_client_init(top_ctx, event_ctx, options->socket,
+			       &ctdb_client);
+	if (err != 0) {
+		fprintf(stderr, "Failed to initialize client, ret=%d\n", err);
+		exit(1);
+	}
+
+	if (! ctdb_recovery_wait(event_ctx, ctdb_client)) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_attach(event_ctx, ctdb_client, tevent_timeval_zero(),
+			  "g_lock.tdb", 0, &db);
+	if (err != 0) {
+		fprintf(stderr, "Failed to attach to g_lock.tdb\n");
+		exit(1);
+	}
+
+	loop_req = glock_loop_send(top_ctx, event_ctx, ctdb_client, db,
+				   options->num_nodes, options->timelimit);
+	if (loop_req == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	tevent_req_poll(loop_req, event_ctx);
+
+	if (! glock_loop_recv(loop_req, &err)) {
+		fprintf(stderr, "g_lock loop test failed\n");
+		exit(1);
+	}
+
+	talloc_free(top_ctx);
+	return 0;
+}
diff --git a/ctdb/tests/src/hash_count_test.c b/ctdb/tests/src/hash_count_test.c
new file mode 100644
index 0000000..6ddde08
--- /dev/null
+++ b/ctdb/tests/src/hash_count_test.c
@@ -0,0 +1,132 @@
+/*
+ hash_count tests
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <assert.h>
+
+#include "common/db_hash.c"
+#include "common/hash_count.c"
+
+#define KEY "this_is_a_test_key"
+
+/*
+ * Counter handler for do_test1().
+ *
+ * Checks that the key handed back is KEY and that the counter is
+ * positive, then bumps the int that private_data points to.
+ *
+ * The key is compared with memcmp() over key.dsize bytes: the key
+ * length used by the test excludes the trailing NUL, so the buffer is
+ * not required to be NUL-terminated.
+ */
+static void test1_handler(TDB_DATA key, uint64_t counter, void *private_data)
+{
+	int *count = (int *)private_data;
+
+	assert(key.dsize == strlen(KEY));
+	assert(memcmp(key.dptr, KEY, key.dsize) == 0);
+	assert(counter > 0);
+
+	(*count) += 1;
+}
+
+/*
+ * Test 1: argument validation and handler invocation.
+ *
+ * - incrementing through a NULL context must fail with EINVAL
+ * - initialising without a handler must fail with EINVAL
+ * - after a successful init, each of 10 increments must call the
+ *   handler exactly once
+ */
+static void do_test1(void)
+{
+	struct hash_count_context *hc = NULL;
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct timeval interval = {1, 0};
+	TDB_DATA key;
+	int count = 0;
+	int ret, i;
+
+	/* Fail early instead of silently running on the NULL context */
+	assert(mem_ctx != NULL);
+
+	key.dptr = (uint8_t *)discard_const(KEY);
+	key.dsize = strlen(KEY);
+
+	ret = hash_count_increment(hc, key);
+	assert(ret == EINVAL);
+
+	ret = hash_count_init(mem_ctx, interval, NULL, NULL, &hc);
+	assert(ret == EINVAL);
+
+	ret = hash_count_init(mem_ctx, interval, test1_handler, &count, &hc);
+	assert(ret == 0);
+	assert(hc != NULL);
+
+	for (i=0; i<10; i++) {
+		ret = hash_count_increment(hc, key);
+		assert(ret == 0);
+		assert(count == i+1);
+	}
+
+	talloc_free(hc);
+	/*
+	 * NOTE(review): talloc_get_size() presumably reports only the
+	 * size of mem_ctx itself, not of its children - confirm
+	 * whether talloc_total_size() was intended for a leak check.
+	 */
+	ret = talloc_get_size(mem_ctx);
+	assert(ret == 0);
+
+	talloc_free(mem_ctx);
+}
+
+/*
+ * Counter handler for do_test2(): store the reported counter in the
+ * uint64_t that private_data points to.  The key is not used.
+ */
+static void test2_handler(TDB_DATA key, uint64_t counter, void *private_data)
+{
+	uint64_t *latest = private_data;
+
+	*latest = counter;
+}
+
+/*
+ * Test 2: counting and expiry with a 1 second interval.
+ *
+ * The counter reaches 2 with back-to-back increments (an expire in
+ * between removes nothing), restarts at 1 after sleeping longer than
+ * the interval, and a final expire after another sleep removes
+ * exactly one entry.
+ */
+static void do_test2(void)
+{
+	struct hash_count_context *hc;
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct timeval interval = {1, 0};
+	TDB_DATA key;
+	uint64_t count = 0;
+	int ret;
+
+	/* Fail early instead of silently running on the NULL context */
+	assert(mem_ctx != NULL);
+
+	key.dptr = (uint8_t *)discard_const(KEY);
+	key.dsize = strlen(KEY);
+
+	ret = hash_count_init(mem_ctx, interval, test2_handler, &count, &hc);
+	assert(ret == 0);
+
+	ret = hash_count_increment(hc, key);
+	assert(ret == 0);
+	assert(count == 1);
+
+	/* Nothing is old enough to expire yet */
+	hash_count_expire(hc, &ret);
+	assert(ret == 0);
+
+	ret = hash_count_increment(hc, key);
+	assert(ret == 0);
+	assert(count == 2);
+
+	/* Let the interval pass: the counter starts over */
+	sleep(2);
+
+	ret = hash_count_increment(hc, key);
+	assert(ret == 0);
+	assert(count == 1);
+
+	sleep(2);
+
+	hash_count_expire(hc, &ret);
+	assert(ret == 1);
+
+	talloc_free(hc);
+	/*
+	 * NOTE(review): talloc_get_size() presumably reports only the
+	 * size of mem_ctx itself, not of its children - confirm
+	 * whether talloc_total_size() was intended for a leak check.
+	 */
+	ret = talloc_get_size(mem_ctx);
+	assert(ret == 0);
+
+	talloc_free(mem_ctx);
+}
+
+/*
+ * Run both hash_count tests in order.  A failing test aborts through
+ * assert(), so reaching the end means success.
+ */
+int main(void)
+{
+	void (*const tests[])(void) = { do_test1, do_test2 };
+	size_t idx;
+
+	for (idx = 0; idx < sizeof(tests) / sizeof(tests[0]); idx++) {
+		tests[idx]();
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/ipalloc_read_known_ips.c b/ctdb/tests/src/ipalloc_read_known_ips.c
new file mode 100644
index 0000000..33d0f94
--- /dev/null
+++ b/ctdb/tests/src/ipalloc_read_known_ips.c
@@ -0,0 +1,179 @@
+/*
+ Tests support for CTDB IP allocation
+
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_util.h"
+#include "common/logging.h"
+
+#include "ipalloc_read_known_ips.h"
+
+/*
+ * Append one public IP entry (address and PNN) to list l.
+ *
+ * The array l->ip is grown by one element on mem_ctx.  On allocation
+ * failure the list is left exactly as it was (l->ip and l->num are
+ * untouched) and false is returned; otherwise the entry is stored,
+ * l->num is incremented and true is returned.
+ */
+static bool add_ip(TALLOC_CTX *mem_ctx,
+		   struct ctdb_public_ip_list *l,
+		   ctdb_sock_addr *addr,
+		   uint32_t pnn)
+{
+	struct ctdb_public_ip *grown;
+
+	/*
+	 * Reallocate into a temporary: assigning the result straight
+	 * to l->ip would lose the existing array on failure while
+	 * l->num still claims it has entries.
+	 */
+	grown = talloc_realloc(mem_ctx, l->ip,
+			       struct ctdb_public_ip, l->num + 1);
+	if (grown == NULL) {
+		D_ERR(__location__ " out of memory\n");
+		return false;
+	}
+	l->ip = grown;
+
+	l->ip[l->num].addr = *addr;
+	l->ip[l->num].pnn = pnn;
+	l->num++;
+
+	return true;
+}
+
+/*
+ * Read one block of public IP lines from stdin.
+ *
+ * Format of each line is "IP CURRENT_PNN [ALLOWED_PNN,...]".
+ * If multi is true then ALLOWED_PNNs are not allowed.
+ *
+ * Reading stops at end of input or at the first empty line.  Lines
+ * without any token or with an unparsable address are reported and
+ * skipped.  A missing CURRENT_PNN is recorded as -1.
+ *
+ * Every IP read is appended to a new list *k, allocated as a child of
+ * known.  When multi is false the IP is additionally appended to the
+ * per-node lists in known[]: to the lists of the nodes given as
+ * ALLOWED_PNNs, or to all numnodes lists when none are given.
+ *
+ * An ALLOWED_PNN outside 0 .. numnodes-1 is an error: known[] has
+ * only numnodes elements, so it must never be indexed with such a
+ * value.
+ *
+ * Returns true on success.  On failure *k is freed and set to NULL
+ * and false is returned.
+ */
+static bool read_ctdb_public_ip_info_node(bool multi,
+					  int numnodes,
+					  struct ctdb_public_ip_list **k,
+					  struct ctdb_public_ip_list *known)
+{
+	char line[1024];
+	ctdb_sock_addr addr;
+	char *t, *tok;
+	int pnn, n;
+
+	/* Known public IPs */
+	*k = talloc_zero(known, struct ctdb_public_ip_list);
+	if (*k == NULL) {
+		goto fail;
+	}
+
+	while (fgets(line, sizeof(line), stdin) != NULL) {
+		int ret;
+
+		/* Get rid of pesky newline */
+		if ((t = strchr(line, '\n')) != NULL) {
+			*t = '\0';
+		}
+
+		/* Exit on an empty line */
+		if (line[0] == '\0') {
+			break;
+		}
+
+		/* Get the IP address */
+		tok = strtok(line, " \t");
+		if (tok == NULL) {
+			D_WARNING("WARNING, bad line ignored :%s\n", line);
+			continue;
+		}
+
+		ret = ctdb_sock_addr_from_string(tok, &addr, false);
+		if (ret != 0) {
+			D_ERR("ERROR, bad address :%s\n", tok);
+			continue;
+		}
+
+		/* Get the PNN */
+		pnn = -1;
+		tok = strtok(NULL, " \t");
+		if (tok != NULL) {
+			pnn = (int) strtol(tok, (char **) NULL, 10);
+		}
+
+		if (! add_ip(*k, *k, &addr, pnn)) {
+			goto fail;
+		}
+
+		tok = strtok(NULL, " \t#");
+		if (tok == NULL) {
+			/* No allowed nodes given: allowed everywhere */
+			if (! multi) {
+				for (n = 0; n < numnodes; n++) {
+					if (! add_ip(known, &known[n],
+						     &addr, pnn)) {
+						goto fail;
+					}
+				}
+			}
+			continue;
+		}
+
+		/* Handle allowed nodes for addr */
+		if (multi) {
+			D_ERR("ERROR, bad token\n");
+			goto fail;
+		}
+		t = strtok(tok, ",");
+		while (t != NULL) {
+			n = (int) strtol(t, (char **) NULL, 10);
+			/* n indexes known[], which has numnodes entries */
+			if (n < 0 || n >= numnodes) {
+				D_ERR("ERROR, bad allowed node :%s\n", t);
+				goto fail;
+			}
+			if (! add_ip(known, &known[n], &addr, pnn)) {
+				goto fail;
+			}
+			t = strtok(NULL, ",");
+		}
+	}
+
+	return true;
+
+fail:
+	TALLOC_FREE(*k);
+	return false;
+}
+
+/*
+ * Read the known public IPs for numnodes nodes from stdin.
+ *
+ * Allocates an array of numnodes lists on ctx.  With multi set, one
+ * block of lines is read per node and becomes that node's list.
+ * Without it, a single block is read and distributed over the
+ * per-node lists by read_ctdb_public_ip_info_node().
+ *
+ * Returns the array, or NULL on failure.
+ */
+struct ctdb_public_ip_list * ipalloc_read_known_ips(TALLOC_CTX *ctx,
+						    int numnodes,
+						    bool multi)
+{
+	struct ctdb_public_ip_list *known;
+	struct ctdb_public_ip_list *block;
+	int blocks, idx;
+
+	known = talloc_zero_array(ctx, struct ctdb_public_ip_list,
+				  numnodes);
+	if (known == NULL) {
+		D_ERR(__location__ " out of memory\n");
+		goto fail;
+	}
+
+	/* One block per node in multi mode, a single block otherwise */
+	blocks = multi ? numnodes : 1;
+
+	for (idx = 0; idx < blocks; idx++) {
+		if (! read_ctdb_public_ip_info_node(multi, numnodes,
+						    &block, known)) {
+			goto fail;
+		}
+
+		if (multi) {
+			known[idx] = *block;
+		}
+	}
+
+	return known;
+
+fail:
+	talloc_free(known);
+	return NULL;
+}
diff --git a/ctdb/tests/src/ipalloc_read_known_ips.h b/ctdb/tests/src/ipalloc_read_known_ips.h
new file mode 100644
index 0000000..aa6d154
--- /dev/null
+++ b/ctdb/tests/src/ipalloc_read_known_ips.h
@@ -0,0 +1,32 @@
+/*
+ Tests support for CTDB IP allocation
+
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __IPALLOC_READ_KNOWN_IPS_H__
+#define __IPALLOC_READ_KNOWN_IPS_H__
+
+#include <stdbool.h>
+#include <talloc.h>
+
+#include "protocol/protocol.h"
+
+/*
+ * Read known public IPs from stdin, one line per IP in the format
+ * "IP CURRENT_PNN [ALLOWED_PNN,...]".
+ *
+ * Returns an array of numnodes lists allocated on ctx, or NULL on
+ * failure.  If multi is true a separate block of lines is read for
+ * each node and ALLOWED_PNNs are not accepted.
+ */
+struct ctdb_public_ip_list * ipalloc_read_known_ips(TALLOC_CTX *ctx,
+ int numnodes,
+ bool multi);
+
+#endif /* __IPALLOC_READ_KNOWN_IPS_H__ */
diff --git a/ctdb/tests/src/line_test.c b/ctdb/tests/src/line_test.c
new file mode 100644
index 0000000..806d883
--- /dev/null
+++ b/ctdb/tests/src/line_test.c
@@ -0,0 +1,102 @@
+/*
+ Test code for line based I/O over fds
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <assert.h>
+
+#include "common/line.c"
+
+/*
+ * Per-line callback: echo the line to stdout followed by a newline and
+ * flush straight away.  Always returns 0.
+ */
+static int line_print(char *line, void *private_data)
+{
+	/* No per-call context is needed by this callback */
+	(void)private_data;
+
+	printf("%s\n", line);
+	fflush(stdout);
+	return 0;
+}
+
+/*
+ * Usage: line_test <filename> [<hint>]
+ *
+ * A forked child copies <filename> into a pipe in 16 byte chunks; the
+ * parent runs line_read() on the read end, printing each line via
+ * line_print().  The process exit status is the line count stored by
+ * line_read() in lines.
+ */
+int main(int argc, const char **argv)
+{
+ TALLOC_CTX *mem_ctx;
+ size_t hint = 32;
+ pid_t pid;
+ int ret, lines = 0;
+ int pipefd[2];
+
+ if (argc < 2 || argc > 3) {
+ fprintf(stderr, "Usage: %s <filename> [<hint>]\n", argv[0]);
+ exit(1);
+ }
+
+ /* Optional second argument overrides the default hint of 32 */
+ if (argc == 3) {
+ long value;
+
+ value = atol(argv[2]);
+ assert(value > 0);
+ hint = value;
+ }
+
+ ret = pipe(pipefd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) {
+ /* Child: feed the file into the write end of the pipe */
+ char buffer[16];
+ ssize_t n, n2;
+ int fd;
+
+ close(pipefd[0]);
+
+ fd = open(argv[1], O_RDONLY);
+ assert(fd != -1);
+
+ while (1) {
+ n = read(fd, buffer, sizeof(buffer));
+ assert(n >= 0 && (size_t)n <= sizeof(buffer));
+
+ /* End of file */
+ if (n == 0) {
+ break;
+ }
+
+ n2 = write(pipefd[1], buffer, n);
+ assert(n2 == n);
+ }
+
+ close(pipefd[1]);
+ close(fd);
+
+ exit(0);
+ }
+
+ /* Parent: keep only the read end so EOF is seen when the child exits */
+ close(pipefd[1]);
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ /*
+ * NOTE(review): mem_ctx is allocated above but NULL is passed as the
+ * third argument here - confirm this is intended.
+ */
+ ret = line_read(pipefd[0], hint, NULL, line_print, NULL, &lines);
+ assert(ret == 0);
+
+ talloc_free(mem_ctx);
+
+ return lines;
+}
diff --git a/ctdb/tests/src/lock_tdb.c b/ctdb/tests/src/lock_tdb.c
new file mode 100644
index 0000000..c37f846
--- /dev/null
+++ b/ctdb/tests/src/lock_tdb.c
@@ -0,0 +1,60 @@
+/*
+ Lock a tdb and sleep
+
+ Copyright (C) Amitay Isaacs 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+
+const char *tdb_file;
+TDB_CONTEXT *tdb;
+
+/*
+ * SIGINT handler (installed in main): close the global tdb handle.
+ * NOTE(review): presumably this relies on the signal interrupting
+ * sleep() in main so the program then exits - confirm.
+ */
+static void signal_handler(int signum)
+{
+ tdb_close(tdb);
+}
+
+/*
+ * Usage: lock_tdb <tdb file>
+ *
+ * Open the given tdb, install a SIGINT handler that closes it, take
+ * the all-record lock and then sleep while holding it.  Exits with
+ * status 1 on usage, open or lock failure.
+ */
+int
+main(int argc, char *argv[])
+{
+	int rc;
+
+	if (argc != 2) {
+		printf("Usage: %s <tdb file>\n", argv[0]);
+		exit(1);
+	}
+
+	tdb_file = argv[1];
+	tdb = tdb_open(tdb_file, 0, 0, O_RDWR, 0);
+	if (tdb == NULL) {
+		fprintf(stderr, "Failed to open TDB file %s\n", tdb_file);
+		exit(1);
+	}
+
+	/* Handler is in place before the lock is taken */
+	signal(SIGINT, signal_handler);
+
+	rc = tdb_lockall(tdb);
+	if (rc == 0) {
+		/* Hold the lock until interrupted */
+		sleep(999999);
+		return 0;
+	}
+
+	fprintf(stderr, "Failed to lock database %s\n", tdb_file);
+	tdb_close(tdb);
+	exit(1);
+}
diff --git a/ctdb/tests/src/message_ring.c b/ctdb/tests/src/message_ring.c
new file mode 100644
index 0000000..d1fcee4
--- /dev/null
+++ b/ctdb/tests/src/message_ring.c
@@ -0,0 +1,369 @@
+/*
+ simple ctdb benchmark - send messages in a ring around cluster
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+#define MSG_ID_BENCH 0
+
+/* Per-request state of the message ring benchmark */
+struct message_ring_state {
+ /* Event context and ctdb client used for all sub-requests */
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Number of nodes in the ring; used as the modulus in next_node() */
+ int num_nodes;
+ /* Offset passed to the wakeup timer that ends the run */
+ int timelimit;
+ /* When 1, node 0 prints the message rate on every periodic wakeup */
+ int interactive;
+ /* Total messages received by the handler */
+ int msg_count;
+ /* Messages received with increment 1 / with any other increment */
+ int msg_plus, msg_minus;
+ /* Set once cluster_wait completes; base for the rate calculation */
+ struct timeval start_time;
+};
+
+static void message_ring_wait(struct tevent_req *subreq);
+static void message_ring_start(struct tevent_req *subreq);
+static void message_ring_each_second(struct tevent_req *subreq);
+static void message_ring_msg_sent(struct tevent_req *subreq);
+static void message_ring_msg_handler(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+static void message_ring_finish(struct tevent_req *subreq);
+
+/*
+ * Start the message ring benchmark.
+ *
+ * Creates the request, records the parameters in its state and
+ * registers message_ring_msg_handler for MSG_ID_BENCH; the computation
+ * continues in message_ring_wait().  Returns NULL only if the request
+ * itself cannot be created.
+ */
+static struct tevent_req *message_ring_send(TALLOC_CTX *mem_ctx,
+					    struct tevent_context *ev,
+					    struct ctdb_client_context *client,
+					    int num_nodes, int timelimit,
+					    int interactive)
+{
+	struct message_ring_state *state;
+	struct tevent_req *req;
+	struct tevent_req *handler_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct message_ring_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->interactive = interactive;
+	state->timelimit = timelimit;
+	state->num_nodes = num_nodes;
+	state->client = client;
+	state->ev = ev;
+
+	handler_req = ctdb_client_set_message_handler_send(
+					state, ev, client,
+					MSG_ID_BENCH,
+					message_ring_msg_handler, req);
+	if (! tevent_req_nomem(handler_req, req)) {
+		tevent_req_set_callback(handler_req, message_ring_wait, req);
+		return req;
+	}
+
+	/* Out of memory: error already set on req, deliver it via ev */
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * Completion of message handler registration: on success, start
+ * waiting for the cluster via cluster_wait_send(); on failure, fail
+ * the request with the reported error.
+ */
+static void message_ring_wait(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct message_ring_state *state = tevent_req_data(
+		req, struct message_ring_state);
+	int err;
+	bool ok;
+
+	ok = ctdb_client_set_message_handler_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	subreq = cluster_wait_send(state, state->ev, state->client,
+				   state->num_nodes);
+	if (! tevent_req_nomem(subreq, req)) {
+		tevent_req_set_callback(subreq, message_ring_start, req);
+	}
+}
+
+/*
+ * Completion of cluster_wait: record the start time, arm the
+ * one-second periodic timer on node 0 only, and arm the timer that
+ * ends the run after state->timelimit.
+ */
+static void message_ring_start(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct message_ring_state *state = tevent_req_data(
+		req, struct message_ring_state);
+	struct tevent_req *timer;
+	int err;
+	bool ok;
+
+	ok = cluster_wait_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	state->start_time = tevent_timeval_current();
+
+	/* Only node 0 runs the periodic tick */
+	if (ctdb_client_pnn(state->client) == 0) {
+		timer = tevent_wakeup_send(state, state->ev,
+					   tevent_timeval_current_ofs(1, 0));
+		if (tevent_req_nomem(timer, req)) {
+			return;
+		}
+		tevent_req_set_callback(timer, message_ring_each_second, req);
+	}
+
+	timer = tevent_wakeup_send(state, state->ev,
+				   tevent_timeval_current_ofs(
+					   state->timelimit, 0));
+	if (! tevent_req_nomem(timer, req)) {
+		tevent_req_set_callback(timer, message_ring_finish, req);
+	}
+}
+
+/*
+ * Return the node reached from this client's PNN by stepping incr
+ * around a ring of num_nodes nodes.  num_nodes is added before the
+ * modulo so that a step of -1 from node 0 wraps to the last node.
+ */
+static uint32_t next_node(struct ctdb_client_context *client,
+			  int num_nodes, int incr)
+{
+	uint32_t self = ctdb_client_pnn(client);
+
+	return (self + num_nodes + incr) % num_nodes;
+}
+
+/*
+ * Send one benchmark message carrying incr to the neighbouring node in
+ * that direction.  Returns false if the send could not be allocated
+ * (req has then already been failed by tevent_req_nomem()).
+ */
+static bool message_ring_inject(struct tevent_req *req, int incr)
+{
+	struct message_ring_state *state = tevent_req_data(
+		req, struct message_ring_state);
+	struct ctdb_req_message msg;
+	struct tevent_req *subreq;
+	uint32_t pnn;
+
+	/* Same srvid the handler was registered under */
+	msg.srvid = MSG_ID_BENCH;
+	/*
+	 * NOTE(review): as in the previous inline code, the payload points
+	 * at a stack variable, so this assumes ctdb_client_message_send()
+	 * copies the data before returning - confirm.
+	 */
+	msg.data.data.dptr = (uint8_t *)&incr;
+	msg.data.data.dsize = sizeof(incr);
+
+	pnn = next_node(state->client, state->num_nodes, incr);
+
+	subreq = ctdb_client_message_send(state, state->ev,
+					  state->client, pnn, &msg);
+	if (tevent_req_nomem(subreq, req)) {
+		return false;
+	}
+	tevent_req_set_callback(subreq, message_ring_msg_sent, req);
+
+	return true;
+}
+
+/*
+ * Periodic one-second tick.
+ *
+ * In interactive mode node 0 prints the current message rate.  For
+ * each direction in which no message has been received yet, a message
+ * is (re)injected into the ring.  Finally the timer is re-armed.
+ */
+static void message_ring_each_second(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct message_ring_state *state = tevent_req_data(
+		req, struct message_ring_state);
+	uint32_t pnn;
+	bool status;
+
+	status = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	pnn = ctdb_client_pnn(state->client);
+	if (pnn == 0 && state->interactive == 1) {
+		double t;
+
+		t = timeval_elapsed(&state->start_time);
+		printf("Ring[%u]: %.2f msgs/sec (+ve=%d -ve=%d)\n",
+		       pnn, state->msg_count / t,
+		       state->msg_plus, state->msg_minus);
+		fflush(stdout);
+	}
+
+	/* Nothing seen yet in the positive direction: start it */
+	if (state->msg_plus == 0 && ! message_ring_inject(req, 1)) {
+		return;
+	}
+
+	/* Nothing seen yet in the negative direction: start it */
+	if (state->msg_minus == 0 && ! message_ring_inject(req, -1)) {
+		return;
+	}
+
+	subreq = tevent_wakeup_send(state, state->ev,
+				    tevent_timeval_current_ofs(1, 0));
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, message_ring_each_second, req);
+}
+
+/*
+ * Completion of a single message send: collect the result and fail
+ * the benchmark request if the send failed.
+ */
+static void message_ring_msg_sent(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool sent;
+
+	sent = ctdb_client_message_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (sent) {
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Handler for incoming benchmark messages.
+ *
+ * Messages with a different srvid or a payload that is not exactly one
+ * int are ignored.  Otherwise the counters are updated (an increment
+ * of 1 counts as "plus", anything else as "minus") and the unchanged
+ * payload is forwarded to the next node in the same direction.
+ */
+static void message_ring_msg_handler(uint64_t srvid, TDB_DATA data,
+				     void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct message_ring_state *state = tevent_req_data(
+		req, struct message_ring_state);
+	struct ctdb_req_message fwd;
+	struct tevent_req *subreq;
+	uint32_t dest;
+	int direction;
+
+	if (srvid != MSG_ID_BENCH || data.dsize != sizeof(int)) {
+		return;
+	}
+
+	direction = *(int *)data.dptr;
+
+	state->msg_count++;
+	if (direction != 1) {
+		state->msg_minus++;
+	} else {
+		state->msg_plus++;
+	}
+
+	fwd.srvid = srvid;
+	fwd.data.data = data;
+
+	dest = next_node(state->client, state->num_nodes, direction);
+
+	subreq = ctdb_client_message_send(state, state->ev, state->client,
+					  dest, &fwd);
+	if (! tevent_req_nomem(subreq, req)) {
+		tevent_req_set_callback(subreq, message_ring_msg_sent, req);
+	}
+}
+
+/*
+ * Time limit timer fired: print the final message rate for this node
+ * and complete the benchmark request.
+ */
+static void message_ring_finish(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct message_ring_state *state = tevent_req_data(
+		req, struct message_ring_state);
+	double elapsed;
+	bool woke;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (woke) {
+		elapsed = timeval_elapsed(&state->start_time);
+
+		printf("Ring[%u]: %.2f msgs/sec (+ve=%d -ve=%d)\n",
+		       ctdb_client_pnn(state->client),
+		       state->msg_count / elapsed,
+		       state->msg_plus, state->msg_minus);
+
+		tevent_req_done(req);
+		return;
+	}
+
+	tevent_req_error(req, EIO);
+}
+
+/*
+ * Collect the benchmark result: true when the request finished without
+ * a unix error, false otherwise.  The error value itself is discarded.
+ */
+static bool message_ring_recv(struct tevent_req *req)
+{
+	int err;
+
+	return ! tevent_req_is_unix_error(req, &err);
+}
+
+/*
+ * Entry point of the message ring benchmark.
+ *
+ * Parses the common test options, connects to ctdb on opts->socket,
+ * waits for recovery, then runs the message ring request to
+ * completion.  Exits with status 1 on any failure, 0 on success.
+ */
+int main(int argc, const char *argv[])
+{
+ const struct test_options *opts;
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct tevent_req *req;
+ int ret;
+ bool status;
+
+ setup_logging("message_ring", DEBUG_STDERR);
+
+ status = process_options_basic(argc, argv, &opts);
+ if (! status) {
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to initialize client, ret=%d\n", ret);
+ exit(1);
+ }
+
+ if (! ctdb_recovery_wait(ev, client)) {
+ fprintf(stderr, "Failed to wait for recovery\n");
+ exit(1);
+ }
+
+ req = message_ring_send(mem_ctx, ev, client,
+ opts->num_nodes, opts->timelimit,
+ opts->interactive);
+ if (req == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ /* Run the event loop until the benchmark request completes */
+ tevent_req_poll(req, ev);
+
+ status = message_ring_recv(req);
+ if (! status) {
+ fprintf(stderr, "message ring test failed\n");
+ exit(1);
+ }
+
+ talloc_free(mem_ctx);
+ return 0;
+}
diff --git a/ctdb/tests/src/pidfile_test.c b/ctdb/tests/src/pidfile_test.c
new file mode 100644
index 0000000..592fc2b
--- /dev/null
+++ b/ctdb/tests/src/pidfile_test.c
@@ -0,0 +1,242 @@
+/*
+ pidfile tests
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/wait.h"
+
+#include <assert.h>
+
+#include "common/pidfile.c"
+
+
+/*
+ * Single process: create the pid file, verify it is a regular file
+ * holding our own pid, then free the context and verify the file has
+ * been removed.
+ */
+static void test1(const char *pidfile)
+{
+	struct pidfile_context *ctx;
+	struct stat sb;
+	pid_t recorded;
+	FILE *f;
+	int rc;
+
+	rc = pidfile_context_create(NULL, pidfile, &ctx);
+	assert(rc == 0);
+	assert(ctx != NULL);
+
+	rc = stat(pidfile, &sb);
+	assert(rc == 0);
+	assert(S_ISREG(sb.st_mode));
+
+	f = fopen(pidfile, "r");
+	assert(f != NULL);
+	rc = fscanf(f, "%d", &recorded);
+	assert(rc == 1);
+	assert(recorded == getpid());
+	fclose(f);
+
+	/* Freeing the context is expected to remove the file */
+	TALLOC_FREE(ctx);
+
+	rc = stat(pidfile, &sb);
+	assert(rc == -1);
+}
+
+/*
+ * create pid file in two processes
+ *
+ * The child creates the pid file and reports over a pipe; while the
+ * child holds it, creation in the parent must fail.  Once the child
+ * has freed its context (second report), creation in the parent must
+ * succeed.  The pipe is closed and the child reaped before returning
+ * so nothing leaks into the following tests.
+ */
+static void test2(const char *pidfile)
+{
+	struct pidfile_context *pid_ctx;
+	pid_t pid, pid2;
+	int fd[2];
+	int ret;
+	ssize_t nread;	/* read() returns ssize_t, -1 on error */
+	FILE *fp;
+	struct stat st;
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		ssize_t nwritten;
+
+		close(fd[0]);
+
+		ret = pidfile_context_create(NULL, pidfile, &pid_ctx);
+		assert(ret == 0);
+		assert(pid_ctx != NULL);
+
+		/* First report: pid file created */
+		nwritten = write(fd[1], &ret, sizeof(ret));
+		assert(nwritten == sizeof(ret));
+
+		sleep(10);
+
+		TALLOC_FREE(pid_ctx);
+
+		/* Second report: pid file released */
+		nwritten = write(fd[1], &ret, sizeof(ret));
+		assert(nwritten == sizeof(ret));
+
+		exit(1);
+	}
+
+	close(fd[1]);
+
+	nread = read(fd[0], &ret, sizeof(ret));
+	assert(nread == sizeof(ret));
+	assert(ret == 0);
+
+	/* The file must record the child's pid */
+	fp = fopen(pidfile, "r");
+	assert(fp != NULL);
+	ret = fscanf(fp, "%d", &pid2);
+	assert(ret == 1);
+	assert(pid == pid2);
+	fclose(fp);
+
+	/* Child still holds the pid file, so this must fail */
+	ret = pidfile_context_create(NULL, pidfile, &pid_ctx);
+	assert(ret != 0);
+
+	nread = read(fd[0], &ret, sizeof(ret));
+	assert(nread == sizeof(ret));
+	assert(ret == 0);
+
+	ret = pidfile_context_create(NULL, pidfile, &pid_ctx);
+	assert(ret == 0);
+	assert(pid_ctx != NULL);
+
+	TALLOC_FREE(pid_ctx);
+
+	ret = stat(pidfile, &st);
+	assert(ret == -1);
+
+	/* Release the pipe and reap the child */
+	close(fd[0]);
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+}
+
+/*
+ * create pid file, fork, try to remove pid file in separate process
+ *
+ * Freeing the inherited context in the child must leave the file in
+ * place; only freeing it in the creating process removes it.  The
+ * pipe is closed and the child reaped before returning.
+ */
+static void test3(const char *pidfile)
+{
+	struct pidfile_context *pid_ctx;
+	pid_t pid, pid2;
+	int fd[2];
+	int ret;
+	ssize_t nread;	/* read() returns ssize_t, -1 on error */
+	struct stat st;
+
+	ret = pidfile_context_create(NULL, pidfile, &pid_ctx);
+	assert(ret == 0);
+	assert(pid_ctx != NULL);
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		ssize_t nwritten;
+
+		close(fd[0]);
+
+		/* Free the inherited context in the child */
+		TALLOC_FREE(pid_ctx);
+
+		nwritten = write(fd[1], &ret, sizeof(ret));
+		assert(nwritten == sizeof(ret));
+
+		exit(1);
+	}
+
+	close(fd[1]);
+
+	nread = read(fd[0], &ret, sizeof(ret));
+	assert(nread == sizeof(ret));
+
+	/* The child's free must not have removed the file */
+	ret = stat(pidfile, &st);
+	assert(ret == 0);
+
+	TALLOC_FREE(pid_ctx);
+
+	ret = stat(pidfile, &st);
+	assert(ret == -1);
+
+	/* Release the pipe and reap the child */
+	close(fd[0]);
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+}
+
+/*
+ * create pid file, kill process, overwrite pid file in different process
+ *
+ * The child creates the pid file and is then killed with SIGKILL, so
+ * the file is left behind.  The parent must still be able to create
+ * the pid file afterwards.
+ */
+static void test4(const char *pidfile)
+{
+	struct pidfile_context *pid_ctx;
+	pid_t pid, pid2;
+	int fd[2];
+	int ret;
+	ssize_t nread;	/* read() returns ssize_t, -1 on error */
+	struct stat st;
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		ssize_t nwritten;
+
+		close(fd[0]);
+
+		/* Result is checked by the parent via the pipe */
+		ret = pidfile_context_create(NULL, pidfile, &pid_ctx);
+
+		nwritten = write(fd[1], &ret, sizeof(ret));
+		assert(nwritten == sizeof(ret));
+
+		/* Wait to be killed */
+		sleep(99);
+		exit(1);
+	}
+
+	close(fd[1]);
+
+	nread = read(fd[0], &ret, sizeof(ret));
+	assert(nread == sizeof(ret));
+	assert(ret == 0);
+
+	/* Only one report is expected; the pipe is no longer needed */
+	close(fd[0]);
+
+	ret = stat(pidfile, &st);
+	assert(ret == 0);
+
+	ret = kill(pid, SIGKILL);
+	assert(ret == 0);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+
+	/* The stale file of the dead child must not block us */
+	ret = pidfile_context_create(NULL, pidfile, &pid_ctx);
+	assert(ret == 0);
+	assert(pid_ctx != NULL);
+
+	ret = stat(pidfile, &st);
+	assert(ret == 0);
+
+	TALLOC_FREE(pid_ctx);
+}
+
+/*
+ * Usage: pidfile_test <pidfile>
+ *
+ * Runs test1 to test4 in order against the given path.
+ */
+int main(int argc, const char **argv)
+{
+	void (*const tests[])(const char *) = {
+		test1, test2, test3, test4,
+	};
+	size_t i;
+
+	if (argc != 2) {
+		fprintf(stderr, "Usage: %s <pidfile>\n", argv[0]);
+		exit(1);
+	}
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		tests[i](argv[1]);
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/pkt_read_test.c b/ctdb/tests/src/pkt_read_test.c
new file mode 100644
index 0000000..a3ebe0a
--- /dev/null
+++ b/ctdb/tests/src/pkt_read_test.c
@@ -0,0 +1,249 @@
+/*
+ packet read tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "lib/util/blocking.h"
+
+#include "common/pkt_read.c"
+
+/*
+ * Child side of the test: write 1000 rounds of four packets (100, 500,
+ * 1024 and 1024*1024 bytes) to fd, then close it.
+ *
+ * Each packet starts with its total length as a 32-bit value, which
+ * is what reader_more() parses.  The header is written from a
+ * uint32_t so that writer and reader agree on its width regardless of
+ * sizeof(size_t) and byte order.
+ */
+static void writer(int fd)
+{
+	uint8_t buf[1024*1024];
+	uint32_t pkt_size[4] = { 100, 500, 1024, 1024*1024 };
+	uint32_t pkt_len;
+	ssize_t ret;
+	int i, j;
+
+	/* Fill with a recognisable byte pattern */
+	for (i=0; i<1024*1024; i++) {
+		buf[i] = i%256;
+	}
+
+	for (i=0; i<1000; i++) {
+		for (j=0; j<4; j++) {
+			pkt_len = pkt_size[j];
+			memcpy(buf, &pkt_len, sizeof(pkt_len));
+
+			ret = write(fd, buf, pkt_len);
+			if (ret < 0) {
+				printf("write error: %s\n", strerror(errno));
+				/* Always fails here: abort the writer */
+				assert(ret > 0);
+			}
+		}
+	}
+
+	close(fd);
+}
+
+/* State of the packet reading request */
+struct reader_state {
+ struct tevent_context *ev;
+ /* Descriptor packets are read from */
+ int fd;
+ /* Caller supplied buffer handed to pkt_read_send(); may be NULL/0 */
+ uint8_t *buf;
+ size_t buflen;
+ /* pkt_read request currently in flight, NULL when none */
+ struct tevent_req *subreq;
+};
+
+static ssize_t reader_more(uint8_t *buf, size_t buflen, void *private_data);
+static void reader_done(struct tevent_req *subreq);
+
+/*
+ * Start reading packets from fd until the peer closes it.
+ *
+ * buf/buflen describe an optional caller supplied buffer that is
+ * passed on to pkt_read_send().  Returns NULL only if the request
+ * cannot be created; if the first pkt_read request cannot be
+ * allocated the request is failed and posted to ev.
+ */
+static struct tevent_req *reader_send(TALLOC_CTX *mem_ctx,
+				      struct tevent_context *ev,
+				      int fd, uint8_t *buf,
+				      size_t buflen)
+{
+	struct tevent_req *req, *subreq;
+	struct reader_state *state;
+
+	req = tevent_req_create(mem_ctx, &state, struct reader_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->fd = fd;
+	state->buf = buf;
+	state->buflen = buflen;
+
+	subreq = pkt_read_send(state, state->ev, state->fd, 4,
+			       state->buf, state->buflen, reader_more, NULL);
+	if (tevent_req_nomem(subreq, req)) {
+		/* Must return here: subreq is NULL and cannot take a callback */
+		return tevent_req_post(req, ev);
+	}
+
+	state->subreq = subreq;
+	tevent_req_set_callback(subreq, reader_done, req);
+	return req;
+}
+
+/*
+ * Tell the packet reader how many more bytes are needed.
+ *
+ * Until the 4 byte length header is complete, ask for the rest of the
+ * header.  Afterwards return the number of bytes still missing from a
+ * packet whose total length (header included) is given by the header.
+ */
+static ssize_t reader_more(uint8_t *buf, size_t buflen, void *private_data)
+{
+	uint32_t pkt_len;
+
+	if (buflen < sizeof(pkt_len)) {
+		return sizeof(pkt_len) - buflen;
+	}
+
+	/* memcpy instead of a cast: buf need not be 4-byte aligned */
+	memcpy(&pkt_len, buf, sizeof(pkt_len));
+	return pkt_len - buflen;
+}
+
+/*
+ * One packet read has finished.
+ *
+ * EPIPE completes the request successfully, any other read error
+ * fails it.  Otherwise the packet is discarded (freed when the reader
+ * allocated it) and the next read is started.
+ */
+static void reader_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct reader_state *state = tevent_req_data(
+		req, struct reader_state);
+	uint8_t *pkt;
+	ssize_t len;
+	bool owned;
+	int err;
+
+	len = pkt_read_recv(subreq, state, &pkt, &owned, &err);
+	TALLOC_FREE(subreq);
+	state->subreq = NULL;
+
+	if (len == -1 && err == EPIPE) {
+		tevent_req_done(req);
+		return;
+	}
+	if (len == -1) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	if (owned) {
+		talloc_free(pkt);
+	}
+
+	subreq = pkt_read_send(state, state->ev, state->fd, 4,
+			       state->buf, state->buflen, reader_more, NULL);
+	if (! tevent_req_nomem(subreq, req)) {
+		state->subreq = subreq;
+		tevent_req_set_callback(subreq, reader_done, req);
+	}
+}
+
+/*
+ * Collect the result of the reader request into *perr:
+ *   -1    a pkt_read sub-request is still outstanding
+ *   errno the request failed with that unix error
+ *    0    success
+ */
+static void reader_recv(struct tevent_req *req, int *perr)
+{
+	struct reader_state *state = tevent_req_data(
+		req, struct reader_state);
+	int err = 0;
+
+	if (state->subreq != NULL) {
+		*perr = -1;
+		/* Return so the -1 is not overwritten below */
+		return;
+	}
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		*perr = err;
+		return;
+	}
+
+	*perr = 0;
+}
+
+/*
+ * fd event handler: forward the event to pkt_read_handler() for the
+ * pkt_read request currently stored in the reader state.  Asserts
+ * that such a request exists.
+ */
+static void reader_handler(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ struct tevent_req *req = talloc_get_type_abort(
+ private_data, struct tevent_req);
+ struct reader_state *state = tevent_req_data(
+ req, struct reader_state);
+
+ assert(state->subreq != NULL);
+ pkt_read_handler(ev, fde, flags, state->subreq);
+}
+
+/*
+ * Parent side of the test: read packets from fd until the reader
+ * request completes and assert that it completed without error.
+ *
+ * With fixed set, a 1024 byte buffer is supplied to the reader;
+ * otherwise no buffer is supplied (NULL, 0).
+ */
+static void reader(int fd, bool fixed)
+{
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct tevent_context *ev;
+	struct tevent_req *req;
+	struct tevent_fd *fde;
+	uint8_t *fixed_buf = NULL;
+	size_t fixed_len = 0;
+	int err;
+
+	assert(mem_ctx != NULL);
+
+	ev = tevent_context_init(mem_ctx);
+	assert(ev != NULL);
+
+	if (fixed) {
+		fixed_len = 1024;
+		fixed_buf = talloc_size(mem_ctx, fixed_len);
+		assert(fixed_buf != NULL);
+	}
+
+	req = reader_send(mem_ctx, ev, fd, fixed_buf, fixed_len);
+	assert(req != NULL);
+
+	/* Readability events on fd are routed to the reader request */
+	fde = tevent_add_fd(ev, mem_ctx, fd, TEVENT_FD_READ,
+			    reader_handler, req);
+	assert(fde != NULL);
+
+	tevent_req_poll(req, ev);
+
+	reader_recv(req, &err);
+	assert(err == 0);
+
+	close(fd);
+
+	talloc_free(mem_ctx);
+}
+
+/*
+ * Run one writer/reader round over a pipe: a forked child runs
+ * writer() on the write end, the parent switches the read end to
+ * non-blocking mode and runs reader() with the given fixed setting.
+ */
+static void reader_test(bool fixed)
+{
+ int fd[2];
+ int ret;
+ pid_t pid;
+
+ ret = pipe(fd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) {
+ /* Child process */
+ close(fd[0]);
+ writer(fd[1]);
+ exit(0);
+ }
+
+ /* Parent keeps only the read end */
+ close(fd[1]);
+ ret = set_blocking(fd[0], false);
+ if (ret == -1) {
+ exit(1);
+ }
+
+ reader(fd[0], fixed);
+}
+
+/* Run the reader test once with a fixed buffer and once without */
+int main(void)
+{
+ reader_test(true);
+ reader_test(false);
+
+ return 0;
+}
diff --git a/ctdb/tests/src/pkt_write_test.c b/ctdb/tests/src/pkt_write_test.c
new file mode 100644
index 0000000..dae92a5
--- /dev/null
+++ b/ctdb/tests/src/pkt_write_test.c
@@ -0,0 +1,359 @@
+/*
+ packet write tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "lib/util/blocking.h"
+
+#include "common/pkt_read.c"
+#include "common/pkt_write.c"
+
+/* State of the packet writing request */
+struct writer_state {
+ struct tevent_context *ev;
+ /* Descriptor packets are written to */
+ int fd;
+ /* Packet that is written repeatedly */
+ uint8_t *buf;
+ size_t buflen;
+ /* Number of completed writes so far */
+ int count;
+ /* pkt_write request currently in flight, NULL when none */
+ struct tevent_req *subreq;
+};
+
+static void writer_next(struct tevent_req *subreq);
+
+/*
+ * Start writing the packet buf/buflen to fd repeatedly; writer_next()
+ * issues the follow-up writes.  Returns NULL only if the request
+ * cannot be created.
+ */
+static struct tevent_req *writer_send(TALLOC_CTX *mem_ctx,
+				      struct tevent_context *ev,
+				      int fd, uint8_t *buf, size_t buflen)
+{
+	struct writer_state *state;
+	struct tevent_req *req;
+	struct tevent_req *first;
+
+	req = tevent_req_create(mem_ctx, &state, struct writer_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->count = 0;
+	state->buflen = buflen;
+	state->buf = buf;
+	state->fd = fd;
+	state->ev = ev;
+
+	first = pkt_write_send(state, ev, fd, buf, buflen);
+	if (! tevent_req_nomem(first, req)) {
+		state->subreq = first;
+		tevent_req_set_callback(first, writer_next, req);
+		return req;
+	}
+
+	/* Out of memory: error already set on req, deliver it via ev */
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * One packet write has finished.
+ *
+ * A write error fails the request with that error, a short write
+ * fails it with EIO.  After 1000 successful writes the request is
+ * done; otherwise the same packet is written again.
+ */
+static void writer_next(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct writer_state *state = tevent_req_data(
+		req, struct writer_state);
+	ssize_t written;
+	int err = 0;
+
+	written = pkt_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	state->subreq = NULL;
+
+	if (written == -1) {
+		tevent_req_error(req, err);
+		return;
+	}
+	if (state->buflen != (size_t)written) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	state->count += 1;
+	if (state->count >= 1000) {
+		tevent_req_done(req);
+		return;
+	}
+
+	subreq = pkt_write_send(state, state->ev, state->fd,
+				state->buf, state->buflen);
+	if (! tevent_req_nomem(subreq, req)) {
+		state->subreq = subreq;
+		tevent_req_set_callback(subreq, writer_next, req);
+	}
+}
+
+/*
+ * Collect the result of the writer request into *perr:
+ *   -1    a pkt_write sub-request is still outstanding
+ *   errno the request failed with that unix error
+ *    0    success
+ */
+static void writer_recv(struct tevent_req *req, int *perr)
+{
+	struct writer_state *state = tevent_req_data(
+		req, struct writer_state);
+	int err = 0;
+
+	if (state->subreq != NULL) {
+		*perr = -1;
+	} else if (tevent_req_is_unix_error(req, &err)) {
+		*perr = err;
+	} else {
+		*perr = 0;
+	}
+}
+
+/*
+ * fd event handler: forward the event to pkt_write_handler() for the
+ * pkt_write request currently stored in the writer state.  Asserts
+ * that such a request exists.
+ */
+static void writer_handler(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ struct tevent_req *req = talloc_get_type_abort(
+ private_data, struct tevent_req);
+ struct writer_state *state = tevent_req_data(
+ req, struct writer_state);
+
+ assert(state->subreq != NULL);
+ pkt_write_handler(ev, fde, flags, state->subreq);
+}
+
+/*
+ * Child side of the test: for each of the four packet sizes (100, 500,
+ * 1024 and 1024*1024 bytes) run one writer request on fd and assert
+ * that it completed without error, then close fd.
+ *
+ * Each packet starts with its total length as a 32-bit value, which
+ * is what reader_more() parses.  The header is written from a
+ * uint32_t so that writer and reader agree on its width regardless of
+ * sizeof(size_t) and byte order.
+ */
+static void writer(int fd)
+{
+	TALLOC_CTX *mem_ctx;
+	struct tevent_context *ev;
+	struct tevent_fd *fde;
+	struct tevent_req *req;
+	uint8_t buf[1024*1024];
+	size_t buflen;
+	size_t pkt_size[4] = { 100, 500, 1024, 1024*1024 };
+	uint32_t pkt_len;
+	int i, err;
+
+	mem_ctx = talloc_new(NULL);
+	assert(mem_ctx != NULL);
+
+	ev = tevent_context_init(mem_ctx);
+	assert(ev != NULL);
+
+	/* Fill with a recognisable byte pattern */
+	for (i=0; i<1024*1024; i++) {
+		buf[i] = i%256;
+	}
+
+	for (i=0; i<4; i++) {
+		buflen = pkt_size[i];
+		pkt_len = buflen;
+		memcpy(buf, &pkt_len, sizeof(pkt_len));
+
+		req = writer_send(mem_ctx, ev, fd, buf, buflen);
+		assert(req != NULL);
+
+		/* Writability events on fd are routed to this request */
+		fde = tevent_add_fd(ev, mem_ctx, fd, TEVENT_FD_WRITE,
+				    writer_handler, req);
+		assert(fde != NULL);
+
+		tevent_req_poll(req, ev);
+
+		writer_recv(req, &err);
+		assert(err == 0);
+
+		/* The fd event refers to req, so drop it first */
+		talloc_free(fde);
+		talloc_free(req);
+	}
+
+	close(fd);
+
+	talloc_free(mem_ctx);
+}
+
+/* State of the packet reading request */
+struct reader_state {
+ struct tevent_context *ev;
+ /* Descriptor packets are read from */
+ int fd;
+ /* Fixed buffer handed to pkt_read_send() */
+ uint8_t buf[1024];
+ /* pkt_read request currently in flight, NULL when none */
+ struct tevent_req *subreq;
+};
+
+static ssize_t reader_more(uint8_t *buf, size_t buflen, void *private_data);
+static void reader_done(struct tevent_req *subreq);
+
+/*
+ * Start reading packets from fd until the peer closes it, using the
+ * fixed buffer embedded in the request state.  Returns NULL only if
+ * the request cannot be created; if the first pkt_read request cannot
+ * be allocated the request is failed and posted to ev.
+ */
+static struct tevent_req *reader_send(TALLOC_CTX *mem_ctx,
+				      struct tevent_context *ev,
+				      int fd)
+{
+	struct tevent_req *req, *subreq;
+	struct reader_state *state;
+
+	req = tevent_req_create(mem_ctx, &state, struct reader_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->fd = fd;
+
+	subreq = pkt_read_send(state, state->ev, state->fd, 4,
+			       state->buf, sizeof(state->buf),
+			       reader_more, NULL);
+	if (tevent_req_nomem(subreq, req)) {
+		/* Must return here: subreq is NULL and cannot take a callback */
+		return tevent_req_post(req, ev);
+	}
+
+	state->subreq = subreq;
+	tevent_req_set_callback(subreq, reader_done, req);
+	return req;
+}
+
+/*
+ * Tell the packet reader how many more bytes are needed.
+ *
+ * Until the 4 byte length header is complete, ask for the rest of the
+ * header.  Afterwards return the number of bytes still missing from a
+ * packet whose total length (header included) is given by the header.
+ */
+static ssize_t reader_more(uint8_t *buf, size_t buflen, void *private_data)
+{
+	uint32_t pkt_len;
+
+	if (buflen < sizeof(pkt_len)) {
+		return sizeof(pkt_len) - buflen;
+	}
+
+	/* memcpy instead of a cast: buf need not be 4-byte aligned */
+	memcpy(&pkt_len, buf, sizeof(pkt_len));
+	return pkt_len - buflen;
+}
+
+/*
+ * One packet read has finished.
+ *
+ * EPIPE completes the request successfully, any other read error
+ * fails it.  Otherwise the packet is discarded (freed when the reader
+ * allocated it) and the next read is started.
+ */
+static void reader_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct reader_state *state = tevent_req_data(
+		req, struct reader_state);
+	uint8_t *pkt;
+	ssize_t len;
+	bool owned;
+	int err = 0;
+
+	len = pkt_read_recv(subreq, state, &pkt, &owned, &err);
+	TALLOC_FREE(subreq);
+	state->subreq = NULL;
+
+	if (len == -1 && err == EPIPE) {
+		tevent_req_done(req);
+		return;
+	}
+	if (len == -1) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	if (owned) {
+		talloc_free(pkt);
+	}
+
+	subreq = pkt_read_send(state, state->ev, state->fd, 4,
+			       state->buf, 1024, reader_more, NULL);
+	if (! tevent_req_nomem(subreq, req)) {
+		state->subreq = subreq;
+		tevent_req_set_callback(subreq, reader_done, req);
+	}
+}
+
+/*
+ * Collect the result of the reader request into *perr:
+ *   -1    a pkt_read sub-request is still outstanding
+ *   errno the request failed with that unix error
+ *    0    success
+ */
+static void reader_recv(struct tevent_req *req, int *perr)
+{
+	struct reader_state *state = tevent_req_data(
+		req, struct reader_state);
+	int err = 0;
+
+	if (state->subreq != NULL) {
+		*perr = -1;
+		/* Return so the -1 is not overwritten below */
+		return;
+	}
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		*perr = err;
+		return;
+	}
+
+	*perr = 0;
+}
+
+/*
+ * fd event handler: forward the event to pkt_read_handler() for the
+ * pkt_read request currently stored in the reader state.  Asserts
+ * that such a request exists.
+ */
+static void reader_handler(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
+{
+ struct tevent_req *req = talloc_get_type_abort(
+ private_data, struct tevent_req);
+ struct reader_state *state = tevent_req_data(
+ req, struct reader_state);
+
+ assert(state->subreq != NULL);
+ pkt_read_handler(ev, fde, flags, state->subreq);
+}
+
+/*
+ * Parent side of the test: read packets from fd until the reader
+ * request completes and assert that it completed without error.
+ */
+static void reader(int fd)
+{
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct tevent_context *ev;
+	struct tevent_req *req;
+	struct tevent_fd *fde;
+	int err;
+
+	assert(mem_ctx != NULL);
+
+	ev = tevent_context_init(mem_ctx);
+	assert(ev != NULL);
+
+	req = reader_send(mem_ctx, ev, fd);
+	assert(req != NULL);
+
+	/* Readability events on fd are routed to the reader request */
+	fde = tevent_add_fd(ev, mem_ctx, fd, TEVENT_FD_READ,
+			    reader_handler, req);
+	assert(fde != NULL);
+
+	tevent_req_poll(req, ev);
+
+	reader_recv(req, &err);
+	assert(err == 0);
+
+	close(fd);
+
+	talloc_free(mem_ctx);
+}
+
+/*
+ * Run the packet write test over a pipe: a forked child runs writer()
+ * on the write end, the parent switches the read end to non-blocking
+ * mode and runs reader() on it.
+ */
+int main(void)
+{
+ int fd[2];
+ int ret;
+ pid_t pid;
+
+ ret = pipe(fd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) {
+ /* Child process */
+ close(fd[0]);
+ writer(fd[1]);
+ exit(0);
+ }
+
+ /* Parent keeps only the read end */
+ close(fd[1]);
+ ret = set_blocking(fd[0], false);
+ if (ret == -1) {
+ exit(1);
+ }
+
+ reader(fd[0]);
+
+ return 0;
+}
diff --git a/ctdb/tests/src/porting_tests.c b/ctdb/tests/src/porting_tests.c
new file mode 100644
index 0000000..00618d2
--- /dev/null
+++ b/ctdb/tests/src/porting_tests.c
@@ -0,0 +1,262 @@
+/*
+ Test porting lib (common/system_*.c)
+
+ Copyright (C) Mathieu Parent 2013
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+#include <assert.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+
+#include "protocol/protocol.h"
+#include "common/system.h"
+#include "common/logging.h"
+
+
+/* State shared by the socket helpers, the forked helper and the tests */
+static struct {
+ const char *socketname; /* unix socket path, settable with --socket */
+ const char *debuglevel; /* not referenced anywhere else in this file */
+ pid_t helper_pid; /* pid of the child created by fork_helper() */
+ int socket; /* listening socket set up by socket_server_create() */
+} globals = {
+ .socketname = "/tmp/test.sock"
+};
+
+
+
+/*
+ Socket functions
+*/
+/*
+  create a unix domain socket, bind it to globals.socketname and start
+  listening on it
+  the descriptor is stored in globals.socket; the function returns 0
+  and every failing step trips an assert
+*/
+static int socket_server_create(void)
+{
+	struct sockaddr_un sa;
+	int rc;
+
+	globals.socket = socket(AF_UNIX, SOCK_STREAM, 0);
+	assert(globals.socket != -1);
+
+	set_close_on_exec(globals.socket);
+
+	/* Zero first so the copied path is always NUL terminated */
+	memset(&sa, 0, sizeof(sa));
+	sa.sun_family = AF_UNIX;
+	strncpy(sa.sun_path, globals.socketname, sizeof(sa.sun_path)-1);
+
+	rc = bind(globals.socket, (struct sockaddr *)&sa, sizeof(sa));
+	assert(rc == 0);
+
+	/* Socket file: owned by the effective user/group, mode 0700 */
+	rc = chown(globals.socketname, geteuid(), getegid());
+	assert(rc == 0);
+
+	rc = chmod(globals.socketname, 0700);
+	assert(rc == 0);
+
+	rc = listen(globals.socket, 100);
+	assert(rc == 0);
+
+	return 0;
+}
+
+/*
+  accept one connection on globals.socket
+  returns the accepted descriptor, marked close-on-exec
+*/
+static int socket_server_wait_peer(void)
+{
+	struct sockaddr_un peer_addr;
+	socklen_t peer_len = sizeof(peer_addr);
+	int peer;
+
+	memset(&peer_addr, 0, sizeof(peer_addr));
+
+	peer = accept(globals.socket, (struct sockaddr *)&peer_addr,
+		      &peer_len);
+	assert(peer != -1);
+
+	set_close_on_exec(peer);
+
+	return peer;
+}
+
+/*
+  close globals.socket and unlink globals.socketname
+  returns 0; a failing step trips an assert
+*/
+static int socket_server_close(void)
+{
+	int rc;
+
+	/* Descriptor first, then the socket file */
+	rc = close(globals.socket);
+	assert(rc == 0);
+
+	rc = unlink(globals.socketname);
+	assert(rc == 0);
+
+	return 0;
+}
+
+/*
+  connect a new unix domain stream socket to globals.socketname
+  returns the connected descriptor
+*/
+static int socket_client_connect(void)
+{
+	struct sockaddr_un sa;
+	int sock;
+	int rc;
+
+	sock = socket(AF_UNIX, SOCK_STREAM, 0);
+	assert(sock != -1);
+
+	/* Zero first so the copied path is always NUL terminated */
+	memset(&sa, 0, sizeof(sa));
+	sa.sun_family = AF_UNIX;
+	strncpy(sa.sun_path, globals.socketname, sizeof(sa.sun_path)-1);
+
+	rc = connect(sock, (struct sockaddr *)&sa, sizeof(sa));
+	assert(rc == 0);
+
+	return sock;
+}
+
+/*
+  close a descriptor returned by socket_client_connect()
+  returns 0; a failing close trips an assert
+*/
+static int socket_client_close(int client)
+{
+	int rc = close(client);
+
+	assert(rc == 0);
+
+	return 0;
+}
+
+/*
+ forked program
+*/
+/*
+  fork a helper process
+  the child connects to the test socket and keeps the connection open
+  for as long as kill(parent, 0) succeeds, polling once per second
+  the parent records the child's pid in globals.helper_pid and returns 0
+*/
+static int fork_helper(void)
+{
+	pid_t pid;
+	pid_t parent;
+	int client;
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid != 0) {
+		/* Parent */
+		globals.helper_pid = pid;
+		return 0;
+	}
+
+	/* Child */
+	parent = getppid();
+	client = socket_client_connect();
+	while (kill(parent, 0) == 0) {
+		sleep(1);
+	}
+	socket_client_close(client);
+	exit(0);
+}
+
+/*
+ tests
+*/
+/*
+  ctdb_sys_check_iface_exists() must reject a made-up interface name and
+  accept at least one of the loopback names "lo" and "lo0"
+*/
+static int test_ctdb_sys_check_iface_exists(void)
+{
+	bool bogus;
+	bool have_lo;
+	bool have_lo0;
+
+	bogus = ctdb_sys_check_iface_exists("unlikely123xyz");
+	assert(!bogus);
+
+	/* Linux and others */
+	have_lo = ctdb_sys_check_iface_exists("lo");
+	/* FreeBSD */
+	have_lo0 = ctdb_sys_check_iface_exists("lo0");
+	assert(have_lo || have_lo0);
+
+	return 0;
+}
+
+/*
+  accept the helper's connection and check that ctdb_get_peer_pid()
+  either reports the helper's pid or returns ENOSYS
+  the helper is sent SIGTERM in both cases
+*/
+static int test_ctdb_get_peer_pid(void)
+{
+	pid_t peer_pid = 0;
+	pid_t target;
+	int conn;
+	int ret;
+
+	conn = socket_server_wait_peer();
+
+	ret = ctdb_get_peer_pid(conn, &peer_pid);
+	assert(ret == 0 || ret == ENOSYS);
+
+	if (ret == 0) {
+		assert(peer_pid == globals.helper_pid);
+		target = peer_pid;
+	} else {
+		/* No peer pid available: use the pid recorded at fork */
+		target = globals.helper_pid;
+	}
+	kill(target, SIGTERM);
+
+	close(conn);
+	return 0;
+}
+
+/*
+ main program
+*/
+/*
+  parse the command line (--socket plus the popt help options), create
+  the server socket, run the tests and remove the socket again
+  returns 0; failures trip an assert or exit(1)
+*/
+int main(int argc, const char *argv[])
+{
+	struct poptOption popt_options[] = {
+		POPT_AUTOHELP
+		{ "socket", 0, POPT_ARG_STRING, &globals.socketname, 0, "local socket name", "filename" },
+		POPT_TABLEEND
+	};
+	int opt, ret;
+	poptContext pc;
+
+	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
+
+	/* No option in the table has a non-zero val, so any value other
+	   than -1 (end of options) is a parse error */
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		switch (opt) {
+		default:
+			fprintf(stderr, "Invalid option %s: %s\n",
+				poptBadOption(pc, 0), poptStrerror(opt));
+			exit(1);
+		}
+	}
+
+	/* non-option arguments are not used by this program, so the popt
+	   context can be released as soon as parsing is finished */
+	poptFreeContext(pc);
+
+	assert(globals.socketname != NULL);
+
+	ret = socket_server_create();
+	assert(ret == 0);
+
+	/* FIXME: Test tcp_checksum6, tcp_checksum */
+	/* FIXME: Test ctdb_sys_send_arp, ctdb_sys_send_tcp */
+	/* FIXME: Test ctdb_sys_{open,close}_capture_socket, ctdb_sys_read_tcp_packet */
+	test_ctdb_sys_check_iface_exists();
+
+	/* the helper connects to the socket created above */
+	ret = fork_helper();
+	assert(ret == 0);
+	test_ctdb_get_peer_pid();
+
+	ret = socket_server_close();
+	assert(ret == 0);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/protocol_basic_test.c b/ctdb/tests/src/protocol_basic_test.c
new file mode 100644
index 0000000..7046718
--- /dev/null
+++ b/ctdb/tests/src/protocol_basic_test.c
@@ -0,0 +1,106 @@
+/*
+ protocol types tests
+
+ Copyright (C) Amitay Isaacs 2015-2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+
+#include "protocol/protocol_basic.c"
+
+#include "tests/src/protocol_common_basic.h"
+
+/*
+ * One PROTOCOL_TYPE1_TEST() per basic type.  The macro is not defined in
+ * this file (presumably in tests/src/protocol_common_basic.h); each use
+ * presumably defines the function that protocol_basic_test() calls
+ * through TEST_FUNC(<name>) -- confirm against that header.
+ */
+PROTOCOL_TYPE1_TEST(uint8_t, ctdb_uint8);
+PROTOCOL_TYPE1_TEST(uint16_t, ctdb_uint16);
+PROTOCOL_TYPE1_TEST(int32_t, ctdb_int32);
+PROTOCOL_TYPE1_TEST(uint32_t, ctdb_uint32);
+PROTOCOL_TYPE1_TEST(uint64_t, ctdb_uint64);
+PROTOCOL_TYPE1_TEST(double, ctdb_double);
+PROTOCOL_TYPE1_TEST(bool, ctdb_bool);
+
+/*
+ * Push a NUL-terminated array of rand_int(1000) + 1 characters into
+ * BUFFER with ctdb_chararray_push(), pull it back out with
+ * ctdb_chararray_pull() and check the lengths and contents match.
+ */
+static void test_ctdb_chararray(void)
+{
+	size_t len = rand_int(1000) + 1;
+	char src[len], dst[len];
+	size_t wire_len;
+	size_t done = 0;
+	size_t pos = 0;
+	int ret;
+
+	/* len-1 generated characters followed by the terminator */
+	while (pos < len-1) {
+		src[pos] = 'A' + rand_int(26);
+		pos++;
+	}
+	src[len-1] = '\0';
+
+	wire_len = ctdb_chararray_len(src, len);
+	assert(wire_len < sizeof(BUFFER));
+
+	ctdb_chararray_push(src, len, BUFFER, &done);
+	assert(done == wire_len);
+
+	/* The same counter is reused for the pull direction */
+	done = 0;
+	ret = ctdb_chararray_pull(BUFFER, wire_len, dst, len, &done);
+	assert(ret == 0);
+	assert(done == wire_len);
+	assert(strncmp(src, dst, len) == 0);
+}
+
+/*
+ * Tests for the string, pid and timeval types.  The string variants use
+ * PROTOCOL_TYPE2_TEST() instead of PROTOCOL_TYPE1_TEST(); neither macro
+ * is defined in this file -- NOTE(review): see
+ * tests/src/protocol_common_basic.h for how the two differ.
+ */
+PROTOCOL_TYPE2_TEST(const char *, ctdb_string);
+PROTOCOL_TYPE2_TEST(const char *, ctdb_stringn);
+
+PROTOCOL_TYPE1_TEST(pid_t, ctdb_pid);
+PROTOCOL_TYPE1_TEST(struct timeval, ctdb_timeval);
+
+/*
+ * Push rand_int(8) bytes of padding into BUFFER with ctdb_padding_push(),
+ * pull it back with ctdb_padding_pull() and check the processed lengths
+ * against ctdb_padding_len().
+ */
+static void test_ctdb_padding(void)
+{
+	int padding = rand_int(8);
+	size_t wire_len;
+	size_t done = 0;
+	int ret;
+
+	wire_len = ctdb_padding_len(padding);
+	assert(wire_len < sizeof(BUFFER));
+
+	ctdb_padding_push(padding, BUFFER, &done);
+	assert(done == wire_len);
+
+	/* The same counter is reused for the pull direction */
+	done = 0;
+	ret = ctdb_padding_pull(BUFFER, wire_len, padding, &done);
+	assert(ret == 0);
+	assert(done == wire_len);
+}
+
+/*
+ * Run every basic-type test once, in a fixed order.  This is the function
+ * main() hands to protocol_test_iterate().
+ */
+static void protocol_basic_test(void)
+{
+ /* Macro-generated tests for the numeric and bool types */
+ TEST_FUNC(ctdb_uint8)();
+ TEST_FUNC(ctdb_uint16)();
+ TEST_FUNC(ctdb_int32)();
+ TEST_FUNC(ctdb_uint32)();
+ TEST_FUNC(ctdb_uint64)();
+ TEST_FUNC(ctdb_double)();
+ TEST_FUNC(ctdb_bool)();
+
+ /* Hand-written test */
+ test_ctdb_chararray();
+
+ TEST_FUNC(ctdb_string)();
+ TEST_FUNC(ctdb_stringn)();
+
+ TEST_FUNC(ctdb_pid)();
+ TEST_FUNC(ctdb_timeval)();
+
+ /* Hand-written test */
+ test_ctdb_padding();
+}
+
+/*
+ * Pass the command line and protocol_basic_test to protocol_test_iterate()
+ * (defined outside this file).  Always returns 0.
+ */
+int main(int argc, const char *argv[])
+{
+ protocol_test_iterate(argc, argv, protocol_basic_test);
+ return 0;
+}
diff --git a/ctdb/tests/src/protocol_common.c b/ctdb/tests/src/protocol_common.c
new file mode 100644
index 0000000..212c23c
--- /dev/null
+++ b/ctdb/tests/src/protocol_common.c
@@ -0,0 +1,1260 @@
+/*
+ protocol tests - common functions
+
+ Copyright (C) Amitay Isaacs 2015-2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <assert.h>
+
+#include "protocol/protocol_api.h"
+
+#include "tests/src/protocol_common_basic.h"
+#include "tests/src/protocol_common.h"
+
+/*
+ * Give *p a buffer of rand_int(1024) + 1 bytes allocated on mem_ctx and
+ * let fill_buffer() populate it.
+ */
+void fill_tdb_data_nonnull(TALLOC_CTX *mem_ctx, TDB_DATA *p)
+{
+	uint8_t *buf;
+
+	p->dsize = rand_int(1024) + 1;
+
+	buf = talloc_array(mem_ctx, uint8_t, p->dsize);
+	assert(buf != NULL);
+	fill_buffer(buf, p->dsize);
+
+	p->dptr = buf;
+}
+
+/*
+ * Fill *p like fill_tdb_data_nonnull(), except that when rand_int(5)
+ * returns 0 the result is an empty TDB_DATA (dsize 0, dptr NULL).
+ */
+void fill_tdb_data(TALLOC_CTX *mem_ctx, TDB_DATA *p)
+{
+	if (rand_int(5) != 0) {
+		fill_tdb_data_nonnull(mem_ctx, p);
+		return;
+	}
+
+	/* Empty case */
+	p->dsize = 0;
+	p->dptr = NULL;
+}
+
+/*
+ * Assert that p1 and p2 have the same dsize, then compare the two buffers
+ * over that length with verify_buffer().
+ */
+void verify_tdb_data(TDB_DATA *p1, TDB_DATA *p2)
+{
+ assert(p1->dsize == p2->dsize);
+ verify_buffer(p1->dptr, p2->dptr, p1->dsize);
+}
+
+/* Thin wrapper: forwards to fill_tdb_data(). */
+void fill_ctdb_tdb_data(TALLOC_CTX *mem_ctx, TDB_DATA *p)
+{
+ fill_tdb_data(mem_ctx, p);
+}
+
+/* Thin wrapper: forwards to verify_tdb_data(). */
+void verify_ctdb_tdb_data(TDB_DATA *p1, TDB_DATA *p2)
+{
+ verify_tdb_data(p1, p2);
+}
+
+/* Thin wrapper: forwards to fill_tdb_data(). */
+void fill_ctdb_tdb_datan(TALLOC_CTX *mem_ctx, TDB_DATA *p)
+{
+ fill_tdb_data(mem_ctx, p);
+}
+
+/* Thin wrapper: forwards to verify_tdb_data(). */
+void verify_ctdb_tdb_datan(TDB_DATA *p1, TDB_DATA *p2)
+{
+ verify_tdb_data(p1, p2);
+}
+
+/*
+ * Set num from rand32i() and min, max and total from rand_double().
+ * The three doubles are generated independently of each other.
+ */
+void fill_ctdb_latency_counter(struct ctdb_latency_counter *p)
+{
+ p->num = rand32i();
+ p->min = rand_double();
+ p->max = rand_double();
+ p->total = rand_double();
+}
+
+/*
+ * Assert that all four fields of p1 and p2 are equal.  The double fields
+ * are compared with ==, i.e. exactly.
+ */
+void verify_ctdb_latency_counter(struct ctdb_latency_counter *p1,
+ struct ctdb_latency_counter *p2)
+{
+ assert(p1->num == p2->num);
+ assert(p1->min == p2->min);
+ assert(p1->max == p2->max);
+ assert(p1->total == p2->total);
+}
+
+/*
+ * Populate the fields of *p listed below: integer counters from rand32(),
+ * latency counters via fill_ctdb_latency_counter() and timestamps via
+ * fill_ctdb_timeval().  mem_ctx is not used.
+ */
+void fill_ctdb_statistics(TALLOC_CTX *mem_ctx, struct ctdb_statistics *p)
+{
+ int i;
+
+ p->num_clients = rand32();
+ p->frozen = rand32();
+ p->recovering = rand32();
+ p->client_packets_sent = rand32();
+ p->client_packets_recv = rand32();
+ p->node_packets_sent = rand32();
+ p->node_packets_recv = rand32();
+ p->keepalive_packets_sent = rand32();
+ p->keepalive_packets_recv = rand32();
+
+ /* Counters in the node sub-structure */
+ p->node.req_call = rand32();
+ p->node.reply_call = rand32();
+ p->node.req_dmaster = rand32();
+ p->node.reply_dmaster = rand32();
+ p->node.reply_error = rand32();
+ p->node.req_message = rand32();
+ p->node.req_control = rand32();
+ p->node.reply_control = rand32();
+
+ /* Counters in the client sub-structure */
+ p->client.req_call = rand32();
+ p->client.req_message = rand32();
+ p->client.req_control = rand32();
+
+ p->timeouts.call = rand32();
+ p->timeouts.control = rand32();
+ p->timeouts.traverse = rand32();
+
+ fill_ctdb_latency_counter(&p->reclock.ctdbd);
+ fill_ctdb_latency_counter(&p->reclock.recd);
+
+ /* Lock statistics, including one bucket per MAX_COUNT_BUCKETS slot */
+ p->locks.num_calls = rand32();
+ p->locks.num_current = rand32();
+ p->locks.num_pending = rand32();
+ p->locks.num_failed = rand32();
+ fill_ctdb_latency_counter(&p->locks.latency);
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ p->locks.buckets[i] = rand32();
+ }
+
+ p->total_calls = rand32();
+ p->pending_calls = rand32();
+ p->childwrite_calls = rand32();
+ p->pending_childwrite_calls = rand32();
+ p->memory_used = rand32();
+ p->__last_counter = rand32();
+ p->max_hop_count = rand32();
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ p->hop_count_bucket[i] = rand32();
+ }
+ fill_ctdb_latency_counter(&p->call_latency);
+ fill_ctdb_latency_counter(&p->childwrite_latency);
+ p->num_recoveries = rand32();
+ fill_ctdb_timeval(&p->statistics_start_time);
+ fill_ctdb_timeval(&p->statistics_current_time);
+ p->total_ro_delegations = rand32();
+ p->total_ro_revokes = rand32();
+}
+
+/*
+ * Assert field by field that p1 and p2 hold the same statistics.  The
+ * fields checked are the same ones fill_ctdb_statistics() sets, in the
+ * same order.
+ */
+void verify_ctdb_statistics(struct ctdb_statistics *p1,
+ struct ctdb_statistics *p2)
+{
+ int i;
+
+ assert(p1->num_clients == p2->num_clients);
+ assert(p1->frozen == p2->frozen);
+ assert(p1->recovering == p2->recovering);
+ assert(p1->client_packets_sent == p2->client_packets_sent);
+ assert(p1->client_packets_recv == p2->client_packets_recv);
+ assert(p1->node_packets_sent == p2->node_packets_sent);
+ assert(p1->node_packets_recv == p2->node_packets_recv);
+ assert(p1->keepalive_packets_sent == p2->keepalive_packets_sent);
+ assert(p1->keepalive_packets_recv == p2->keepalive_packets_recv);
+
+ /* Counters in the node sub-structure */
+ assert(p1->node.req_call == p2->node.req_call);
+ assert(p1->node.reply_call == p2->node.reply_call);
+ assert(p1->node.req_dmaster == p2->node.req_dmaster);
+ assert(p1->node.reply_dmaster == p2->node.reply_dmaster);
+ assert(p1->node.reply_error == p2->node.reply_error);
+ assert(p1->node.req_message == p2->node.req_message);
+ assert(p1->node.req_control == p2->node.req_control);
+ assert(p1->node.reply_control == p2->node.reply_control);
+
+ /* Counters in the client sub-structure */
+ assert(p1->client.req_call == p2->client.req_call);
+ assert(p1->client.req_message == p2->client.req_message);
+ assert(p1->client.req_control == p2->client.req_control);
+
+ assert(p1->timeouts.call == p2->timeouts.call);
+ assert(p1->timeouts.control == p2->timeouts.control);
+ assert(p1->timeouts.traverse == p2->timeouts.traverse);
+
+ verify_ctdb_latency_counter(&p1->reclock.ctdbd, &p2->reclock.ctdbd);
+ verify_ctdb_latency_counter(&p1->reclock.recd, &p2->reclock.recd);
+
+ /* Lock statistics, including every bucket */
+ assert(p1->locks.num_calls == p2->locks.num_calls);
+ assert(p1->locks.num_current == p2->locks.num_current);
+ assert(p1->locks.num_pending == p2->locks.num_pending);
+ assert(p1->locks.num_failed == p2->locks.num_failed);
+ verify_ctdb_latency_counter(&p1->locks.latency, &p2->locks.latency);
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ assert(p1->locks.buckets[i] == p2->locks.buckets[i]);
+ }
+
+ assert(p1->total_calls == p2->total_calls);
+ assert(p1->pending_calls == p2->pending_calls);
+ assert(p1->childwrite_calls == p2->childwrite_calls);
+ assert(p1->pending_childwrite_calls == p2->pending_childwrite_calls);
+ assert(p1->memory_used == p2->memory_used);
+ assert(p1->__last_counter == p2->__last_counter);
+ assert(p1->max_hop_count == p2->max_hop_count);
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ assert(p1->hop_count_bucket[i] == p2->hop_count_bucket[i]);
+ }
+ verify_ctdb_latency_counter(&p1->call_latency, &p2->call_latency);
+ verify_ctdb_latency_counter(&p1->childwrite_latency,
+ &p2->childwrite_latency);
+ assert(p1->num_recoveries == p2->num_recoveries);
+ verify_ctdb_timeval(&p1->statistics_start_time,
+ &p2->statistics_start_time);
+ verify_ctdb_timeval(&p1->statistics_current_time,
+ &p2->statistics_current_time);
+ assert(p1->total_ro_delegations == p2->total_ro_delegations);
+ assert(p1->total_ro_revokes == p2->total_ro_revokes);
+}
+
+/*
+ * Set generation from rand32() and size from rand_int(20).  A non-empty
+ * map is allocated on mem_ctx with every entry taken from rand32(); an
+ * empty one gets a NULL map pointer.
+ */
+void fill_ctdb_vnn_map(TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *p)
+{
+	unsigned int idx;
+
+	p->generation = rand32();
+	p->size = rand_int(20);
+
+	if (p->size == 0) {
+		p->map = NULL;
+		return;
+	}
+
+	p->map = talloc_array(mem_ctx, uint32_t, p->size);
+	assert(p->map != NULL);
+
+	idx = 0;
+	while (idx < p->size) {
+		p->map[idx] = rand32();
+		idx++;
+	}
+}
+
+/*
+ * Assert that p1 and p2 agree on generation, size and every map entry.
+ */
+void verify_ctdb_vnn_map(struct ctdb_vnn_map *p1, struct ctdb_vnn_map *p2)
+{
+	unsigned int idx = 0;
+
+	assert(p1->generation == p2->generation);
+	assert(p1->size == p2->size);
+
+	while (idx < p1->size) {
+		assert(p1->map[idx] == p2->map[idx]);
+		idx++;
+	}
+}
+
+/* Set db_id from rand32() and flags from rand8().  mem_ctx is not used. */
+void fill_ctdb_dbid(TALLOC_CTX *mem_ctx, struct ctdb_dbid *p)
+{
+ p->db_id = rand32();
+ p->flags = rand8();
+}
+
+/* Assert that p1 and p2 have the same db_id and flags. */
+void verify_ctdb_dbid(struct ctdb_dbid *p1, struct ctdb_dbid *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->flags == p2->flags);
+}
+
+/*
+ * Set num from rand_int(40).  A non-empty map gets a zeroed array on
+ * mem_ctx with every element filled by fill_ctdb_dbid(); an empty one
+ * gets a NULL dbs pointer.
+ */
+void fill_ctdb_dbid_map(TALLOC_CTX *mem_ctx, struct ctdb_dbid_map *p)
+{
+	unsigned int idx;
+
+	p->num = rand_int(40);
+
+	if (p->num == 0) {
+		p->dbs = NULL;
+		return;
+	}
+
+	p->dbs = talloc_zero_array(mem_ctx, struct ctdb_dbid, p->num);
+	assert(p->dbs != NULL);
+
+	idx = 0;
+	while (idx < p->num) {
+		fill_ctdb_dbid(mem_ctx, &p->dbs[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Assert that p1 and p2 hold the same number of entries and that each
+ * pair of entries passes verify_ctdb_dbid().
+ */
+void verify_ctdb_dbid_map(struct ctdb_dbid_map *p1, struct ctdb_dbid_map *p2)
+{
+	unsigned int idx = 0;
+
+	assert(p1->num == p2->num);
+
+	while (idx < p1->num) {
+		verify_ctdb_dbid(&p1->dbs[idx], &p2->dbs[idx]);
+		idx++;
+	}
+}
+
+/* Set db_id and lmaster from rand32().  mem_ctx is not used. */
+void fill_ctdb_pulldb(TALLOC_CTX *mem_ctx, struct ctdb_pulldb *p)
+{
+ p->db_id = rand32();
+ p->lmaster = rand32();
+}
+
+/* Assert that p1 and p2 have the same db_id and lmaster. */
+void verify_ctdb_pulldb(struct ctdb_pulldb *p1, struct ctdb_pulldb *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->lmaster == p2->lmaster);
+}
+
+/*
+ * Set db_id and lmaster from rand32() and srvid from rand64().
+ * mem_ctx is not used.
+ */
+void fill_ctdb_pulldb_ext(TALLOC_CTX *mem_ctx, struct ctdb_pulldb_ext *p)
+{
+ p->db_id = rand32();
+ p->lmaster = rand32();
+ p->srvid = rand64();
+}
+
+/* Assert that p1 and p2 have the same db_id, lmaster and srvid. */
+void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1,
+ struct ctdb_pulldb_ext *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->lmaster == p2->lmaster);
+ assert(p1->srvid == p2->srvid);
+}
+
+/*
+ * Fill db_id via fill_ctdb_uint32() and full_vacuum_run via
+ * fill_ctdb_bool().  mem_ctx is not used.
+ */
+void fill_ctdb_db_vacuum(TALLOC_CTX *mem_ctx, struct ctdb_db_vacuum *p)
+{
+ fill_ctdb_uint32(&p->db_id);
+ fill_ctdb_bool(&p->full_vacuum_run);
+}
+
+/*
+ * Compare db_id with verify_ctdb_uint32() and full_vacuum_run with
+ * verify_ctdb_bool().
+ */
+void verify_ctdb_db_vacuum(struct ctdb_db_vacuum *p1,
+ struct ctdb_db_vacuum *p2)
+{
+ verify_ctdb_uint32(&p1->db_id, &p2->db_id);
+ verify_ctdb_bool(&p1->full_vacuum_run, &p2->full_vacuum_run);
+}
+
+/*
+ * Fill timeout via fill_ctdb_uint32() and buf via fill_tdb_data(), which
+ * allocates on mem_ctx and may leave buf empty.
+ */
+void fill_ctdb_echo_data(TALLOC_CTX *mem_ctx, struct ctdb_echo_data *p)
+{
+ fill_ctdb_uint32(&p->timeout);
+ fill_tdb_data(mem_ctx, &p->buf);
+}
+
+/*
+ * Compare timeout with verify_ctdb_uint32() and buf with
+ * verify_tdb_data().
+ */
+void verify_ctdb_echo_data(struct ctdb_echo_data *p1,
+ struct ctdb_echo_data *p2)
+{
+ verify_ctdb_uint32(&p1->timeout, &p2->timeout);
+ verify_tdb_data(&p1->buf, &p2->buf);
+}
+
+/*
+ * Set rsn from rand64() and dmaster, reserved1 and flags from rand32().
+ */
+void fill_ctdb_ltdb_header(struct ctdb_ltdb_header *p)
+{
+ p->rsn = rand64();
+ p->dmaster = rand32();
+ p->reserved1 = rand32();
+ p->flags = rand32();
+}
+
+/* Assert that p1 and p2 agree on rsn, dmaster, reserved1 and flags. */
+void verify_ctdb_ltdb_header(struct ctdb_ltdb_header *p1,
+ struct ctdb_ltdb_header *p2)
+{
+ assert(p1->rsn == p2->rsn);
+ assert(p1->dmaster == p2->dmaster);
+ assert(p1->reserved1 == p2->reserved1);
+ assert(p1->flags == p2->flags);
+}
+
+/*
+ * Fill a record: reqid from rand32(), a header only when reqid is a
+ * multiple of 5 (NULL otherwise), a non-empty key and a possibly empty
+ * data blob.  All allocations are made on mem_ctx.
+ */
+void fill_ctdb_rec_data(TALLOC_CTX *mem_ctx, struct ctdb_rec_data *p)
+{
+	p->reqid = rand32();
+
+	p->header = NULL;
+	if (p->reqid % 5 == 0) {
+		p->header = talloc(mem_ctx, struct ctdb_ltdb_header);
+		assert(p->header != NULL);
+		fill_ctdb_ltdb_header(p->header);
+	}
+
+	fill_tdb_data_nonnull(mem_ctx, &p->key);
+	fill_tdb_data(mem_ctx, &p->data);
+}
+
+/*
+ * Assert that record p2 matches record p1.
+ *
+ * When p1 carries a header, a header is first extracted from p2->data
+ * with ctdb_ltdb_header_extract() and compared against p1->header.
+ * NOTE(review): the extract call is handed &p2->data and presumably
+ * consumes the header bytes, so that the data comparison below sees the
+ * payload only -- confirm against ctdb_ltdb_header_extract().
+ */
+void verify_ctdb_rec_data(struct ctdb_rec_data *p1, struct ctdb_rec_data *p2)
+{
+	struct ctdb_ltdb_header header;
+	int ret;
+
+	assert(p1->reqid == p2->reqid);
+	if (p1->header != NULL) {
+		/*
+		 * Keep the call outside assert(): with NDEBUG the whole
+		 * assert expression disappears, which would skip the
+		 * extraction and leave header uninitialised.
+		 */
+		ret = ctdb_ltdb_header_extract(&p2->data, &header);
+		assert(ret == 0);
+		verify_ctdb_ltdb_header(p1->header, &header);
+	}
+	verify_tdb_data(&p1->key, &p2->key);
+	verify_tdb_data(&p1->data, &p2->data);
+}
+
+/*
+ * Start from an empty record buffer with db_id from rand32(), then append
+ * rand_int(100) records produced by fill_ctdb_rec_data() using
+ * ctdb_rec_buffer_add().
+ */
+void fill_ctdb_rec_buffer(TALLOC_CTX *mem_ctx, struct ctdb_rec_buffer *p)
+{
+	struct ctdb_rec_data rec;
+	int remaining;
+	int ret;
+
+	p->db_id = rand32();
+	p->count = 0;
+	p->buf = NULL;
+	p->buflen = 0;
+
+	/* A count of zero simply leaves the buffer empty */
+	remaining = rand_int(100);
+	while (remaining > 0) {
+		fill_ctdb_rec_data(mem_ctx, &rec);
+		ret = ctdb_rec_buffer_add(mem_ctx, p, rec.reqid,
+					  rec.header,
+					  rec.key, rec.data);
+		assert(ret == 0);
+		remaining--;
+	}
+}
+
+/*
+ * Assert that p1 and p2 agree on db_id, count and buflen, then compare
+ * the two raw buffers over buflen bytes with verify_buffer().
+ */
+void verify_ctdb_rec_buffer(struct ctdb_rec_buffer *p1,
+ struct ctdb_rec_buffer *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->count == p2->count);
+ assert(p1->buflen == p2->buflen);
+ verify_buffer(p1->buf, p2->buf, p1->buflen);
+}
+
+/*
+ * Set db_id and reqid from rand32() and srvid from rand64().
+ * mem_ctx is not used.
+ */
+void fill_ctdb_traverse_start(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_start *p)
+{
+ p->db_id = rand32();
+ p->reqid = rand32();
+ p->srvid = rand64();
+}
+
+/* Assert that p1 and p2 have the same db_id, reqid and srvid. */
+void verify_ctdb_traverse_start(struct ctdb_traverse_start *p1,
+ struct ctdb_traverse_start *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->reqid == p2->reqid);
+ assert(p1->srvid == p2->srvid);
+}
+
+/*
+ * Set db_id, reqid, pnn and client_reqid from rand32() and srvid from
+ * rand64().  mem_ctx is not used.
+ */
+void fill_ctdb_traverse_all(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_all *p)
+{
+ p->db_id = rand32();
+ p->reqid = rand32();
+ p->pnn = rand32();
+ p->client_reqid = rand32();
+ p->srvid = rand64();
+}
+
+/*
+ * Assert that p1 and p2 have the same db_id, reqid, pnn, client_reqid
+ * and srvid.
+ */
+void verify_ctdb_traverse_all(struct ctdb_traverse_all *p1,
+ struct ctdb_traverse_all *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->reqid == p2->reqid);
+ assert(p1->pnn == p2->pnn);
+ assert(p1->client_reqid == p2->client_reqid);
+ assert(p1->srvid == p2->srvid);
+}
+
+/*
+ * Set db_id and reqid from rand32(), srvid from rand64() and
+ * withemptyrecords from rand_int(2).  mem_ctx is not used.
+ */
+void fill_ctdb_traverse_start_ext(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_start_ext *p)
+{
+ p->db_id = rand32();
+ p->reqid = rand32();
+ p->srvid = rand64();
+ p->withemptyrecords = rand_int(2);
+}
+
+/*
+ * Assert that p1 and p2 have the same db_id, reqid, srvid and
+ * withemptyrecords.
+ */
+void verify_ctdb_traverse_start_ext(struct ctdb_traverse_start_ext *p1,
+ struct ctdb_traverse_start_ext *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->reqid == p2->reqid);
+ assert(p1->srvid == p2->srvid);
+ assert(p1->withemptyrecords == p2->withemptyrecords);
+}
+
+/*
+ * Set db_id, reqid, pnn and client_reqid from rand32(), srvid from
+ * rand64() and withemptyrecords from rand_int(2).  mem_ctx is not used.
+ */
+void fill_ctdb_traverse_all_ext(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_all_ext *p)
+{
+ p->db_id = rand32();
+ p->reqid = rand32();
+ p->pnn = rand32();
+ p->client_reqid = rand32();
+ p->srvid = rand64();
+ p->withemptyrecords = rand_int(2);
+}
+
+/*
+ * Assert that p1 and p2 have the same db_id, reqid, pnn, client_reqid,
+ * srvid and withemptyrecords.
+ */
+void verify_ctdb_traverse_all_ext(struct ctdb_traverse_all_ext *p1,
+ struct ctdb_traverse_all_ext *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->reqid == p2->reqid);
+ assert(p1->pnn == p2->pnn);
+ assert(p1->client_reqid == p2->client_reqid);
+ assert(p1->srvid == p2->srvid);
+ assert(p1->withemptyrecords == p2->withemptyrecords);
+}
+
+/*
+ * Make *p an IPv4 address when rand_int(2) returns 0 and an IPv6 address
+ * otherwise.  Only family, port and the address bytes are written; the
+ * port comes from rand_int(65535) and the address from fill_buffer().
+ * mem_ctx is not used.
+ */
+void fill_ctdb_sock_addr(TALLOC_CTX *mem_ctx, ctdb_sock_addr *p)
+{
+	if (rand_int(2) != 0) {
+		/* IPv6 */
+		p->ip6.sin6_family = AF_INET6;
+		p->ip6.sin6_port = rand_int(65535);
+		fill_buffer(&p->ip6.sin6_addr, sizeof(struct in6_addr));
+		return;
+	}
+
+	/* IPv4 */
+	p->ip.sin_family = AF_INET;
+	p->ip.sin_port = rand_int(65535);
+	fill_buffer(&p->ip.sin_addr, sizeof(struct in_addr));
+}
+
+/*
+ * Assert that p1 and p2 have the same address family, port and address
+ * bytes.  Any family other than AF_INET is compared as IPv6.
+ */
+void verify_ctdb_sock_addr(ctdb_sock_addr *p1, ctdb_sock_addr *p2)
+{
+	assert(p1->sa.sa_family == p2->sa.sa_family);
+
+	if (p1->sa.sa_family != AF_INET) {
+		assert(p1->ip6.sin6_port == p2->ip6.sin6_port);
+		verify_buffer(&p1->ip6.sin6_addr, &p2->ip6.sin6_addr,
+			      sizeof(struct in6_addr));
+		return;
+	}
+
+	assert(p1->ip.sin_port == p2->ip.sin_port);
+	verify_buffer(&p1->ip.sin_addr, &p2->ip.sin_addr,
+		      sizeof(struct in_addr));
+}
+
+/*
+ * Fill src and dst separately with fill_ctdb_sock_addr(); each call picks
+ * its own address family.
+ */
+void fill_ctdb_connection(TALLOC_CTX *mem_ctx, struct ctdb_connection *p)
+{
+ fill_ctdb_sock_addr(mem_ctx, &p->src);
+ fill_ctdb_sock_addr(mem_ctx, &p->dst);
+}
+
+/* Compare src and dst of p1 and p2 with verify_ctdb_sock_addr(). */
+void verify_ctdb_connection(struct ctdb_connection *p1,
+ struct ctdb_connection *p2)
+{
+ verify_ctdb_sock_addr(&p1->src, &p2->src);
+ verify_ctdb_sock_addr(&p1->dst, &p2->dst);
+}
+
+/*
+ * Set num from rand_int(1000).  A non-empty list gets an array on mem_ctx
+ * with every element filled by fill_ctdb_connection(); an empty one gets
+ * a NULL conn pointer.
+ */
+void fill_ctdb_connection_list(TALLOC_CTX *mem_ctx,
+			       struct ctdb_connection_list *p)
+{
+	uint32_t idx;
+
+	p->num = rand_int(1000);
+
+	if (p->num == 0) {
+		p->conn = NULL;
+		return;
+	}
+
+	p->conn = talloc_array(mem_ctx, struct ctdb_connection, p->num);
+	assert(p->conn != NULL);
+
+	idx = 0;
+	while (idx < p->num) {
+		fill_ctdb_connection(mem_ctx, &p->conn[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Assert that p1 and p2 hold the same number of connections and that each
+ * pair passes verify_ctdb_connection().
+ */
+void verify_ctdb_connection_list(struct ctdb_connection_list *p1,
+				 struct ctdb_connection_list *p2)
+{
+	uint32_t idx = 0;
+
+	assert(p1->num == p2->num);
+
+	while (idx < p1->num) {
+		verify_ctdb_connection(&p1->conn[idx], &p2->conn[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill name via fill_ctdb_string() (given mem_ctx) and value from
+ * rand32().
+ */
+void fill_ctdb_tunable(TALLOC_CTX *mem_ctx, struct ctdb_tunable *p)
+{
+ fill_ctdb_string(mem_ctx, &p->name);
+ p->value = rand32();
+}
+
+/*
+ * Compare name with verify_ctdb_string() and assert that the values are
+ * equal.
+ */
+void verify_ctdb_tunable(struct ctdb_tunable *p1, struct ctdb_tunable *p2)
+{
+ verify_ctdb_string(&p1->name, &p2->name);
+ assert(p1->value == p2->value);
+}
+
+/* Set pnn, new_flags and old_flags from rand32().  mem_ctx is not used. */
+void fill_ctdb_node_flag_change(TALLOC_CTX *mem_ctx,
+ struct ctdb_node_flag_change *p)
+{
+ p->pnn = rand32();
+ p->new_flags = rand32();
+ p->old_flags = rand32();
+}
+
+/* Assert that p1 and p2 have the same pnn, new_flags and old_flags. */
+void verify_ctdb_node_flag_change(struct ctdb_node_flag_change *p1,
+ struct ctdb_node_flag_change *p2)
+{
+ assert(p1->pnn == p2->pnn);
+ assert(p1->new_flags == p2->new_flags);
+ assert(p1->old_flags == p2->old_flags);
+}
+
+/*
+ * Set count from rand_int(100) + 1 and allocate that many string slots on
+ * mem_ctx.  Each slot is filled by fill_ctdb_string(), which is given the
+ * array itself as its talloc context.
+ */
+void fill_ctdb_var_list(TALLOC_CTX *mem_ctx, struct ctdb_var_list *p)
+{
+	int i;
+
+	p->count = rand_int(100) + 1;
+	p->var = talloc_array(mem_ctx, const char *, p->count);
+	/* Fail here, like the other fill functions, not on first use */
+	assert(p->var != NULL);
+	for (i=0; i<p->count; i++) {
+		fill_ctdb_string(p->var, &p->var[i]);
+	}
+}
+
+/*
+ * Assert that p1 and p2 hold the same number of strings and that each
+ * pair passes verify_ctdb_string().
+ */
+void verify_ctdb_var_list(struct ctdb_var_list *p1, struct ctdb_var_list *p2)
+{
+	int idx = 0;
+
+	assert(p1->count == p2->count);
+
+	while (idx < p1->count) {
+		verify_ctdb_string(&p1->var[idx], &p2->var[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Assign a separate rand32() value to each tunable listed below.
+ * mem_ctx is not used.  verify_ctdb_tunable_list() checks the same set
+ * of fields.
+ */
+void fill_ctdb_tunable_list(TALLOC_CTX *mem_ctx, struct ctdb_tunable_list *p)
+{
+ p->max_redirect_count = rand32();
+ p->seqnum_interval = rand32();
+ p->control_timeout = rand32();
+ p->traverse_timeout = rand32();
+ p->keepalive_interval = rand32();
+ p->keepalive_limit = rand32();
+ p->recover_timeout = rand32();
+ p->recover_interval = rand32();
+ p->election_timeout = rand32();
+ p->takeover_timeout = rand32();
+ p->monitor_interval = rand32();
+ p->tickle_update_interval = rand32();
+ p->script_timeout = rand32();
+ p->monitor_timeout_count = rand32();
+ p->script_unhealthy_on_timeout = rand32();
+ p->recovery_grace_period = rand32();
+ p->recovery_ban_period = rand32();
+ p->database_hash_size = rand32();
+ p->database_max_dead = rand32();
+ p->rerecovery_timeout = rand32();
+ p->enable_bans = rand32();
+ p->deterministic_public_ips = rand32();
+ p->reclock_ping_period = rand32();
+ p->no_ip_failback = rand32();
+ p->disable_ip_failover = rand32();
+ p->verbose_memory_names = rand32();
+ p->recd_ping_timeout = rand32();
+ p->recd_ping_failcount = rand32();
+ p->log_latency_ms = rand32();
+ p->reclock_latency_ms = rand32();
+ p->recovery_drop_all_ips = rand32();
+ p->verify_recovery_lock = rand32();
+ p->vacuum_interval = rand32();
+ p->vacuum_max_run_time = rand32();
+ p->repack_limit = rand32();
+ p->vacuum_limit = rand32();
+ p->max_queue_depth_drop_msg = rand32();
+ p->allow_unhealthy_db_read = rand32();
+ p->stat_history_interval = rand32();
+ p->deferred_attach_timeout = rand32();
+ p->vacuum_fast_path_count = rand32();
+ p->lcp2_public_ip_assignment = rand32();
+ p->allow_client_db_attach = rand32();
+ p->recover_pdb_by_seqnum = rand32();
+ p->deferred_rebalance_on_node_add = rand32();
+ p->fetch_collapse = rand32();
+ p->hopcount_make_sticky = rand32();
+ p->sticky_duration = rand32();
+ p->sticky_pindown = rand32();
+ p->no_ip_takeover = rand32();
+ p->db_record_count_warn = rand32();
+ p->db_record_size_warn = rand32();
+ p->db_size_warn = rand32();
+ p->pulldb_preallocation_size = rand32();
+ p->no_ip_host_on_all_disabled = rand32();
+ p->samba3_hack = rand32();
+ p->mutex_enabled = rand32();
+ p->lock_processes_per_db = rand32();
+ p->rec_buffer_size_limit = rand32();
+ p->queue_buffer_size = rand32();
+ p->ip_alloc_algorithm = rand32();
+ p->allow_mixed_versions = rand32();
+}
+
+/*
+ * Assert, one tunable at a time, that p1 and p2 hold the same values.
+ * The fields are checked in the order fill_ctdb_tunable_list() sets them.
+ */
+void verify_ctdb_tunable_list(struct ctdb_tunable_list *p1,
+ struct ctdb_tunable_list *p2)
+{
+ assert(p1->max_redirect_count == p2->max_redirect_count);
+ assert(p1->seqnum_interval == p2->seqnum_interval);
+ assert(p1->control_timeout == p2->control_timeout);
+ assert(p1->traverse_timeout == p2->traverse_timeout);
+ assert(p1->keepalive_interval == p2->keepalive_interval);
+ assert(p1->keepalive_limit == p2->keepalive_limit);
+ assert(p1->recover_timeout == p2->recover_timeout);
+ assert(p1->recover_interval == p2->recover_interval);
+ assert(p1->election_timeout == p2->election_timeout);
+ assert(p1->takeover_timeout == p2->takeover_timeout);
+ assert(p1->monitor_interval == p2->monitor_interval);
+ assert(p1->tickle_update_interval == p2->tickle_update_interval);
+ assert(p1->script_timeout == p2->script_timeout);
+ assert(p1->monitor_timeout_count == p2->monitor_timeout_count);
+ assert(p1->script_unhealthy_on_timeout == p2->script_unhealthy_on_timeout);
+ assert(p1->recovery_grace_period == p2->recovery_grace_period);
+ assert(p1->recovery_ban_period == p2->recovery_ban_period);
+ assert(p1->database_hash_size == p2->database_hash_size);
+ assert(p1->database_max_dead == p2->database_max_dead);
+ assert(p1->rerecovery_timeout == p2->rerecovery_timeout);
+ assert(p1->enable_bans == p2->enable_bans);
+ assert(p1->deterministic_public_ips == p2->deterministic_public_ips);
+ assert(p1->reclock_ping_period == p2->reclock_ping_period);
+ assert(p1->no_ip_failback == p2->no_ip_failback);
+ assert(p1->disable_ip_failover == p2->disable_ip_failover);
+ assert(p1->verbose_memory_names == p2->verbose_memory_names);
+ assert(p1->recd_ping_timeout == p2->recd_ping_timeout);
+ assert(p1->recd_ping_failcount == p2->recd_ping_failcount);
+ assert(p1->log_latency_ms == p2->log_latency_ms);
+ assert(p1->reclock_latency_ms == p2->reclock_latency_ms);
+ assert(p1->recovery_drop_all_ips == p2->recovery_drop_all_ips);
+ assert(p1->verify_recovery_lock == p2->verify_recovery_lock);
+ assert(p1->vacuum_interval == p2->vacuum_interval);
+ assert(p1->vacuum_max_run_time == p2->vacuum_max_run_time);
+ assert(p1->repack_limit == p2->repack_limit);
+ assert(p1->vacuum_limit == p2->vacuum_limit);
+ assert(p1->max_queue_depth_drop_msg == p2->max_queue_depth_drop_msg);
+ assert(p1->allow_unhealthy_db_read == p2->allow_unhealthy_db_read);
+ assert(p1->stat_history_interval == p2->stat_history_interval);
+ assert(p1->deferred_attach_timeout == p2->deferred_attach_timeout);
+ assert(p1->vacuum_fast_path_count == p2->vacuum_fast_path_count);
+ assert(p1->lcp2_public_ip_assignment == p2->lcp2_public_ip_assignment);
+ assert(p1->allow_client_db_attach == p2->allow_client_db_attach);
+ assert(p1->recover_pdb_by_seqnum == p2->recover_pdb_by_seqnum);
+ assert(p1->deferred_rebalance_on_node_add == p2->deferred_rebalance_on_node_add);
+ assert(p1->fetch_collapse == p2->fetch_collapse);
+ assert(p1->hopcount_make_sticky == p2->hopcount_make_sticky);
+ assert(p1->sticky_duration == p2->sticky_duration);
+ assert(p1->sticky_pindown == p2->sticky_pindown);
+ assert(p1->no_ip_takeover == p2->no_ip_takeover);
+ assert(p1->db_record_count_warn == p2->db_record_count_warn);
+ assert(p1->db_record_size_warn == p2->db_record_size_warn);
+ assert(p1->db_size_warn == p2->db_size_warn);
+ assert(p1->pulldb_preallocation_size == p2->pulldb_preallocation_size);
+ assert(p1->no_ip_host_on_all_disabled == p2->no_ip_host_on_all_disabled);
+ assert(p1->samba3_hack == p2->samba3_hack);
+ assert(p1->mutex_enabled == p2->mutex_enabled);
+ assert(p1->lock_processes_per_db == p2->lock_processes_per_db);
+ assert(p1->rec_buffer_size_limit == p2->rec_buffer_size_limit);
+ assert(p1->queue_buffer_size == p2->queue_buffer_size);
+ assert(p1->ip_alloc_algorithm == p2->ip_alloc_algorithm);
+ assert(p1->allow_mixed_versions == p2->allow_mixed_versions);
+}
+
+/*
+ * Fill addr with fill_ctdb_sock_addr() and set num from rand_int(1000).
+ * A non-empty list gets an array on mem_ctx with every element filled by
+ * fill_ctdb_connection(); an empty one gets a NULL conn pointer.
+ */
+void fill_ctdb_tickle_list(TALLOC_CTX *mem_ctx, struct ctdb_tickle_list *p)
+{
+	unsigned int idx;
+
+	fill_ctdb_sock_addr(mem_ctx, &p->addr);
+	p->num = rand_int(1000);
+
+	if (p->num == 0) {
+		p->conn = NULL;
+		return;
+	}
+
+	p->conn = talloc_array(mem_ctx, struct ctdb_connection, p->num);
+	assert(p->conn != NULL);
+
+	idx = 0;
+	while (idx < p->num) {
+		fill_ctdb_connection(mem_ctx, &p->conn[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Compare addr with verify_ctdb_sock_addr(), assert that the connection
+ * counts match and run verify_ctdb_connection() on each pair.
+ */
+void verify_ctdb_tickle_list(struct ctdb_tickle_list *p1,
+			     struct ctdb_tickle_list *p2)
+{
+	unsigned int idx = 0;
+
+	verify_ctdb_sock_addr(&p1->addr, &p2->addr);
+	assert(p1->num == p2->num);
+
+	while (idx < p1->num) {
+		verify_ctdb_connection(&p1->conn[idx], &p2->conn[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill addr with fill_ctdb_sock_addr() and mask from rand_int(33).  The
+ * iface string is NULL when rand_int(2) returns 0 and is otherwise
+ * filled by fill_ctdb_string().
+ */
+void fill_ctdb_addr_info(TALLOC_CTX *mem_ctx, struct ctdb_addr_info *p)
+{
+	fill_ctdb_sock_addr(mem_ctx, &p->addr);
+	p->mask = rand_int(33);
+
+	if (rand_int(2) != 0) {
+		fill_ctdb_string(mem_ctx, &p->iface);
+	} else {
+		p->iface = NULL;
+	}
+}
+
+/*
+ * Assert that two address-info structures are equal: address, mask and
+ * interface name.
+ */
+void verify_ctdb_addr_info(struct ctdb_addr_info *p1,
+ struct ctdb_addr_info *p2)
+{
+ verify_ctdb_sock_addr(&p1->addr, &p2->addr);
+ assert(p1->mask == p2->mask);
+ verify_ctdb_string(&p1->iface, &p2->iface);
+}
+
+/*
+ * Fill db_id and tid with random 32-bit values.  mem_ctx is not used.
+ */
+void fill_ctdb_transdb(TALLOC_CTX *mem_ctx, struct ctdb_transdb *p)
+{
+ p->db_id = rand32();
+ p->tid = rand32();
+}
+
+/*
+ * Assert that db_id and tid are equal in both structures.
+ */
+void verify_ctdb_transdb(struct ctdb_transdb *p1, struct ctdb_transdb *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ assert(p1->tid == p2->tid);
+}
+
+/*
+ * Fill all four timestamps of an uptime record with random timevals.
+ * mem_ctx is not used.
+ */
+void fill_ctdb_uptime(TALLOC_CTX *mem_ctx, struct ctdb_uptime *p)
+{
+ fill_ctdb_timeval(&p->current_time);
+ fill_ctdb_timeval(&p->ctdbd_start_time);
+ fill_ctdb_timeval(&p->last_recovery_started);
+ fill_ctdb_timeval(&p->last_recovery_finished);
+}
+
+/*
+ * Assert that all four timestamps of two uptime records are equal.
+ */
+void verify_ctdb_uptime(struct ctdb_uptime *p1, struct ctdb_uptime *p2)
+{
+ verify_ctdb_timeval(&p1->current_time, &p2->current_time);
+ verify_ctdb_timeval(&p1->ctdbd_start_time, &p2->ctdbd_start_time);
+ verify_ctdb_timeval(&p1->last_recovery_started,
+ &p2->last_recovery_started);
+ verify_ctdb_timeval(&p1->last_recovery_finished,
+ &p2->last_recovery_finished);
+}
+
+/*
+ * Fill a public IP entry with a random pnn and a random socket address.
+ */
+void fill_ctdb_public_ip(TALLOC_CTX *mem_ctx, struct ctdb_public_ip *p)
+{
+ p->pnn = rand32();
+ fill_ctdb_sock_addr(mem_ctx, &p->addr);
+}
+
+/*
+ * Assert that two public IP entries have the same pnn and address.
+ */
+void verify_ctdb_public_ip(struct ctdb_public_ip *p1,
+ struct ctdb_public_ip *p2)
+{
+ assert(p1->pnn == p2->pnn);
+ verify_ctdb_sock_addr(&p1->addr, &p2->addr);
+}
+
+/*
+ * Populate a public IP list with rand_int(32) random entries allocated
+ * on mem_ctx.  An empty list gets a NULL ip pointer.
+ */
+void fill_ctdb_public_ip_list(TALLOC_CTX *mem_ctx,
+			      struct ctdb_public_ip_list *p)
+{
+	unsigned int idx;
+
+	p->num = rand_int(32);
+	if (p->num == 0) {
+		p->ip = NULL;
+		return;
+	}
+
+	p->ip = talloc_array(mem_ctx, struct ctdb_public_ip, p->num);
+	assert(p->ip != NULL);
+	for (idx = 0; idx < p->num; idx++) {
+		fill_ctdb_public_ip(mem_ctx, &p->ip[idx]);
+	}
+}
+
+/*
+ * Assert that two public IP lists have the same length and
+ * pairwise-equal entries.
+ */
+void verify_ctdb_public_ip_list(struct ctdb_public_ip_list *p1,
+				struct ctdb_public_ip_list *p2)
+{
+	unsigned int idx = 0;
+
+	assert(p1->num == p2->num);
+	while (idx < p1->num) {
+		verify_ctdb_public_ip(&p1->ip[idx], &p2->ip[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill a node entry with random pnn, random flags and a random address.
+ */
+void fill_ctdb_node_and_flags(TALLOC_CTX *mem_ctx,
+ struct ctdb_node_and_flags *p)
+{
+ p->pnn = rand32();
+ p->flags = rand32();
+ fill_ctdb_sock_addr(mem_ctx, &p->addr);
+}
+
+/*
+ * Assert that two node entries have equal pnn, flags and address.
+ */
+void verify_ctdb_node_and_flags(struct ctdb_node_and_flags *p1,
+ struct ctdb_node_and_flags *p2)
+{
+ assert(p1->pnn == p2->pnn);
+ assert(p1->flags == p2->flags);
+ verify_ctdb_sock_addr(&p1->addr, &p2->addr);
+}
+
+/*
+ * Populate a node map with rand_int(32) random node entries allocated
+ * on mem_ctx.  An empty map gets a NULL node pointer.
+ */
+void fill_ctdb_node_map(TALLOC_CTX *mem_ctx, struct ctdb_node_map *p)
+{
+	unsigned int idx;
+
+	p->num = rand_int(32);
+	if (p->num == 0) {
+		p->node = NULL;
+		return;
+	}
+
+	p->node = talloc_array(mem_ctx, struct ctdb_node_and_flags, p->num);
+	assert(p->node != NULL);
+	for (idx = 0; idx < p->num; idx++) {
+		fill_ctdb_node_and_flags(mem_ctx, &p->node[idx]);
+	}
+}
+
+/*
+ * Assert that two node maps have the same length and pairwise-equal
+ * node entries.
+ */
+void verify_ctdb_node_map(struct ctdb_node_map *p1, struct ctdb_node_map *p2)
+{
+	unsigned int idx = 0;
+
+	assert(p1->num == p2->num);
+	while (idx < p1->num) {
+		verify_ctdb_node_and_flags(&p1->node[idx], &p2->node[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill a script record: name and output become NUL-terminated random
+ * strings of MAX_SCRIPT_NAME and MAX_SCRIPT_OUTPUT characters, start and
+ * finished are random timevals, status is a random signed value.
+ * mem_ctx is not used.
+ */
+void fill_ctdb_script(TALLOC_CTX *mem_ctx, struct ctdb_script *p)
+{
+ fill_string(p->name, MAX_SCRIPT_NAME+1);
+ fill_ctdb_timeval(&p->start);
+ fill_ctdb_timeval(&p->finished);
+ p->status = rand32i();
+ fill_string(p->output, MAX_SCRIPT_OUTPUT+1);
+}
+
+/*
+ * Assert that two script records are equal field by field.
+ */
+void verify_ctdb_script(struct ctdb_script *p1, struct ctdb_script *p2)
+{
+ verify_string(p1->name, p2->name);
+ verify_ctdb_timeval(&p1->start, &p2->start);
+ verify_ctdb_timeval(&p1->finished, &p2->finished);
+ assert(p1->status == p2->status);
+ verify_string(p1->output, p2->output);
+}
+
+/*
+ * Populate a script list with rand_int(32) random script records in a
+ * zero-initialised array allocated on mem_ctx.  An empty list gets a
+ * NULL script pointer.
+ */
+void fill_ctdb_script_list(TALLOC_CTX *mem_ctx, struct ctdb_script_list *p)
+{
+	unsigned int idx;
+
+	p->num_scripts = rand_int(32);
+	if (p->num_scripts == 0) {
+		p->script = NULL;
+		return;
+	}
+
+	p->script = talloc_zero_array(mem_ctx, struct ctdb_script,
+				      p->num_scripts);
+	assert(p->script != NULL);
+	for (idx = 0; idx < p->num_scripts; idx++) {
+		fill_ctdb_script(mem_ctx, &p->script[idx]);
+	}
+}
+
+/*
+ * Assert that two script lists have the same length and pairwise-equal
+ * script records.
+ */
+void verify_ctdb_script_list(struct ctdb_script_list *p1,
+			     struct ctdb_script_list *p2)
+{
+	unsigned int idx = 0;
+
+	assert(p1->num_scripts == p2->num_scripts);
+	while (idx < p1->num_scripts) {
+		verify_ctdb_script(&p1->script[idx], &p2->script[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill pnn and time with random 32-bit values.  mem_ctx is not used.
+ */
+void fill_ctdb_ban_state(TALLOC_CTX *mem_ctx, struct ctdb_ban_state *p)
+{
+ p->pnn = rand32();
+ p->time = rand32();
+}
+
+/*
+ * Assert that pnn and time are equal in both ban states.
+ */
+void verify_ctdb_ban_state(struct ctdb_ban_state *p1,
+ struct ctdb_ban_state *p2)
+{
+ assert(p1->pnn == p2->pnn);
+ assert(p1->time == p2->time);
+}
+
+/*
+ * Fill notify data with a random 64-bit srvid and a TDB_DATA payload
+ * produced by fill_tdb_data() on mem_ctx.
+ */
+void fill_ctdb_notify_data(TALLOC_CTX *mem_ctx, struct ctdb_notify_data *p)
+{
+ p->srvid = rand64();
+ fill_tdb_data(mem_ctx, &p->data);
+}
+
+/*
+ * Assert that srvid and data payload are equal in both structures.
+ */
+void verify_ctdb_notify_data(struct ctdb_notify_data *p1,
+ struct ctdb_notify_data *p2)
+{
+ assert(p1->srvid == p2->srvid);
+ verify_tdb_data(&p1->data, &p2->data);
+}
+
+/*
+ * Fill an interface record with a random name, link state and reference
+ * count.  mem_ctx is not used.
+ */
+void fill_ctdb_iface(TALLOC_CTX *mem_ctx, struct ctdb_iface *p)
+{
+ /* Writes CTDB_IFACE_SIZE+2 bytes including the terminating NUL;
+ * assumes p->name is at least that large - confirm against the
+ * struct definition. */
+ fill_string(p->name, CTDB_IFACE_SIZE+2);
+ p->link_state = rand16();
+ p->references = rand32();
+}
+
+/*
+ * Assert that two interface records have equal name, link state and
+ * reference count.
+ */
+void verify_ctdb_iface(struct ctdb_iface *p1, struct ctdb_iface *p2)
+{
+ verify_string(p1->name, p2->name);
+ assert(p1->link_state == p2->link_state);
+ assert(p1->references == p2->references);
+}
+
+/*
+ * Populate an interface list with rand_int(32) random interface records
+ * allocated on mem_ctx.  An empty list gets a NULL iface pointer.
+ */
+void fill_ctdb_iface_list(TALLOC_CTX *mem_ctx, struct ctdb_iface_list *p)
+{
+	unsigned int idx;
+
+	p->num = rand_int(32);
+	if (p->num == 0) {
+		p->iface = NULL;
+		return;
+	}
+
+	p->iface = talloc_array(mem_ctx, struct ctdb_iface, p->num);
+	assert(p->iface != NULL);
+	for (idx = 0; idx < p->num; idx++) {
+		fill_ctdb_iface(mem_ctx, &p->iface[idx]);
+	}
+}
+
+/*
+ * Assert that two interface lists have the same length and
+ * pairwise-equal interface records.
+ */
+void verify_ctdb_iface_list(struct ctdb_iface_list *p1,
+			    struct ctdb_iface_list *p2)
+{
+	unsigned int idx = 0;
+
+	assert(p1->num == p2->num);
+	while (idx < p1->num) {
+		verify_ctdb_iface(&p1->iface[idx], &p2->iface[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill public IP info: a random public IP, an active_idx of
+ * rand_int(32) + 1, and a freshly allocated, randomly filled interface
+ * list (both the list header and its contents live on mem_ctx).
+ */
+void fill_ctdb_public_ip_info(TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_info *p)
+{
+ fill_ctdb_public_ip(mem_ctx, &p->ip);
+ p->active_idx = rand_int(32) + 1;
+ p->ifaces = talloc(mem_ctx, struct ctdb_iface_list);
+ assert(p->ifaces != NULL);
+ fill_ctdb_iface_list(mem_ctx, p->ifaces);
+}
+
+/*
+ * Assert that two public IP info structures have equal IP, active_idx
+ * and interface list.  Both ifaces pointers are dereferenced.
+ */
+void verify_ctdb_public_ip_info(struct ctdb_public_ip_info *p1,
+ struct ctdb_public_ip_info *p2)
+{
+ verify_ctdb_public_ip(&p1->ip, &p2->ip);
+ assert(p1->active_idx == p2->active_idx);
+ verify_ctdb_iface_list(p1->ifaces, p2->ifaces);
+}
+
+/*
+ * Populate a statistics list with rand_int(10) random statistics
+ * records in a zero-initialised array allocated on mem_ctx.  An empty
+ * list gets a NULL stats pointer.
+ */
+void fill_ctdb_statistics_list(TALLOC_CTX *mem_ctx,
+			       struct ctdb_statistics_list *p)
+{
+	int idx;
+
+	p->num = rand_int(10);
+	if (p->num <= 0) {
+		p->stats = NULL;
+		return;
+	}
+
+	p->stats = talloc_zero_array(mem_ctx, struct ctdb_statistics, p->num);
+	assert(p->stats != NULL);
+
+	for (idx = 0; idx < p->num; idx++) {
+		fill_ctdb_statistics(mem_ctx, &p->stats[idx]);
+	}
+}
+
+/*
+ * Assert that two statistics lists have the same length and
+ * pairwise-equal statistics records.
+ */
+void verify_ctdb_statistics_list(struct ctdb_statistics_list *p1,
+				 struct ctdb_statistics_list *p2)
+{
+	int idx = 0;
+
+	assert(p1->num == p2->num);
+	while (idx < p1->num) {
+		verify_ctdb_statistics(&p1->stats[idx], &p2->stats[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill key data with a random db_id, a random ltdb header and a key
+ * produced by fill_tdb_data_nonnull() on mem_ctx.
+ */
+void fill_ctdb_key_data(TALLOC_CTX *mem_ctx, struct ctdb_key_data *p)
+{
+ p->db_id = rand32();
+ fill_ctdb_ltdb_header(&p->header);
+ fill_tdb_data_nonnull(mem_ctx, &p->key);
+}
+
+/*
+ * Assert that two key data structures have equal db_id, header and key.
+ */
+void verify_ctdb_key_data(struct ctdb_key_data *p1, struct ctdb_key_data *p2)
+{
+ assert(p1->db_id == p2->db_id);
+ verify_ctdb_ltdb_header(&p1->header, &p2->header);
+ verify_tdb_data(&p1->key, &p2->key);
+}
+
+/*
+ * Fill per-database statistics with random values: lock counters,
+ * lock and vacuum latency, read-only delegation counters, hop count
+ * buckets and hot keys.  Hot key data is allocated on mem_ctx.
+ */
+void fill_ctdb_db_statistics(TALLOC_CTX *mem_ctx,
+ struct ctdb_db_statistics *p)
+{
+ unsigned int i;
+
+ /* Lock statistics */
+ p->locks.num_calls = rand32();
+ p->locks.num_current = rand32();
+ p->locks.num_pending = rand32();
+ p->locks.num_failed = rand32();
+ fill_ctdb_latency_counter(&p->locks.latency);
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ p->locks.buckets[i] = rand32();
+ }
+
+ /* Vacuum statistics */
+ fill_ctdb_latency_counter(&p->vacuum.latency);
+
+ p->db_ro_delegations = rand32();
+ p->db_ro_revokes = rand32();
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ p->hop_count_bucket[i] = rand32();
+ }
+
+ /* The hot key table is always filled completely */
+ p->num_hot_keys = MAX_HOT_KEYS;
+ for (i=0; i<p->num_hot_keys; i++) {
+ p->hot_keys[i].count = rand32();
+ fill_tdb_data(mem_ctx, &p->hot_keys[i].key);
+ }
+}
+
+/*
+ * Assert that two per-database statistics structures are equal: lock
+ * counters and buckets, lock and vacuum latency, read-only delegation
+ * counters, hop count buckets and the first num_hot_keys hot keys.
+ */
+void verify_ctdb_db_statistics(struct ctdb_db_statistics *p1,
+ struct ctdb_db_statistics *p2)
+{
+ unsigned int i;
+
+ /* Lock statistics */
+ assert(p1->locks.num_calls == p2->locks.num_calls);
+ assert(p1->locks.num_current == p2->locks.num_current);
+ assert(p1->locks.num_pending == p2->locks.num_pending);
+ assert(p1->locks.num_failed == p2->locks.num_failed);
+ verify_ctdb_latency_counter(&p1->locks.latency, &p2->locks.latency);
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ assert(p1->locks.buckets[i] == p2->locks.buckets[i]);
+ }
+
+ /* Vacuum statistics */
+ verify_ctdb_latency_counter(&p1->vacuum.latency, &p2->vacuum.latency);
+
+ assert(p1->db_ro_delegations == p2->db_ro_delegations);
+ assert(p1->db_ro_revokes == p2->db_ro_revokes);
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ assert(p1->hop_count_bucket[i] == p2->hop_count_bucket[i]);
+ }
+
+ /* Hot keys: the counts must agree before entries are compared */
+ assert(p1->num_hot_keys == p2->num_hot_keys);
+ for (i=0; i<p1->num_hot_keys; i++) {
+ assert(p1->hot_keys[i].count == p2->hot_keys[i].count);
+ verify_tdb_data(&p1->hot_keys[i].key, &p2->hot_keys[i].key);
+ }
+}
+
+/*
+ * Fill pid with a random 32-bit value and srvid with a random 64-bit
+ * value.  mem_ctx is not used.
+ */
+void fill_ctdb_pid_srvid(TALLOC_CTX *mem_ctx, struct ctdb_pid_srvid *p)
+{
+ p->pid = rand32();
+ p->srvid = rand64();
+}
+
+/*
+ * Assert that pid and srvid are equal in both structures.
+ */
+void verify_ctdb_pid_srvid(struct ctdb_pid_srvid *p1,
+ struct ctdb_pid_srvid *p2)
+{
+ assert(p1->pid == p2->pid);
+ assert(p1->srvid == p2->srvid);
+}
+
+/*
+ * Fill an election message: num_connected and pnn each come from
+ * rand_int(32), priority_time is a random timeval and node_flags a
+ * random 32-bit value.  mem_ctx is not used.
+ */
+void fill_ctdb_election_message(TALLOC_CTX *mem_ctx,
+ struct ctdb_election_message *p)
+{
+ p->num_connected = rand_int(32);
+ fill_ctdb_timeval(&p->priority_time);
+ p->pnn = rand_int(32);
+ p->node_flags = rand32();
+}
+
+/*
+ * Assert that two election messages are equal field by field.
+ */
+void verify_ctdb_election_message(struct ctdb_election_message *p1,
+ struct ctdb_election_message *p2)
+{
+ assert(p1->num_connected == p2->num_connected);
+ verify_ctdb_timeval(&p1->priority_time, &p2->priority_time);
+ assert(p1->pnn == p2->pnn);
+ assert(p1->node_flags == p2->node_flags);
+}
+
+/*
+ * Fill a srvid message with a pnn from rand_int(32) and a random 64-bit
+ * srvid.  mem_ctx is not used.
+ */
+void fill_ctdb_srvid_message(TALLOC_CTX *mem_ctx,
+ struct ctdb_srvid_message *p)
+{
+ p->pnn = rand_int(32);
+ p->srvid = rand64();
+}
+
+/*
+ * Assert that pnn and srvid are equal in both messages.
+ */
+void verify_ctdb_srvid_message(struct ctdb_srvid_message *p1,
+ struct ctdb_srvid_message *p2)
+{
+ assert(p1->pnn == p2->pnn);
+ assert(p1->srvid == p2->srvid);
+}
+
+/*
+ * Fill a disable message with a pnn from rand_int(32), a random 64-bit
+ * srvid and a random 32-bit timeout.  mem_ctx is not used.
+ */
+void fill_ctdb_disable_message(TALLOC_CTX *mem_ctx,
+ struct ctdb_disable_message *p)
+{
+ p->pnn = rand_int(32);
+ p->srvid = rand64();
+ p->timeout = rand32();
+}
+
+/*
+ * Assert that pnn, srvid and timeout are equal in both messages.
+ */
+void verify_ctdb_disable_message(struct ctdb_disable_message *p1,
+ struct ctdb_disable_message *p2)
+{
+ assert(p1->pnn == p2->pnn);
+ assert(p1->srvid == p2->srvid);
+ assert(p1->timeout == p2->timeout);
+}
+
+/*
+ * Fill a server id with random pid, task_id and unique_id, and a vnn
+ * from rand_int(32).
+ */
+void fill_ctdb_server_id(struct ctdb_server_id *p)
+{
+ p->pid = rand64();
+ p->task_id = rand32();
+ p->vnn = rand_int(32);
+ p->unique_id = rand64();
+}
+
+/*
+ * Assert that two server ids are equal field by field.
+ */
+void verify_ctdb_server_id(struct ctdb_server_id *p1,
+ struct ctdb_server_id *p2)
+{
+ assert(p1->pid == p2->pid);
+ assert(p1->task_id == p2->task_id);
+ assert(p1->vnn == p2->vnn);
+ assert(p1->unique_id == p2->unique_id);
+}
+
+/*
+ * Fill a g_lock entry with a type of 0 or 1 (rand_int(2)) and a random
+ * server id.
+ */
+void fill_ctdb_g_lock(struct ctdb_g_lock *p)
+{
+ p->type = rand_int(2);
+ fill_ctdb_server_id(&p->sid);
+}
+
+/*
+ * Assert that two g_lock entries have equal type and server id.
+ */
+void verify_ctdb_g_lock(struct ctdb_g_lock *p1, struct ctdb_g_lock *p2)
+{
+ assert(p1->type == p2->type);
+ verify_ctdb_server_id(&p1->sid, &p2->sid);
+}
+
+/*
+ * Populate a g_lock list with rand_int(20) + 1 random entries in a
+ * zero-initialised array allocated on mem_ctx.  The list is never
+ * empty.
+ */
+void fill_ctdb_g_lock_list(TALLOC_CTX *mem_ctx, struct ctdb_g_lock_list *p)
+{
+	unsigned int idx = 0;
+
+	p->num = rand_int(20) + 1;
+	p->lock = talloc_zero_array(mem_ctx, struct ctdb_g_lock, p->num);
+	assert(p->lock != NULL);
+	while (idx < p->num) {
+		fill_ctdb_g_lock(&p->lock[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Assert that two g_lock lists have the same length and pairwise-equal
+ * entries.
+ */
+void verify_ctdb_g_lock_list(struct ctdb_g_lock_list *p1,
+			     struct ctdb_g_lock_list *p2)
+{
+	unsigned int idx = 0;
+
+	assert(p1->num == p2->num);
+	while (idx < p1->num) {
+		verify_ctdb_g_lock(&p1->lock[idx], &p2->lock[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Fill length and reqid with random 32-bit values.
+ */
+void fill_sock_packet_header(struct sock_packet_header *p)
+{
+ p->length = rand32();
+ p->reqid = rand32();
+}
+
+/*
+ * Assert that length and reqid are equal in both packet headers.
+ */
+void verify_sock_packet_header(struct sock_packet_header *p1,
+ struct sock_packet_header *p2)
+{
+ assert(p1->length == p2->length);
+ assert(p1->reqid == p2->reqid);
+}
diff --git a/ctdb/tests/src/protocol_common.h b/ctdb/tests/src/protocol_common.h
new file mode 100644
index 0000000..171b19b
--- /dev/null
+++ b/ctdb/tests/src/protocol_common.h
@@ -0,0 +1,238 @@
+/*
+ protocol tests - common functions
+
+ Copyright (C) Amitay Isaacs 2015-2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PROTOCOL_COMMON_H__
+#define __CTDB_PROTOCOL_COMMON_H__
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol/protocol.h"
+
+#include "tests/src/protocol_common_basic.h"
+
+void fill_tdb_data_nonnull(TALLOC_CTX *mem_ctx, TDB_DATA *p);
+void fill_tdb_data(TALLOC_CTX *mem_ctx, TDB_DATA *p);
+void verify_tdb_data(TDB_DATA *p1, TDB_DATA *p2);
+
+void fill_ctdb_tdb_data(TALLOC_CTX *mem_ctx, TDB_DATA *p);
+void verify_ctdb_tdb_data(TDB_DATA *p1, TDB_DATA *p2);
+
+void fill_ctdb_tdb_datan(TALLOC_CTX *mem_ctx, TDB_DATA *p);
+void verify_ctdb_tdb_datan(TDB_DATA *p1, TDB_DATA *p2);
+
+void fill_ctdb_latency_counter(struct ctdb_latency_counter *p);
+void verify_ctdb_latency_counter(struct ctdb_latency_counter *p1,
+ struct ctdb_latency_counter *p2);
+
+void fill_ctdb_statistics(TALLOC_CTX *mem_ctx, struct ctdb_statistics *p);
+void verify_ctdb_statistics(struct ctdb_statistics *p1,
+ struct ctdb_statistics *p2);
+
+void fill_ctdb_vnn_map(TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *p);
+void verify_ctdb_vnn_map(struct ctdb_vnn_map *p1, struct ctdb_vnn_map *p2);
+
+void fill_ctdb_dbid(TALLOC_CTX *mem_ctx, struct ctdb_dbid *p);
+void verify_ctdb_dbid(struct ctdb_dbid *p1, struct ctdb_dbid *p2);
+
+void fill_ctdb_dbid_map(TALLOC_CTX *mem_ctx, struct ctdb_dbid_map *p);
+void verify_ctdb_dbid_map(struct ctdb_dbid_map *p1, struct ctdb_dbid_map *p2);
+
+void fill_ctdb_pulldb(TALLOC_CTX *mem_ctx, struct ctdb_pulldb *p);
+void verify_ctdb_pulldb(struct ctdb_pulldb *p1, struct ctdb_pulldb *p2);
+
+void fill_ctdb_pulldb_ext(TALLOC_CTX *mem_ctx, struct ctdb_pulldb_ext *p);
+void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1,
+ struct ctdb_pulldb_ext *p2);
+
+void fill_ctdb_db_vacuum(TALLOC_CTX *mem_ctx, struct ctdb_db_vacuum *p);
+void verify_ctdb_db_vacuum(struct ctdb_db_vacuum *p1,
+ struct ctdb_db_vacuum *p2);
+
+void fill_ctdb_echo_data(TALLOC_CTX *mem_ctx, struct ctdb_echo_data *p);
+void verify_ctdb_echo_data(struct ctdb_echo_data *p1,
+ struct ctdb_echo_data *p2);
+
+void fill_ctdb_ltdb_header(struct ctdb_ltdb_header *p);
+void verify_ctdb_ltdb_header(struct ctdb_ltdb_header *p1,
+ struct ctdb_ltdb_header *p2);
+
+void fill_ctdb_rec_data(TALLOC_CTX *mem_ctx, struct ctdb_rec_data *p);
+void verify_ctdb_rec_data(struct ctdb_rec_data *p1, struct ctdb_rec_data *p2);
+
+void fill_ctdb_rec_buffer(TALLOC_CTX *mem_ctx, struct ctdb_rec_buffer *p);
+void verify_ctdb_rec_buffer(struct ctdb_rec_buffer *p1,
+ struct ctdb_rec_buffer *p2);
+
+void fill_ctdb_traverse_start(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_start *p);
+void verify_ctdb_traverse_start(struct ctdb_traverse_start *p1,
+ struct ctdb_traverse_start *p2);
+
+void fill_ctdb_traverse_all(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_all *p);
+void verify_ctdb_traverse_all(struct ctdb_traverse_all *p1,
+ struct ctdb_traverse_all *p2);
+
+void fill_ctdb_traverse_start_ext(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_start_ext *p);
+void verify_ctdb_traverse_start_ext(struct ctdb_traverse_start_ext *p1,
+ struct ctdb_traverse_start_ext *p2);
+
+void fill_ctdb_traverse_all_ext(TALLOC_CTX *mem_ctx,
+ struct ctdb_traverse_all_ext *p);
+void verify_ctdb_traverse_all_ext(struct ctdb_traverse_all_ext *p1,
+ struct ctdb_traverse_all_ext *p2);
+
+void fill_ctdb_sock_addr(TALLOC_CTX *mem_ctx, ctdb_sock_addr *p);
+void verify_ctdb_sock_addr(ctdb_sock_addr *p1, ctdb_sock_addr *p2);
+
+void fill_ctdb_connection(TALLOC_CTX *mem_ctx, struct ctdb_connection *p);
+void verify_ctdb_connection(struct ctdb_connection *p1,
+ struct ctdb_connection *p2);
+
+void fill_ctdb_connection_list(TALLOC_CTX *mem_ctx,
+ struct ctdb_connection_list *p);
+void verify_ctdb_connection_list(struct ctdb_connection_list *p1,
+ struct ctdb_connection_list *p2);
+
+void fill_ctdb_tunable(TALLOC_CTX *mem_ctx, struct ctdb_tunable *p);
+void verify_ctdb_tunable(struct ctdb_tunable *p1, struct ctdb_tunable *p2);
+
+void fill_ctdb_node_flag_change(TALLOC_CTX *mem_ctx,
+ struct ctdb_node_flag_change *p);
+void verify_ctdb_node_flag_change(struct ctdb_node_flag_change *p1,
+ struct ctdb_node_flag_change *p2);
+
+void fill_ctdb_var_list(TALLOC_CTX *mem_ctx, struct ctdb_var_list *p);
+void verify_ctdb_var_list(struct ctdb_var_list *p1, struct ctdb_var_list *p2);
+
+void fill_ctdb_tunable_list(TALLOC_CTX *mem_ctx, struct ctdb_tunable_list *p);
+void verify_ctdb_tunable_list(struct ctdb_tunable_list *p1,
+ struct ctdb_tunable_list *p2);
+
+void fill_ctdb_tickle_list(TALLOC_CTX *mem_ctx, struct ctdb_tickle_list *p);
+void verify_ctdb_tickle_list(struct ctdb_tickle_list *p1,
+ struct ctdb_tickle_list *p2);
+
+void fill_ctdb_addr_info(TALLOC_CTX *mem_ctx, struct ctdb_addr_info *p);
+void verify_ctdb_addr_info(struct ctdb_addr_info *p1,
+ struct ctdb_addr_info *p2);
+
+void fill_ctdb_transdb(TALLOC_CTX *mem_ctx, struct ctdb_transdb *p);
+void verify_ctdb_transdb(struct ctdb_transdb *p1, struct ctdb_transdb *p2);
+
+void fill_ctdb_uptime(TALLOC_CTX *mem_ctx, struct ctdb_uptime *p);
+void verify_ctdb_uptime(struct ctdb_uptime *p1, struct ctdb_uptime *p2);
+
+void fill_ctdb_public_ip(TALLOC_CTX *mem_ctx, struct ctdb_public_ip *p);
+void verify_ctdb_public_ip(struct ctdb_public_ip *p1,
+ struct ctdb_public_ip *p2);
+
+void fill_ctdb_public_ip_list(TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_list *p);
+void verify_ctdb_public_ip_list(struct ctdb_public_ip_list *p1,
+ struct ctdb_public_ip_list *p2);
+
+void fill_ctdb_node_and_flags(TALLOC_CTX *mem_ctx,
+ struct ctdb_node_and_flags *p);
+void verify_ctdb_node_and_flags(struct ctdb_node_and_flags *p1,
+ struct ctdb_node_and_flags *p2);
+
+void fill_ctdb_node_map(TALLOC_CTX *mem_ctx, struct ctdb_node_map *p);
+void verify_ctdb_node_map(struct ctdb_node_map *p1, struct ctdb_node_map *p2);
+
+void fill_ctdb_script(TALLOC_CTX *mem_ctx, struct ctdb_script *p);
+void verify_ctdb_script(struct ctdb_script *p1, struct ctdb_script *p2);
+
+void fill_ctdb_script_list(TALLOC_CTX *mem_ctx, struct ctdb_script_list *p);
+void verify_ctdb_script_list(struct ctdb_script_list *p1,
+ struct ctdb_script_list *p2);
+
+void fill_ctdb_ban_state(TALLOC_CTX *mem_ctx, struct ctdb_ban_state *p);
+void verify_ctdb_ban_state(struct ctdb_ban_state *p1,
+ struct ctdb_ban_state *p2);
+
+void fill_ctdb_notify_data(TALLOC_CTX *mem_ctx, struct ctdb_notify_data *p);
+void verify_ctdb_notify_data(struct ctdb_notify_data *p1,
+ struct ctdb_notify_data *p2);
+
+void fill_ctdb_iface(TALLOC_CTX *mem_ctx, struct ctdb_iface *p);
+void verify_ctdb_iface(struct ctdb_iface *p1, struct ctdb_iface *p2);
+
+void fill_ctdb_iface_list(TALLOC_CTX *mem_ctx, struct ctdb_iface_list *p);
+void verify_ctdb_iface_list(struct ctdb_iface_list *p1,
+ struct ctdb_iface_list *p2);
+
+void fill_ctdb_public_ip_info(TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_info *p);
+void verify_ctdb_public_ip_info(struct ctdb_public_ip_info *p1,
+ struct ctdb_public_ip_info *p2);
+
+void fill_ctdb_statistics_list(TALLOC_CTX *mem_ctx,
+ struct ctdb_statistics_list *p);
+void verify_ctdb_statistics_list(struct ctdb_statistics_list *p1,
+ struct ctdb_statistics_list *p2);
+
+void fill_ctdb_key_data(TALLOC_CTX *mem_ctx, struct ctdb_key_data *p);
+void verify_ctdb_key_data(struct ctdb_key_data *p1, struct ctdb_key_data *p2);
+
+void fill_ctdb_db_statistics(TALLOC_CTX *mem_ctx,
+ struct ctdb_db_statistics *p);
+void verify_ctdb_db_statistics(struct ctdb_db_statistics *p1,
+ struct ctdb_db_statistics *p2);
+
+void fill_ctdb_pid_srvid(TALLOC_CTX *mem_ctx, struct ctdb_pid_srvid *p);
+void verify_ctdb_pid_srvid(struct ctdb_pid_srvid *p1,
+ struct ctdb_pid_srvid *p2);
+
+void fill_ctdb_election_message(TALLOC_CTX *mem_ctx,
+ struct ctdb_election_message *p);
+void verify_ctdb_election_message(struct ctdb_election_message *p1,
+ struct ctdb_election_message *p2);
+
+void fill_ctdb_srvid_message(TALLOC_CTX *mem_ctx,
+ struct ctdb_srvid_message *p);
+void verify_ctdb_srvid_message(struct ctdb_srvid_message *p1,
+ struct ctdb_srvid_message *p2);
+
+void fill_ctdb_disable_message(TALLOC_CTX *mem_ctx,
+ struct ctdb_disable_message *p);
+void verify_ctdb_disable_message(struct ctdb_disable_message *p1,
+ struct ctdb_disable_message *p2);
+
+void fill_ctdb_server_id(struct ctdb_server_id *p);
+void verify_ctdb_server_id(struct ctdb_server_id *p1,
+ struct ctdb_server_id *p2);
+
+void fill_ctdb_g_lock(struct ctdb_g_lock *p);
+void verify_ctdb_g_lock(struct ctdb_g_lock *p1, struct ctdb_g_lock *p2);
+
+void fill_ctdb_g_lock_list(TALLOC_CTX *mem_ctx, struct ctdb_g_lock_list *p);
+void verify_ctdb_g_lock_list(struct ctdb_g_lock_list *p1,
+ struct ctdb_g_lock_list *p2);
+
+void fill_sock_packet_header(struct sock_packet_header *p);
+void verify_sock_packet_header(struct sock_packet_header *p1,
+ struct sock_packet_header *p2);
+
+#endif /* __CTDB_PROTOCOL_COMMON_H__ */
diff --git a/ctdb/tests/src/protocol_common_basic.c b/ctdb/tests/src/protocol_common_basic.c
new file mode 100644
index 0000000..7567f7b
--- /dev/null
+++ b/ctdb/tests/src/protocol_common_basic.c
@@ -0,0 +1,305 @@
+/*
+ protocol tests - common functions - basic types
+
+ Copyright (C) Amitay Isaacs 2015-2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/wait.h"
+
+#include <assert.h>
+
+#include "lib/util/fault.h"
+
+#include "tests/src/protocol_common_basic.h"
+
+/* Shared 1 MiB byte buffer, declared extern in protocol_common_basic.h */
+uint8_t BUFFER[1024*1024];
+
+/*
+ * Functions to generate random data
+ */
+
+/*
+ * Return a pseudo-random integer in the range [0, max).
+ *
+ * max must be positive: a zero max would make the modulo below divide
+ * by zero, so fail with a clear assertion instead.
+ */
+int rand_int(int max)
+{
+	assert(max > 0);
+	return random() % max;
+}
+
+/*
+ * Return a pseudo-random 8-bit value (0..255).
+ */
+uint8_t rand8(void)
+{
+	return (uint8_t)(rand_int(256) & 0xff);
+}
+
+/*
+ * Return a pseudo-random 16-bit value covering the full 0..0xffff
+ * range.
+ */
+uint16_t rand16(void)
+{
+	/*
+	 * The modulus is 0x10000, matching rand8()'s use of 256; a modulus
+	 * of 0xffff could never produce the value 0xffff itself.
+	 */
+	uint16_t val = rand_int(0x10000) & 0xffff;
+	return val;
+}
+
+/*
+ * Return a pseudo-random signed 32-bit value computed as
+ * INT_MIN + random().
+ *
+ * NOTE(review): POSIX random() returns 0..2^31-1, so with a 32-bit int
+ * this presumably only ever yields negative values - confirm that is
+ * intended.
+ */
+int32_t rand32i(void)
+{
+	long offset = random();
+
+	return INT_MIN + offset;
+}
+
+/*
+ * Return the next random() value as a uint32_t.
+ *
+ * NOTE(review): POSIX random() returns 0..2^31-1, so the top bit is
+ * presumably never set - confirm this is acceptable for the tests.
+ */
+uint32_t rand32(void)
+{
+ return random();
+}
+
+/*
+ * Return a pseudo-random 64-bit value built from two random() calls:
+ * the first becomes the upper 32 bits, the second is OR-ed into the
+ * lower bits.
+ */
+uint64_t rand64(void)
+{
+	uint64_t upper = random();
+
+	upper <<= 32;
+	return upper | random();
+}
+
+/*
+ * Return a pseudo-random double computed as the reciprocal of rand64().
+ *
+ * NOTE(review): if rand64() returns 0 this divides by zero; under
+ * IEEE 754 that presumably yields infinity - confirm that is fine for
+ * callers.
+ */
+double rand_double(void)
+{
+ return 1.0 / rand64();
+}
+
+/*
+ * Fill len bytes starting at p with pseudo-random bytes from rand8().
+ * Nothing is written when len is 0.
+ */
+void fill_buffer(void *p, size_t len)
+{
+	uint8_t *out = p;
+	size_t remaining = len;
+
+	while (remaining > 0) {
+		*out = rand8();
+		out++;
+		remaining--;
+	}
+}
+
+/*
+ * Assert that the first len bytes of p1 and p2 are identical.  Zero
+ * length buffers are not inspected at all, so the pointers may be
+ * anything in that case.
+ */
+void verify_buffer(void *p1, void *p2, size_t len)
+{
+	if (len == 0) {
+		return;
+	}
+
+	assert(memcmp(p1, p2, len) == 0);
+}
+
+/*
+ * Fill p with len-1 random uppercase letters ('A' + rand_int(26))
+ * followed by a terminating NUL, i.e. exactly len bytes are written.
+ */
+void fill_string(char *p, size_t len)
+{
+	size_t i;
+
+	/*
+	 * With len == 0 there is no room even for the terminator, and
+	 * len-1 below would wrap around to SIZE_MAX and run off the end of
+	 * the buffer.
+	 */
+	if (len == 0) {
+		return;
+	}
+
+	for (i=0; i<len-1; i++) {
+		p[i] = 'A' + rand_int(26);
+	}
+	p[len-1] = '\0';
+}
+
+/*
+ * Assert that two NUL-terminated strings have the same length and the
+ * same contents.  Neither pointer may be NULL.
+ */
+void verify_string(const char *p1, const char *p2)
+{
+ assert(strlen(p1) == strlen(p2));
+ assert(strcmp(p1, p2) == 0);
+}
+
+/* Store a random 8-bit value in *p. */
+void fill_ctdb_uint8(uint8_t *p)
+{
+ *p = rand8();
+}
+
+/* Assert that the two pointed-to 8-bit values are equal. */
+void verify_ctdb_uint8(uint8_t *p1, uint8_t *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/* Store a random 16-bit value in *p. */
+void fill_ctdb_uint16(uint16_t *p)
+{
+ *p = rand16();
+}
+
+/* Assert that the two pointed-to 16-bit values are equal. */
+void verify_ctdb_uint16(uint16_t *p1, uint16_t *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/* Store a random signed 32-bit value from rand32i() in *p. */
+void fill_ctdb_int32(int32_t *p)
+{
+ *p = rand32i();
+}
+
+/* Assert that the two pointed-to signed 32-bit values are equal. */
+void verify_ctdb_int32(int32_t *p1, int32_t *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/* Store a random unsigned 32-bit value from rand32() in *p. */
+void fill_ctdb_uint32(uint32_t *p)
+{
+ *p = rand32();
+}
+
+/* Assert that the two pointed-to unsigned 32-bit values are equal. */
+void verify_ctdb_uint32(uint32_t *p1, uint32_t *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/* Store a random 64-bit value from rand64() in *p. */
+void fill_ctdb_uint64(uint64_t *p)
+{
+ *p = rand64();
+}
+
+/* Assert that the two pointed-to 64-bit values are equal. */
+void verify_ctdb_uint64(uint64_t *p1, uint64_t *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/* Store a random double from rand_double() in *p. */
+void fill_ctdb_double(double *p)
+{
+ *p = rand_double();
+}
+
+/*
+ * Assert that the two pointed-to doubles compare equal with ==; no
+ * tolerance is applied.
+ */
+void verify_ctdb_double(double *p1, double *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/*
+ * Store a random boolean in *p: true when rand_int(2) yields 0, false
+ * otherwise.
+ */
+void fill_ctdb_bool(bool *p)
+{
+	*p = (rand_int(2) == 0);
+}
+
+/* Assert that the two pointed-to booleans are equal. */
+void verify_ctdb_bool(bool *p1, bool *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/*
+ * Allocate a random NUL-terminated string on mem_ctx and store it in
+ * *p.  fill_string() writes len bytes including the terminator, so the
+ * string is 1..1024 characters long; the allocation is one byte larger
+ * than what is written.
+ */
+void fill_ctdb_string(TALLOC_CTX *mem_ctx, const char **p)
+{
+	int len = rand_int(1024) + 2;
+	char *buf = talloc_size(mem_ctx, len+1);
+
+	assert(buf != NULL);
+
+	fill_string(buf, len);
+	*p = buf;
+}
+
+/*
+ * Assert that two string pointers refer to equal strings.  If either
+ * string is NULL, both must be NULL.
+ */
+void verify_ctdb_string(const char **p1, const char **p2)
+{
+	if (*p1 != NULL && *p2 != NULL) {
+		verify_string(*p1, *p2);
+		return;
+	}
+
+	assert(*p1 == *p2);
+}
+
+/* Fill *p exactly as fill_ctdb_string() does. */
+void fill_ctdb_stringn(TALLOC_CTX *mem_ctx, const char **p)
+{
+ fill_ctdb_string(mem_ctx, p);
+}
+
+/* Compare two strings exactly as verify_ctdb_string() does. */
+void verify_ctdb_stringn(const char **p1, const char **p2)
+{
+ verify_ctdb_string(p1, p2);
+}
+
+/* Store a random value from rand32() in *p. */
+void fill_ctdb_pid(pid_t *p)
+{
+ *p = rand32();
+}
+
+/* Assert that the two pointed-to pids are equal. */
+void verify_ctdb_pid(pid_t *p1, pid_t *p2)
+{
+ assert(*p1 == *p2);
+}
+
+/*
+ * Fill a timeval with random seconds (rand32()) and a microsecond
+ * value from rand_int(1000000), which keeps tv_usec below one second.
+ */
+void fill_ctdb_timeval(struct timeval *p)
+{
+ p->tv_sec = rand32();
+ p->tv_usec = rand_int(1000000);
+}
+
+/* Assert that tv_sec and tv_usec are equal in both timevals. */
+void verify_ctdb_timeval(struct timeval *p1, struct timeval *p2)
+{
+ assert(p1->tv_sec == p2->tv_sec);
+ assert(p1->tv_usec == p2->tv_usec);
+}
+
+/* Seed of the iteration currently run by protocol_test_iterate() */
+static unsigned int seed;
+/* Tag set by protocol_test_iterate_tag(), printed by the abort handler */
+static char protocol_test_iterate_buf[1024];
+
+/*
+ * SIGABRT handler installed by protocol_test_iterate().
+ *
+ * Prints the seed of the failing iteration and the current tag (if one
+ * was set), logs a stack trace, then restores the default SIGABRT
+ * disposition and aborts again so the process terminates as usual.
+ */
+static void protocol_test_iterate_abort_handler(int sig)
+{
+	struct sigaction act = {
+		.sa_handler = SIG_DFL,
+	};
+
+	/* seed is unsigned int, so it must be printed with %u */
+	fprintf(stderr, "Failed with seed: %u\n", seed);
+	if (protocol_test_iterate_buf[0] != '\0') {
+		fprintf(stderr, " tag: %s\n", protocol_test_iterate_buf);
+	}
+	log_stack_trace();
+	sigaction(SIGABRT, &act, NULL);
+	abort();
+}
+
+/*
+ * Format a printf-style tag into protocol_test_iterate_buf.  The abort
+ * handler prints this buffer when it is non-empty.  Output longer than
+ * the buffer is truncated by vsnprintf().
+ */
+void protocol_test_iterate_tag(const char *fmt, ...)
+{
+ va_list ap;
+ int count;
+
+ va_start(ap,fmt);
+ count = vsnprintf(protocol_test_iterate_buf,
+ sizeof(protocol_test_iterate_buf),
+ fmt,
+ ap);
+ va_end(ap);
+
+ /* A negative count means vsnprintf() reported an error */
+ assert(count >= 0);
+ /* Force NUL termination of the buffer regardless of the result */
+ protocol_test_iterate_buf[sizeof(protocol_test_iterate_buf) - 1] = '\0';
+}
+
+/*
+ * Run test_func() once for every seed in the range given on the
+ * command line.
+ *
+ * argv[1] is the first seed and the optional argv[2] the last one
+ * (which must be greater than the first); with a single argument only
+ * that seed is run.  Any other argument count, or an invalid range,
+ * prints a message to stderr and exits with status 1.  A SIGABRT
+ * handler is installed first so that a failing assert reports the seed.
+ */
+void protocol_test_iterate(int argc,
+			   const char *argv[],
+			   void (*test_func)(void))
+{
+	struct sigaction act = {
+		.sa_handler = protocol_test_iterate_abort_handler,
+	};
+	unsigned int min, max;
+
+	if (argc != 2 && argc != 3) {
+		fprintf(stderr, "usage: %s min [max]\n", argv[0]);
+		exit(1);
+	}
+
+	min = atoi(argv[1]);
+	max = min;
+	if (argc == 3) {
+		max = atoi(argv[2]);
+		if (min >= max) {
+			fprintf(stderr,
+				"%s: min must be less than max\n",
+				argv[0]);
+			exit(1);
+		}
+	}
+
+	sigaction(SIGABRT, &act, NULL);
+
+	seed = min;
+	while (seed <= max) {
+		/* Re-seed so every iteration can be reproduced on its own */
+		srandom(seed);
+
+		test_func();
+		seed++;
+	}
+}
diff --git a/ctdb/tests/src/protocol_common_basic.h b/ctdb/tests/src/protocol_common_basic.h
new file mode 100644
index 0000000..22a11b3
--- /dev/null
+++ b/ctdb/tests/src/protocol_common_basic.h
@@ -0,0 +1,175 @@
+/*
+ protocol tests - common functions - basic types
+
+ Copyright (C) Amitay Isaacs 2015-2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PROTOCOL_COMMON_BASIC_H__
+#define __CTDB_PROTOCOL_COMMON_BASIC_H__
+
+#include "replace.h"
+
+#include <talloc.h>
+
+/*
+ * Generate test routines
+ *
+ * These helpers build identifiers by token pasting: for NAME they give
+ * test_NAME, fill_NAME, verify_NAME, NAME_len, NAME_push and NAME_pull.
+ */
+
+#define TEST_FUNC(NAME) test_ ##NAME
+#define FILL_FUNC(NAME) fill_ ##NAME
+#define VERIFY_FUNC(NAME) verify_ ##NAME
+#define LEN_FUNC(NAME) NAME## _len
+#define PUSH_FUNC(NAME) NAME## _push
+#define PULL_FUNC(NAME) NAME## _pull
+
+/*
+ * Test for basic data types that do not need memory allocation
+ * For example - int32_t, uint32_t, uint64_t
+ *
+ * Expands to a static function test_NAME() that round-trips one value:
+ * fill_NAME() produces p1, NAME_len() gives its encoded size (which must
+ * fit in BUFFER), NAME_push() writes it to BUFFER and NAME_pull() reads
+ * it back into p2.  Push and pull must each report exactly NAME_len()
+ * bytes, pull must return 0, and verify_NAME() must accept the pair.
+ */
+#define PROTOCOL_TYPE1_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(void) \
+{ \
+ TYPE p1; \
+ TYPE p2; \
+ size_t buflen, np = 0; \
+ int ret; \
+\
+ FILL_FUNC(NAME)(&p1); \
+ buflen = LEN_FUNC(NAME)(&p1); \
+ assert(buflen < sizeof(BUFFER)); \
+ PUSH_FUNC(NAME)(&p1, BUFFER, &np); \
+ assert(np == buflen); \
+ np = 0; \
+ ret = PULL_FUNC(NAME)(BUFFER, buflen, &p2, &np); \
+ assert(ret == 0); \
+ assert(np == buflen); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+}
+
+/*
+ * Test for container data types that need memory allocation for sub-elements
+ * For example - TDB_DATA
+ *
+ * Expands to a static function test_NAME() doing the same push/pull
+ * round trip as PROTOCOL_TYPE1_TEST, except that p1 and p2 live on the
+ * stack while fill_NAME() and NAME_pull() receive a temporary talloc
+ * context for sub-element allocations.  The context is freed at the end.
+ */
+#define PROTOCOL_TYPE2_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ TYPE p1; \
+ TYPE p2; \
+ size_t buflen, np = 0; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ FILL_FUNC(NAME)(mem_ctx, &p1); \
+ buflen = LEN_FUNC(NAME)(&p1); \
+ assert(buflen < sizeof(BUFFER)); \
+ PUSH_FUNC(NAME)(&p1, BUFFER, &np); \
+ assert(np == buflen); \
+ np = 0; \
+ ret = PULL_FUNC(NAME)(BUFFER, buflen, mem_ctx, &p2, &np); \
+ assert(ret == 0); \
+ assert(np == buflen); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * Test for derived data types that need memory allocation
+ * For example - most ctdb structures
+ *
+ * Expands to a static function test_NAME() doing the same push/pull
+ * round trip as PROTOCOL_TYPE1_TEST, except that p1 is a zeroed talloc
+ * object which also serves as the memory context passed to fill_NAME(),
+ * and NAME_pull() allocates p2 itself on mem_ctx.  Everything is
+ * released by freeing mem_ctx at the end.
+ */
+#define PROTOCOL_TYPE3_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ TYPE *p1, *p2; \
+ size_t buflen, np = 0; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ p1 = talloc_zero(mem_ctx, TYPE); \
+ assert(p1 != NULL); \
+ FILL_FUNC(NAME)(p1, p1); \
+ buflen = LEN_FUNC(NAME)(p1); \
+ assert(buflen < sizeof(BUFFER)); \
+ PUSH_FUNC(NAME)(p1, BUFFER, &np); \
+ assert(np == buflen); \
+ np = 0; \
+ ret = PULL_FUNC(NAME)(BUFFER, buflen, mem_ctx, &p2, &np); \
+ assert(ret == 0); \
+ assert(np == buflen); \
+ VERIFY_FUNC(NAME)(p1, p2); \
+ talloc_free(mem_ctx); \
+}
+
+extern uint8_t BUFFER[1024*1024];
+
+int rand_int(int max);
+uint8_t rand8(void);
+uint16_t rand16(void);
+int32_t rand32i(void);
+uint32_t rand32(void);
+uint64_t rand64(void);
+double rand_double(void);
+
+void fill_buffer(void *p, size_t len);
+void verify_buffer(void *p1, void *p2, size_t len);
+
+void fill_string(char *p, size_t len);
+void verify_string(const char *p1, const char *p2);
+
+void fill_ctdb_uint8(uint8_t *p);
+void verify_ctdb_uint8(uint8_t *p1, uint8_t *p2);
+
+void fill_ctdb_uint16(uint16_t *p);
+void verify_ctdb_uint16(uint16_t *p1, uint16_t *p2);
+
+void fill_ctdb_int32(int32_t *p);
+void verify_ctdb_int32(int32_t *p1, int32_t *p2);
+
+void fill_ctdb_uint32(uint32_t *p);
+void verify_ctdb_uint32(uint32_t *p1, uint32_t *p2);
+
+void fill_ctdb_uint64(uint64_t *p);
+void verify_ctdb_uint64(uint64_t *p1, uint64_t *p2);
+
+void fill_ctdb_double(double *p);
+void verify_ctdb_double(double *p1, double *p2);
+
+void fill_ctdb_bool(bool *p);
+void verify_ctdb_bool(bool *p1, bool *p2);
+
+void fill_ctdb_string(TALLOC_CTX *mem_ctx, const char **p);
+void verify_ctdb_string(const char **p1, const char **p2);
+
+void fill_ctdb_stringn(TALLOC_CTX *mem_ctx, const char **p);
+void verify_ctdb_stringn(const char **p1, const char **p2);
+
+void fill_ctdb_pid(pid_t *p);
+void verify_ctdb_pid(pid_t *p1, pid_t *p2);
+
+void fill_ctdb_timeval(struct timeval *p);
+void verify_ctdb_timeval(struct timeval *p1, struct timeval *p2);
+
+/*
+ * Record a printf-style tag describing the test case in progress.
+ *
+ * This is a variadic function, so the arguments to check start at
+ * position 2.  A first-to-check index of 0 is meant for va_list
+ * functions and would skip argument checking here.
+ */
+void protocol_test_iterate_tag(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2);
+void protocol_test_iterate(int argc,
+ const char *argv[],
+ void (*test_func)(void));
+
+#endif /* __CTDB_PROTOCOL_COMMON_BASIC_H__ */
+
+
diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c
new file mode 100644
index 0000000..8a8e114
--- /dev/null
+++ b/ctdb/tests/src/protocol_common_ctdb.c
@@ -0,0 +1,1967 @@
+/*
+ protocol tests - ctdb protocol
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <assert.h>
+
+#include "tests/src/protocol_common.h"
+#include "tests/src/protocol_common_ctdb.h"
+
+/*
+ * Functions to fill and verify protocol structures
+ */
+
+/*
+ * Fill the eight request header fields listed below, each with its own
+ * rand32() value.  length, ctdb_magic and ctdb_version are random too.
+ */
+void fill_ctdb_req_header(struct ctdb_req_header *h)
+{
+ h->length = rand32();
+ h->ctdb_magic = rand32();
+ h->ctdb_version = rand32();
+ h->generation = rand32();
+ h->operation = rand32();
+ h->destnode = rand32();
+ h->srcnode = rand32();
+ h->reqid = rand32();
+}
+
+/*
+ * Assert that headers h and h2 hold the same value in each of the
+ * eight header fields.  The first mismatch fails an assert().
+ */
+void verify_ctdb_req_header(struct ctdb_req_header *h,
+ struct ctdb_req_header *h2)
+{
+ assert(h->length == h2->length);
+ assert(h->ctdb_magic == h2->ctdb_magic);
+ assert(h->ctdb_version == h2->ctdb_version);
+ assert(h->generation == h2->generation);
+ assert(h->operation == h2->operation);
+ assert(h->destnode == h2->destnode);
+ assert(h->srcnode == h2->srcnode);
+ assert(h->reqid == h2->reqid);
+}
+
+/*
+ * Fill a call request: flags, db_id, callid and hopcount get rand32()
+ * values; key and calldata are filled by the tdb data helpers, which
+ * are handed mem_ctx.
+ *
+ * NOTE(review): key uses fill_tdb_data_nonnull() while calldata uses
+ * fill_tdb_data() - presumably so that the key is never empty; confirm
+ * against the helpers.
+ */
+void fill_ctdb_req_call(TALLOC_CTX *mem_ctx, struct ctdb_req_call *c)
+{
+ c->flags = rand32();
+ c->db_id = rand32();
+ c->callid = rand32();
+ c->hopcount = rand32();
+ fill_tdb_data_nonnull(mem_ctx, &c->key);
+ fill_tdb_data(mem_ctx, &c->calldata);
+}
+
+/*
+ * Assert that call requests c and c2 match: the four scalar fields are
+ * compared with ==, key and calldata with verify_tdb_data().
+ */
+void verify_ctdb_req_call(struct ctdb_req_call *c, struct ctdb_req_call *c2)
+{
+ assert(c->flags == c2->flags);
+ assert(c->db_id == c2->db_id);
+ assert(c->callid == c2->callid);
+ assert(c->hopcount == c2->hopcount);
+ verify_tdb_data(&c->key, &c2->key);
+ verify_tdb_data(&c->calldata, &c2->calldata);
+}
+
+/*
+ * Fill a call reply: status gets a rand32() value and data is filled by
+ * fill_tdb_data(), which is handed mem_ctx.
+ */
+void fill_ctdb_reply_call(TALLOC_CTX *mem_ctx, struct ctdb_reply_call *c)
+{
+ c->status = rand32();
+ fill_tdb_data(mem_ctx, &c->data);
+}
+
+/*
+ * Assert that call replies c and c2 carry the same status and, via
+ * verify_tdb_data(), the same data.
+ */
+void verify_ctdb_reply_call(struct ctdb_reply_call *c,
+ struct ctdb_reply_call *c2)
+{
+ assert(c->status == c2->status);
+ verify_tdb_data(&c->data, &c2->data);
+}
+
+/*
+ * Fill an error reply: status gets a rand32() value and msg is filled
+ * by fill_tdb_data(), which is handed mem_ctx.
+ */
+void fill_ctdb_reply_error(TALLOC_CTX *mem_ctx, struct ctdb_reply_error *c)
+{
+ c->status = rand32();
+ fill_tdb_data(mem_ctx, &c->msg);
+}
+
+/*
+ * Assert that error replies c and c2 carry the same status and, via
+ * verify_tdb_data(), the same msg.
+ */
+void verify_ctdb_reply_error(struct ctdb_reply_error *c,
+ struct ctdb_reply_error *c2)
+{
+ assert(c->status == c2->status);
+ verify_tdb_data(&c->msg, &c2->msg);
+}
+
+/*
+ * Fill a dmaster request: db_id and dmaster get rand32() values, rsn a
+ * rand64() value; key and data are filled by the tdb data helpers,
+ * which are handed mem_ctx.
+ *
+ * NOTE(review): key uses fill_tdb_data_nonnull() while data uses
+ * fill_tdb_data() - presumably so that the key is never empty; confirm
+ * against the helpers.
+ */
+void fill_ctdb_req_dmaster(TALLOC_CTX *mem_ctx, struct ctdb_req_dmaster *c)
+{
+ c->db_id = rand32();
+ c->rsn = rand64();
+ c->dmaster = rand32();
+ fill_tdb_data_nonnull(mem_ctx, &c->key);
+ fill_tdb_data(mem_ctx, &c->data);
+}
+
+/*
+ * Assert that dmaster requests c and c2 match: db_id, rsn and dmaster
+ * are compared with ==, key and data with verify_tdb_data().
+ */
+void verify_ctdb_req_dmaster(struct ctdb_req_dmaster *c,
+ struct ctdb_req_dmaster *c2)
+{
+ assert(c->db_id == c2->db_id);
+ assert(c->rsn == c2->rsn);
+ assert(c->dmaster == c2->dmaster);
+ verify_tdb_data(&c->key, &c2->key);
+ verify_tdb_data(&c->data, &c2->data);
+}
+
+/*
+ * Fill a dmaster reply: db_id gets a rand32() value, rsn a rand64()
+ * value; key and data are filled by the tdb data helpers, which are
+ * handed mem_ctx.
+ *
+ * NOTE(review): key uses fill_tdb_data_nonnull() while data uses
+ * fill_tdb_data() - presumably so that the key is never empty; confirm
+ * against the helpers.
+ */
+void fill_ctdb_reply_dmaster(TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_dmaster *c)
+{
+ c->db_id = rand32();
+ c->rsn = rand64();
+ fill_tdb_data_nonnull(mem_ctx, &c->key);
+ fill_tdb_data(mem_ctx, &c->data);
+}
+
+/*
+ * Assert that dmaster replies c and c2 match: db_id and rsn are
+ * compared with ==, key and data with verify_tdb_data().
+ */
+void verify_ctdb_reply_dmaster(struct ctdb_reply_dmaster *c,
+ struct ctdb_reply_dmaster *c2)
+{
+ assert(c->db_id == c2->db_id);
+ assert(c->rsn == c2->rsn);
+ verify_tdb_data(&c->key, &c2->key);
+ verify_tdb_data(&c->data, &c2->data);
+}
+
+/*
+ * Fill the request payload of a control with random test data.
+ *
+ * cd->opcode is set to opcode, then the member of cd->data used by that
+ * control is populated.  Structure payloads are allocated from mem_ctx
+ * with talloc() and the allocation is checked with assert(); string
+ * payloads come from fill_ctdb_string().  Controls that take the same
+ * payload share one case body.  An opcode that is not listed leaves
+ * cd->data untouched.
+ */
+void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx,
+				struct ctdb_req_control_data *cd,
+				uint32_t opcode)
+{
+	cd->opcode = opcode;
+
+	switch (opcode) {
+	/* Controls whose request carries no payload */
+	case CTDB_CONTROL_STATISTICS:
+	case CTDB_CONTROL_PING:
+	case CTDB_CONTROL_GETVNNMAP:
+	case CTDB_CONTROL_GET_DEBUG:
+	case CTDB_CONTROL_GET_DBMAP:
+	case CTDB_CONTROL_GET_RECMODE:
+	case CTDB_CONTROL_STATISTICS_RESET:
+	case CTDB_CONTROL_REGISTER_SRVID:
+	case CTDB_CONTROL_DEREGISTER_SRVID:
+	case CTDB_CONTROL_DUMP_MEMORY:
+	case CTDB_CONTROL_GET_PID:
+	case CTDB_CONTROL_FREEZE:
+	case CTDB_CONTROL_GET_PNN:
+	case CTDB_CONTROL_SHUTDOWN:
+	case CTDB_CONTROL_STARTUP:
+	case CTDB_CONTROL_LIST_TUNABLES:
+	case CTDB_CONTROL_GET_ALL_TUNABLES:
+	case CTDB_CONTROL_UPTIME:
+	case CTDB_CONTROL_START_RECOVERY:
+	case CTDB_CONTROL_END_RECOVERY:
+	case CTDB_CONTROL_RELOAD_NODES_FILE:
+	case CTDB_CONTROL_GET_CAPABILITIES:
+	case CTDB_CONTROL_RECD_PING:
+	case CTDB_CONTROL_GET_PUBLIC_IPS:
+	case CTDB_CONTROL_GET_NODEMAP:
+	case CTDB_CONTROL_GET_RECLOCK_FILE:
+	case CTDB_CONTROL_STOP_NODE:
+	case CTDB_CONTROL_CONTINUE_NODE:
+	case CTDB_CONTROL_GET_BAN_STATE:
+	case CTDB_CONTROL_GET_IFACES:
+	case CTDB_CONTROL_GET_STAT_HISTORY:
+	case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
+	case CTDB_CONTROL_IPREALLOCATED:
+	case CTDB_CONTROL_GET_RUNSTATE:
+	case CTDB_CONTROL_GET_NODES_FILE:
+	case CTDB_CONTROL_TUNNEL_REGISTER:
+	case CTDB_CONTROL_TUNNEL_DEREGISTER:
+	case CTDB_CONTROL_DISABLE_NODE:
+	case CTDB_CONTROL_ENABLE_NODE:
+		break;
+
+	/* Scalar payloads */
+	case CTDB_CONTROL_PROCESS_EXISTS:
+		cd->data.pid = rand32();
+		break;
+
+	case CTDB_CONTROL_GETDBPATH:
+	case CTDB_CONTROL_GET_DBNAME:
+	case CTDB_CONTROL_ENABLE_SEQNUM:
+	case CTDB_CONTROL_UPDATE_SEQNUM:
+	case CTDB_CONTROL_GET_DB_SEQNUM:
+	case CTDB_CONTROL_DB_SET_HEALTHY:
+	case CTDB_CONTROL_DB_GET_HEALTH:
+	case CTDB_CONTROL_SET_DB_READONLY:
+	case CTDB_CONTROL_GET_DB_STATISTICS:
+	case CTDB_CONTROL_SET_DB_STICKY:
+	case CTDB_CONTROL_DB_DETACH:
+	case CTDB_CONTROL_DB_FREEZE:
+	case CTDB_CONTROL_DB_THAW:
+	case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
+	case CTDB_CONTROL_DB_PUSH_CONFIRM:
+	case CTDB_CONTROL_DB_OPEN_FLAGS:
+		cd->data.db_id = rand32();
+		break;
+
+	case CTDB_CONTROL_SET_DEBUG:
+		cd->data.loglevel = rand_int(5);
+		break;
+
+	case CTDB_CONTROL_SET_RECMODE:
+		cd->data.recmode = rand_int(2);
+		break;
+
+	case CTDB_CONTROL_SET_LMASTERROLE:
+	case CTDB_CONTROL_SET_RECMASTERROLE:
+		cd->data.role = rand_int(2);
+		break;
+
+	case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+		cd->data.reclock_latency = rand_double();
+		break;
+
+	case CTDB_CONTROL_DEREGISTER_NOTIFY:
+		cd->data.srvid = rand64();
+		break;
+
+	/* String payloads */
+	case CTDB_CONTROL_DB_ATTACH:
+	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+	case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+		fill_ctdb_string(mem_ctx, &cd->data.db_name);
+		assert(cd->data.db_name != NULL);
+		break;
+
+	case CTDB_CONTROL_GET_TUNABLE:
+		fill_ctdb_string(mem_ctx, &cd->data.tun_var);
+		assert(cd->data.tun_var != NULL);
+		break;
+
+	/* Structure payloads, allocated from mem_ctx */
+	case CTDB_CONTROL_SETVNNMAP:
+		cd->data.vnnmap = talloc(mem_ctx, struct ctdb_vnn_map);
+		assert(cd->data.vnnmap != NULL);
+		fill_ctdb_vnn_map(mem_ctx, cd->data.vnnmap);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_START:
+	case CTDB_CONTROL_TRAVERSE_KILL:
+		cd->data.traverse_start = talloc(mem_ctx,
+						 struct ctdb_traverse_start);
+		assert(cd->data.traverse_start != NULL);
+		fill_ctdb_traverse_start(mem_ctx, cd->data.traverse_start);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_ALL:
+		cd->data.traverse_all = talloc(mem_ctx,
+					       struct ctdb_traverse_all);
+		assert(cd->data.traverse_all != NULL);
+		fill_ctdb_traverse_all(mem_ctx, cd->data.traverse_all);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_DATA:
+		cd->data.rec_data = talloc(mem_ctx, struct ctdb_rec_data);
+		assert(cd->data.rec_data != NULL);
+		fill_ctdb_rec_data(mem_ctx, cd->data.rec_data);
+		break;
+
+	case CTDB_CONTROL_TCP_CLIENT:
+	case CTDB_CONTROL_TCP_ADD:
+	case CTDB_CONTROL_TCP_REMOVE:
+	case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+	case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+	case CTDB_CONTROL_TCP_CLIENT_PASSED:
+		cd->data.conn = talloc(mem_ctx, struct ctdb_connection);
+		assert(cd->data.conn != NULL);
+		fill_ctdb_connection(mem_ctx, cd->data.conn);
+		break;
+
+	case CTDB_CONTROL_SET_TUNABLE:
+		cd->data.tunable = talloc(mem_ctx, struct ctdb_tunable);
+		assert(cd->data.tunable != NULL);
+		fill_ctdb_tunable(mem_ctx, cd->data.tunable);
+		break;
+
+	case CTDB_CONTROL_MODIFY_FLAGS:
+		cd->data.flag_change = talloc(mem_ctx,
+					      struct ctdb_node_flag_change);
+		assert(cd->data.flag_change != NULL);
+		fill_ctdb_node_flag_change(mem_ctx, cd->data.flag_change);
+		break;
+
+	case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+	case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+		cd->data.addr = talloc(mem_ctx, ctdb_sock_addr);
+		assert(cd->data.addr != NULL);
+		fill_ctdb_sock_addr(mem_ctx, cd->data.addr);
+		break;
+
+	case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+		cd->data.tickles = talloc(mem_ctx, struct ctdb_tickle_list);
+		assert(cd->data.tickles != NULL);
+		fill_ctdb_tickle_list(mem_ctx, cd->data.tickles);
+		break;
+
+	case CTDB_CONTROL_UPDATE_RECORD:
+	case CTDB_CONTROL_TRY_DELETE_RECORDS:
+	case CTDB_CONTROL_TRANS3_COMMIT:
+	case CTDB_CONTROL_VACUUM_FETCH:
+		cd->data.recbuf = talloc(mem_ctx, struct ctdb_rec_buffer);
+		assert(cd->data.recbuf != NULL);
+		fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf);
+		break;
+
+	case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+	case CTDB_CONTROL_ADD_PUBLIC_IP:
+	case CTDB_CONTROL_DEL_PUBLIC_IP:
+		cd->data.addr_info = talloc(mem_ctx, struct ctdb_addr_info);
+		assert(cd->data.addr_info != NULL);
+		fill_ctdb_addr_info(mem_ctx, cd->data.addr_info);
+		break;
+
+	case CTDB_CONTROL_WIPE_DATABASE:
+	case CTDB_CONTROL_DB_TRANSACTION_START:
+	case CTDB_CONTROL_DB_TRANSACTION_COMMIT:
+		cd->data.transdb = talloc(mem_ctx, struct ctdb_transdb);
+		assert(cd->data.transdb != NULL);
+		fill_ctdb_transdb(mem_ctx, cd->data.transdb);
+		break;
+
+	case CTDB_CONTROL_RELEASE_IP:
+	case CTDB_CONTROL_TAKEOVER_IP:
+		cd->data.pubip = talloc(mem_ctx, struct ctdb_public_ip);
+		assert(cd->data.pubip != NULL);
+		fill_ctdb_public_ip(mem_ctx, cd->data.pubip);
+		break;
+
+	case CTDB_CONTROL_SET_BAN_STATE:
+		cd->data.ban_state = talloc(mem_ctx, struct ctdb_ban_state);
+		assert(cd->data.ban_state != NULL);
+		fill_ctdb_ban_state(mem_ctx, cd->data.ban_state);
+		break;
+
+	case CTDB_CONTROL_REGISTER_NOTIFY:
+		cd->data.notify = talloc(mem_ctx, struct ctdb_notify_data);
+		assert(cd->data.notify != NULL);
+		fill_ctdb_notify_data(mem_ctx, cd->data.notify);
+		break;
+
+	case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+		cd->data.iface = talloc(mem_ctx, struct ctdb_iface);
+		assert(cd->data.iface != NULL);
+		fill_ctdb_iface(mem_ctx, cd->data.iface);
+		break;
+
+	case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+		cd->data.key = talloc(mem_ctx, struct ctdb_key_data);
+		assert(cd->data.key != NULL);
+		fill_ctdb_key_data(mem_ctx, cd->data.key);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_START_EXT:
+		cd->data.traverse_start_ext =
+			talloc(mem_ctx, struct ctdb_traverse_start_ext);
+		assert(cd->data.traverse_start_ext != NULL);
+		fill_ctdb_traverse_start_ext(mem_ctx,
+					     cd->data.traverse_start_ext);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+		cd->data.traverse_all_ext =
+			talloc(mem_ctx, struct ctdb_traverse_all_ext);
+		assert(cd->data.traverse_all_ext != NULL);
+		fill_ctdb_traverse_all_ext(mem_ctx,
+					   cd->data.traverse_all_ext);
+		break;
+
+	case CTDB_CONTROL_DB_PULL:
+	case CTDB_CONTROL_DB_PUSH_START:
+		cd->data.pulldb_ext = talloc(mem_ctx, struct ctdb_pulldb_ext);
+		assert(cd->data.pulldb_ext != NULL);
+		fill_ctdb_pulldb_ext(mem_ctx, cd->data.pulldb_ext);
+		break;
+
+	case CTDB_CONTROL_CHECK_PID_SRVID:
+		cd->data.pid_srvid = talloc(mem_ctx, struct ctdb_pid_srvid);
+		assert(cd->data.pid_srvid != NULL);
+		fill_ctdb_pid_srvid(mem_ctx, cd->data.pid_srvid);
+		break;
+
+	case CTDB_CONTROL_DB_VACUUM:
+		cd->data.db_vacuum = talloc(mem_ctx, struct ctdb_db_vacuum);
+		assert(cd->data.db_vacuum != NULL);
+		fill_ctdb_db_vacuum(mem_ctx, cd->data.db_vacuum);
+		break;
+
+	case CTDB_CONTROL_ECHO_DATA:
+		cd->data.echo_data = talloc(mem_ctx, struct ctdb_echo_data);
+		assert(cd->data.echo_data != NULL);
+		fill_ctdb_echo_data(mem_ctx, cd->data.echo_data);
+		break;
+	}
+}
+
+/*
+ * Assert that two control request payloads match.
+ *
+ * The opcodes must be equal.  After that the member of cd->data used by
+ * the control is compared, either directly with == or through the
+ * verify_*() helper for its type.  Controls that take the same payload
+ * share one case body.  For controls without a request payload, and for
+ * opcodes that are not listed, only the opcode is compared.
+ */
+void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd,
+				  struct ctdb_req_control_data *cd2)
+{
+	assert(cd->opcode == cd2->opcode);
+
+	switch (cd->opcode) {
+	/* Controls whose request carries no payload */
+	case CTDB_CONTROL_STATISTICS:
+	case CTDB_CONTROL_PING:
+	case CTDB_CONTROL_GETVNNMAP:
+	case CTDB_CONTROL_GET_DEBUG:
+	case CTDB_CONTROL_GET_DBMAP:
+	case CTDB_CONTROL_GET_RECMODE:
+	case CTDB_CONTROL_STATISTICS_RESET:
+	case CTDB_CONTROL_REGISTER_SRVID:
+	case CTDB_CONTROL_DEREGISTER_SRVID:
+	case CTDB_CONTROL_DUMP_MEMORY:
+	case CTDB_CONTROL_GET_PID:
+	case CTDB_CONTROL_FREEZE:
+	case CTDB_CONTROL_GET_PNN:
+	case CTDB_CONTROL_SHUTDOWN:
+	case CTDB_CONTROL_STARTUP:
+	case CTDB_CONTROL_LIST_TUNABLES:
+	case CTDB_CONTROL_GET_ALL_TUNABLES:
+	case CTDB_CONTROL_UPTIME:
+	case CTDB_CONTROL_START_RECOVERY:
+	case CTDB_CONTROL_END_RECOVERY:
+	case CTDB_CONTROL_RELOAD_NODES_FILE:
+	case CTDB_CONTROL_GET_CAPABILITIES:
+	case CTDB_CONTROL_RECD_PING:
+	case CTDB_CONTROL_GET_PUBLIC_IPS:
+	case CTDB_CONTROL_GET_NODEMAP:
+	case CTDB_CONTROL_GET_RECLOCK_FILE:
+	case CTDB_CONTROL_STOP_NODE:
+	case CTDB_CONTROL_CONTINUE_NODE:
+	case CTDB_CONTROL_GET_BAN_STATE:
+	case CTDB_CONTROL_GET_IFACES:
+	case CTDB_CONTROL_GET_STAT_HISTORY:
+	case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
+	case CTDB_CONTROL_IPREALLOCATED:
+	case CTDB_CONTROL_GET_RUNSTATE:
+	case CTDB_CONTROL_GET_NODES_FILE:
+	case CTDB_CONTROL_TUNNEL_REGISTER:
+	case CTDB_CONTROL_TUNNEL_DEREGISTER:
+	case CTDB_CONTROL_DISABLE_NODE:
+	case CTDB_CONTROL_ENABLE_NODE:
+		break;
+
+	/* Scalar payloads */
+	case CTDB_CONTROL_PROCESS_EXISTS:
+		assert(cd->data.pid == cd2->data.pid);
+		break;
+
+	case CTDB_CONTROL_GETDBPATH:
+	case CTDB_CONTROL_GET_DBNAME:
+	case CTDB_CONTROL_ENABLE_SEQNUM:
+	case CTDB_CONTROL_UPDATE_SEQNUM:
+	case CTDB_CONTROL_GET_DB_SEQNUM:
+	case CTDB_CONTROL_DB_SET_HEALTHY:
+	case CTDB_CONTROL_DB_GET_HEALTH:
+	case CTDB_CONTROL_SET_DB_READONLY:
+	case CTDB_CONTROL_GET_DB_STATISTICS:
+	case CTDB_CONTROL_SET_DB_STICKY:
+	case CTDB_CONTROL_DB_DETACH:
+	case CTDB_CONTROL_DB_FREEZE:
+	case CTDB_CONTROL_DB_THAW:
+	case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
+	case CTDB_CONTROL_DB_PUSH_CONFIRM:
+	case CTDB_CONTROL_DB_OPEN_FLAGS:
+		assert(cd->data.db_id == cd2->data.db_id);
+		break;
+
+	case CTDB_CONTROL_SET_DEBUG:
+		assert(cd->data.loglevel == cd2->data.loglevel);
+		break;
+
+	case CTDB_CONTROL_SET_RECMODE:
+		assert(cd->data.recmode == cd2->data.recmode);
+		break;
+
+	case CTDB_CONTROL_SET_LMASTERROLE:
+	case CTDB_CONTROL_SET_RECMASTERROLE:
+		assert(cd->data.role == cd2->data.role);
+		break;
+
+	case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+		assert(cd->data.reclock_latency == cd2->data.reclock_latency);
+		break;
+
+	case CTDB_CONTROL_DEREGISTER_NOTIFY:
+		assert(cd->data.srvid == cd2->data.srvid);
+		break;
+
+	/* String payloads */
+	case CTDB_CONTROL_DB_ATTACH:
+	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+	case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+		verify_ctdb_string(&cd->data.db_name, &cd2->data.db_name);
+		break;
+
+	case CTDB_CONTROL_GET_TUNABLE:
+		verify_ctdb_string(&cd->data.tun_var, &cd2->data.tun_var);
+		break;
+
+	/* Structure payloads */
+	case CTDB_CONTROL_SETVNNMAP:
+		verify_ctdb_vnn_map(cd->data.vnnmap, cd2->data.vnnmap);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_START:
+	case CTDB_CONTROL_TRAVERSE_KILL:
+		verify_ctdb_traverse_start(cd->data.traverse_start,
+					   cd2->data.traverse_start);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_ALL:
+		verify_ctdb_traverse_all(cd->data.traverse_all,
+					 cd2->data.traverse_all);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_DATA:
+		verify_ctdb_rec_data(cd->data.rec_data, cd2->data.rec_data);
+		break;
+
+	case CTDB_CONTROL_TCP_CLIENT:
+	case CTDB_CONTROL_TCP_ADD:
+	case CTDB_CONTROL_TCP_REMOVE:
+	case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+	case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+	case CTDB_CONTROL_TCP_CLIENT_PASSED:
+		verify_ctdb_connection(cd->data.conn, cd2->data.conn);
+		break;
+
+	case CTDB_CONTROL_SET_TUNABLE:
+		verify_ctdb_tunable(cd->data.tunable, cd2->data.tunable);
+		break;
+
+	case CTDB_CONTROL_MODIFY_FLAGS:
+		verify_ctdb_node_flag_change(cd->data.flag_change,
+					     cd2->data.flag_change);
+		break;
+
+	case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+	case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+		verify_ctdb_sock_addr(cd->data.addr, cd2->data.addr);
+		break;
+
+	case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+		verify_ctdb_tickle_list(cd->data.tickles, cd2->data.tickles);
+		break;
+
+	case CTDB_CONTROL_UPDATE_RECORD:
+	case CTDB_CONTROL_TRY_DELETE_RECORDS:
+	case CTDB_CONTROL_TRANS3_COMMIT:
+	case CTDB_CONTROL_VACUUM_FETCH:
+		verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf);
+		break;
+
+	case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+	case CTDB_CONTROL_ADD_PUBLIC_IP:
+	case CTDB_CONTROL_DEL_PUBLIC_IP:
+		verify_ctdb_addr_info(cd->data.addr_info, cd2->data.addr_info);
+		break;
+
+	case CTDB_CONTROL_WIPE_DATABASE:
+	case CTDB_CONTROL_DB_TRANSACTION_START:
+	case CTDB_CONTROL_DB_TRANSACTION_COMMIT:
+		verify_ctdb_transdb(cd->data.transdb, cd2->data.transdb);
+		break;
+
+	case CTDB_CONTROL_RELEASE_IP:
+	case CTDB_CONTROL_TAKEOVER_IP:
+		verify_ctdb_public_ip(cd->data.pubip, cd2->data.pubip);
+		break;
+
+	case CTDB_CONTROL_SET_BAN_STATE:
+		verify_ctdb_ban_state(cd->data.ban_state, cd2->data.ban_state);
+		break;
+
+	case CTDB_CONTROL_REGISTER_NOTIFY:
+		verify_ctdb_notify_data(cd->data.notify, cd2->data.notify);
+		break;
+
+	case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+		verify_ctdb_iface(cd->data.iface, cd2->data.iface);
+		break;
+
+	case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+		verify_ctdb_key_data(cd->data.key, cd2->data.key);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_START_EXT:
+		verify_ctdb_traverse_start_ext(cd->data.traverse_start_ext,
+					       cd2->data.traverse_start_ext);
+		break;
+
+	case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+		verify_ctdb_traverse_all_ext(cd->data.traverse_all_ext,
+					     cd2->data.traverse_all_ext);
+		break;
+
+	case CTDB_CONTROL_DB_PULL:
+	case CTDB_CONTROL_DB_PUSH_START:
+		verify_ctdb_pulldb_ext(cd->data.pulldb_ext,
+				       cd2->data.pulldb_ext);
+		break;
+
+	case CTDB_CONTROL_CHECK_PID_SRVID:
+		verify_ctdb_pid_srvid(cd->data.pid_srvid, cd2->data.pid_srvid);
+		break;
+
+	case CTDB_CONTROL_DB_VACUUM:
+		verify_ctdb_db_vacuum(cd->data.db_vacuum, cd2->data.db_vacuum);
+		break;
+
+	case CTDB_CONTROL_ECHO_DATA:
+		verify_ctdb_echo_data(cd->data.echo_data, cd2->data.echo_data);
+		break;
+	}
+}
+
+/*
+ * Fill a control request for the given opcode: pad, client_id and flags
+ * get rand32() values, srvid a rand64() value, and the payload in
+ * c->rdata is filled by fill_ctdb_req_control_data() for the same
+ * opcode.
+ */
+void fill_ctdb_req_control(TALLOC_CTX *mem_ctx, struct ctdb_req_control *c,
+ uint32_t opcode)
+{
+ c->opcode = opcode;
+ c->pad = rand32();
+ c->srvid = rand64();
+ c->client_id = rand32();
+ c->flags = rand32();
+
+ fill_ctdb_req_control_data(mem_ctx, &c->rdata, opcode);
+}
+
+/*
+ * Assert that control requests c and c2 match: the five fixed fields
+ * are compared with ==, then the payloads are compared by
+ * verify_ctdb_req_control_data().
+ */
+void verify_ctdb_req_control(struct ctdb_req_control *c,
+ struct ctdb_req_control *c2)
+{
+ assert(c->opcode == c2->opcode);
+ assert(c->pad == c2->pad);
+ assert(c->srvid == c2->srvid);
+ assert(c->client_id == c2->client_id);
+ assert(c->flags == c2->flags);
+
+ verify_ctdb_req_control_data(&c->rdata, &c2->rdata);
+}
+
+/*
+ * Fill the reply payload of a control with random test data.
+ *
+ * cd->opcode is set to opcode, then the member of cd->data used by the
+ * reply to that control is populated.  Structure payloads are
+ * allocated from mem_ctx with talloc(); string payloads come from
+ * fill_ctdb_string().  Every pointer payload is checked against NULL
+ * with assert().  Controls that return the same payload share one case
+ * body.
+ *
+ * An opcode that is not listed leaves cd->data untouched.  That
+ * includes some controls that fill_ctdb_req_control_data() handles,
+ * e.g. CTDB_CONTROL_DB_FREEZE and CTDB_CONTROL_DB_THAW.
+ */
+void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx,
+				  struct ctdb_reply_control_data *cd,
+				  uint32_t opcode)
+{
+	cd->opcode = opcode;
+
+	switch (opcode) {
+	/* Controls whose reply carries no payload */
+	case CTDB_CONTROL_PROCESS_EXISTS:
+	case CTDB_CONTROL_PING:
+	case CTDB_CONTROL_SETVNNMAP:
+	case CTDB_CONTROL_SET_DEBUG:
+	case CTDB_CONTROL_GET_RECMODE:
+	case CTDB_CONTROL_SET_RECMODE:
+	case CTDB_CONTROL_STATISTICS_RESET:
+	case CTDB_CONTROL_TRAVERSE_START:
+	case CTDB_CONTROL_TRAVERSE_ALL:
+	case CTDB_CONTROL_TRAVERSE_DATA:
+	case CTDB_CONTROL_REGISTER_SRVID:
+	case CTDB_CONTROL_DEREGISTER_SRVID:
+	case CTDB_CONTROL_ENABLE_SEQNUM:
+	case CTDB_CONTROL_UPDATE_SEQNUM:
+	case CTDB_CONTROL_GET_PID:
+	case CTDB_CONTROL_FREEZE:
+	case CTDB_CONTROL_GET_PNN:
+	case CTDB_CONTROL_SHUTDOWN:
+	case CTDB_CONTROL_TCP_CLIENT:
+	case CTDB_CONTROL_TCP_ADD:
+	case CTDB_CONTROL_TCP_REMOVE:
+	case CTDB_CONTROL_STARTUP:
+	case CTDB_CONTROL_SET_TUNABLE:
+	case CTDB_CONTROL_MODIFY_FLAGS:
+	case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+	case CTDB_CONTROL_UPDATE_RECORD:
+	case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+	case CTDB_CONTROL_WIPE_DATABASE:
+	case CTDB_CONTROL_START_RECOVERY:
+	case CTDB_CONTROL_END_RECOVERY:
+	case CTDB_CONTROL_RELOAD_NODES_FILE:
+	case CTDB_CONTROL_ADD_PUBLIC_IP:
+	case CTDB_CONTROL_DEL_PUBLIC_IP:
+	case CTDB_CONTROL_RECD_PING:
+	case CTDB_CONTROL_RELEASE_IP:
+	case CTDB_CONTROL_TAKEOVER_IP:
+	case CTDB_CONTROL_TRAVERSE_KILL:
+	case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+	case CTDB_CONTROL_STOP_NODE:
+	case CTDB_CONTROL_CONTINUE_NODE:
+	case CTDB_CONTROL_SET_LMASTERROLE:
+	case CTDB_CONTROL_SET_RECMASTERROLE:
+	case CTDB_CONTROL_SET_BAN_STATE:
+	case CTDB_CONTROL_REGISTER_NOTIFY:
+	case CTDB_CONTROL_DEREGISTER_NOTIFY:
+	case CTDB_CONTROL_TRANS3_COMMIT:
+	case CTDB_CONTROL_DB_SET_HEALTHY:
+	case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+	case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+	case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+	case CTDB_CONTROL_SET_DB_READONLY:
+	case CTDB_CONTROL_TRAVERSE_START_EXT:
+	case CTDB_CONTROL_SET_DB_STICKY:
+	case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
+	case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+	case CTDB_CONTROL_IPREALLOCATED:
+	case CTDB_CONTROL_DB_DETACH:
+	case CTDB_CONTROL_CHECK_PID_SRVID:
+	case CTDB_CONTROL_TUNNEL_REGISTER:
+	case CTDB_CONTROL_TUNNEL_DEREGISTER:
+	case CTDB_CONTROL_VACUUM_FETCH:
+	case CTDB_CONTROL_DB_VACUUM:
+	case CTDB_CONTROL_DISABLE_NODE:
+	case CTDB_CONTROL_ENABLE_NODE:
+	case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+	case CTDB_CONTROL_TCP_CLIENT_PASSED:
+		break;
+
+	/* Scalar payloads */
+	case CTDB_CONTROL_GET_DEBUG:
+		cd->data.loglevel = rand_int(5);
+		break;
+
+	case CTDB_CONTROL_DB_ATTACH:
+	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+	case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+		cd->data.db_id = rand32();
+		break;
+
+	case CTDB_CONTROL_GET_TUNABLE:
+		cd->data.tun_value = rand32();
+		break;
+
+	case CTDB_CONTROL_GET_CAPABILITIES:
+		cd->data.caps = rand32();
+		break;
+
+	case CTDB_CONTROL_GET_DB_SEQNUM:
+		cd->data.seqnum = rand64();
+		break;
+
+	case CTDB_CONTROL_GET_RUNSTATE:
+		cd->data.runstate = rand32();
+		break;
+
+	case CTDB_CONTROL_DB_PULL:
+	case CTDB_CONTROL_DB_PUSH_CONFIRM:
+		cd->data.num_records = rand32();
+		break;
+
+	case CTDB_CONTROL_DB_OPEN_FLAGS:
+		cd->data.tdb_flags = rand32();
+		break;
+
+	/* String payloads */
+	case CTDB_CONTROL_GETDBPATH:
+		fill_ctdb_string(mem_ctx, &cd->data.db_path);
+		assert(cd->data.db_path != NULL);
+		break;
+
+	case CTDB_CONTROL_GET_DBNAME:
+		fill_ctdb_string(mem_ctx, &cd->data.db_name);
+		assert(cd->data.db_name != NULL);
+		break;
+
+	case CTDB_CONTROL_DUMP_MEMORY:
+		fill_ctdb_string(mem_ctx, &cd->data.mem_str);
+		assert(cd->data.mem_str != NULL);
+		break;
+
+	case CTDB_CONTROL_GET_RECLOCK_FILE:
+		fill_ctdb_string(mem_ctx, &cd->data.reclock_file);
+		assert(cd->data.reclock_file != NULL);
+		break;
+
+	case CTDB_CONTROL_DB_GET_HEALTH:
+		fill_ctdb_string(mem_ctx, &cd->data.reason);
+		assert(cd->data.reason != NULL);
+		break;
+
+	/* Structure payloads, allocated from mem_ctx */
+	case CTDB_CONTROL_STATISTICS:
+		cd->data.stats = talloc(mem_ctx, struct ctdb_statistics);
+		assert(cd->data.stats != NULL);
+		fill_ctdb_statistics(mem_ctx, cd->data.stats);
+		break;
+
+	case CTDB_CONTROL_GETVNNMAP:
+		cd->data.vnnmap = talloc(mem_ctx, struct ctdb_vnn_map);
+		assert(cd->data.vnnmap != NULL);
+		fill_ctdb_vnn_map(mem_ctx, cd->data.vnnmap);
+		break;
+
+	case CTDB_CONTROL_GET_DBMAP:
+		cd->data.dbmap = talloc(mem_ctx, struct ctdb_dbid_map);
+		assert(cd->data.dbmap != NULL);
+		fill_ctdb_dbid_map(mem_ctx, cd->data.dbmap);
+		break;
+
+	case CTDB_CONTROL_LIST_TUNABLES:
+		cd->data.tun_var_list = talloc(mem_ctx, struct ctdb_var_list);
+		assert(cd->data.tun_var_list != NULL);
+		fill_ctdb_var_list(mem_ctx, cd->data.tun_var_list);
+		break;
+
+	case CTDB_CONTROL_GET_ALL_TUNABLES:
+		cd->data.tun_list = talloc(mem_ctx, struct ctdb_tunable_list);
+		assert(cd->data.tun_list != NULL);
+		fill_ctdb_tunable_list(mem_ctx, cd->data.tun_list);
+		break;
+
+	case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+		cd->data.tickles = talloc(mem_ctx, struct ctdb_tickle_list);
+		assert(cd->data.tickles != NULL);
+		fill_ctdb_tickle_list(mem_ctx, cd->data.tickles);
+		break;
+
+	case CTDB_CONTROL_UPTIME:
+		cd->data.uptime = talloc(mem_ctx, struct ctdb_uptime);
+		assert(cd->data.uptime != NULL);
+		fill_ctdb_uptime(mem_ctx, cd->data.uptime);
+		break;
+
+	case CTDB_CONTROL_TRY_DELETE_RECORDS:
+		cd->data.recbuf = talloc(mem_ctx, struct ctdb_rec_buffer);
+		assert(cd->data.recbuf != NULL);
+		fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf);
+		break;
+
+	case CTDB_CONTROL_GET_PUBLIC_IPS:
+		cd->data.pubip_list = talloc(mem_ctx,
+					     struct ctdb_public_ip_list);
+		assert(cd->data.pubip_list != NULL);
+		fill_ctdb_public_ip_list(mem_ctx, cd->data.pubip_list);
+		break;
+
+	case CTDB_CONTROL_GET_NODEMAP:
+	case CTDB_CONTROL_GET_NODES_FILE:
+		cd->data.nodemap = talloc(mem_ctx, struct ctdb_node_map);
+		assert(cd->data.nodemap != NULL);
+		fill_ctdb_node_map(mem_ctx, cd->data.nodemap);
+		break;
+
+	case CTDB_CONTROL_GET_BAN_STATE:
+		cd->data.ban_state = talloc(mem_ctx, struct ctdb_ban_state);
+		assert(cd->data.ban_state != NULL);
+		fill_ctdb_ban_state(mem_ctx, cd->data.ban_state);
+		break;
+
+	case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+		cd->data.ipinfo = talloc(mem_ctx, struct ctdb_public_ip_info);
+		assert(cd->data.ipinfo != NULL);
+		fill_ctdb_public_ip_info(mem_ctx, cd->data.ipinfo);
+		break;
+
+	case CTDB_CONTROL_GET_IFACES:
+		cd->data.iface_list = talloc(mem_ctx, struct ctdb_iface_list);
+		assert(cd->data.iface_list != NULL);
+		fill_ctdb_iface_list(mem_ctx, cd->data.iface_list);
+		break;
+
+	case CTDB_CONTROL_GET_STAT_HISTORY:
+		cd->data.stats_list = talloc(mem_ctx,
+					     struct ctdb_statistics_list);
+		assert(cd->data.stats_list != NULL);
+		fill_ctdb_statistics_list(mem_ctx, cd->data.stats_list);
+		break;
+
+	case CTDB_CONTROL_GET_DB_STATISTICS:
+		cd->data.dbstats = talloc(mem_ctx, struct ctdb_db_statistics);
+		assert(cd->data.dbstats != NULL);
+		fill_ctdb_db_statistics(mem_ctx, cd->data.dbstats);
+		break;
+
+	case CTDB_CONTROL_ECHO_DATA:
+		cd->data.echo_data = talloc(mem_ctx, struct ctdb_echo_data);
+		assert(cd->data.echo_data != NULL);
+		fill_ctdb_echo_data(mem_ctx, cd->data.echo_data);
+		break;
+	}
+}
+
+/*
+ * Assert that two reply control payloads are equivalent.
+ *
+ * The opcodes must be equal.  For opcodes whose reply carries a value in
+ * cd->data, that value is compared, either directly with assert() or via
+ * the matching verify_*() helper (defined outside this function).  For the
+ * other listed opcodes nothing further is compared, and an opcode that is
+ * not listed at all is silently ignored.
+ */
+void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd,
+				    struct ctdb_reply_control_data *cd2)
+{
+	assert(cd->opcode == cd2->opcode);
+
+	switch (cd->opcode) {
+	/* Opcodes for which no reply data is compared */
+	case CTDB_CONTROL_PROCESS_EXISTS:
+	case CTDB_CONTROL_PING:
+	case CTDB_CONTROL_SETVNNMAP:
+	case CTDB_CONTROL_SET_DEBUG:
+	case CTDB_CONTROL_GET_RECMODE:
+	case CTDB_CONTROL_SET_RECMODE:
+	case CTDB_CONTROL_STATISTICS_RESET:
+	case CTDB_CONTROL_TRAVERSE_START:
+	case CTDB_CONTROL_TRAVERSE_ALL:
+	case CTDB_CONTROL_TRAVERSE_DATA:
+	case CTDB_CONTROL_REGISTER_SRVID:
+	case CTDB_CONTROL_DEREGISTER_SRVID:
+	case CTDB_CONTROL_ENABLE_SEQNUM:
+	case CTDB_CONTROL_UPDATE_SEQNUM:
+	case CTDB_CONTROL_GET_PID:
+	case CTDB_CONTROL_FREEZE:
+	case CTDB_CONTROL_GET_PNN:
+	case CTDB_CONTROL_SHUTDOWN:
+	case CTDB_CONTROL_TCP_CLIENT:
+	case CTDB_CONTROL_TCP_ADD:
+	case CTDB_CONTROL_TCP_REMOVE:
+	case CTDB_CONTROL_STARTUP:
+	case CTDB_CONTROL_SET_TUNABLE:
+	case CTDB_CONTROL_MODIFY_FLAGS:
+	case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+	case CTDB_CONTROL_UPDATE_RECORD:
+	case CTDB_CONTROL_SEND_GRATUITOUS_ARP:
+	case CTDB_CONTROL_WIPE_DATABASE:
+	case CTDB_CONTROL_START_RECOVERY:
+	case CTDB_CONTROL_END_RECOVERY:
+	case CTDB_CONTROL_RELOAD_NODES_FILE:
+	case CTDB_CONTROL_ADD_PUBLIC_IP:
+	case CTDB_CONTROL_DEL_PUBLIC_IP:
+	case CTDB_CONTROL_RECD_PING:
+	case CTDB_CONTROL_RELEASE_IP:
+	case CTDB_CONTROL_TAKEOVER_IP:
+	case CTDB_CONTROL_TRAVERSE_KILL:
+	case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
+	case CTDB_CONTROL_STOP_NODE:
+	case CTDB_CONTROL_CONTINUE_NODE:
+	case CTDB_CONTROL_SET_LMASTERROLE:
+	case CTDB_CONTROL_SET_RECMASTERROLE:
+	case CTDB_CONTROL_SET_BAN_STATE:
+	case CTDB_CONTROL_REGISTER_NOTIFY:
+	case CTDB_CONTROL_DEREGISTER_NOTIFY:
+	case CTDB_CONTROL_TRANS3_COMMIT:
+	case CTDB_CONTROL_DB_SET_HEALTHY:
+	case CTDB_CONTROL_SET_IFACE_LINK_STATE:
+	case CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE:
+	case CTDB_CONTROL_SCHEDULE_FOR_DELETION:
+	case CTDB_CONTROL_SET_DB_READONLY:
+	case CTDB_CONTROL_TRAVERSE_START_EXT:
+	case CTDB_CONTROL_SET_DB_STICKY:
+	case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
+	case CTDB_CONTROL_TRAVERSE_ALL_EXT:
+	case CTDB_CONTROL_IPREALLOCATED:
+	case CTDB_CONTROL_DB_DETACH:
+	case CTDB_CONTROL_CHECK_PID_SRVID:
+	case CTDB_CONTROL_TUNNEL_REGISTER:
+	case CTDB_CONTROL_TUNNEL_DEREGISTER:
+	case CTDB_CONTROL_VACUUM_FETCH:
+	case CTDB_CONTROL_DB_VACUUM:
+	case CTDB_CONTROL_DISABLE_NODE:
+	case CTDB_CONTROL_ENABLE_NODE:
+	case CTDB_CONTROL_TCP_CLIENT_DISCONNECTED:
+	case CTDB_CONTROL_TCP_CLIENT_PASSED:
+		break;
+
+	/* Replies compared directly with == */
+	case CTDB_CONTROL_GET_DEBUG:
+		assert(cd->data.loglevel == cd2->data.loglevel);
+		break;
+
+	case CTDB_CONTROL_DB_ATTACH:
+	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
+	case CTDB_CONTROL_DB_ATTACH_REPLICATED:
+		assert(cd->data.db_id == cd2->data.db_id);
+		break;
+
+	case CTDB_CONTROL_GET_TUNABLE:
+		assert(cd->data.tun_value == cd2->data.tun_value);
+		break;
+
+	case CTDB_CONTROL_GET_CAPABILITIES:
+		assert(cd->data.caps == cd2->data.caps);
+		break;
+
+	case CTDB_CONTROL_GET_DB_SEQNUM:
+		assert(cd->data.seqnum == cd2->data.seqnum);
+		break;
+
+	case CTDB_CONTROL_GET_RUNSTATE:
+		assert(cd->data.runstate == cd2->data.runstate);
+		break;
+
+	case CTDB_CONTROL_DB_PULL:
+	case CTDB_CONTROL_DB_PUSH_CONFIRM:
+		assert(cd->data.num_records == cd2->data.num_records);
+		break;
+
+	case CTDB_CONTROL_DB_OPEN_FLAGS:
+		assert(cd->data.tdb_flags == cd2->data.tdb_flags);
+		break;
+
+	/* Replies compared with verify_ctdb_string() */
+	case CTDB_CONTROL_GETDBPATH:
+		verify_ctdb_string(&cd->data.db_path, &cd2->data.db_path);
+		break;
+
+	case CTDB_CONTROL_GET_DBNAME:
+		verify_ctdb_string(&cd->data.db_name, &cd2->data.db_name);
+		break;
+
+	case CTDB_CONTROL_DUMP_MEMORY:
+		verify_ctdb_string(&cd->data.mem_str, &cd2->data.mem_str);
+		break;
+
+	case CTDB_CONTROL_GET_RECLOCK_FILE:
+		verify_ctdb_string(&cd->data.reclock_file,
+				   &cd2->data.reclock_file);
+		break;
+
+	case CTDB_CONTROL_DB_GET_HEALTH:
+		verify_ctdb_string(&cd->data.reason, &cd2->data.reason);
+		break;
+
+	/* Replies compared with a type-specific verify helper */
+	case CTDB_CONTROL_STATISTICS:
+		verify_ctdb_statistics(cd->data.stats, cd2->data.stats);
+		break;
+
+	case CTDB_CONTROL_GETVNNMAP:
+		verify_ctdb_vnn_map(cd->data.vnnmap, cd2->data.vnnmap);
+		break;
+
+	case CTDB_CONTROL_GET_DBMAP:
+		verify_ctdb_dbid_map(cd->data.dbmap, cd2->data.dbmap);
+		break;
+
+	case CTDB_CONTROL_LIST_TUNABLES:
+		verify_ctdb_var_list(cd->data.tun_var_list,
+				     cd2->data.tun_var_list);
+		break;
+
+	case CTDB_CONTROL_GET_ALL_TUNABLES:
+		verify_ctdb_tunable_list(cd->data.tun_list, cd2->data.tun_list);
+		break;
+
+	case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+		verify_ctdb_tickle_list(cd->data.tickles, cd2->data.tickles);
+		break;
+
+	case CTDB_CONTROL_UPTIME:
+		verify_ctdb_uptime(cd->data.uptime, cd2->data.uptime);
+		break;
+
+	case CTDB_CONTROL_TRY_DELETE_RECORDS:
+		verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf);
+		break;
+
+	case CTDB_CONTROL_GET_PUBLIC_IPS:
+		verify_ctdb_public_ip_list(cd->data.pubip_list,
+					   cd2->data.pubip_list);
+		break;
+
+	case CTDB_CONTROL_GET_NODEMAP:
+	case CTDB_CONTROL_GET_NODES_FILE:
+		verify_ctdb_node_map(cd->data.nodemap, cd2->data.nodemap);
+		break;
+
+	case CTDB_CONTROL_GET_BAN_STATE:
+		verify_ctdb_ban_state(cd->data.ban_state, cd2->data.ban_state);
+		break;
+
+	case CTDB_CONTROL_GET_PUBLIC_IP_INFO:
+		verify_ctdb_public_ip_info(cd->data.ipinfo, cd2->data.ipinfo);
+		break;
+
+	case CTDB_CONTROL_GET_IFACES:
+		verify_ctdb_iface_list(cd->data.iface_list,
+				       cd2->data.iface_list);
+		break;
+
+	case CTDB_CONTROL_GET_STAT_HISTORY:
+		verify_ctdb_statistics_list(cd->data.stats_list,
+					    cd2->data.stats_list);
+		break;
+
+	case CTDB_CONTROL_GET_DB_STATISTICS:
+		verify_ctdb_db_statistics(cd->data.dbstats, cd2->data.dbstats);
+		break;
+
+	case CTDB_CONTROL_ECHO_DATA:
+		verify_ctdb_echo_data(cd->data.echo_data, cd2->data.echo_data);
+		break;
+	}
+}
+
+/*
+ * Populate a reply control with test data for the given opcode.
+ *
+ * The status is set to the negation of rand_int(2).  A zero status gets a
+ * NULL error message and reply data filled for the opcode; any other
+ * status gets only an error message string.
+ */
+void fill_ctdb_reply_control(TALLOC_CTX *mem_ctx,
+			     struct ctdb_reply_control *c, uint32_t opcode)
+{
+	c->status = -rand_int(2);
+
+	if (c->status != 0) {
+		/* Failure reply: error message only, rdata left untouched */
+		fill_ctdb_string(mem_ctx, &c->errmsg);
+		return;
+	}
+
+	c->errmsg = NULL;
+	fill_ctdb_reply_control_data(mem_ctx, &c->rdata, opcode);
+}
+
+/*
+ * Assert that two reply controls match: same status, same error message
+ * and, only when the status is zero, the same reply data.
+ */
+void verify_ctdb_reply_control(struct ctdb_reply_control *c,
+			       struct ctdb_reply_control *c2)
+{
+	assert(c->status == c2->status);
+	verify_ctdb_string(&c->errmsg, &c2->errmsg);
+
+	if (c->status != 0) {
+		/* Reply data is only compared for successful replies */
+		return;
+	}
+
+	verify_ctdb_reply_control_data(&c->rdata, &c2->rdata);
+}
+
+/*
+ * Populate a message payload union with test data for the given srvid.
+ *
+ * mem_ctx: talloc context used for any object allocated for the payload
+ * md:      union to populate
+ * srvid:   selects which member of the union is filled
+ *
+ * Calls abort() for a srvid that is not handled here.
+ */
+void fill_ctdb_message_data(TALLOC_CTX *mem_ctx, union ctdb_message_data *md,
+			    uint64_t srvid)
+{
+	switch (srvid) {
+	/* Nothing is filled for these srvids */
+	case CTDB_SRVID_RECONFIGURE:
+	case CTDB_SRVID_GETLOG:
+	case CTDB_SRVID_CLEARLOG:
+	case CTDB_SRVID_RELOAD_NODES:
+		break;
+
+	/* Members set straight from rand32() */
+	case CTDB_SRVID_DETACH_DATABASE:
+		md->db_id = rand32();
+		break;
+
+	case CTDB_SRVID_LEADER:
+	case CTDB_SRVID_BANNING:
+	case CTDB_SRVID_REBALANCE_NODE:
+		md->pnn = rand32();
+		break;
+
+	case CTDB_SRVID_DISABLE_IP_CHECK:
+		md->timeout = rand32();
+		break;
+
+	/* String member */
+	case CTDB_SRVID_RELEASE_IP:
+	case CTDB_SRVID_TAKE_IP:
+		fill_ctdb_string(mem_ctx, &md->ipaddr);
+		break;
+
+	/*
+	 * Allocated members.  Each new object is handed to its fill helper
+	 * as both arguments; presumably the first is the talloc parent for
+	 * nested allocations - confirm against the helpers.
+	 */
+	case CTDB_SRVID_ELECTION:
+		md->election = talloc(mem_ctx, struct ctdb_election_message);
+		assert(md->election != NULL);
+		fill_ctdb_election_message(md->election, md->election);
+		break;
+
+	case CTDB_SRVID_SET_NODE_FLAGS:
+	case CTDB_SRVID_PUSH_NODE_FLAGS:
+		md->flag_change = talloc(mem_ctx,
+					 struct ctdb_node_flag_change);
+		assert(md->flag_change != NULL);
+		fill_ctdb_node_flag_change(md->flag_change, md->flag_change);
+		break;
+
+	case CTDB_SRVID_RECD_UPDATE_IP:
+		md->pubip = talloc(mem_ctx, struct ctdb_public_ip);
+		assert(md->pubip != NULL);
+		fill_ctdb_public_ip(md->pubip, md->pubip);
+		break;
+
+	case CTDB_SRVID_VACUUM_FETCH:
+		md->recbuf = talloc(mem_ctx, struct ctdb_rec_buffer);
+		assert(md->recbuf != NULL);
+		fill_ctdb_rec_buffer(md->recbuf, md->recbuf);
+		break;
+
+	case CTDB_SRVID_MEM_DUMP:
+	case CTDB_SRVID_TAKEOVER_RUN:
+		md->msg = talloc(mem_ctx, struct ctdb_srvid_message);
+		assert(md->msg != NULL);
+		fill_ctdb_srvid_message(md->msg, md->msg);
+		break;
+
+	case CTDB_SRVID_DISABLE_TAKEOVER_RUNS:
+	case CTDB_SRVID_DISABLE_RECOVERIES:
+		md->disable = talloc(mem_ctx, struct ctdb_disable_message);
+		assert(md->disable != NULL);
+		fill_ctdb_disable_message(md->disable, md->disable);
+		break;
+
+	default:
+		abort();
+	}
+}
+
+/*
+ * Assert that two message payload unions hold the same data for the
+ * given srvid.
+ *
+ * srvid selects which union member is compared.  Calls abort() for a
+ * srvid that is not handled here.
+ */
+void verify_ctdb_message_data(union ctdb_message_data *md,
+			      union ctdb_message_data *md2, uint64_t srvid)
+{
+	switch (srvid) {
+	/* Nothing is compared for these srvids */
+	case CTDB_SRVID_RECONFIGURE:
+	case CTDB_SRVID_GETLOG:
+	case CTDB_SRVID_CLEARLOG:
+	case CTDB_SRVID_RELOAD_NODES:
+		break;
+
+	/* Members compared directly with == */
+	case CTDB_SRVID_DETACH_DATABASE:
+		assert(md->db_id == md2->db_id);
+		break;
+
+	case CTDB_SRVID_LEADER:
+	case CTDB_SRVID_BANNING:
+	case CTDB_SRVID_REBALANCE_NODE:
+		assert(md->pnn == md2->pnn);
+		break;
+
+	case CTDB_SRVID_DISABLE_IP_CHECK:
+		assert(md->timeout == md2->timeout);
+		break;
+
+	/* String member */
+	case CTDB_SRVID_RELEASE_IP:
+	case CTDB_SRVID_TAKE_IP:
+		verify_ctdb_string(&md->ipaddr, &md2->ipaddr);
+		break;
+
+	/* Members compared with a type-specific verify helper */
+	case CTDB_SRVID_ELECTION:
+		verify_ctdb_election_message(md->election, md2->election);
+		break;
+
+	case CTDB_SRVID_SET_NODE_FLAGS:
+	case CTDB_SRVID_PUSH_NODE_FLAGS:
+		verify_ctdb_node_flag_change(md->flag_change,
+					     md2->flag_change);
+		break;
+
+	case CTDB_SRVID_RECD_UPDATE_IP:
+		verify_ctdb_public_ip(md->pubip, md2->pubip);
+		break;
+
+	case CTDB_SRVID_VACUUM_FETCH:
+		verify_ctdb_rec_buffer(md->recbuf, md2->recbuf);
+		break;
+
+	case CTDB_SRVID_MEM_DUMP:
+	case CTDB_SRVID_TAKEOVER_RUN:
+		verify_ctdb_srvid_message(md->msg, md2->msg);
+		break;
+
+	case CTDB_SRVID_DISABLE_TAKEOVER_RUNS:
+	case CTDB_SRVID_DISABLE_RECOVERIES:
+		verify_ctdb_disable_message(md->disable, md2->disable);
+		break;
+
+	default:
+		abort();
+	}
+}
+
+/*
+ * Populate a request message for the given srvid: record the srvid and
+ * fill the payload member that corresponds to it.
+ */
+void fill_ctdb_req_message(TALLOC_CTX *mem_ctx, struct ctdb_req_message *c,
+			   uint64_t srvid)
+{
+	union ctdb_message_data *payload = &c->data;
+
+	c->srvid = srvid;
+	fill_ctdb_message_data(mem_ctx, payload, srvid);
+}
+
+/*
+ * Assert that two request messages carry the same srvid and the same
+ * payload for that srvid.
+ */
+void verify_ctdb_req_message(struct ctdb_req_message *c,
+			     struct ctdb_req_message *c2)
+{
+	uint64_t srvid = c->srvid;
+
+	assert(srvid == c2->srvid);
+	/* The srvid decides which payload member gets compared */
+	verify_ctdb_message_data(&c->data, &c2->data, srvid);
+}
+
+/*
+ * Populate a raw-data request message: a srvid taken from rand64() and a
+ * data blob filled by fill_tdb_data().
+ */
+void fill_ctdb_req_message_data(TALLOC_CTX *mem_ctx,
+				struct ctdb_req_message_data *c)
+{
+	/* Keep this order: the srvid is drawn before the blob is filled */
+	c->srvid = rand64();
+
+	fill_tdb_data(mem_ctx, &c->data);
+}
+
+/*
+ * Assert that two raw-data request messages have the same srvid and the
+ * same data blob.
+ */
+void verify_ctdb_req_message_data(struct ctdb_req_message_data *c,
+				  struct ctdb_req_message_data *c2)
+{
+	assert(c->srvid == c2->srvid);
+
+	verify_tdb_data(&c->data, &c2->data);
+}
+
+/*
+ * Populate a keepalive request with values from rand32().
+ *
+ * mem_ctx is not used by this function.
+ */
+void fill_ctdb_req_keepalive(TALLOC_CTX *mem_ctx,
+			     struct ctdb_req_keepalive *c)
+{
+	/* version is drawn first, then uptime */
+	c->version = rand32();
+	c->uptime = rand32();
+}
+
+/*
+ * Assert that two keepalive requests have the same version and uptime.
+ */
+void verify_ctdb_req_keepalive(struct ctdb_req_keepalive *c,
+			       struct ctdb_req_keepalive *c2)
+{
+	assert(c->version == c2->version);
+
+	assert(c->uptime == c2->uptime);
+}
+
+/*
+ * Populate a tunnel request: tunnel id from rand64(), flags from rand32()
+ * and a data blob filled by fill_tdb_data().
+ */
+void fill_ctdb_req_tunnel(TALLOC_CTX *mem_ctx, struct ctdb_req_tunnel *c)
+{
+	/* Fixed fields first, in this order, then the variable-size blob */
+	c->tunnel_id = rand64();
+	c->flags = rand32();
+
+	fill_tdb_data(mem_ctx, &c->data);
+}
+
+/*
+ * Assert that two tunnel requests have the same tunnel id, flags and data
+ * blob.
+ */
+void verify_ctdb_req_tunnel(struct ctdb_req_tunnel *c,
+			    struct ctdb_req_tunnel *c2)
+{
+	assert(c->tunnel_id == c2->tunnel_id);
+	assert(c->flags == c2->flags);
+
+	verify_tdb_data(&c->data, &c2->data);
+}
diff --git a/ctdb/tests/src/protocol_common_ctdb.h b/ctdb/tests/src/protocol_common_ctdb.h
new file mode 100644
index 0000000..0681089
--- /dev/null
+++ b/ctdb/tests/src/protocol_common_ctdb.h
@@ -0,0 +1,101 @@
+/*
+ protocol tests - ctdb protocol
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PROTOCOL_COMMON_CTDB_H__
+#define __CTDB_PROTOCOL_COMMON_CTDB_H__
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "protocol/protocol.h"
+
+/*
+ * Test helpers for CTDB protocol packets.
+ *
+ * They come in pairs: fill_<type>() populates an object with test data
+ * and verify_<type>() checks that two objects of that type match.  Fill
+ * functions that take a TALLOC_CTX use it for anything they allocate.
+ */
+
+/* Request header (the fill function takes no talloc context) */
+void fill_ctdb_req_header(struct ctdb_req_header *h);
+void verify_ctdb_req_header(struct ctdb_req_header *h,
+ struct ctdb_req_header *h2);
+
+/* Call and dmaster packets */
+void fill_ctdb_req_call(TALLOC_CTX *mem_ctx, struct ctdb_req_call *c);
+void verify_ctdb_req_call(struct ctdb_req_call *c, struct ctdb_req_call *c2);
+
+void fill_ctdb_reply_call(TALLOC_CTX *mem_ctx, struct ctdb_reply_call *c);
+void verify_ctdb_reply_call(struct ctdb_reply_call *c,
+ struct ctdb_reply_call *c2);
+
+void fill_ctdb_reply_error(TALLOC_CTX *mem_ctx, struct ctdb_reply_error *c);
+void verify_ctdb_reply_error(struct ctdb_reply_error *c,
+ struct ctdb_reply_error *c2);
+
+void fill_ctdb_req_dmaster(TALLOC_CTX *mem_ctx, struct ctdb_req_dmaster *c);
+void verify_ctdb_req_dmaster(struct ctdb_req_dmaster *c,
+ struct ctdb_req_dmaster *c2);
+
+void fill_ctdb_reply_dmaster(TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_dmaster *c);
+void verify_ctdb_reply_dmaster(struct ctdb_reply_dmaster *c,
+ struct ctdb_reply_dmaster *c2);
+
+/* Controls: the opcode selects which payload is filled */
+void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx,
+ struct ctdb_req_control_data *cd,
+ uint32_t opcode);
+void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd,
+ struct ctdb_req_control_data *cd2);
+
+void fill_ctdb_req_control(TALLOC_CTX *mem_ctx, struct ctdb_req_control *c,
+ uint32_t opcode);
+void verify_ctdb_req_control(struct ctdb_req_control *c,
+ struct ctdb_req_control *c2);
+
+void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_control_data *cd,
+ uint32_t opcode);
+void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd,
+ struct ctdb_reply_control_data *cd2);
+
+void fill_ctdb_reply_control(TALLOC_CTX *mem_ctx,
+ struct ctdb_reply_control *c, uint32_t opcode);
+void verify_ctdb_reply_control(struct ctdb_reply_control *c,
+ struct ctdb_reply_control *c2);
+
+/* Messages: the srvid selects which payload member is used */
+void fill_ctdb_message_data(TALLOC_CTX *mem_ctx, union ctdb_message_data *md,
+ uint64_t srvid);
+void verify_ctdb_message_data(union ctdb_message_data *md,
+ union ctdb_message_data *md2, uint64_t srvid);
+
+void fill_ctdb_req_message(TALLOC_CTX *mem_ctx, struct ctdb_req_message *c,
+ uint64_t srvid);
+void verify_ctdb_req_message(struct ctdb_req_message *c,
+ struct ctdb_req_message *c2);
+
+void fill_ctdb_req_message_data(TALLOC_CTX *mem_ctx,
+ struct ctdb_req_message_data *c);
+void verify_ctdb_req_message_data(struct ctdb_req_message_data *c,
+ struct ctdb_req_message_data *c2);
+
+/* Keepalive and tunnel packets */
+void fill_ctdb_req_keepalive(TALLOC_CTX *mem_ctx,
+ struct ctdb_req_keepalive *c);
+void verify_ctdb_req_keepalive(struct ctdb_req_keepalive *c,
+ struct ctdb_req_keepalive *c2);
+
+void fill_ctdb_req_tunnel(TALLOC_CTX *mem_ctx, struct ctdb_req_tunnel *c);
+void verify_ctdb_req_tunnel(struct ctdb_req_tunnel *c,
+ struct ctdb_req_tunnel *c2);
+
+#endif /* __CTDB_PROTOCOL_COMMON_CTDB_H__ */
diff --git a/ctdb/tests/src/protocol_ctdb_compat_test.c b/ctdb/tests/src/protocol_ctdb_compat_test.c
new file mode 100644
index 0000000..fc9f82e
--- /dev/null
+++ b/ctdb/tests/src/protocol_ctdb_compat_test.c
@@ -0,0 +1,1270 @@
+/*
+ ctdb protocol backward compatibility test
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "protocol/protocol_basic.c"
+#include "protocol/protocol_types.c"
+#include "protocol/protocol_header.c"
+#include "protocol/protocol_call.c"
+#include "protocol/protocol_control.c"
+#include "protocol/protocol_message.c"
+#include "protocol/protocol_keepalive.c"
+#include "protocol/protocol_tunnel.c"
+
+#include "tests/src/protocol_common.h"
+#include "tests/src/protocol_common_ctdb.h"
+
+/*
+ * Identifier builders used by the COMPAT_CTDB*_TEST macros.  NAME is
+ * token-pasted to form the generated test function name
+ * (test_<NAME>_compat) and the names of the old-format length, push and
+ * pull functions (<NAME>_len_old, <NAME>_push_old, <NAME>_pull_old).
+ */
+#define COMPAT_TEST_FUNC(NAME) test_ ##NAME## _compat
+#define OLD_LEN_FUNC(NAME) NAME## _len_old
+#define OLD_PUSH_FUNC(NAME) NAME## _push_old
+#define OLD_PULL_FUNC(NAME) NAME## _pull_old
+
+/*
+ * Define test_<NAME>_compat(void) for a TYPE that is filled, pushed and
+ * pulled without a request header or talloc context.
+ *
+ * The generated test:
+ *  - fills one TYPE value with FILL_FUNC(NAME)
+ *  - asserts the current and old length functions agree
+ *  - pushes the value with both implementations into zeroed buffers and
+ *    asserts the bytes are identical
+ *  - pulls each buffer back with the matching implementation and compares
+ *    the two results with VERIFY_FUNC(NAME)
+ *
+ * FILL_FUNC, LEN_FUNC, PUSH_FUNC, PULL_FUNC and VERIFY_FUNC are defined
+ * outside this file section.  mem_ctx only owns the two buffers.
+ */
+#define COMPAT_CTDB1_TEST(TYPE, NAME) \
+static void COMPAT_TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ uint8_t *buf1, *buf2; \
+ TYPE p = { 0 }, p1, p2; \
+ size_t buflen1, buflen2, np = 0; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ FILL_FUNC(NAME)(&p); \
+ buflen1 = LEN_FUNC(NAME)(&p); \
+ buflen2 = OLD_LEN_FUNC(NAME)(&p); \
+ assert(buflen1 == buflen2); \
+ buf1 = talloc_zero_size(mem_ctx, buflen1); \
+ assert(buf1 != NULL); \
+ buf2 = talloc_zero_size(mem_ctx, buflen2); \
+ assert(buf2 != NULL); \
+ PUSH_FUNC(NAME)(&p, buf1, &np); \
+ OLD_PUSH_FUNC(NAME)(&p, buf2); \
+ assert(memcmp(buf1, buf2, buflen1) == 0); \
+ ret = PULL_FUNC(NAME)(buf1, buflen1, &p1, &np); \
+ assert(ret == 0); \
+ ret = OLD_PULL_FUNC(NAME)(buf2, buflen2, &p2); \
+ assert(ret == 0); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * Define test_<NAME>_compat(void) for a packet TYPE that is marshalled
+ * together with a struct ctdb_req_header.
+ *
+ * The generated test fills a header and one TYPE value, asserts that the
+ * current and old length functions agree, pushes with both
+ * implementations (each must return 0) and asserts identical bytes, then
+ * pulls both buffers back and compares the resulting headers and values.
+ *
+ * OPER is accepted but not referenced in the expansion.
+ */
+#define COMPAT_CTDB4_TEST(TYPE, NAME, OPER) \
+static void COMPAT_TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ uint8_t *buf1, *buf2; \
+ struct ctdb_req_header h, h1, h2; \
+ TYPE p = { 0 }, p1, p2; \
+ size_t buflen1, buflen2; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h); \
+ FILL_FUNC(NAME)(mem_ctx, &p); \
+ buflen1 = LEN_FUNC(NAME)(&h, &p); \
+ buflen2 = OLD_LEN_FUNC(NAME)(&h, &p); \
+ assert(buflen1 == buflen2); \
+ buf1 = talloc_zero_size(mem_ctx, buflen1); \
+ assert(buf1 != NULL); \
+ buf2 = talloc_zero_size(mem_ctx, buflen2); \
+ assert(buf2 != NULL); \
+ ret = PUSH_FUNC(NAME)(&h, &p, buf1, &buflen1); \
+ assert(ret == 0); \
+ ret = OLD_PUSH_FUNC(NAME)(&h, &p, buf2, &buflen2); \
+ assert(ret == 0); \
+ assert(memcmp(buf1, buf2, buflen1) == 0); \
+ ret = PULL_FUNC(NAME)(buf1, buflen1, &h1, mem_ctx, &p1); \
+ assert(ret == 0); \
+ ret = OLD_PULL_FUNC(NAME)(buf2, buflen2, &h2, mem_ctx, &p2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * Define test_<NAME>_compat(uint32_t opcode) for a packet TYPE whose
+ * contents depend on a control opcode.
+ *
+ * The opcode is passed only to FILL_FUNC(NAME); the pull functions are
+ * called without it.  Otherwise the generated test follows the usual
+ * pattern: lengths must agree, both pushes must return 0 and produce
+ * identical bytes, both pulls must return 0, and the pulled headers and
+ * values are compared.
+ *
+ * OPER is accepted but not referenced in the expansion.
+ */
+#define COMPAT_CTDB5_TEST(TYPE, NAME, OPER) \
+static void COMPAT_TEST_FUNC(NAME)(uint32_t opcode) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ uint8_t *buf1, *buf2; \
+ struct ctdb_req_header h, h1, h2; \
+ TYPE p = { 0 }, p1, p2; \
+ size_t buflen1, buflen2; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h); \
+ FILL_FUNC(NAME)(mem_ctx, &p, opcode); \
+ buflen1 = LEN_FUNC(NAME)(&h, &p); \
+ buflen2 = OLD_LEN_FUNC(NAME)(&h, &p); \
+ assert(buflen1 == buflen2); \
+ buf1 = talloc_zero_size(mem_ctx, buflen1); \
+ assert(buf1 != NULL); \
+ buf2 = talloc_zero_size(mem_ctx, buflen2); \
+ assert(buf2 != NULL); \
+ ret = PUSH_FUNC(NAME)(&h, &p, buf1, &buflen1); \
+ assert(ret == 0); \
+ ret = OLD_PUSH_FUNC(NAME)(&h, &p, buf2, &buflen2); \
+ assert(ret == 0); \
+ assert(memcmp(buf1, buf2, buflen1) == 0); \
+ ret = PULL_FUNC(NAME)(buf1, buflen1, &h1, mem_ctx, &p1); \
+ assert(ret == 0); \
+ ret = OLD_PULL_FUNC(NAME)(buf2, buflen2, &h2, mem_ctx, &p2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * Define test_<NAME>_compat(uint32_t opcode) for a packet TYPE whose pull
+ * functions need the control opcode to decode the buffer.
+ *
+ * The opcode is passed to FILL_FUNC(NAME) and to both the current and old
+ * pull functions.  Otherwise the generated test follows the usual
+ * pattern: lengths must agree, both pushes must return 0 and produce
+ * identical bytes, both pulls must return 0, and the pulled headers and
+ * values are compared.
+ *
+ * OPER is accepted but not referenced in the expansion.
+ */
+#define COMPAT_CTDB6_TEST(TYPE, NAME, OPER) \
+static void COMPAT_TEST_FUNC(NAME)(uint32_t opcode) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ uint8_t *buf1, *buf2; \
+ struct ctdb_req_header h, h1, h2; \
+ TYPE p = { 0 }, p1, p2; \
+ size_t buflen1, buflen2; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h); \
+ FILL_FUNC(NAME)(mem_ctx, &p, opcode); \
+ buflen1 = LEN_FUNC(NAME)(&h, &p); \
+ buflen2 = OLD_LEN_FUNC(NAME)(&h, &p); \
+ assert(buflen1 == buflen2); \
+ buf1 = talloc_zero_size(mem_ctx, buflen1); \
+ assert(buf1 != NULL); \
+ buf2 = talloc_zero_size(mem_ctx, buflen2); \
+ assert(buf2 != NULL); \
+ ret = PUSH_FUNC(NAME)(&h, &p, buf1, &buflen1); \
+ assert(ret == 0); \
+ ret = OLD_PUSH_FUNC(NAME)(&h, &p, buf2, &buflen2); \
+ assert(ret == 0); \
+ assert(memcmp(buf1, buf2, buflen1) == 0); \
+ ret = PULL_FUNC(NAME)(buf1, buflen1, opcode, &h1, mem_ctx, &p1); \
+ assert(ret == 0); \
+ ret = OLD_PULL_FUNC(NAME)(buf2, buflen2, opcode, &h2, mem_ctx, &p2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * Define test_<NAME>_compat(uint64_t srvid) for a message TYPE whose
+ * contents depend on a srvid.
+ *
+ * The srvid is passed only to FILL_FUNC(NAME); the pull functions are
+ * called without it.  Otherwise the generated test follows the usual
+ * pattern: lengths must agree, both pushes must return 0 and produce
+ * identical bytes, both pulls must return 0, and the pulled headers and
+ * values are compared.
+ *
+ * OPER is accepted but not referenced in the expansion.
+ */
+#define COMPAT_CTDB7_TEST(TYPE, NAME, OPER) \
+static void COMPAT_TEST_FUNC(NAME)(uint64_t srvid) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ uint8_t *buf1, *buf2; \
+ struct ctdb_req_header h, h1, h2; \
+ TYPE p = { 0 }, p1, p2; \
+ size_t buflen1, buflen2; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h); \
+ FILL_FUNC(NAME)(mem_ctx, &p, srvid); \
+ buflen1 = LEN_FUNC(NAME)(&h, &p); \
+ buflen2 = OLD_LEN_FUNC(NAME)(&h, &p); \
+ assert(buflen1 == buflen2); \
+ buf1 = talloc_zero_size(mem_ctx, buflen1); \
+ assert(buf1 != NULL); \
+ buf2 = talloc_zero_size(mem_ctx, buflen2); \
+ assert(buf2 != NULL); \
+ ret = PUSH_FUNC(NAME)(&h, &p, buf1, &buflen1); \
+ assert(ret == 0); \
+ ret = OLD_PUSH_FUNC(NAME)(&h, &p, buf2, &buflen2); \
+ assert(ret == 0); \
+ assert(memcmp(buf1, buf2, buflen1) == 0); \
+ ret = PULL_FUNC(NAME)(buf1, buflen1, &h1, mem_ctx, &p1); \
+ assert(ret == 0); \
+ ret = OLD_PULL_FUNC(NAME)(buf2, buflen2, &h2, mem_ctx, &p2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+ talloc_free(mem_ctx); \
+}
+
+
+/*
+ * Old-format length of a request header: the in-memory size of the
+ * structure.  The argument is only used for its type.
+ */
+static size_t ctdb_req_header_len_old(struct ctdb_req_header *in)
+{
+	/* sizeof does not evaluate its operand, so in is never dereferenced */
+	return sizeof(*in);
+}
+
+/*
+ * Old-format push of a request header: a raw byte copy of the structure
+ * into buf.  The caller must supply room for a whole header.
+ */
+static void ctdb_req_header_push_old(struct ctdb_req_header *in, uint8_t *buf)
+{
+	memcpy(buf, in, sizeof(*in));
+}
+
+/*
+ * Old-format pull of a request header: a raw byte copy from buf into out.
+ *
+ * Returns 0 on success, or EMSGSIZE when buflen is smaller than a header.
+ */
+static int ctdb_req_header_pull_old(uint8_t *buf, size_t buflen,
+				    struct ctdb_req_header *out)
+{
+	const size_t hdr_size = sizeof(*out);
+
+	if (buflen >= hdr_size) {
+		memcpy(out, buf, hdr_size);
+		return 0;
+	}
+
+	return EMSGSIZE;
+}
+
+/*
+ * Wire layout used by the old-format ctdb_req_call functions in this
+ * file, which cast the packet buffer to this structure.  Field order is
+ * therefore the byte layout: header, fixed fields, then the variable
+ * part starting at data.
+ */
+struct ctdb_req_call_wire {
+ struct ctdb_req_header hdr;
+ uint32_t flags;
+ uint32_t db_id;
+ uint32_t callid;
+ uint32_t hopcount;
+ uint32_t keylen;
+ uint32_t calldatalen;
+ uint8_t data[1]; /* key[] followed by calldata[] */
+};
+
+/*
+ * Old-format packet length of a request call: the fixed part of the wire
+ * structure plus the marshalled key and call data.  h is not used.
+ */
+static size_t ctdb_req_call_len_old(struct ctdb_req_header *h,
+				    struct ctdb_req_call *c)
+{
+	size_t total = offsetof(struct ctdb_req_call_wire, data);
+
+	total += ctdb_tdb_data_len(&c->key);
+	total += ctdb_tdb_data_len(&c->calldata);
+
+	return total;
+}
+
+/*
+ * Old-format push of a request call into buf.
+ *
+ * Returns EINVAL when the key is empty.  Returns EMSGSIZE, after storing
+ * the required size in *buflen, when the buffer is too small.  On success
+ * returns 0; h->length is set to *buflen before the header is written.
+ */
+static int ctdb_req_call_push_old(struct ctdb_req_header *h,
+				  struct ctdb_req_call *c,
+				  uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_req_call_wire *w = (struct ctdb_req_call_wire *)buf;
+	size_t needed, np;
+
+	if (c->key.dsize == 0) {
+		return EINVAL;
+	}
+
+	needed = ctdb_req_call_len_old(h, c);
+	if (*buflen < needed) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	/* Header first; its length field records the buffer size given */
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&w->hdr);
+
+	/* Fixed fields */
+	w->flags = c->flags;
+	w->db_id = c->db_id;
+	w->callid = c->callid;
+	w->hopcount = c->hopcount;
+	w->keylen = ctdb_tdb_data_len(&c->key);
+	w->calldatalen = ctdb_tdb_data_len(&c->calldata);
+
+	/* Variable part: key, then call data right after it */
+	ctdb_tdb_data_push(&c->key, w->data, &np);
+	ctdb_tdb_data_push(&c->calldata, w->data + w->keylen, &np);
+
+	return 0;
+}
+
+/*
+ * Old-format pull of a request call from buf.
+ *
+ * h may be NULL, in which case the header is not extracted.  Key and call
+ * data are pulled using mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE when the buffer is too short for the
+ * fixed part or for the lengths it declares, or the error returned by the
+ * header or data pull.
+ */
+static int ctdb_req_call_pull_old(uint8_t *buf, size_t buflen,
+				  struct ctdb_req_header *h,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_req_call *c)
+{
+	struct ctdb_req_call_wire *w = (struct ctdb_req_call_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_req_call_wire, data);
+	size_t np;
+	int ret;
+
+	/* The fixed part must be present before any field of it is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Declared lengths must fit in the buffer without wrapping */
+	if (w->keylen > buflen || w->calldatalen > buflen ||
+	    fixed + w->keylen < fixed ||
+	    fixed + w->keylen + w->calldatalen < fixed ||
+	    buflen < fixed + w->keylen + w->calldatalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&w->hdr, buflen, h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->flags = w->flags;
+	c->db_id = w->db_id;
+	c->callid = w->callid;
+	c->hopcount = w->hopcount;
+
+	ret = ctdb_tdb_data_pull(w->data, w->keylen, mem_ctx, &c->key, &np);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Call data follows the key; its result is the function's result */
+	return ctdb_tdb_data_pull(w->data + w->keylen, w->calldatalen,
+				  mem_ctx, &c->calldata, &np);
+}
+
+/*
+ * Wire layout used by the old-format ctdb_reply_call functions in this
+ * file, which cast the packet buffer to this structure.  Field order is
+ * the byte layout: header, status, data length, then datalen bytes of
+ * reply data starting at data.
+ */
+struct ctdb_reply_call_wire {
+ struct ctdb_req_header hdr;
+ uint32_t status;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * Old-format packet length of a reply call: the fixed part of the wire
+ * structure plus the marshalled reply data.  h is not used.
+ */
+static size_t ctdb_reply_call_len_old(struct ctdb_req_header *h,
+				      struct ctdb_reply_call *c)
+{
+	size_t total = offsetof(struct ctdb_reply_call_wire, data);
+
+	total += ctdb_tdb_data_len(&c->data);
+
+	return total;
+}
+
+/*
+ * Old-format push of a reply call into buf.
+ *
+ * Returns EMSGSIZE, after storing the required size in *buflen, when the
+ * buffer is too small.  On success returns 0; h->length is set to *buflen
+ * before the header is written.
+ */
+static int ctdb_reply_call_push_old(struct ctdb_req_header *h,
+				    struct ctdb_reply_call *c,
+				    uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_reply_call_wire *w = (struct ctdb_reply_call_wire *)buf;
+	size_t needed, np;
+
+	needed = ctdb_reply_call_len_old(h, c);
+	if (*buflen < needed) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	/* Header first; its length field records the buffer size given */
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&w->hdr);
+
+	w->status = c->status;
+	w->datalen = ctdb_tdb_data_len(&c->data);
+	ctdb_tdb_data_push(&c->data, w->data, &np);
+
+	return 0;
+}
+
+/*
+ * Old-format pull of a reply call from buf.
+ *
+ * h may be NULL, in which case the header is not extracted.  The reply
+ * data is pulled using mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE when the buffer is too short for the
+ * fixed part or for the length it declares, or the error returned by the
+ * header or data pull.
+ */
+static int ctdb_reply_call_pull_old(uint8_t *buf, size_t buflen,
+				    struct ctdb_req_header *h,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_reply_call *c)
+{
+	struct ctdb_reply_call_wire *w = (struct ctdb_reply_call_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_reply_call_wire, data);
+	size_t np;
+	int ret;
+
+	/* The fixed part must be present before any field of it is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* The declared length must fit in the buffer without wrapping */
+	if (w->datalen > buflen ||
+	    fixed + w->datalen < fixed ||
+	    buflen < fixed + w->datalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&w->hdr, buflen, h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->status = w->status;
+
+	return ctdb_tdb_data_pull(w->data, w->datalen, mem_ctx, &c->data,
+				  &np);
+}
+
+/*
+ * Wire layout used by the old-format ctdb_reply_error functions in this
+ * file, which cast the packet buffer to this structure.  Field order is
+ * the byte layout: header, status, message length, then msglen bytes of
+ * message starting at msg.
+ */
+struct ctdb_reply_error_wire {
+ struct ctdb_req_header hdr;
+ uint32_t status;
+ uint32_t msglen;
+ uint8_t msg[1];
+};
+
+/*
+ * Old-format packet length of a reply error: the fixed part of the wire
+ * structure plus the marshalled message.  h is not used.
+ */
+static size_t ctdb_reply_error_len_old(struct ctdb_req_header *h,
+				       struct ctdb_reply_error *c)
+{
+	size_t total = offsetof(struct ctdb_reply_error_wire, msg);
+
+	total += ctdb_tdb_data_len(&c->msg);
+
+	return total;
+}
+
+/*
+ * Old-format push of a reply error into buf.
+ *
+ * Returns EMSGSIZE, after storing the required size in *buflen, when the
+ * buffer is too small.  On success returns 0; h->length is set to *buflen
+ * before the header is written.
+ */
+static int ctdb_reply_error_push_old(struct ctdb_req_header *h,
+				     struct ctdb_reply_error *c,
+				     uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_reply_error_wire *w = (struct ctdb_reply_error_wire *)buf;
+	size_t needed, np;
+
+	needed = ctdb_reply_error_len_old(h, c);
+	if (*buflen < needed) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	/* Header first; its length field records the buffer size given */
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&w->hdr);
+
+	w->status = c->status;
+	w->msglen = ctdb_tdb_data_len(&c->msg);
+	ctdb_tdb_data_push(&c->msg, w->msg, &np);
+
+	return 0;
+}
+
+/*
+ * Old-format pull of a reply error from buf.
+ *
+ * h may be NULL, in which case the header is not extracted.  The message
+ * is pulled using mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE when the buffer is too short for the
+ * fixed part or for the length it declares, or the error returned by the
+ * header or data pull.
+ */
+static int ctdb_reply_error_pull_old(uint8_t *buf, size_t buflen,
+				     struct ctdb_req_header *h,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_reply_error *c)
+{
+	struct ctdb_reply_error_wire *w = (struct ctdb_reply_error_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_reply_error_wire, msg);
+	size_t np;
+	int ret;
+
+	/* The fixed part must be present before any field of it is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* The declared length must fit in the buffer without wrapping */
+	if (w->msglen > buflen ||
+	    fixed + w->msglen < fixed ||
+	    buflen < fixed + w->msglen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&w->hdr, buflen, h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->status = w->status;
+
+	return ctdb_tdb_data_pull(w->msg, w->msglen, mem_ctx, &c->msg, &np);
+}
+
+/*
+ * Wire layout used by the old-format ctdb_req_dmaster functions in this
+ * file, which cast the packet buffer to this structure.  Field order is
+ * the byte layout: header, fixed fields, then the variable part at data,
+ * which holds keylen bytes of key followed by datalen bytes of data.
+ */
+struct ctdb_req_dmaster_wire {
+ struct ctdb_req_header hdr;
+ uint32_t db_id;
+ uint64_t rsn;
+ uint32_t dmaster;
+ uint32_t keylen;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * Old-format packet length of a dmaster request: the fixed part of the
+ * wire structure plus the marshalled key and data.  h is not used.
+ */
+static size_t ctdb_req_dmaster_len_old(struct ctdb_req_header *h,
+				       struct ctdb_req_dmaster *c)
+{
+	size_t total = offsetof(struct ctdb_req_dmaster_wire, data);
+
+	total += ctdb_tdb_data_len(&c->key);
+	total += ctdb_tdb_data_len(&c->data);
+
+	return total;
+}
+
+/*
+ * Old-format push of a dmaster request into buf.
+ *
+ * Returns EMSGSIZE, after storing the required size in *buflen, when the
+ * buffer is too small.  On success returns 0; h->length is set to *buflen
+ * before the header is written.
+ */
+static int ctdb_req_dmaster_push_old(struct ctdb_req_header *h,
+				     struct ctdb_req_dmaster *c,
+				     uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_req_dmaster_wire *w = (struct ctdb_req_dmaster_wire *)buf;
+	size_t needed, np;
+
+	needed = ctdb_req_dmaster_len_old(h, c);
+	if (*buflen < needed) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	/* Header first; its length field records the buffer size given */
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&w->hdr);
+
+	/* Fixed fields */
+	w->db_id = c->db_id;
+	w->rsn = c->rsn;
+	w->dmaster = c->dmaster;
+	w->keylen = ctdb_tdb_data_len(&c->key);
+	w->datalen = ctdb_tdb_data_len(&c->data);
+
+	/* Variable part: key, then data right after it */
+	ctdb_tdb_data_push(&c->key, w->data, &np);
+	ctdb_tdb_data_push(&c->data, w->data + w->keylen, &np);
+
+	return 0;
+}
+
+/*
+ * Old-format pull of a dmaster request from buf.
+ *
+ * h may be NULL, in which case the header is not extracted.  Key and data
+ * are pulled using mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE when the buffer is too short for the
+ * fixed part or for the lengths it declares, or the error returned by the
+ * header or data pull.
+ */
+static int ctdb_req_dmaster_pull_old(uint8_t *buf, size_t buflen,
+				     struct ctdb_req_header *h,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_req_dmaster *c)
+{
+	struct ctdb_req_dmaster_wire *w = (struct ctdb_req_dmaster_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_req_dmaster_wire, data);
+	size_t np;
+	int ret;
+
+	/* The fixed part must be present before any field of it is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Declared lengths must fit in the buffer without wrapping */
+	if (w->keylen > buflen || w->datalen > buflen ||
+	    fixed + w->keylen < fixed ||
+	    fixed + w->keylen + w->datalen < fixed ||
+	    buflen < fixed + w->keylen + w->datalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&w->hdr, buflen, h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->db_id = w->db_id;
+	c->rsn = w->rsn;
+	c->dmaster = w->dmaster;
+
+	ret = ctdb_tdb_data_pull(w->data, w->keylen, mem_ctx, &c->key, &np);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Data follows the key; its result is the function's result */
+	return ctdb_tdb_data_pull(w->data + w->keylen, w->datalen,
+				  mem_ctx, &c->data, &np);
+}
+
+/*
+ * Old wire layout used by the ctdb_reply_dmaster_*_old() helpers.
+ *
+ * The size of the fixed part is taken as offsetof(..., data).  The
+ * helpers place keylen bytes of key at data[0], immediately followed by
+ * datalen bytes of record data.
+ */
+struct ctdb_reply_dmaster_wire {
+ struct ctdb_req_header hdr;
+ uint32_t db_id;
+ uint64_t rsn;
+ uint32_t keylen;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * Number of bytes needed to marshal c in the old wire format: the fixed
+ * part of struct ctdb_reply_dmaster_wire plus the key and the data.
+ * h is not used.
+ */
+static size_t ctdb_reply_dmaster_len_old(struct ctdb_req_header *h,
+					 struct ctdb_reply_dmaster *c)
+{
+	size_t total = offsetof(struct ctdb_reply_dmaster_wire, data);
+
+	total += ctdb_tdb_data_len(&c->key);
+	total += ctdb_tdb_data_len(&c->data);
+
+	return total;
+}
+
+/*
+ * Marshal h and c into buf using the old wire layout.
+ *
+ * If *buflen is smaller than the required size, the required size is
+ * stored in *buflen and EMSGSIZE is returned before anything is written.
+ * Otherwise h->length is set to *buflen, the packet is written and 0 is
+ * returned.
+ */
+static int ctdb_reply_dmaster_push_old(struct ctdb_req_header *h,
+				       struct ctdb_reply_dmaster *c,
+				       uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_reply_dmaster_wire *pkt;
+	size_t needed, written;
+
+	needed = ctdb_reply_dmaster_len_old(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	pkt = (struct ctdb_reply_dmaster_wire *)buf;
+
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&pkt->hdr);
+
+	pkt->db_id = c->db_id;
+	pkt->rsn = c->rsn;
+
+	/* Key goes first, the record data directly behind it */
+	pkt->keylen = ctdb_tdb_data_len(&c->key);
+	ctdb_tdb_data_push(&c->key, pkt->data, &written);
+	pkt->datalen = ctdb_tdb_data_len(&c->data);
+	ctdb_tdb_data_push(&c->data, pkt->data + pkt->keylen, &written);
+
+	return 0;
+}
+
+/*
+ * Unmarshal an old-format packet from buf into c, and into h when h is
+ * not NULL.
+ *
+ * Returns EMSGSIZE if buflen cannot hold the fixed part of the packet
+ * plus the key and data lengths it announces, the error code of a
+ * failing helper, or 0 on success.
+ */
+static int ctdb_reply_dmaster_pull_old(uint8_t *buf, size_t buflen,
+				       struct ctdb_req_header *h,
+				       TALLOC_CTX *mem_ctx,
+				       struct ctdb_reply_dmaster *c)
+{
+	struct ctdb_reply_dmaster_wire *pkt =
+		(struct ctdb_reply_dmaster_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_reply_dmaster_wire, data);
+	size_t consumed;
+	int ret;
+
+	/* The fixed part must be present before the lengths are read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Bound each length, catch wrap-around, then bound the sum */
+	if (pkt->keylen > buflen || pkt->datalen > buflen ||
+	    fixed + pkt->keylen < fixed ||
+	    fixed + pkt->keylen + pkt->datalen < fixed ||
+	    buflen < fixed + pkt->keylen + pkt->datalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&pkt->hdr, buflen,
+					       h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->db_id = pkt->db_id;
+	c->rsn = pkt->rsn;
+
+	ret = ctdb_tdb_data_pull(pkt->data, pkt->keylen, mem_ctx, &c->key,
+				 &consumed);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return ctdb_tdb_data_pull(pkt->data + pkt->keylen, pkt->datalen,
+				  mem_ctx, &c->data, &consumed);
+}
+
+/*
+ * Old wire layout used by the ctdb_req_control_*_old() helpers.
+ *
+ * The size of the fixed part is taken as offsetof(..., data); datalen
+ * bytes of marshalled control request data start at data[0].
+ */
+struct ctdb_req_control_wire {
+ struct ctdb_req_header hdr;
+ uint32_t opcode;
+ uint32_t pad;
+ uint64_t srvid;
+ uint32_t client_id;
+ uint32_t flags;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * Number of bytes needed to marshal c in the old wire format: the fixed
+ * part of struct ctdb_req_control_wire plus the control request data.
+ * h is not used.
+ */
+static size_t ctdb_req_control_len_old(struct ctdb_req_header *h,
+				       struct ctdb_req_control *c)
+{
+	size_t total = offsetof(struct ctdb_req_control_wire, data);
+
+	total += ctdb_req_control_data_len(&c->rdata);
+
+	return total;
+}
+
+/*
+ * Marshal h and c into buf using the old wire layout.
+ *
+ * If *buflen is smaller than the required size, the required size is
+ * stored in *buflen and EMSGSIZE is returned before anything is written.
+ * Otherwise h->length is set to *buflen, the packet is written and 0 is
+ * returned.
+ */
+static int ctdb_req_control_push_old(struct ctdb_req_header *h,
+				     struct ctdb_req_control *c,
+				     uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_req_control_wire *pkt;
+	size_t needed, written;
+
+	needed = ctdb_req_control_len_old(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	pkt = (struct ctdb_req_control_wire *)buf;
+
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&pkt->hdr);
+
+	/* Fixed fields */
+	pkt->opcode = c->opcode;
+	pkt->pad = c->pad;
+	pkt->srvid = c->srvid;
+	pkt->client_id = c->client_id;
+	pkt->flags = c->flags;
+
+	/* Variable-length request data */
+	pkt->datalen = ctdb_req_control_data_len(&c->rdata);
+	ctdb_req_control_data_push(&c->rdata, pkt->data, &written);
+
+	return 0;
+}
+
+/*
+ * Unmarshal an old-format packet from buf into c, and into h when h is
+ * not NULL.  The request data is decoded according to the opcode found
+ * in the packet.
+ *
+ * Returns EMSGSIZE if buflen cannot hold the fixed part of the packet
+ * plus the data length it announces, the error code of a failing
+ * helper, or 0 on success.
+ */
+static int ctdb_req_control_pull_old(uint8_t *buf, size_t buflen,
+				     struct ctdb_req_header *h,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_req_control *c)
+{
+	struct ctdb_req_control_wire *pkt =
+		(struct ctdb_req_control_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_req_control_wire, data);
+	size_t consumed;
+	int ret;
+
+	/* The fixed part must be present before datalen is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Bound the length, catch wrap-around, then bound the sum */
+	if (pkt->datalen > buflen ||
+	    fixed + pkt->datalen < fixed ||
+	    buflen < fixed + pkt->datalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&pkt->hdr, buflen,
+					       h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->opcode = pkt->opcode;
+	c->pad = pkt->pad;
+	c->srvid = pkt->srvid;
+	c->client_id = pkt->client_id;
+	c->flags = pkt->flags;
+
+	return ctdb_req_control_data_pull(pkt->data, pkt->datalen,
+					  c->opcode, mem_ctx, &c->rdata,
+					  &consumed);
+}
+
+/*
+ * Old wire layout used by the ctdb_reply_control_*_old() helpers.
+ *
+ * The size of the fixed part is taken as offsetof(..., data).  The pull
+ * helper reads datalen bytes of reply data at data[0] and errorlen bytes
+ * of error string directly behind them; the push helper writes only one
+ * of the two and sets the other length to 0.
+ */
+struct ctdb_reply_control_wire {
+ struct ctdb_req_header hdr;
+ int32_t status;
+ uint32_t datalen;
+ uint32_t errorlen;
+ uint8_t data[1];
+};
+
+/*
+ * Number of bytes needed to marshal c in the old wire format: the fixed
+ * part of struct ctdb_reply_control_wire plus either the reply data
+ * (status == 0) or the error string (any other status).
+ * h is not used.
+ */
+static size_t ctdb_reply_control_len_old(struct ctdb_req_header *h,
+					 struct ctdb_reply_control *c)
+{
+	size_t payload;
+
+	if (c->status == 0) {
+		payload = ctdb_reply_control_data_len(&c->rdata);
+	} else {
+		payload = ctdb_string_len(&c->errmsg);
+	}
+
+	return offsetof(struct ctdb_reply_control_wire, data) + payload;
+}
+
+/*
+ * Marshal h and c into buf using the old wire layout.
+ *
+ * If *buflen is smaller than the required size, the required size is
+ * stored in *buflen and EMSGSIZE is returned before anything is written.
+ * Otherwise h->length is set to *buflen, the packet is written and 0 is
+ * returned.  A reply with status 0 carries the reply data, any other
+ * status carries the error string instead.
+ */
+static int ctdb_reply_control_push_old(struct ctdb_req_header *h,
+				       struct ctdb_reply_control *c,
+				       uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_reply_control_wire *pkt;
+	size_t needed, written;
+
+	needed = ctdb_reply_control_len_old(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	pkt = (struct ctdb_reply_control_wire *)buf;
+
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&pkt->hdr);
+
+	pkt->status = c->status;
+
+	if (c->status != 0) {
+		/* Failure: no data, error string at the start of data[] */
+		pkt->datalen = 0;
+		pkt->errorlen = ctdb_string_len(&c->errmsg);
+		ctdb_string_push(&c->errmsg, pkt->data + pkt->datalen,
+				 &written);
+		return 0;
+	}
+
+	/* Success: reply data only */
+	pkt->datalen = ctdb_reply_control_data_len(&c->rdata);
+	pkt->errorlen = 0;
+	ctdb_reply_control_data_push(&c->rdata, pkt->data, &written);
+
+	return 0;
+}
+
+/*
+ * Unmarshal an old-format packet from buf into c, and into h when h is
+ * not NULL.  The reply data is decoded according to opcode and is
+ * skipped when the status is -1; the error string is always pulled.
+ *
+ * Returns EMSGSIZE if buflen cannot hold the fixed part of the packet
+ * plus the data and error lengths it announces, the error code of a
+ * failing helper, or 0 on success.
+ */
+static int ctdb_reply_control_pull_old(uint8_t *buf, size_t buflen,
+				       uint32_t opcode,
+				       struct ctdb_req_header *h,
+				       TALLOC_CTX *mem_ctx,
+				       struct ctdb_reply_control *c)
+{
+	struct ctdb_reply_control_wire *pkt =
+		(struct ctdb_reply_control_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_reply_control_wire, data);
+	size_t consumed;
+	int ret;
+
+	/* The fixed part must be present before the lengths are read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Bound each length, catch wrap-around, then bound the sum */
+	if (pkt->datalen > buflen || pkt->errorlen > buflen ||
+	    fixed + pkt->datalen < fixed ||
+	    fixed + pkt->datalen + pkt->errorlen < fixed ||
+	    buflen < fixed + pkt->datalen + pkt->errorlen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&pkt->hdr, buflen,
+					       h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->status = pkt->status;
+
+	if (c->status != -1) {
+		ret = ctdb_reply_control_data_pull(pkt->data, pkt->datalen,
+						   opcode, mem_ctx,
+						   &c->rdata, &consumed);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	/* The error string sits directly behind the reply data */
+	return ctdb_string_pull(pkt->data + pkt->datalen, pkt->errorlen,
+				mem_ctx, &c->errmsg, &consumed);
+}
+
+/*
+ * Old wire layout shared by the ctdb_req_message_*_old() and
+ * ctdb_req_message_data_*_old() helpers.
+ *
+ * The size of the fixed part is taken as offsetof(..., data); datalen
+ * bytes of message payload start at data[0].
+ */
+struct ctdb_req_message_wire {
+ struct ctdb_req_header hdr;
+ uint64_t srvid;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * Number of bytes needed to marshal c in the old wire format: the fixed
+ * part of struct ctdb_req_message_wire plus the message data, whose
+ * length is computed for c->srvid.
+ * h is not used.
+ */
+static size_t ctdb_req_message_len_old(struct ctdb_req_header *h,
+				       struct ctdb_req_message *c)
+{
+	size_t total = offsetof(struct ctdb_req_message_wire, data);
+
+	total += ctdb_message_data_len(&c->data, c->srvid);
+
+	return total;
+}
+
+/*
+ * Marshal h and c into buf using the old wire layout.
+ *
+ * If *buflen is smaller than the required size, the required size is
+ * stored in *buflen and EMSGSIZE is returned before anything is written.
+ * Otherwise h->length is set to *buflen, the packet is written and 0 is
+ * returned.
+ */
+static int ctdb_req_message_push_old(struct ctdb_req_header *h,
+				     struct ctdb_req_message *c,
+				     uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_req_message_wire *pkt;
+	size_t needed, written;
+
+	needed = ctdb_req_message_len_old(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	pkt = (struct ctdb_req_message_wire *)buf;
+
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&pkt->hdr);
+
+	pkt->srvid = c->srvid;
+
+	/* The payload encoding is selected by the srvid */
+	pkt->datalen = ctdb_message_data_len(&c->data, c->srvid);
+	ctdb_message_data_push(&c->data, c->srvid, pkt->data, &written);
+
+	return 0;
+}
+
+/*
+ * Unmarshal an old-format packet from buf into c, and into h when h is
+ * not NULL.  The payload is decoded according to the srvid found in the
+ * packet.
+ *
+ * Returns EMSGSIZE if buflen cannot hold the fixed part of the packet
+ * plus the data length it announces, the error code of a failing
+ * helper, or 0 on success.
+ */
+static int ctdb_req_message_pull_old(uint8_t *buf, size_t buflen,
+				     struct ctdb_req_header *h,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_req_message *c)
+{
+	struct ctdb_req_message_wire *pkt =
+		(struct ctdb_req_message_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_req_message_wire, data);
+	size_t consumed;
+	int ret;
+
+	/* The fixed part must be present before datalen is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Bound the length, catch wrap-around, then bound the sum */
+	if (pkt->datalen > buflen ||
+	    fixed + pkt->datalen < fixed ||
+	    buflen < fixed + pkt->datalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&pkt->hdr, buflen,
+					       h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->srvid = pkt->srvid;
+
+	return ctdb_message_data_pull(pkt->data, pkt->datalen, pkt->srvid,
+				      mem_ctx, &c->data, &consumed);
+}
+
+/*
+ * Number of bytes needed to marshal c in the old wire format: the fixed
+ * part of struct ctdb_req_message_wire plus the raw message data.
+ * h is not used.
+ */
+static size_t ctdb_req_message_data_len_old(struct ctdb_req_header *h,
+					    struct ctdb_req_message_data *c)
+{
+	size_t total = offsetof(struct ctdb_req_message_wire, data);
+
+	total += ctdb_tdb_data_len(&c->data);
+
+	return total;
+}
+
+/*
+ * Marshal h and c into buf using the old wire layout.
+ *
+ * If *buflen is smaller than the required size, the required size is
+ * stored in *buflen and EMSGSIZE is returned before anything is written.
+ * Otherwise h->length is set to *buflen, the packet is written and 0 is
+ * returned.
+ *
+ * The header is written with ctdb_req_header_push_old(), like every
+ * other *_push_old() helper in this file, so that the "old" side of the
+ * compatibility test does not depend on the current header marshalling.
+ */
+static int ctdb_req_message_data_push_old(struct ctdb_req_header *h,
+					  struct ctdb_req_message_data *c,
+					  uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_req_message_wire *wire =
+		(struct ctdb_req_message_wire *)buf;
+	size_t length, np;
+
+	length = ctdb_req_message_data_len_old(h, c);
+	if (*buflen < length) {
+		*buflen = length;
+		return EMSGSIZE;
+	}
+
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&wire->hdr);
+
+	wire->srvid = c->srvid;
+	wire->datalen = ctdb_tdb_data_len(&c->data);
+	ctdb_tdb_data_push(&c->data, wire->data, &np);
+
+	return 0;
+}
+
+/*
+ * Unmarshal an old-format packet from buf into c, and into h when h is
+ * not NULL.  The payload is pulled as raw data.
+ *
+ * Returns EMSGSIZE if buflen cannot hold the fixed part of the packet
+ * plus the data length it announces, the error code of a failing
+ * helper, or 0 on success.
+ */
+static int ctdb_req_message_data_pull_old(uint8_t *buf, size_t buflen,
+					  struct ctdb_req_header *h,
+					  TALLOC_CTX *mem_ctx,
+					  struct ctdb_req_message_data *c)
+{
+	struct ctdb_req_message_wire *pkt =
+		(struct ctdb_req_message_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_req_message_wire, data);
+	size_t consumed;
+	int ret;
+
+	/* The fixed part must be present before datalen is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Bound the length, catch wrap-around, then bound the sum */
+	if (pkt->datalen > buflen ||
+	    fixed + pkt->datalen < fixed ||
+	    buflen < fixed + pkt->datalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&pkt->hdr, buflen,
+					       h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->srvid = pkt->srvid;
+
+	return ctdb_tdb_data_pull(pkt->data, pkt->datalen,
+				  mem_ctx, &c->data, &consumed);
+}
+
+/*
+ * Old wire layout used by the ctdb_req_keepalive_*_old() helpers.
+ * The packet has no variable-length part; its size is sizeof() of this
+ * structure.
+ */
+struct ctdb_req_keepalive_wire {
+ struct ctdb_req_header hdr;
+ uint32_t version;
+ uint32_t uptime;
+};
+
+/*
+ * Number of bytes needed to marshal a keepalive request in the old wire
+ * format.  The size is fixed; neither h nor c is inspected.
+ */
+static size_t ctdb_req_keepalive_len_old(struct ctdb_req_header *h,
+					 struct ctdb_req_keepalive *c)
+{
+	const size_t total = sizeof(struct ctdb_req_keepalive_wire);
+
+	return total;
+}
+
+/*
+ * Marshal h and c into buf using the old wire layout.
+ *
+ * If *buflen is smaller than the required size, the required size is
+ * stored in *buflen and EMSGSIZE is returned before anything is written.
+ * Otherwise h->length is set to *buflen, the packet is written and 0 is
+ * returned.
+ */
+static int ctdb_req_keepalive_push_old(struct ctdb_req_header *h,
+				       struct ctdb_req_keepalive *c,
+				       uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_req_keepalive_wire *pkt;
+	size_t needed;
+
+	needed = ctdb_req_keepalive_len_old(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	pkt = (struct ctdb_req_keepalive_wire *)buf;
+
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&pkt->hdr);
+
+	pkt->version = c->version;
+	pkt->uptime = c->uptime;
+
+	return 0;
+}
+
+/*
+ * Unmarshal an old-format keepalive packet from buf into c, and into h
+ * when h is not NULL.  mem_ctx is not used.
+ *
+ * Returns EMSGSIZE if buflen is smaller than the fixed packet size, the
+ * error code of a failing header pull, or 0 on success.
+ */
+static int ctdb_req_keepalive_pull_old(uint8_t *buf, size_t buflen,
+				       struct ctdb_req_header *h,
+				       TALLOC_CTX *mem_ctx,
+				       struct ctdb_req_keepalive *c)
+{
+	struct ctdb_req_keepalive_wire *pkt =
+		(struct ctdb_req_keepalive_wire *)buf;
+	const size_t fixed = sizeof(struct ctdb_req_keepalive_wire);
+	int ret;
+
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&pkt->hdr, buflen,
+					       h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->version = pkt->version;
+	c->uptime = pkt->uptime;
+
+	return 0;
+}
+
+/*
+ * Old wire layout used by the ctdb_req_tunnel_*_old() helpers.
+ *
+ * The size of the fixed part is taken as offsetof(..., data); datalen
+ * bytes of payload start at data[0].
+ */
+struct ctdb_req_tunnel_wire {
+ struct ctdb_req_header hdr;
+ uint64_t tunnel_id;
+ uint32_t flags;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * Number of bytes needed to marshal c in the old wire format: the fixed
+ * part of struct ctdb_req_tunnel_wire plus the payload.
+ * h is not used.
+ */
+static size_t ctdb_req_tunnel_len_old(struct ctdb_req_header *h,
+				      struct ctdb_req_tunnel *c)
+{
+	size_t total = offsetof(struct ctdb_req_tunnel_wire, data);
+
+	total += ctdb_tdb_data_len(&c->data);
+
+	return total;
+}
+
+/*
+ * Marshal h and c into buf using the old wire layout.
+ *
+ * If *buflen is smaller than the required size, the required size is
+ * stored in *buflen and EMSGSIZE is returned before anything is written.
+ * Otherwise h->length is set to *buflen, the packet is written and 0 is
+ * returned.
+ */
+static int ctdb_req_tunnel_push_old(struct ctdb_req_header *h,
+				    struct ctdb_req_tunnel *c,
+				    uint8_t *buf, size_t *buflen)
+{
+	struct ctdb_req_tunnel_wire *pkt;
+	size_t needed, written;
+
+	needed = ctdb_req_tunnel_len_old(h, c);
+	if (needed > *buflen) {
+		*buflen = needed;
+		return EMSGSIZE;
+	}
+
+	pkt = (struct ctdb_req_tunnel_wire *)buf;
+
+	h->length = *buflen;
+	ctdb_req_header_push_old(h, (uint8_t *)&pkt->hdr);
+
+	pkt->tunnel_id = c->tunnel_id;
+	pkt->flags = c->flags;
+
+	pkt->datalen = ctdb_tdb_data_len(&c->data);
+	ctdb_tdb_data_push(&c->data, pkt->data, &written);
+
+	return 0;
+}
+
+/*
+ * Unmarshal an old-format packet from buf into c, and into h when h is
+ * not NULL.
+ *
+ * Returns EMSGSIZE if buflen cannot hold the fixed part of the packet
+ * plus the data length it announces, the error code of a failing
+ * helper, or 0 on success.
+ */
+static int ctdb_req_tunnel_pull_old(uint8_t *buf, size_t buflen,
+				    struct ctdb_req_header *h,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_req_tunnel *c)
+{
+	struct ctdb_req_tunnel_wire *pkt =
+		(struct ctdb_req_tunnel_wire *)buf;
+	const size_t fixed = offsetof(struct ctdb_req_tunnel_wire, data);
+	size_t consumed;
+	int ret;
+
+	/* The fixed part must be present before datalen is read */
+	if (buflen < fixed) {
+		return EMSGSIZE;
+	}
+
+	/* Bound the length, catch wrap-around, then bound the sum */
+	if (pkt->datalen > buflen ||
+	    fixed + pkt->datalen < fixed ||
+	    buflen < fixed + pkt->datalen) {
+		return EMSGSIZE;
+	}
+
+	if (h != NULL) {
+		ret = ctdb_req_header_pull_old((uint8_t *)&pkt->hdr, buflen,
+					       h);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	c->tunnel_id = pkt->tunnel_id;
+	c->flags = pkt->flags;
+
+	return ctdb_tdb_data_pull(pkt->data, pkt->datalen, mem_ctx, &c->data,
+				  &consumed);
+}
+
+
+/*
+ * Instantiate one compatibility test per packet type.
+ * NOTE(review): the COMPAT_CTDB*_TEST macros are defined outside this
+ * part of the file; presumably each one defines the matching
+ * COMPAT_TEST_FUNC(NAME) function that protocol_ctdb_compat_test()
+ * calls - confirm against the macro definitions.
+ */
+COMPAT_CTDB1_TEST(struct ctdb_req_header, ctdb_req_header);
+
+COMPAT_CTDB4_TEST(struct ctdb_req_call, ctdb_req_call, CTDB_REQ_CALL);
+COMPAT_CTDB4_TEST(struct ctdb_reply_call, ctdb_reply_call, CTDB_REPLY_CALL);
+COMPAT_CTDB4_TEST(struct ctdb_reply_error, ctdb_reply_error, CTDB_REPLY_ERROR);
+COMPAT_CTDB4_TEST(struct ctdb_req_dmaster, ctdb_req_dmaster, CTDB_REQ_DMASTER);
+COMPAT_CTDB4_TEST(struct ctdb_reply_dmaster, ctdb_reply_dmaster, CTDB_REPLY_DMASTER);
+
+COMPAT_CTDB5_TEST(struct ctdb_req_control, ctdb_req_control, CTDB_REQ_CONTROL);
+COMPAT_CTDB6_TEST(struct ctdb_reply_control, ctdb_reply_control, CTDB_REPLY_CONTROL);
+
+COMPAT_CTDB7_TEST(struct ctdb_req_message, ctdb_req_message, CTDB_REQ_MESSAGE);
+COMPAT_CTDB4_TEST(struct ctdb_req_message_data, ctdb_req_message_data, CTDB_REQ_MESSAGE);
+
+COMPAT_CTDB4_TEST(struct ctdb_req_keepalive, ctdb_req_keepalive, CTDB_REQ_KEEPALIVE);
+COMPAT_CTDB4_TEST(struct ctdb_req_tunnel, ctdb_req_tunnel, CTDB_REQ_TUNNEL);
+
+/* The control tests iterate over opcodes 0 .. NUM_CONTROLS-1 */
+#define NUM_CONTROLS 151
+
+/*
+ * Run every packet compatibility test once: header, call/error/dmaster
+ * packets, request and reply controls for each opcode below
+ * NUM_CONTROLS, messages for each listed srvid, raw message data,
+ * keepalive and tunnel packets.
+ */
+static void protocol_ctdb_compat_test(void)
+{
+	uint64_t srvids[] = {
+		CTDB_SRVID_BANNING,
+		CTDB_SRVID_ELECTION,
+		CTDB_SRVID_LEADER,
+		CTDB_SRVID_RECONFIGURE,
+		CTDB_SRVID_RELEASE_IP,
+		CTDB_SRVID_TAKE_IP,
+		CTDB_SRVID_SET_NODE_FLAGS,
+		CTDB_SRVID_RECD_UPDATE_IP,
+		CTDB_SRVID_VACUUM_FETCH,
+		CTDB_SRVID_DETACH_DATABASE,
+		CTDB_SRVID_MEM_DUMP,
+		CTDB_SRVID_GETLOG,
+		CTDB_SRVID_CLEARLOG,
+		CTDB_SRVID_PUSH_NODE_FLAGS,
+		CTDB_SRVID_RELOAD_NODES,
+		CTDB_SRVID_TAKEOVER_RUN,
+		CTDB_SRVID_REBALANCE_NODE,
+		CTDB_SRVID_DISABLE_TAKEOVER_RUNS,
+		CTDB_SRVID_DISABLE_RECOVERIES,
+		CTDB_SRVID_DISABLE_IP_CHECK,
+	};
+	uint32_t op;
+	unsigned int idx;
+
+	/* Packet header */
+	COMPAT_TEST_FUNC(ctdb_req_header)();
+
+	/* Call, error and dmaster packets */
+	COMPAT_TEST_FUNC(ctdb_req_call)();
+	COMPAT_TEST_FUNC(ctdb_reply_call)();
+	COMPAT_TEST_FUNC(ctdb_reply_error)();
+	COMPAT_TEST_FUNC(ctdb_req_dmaster)();
+	COMPAT_TEST_FUNC(ctdb_reply_dmaster)();
+
+	/* All control requests first, then all control replies */
+	for (op = 0; op < NUM_CONTROLS; op++) {
+		COMPAT_TEST_FUNC(ctdb_req_control)(op);
+	}
+	for (op = 0; op < NUM_CONTROLS; op++) {
+		COMPAT_TEST_FUNC(ctdb_reply_control)(op);
+	}
+
+	/* Messages, one per srvid, then raw message data */
+	for (idx = 0; idx < ARRAY_SIZE(srvids); idx++) {
+		COMPAT_TEST_FUNC(ctdb_req_message)(srvids[idx]);
+	}
+	COMPAT_TEST_FUNC(ctdb_req_message_data)();
+
+	COMPAT_TEST_FUNC(ctdb_req_keepalive)();
+	COMPAT_TEST_FUNC(ctdb_req_tunnel)();
+}
+
+/*
+ * Entry point: hand the command line and the compatibility test suite
+ * to protocol_test_iterate() and exit with status 0.
+ */
+int main(int argc, const char *argv[])
+{
+	void (*suite)(void) = protocol_ctdb_compat_test;
+
+	protocol_test_iterate(argc, argv, suite);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/protocol_ctdb_test.c b/ctdb/tests/src/protocol_ctdb_test.c
new file mode 100644
index 0000000..840d465
--- /dev/null
+++ b/ctdb/tests/src/protocol_ctdb_test.c
@@ -0,0 +1,365 @@
+/*
+ protocol tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+
+#include "protocol/protocol_basic.c"
+#include "protocol/protocol_types.c"
+#include "protocol/protocol_header.c"
+#include "protocol/protocol_call.c"
+#include "protocol/protocol_control.c"
+#include "protocol/protocol_message.c"
+#include "protocol/protocol_keepalive.c"
+#include "protocol/protocol_tunnel.c"
+#include "protocol/protocol_packet.c"
+
+#include "tests/src/protocol_common.h"
+#include "tests/src/protocol_common_ctdb.h"
+
+/*
+ * Functions to test marshalling
+ */
+
+/*
+ * PROTOCOL_CTDB1_TEST(TYPE, NAME) - for ctdb_req_header
+ *
+ * Defines TEST_FUNC(NAME)(void).  The generated function fills a TYPE
+ * with FILL_FUNC(NAME), allocates a packet of at least LEN_FUNC(NAME)
+ * bytes, pushes the value into it and pulls it back into a second TYPE.
+ * It asserts that push and pull both report LEN_FUNC(NAME) bytes via
+ * np, that the pull returns 0, and then passes both values to
+ * VERIFY_FUNC(NAME).
+ */
+#define PROTOCOL_CTDB1_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ TYPE c1, c2; \
+ uint8_t *pkt; \
+ size_t pkt_len, buflen, np; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s\n", #NAME); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ FILL_FUNC(NAME)(&c1); \
+ buflen = LEN_FUNC(NAME)(&c1); \
+ ret = ctdb_allocate_pkt(mem_ctx, buflen, &pkt, &pkt_len); \
+ assert(ret == 0); \
+ assert(pkt != NULL); \
+ assert(pkt_len >= buflen); \
+ np = 0; \
+ PUSH_FUNC(NAME)(&c1, pkt, &np); \
+ assert(np == buflen); \
+ np = 0; \
+ ret = PULL_FUNC(NAME)(pkt, pkt_len, &c2, &np); \
+ assert(ret == 0); \
+ assert(np == buflen); \
+ VERIFY_FUNC(NAME)(&c1, &c2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * PROTOCOL_CTDB2_TEST(TYPE, NAME) - for ctdb_req_control_data,
+ * ctdb_reply_control_data
+ *
+ * Defines TEST_FUNC(NAME)(uint32_t opcode).  The generated function
+ * fills a TYPE for the given opcode, pushes it into a freshly allocated
+ * packet and pulls it back (passing opcode and mem_ctx to the pull
+ * function).  It asserts that push and pull both report LEN_FUNC(NAME)
+ * bytes via np, that the pull returns 0, and then passes both values to
+ * VERIFY_FUNC(NAME).
+ */
+#define PROTOCOL_CTDB2_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(uint32_t opcode) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ TYPE c1, c2; \
+ uint8_t *pkt; \
+ size_t pkt_len, buflen, np; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s %u\n", #NAME, opcode); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ FILL_FUNC(NAME)(mem_ctx, &c1, opcode); \
+ buflen = LEN_FUNC(NAME)(&c1); \
+ ret = ctdb_allocate_pkt(mem_ctx, buflen, &pkt, &pkt_len); \
+ assert(ret == 0); \
+ assert(pkt != NULL); \
+ assert(pkt_len >= buflen); \
+ np = 0; \
+ PUSH_FUNC(NAME)(&c1, pkt, &np); \
+ assert(np == buflen); \
+ np = 0; \
+ ret = PULL_FUNC(NAME)(pkt, pkt_len, opcode, mem_ctx, &c2, &np); \
+ assert(ret == 0); \
+ assert(np == buflen); \
+ VERIFY_FUNC(NAME)(&c1, &c2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * PROTOCOL_CTDB3_TEST(TYPE, NAME) - for ctdb_message_data
+ *
+ * Defines TEST_FUNC(NAME)(uint64_t srvid).  The generated function
+ * fills a TYPE for the given srvid, pushes it into a freshly allocated
+ * packet and pulls it back; srvid is passed to the fill, len, push,
+ * pull and verify functions.  It asserts that push and pull both report
+ * LEN_FUNC(NAME) bytes via np and that the pull returns 0.
+ */
+#define PROTOCOL_CTDB3_TEST(TYPE, NAME) \
+static void TEST_FUNC(NAME)(uint64_t srvid) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ TYPE c1, c2; \
+ uint8_t *pkt; \
+ size_t pkt_len, buflen, np; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s %"PRIx64"\n", #NAME, srvid); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ FILL_FUNC(NAME)(mem_ctx, &c1, srvid); \
+ buflen = LEN_FUNC(NAME)(&c1, srvid); \
+ ret = ctdb_allocate_pkt(mem_ctx, buflen, &pkt, &pkt_len); \
+ assert(ret == 0); \
+ assert(pkt != NULL); \
+ assert(pkt_len >= buflen); \
+ np = 0; \
+ PUSH_FUNC(NAME)(&c1, srvid, pkt, &np); \
+ assert(np == buflen); \
+ np = 0; \
+ ret = PULL_FUNC(NAME)(pkt, pkt_len, srvid, mem_ctx, &c2, &np); \
+ assert(ret == 0); \
+ assert(np == buflen); \
+ VERIFY_FUNC(NAME)(&c1, &c2, srvid); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * PROTOCOL_CTDB4_TEST(TYPE, NAME, OPER) - for ctdb_req_call,
+ * ctdb_reply_call, etc.
+ *
+ * Defines TEST_FUNC(NAME)(void) for packets that carry a header.  The
+ * generated function fills a header and a TYPE, then:
+ *  - pushes with a length of 0 and asserts that this fails with
+ *    EMSGSIZE and reports LEN_FUNC(NAME) as the required length;
+ *  - pushes again with the allocated packet length and asserts success;
+ *  - pulls header and payload back, verifies both against the originals
+ *    and asserts that the pulled header length equals the packet length.
+ * OPER is accepted but not referenced in the macro body.
+ */
+#define PROTOCOL_CTDB4_TEST(TYPE, NAME, OPER) \
+static void TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ struct ctdb_req_header h1, h2; \
+ TYPE c1, c2; \
+ uint8_t *pkt; \
+ size_t pkt_len, buflen, len; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s\n", #NAME); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h1); \
+ FILL_FUNC(NAME)(mem_ctx, &c1); \
+ buflen = LEN_FUNC(NAME)(&h1, &c1); \
+ ret = ctdb_allocate_pkt(mem_ctx, buflen, &pkt, &pkt_len); \
+ assert(ret == 0); \
+ assert(pkt != NULL); \
+ assert(pkt_len >= buflen); \
+ len = 0; \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &len); \
+ assert(ret == EMSGSIZE); \
+ assert(len == buflen); \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &pkt_len); \
+ assert(ret == 0); \
+ ret = PULL_FUNC(NAME)(pkt, pkt_len, &h2, mem_ctx, &c2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ assert(h2.length == pkt_len); \
+ VERIFY_FUNC(NAME)(&c1, &c2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * PROTOCOL_CTDB5_TEST(TYPE, NAME, OPER) - for ctdb_req_control
+ *
+ * Defines TEST_FUNC(NAME)(uint32_t opcode).  The generated function
+ * fills a header and a TYPE for the given opcode, then:
+ *  - pushes with a length of 0 and asserts that this fails with
+ *    EMSGSIZE and reports LEN_FUNC(NAME) as the required length;
+ *  - pushes again with the allocated packet length and asserts success;
+ *  - pulls header and payload back, verifies both against the originals
+ *    and asserts that the pulled header length equals the packet length.
+ * OPER is accepted but not referenced in the macro body.
+ */
+#define PROTOCOL_CTDB5_TEST(TYPE, NAME, OPER) \
+static void TEST_FUNC(NAME)(uint32_t opcode) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ struct ctdb_req_header h1, h2; \
+ TYPE c1, c2; \
+ uint8_t *pkt; \
+ size_t pkt_len, buflen, len; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s %u\n", #NAME, opcode); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h1); \
+ FILL_FUNC(NAME)(mem_ctx, &c1, opcode); \
+ buflen = LEN_FUNC(NAME)(&h1, &c1); \
+ ret = ctdb_allocate_pkt(mem_ctx, buflen, &pkt, &pkt_len); \
+ assert(ret == 0); \
+ assert(pkt != NULL); \
+ assert(pkt_len >= buflen); \
+ len = 0; \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &len); \
+ assert(ret == EMSGSIZE); \
+ assert(len == buflen); \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &pkt_len); \
+ assert(ret == 0); \
+ ret = PULL_FUNC(NAME)(pkt, pkt_len, &h2, mem_ctx, &c2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ assert(h2.length == pkt_len); \
+ VERIFY_FUNC(NAME)(&c1, &c2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * PROTOCOL_CTDB6_TEST(TYPE, NAME, OPER) - for ctdb_reply_control
+ *
+ * Defines TEST_FUNC(NAME)(uint32_t opcode).  Same sequence as the
+ * request-control variant (EMSGSIZE probe with length 0, real push,
+ * pull, header and payload verification), except that opcode is also
+ * passed to the pull function.
+ * OPER is accepted but not referenced in the macro body.
+ */
+#define PROTOCOL_CTDB6_TEST(TYPE, NAME, OPER) \
+static void TEST_FUNC(NAME)(uint32_t opcode) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ struct ctdb_req_header h1, h2; \
+ TYPE c1, c2; \
+ uint8_t *pkt; \
+ size_t pkt_len, buflen, len; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s %u\n", #NAME, opcode); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h1); \
+ FILL_FUNC(NAME)(mem_ctx, &c1, opcode); \
+ buflen = LEN_FUNC(NAME)(&h1, &c1); \
+ ret = ctdb_allocate_pkt(mem_ctx, buflen, &pkt, &pkt_len); \
+ assert(ret == 0); \
+ assert(pkt != NULL); \
+ assert(pkt_len >= buflen); \
+ len = 0; \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &len); \
+ assert(ret == EMSGSIZE); \
+ assert(len == buflen); \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &pkt_len); \
+ assert(ret == 0); \
+ ret = PULL_FUNC(NAME)(pkt, pkt_len, opcode, &h2, mem_ctx, &c2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ assert(h2.length == pkt_len); \
+ VERIFY_FUNC(NAME)(&c1, &c2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * PROTOCOL_CTDB7_TEST(TYPE, NAME, OPER) - for ctdb_req_message
+ *
+ * Defines TEST_FUNC(NAME)(uint64_t srvid).  The generated function
+ * fills a header and a TYPE for the given srvid, then:
+ *  - pushes with a length of 0 and asserts that this fails with
+ *    EMSGSIZE and reports LEN_FUNC(NAME) as the required length;
+ *  - pushes again with the allocated packet length and asserts success;
+ *  - pulls header and payload back, verifies both against the originals
+ *    and asserts that the pulled header length equals the packet length.
+ * OPER is accepted but not referenced in the macro body.
+ */
+#define PROTOCOL_CTDB7_TEST(TYPE, NAME, OPER) \
+static void TEST_FUNC(NAME)(uint64_t srvid) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ struct ctdb_req_header h1, h2; \
+ TYPE c1, c2; \
+ uint8_t *pkt; \
+ size_t pkt_len, buflen, len; \
+ int ret; \
+\
+ protocol_test_iterate_tag("%s %"PRIx64"\n", #NAME, srvid); \
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ fill_ctdb_req_header(&h1); \
+ FILL_FUNC(NAME)(mem_ctx, &c1, srvid); \
+ buflen = LEN_FUNC(NAME)(&h1, &c1); \
+ ret = ctdb_allocate_pkt(mem_ctx, buflen, &pkt, &pkt_len); \
+ assert(ret == 0); \
+ assert(pkt != NULL); \
+ assert(pkt_len >= buflen); \
+ len = 0; \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &len); \
+ assert(ret == EMSGSIZE); \
+ assert(len == buflen); \
+ ret = PUSH_FUNC(NAME)(&h1, &c1, pkt, &pkt_len); \
+ assert(ret == 0); \
+ ret = PULL_FUNC(NAME)(pkt, pkt_len, &h2, mem_ctx, &c2); \
+ assert(ret == 0); \
+ verify_ctdb_req_header(&h1, &h2); \
+ assert(h2.length == pkt_len); \
+ VERIFY_FUNC(NAME)(&c1, &c2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * Instantiate one TEST_FUNC(NAME) per protocol element; each of them is
+ * called from protocol_ctdb_test().
+ */
+PROTOCOL_CTDB1_TEST(struct ctdb_req_header, ctdb_req_header);
+
+PROTOCOL_CTDB4_TEST(struct ctdb_req_call, ctdb_req_call, CTDB_REQ_CALL);
+PROTOCOL_CTDB4_TEST(struct ctdb_reply_call, ctdb_reply_call, CTDB_REPLY_CALL);
+PROTOCOL_CTDB4_TEST(struct ctdb_reply_error, ctdb_reply_error,
+ CTDB_REPLY_ERROR);
+PROTOCOL_CTDB4_TEST(struct ctdb_req_dmaster, ctdb_req_dmaster,
+ CTDB_REQ_DMASTER);
+PROTOCOL_CTDB4_TEST(struct ctdb_reply_dmaster, ctdb_reply_dmaster,
+ CTDB_REPLY_DMASTER);
+
+/* The control tests iterate over opcodes 0 .. NUM_CONTROLS-1 */
+#define NUM_CONTROLS 161
+
+PROTOCOL_CTDB2_TEST(struct ctdb_req_control_data, ctdb_req_control_data);
+PROTOCOL_CTDB2_TEST(struct ctdb_reply_control_data, ctdb_reply_control_data);
+
+PROTOCOL_CTDB5_TEST(struct ctdb_req_control, ctdb_req_control,
+ CTDB_REQ_CONTROL);
+PROTOCOL_CTDB6_TEST(struct ctdb_reply_control, ctdb_reply_control,
+ CTDB_REPLY_CONTROL);
+
+PROTOCOL_CTDB3_TEST(union ctdb_message_data, ctdb_message_data);
+PROTOCOL_CTDB7_TEST(struct ctdb_req_message, ctdb_req_message,
+ CTDB_REQ_MESSAGE);
+PROTOCOL_CTDB4_TEST(struct ctdb_req_message_data, ctdb_req_message_data,
+ CTDB_REQ_MESSAGE);
+
+PROTOCOL_CTDB4_TEST(struct ctdb_req_keepalive, ctdb_req_keepalive,
+ CTDB_REQ_KEEPALIVE);
+PROTOCOL_CTDB4_TEST(struct ctdb_req_tunnel, ctdb_req_tunnel, CTDB_REQ_TUNNEL);
+
+/*
+ * Run every marshalling test once: header, call/error/dmaster packets,
+ * control data and control packets for each opcode below NUM_CONTROLS,
+ * message data and message packets for each listed srvid, raw message
+ * data, keepalive and tunnel packets.
+ */
+static void protocol_ctdb_test(void)
+{
+	uint64_t srvids[] = {
+		CTDB_SRVID_BANNING,
+		CTDB_SRVID_ELECTION,
+		CTDB_SRVID_LEADER,
+		CTDB_SRVID_RECONFIGURE,
+		CTDB_SRVID_RELEASE_IP,
+		CTDB_SRVID_TAKE_IP,
+		CTDB_SRVID_SET_NODE_FLAGS,
+		CTDB_SRVID_RECD_UPDATE_IP,
+		CTDB_SRVID_VACUUM_FETCH,
+		CTDB_SRVID_DETACH_DATABASE,
+		CTDB_SRVID_MEM_DUMP,
+		CTDB_SRVID_GETLOG,
+		CTDB_SRVID_CLEARLOG,
+		CTDB_SRVID_PUSH_NODE_FLAGS,
+		CTDB_SRVID_RELOAD_NODES,
+		CTDB_SRVID_TAKEOVER_RUN,
+		CTDB_SRVID_REBALANCE_NODE,
+		CTDB_SRVID_DISABLE_TAKEOVER_RUNS,
+		CTDB_SRVID_DISABLE_RECOVERIES,
+		CTDB_SRVID_DISABLE_IP_CHECK,
+	};
+	uint32_t op;
+	size_t idx;
+
+	/* Packet header */
+	TEST_FUNC(ctdb_req_header)();
+
+	/* Call, error and dmaster packets */
+	TEST_FUNC(ctdb_req_call)();
+	TEST_FUNC(ctdb_reply_call)();
+	TEST_FUNC(ctdb_reply_error)();
+	TEST_FUNC(ctdb_req_dmaster)();
+	TEST_FUNC(ctdb_reply_dmaster)();
+
+	/* Control payloads on their own: requests, then replies */
+	for (op = 0; op < NUM_CONTROLS; op++) {
+		TEST_FUNC(ctdb_req_control_data)(op);
+	}
+	for (op = 0; op < NUM_CONTROLS; op++) {
+		TEST_FUNC(ctdb_reply_control_data)(op);
+	}
+
+	/* Complete control packets: requests, then replies */
+	for (op = 0; op < NUM_CONTROLS; op++) {
+		TEST_FUNC(ctdb_req_control)(op);
+	}
+	for (op = 0; op < NUM_CONTROLS; op++) {
+		TEST_FUNC(ctdb_reply_control)(op);
+	}
+
+	/* Message payloads, then complete message packets */
+	for (idx = 0; idx < ARRAY_SIZE(srvids); idx++) {
+		TEST_FUNC(ctdb_message_data)(srvids[idx]);
+	}
+	for (idx = 0; idx < ARRAY_SIZE(srvids); idx++) {
+		TEST_FUNC(ctdb_req_message)(srvids[idx]);
+	}
+	TEST_FUNC(ctdb_req_message_data)();
+
+	TEST_FUNC(ctdb_req_keepalive)();
+	TEST_FUNC(ctdb_req_tunnel)();
+}
+
+/*
+ * Entry point: hand the command line and the marshalling test suite to
+ * protocol_test_iterate() and exit with status 0.
+ */
+int main(int argc, const char *argv[])
+{
+	void (*suite)(void) = protocol_ctdb_test;
+
+	protocol_test_iterate(argc, argv, suite);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/protocol_types_compat_test.c b/ctdb/tests/src/protocol_types_compat_test.c
new file mode 100644
index 0000000..140ea86
--- /dev/null
+++ b/ctdb/tests/src/protocol_types_compat_test.c
@@ -0,0 +1,2371 @@
+/*
+ protocol types backward compatibility test
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "protocol/protocol_basic.c"
+#include "protocol/protocol_types.c"
+
+#include "tests/src/protocol_common.h"
+
+/*
+ * Name builders used by the test-generating macros below:
+ *   COMPAT_TEST_FUNC(x) -> test_x_compat
+ *   OLD_LEN_FUNC(x)     -> x_len_old
+ *   OLD_PUSH_FUNC(x)    -> x_push_old
+ *   OLD_PULL_FUNC(x)    -> x_pull_old
+ */
+#define COMPAT_TEST_FUNC(NAME) test_ ##NAME## _compat
+#define OLD_LEN_FUNC(NAME) NAME## _len_old
+#define OLD_PUSH_FUNC(NAME) NAME## _push_old
+#define OLD_PULL_FUNC(NAME) NAME## _pull_old
+
+/*
+ * Generate test_<NAME>_compat() for a TYPE that is pulled into a
+ * caller-provided object (no talloc context passed to the pull functions).
+ *
+ * The generated test fills one value, asserts that the current and the old
+ * length functions agree, pushes the value with both implementations into
+ * zero-initialised buffers and asserts the bytes are identical, then pulls
+ * each buffer back with its own implementation and verifies that the two
+ * results match.
+ */
+#define COMPAT_TYPE1_TEST(TYPE, NAME) \
+static void COMPAT_TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ uint8_t *buf1, *buf2; \
+ TYPE p = { 0 }, p1, p2; \
+ size_t buflen1, buflen2, np = 0; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ FILL_FUNC(NAME)(&p); \
+ buflen1 = LEN_FUNC(NAME)(&p); \
+ buflen2 = OLD_LEN_FUNC(NAME)(&p); \
+ assert(buflen1 == buflen2); \
+ buf1 = talloc_zero_size(mem_ctx, buflen1); \
+ assert(buf1 != NULL); \
+ buf2 = talloc_zero_size(mem_ctx, buflen2); \
+ assert(buf2 != NULL); \
+ PUSH_FUNC(NAME)(&p, buf1, &np); \
+ OLD_PUSH_FUNC(NAME)(&p, buf2); \
+ assert(memcmp(buf1, buf2, buflen1) == 0); \
+ ret = PULL_FUNC(NAME)(buf1, buflen1, &p1, &np); \
+ assert(ret == 0); \
+ ret = OLD_PULL_FUNC(NAME)(buf2, buflen2, &p2); \
+ assert(ret == 0); \
+ VERIFY_FUNC(NAME)(&p1, &p2); \
+ talloc_free(mem_ctx); \
+}
+
+/*
+ * Generate test_<NAME>_compat() for a TYPE that is talloc-allocated: the
+ * value is created with talloc_zero(), filled with itself as the fill
+ * context, and both pull functions allocate their result under mem_ctx.
+ *
+ * The checks are: identical length from the current and old length
+ * functions, byte-identical push output, and matching values after each
+ * buffer is pulled back with its own implementation.
+ */
+#define COMPAT_TYPE3_TEST(TYPE, NAME) \
+static void COMPAT_TEST_FUNC(NAME)(void) \
+{ \
+ TALLOC_CTX *mem_ctx; \
+ uint8_t *buf1, *buf2; \
+ TYPE *p, *p1, *p2; \
+ size_t buflen1, buflen2, np = 0; \
+ int ret; \
+\
+ mem_ctx = talloc_new(NULL); \
+ assert(mem_ctx != NULL); \
+ p = talloc_zero(mem_ctx, TYPE); \
+ assert(p != NULL); \
+ FILL_FUNC(NAME)(p, p); \
+ buflen1 = LEN_FUNC(NAME)(p); \
+ buflen2 = OLD_LEN_FUNC(NAME)(p); \
+ assert(buflen1 == buflen2); \
+ buf1 = talloc_zero_size(mem_ctx, buflen1); \
+ assert(buf1 != NULL); \
+ buf2 = talloc_zero_size(mem_ctx, buflen2); \
+ assert(buf2 != NULL); \
+ PUSH_FUNC(NAME)(p, buf1, &np); \
+ OLD_PUSH_FUNC(NAME)(p, buf2); \
+ assert(memcmp(buf1, buf2, buflen1) == 0); \
+ ret = PULL_FUNC(NAME)(buf1, buflen1, mem_ctx, &p1, &np); \
+ assert(ret == 0); \
+ ret = OLD_PULL_FUNC(NAME)(buf2, buflen2, mem_ctx, &p2); \
+ assert(ret == 0); \
+ VERIFY_FUNC(NAME)(p1, p2); \
+ talloc_free(mem_ctx); \
+}
+
+
+/* Old wire length of ctdb_statistics: the size of the in-memory struct. */
+static size_t ctdb_statistics_len_old(struct ctdb_statistics *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_statistics: copy the raw struct bytes into buf. */
+static void ctdb_statistics_push_old(struct ctdb_statistics *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_statistics: allocate a struct under mem_ctx and fill it
+ * from the leading bytes of buf.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is shorter than the struct, or
+ * ENOMEM if the allocation fails.
+ */
+static int ctdb_statistics_pull_old(uint8_t *buf, size_t buflen,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_statistics **out)
+{
+	struct ctdb_statistics *stats;
+
+	if (sizeof(*stats) > buflen) {
+		return EMSGSIZE;
+	}
+
+	stats = talloc(mem_ctx, struct ctdb_statistics);
+	if (stats == NULL) {
+		return ENOMEM;
+	}
+	memcpy(stats, buf, sizeof(*stats));
+
+	*out = stats;
+	return 0;
+}
+
+/*
+ * Old wire layout of a VNN map. The push/pull helpers below cast the raw
+ * buffer to this struct; map[] is declared with one element but is accessed
+ * as an array of "size" entries.
+ */
+struct ctdb_vnn_map_wire {
+ uint32_t generation;
+ uint32_t size;
+ uint32_t map[1];
+};
+
+/* Old wire length of a VNN map: fixed header plus "size" 32-bit entries. */
+static size_t ctdb_vnn_map_len_old(struct ctdb_vnn_map *in)
+{
+	size_t header_len = offsetof(struct ctdb_vnn_map, map);
+	size_t map_len = in->size * sizeof(uint32_t);
+
+	return header_len + map_len;
+}
+
+/*
+ * Old push for a VNN map: copy the fields that precede the map pointer,
+ * then append the map entries directly after them.
+ */
+static void ctdb_vnn_map_push_old(struct ctdb_vnn_map *in, uint8_t *buf)
+{
+	struct ctdb_vnn_map_wire *dst = (struct ctdb_vnn_map_wire *)buf;
+	const size_t header_len = offsetof(struct ctdb_vnn_map, map);
+	const size_t map_len = in->size * sizeof(uint32_t);
+
+	memcpy(dst, in, header_len);
+	memcpy(dst->map, in->map, map_len);
+}
+
+/*
+ * Old pull for a VNN map.
+ *
+ * Validates that buf holds the fixed header and "size" map entries (with an
+ * overflow check on the computed length), then builds a talloc'ed
+ * ctdb_vnn_map under mem_ctx whose map array is a child of the result.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, or ENOMEM
+ * if an allocation fails.
+ */
+static int ctdb_vnn_map_pull_old(uint8_t *buf, size_t buflen,
+				 TALLOC_CTX *mem_ctx,
+				 struct ctdb_vnn_map **out)
+{
+	const size_t hdr_len = offsetof(struct ctdb_vnn_map_wire, map);
+	struct ctdb_vnn_map_wire *wire = (struct ctdb_vnn_map_wire *)buf;
+	struct ctdb_vnn_map *vnnmap;
+	size_t map_len;
+
+	/* The fixed header must be present before wire->size is read. */
+	if (buflen < hdr_len) {
+		return EMSGSIZE;
+	}
+
+	map_len = wire->size * sizeof(uint32_t);
+	if (wire->size > buflen / sizeof(uint32_t) ||
+	    hdr_len + map_len < hdr_len ||
+	    buflen < hdr_len + map_len) {
+		return EMSGSIZE;
+	}
+
+	vnnmap = talloc(mem_ctx, struct ctdb_vnn_map);
+	if (vnnmap == NULL) {
+		return ENOMEM;
+	}
+
+	memcpy(vnnmap, wire, offsetof(struct ctdb_vnn_map, map));
+
+	vnnmap->map = talloc_memdup(vnnmap, wire->map, map_len);
+	if (vnnmap->map == NULL) {
+		talloc_free(vnnmap);
+		return ENOMEM;
+	}
+
+	*out = vnnmap;
+	return 0;
+}
+
+/*
+ * Old wire layout of a database id map: a count followed by "num"
+ * ctdb_dbid entries (dbs[] is declared with one element only).
+ */
+struct ctdb_dbid_map_wire {
+ uint32_t num;
+ struct ctdb_dbid dbs[1];
+};
+
+/* Old wire length of a dbid map: 32-bit count plus "num" ctdb_dbid entries. */
+static size_t ctdb_dbid_map_len_old(struct ctdb_dbid_map *in)
+{
+	size_t entries_len = in->num * sizeof(struct ctdb_dbid);
+
+	return sizeof(uint32_t) + entries_len;
+}
+
+/* Old push for a dbid map: store the count, then the raw entry array. */
+static void ctdb_dbid_map_push_old(struct ctdb_dbid_map *in, uint8_t *buf)
+{
+	struct ctdb_dbid_map_wire *dst = (struct ctdb_dbid_map_wire *)buf;
+	const size_t entries_len = in->num * sizeof(struct ctdb_dbid);
+
+	dst->num = in->num;
+	memcpy(dst->dbs, in->dbs, entries_len);
+}
+
+/*
+ * Old pull for a dbid map.
+ *
+ * Checks that buf holds the count and "num" ctdb_dbid entries (guarding the
+ * length computation against wrap-around), then allocates the result under
+ * mem_ctx with the entry array as its talloc child.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, or ENOMEM
+ * if an allocation fails.
+ */
+static int ctdb_dbid_map_pull_old(uint8_t *buf, size_t buflen,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_dbid_map **out)
+{
+	const size_t hdr_len = sizeof(uint32_t);
+	struct ctdb_dbid_map_wire *wire = (struct ctdb_dbid_map_wire *)buf;
+	struct ctdb_dbid_map *dbmap;
+	size_t entries_len;
+
+	/* The count must be readable before it is used. */
+	if (buflen < hdr_len) {
+		return EMSGSIZE;
+	}
+
+	entries_len = wire->num * sizeof(struct ctdb_dbid);
+	if (wire->num > buflen / sizeof(struct ctdb_dbid) ||
+	    hdr_len + entries_len < hdr_len ||
+	    buflen < hdr_len + entries_len) {
+		return EMSGSIZE;
+	}
+
+	dbmap = talloc(mem_ctx, struct ctdb_dbid_map);
+	if (dbmap == NULL) {
+		return ENOMEM;
+	}
+
+	dbmap->num = wire->num;
+	dbmap->dbs = talloc_memdup(dbmap, wire->dbs, entries_len);
+	if (dbmap->dbs == NULL) {
+		talloc_free(dbmap);
+		return ENOMEM;
+	}
+
+	*out = dbmap;
+	return 0;
+}
+
+/* Old wire length of ctdb_pulldb: the size of the in-memory struct. */
+static size_t ctdb_pulldb_len_old(struct ctdb_pulldb *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_pulldb: copy the raw struct bytes into buf. */
+static void ctdb_pulldb_push_old(struct ctdb_pulldb *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_pulldb: duplicate the leading struct-sized bytes of buf
+ * into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_pulldb_pull_old(uint8_t *buf, size_t buflen,
+				TALLOC_CTX *mem_ctx, struct ctdb_pulldb **out)
+{
+	const size_t need = sizeof(struct ctdb_pulldb);
+	struct ctdb_pulldb *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_pulldb_ext: the size of the in-memory struct. */
+static size_t ctdb_pulldb_ext_len_old(struct ctdb_pulldb_ext *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_pulldb_ext: copy the raw struct bytes into buf. */
+static void ctdb_pulldb_ext_push_old(struct ctdb_pulldb_ext *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_pulldb_ext: duplicate the leading struct-sized bytes of
+ * buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_pulldb_ext_pull_old(uint8_t *buf, size_t buflen,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_pulldb_ext **out)
+{
+	const size_t need = sizeof(struct ctdb_pulldb_ext);
+	struct ctdb_pulldb_ext *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_ltdb_header: the size of the in-memory struct. */
+static size_t ctdb_ltdb_header_len_old(struct ctdb_ltdb_header *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_ltdb_header: copy the raw struct bytes into buf. */
+static void ctdb_ltdb_header_push_old(struct ctdb_ltdb_header *in,
+				      uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_ltdb_header: copy the leading struct-sized bytes of buf
+ * into the caller-provided *out. Returns 0, or EMSGSIZE if buf is too short.
+ */
+static int ctdb_ltdb_header_pull_old(uint8_t *buf, size_t buflen,
+				     struct ctdb_ltdb_header *out)
+{
+	if (buflen >= sizeof(*out)) {
+		memcpy(out, buf, sizeof(*out));
+		return 0;
+	}
+
+	return EMSGSIZE;
+}
+
+/*
+ * Old wire layout of a single record. data[] is declared with one element;
+ * the push helper below writes the key bytes there, followed by an optional
+ * ctdb_ltdb_header and then the record data. datalen counts the header when
+ * one is present.
+ */
+struct ctdb_rec_data_wire {
+ uint32_t length;
+ uint32_t reqid;
+ uint32_t keylen;
+ uint32_t datalen;
+ uint8_t data[1];
+};
+
+/*
+ * Old wire length of a record: fixed header, key bytes, data bytes, plus a
+ * ctdb_ltdb_header when in->header is set.
+ */
+static size_t ctdb_rec_data_len_old(struct ctdb_rec_data *in)
+{
+	size_t total = offsetof(struct ctdb_rec_data_wire, data);
+
+	total += in->key.dsize;
+	total += in->data.dsize;
+	if (in->header != NULL) {
+		total += sizeof(struct ctdb_ltdb_header);
+	}
+
+	return total;
+}
+
+/*
+ * Old push for a record: fill in the fixed header, then lay out key,
+ * optional ltdb header and data back to back in wire->data.
+ *
+ * Note that the length field is taken from the current ctdb_rec_data_len(),
+ * not from the old length helper.
+ */
+static void ctdb_rec_data_push_old(struct ctdb_rec_data *in, uint8_t *buf)
+{
+	struct ctdb_rec_data_wire *rec = (struct ctdb_rec_data_wire *)buf;
+	const size_t hdr_len =
+		(in->header != NULL) ? sizeof(struct ctdb_ltdb_header) : 0;
+	uint8_t *cursor = rec->data;
+
+	rec->length = ctdb_rec_data_len(in);
+	rec->reqid = in->reqid;
+	rec->keylen = in->key.dsize;
+	/* The on-wire data length includes the ltdb header, if any. */
+	rec->datalen = in->data.dsize + hdr_len;
+
+	memcpy(cursor, in->key.dptr, in->key.dsize);
+	cursor += in->key.dsize;
+
+	if (in->header != NULL) {
+		memcpy(cursor, in->header, sizeof(struct ctdb_ltdb_header));
+		cursor += sizeof(struct ctdb_ltdb_header);
+	}
+
+	if (in->data.dsize > 0) {
+		memcpy(cursor, in->data.dptr, in->data.dsize);
+	}
+}
+
+/*
+ * Old in-place parse of one record.
+ *
+ * After validating the lengths against buflen (including wrap-around of the
+ * summed sizes), sets *reqid, points key and data into buf (no copies are
+ * made) and stores the total record length in *reclen. *header is always
+ * set to NULL.
+ *
+ * Returns 0 on success or EMSGSIZE for a short/inconsistent buffer.
+ */
+static int ctdb_rec_data_pull_data_old(uint8_t *buf, size_t buflen,
+				       uint32_t *reqid,
+				       struct ctdb_ltdb_header **header,
+				       TDB_DATA *key, TDB_DATA *data,
+				       size_t *reclen)
+{
+	const size_t fixed_len = offsetof(struct ctdb_rec_data_wire, data);
+	struct ctdb_rec_data_wire *rec = (struct ctdb_rec_data_wire *)buf;
+
+	/* The fixed header must be present before its fields are read. */
+	if (buflen < fixed_len) {
+		return EMSGSIZE;
+	}
+
+	if (rec->keylen > buflen || rec->datalen > buflen ||
+	    fixed_len + rec->keylen < fixed_len ||
+	    fixed_len + rec->keylen + rec->datalen < fixed_len ||
+	    buflen < fixed_len + rec->keylen + rec->datalen) {
+		return EMSGSIZE;
+	}
+
+	*reqid = rec->reqid;
+
+	key->dsize = rec->keylen;
+	key->dptr = rec->data;
+
+	/* Always set header to NULL. If it is required, extract it using
+	 * ctdb_rec_data_extract_header()
+	 */
+	*header = NULL;
+
+	data->dsize = rec->datalen;
+	data->dptr = &rec->data[rec->keylen];
+
+	*reclen = fixed_len + rec->keylen + rec->datalen;
+
+	return 0;
+}
+
+/*
+ * Old pull of a record into a caller-provided ctdb_rec_data.
+ *
+ * Parses buf with ctdb_rec_data_pull_data_old() and copies key and data
+ * into talloc chunks under mem_ctx. out->header is always NULL.
+ *
+ * An empty key or data now gets a NULL dptr. Previously dptr was left
+ * untouched in that case, which is indeterminate when the caller allocated
+ * *out with talloc() (as ctdb_rec_data_pull_old() does).
+ *
+ * Returns 0 on success, the parser's error code, or ENOMEM if a copy fails.
+ */
+static int ctdb_rec_data_pull_elems_old(uint8_t *buf, size_t buflen,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_rec_data *out)
+{
+	uint32_t reqid;
+	struct ctdb_ltdb_header *header;
+	TDB_DATA key, data;
+	size_t reclen;
+	int ret;
+
+	ret = ctdb_rec_data_pull_data_old(buf, buflen, &reqid, &header,
+					  &key, &data, &reclen);
+	if (ret != 0) {
+		return ret;
+	}
+
+	out->reqid = reqid;
+	out->header = NULL;
+
+	out->key.dsize = key.dsize;
+	out->key.dptr = NULL;
+	if (key.dsize > 0) {
+		out->key.dptr = talloc_memdup(mem_ctx, key.dptr, key.dsize);
+		if (out->key.dptr == NULL) {
+			return ENOMEM;
+		}
+	}
+
+	out->data.dsize = data.dsize;
+	out->data.dptr = NULL;
+	if (data.dsize > 0) {
+		out->data.dptr = talloc_memdup(mem_ctx, data.dptr, data.dsize);
+		if (out->data.dptr == NULL) {
+			return ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Old pull of a record: allocate a ctdb_rec_data under mem_ctx and fill it
+ * via ctdb_rec_data_pull_elems_old(), using the new object as the talloc
+ * parent of its key/data copies. The object is freed again on failure.
+ *
+ * Returns 0 on success, ENOMEM, or the element parser's error code.
+ */
+static int ctdb_rec_data_pull_old(uint8_t *buf, size_t buflen,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_rec_data **out)
+{
+	struct ctdb_rec_data *rec = talloc(mem_ctx, struct ctdb_rec_data);
+	int rc;
+
+	if (rec == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_rec_data_pull_elems_old(buf, buflen, rec, rec);
+	if (rc == 0) {
+		*out = rec;
+		return rc;
+	}
+
+	TALLOC_FREE(rec);
+	return rc;
+}
+
+/*
+ * Old wire layout of a record buffer: database id, record count, then the
+ * packed record bytes (data[] is declared with one element only).
+ */
+struct ctdb_rec_buffer_wire {
+ uint32_t db_id;
+ uint32_t count;
+ uint8_t data[1];
+};
+
+/* Old wire length of a record buffer: fixed header plus in->buflen bytes. */
+static size_t ctdb_rec_buffer_len_old(struct ctdb_rec_buffer *in)
+{
+	const size_t hdr_len = offsetof(struct ctdb_rec_buffer_wire, data);
+
+	return hdr_len + in->buflen;
+}
+
+/*
+ * Old push for a record buffer: write db_id and count, then append the
+ * packed records when there are any.
+ */
+static void ctdb_rec_buffer_push_old(struct ctdb_rec_buffer *in, uint8_t *buf)
+{
+	struct ctdb_rec_buffer_wire *dst = (struct ctdb_rec_buffer_wire *)buf;
+
+	dst->db_id = in->db_id;
+	dst->count = in->count;
+
+	if (in->buflen == 0) {
+		return;
+	}
+	memcpy(dst->data, in->buf, in->buflen);
+}
+
+/*
+ * Old pull for a record buffer: everything in buf after the fixed header is
+ * treated as packed record data and duplicated as a talloc child of the
+ * result, which is allocated under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is shorter than the fixed header,
+ * or ENOMEM if an allocation fails.
+ */
+static int ctdb_rec_buffer_pull_old(uint8_t *buf, size_t buflen,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_rec_buffer **out)
+{
+	const size_t hdr_len = offsetof(struct ctdb_rec_buffer_wire, data);
+	struct ctdb_rec_buffer_wire *wire = (struct ctdb_rec_buffer_wire *)buf;
+	struct ctdb_rec_buffer *recbuf;
+
+	if (buflen < hdr_len) {
+		return EMSGSIZE;
+	}
+
+	recbuf = talloc(mem_ctx, struct ctdb_rec_buffer);
+	if (recbuf == NULL) {
+		return ENOMEM;
+	}
+
+	recbuf->db_id = wire->db_id;
+	recbuf->count = wire->count;
+	recbuf->buflen = buflen - hdr_len;
+
+	recbuf->buf = talloc_memdup(recbuf, wire->data, recbuf->buflen);
+	if (recbuf->buf == NULL) {
+		talloc_free(recbuf);
+		return ENOMEM;
+	}
+
+	*out = recbuf;
+	return 0;
+}
+
+/* Old wire length of ctdb_traverse_start: the in-memory struct size. */
+static size_t ctdb_traverse_start_len_old(struct ctdb_traverse_start *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_traverse_start: copy the raw struct bytes into buf. */
+static void ctdb_traverse_start_push_old(struct ctdb_traverse_start *in,
+					 uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_traverse_start: duplicate the leading struct-sized
+ * bytes of buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_traverse_start_pull_old(uint8_t *buf, size_t buflen,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_traverse_start **out)
+{
+	const size_t need = sizeof(struct ctdb_traverse_start);
+	struct ctdb_traverse_start *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_traverse_all: the in-memory struct size. */
+static size_t ctdb_traverse_all_len_old(struct ctdb_traverse_all *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_traverse_all: copy the raw struct bytes into buf. */
+static void ctdb_traverse_all_push_old(struct ctdb_traverse_all *in,
+				       uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_traverse_all: duplicate the leading struct-sized bytes
+ * of buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_traverse_all_pull_old(uint8_t *buf, size_t buflen,
+				      TALLOC_CTX *mem_ctx,
+				      struct ctdb_traverse_all **out)
+{
+	const size_t need = sizeof(struct ctdb_traverse_all);
+	struct ctdb_traverse_all *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_traverse_start_ext: the in-memory struct size. */
+static size_t ctdb_traverse_start_ext_len_old(
+			struct ctdb_traverse_start_ext *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_traverse_start_ext: copy the raw struct bytes into buf. */
+static void ctdb_traverse_start_ext_push_old(
+			struct ctdb_traverse_start_ext *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_traverse_start_ext: duplicate the leading struct-sized
+ * bytes of buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_traverse_start_ext_pull_old(uint8_t *buf, size_t buflen,
+					    TALLOC_CTX *mem_ctx,
+					    struct ctdb_traverse_start_ext **out)
+{
+	const size_t need = sizeof(struct ctdb_traverse_start_ext);
+	struct ctdb_traverse_start_ext *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_traverse_all_ext: the in-memory struct size. */
+static size_t ctdb_traverse_all_ext_len_old(struct ctdb_traverse_all_ext *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_traverse_all_ext: copy the raw struct bytes into buf. */
+static void ctdb_traverse_all_ext_push_old(struct ctdb_traverse_all_ext *in,
+					   uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_traverse_all_ext: duplicate the leading struct-sized
+ * bytes of buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_traverse_all_ext_pull_old(uint8_t *buf, size_t buflen,
+					  TALLOC_CTX *mem_ctx,
+					  struct ctdb_traverse_all_ext **out)
+{
+	const size_t need = sizeof(struct ctdb_traverse_all_ext);
+	struct ctdb_traverse_all_ext *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_sock_addr: the size of the in-memory object. */
+static size_t ctdb_sock_addr_len_old(ctdb_sock_addr *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_sock_addr: copy the raw object bytes into buf. */
+static void ctdb_sock_addr_push_old(ctdb_sock_addr *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull of a ctdb_sock_addr into caller-provided storage: copy the
+ * leading object-sized bytes of buf into *out. mem_ctx is not used.
+ * Returns 0, or EMSGSIZE if buf is too short.
+ */
+static int ctdb_sock_addr_pull_elems_old(uint8_t *buf, size_t buflen,
+					 TALLOC_CTX *mem_ctx,
+					 ctdb_sock_addr *out)
+{
+	if (buflen >= sizeof(*out)) {
+		memcpy(out, buf, sizeof(*out));
+		return 0;
+	}
+
+	return EMSGSIZE;
+}
+
+/*
+ * Old pull of a ctdb_sock_addr: allocate the object under mem_ctx and fill
+ * it with ctdb_sock_addr_pull_elems_old(); it is freed again on failure.
+ *
+ * Returns 0 on success, ENOMEM, or the element parser's error code.
+ */
+static int ctdb_sock_addr_pull_old(uint8_t *buf, size_t buflen,
+				   TALLOC_CTX *mem_ctx, ctdb_sock_addr **out)
+{
+	ctdb_sock_addr *addr = talloc(mem_ctx, ctdb_sock_addr);
+	int rc;
+
+	if (addr == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_sock_addr_pull_elems_old(buf, buflen, addr, addr);
+	if (rc == 0) {
+		*out = addr;
+		return rc;
+	}
+
+	TALLOC_FREE(addr);
+	return rc;
+}
+
+/* Old wire length of ctdb_connection: the size of the in-memory struct. */
+static size_t ctdb_connection_len_old(struct ctdb_connection *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_connection: copy the raw struct bytes into buf. */
+static void ctdb_connection_push_old(struct ctdb_connection *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull of a ctdb_connection into caller-provided storage: copy the
+ * leading struct-sized bytes of buf into *out. mem_ctx is not used.
+ * Returns 0, or EMSGSIZE if buf is too short.
+ */
+static int ctdb_connection_pull_elems_old(uint8_t *buf, size_t buflen,
+					  TALLOC_CTX *mem_ctx,
+					  struct ctdb_connection *out)
+{
+	if (buflen >= sizeof(*out)) {
+		memcpy(out, buf, sizeof(*out));
+		return 0;
+	}
+
+	return EMSGSIZE;
+}
+
+/*
+ * Old pull of a ctdb_connection: allocate the struct under mem_ctx and fill
+ * it with ctdb_connection_pull_elems_old(); it is freed again on failure.
+ *
+ * Returns 0 on success, ENOMEM, or the element parser's error code.
+ */
+static int ctdb_connection_pull_old(uint8_t *buf, size_t buflen,
+				    TALLOC_CTX *mem_ctx,
+				    struct ctdb_connection **out)
+{
+	struct ctdb_connection *conn = talloc(mem_ctx, struct ctdb_connection);
+	int rc;
+
+	if (conn == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_connection_pull_elems_old(buf, buflen, conn, conn);
+	if (rc == 0) {
+		*out = conn;
+		return rc;
+	}
+
+	TALLOC_FREE(conn);
+	return rc;
+}
+
+/*
+ * Old wire layout of a tunable: value, name length, then the name bytes.
+ * The push helper below stores strlen(name) + 1 in "length", so the
+ * terminating NUL is part of the name on the wire.
+ */
+struct ctdb_tunable_wire {
+ uint32_t value;
+ uint32_t length;
+ uint8_t name[1];
+};
+
+/* Old wire length of a tunable: fixed header plus the NUL-terminated name. */
+static size_t ctdb_tunable_len_old(struct ctdb_tunable *in)
+{
+	const size_t hdr_len = offsetof(struct ctdb_tunable_wire, name);
+	const size_t name_len = strlen(in->name) + 1;
+
+	return hdr_len + name_len;
+}
+
+/*
+ * Old push for a tunable: store the value, the name length (including the
+ * terminating NUL) and then the name itself.
+ */
+static void ctdb_tunable_push_old(struct ctdb_tunable *in, uint8_t *buf)
+{
+	struct ctdb_tunable_wire *dst = (struct ctdb_tunable_wire *)buf;
+	/* Same width as the wire field, so the copy length matches it. */
+	const uint32_t name_len = strlen(in->name) + 1;
+
+	dst->value = in->value;
+	dst->length = name_len;
+	memcpy(dst->name, in->name, name_len);
+}
+
+/*
+ * Old pull for a tunable.
+ *
+ * Checks that buf holds the fixed header and "length" name bytes (guarding
+ * the sum against wrap-around), then allocates the result under mem_ctx
+ * with the name duplicated as its talloc child.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, or ENOMEM
+ * if an allocation fails.
+ */
+static int ctdb_tunable_pull_old(uint8_t *buf, size_t buflen,
+				 TALLOC_CTX *mem_ctx,
+				 struct ctdb_tunable **out)
+{
+	const size_t hdr_len = offsetof(struct ctdb_tunable_wire, name);
+	struct ctdb_tunable_wire *wire = (struct ctdb_tunable_wire *)buf;
+	struct ctdb_tunable *tunable;
+
+	/* The fixed header must be present before wire->length is read. */
+	if (buflen < hdr_len) {
+		return EMSGSIZE;
+	}
+
+	if (wire->length > buflen ||
+	    hdr_len + wire->length < hdr_len ||
+	    buflen < hdr_len + wire->length) {
+		return EMSGSIZE;
+	}
+
+	tunable = talloc(mem_ctx, struct ctdb_tunable);
+	if (tunable == NULL) {
+		return ENOMEM;
+	}
+
+	tunable->value = wire->value;
+	tunable->name = talloc_memdup(tunable, wire->name, wire->length);
+	if (tunable->name == NULL) {
+		talloc_free(tunable);
+		return ENOMEM;
+	}
+
+	*out = tunable;
+	return 0;
+}
+
+/* Old wire length of ctdb_node_flag_change: the in-memory struct size. */
+static size_t ctdb_node_flag_change_len_old(struct ctdb_node_flag_change *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_node_flag_change: copy the raw struct bytes into buf. */
+static void ctdb_node_flag_change_push_old(struct ctdb_node_flag_change *in,
+					   uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_node_flag_change: duplicate the leading struct-sized
+ * bytes of buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_node_flag_change_pull_old(uint8_t *buf, size_t buflen,
+					  TALLOC_CTX *mem_ctx,
+					  struct ctdb_node_flag_change **out)
+{
+	const size_t need = sizeof(struct ctdb_node_flag_change);
+	struct ctdb_node_flag_change *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/*
+ * Old wire layout of a variable list: a length followed by a single string
+ * in which the push helper below joins the variable names with ':'.
+ */
+struct ctdb_var_list_wire {
+ uint32_t length;
+ char list_str[1];
+};
+
+/*
+ * Old wire length of a variable list: the 32-bit length field plus, for
+ * every variable, its string length and one extra byte. Every entry must be
+ * non-NULL (asserted).
+ */
+static size_t ctdb_var_list_len_old(struct ctdb_var_list *in)
+{
+	size_t total = sizeof(uint32_t);
+	int idx = 0;
+
+	while (idx < in->count) {
+		assert(in->var[idx] != NULL);
+		total += strlen(in->var[idx]) + 1;
+		idx++;
+	}
+
+	return total;
+}
+
+/*
+ * Old push for a variable list: write the names into list_str separated by
+ * ':' and record the string length, including the terminating NUL, in the
+ * length field. With no variables nothing is written to list_str and the
+ * length is 1.
+ */
+static void ctdb_var_list_push_old(struct ctdb_var_list *in, uint8_t *buf)
+{
+	struct ctdb_var_list_wire *dst = (struct ctdb_var_list_wire *)buf;
+	size_t pos = 0;
+	int idx;
+
+	for (idx = 0; idx < in->count; idx++) {
+		if (idx > 0) {
+			/* Replace the previous terminator with a separator. */
+			dst->list_str[pos++] = ':';
+		}
+		pos += sprintf(&dst->list_str[pos], "%s", in->var[idx]);
+	}
+
+	dst->length = pos + 1;
+}
+
+/*
+ * Old pull for a variable list.
+ *
+ * Validates the length field against buflen, copies the ':'-separated
+ * string, and splits it with strtok_r() into individually duplicated names
+ * that are talloc children of the result (allocated under mem_ctx). The
+ * temporary string copy is released on every path.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, or ENOMEM
+ * if any allocation fails.
+ */
+static int ctdb_var_list_pull_old(uint8_t *buf, size_t buflen,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_var_list **out)
+{
+	struct ctdb_var_list_wire *wire = (struct ctdb_var_list_wire *)buf;
+	struct ctdb_var_list *vlist = NULL;
+	char *copy, *tok, *saveptr;
+	const char **grown;
+
+	/* The length field must be present before it is read. */
+	if (buflen < sizeof(uint32_t)) {
+		return EMSGSIZE;
+	}
+
+	if (wire->length > buflen ||
+	    sizeof(uint32_t) + wire->length < sizeof(uint32_t) ||
+	    buflen < sizeof(uint32_t) + wire->length) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_strndup(mem_ctx, (char *)wire->list_str, wire->length);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	vlist = talloc_zero(mem_ctx, struct ctdb_var_list);
+	if (vlist == NULL) {
+		goto fail;
+	}
+
+	for (tok = strtok_r(copy, ":", &saveptr);
+	     tok != NULL;
+	     tok = strtok_r(NULL, ":", &saveptr)) {
+		grown = talloc_realloc(vlist, vlist->var, const char *,
+				       vlist->count+1);
+		if (grown == NULL) {
+			goto fail;
+		}
+		vlist->var = grown;
+
+		vlist->var[vlist->count] = talloc_strdup(vlist, tok);
+		if (vlist->var[vlist->count] == NULL) {
+			goto fail;
+		}
+		vlist->count++;
+	}
+
+	talloc_free(copy);
+	*out = vlist;
+	return 0;
+
+fail:
+	talloc_free(copy);
+	talloc_free(vlist);
+	return ENOMEM;
+}
+
+/* Old wire length of ctdb_tunable_list: the in-memory struct size. */
+static size_t ctdb_tunable_list_len_old(struct ctdb_tunable_list *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_tunable_list: copy the raw struct bytes into buf. */
+static void ctdb_tunable_list_push_old(struct ctdb_tunable_list *in,
+				       uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_tunable_list: duplicate the leading struct-sized bytes
+ * of buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_tunable_list_pull_old(uint8_t *buf, size_t buflen,
+				      TALLOC_CTX *mem_ctx,
+				      struct ctdb_tunable_list **out)
+{
+	const size_t need = sizeof(struct ctdb_tunable_list);
+	struct ctdb_tunable_list *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/*
+ * Old wire layout of a tickle list: an address, a connection count, then
+ * "num" ctdb_connection entries (conn[] is declared with one element only).
+ */
+struct ctdb_tickle_list_wire {
+ ctdb_sock_addr addr;
+ uint32_t num;
+ struct ctdb_connection conn[1];
+};
+
+/*
+ * Old wire length of a tickle list: the fields preceding conn in the
+ * in-memory struct plus "num" ctdb_connection entries.
+ */
+static size_t ctdb_tickle_list_len_old(struct ctdb_tickle_list *in)
+{
+	const size_t hdr_len = offsetof(struct ctdb_tickle_list, conn);
+	const size_t conns_len = in->num * sizeof(struct ctdb_connection);
+
+	return hdr_len + conns_len;
+}
+
+/*
+ * Old push for a tickle list: write the address and count, then push each
+ * connection back to back starting at the wire struct's conn offset.
+ */
+static void ctdb_tickle_list_push_old(struct ctdb_tickle_list *in,
+				      uint8_t *buf)
+{
+	struct ctdb_tickle_list_wire *dst =
+		(struct ctdb_tickle_list_wire *)buf;
+	uint8_t *cursor = buf + offsetof(struct ctdb_tickle_list_wire, conn);
+	unsigned int idx;
+
+	memcpy(&dst->addr, &in->addr, sizeof(ctdb_sock_addr));
+	dst->num = in->num;
+
+	for (idx = 0; idx < in->num; idx++) {
+		struct ctdb_connection *conn = &in->conn[idx];
+
+		ctdb_connection_push_old(conn, cursor);
+		cursor += ctdb_connection_len_old(conn);
+	}
+}
+
+/*
+ * Old pull for a tickle list.
+ *
+ * Checks that buf holds the fixed header and "num" connections (guarding
+ * the computed length against wrap-around), copies the leading fields into
+ * a new ctdb_tickle_list under mem_ctx and pulls each connection into a
+ * talloc'ed array owned by the result.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, ENOMEM on
+ * allocation failure, or the connection parser's error code.
+ */
+static int ctdb_tickle_list_pull_old(uint8_t *buf, size_t buflen,
+				     TALLOC_CTX *mem_ctx,
+				     struct ctdb_tickle_list **out)
+{
+	const size_t wire_hdr = offsetof(struct ctdb_tickle_list_wire, conn);
+	struct ctdb_tickle_list_wire *wire =
+		(struct ctdb_tickle_list_wire *)buf;
+	struct ctdb_tickle_list *tickles;
+	size_t conns_len, pos;
+	unsigned int idx;
+	int rc;
+
+	/* The fixed header must be present before wire->num is read. */
+	if (buflen < wire_hdr) {
+		return EMSGSIZE;
+	}
+
+	conns_len = wire->num * sizeof(struct ctdb_connection);
+	if (wire->num > buflen / sizeof(struct ctdb_connection) ||
+	    wire_hdr + conns_len < wire_hdr ||
+	    buflen < wire_hdr + conns_len) {
+		return EMSGSIZE;
+	}
+
+	tickles = talloc(mem_ctx, struct ctdb_tickle_list);
+	if (tickles == NULL) {
+		return ENOMEM;
+	}
+
+	/*
+	 * The in-memory struct's conn offset is used both as the size of
+	 * the header copy and as the starting position of the entries.
+	 */
+	pos = offsetof(struct ctdb_tickle_list, conn);
+	memcpy(tickles, wire, pos);
+
+	tickles->conn = talloc_array(tickles, struct ctdb_connection,
+				     wire->num);
+	if (tickles->conn == NULL) {
+		talloc_free(tickles);
+		return ENOMEM;
+	}
+
+	for (idx = 0; idx < wire->num; idx++) {
+		rc = ctdb_connection_pull_elems_old(&buf[pos], buflen-pos,
+						    tickles->conn,
+						    &tickles->conn[idx]);
+		if (rc != 0) {
+			talloc_free(tickles);
+			return rc;
+		}
+		pos += ctdb_connection_len_old(&tickles->conn[idx]);
+	}
+
+	*out = tickles;
+	return 0;
+}
+
+/*
+ * Old wire layout of address info: address, mask, interface name length and
+ * the interface name bytes. The push helper below writes len == 0 and no
+ * name when the interface is NULL, otherwise strlen(iface) + 1.
+ */
+struct ctdb_addr_info_wire {
+ ctdb_sock_addr addr;
+ uint32_t mask;
+ uint32_t len;
+ char iface[1];
+};
+
+/*
+ * Old wire length of address info: the fixed header plus, when an interface
+ * name is set, the name and its terminating NUL.
+ */
+static size_t ctdb_addr_info_len_old(struct ctdb_addr_info *in)
+{
+	/* Accumulated in 32 bits, exactly like the original computation. */
+	uint32_t total = offsetof(struct ctdb_addr_info_wire, iface);
+
+	total += (in->iface == NULL) ? 0 : strlen(in->iface)+1;
+
+	return total;
+}
+
+/*
+ * Old push for address info: write address and mask, then either a zero
+ * length (no interface) or the NUL-terminated interface name and its length.
+ */
+static void ctdb_addr_info_push_old(struct ctdb_addr_info *in, uint8_t *buf)
+{
+	struct ctdb_addr_info_wire *dst = (struct ctdb_addr_info_wire *)buf;
+
+	dst->addr = in->addr;
+	dst->mask = in->mask;
+
+	if (in->iface != NULL) {
+		dst->len = strlen(in->iface)+1;
+		memcpy(dst->iface, in->iface, dst->len);
+		return;
+	}
+
+	dst->len = 0;
+}
+
+/*
+ * Old pull for address info.
+ *
+ * Checks that buf holds the fixed header and "len" name bytes (guarding the
+ * sum against wrap-around), then allocates the result under mem_ctx. A zero
+ * length yields a NULL iface; otherwise the name is duplicated as a talloc
+ * child of the result.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, or ENOMEM
+ * if an allocation fails.
+ */
+static int ctdb_addr_info_pull_old(uint8_t *buf, size_t buflen,
+				   TALLOC_CTX *mem_ctx,
+				   struct ctdb_addr_info **out)
+{
+	const size_t hdr_len = offsetof(struct ctdb_addr_info_wire, iface);
+	struct ctdb_addr_info_wire *wire = (struct ctdb_addr_info_wire *)buf;
+	struct ctdb_addr_info *info;
+
+	/* The fixed header must be present before wire->len is read. */
+	if (buflen < hdr_len) {
+		return EMSGSIZE;
+	}
+
+	if (wire->len > buflen ||
+	    hdr_len + wire->len < hdr_len ||
+	    buflen < hdr_len + wire->len) {
+		return EMSGSIZE;
+	}
+
+	info = talloc(mem_ctx, struct ctdb_addr_info);
+	if (info == NULL) {
+		return ENOMEM;
+	}
+
+	info->addr = wire->addr;
+	info->mask = wire->mask;
+	info->iface = NULL;
+
+	if (wire->len != 0) {
+		info->iface = talloc_strndup(info, wire->iface, wire->len);
+		if (info->iface == NULL) {
+			talloc_free(info);
+			return ENOMEM;
+		}
+	}
+
+	*out = info;
+	return 0;
+}
+
+/* Old wire length of ctdb_transdb: the size of the in-memory struct. */
+static size_t ctdb_transdb_len_old(struct ctdb_transdb *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_transdb: copy the raw struct bytes into buf. */
+static void ctdb_transdb_push_old(struct ctdb_transdb *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_transdb: duplicate the leading struct-sized bytes of
+ * buf into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_transdb_pull_old(uint8_t *buf, size_t buflen,
+				 TALLOC_CTX *mem_ctx,
+				 struct ctdb_transdb **out)
+{
+	const size_t need = sizeof(struct ctdb_transdb);
+	struct ctdb_transdb *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_uptime: the size of the in-memory struct. */
+static size_t ctdb_uptime_len_old(struct ctdb_uptime *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_uptime: copy the raw struct bytes into buf. */
+static void ctdb_uptime_push_old(struct ctdb_uptime *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull for ctdb_uptime: duplicate the leading struct-sized bytes of buf
+ * into a new talloc chunk under mem_ctx.
+ *
+ * Returns 0 on success, EMSGSIZE if buf is too short, ENOMEM on allocation
+ * failure.
+ */
+static int ctdb_uptime_pull_old(uint8_t *buf, size_t buflen,
+				TALLOC_CTX *mem_ctx, struct ctdb_uptime **out)
+{
+	const size_t need = sizeof(struct ctdb_uptime);
+	struct ctdb_uptime *copy;
+
+	if (need > buflen) {
+		return EMSGSIZE;
+	}
+
+	copy = talloc_memdup(mem_ctx, buf, need);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+
+	*out = copy;
+	return 0;
+}
+
+/* Old wire length of ctdb_public_ip: the size of the in-memory struct. */
+static size_t ctdb_public_ip_len_old(struct ctdb_public_ip *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_public_ip: copy the raw struct bytes into buf. */
+static void ctdb_public_ip_push_old(struct ctdb_public_ip *in, uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull of a ctdb_public_ip into caller-provided storage: copy the
+ * leading struct-sized bytes of buf into *out. mem_ctx is not used.
+ * Returns 0, or EMSGSIZE if buf is too short.
+ */
+static int ctdb_public_ip_pull_elems_old(uint8_t *buf, size_t buflen,
+					 TALLOC_CTX *mem_ctx,
+					 struct ctdb_public_ip *out)
+{
+	if (buflen >= sizeof(*out)) {
+		memcpy(out, buf, sizeof(*out));
+		return 0;
+	}
+
+	return EMSGSIZE;
+}
+
+/*
+ * Old pull of a ctdb_public_ip: allocate the struct under mem_ctx and fill
+ * it with ctdb_public_ip_pull_elems_old(); it is freed again on failure.
+ *
+ * Returns 0 on success, ENOMEM, or the element parser's error code.
+ */
+static int ctdb_public_ip_pull_old(uint8_t *buf, size_t buflen,
+				   TALLOC_CTX *mem_ctx,
+				   struct ctdb_public_ip **out)
+{
+	struct ctdb_public_ip *pubip = talloc(mem_ctx, struct ctdb_public_ip);
+	int rc;
+
+	if (pubip == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_public_ip_pull_elems_old(buf, buflen, pubip, pubip);
+	if (rc == 0) {
+		*out = pubip;
+		return rc;
+	}
+
+	TALLOC_FREE(pubip);
+	return rc;
+}
+
+/*
+ * Old wire layout of a public IP list: a count followed by "num"
+ * ctdb_public_ip entries (ip[] is declared with one element only).
+ */
+struct ctdb_public_ip_list_wire {
+ uint32_t num;
+ struct ctdb_public_ip ip[1];
+};
+
+/*
+ * Old wire length of a public IP list: the 32-bit count plus the old length
+ * of every entry.
+ */
+static size_t ctdb_public_ip_list_len_old(struct ctdb_public_ip_list *in)
+{
+	size_t total = sizeof(uint32_t);
+	unsigned int idx = 0;
+
+	while (idx < in->num) {
+		total += ctdb_public_ip_len_old(&in->ip[idx]);
+		idx++;
+	}
+
+	return total;
+}
+
+/*
+ * Old push for a public IP list: write the count, then push each entry back
+ * to back starting at the wire struct's ip offset.
+ */
+static void ctdb_public_ip_list_push_old(struct ctdb_public_ip_list *in,
+					 uint8_t *buf)
+{
+	struct ctdb_public_ip_list_wire *dst =
+		(struct ctdb_public_ip_list_wire *)buf;
+	uint8_t *cursor = buf + offsetof(struct ctdb_public_ip_list_wire, ip);
+	unsigned int idx;
+
+	dst->num = in->num;
+
+	for (idx = 0; idx < in->num; idx++) {
+		struct ctdb_public_ip *ip = &in->ip[idx];
+
+		ctdb_public_ip_push_old(ip, cursor);
+		cursor += ctdb_public_ip_len_old(ip);
+	}
+}
+
+/*
+ * Old pull for a public IP list.
+ *
+ * Checks that buf holds the count and "num" ctdb_public_ip entries
+ * (guarding the computed length against wrap-around), then allocates the
+ * result under mem_ctx. An empty list gets ip == NULL; otherwise every
+ * entry is pulled into a talloc'ed array owned by the result.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, ENOMEM on
+ * allocation failure, or the entry parser's error code.
+ */
+static int ctdb_public_ip_list_pull_old(uint8_t *buf, size_t buflen,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_public_ip_list **out)
+{
+	struct ctdb_public_ip_list *val;
+	struct ctdb_public_ip_list_wire *wire =
+		(struct ctdb_public_ip_list_wire *)buf;
+	size_t offset;
+	unsigned int i;
+	/*
+	 * Must be int, not bool: the entry parser returns an errno value,
+	 * and storing it in a bool would collapse every failure to 1.
+	 */
+	int ret;
+
+	if (buflen < sizeof(uint32_t)) {
+		return EMSGSIZE;
+	}
+	if (wire->num > buflen / sizeof(struct ctdb_public_ip)) {
+		return EMSGSIZE;
+	}
+	if (sizeof(uint32_t) + wire->num * sizeof(struct ctdb_public_ip) <
+	    sizeof(uint32_t)) {
+		return EMSGSIZE;
+	}
+	if (buflen < sizeof(uint32_t) +
+		     wire->num * sizeof(struct ctdb_public_ip)) {
+		return EMSGSIZE;
+	}
+
+	val = talloc(mem_ctx, struct ctdb_public_ip_list);
+	if (val == NULL) {
+		return ENOMEM;
+	}
+
+	val->num = wire->num;
+	if (wire->num == 0) {
+		val->ip = NULL;
+		*out = val;
+		return 0;
+	}
+	val->ip = talloc_array(val, struct ctdb_public_ip, wire->num);
+	if (val->ip == NULL) {
+		talloc_free(val);
+		return ENOMEM;
+	}
+
+	offset = offsetof(struct ctdb_public_ip_list_wire, ip);
+	for (i=0; i<wire->num; i++) {
+		ret = ctdb_public_ip_pull_elems_old(&buf[offset],
+						    buflen-offset,
+						    val->ip,
+						    &val->ip[i]);
+		if (ret != 0) {
+			talloc_free(val);
+			return ret;
+		}
+		offset += ctdb_public_ip_len_old(&val->ip[i]);
+	}
+
+	*out = val;
+	return 0;
+}
+
+/* Old wire length of ctdb_node_and_flags: the in-memory struct size. */
+static size_t ctdb_node_and_flags_len_old(struct ctdb_node_and_flags *in)
+{
+	return sizeof(*in);
+}
+
+/* Old push for ctdb_node_and_flags: copy the raw struct bytes into buf. */
+static void ctdb_node_and_flags_push_old(struct ctdb_node_and_flags *in,
+					 uint8_t *buf)
+{
+	const size_t nbytes = sizeof(*in);
+
+	memcpy(buf, in, nbytes);
+}
+
+/*
+ * Old pull of a ctdb_node_and_flags into caller-provided storage: copy the
+ * leading struct-sized bytes of buf into *out. mem_ctx is not used; note
+ * that it is the first parameter here, unlike the other *_pull_elems_old
+ * helpers. Returns 0, or EMSGSIZE if buf is too short.
+ */
+static int ctdb_node_and_flags_pull_elems_old(TALLOC_CTX *mem_ctx,
+					      uint8_t *buf, size_t buflen,
+					      struct ctdb_node_and_flags *out)
+{
+	if (buflen >= sizeof(*out)) {
+		memcpy(out, buf, sizeof(*out));
+		return 0;
+	}
+
+	return EMSGSIZE;
+}
+
+/*
+ * Old pull of a ctdb_node_and_flags: allocate the struct under mem_ctx and
+ * fill it with ctdb_node_and_flags_pull_elems_old(); it is freed again on
+ * failure.
+ *
+ * Returns 0 on success, ENOMEM, or the element parser's error code.
+ */
+static int ctdb_node_and_flags_pull_old(uint8_t *buf, size_t buflen,
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_node_and_flags **out)
+{
+	struct ctdb_node_and_flags *node =
+		talloc(mem_ctx, struct ctdb_node_and_flags);
+	int rc;
+
+	if (node == NULL) {
+		return ENOMEM;
+	}
+
+	rc = ctdb_node_and_flags_pull_elems_old(node, buf, buflen, node);
+	if (rc == 0) {
+		*out = node;
+		return rc;
+	}
+
+	TALLOC_FREE(node);
+	return rc;
+}
+
+/*
+ * Old wire layout of a node map: a count followed by "num"
+ * ctdb_node_and_flags entries (node[] is declared with one element only).
+ */
+struct ctdb_node_map_wire {
+ uint32_t num;
+ struct ctdb_node_and_flags node[1];
+};
+
+/* Old wire length of a node map: 32-bit count plus "num" node entries. */
+static size_t ctdb_node_map_len_old(struct ctdb_node_map *in)
+{
+	const size_t nodes_len = in->num * sizeof(struct ctdb_node_and_flags);
+
+	return sizeof(uint32_t) + nodes_len;
+}
+
+/*
+ * Old push for a node map: write the count, then push each node entry back
+ * to back starting at the wire struct's node offset.
+ */
+static void ctdb_node_map_push_old(struct ctdb_node_map *in, uint8_t *buf)
+{
+	struct ctdb_node_map_wire *dst = (struct ctdb_node_map_wire *)buf;
+	uint8_t *cursor = buf + offsetof(struct ctdb_node_map_wire, node);
+	unsigned int idx;
+
+	dst->num = in->num;
+
+	for (idx = 0; idx < in->num; idx++) {
+		struct ctdb_node_and_flags *node = &in->node[idx];
+
+		ctdb_node_and_flags_push_old(node, cursor);
+		cursor += ctdb_node_and_flags_len_old(node);
+	}
+}
+
+/*
+ * Old pull for a node map.
+ *
+ * Checks that buf holds the count and "num" ctdb_node_and_flags entries
+ * (guarding the computed length against wrap-around), then allocates the
+ * result under mem_ctx and pulls every entry into a talloc'ed array owned
+ * by the result.
+ *
+ * Returns 0 on success, EMSGSIZE for a short/inconsistent buffer, ENOMEM on
+ * allocation failure, or the entry parser's error code.
+ */
+static int ctdb_node_map_pull_old(uint8_t *buf, size_t buflen,
+				  TALLOC_CTX *mem_ctx,
+				  struct ctdb_node_map **out)
+{
+	struct ctdb_node_map *val;
+	struct ctdb_node_map_wire *wire = (struct ctdb_node_map_wire *)buf;
+	size_t offset;
+	unsigned int i;
+	/*
+	 * Must be int, not bool: the entry parser returns an errno value,
+	 * and storing it in a bool would collapse every failure to 1.
+	 */
+	int ret;
+
+	if (buflen < sizeof(uint32_t)) {
+		return EMSGSIZE;
+	}
+	if (wire->num > buflen / sizeof(struct ctdb_node_and_flags)) {
+		return EMSGSIZE;
+	}
+	if (sizeof(uint32_t) + wire->num * sizeof(struct ctdb_node_and_flags) <
+	    sizeof(uint32_t)) {
+		return EMSGSIZE;
+	}
+	if (buflen < sizeof(uint32_t) +
+		     wire->num * sizeof(struct ctdb_node_and_flags)) {
+		return EMSGSIZE;
+	}
+
+	val = talloc(mem_ctx, struct ctdb_node_map);
+	if (val == NULL) {
+		return ENOMEM;
+	}
+
+	val->num = wire->num;
+	val->node = talloc_array(val, struct ctdb_node_and_flags, wire->num);
+	if (val->node == NULL) {
+		talloc_free(val);
+		return ENOMEM;
+	}
+
+	offset = offsetof(struct ctdb_node_map_wire, node);
+	for (i=0; i<wire->num; i++) {
+		ret = ctdb_node_and_flags_pull_elems_old(val->node,
+							 &buf[offset],
+							 buflen-offset,
+							 &val->node[i]);
+		if (ret != 0) {
+			talloc_free(val);
+			return ret;
+		}
+		offset += ctdb_node_and_flags_len_old(&val->node[i]);
+	}
+
+	*out = val;
+	return 0;
+}
+
+/*
+ * Marshalled size of a ctdb_script in the "_old" layout: the in-memory size
+ * of the struct. The argument is not inspected.
+ */
+static size_t ctdb_script_len_old(struct ctdb_script *in)
+{
+ return sizeof(struct ctdb_script);
+}
+
+/*
+ * Serialise "in" by copying the struct's bytes verbatim to buf.
+ * buf must have room for sizeof(struct ctdb_script) bytes; not checked here.
+ */
+static void ctdb_script_push_old(struct ctdb_script *in, uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_script));
+}
+
+/*
+ * Deserialise one ctdb_script into caller-provided storage "out".
+ * Returns 0 on success, EMSGSIZE if buf is shorter than the struct.
+ * mem_ctx is not used by this implementation.
+ */
+static int ctdb_script_pull_elems_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_script *out)
+{
+ if (buflen < sizeof(struct ctdb_script)) {
+ return EMSGSIZE;
+ }
+
+ memcpy(out, buf, sizeof(struct ctdb_script));
+
+ return 0;
+}
+
+/*
+ * Allocate a ctdb_script on mem_ctx and fill it from buf.
+ * Returns 0 and stores the object in *out on success, ENOMEM if the
+ * allocation fails, or the element-pull error (EMSGSIZE) after freeing the
+ * allocation. *out is not written on failure.
+ */
+static int ctdb_script_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx, struct ctdb_script **out)
+{
+ struct ctdb_script *val;
+ int ret;
+
+ val = talloc(mem_ctx, struct ctdb_script);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ /* val doubles as the (unused) memory context for the element pull */
+ ret = ctdb_script_pull_elems_old(buf, buflen, val, val);
+ if (ret != 0) {
+ TALLOC_FREE(val);
+ return ret;
+ }
+
+ *out = val;
+ return ret;
+}
+
+/*
+ * Overlay for a marshalled script list: a 32-bit count followed by that
+ * many ctdb_script structs. script[1] only marks the start of the
+ * variable-length array.
+ */
+struct ctdb_script_list_wire {
+ uint32_t num_scripts;
+ struct ctdb_script script[1];
+};
+
+/*
+ * Marshalled size of a script list. A NULL list marshals to zero bytes;
+ * otherwise the size is the header (up to the array start) plus the size of
+ * every script.
+ */
+static size_t ctdb_script_list_len_old(struct ctdb_script_list *in)
+{
+ unsigned int i;
+ size_t len;
+
+ if (in == NULL) {
+ return 0;
+ }
+
+ len = offsetof(struct ctdb_script_list_wire, script);
+ for (i=0; i<in->num_scripts; i++) {
+ len += ctdb_script_len_old(&in->script[i]);
+ }
+ return len;
+}
+
+/*
+ * Serialise a script list into buf: the count, then each script in order.
+ * A NULL list writes nothing, matching the zero length reported by
+ * ctdb_script_list_len_old(). No bounds check is done on buf.
+ */
+static void ctdb_script_list_push_old(struct ctdb_script_list *in,
+ uint8_t *buf)
+{
+ struct ctdb_script_list_wire *wire =
+ (struct ctdb_script_list_wire *)buf;
+ size_t offset;
+ unsigned int i;
+
+ if (in == NULL) {
+ return;
+ }
+
+ wire->num_scripts = in->num_scripts;
+
+ offset = offsetof(struct ctdb_script_list_wire, script);
+ for (i=0; i<in->num_scripts; i++) {
+ ctdb_script_push_old(&in->script[i], &buf[offset]);
+ offset += ctdb_script_len_old(&in->script[i]);
+ }
+}
+
+/*
+ * Unmarshal a script list in the "_old" layout (32-bit count followed by
+ * that many raw ctdb_script structs).
+ *
+ * An empty buffer is valid and yields *out == NULL with return value 0.
+ * Otherwise, on success returns 0 and stores a newly allocated list in
+ * *out; the list is a talloc child of mem_ctx and the script array a child
+ * of the list. Returns EMSGSIZE if buf is too short for the header or the
+ * advertised number of scripts, ENOMEM on allocation failure. *out is not
+ * written on failure.
+ */
+static int ctdb_script_list_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_script_list **out)
+{
+ struct ctdb_script_list *val;
+ struct ctdb_script_list_wire *wire =
+ (struct ctdb_script_list_wire *)buf;
+ size_t offset;
+ unsigned int i;
+ /* Must be int: a bool would collapse every errno value to 1 */
+ int ret;
+
+ /* If event scripts have never been run, the result will be NULL */
+ if (buflen == 0) {
+ *out = NULL;
+ return 0;
+ }
+
+ offset = offsetof(struct ctdb_script_list_wire, script);
+
+ if (buflen < offset) {
+ return EMSGSIZE;
+ }
+ /* Cheap upper bound on the count before doing any multiplication */
+ if (wire->num_scripts > buflen / sizeof(struct ctdb_script)) {
+ return EMSGSIZE;
+ }
+ /* Guard against size_t wrap-around of the total length */
+ if (offset + wire->num_scripts * sizeof(struct ctdb_script) < offset) {
+ return EMSGSIZE;
+ }
+ if (buflen < offset + wire->num_scripts * sizeof(struct ctdb_script)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc(mem_ctx, struct ctdb_script_list);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ val->num_scripts = wire->num_scripts;
+ val->script = talloc_array(val, struct ctdb_script, wire->num_scripts);
+ if (val->script == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ for (i=0; i<wire->num_scripts; i++) {
+ ret = ctdb_script_pull_elems_old(&buf[offset], buflen-offset,
+ val->script,
+ &val->script[i]);
+ if (ret != 0) {
+ /* Frees the script array too, it is a child of val */
+ talloc_free(val);
+ return ret;
+ }
+ offset += ctdb_script_len_old(&val->script[i]);
+ }
+
+ *out = val;
+ return 0;
+}
+
+/* Marshalled size of a ctdb_ban_state: the in-memory struct size. */
+static size_t ctdb_ban_state_len_old(struct ctdb_ban_state *in)
+{
+ return sizeof(struct ctdb_ban_state);
+}
+
+/* Serialise by copying the struct's bytes verbatim to buf (unchecked). */
+static void ctdb_ban_state_push_old(struct ctdb_ban_state *in, uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_ban_state));
+}
+
+/*
+ * Unmarshal a ctdb_ban_state by duplicating the leading bytes of buf onto
+ * mem_ctx. Returns 0 and sets *out on success, EMSGSIZE if buf is shorter
+ * than the struct, ENOMEM if the duplication fails.
+ */
+static int ctdb_ban_state_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_ban_state **out)
+{
+ struct ctdb_ban_state *val;
+
+ if (buflen < sizeof(struct ctdb_ban_state)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc_memdup(mem_ctx, buf, sizeof(struct ctdb_ban_state));
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ *out = val;
+ return 0;
+}
+
+/*
+ * Overlay for marshalled notify data: srvid, payload length, then "len"
+ * payload bytes. data[1] only marks where the payload begins.
+ */
+struct ctdb_notify_data_wire {
+ uint64_t srvid;
+ uint32_t len;
+ uint8_t data[1];
+};
+
+/* Marshalled size: fixed header up to the payload plus the payload size. */
+static size_t ctdb_notify_data_len_old(struct ctdb_notify_data *in)
+{
+ return offsetof(struct ctdb_notify_data_wire, data) +
+ in->data.dsize;
+}
+
+/*
+ * Serialise notify data into buf: srvid, payload length, payload bytes.
+ * No bounds check is done on buf.
+ */
+static void ctdb_notify_data_push_old(struct ctdb_notify_data *in,
+ uint8_t *buf)
+{
+ struct ctdb_notify_data_wire *wire =
+ (struct ctdb_notify_data_wire *)buf;
+
+ wire->srvid = in->srvid;
+ wire->len = in->data.dsize;
+ memcpy(wire->data, in->data.dptr, in->data.dsize);
+}
+
+/*
+ * Unmarshal notify data. On success returns 0 and stores a new object
+ * (child of mem_ctx, payload copy a child of the object) in *out.
+ * Returns EMSGSIZE if buf is too short for the header or the advertised
+ * payload, ENOMEM on allocation failure.
+ */
+static int ctdb_notify_data_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_notify_data **out)
+{
+ struct ctdb_notify_data *val;
+ struct ctdb_notify_data_wire *wire =
+ (struct ctdb_notify_data_wire *)buf;
+
+ if (buflen < offsetof(struct ctdb_notify_data_wire, data)) {
+ return EMSGSIZE;
+ }
+ /* Quick sanity bound on the advertised payload length */
+ if (wire->len > buflen) {
+ return EMSGSIZE;
+ }
+ /* Guard against wrap-around of header + payload */
+ if (offsetof(struct ctdb_notify_data_wire, data) + wire->len <
+ offsetof(struct ctdb_notify_data_wire, data)) {
+ return EMSGSIZE;
+ }
+ if (buflen < offsetof(struct ctdb_notify_data_wire, data) + wire->len) {
+ return EMSGSIZE;
+ }
+
+ val = talloc(mem_ctx, struct ctdb_notify_data);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ val->srvid = wire->srvid;
+ val->data.dsize = wire->len;
+ val->data.dptr = talloc_memdup(val, wire->data, wire->len);
+ if (val->data.dptr == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ *out = val;
+ return 0;
+}
+
+/* Marshalled size of a ctdb_iface: the in-memory struct size. */
+static size_t ctdb_iface_len_old(struct ctdb_iface *in)
+{
+ return sizeof(struct ctdb_iface);
+}
+
+/* Serialise by copying the struct's bytes verbatim to buf (unchecked). */
+static void ctdb_iface_push_old(struct ctdb_iface *in, uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_iface));
+}
+
+/*
+ * Deserialise one ctdb_iface into caller-provided storage "out".
+ * Returns 0 on success, EMSGSIZE if buf is shorter than the struct.
+ * mem_ctx is not used by this implementation.
+ */
+static int ctdb_iface_pull_elems_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_iface *out)
+{
+ if (buflen < sizeof(struct ctdb_iface)) {
+ return EMSGSIZE;
+ }
+
+ memcpy(out, buf, sizeof(struct ctdb_iface));
+
+ return 0;
+}
+
+/*
+ * Allocate a ctdb_iface on mem_ctx and fill it from buf.
+ * Returns 0 and stores the object in *out on success, ENOMEM if the
+ * allocation fails, or the element-pull error (EMSGSIZE) after freeing the
+ * allocation. *out is not written on failure.
+ */
+static int ctdb_iface_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx, struct ctdb_iface **out)
+{
+ struct ctdb_iface *val;
+ int ret;
+
+ val = talloc(mem_ctx, struct ctdb_iface);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ /* val doubles as the (unused) memory context for the element pull */
+ ret = ctdb_iface_pull_elems_old(buf, buflen, val, val);
+ if (ret != 0) {
+ TALLOC_FREE(val);
+ return ret;
+ }
+
+ *out = val;
+ return ret;
+}
+
+/*
+ * Overlay for a marshalled interface list: a 32-bit count followed by that
+ * many ctdb_iface structs. iface[1] only marks the start of the array.
+ */
+struct ctdb_iface_list_wire {
+ uint32_t num;
+ struct ctdb_iface iface[1];
+};
+
+/* Marshalled size: a uint32_t count plus in->num raw ctdb_iface structs. */
+static size_t ctdb_iface_list_len_old(struct ctdb_iface_list *in)
+{
+ return sizeof(uint32_t) +
+ in->num * sizeof(struct ctdb_iface);
+}
+
+/*
+ * Serialise an interface list into buf: the count, then the whole iface
+ * array in one copy. No bounds check is done on buf.
+ */
+static void ctdb_iface_list_push_old(struct ctdb_iface_list *in, uint8_t *buf)
+{
+ struct ctdb_iface_list_wire *wire =
+ (struct ctdb_iface_list_wire *)buf;
+
+ wire->num = in->num;
+ memcpy(wire->iface, in->iface, in->num * sizeof(struct ctdb_iface));
+}
+
+/*
+ * Unmarshal an interface list. On success returns 0 and stores a new list
+ * (child of mem_ctx, iface array a child of the list) in *out.
+ * Returns EMSGSIZE if buf is too short for the header or the advertised
+ * number of interfaces, ENOMEM on allocation failure.
+ */
+static int ctdb_iface_list_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_iface_list **out)
+{
+ struct ctdb_iface_list *val;
+ struct ctdb_iface_list_wire *wire =
+ (struct ctdb_iface_list_wire *)buf;
+
+ if (buflen < sizeof(uint32_t)) {
+ return EMSGSIZE;
+ }
+ /* Cheap upper bound on the count before doing any multiplication */
+ if (wire->num > buflen / sizeof(struct ctdb_iface)) {
+ return EMSGSIZE;
+ }
+ /* Guard against size_t wrap-around of the total length */
+ if (sizeof(uint32_t) + wire->num * sizeof(struct ctdb_iface) <
+ sizeof(uint32_t)) {
+ return EMSGSIZE;
+ }
+ if (buflen < sizeof(uint32_t) + wire->num * sizeof(struct ctdb_iface)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc(mem_ctx, struct ctdb_iface_list);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ val->num = wire->num;
+ val->iface = talloc_array(val, struct ctdb_iface, wire->num);
+ if (val->iface == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ memcpy(val->iface, wire->iface, wire->num * sizeof(struct ctdb_iface));
+
+ *out = val;
+ return 0;
+}
+
+/*
+ * Overlay for marshalled public IP info: the IP, the active interface
+ * index, an interface count and then that many ctdb_iface structs.
+ * ifaces[1] only marks the start of the variable-length array.
+ */
+struct ctdb_public_ip_info_wire {
+ struct ctdb_public_ip ip;
+ uint32_t active_idx;
+ uint32_t num;
+ struct ctdb_iface ifaces[1];
+};
+
+/*
+ * Marshalled size: the fixed fields preceding the interface count, plus
+ * the marshalled interface list (which itself accounts for the count).
+ */
+static size_t ctdb_public_ip_info_len_old(struct ctdb_public_ip_info *in)
+{
+ return offsetof(struct ctdb_public_ip_info_wire, num) +
+ ctdb_iface_list_len_old(in->ifaces);
+}
+
+/*
+ * Serialise public IP info into buf. in->ifaces is dereferenced without a
+ * NULL check and buf is not bounds-checked.
+ *
+ * NOTE(review): the leading memcpy copies the first
+ * offsetof(struct ctdb_public_ip_info_wire, num) bytes straight out of the
+ * in-memory struct, so it assumes struct ctdb_public_ip_info starts with
+ * the same ip/active_idx layout as the wire overlay - confirm.
+ */
+static void ctdb_public_ip_info_push_old(struct ctdb_public_ip_info *in,
+ uint8_t *buf)
+{
+ struct ctdb_public_ip_info_wire *wire =
+ (struct ctdb_public_ip_info_wire *)buf;
+ size_t offset;
+
+ offset = offsetof(struct ctdb_public_ip_info_wire, num);
+ memcpy(wire, in, offset);
+ wire->num = in->ifaces->num;
+ memcpy(wire->ifaces, in->ifaces->iface,
+ in->ifaces->num * sizeof(struct ctdb_iface));
+}
+
+/*
+ * Unmarshal public IP info. On success returns 0 and stores a new object
+ * in *out; the object is a child of mem_ctx, its interface list a child of
+ * the object and the iface array a child of the list.
+ * Returns EMSGSIZE if buf is too short for the header or the advertised
+ * number of interfaces, ENOMEM on allocation failure.
+ */
+static int ctdb_public_ip_info_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_public_ip_info **out)
+{
+ struct ctdb_public_ip_info *val;
+ struct ctdb_public_ip_info_wire *wire =
+ (struct ctdb_public_ip_info_wire *)buf;
+
+ if (buflen < offsetof(struct ctdb_public_ip_info_wire, ifaces)) {
+ return EMSGSIZE;
+ }
+ /* Cheap upper bound on the count before doing any multiplication */
+ if (wire->num > buflen / sizeof(struct ctdb_iface)) {
+ return EMSGSIZE;
+ }
+ /* Guard against size_t wrap-around of the total length */
+ if (offsetof(struct ctdb_public_ip_info_wire, ifaces) +
+ wire->num * sizeof(struct ctdb_iface) <
+ offsetof(struct ctdb_public_ip_info_wire, ifaces)) {
+ return EMSGSIZE;
+ }
+ if (buflen < offsetof(struct ctdb_public_ip_info_wire, ifaces) +
+ wire->num * sizeof(struct ctdb_iface)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc(mem_ctx, struct ctdb_public_ip_info);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ /* Copies the fixed fields that precede the count (ip, active_idx) */
+ memcpy(val, wire, offsetof(struct ctdb_public_ip_info_wire, num));
+
+ val->ifaces = talloc(val, struct ctdb_iface_list);
+ if (val->ifaces == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ val->ifaces->num = wire->num;
+ val->ifaces->iface = talloc_array(val->ifaces, struct ctdb_iface,
+ wire->num);
+ if (val->ifaces->iface == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ memcpy(val->ifaces->iface, wire->ifaces,
+ wire->num * sizeof(struct ctdb_iface));
+
+ *out = val;
+ return 0;
+}
+
+/*
+ * Overlay for a marshalled statistics list: a 32-bit count followed by
+ * that many ctdb_statistics structs. stats[1] only marks the array start.
+ */
+struct ctdb_statistics_list_wire {
+ uint32_t num;
+ struct ctdb_statistics stats[1];
+};
+
+/* Marshalled size: header up to the array plus in->num raw structs. */
+static size_t ctdb_statistics_list_len_old(struct ctdb_statistics_list *in)
+{
+ return offsetof(struct ctdb_statistics_list_wire, stats) +
+ in->num * sizeof(struct ctdb_statistics);
+}
+
+/*
+ * Serialise a statistics list into buf: the count, then the whole stats
+ * array in one copy. No bounds check is done on buf.
+ */
+static void ctdb_statistics_list_push_old(struct ctdb_statistics_list *in,
+ uint8_t *buf)
+{
+ struct ctdb_statistics_list_wire *wire =
+ (struct ctdb_statistics_list_wire *)buf;
+
+ wire->num = in->num;
+ memcpy(wire->stats, in->stats,
+ in->num * sizeof(struct ctdb_statistics));
+}
+
+/*
+ * Unmarshal a statistics list. On success returns 0 and stores a new list
+ * (child of mem_ctx, stats array a child of the list) in *out.
+ * Returns EMSGSIZE if buf is too short for the header or the advertised
+ * number of entries, ENOMEM on allocation failure.
+ */
+static int ctdb_statistics_list_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_statistics_list **out)
+{
+ struct ctdb_statistics_list *val;
+ struct ctdb_statistics_list_wire *wire =
+ (struct ctdb_statistics_list_wire *)buf;
+
+ if (buflen < offsetof(struct ctdb_statistics_list_wire, stats)) {
+ return EMSGSIZE;
+ }
+ /* Cheap upper bound on the count before doing any multiplication */
+ if (wire->num > buflen / sizeof(struct ctdb_statistics)) {
+ return EMSGSIZE;
+ }
+ /* Guard against size_t wrap-around of the total length */
+ if (offsetof(struct ctdb_statistics_list_wire, stats) +
+ wire->num * sizeof(struct ctdb_statistics) <
+ offsetof(struct ctdb_statistics_list_wire, stats)) {
+ return EMSGSIZE;
+ }
+ if (buflen < offsetof(struct ctdb_statistics_list_wire, stats) +
+ wire->num * sizeof(struct ctdb_statistics)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc(mem_ctx, struct ctdb_statistics_list);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ val->num = wire->num;
+
+ val->stats = talloc_array(val, struct ctdb_statistics, wire->num);
+ if (val->stats == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ memcpy(val->stats, wire->stats,
+ wire->num * sizeof(struct ctdb_statistics));
+
+ *out = val;
+ return 0;
+}
+
+/*
+ * Overlay for marshalled key data: database id, record header, key length
+ * and then "keylen" key bytes. key[1] only marks where the key begins.
+ */
+struct ctdb_key_data_wire {
+ uint32_t db_id;
+ struct ctdb_ltdb_header header;
+ uint32_t keylen;
+ uint8_t key[1];
+};
+
+/* Marshalled size: fixed header up to the key bytes plus the key size. */
+static size_t ctdb_key_data_len_old(struct ctdb_key_data *in)
+{
+ return offsetof(struct ctdb_key_data_wire, key) + in->key.dsize;
+}
+
+/*
+ * Serialise key data into buf. No bounds check is done on buf.
+ *
+ * NOTE(review): the first memcpy copies offsetof(struct ctdb_key_data, key)
+ * bytes of the in-memory struct onto the wire overlay, so it assumes both
+ * structs share the same layout for the fields before key/keylen - confirm.
+ */
+static void ctdb_key_data_push_old(struct ctdb_key_data *in, uint8_t *buf)
+{
+ struct ctdb_key_data_wire *wire = (struct ctdb_key_data_wire *)buf;
+
+ memcpy(wire, in, offsetof(struct ctdb_key_data, key));
+ wire->keylen = in->key.dsize;
+ memcpy(wire->key, in->key.dptr, in->key.dsize);
+}
+
+/*
+ * Unmarshal key data. On success returns 0 and stores a new object (child
+ * of mem_ctx, key copy a child of the object) in *out.
+ * Returns EMSGSIZE if buf is too short for the header or the advertised
+ * key length, ENOMEM on allocation failure.
+ */
+static int ctdb_key_data_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_key_data **out)
+{
+ struct ctdb_key_data *val;
+ struct ctdb_key_data_wire *wire = (struct ctdb_key_data_wire *)buf;
+
+ if (buflen < offsetof(struct ctdb_key_data_wire, key)) {
+ return EMSGSIZE;
+ }
+ /* Quick sanity bound on the advertised key length */
+ if (wire->keylen > buflen) {
+ return EMSGSIZE;
+ }
+ /* Guard against wrap-around of header + key length */
+ if (offsetof(struct ctdb_key_data_wire, key) + wire->keylen <
+ offsetof(struct ctdb_key_data_wire, key)) {
+ return EMSGSIZE;
+ }
+ if (buflen < offsetof(struct ctdb_key_data_wire, key) + wire->keylen) {
+ return EMSGSIZE;
+ }
+
+ val = talloc(mem_ctx, struct ctdb_key_data);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ /* Copies the fixed fields that precede the key in the in-memory struct */
+ memcpy(val, wire, offsetof(struct ctdb_key_data, key));
+
+ val->key.dsize = wire->keylen;
+ val->key.dptr = talloc_memdup(val, wire->key, wire->keylen);
+ if (val->key.dptr == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ *out = val;
+ return 0;
+}
+
+/*
+ * Overlay for marshalled database statistics: the raw ctdb_db_statistics
+ * struct followed by the hot-key bytes packed back to back.
+ * hot_keys_wire[1] only marks where the packed key bytes begin.
+ */
+struct ctdb_db_statistics_wire {
+ struct ctdb_db_statistics dbstats;
+ char hot_keys_wire[1];
+};
+
+/*
+ * Marshalled size: the raw struct plus the key bytes of all MAX_HOT_KEYS
+ * hot-key slots (every slot is counted, regardless of in->num_hot_keys).
+ */
+static size_t ctdb_db_statistics_len_old(struct ctdb_db_statistics *in)
+{
+ size_t len;
+ int i;
+
+ len = sizeof(struct ctdb_db_statistics);
+ for (i=0; i<MAX_HOT_KEYS; i++) {
+ len += in->hot_keys[i].key.dsize;
+ }
+ return len;
+}
+
+/*
+ * Serialise database statistics into buf: the raw struct, then the key
+ * bytes of all MAX_HOT_KEYS slots packed back to back.
+ *
+ * Side effect: overwrites in->num_hot_keys with MAX_HOT_KEYS before
+ * copying, so the caller's struct is modified. No bounds check is done on
+ * buf.
+ */
+static void ctdb_db_statistics_push_old(struct ctdb_db_statistics *in,
+ void *buf)
+{
+ struct ctdb_db_statistics_wire *wire =
+ (struct ctdb_db_statistics_wire *)buf;
+ size_t offset;
+ int i;
+
+ in->num_hot_keys = MAX_HOT_KEYS;
+ memcpy(wire, in, sizeof(struct ctdb_db_statistics));
+
+ offset = 0;
+ for (i=0; i<MAX_HOT_KEYS; i++) {
+ memcpy(&wire->hot_keys_wire[offset],
+ in->hot_keys[i].key.dptr,
+ in->hot_keys[i].key.dsize);
+ offset += in->hot_keys[i].key.dsize;
+ }
+}
+
+/*
+ * Unmarshal database statistics in the "_old" layout (raw struct followed
+ * by the packed hot-key bytes).
+ *
+ * On success returns 0 and stores a newly allocated object (talloc child of
+ * mem_ctx) in *out; each hot-key copy is a child of that object, so freeing
+ * the object releases the keys as well.
+ * Returns EMSGSIZE if buf is too short, if the advertised hot-key count
+ * exceeds MAX_HOT_KEYS, or if the key sizes do not fit in buf; ENOMEM on
+ * allocation failure. *out is not written on failure.
+ */
+static int ctdb_db_statistics_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_db_statistics **out)
+{
+ struct ctdb_db_statistics *val;
+ struct ctdb_db_statistics_wire *wire =
+ (struct ctdb_db_statistics_wire *)buf;
+ size_t offset;
+ unsigned int i;
+
+ if (buflen < sizeof(struct ctdb_db_statistics)) {
+ return EMSGSIZE;
+ }
+
+ /*
+ * The count comes from the wire; without this bound the loops below
+ * would index hot_keys[] past its MAX_HOT_KEYS entries.
+ */
+ if (wire->dbstats.num_hot_keys > MAX_HOT_KEYS) {
+ return EMSGSIZE;
+ }
+
+ /* First pass: validate that the packed key bytes fit in the buffer */
+ offset = 0;
+ for (i=0; i<wire->dbstats.num_hot_keys; i++) {
+ if (wire->dbstats.hot_keys[i].key.dsize > buflen) {
+ return EMSGSIZE;
+ }
+ if (offset + wire->dbstats.hot_keys[i].key.dsize < offset) {
+ return EMSGSIZE;
+ }
+ offset += wire->dbstats.hot_keys[i].key.dsize;
+ if (offset > buflen) {
+ return EMSGSIZE;
+ }
+ }
+ if (sizeof(struct ctdb_db_statistics) + offset <
+ sizeof(struct ctdb_db_statistics)) {
+ return EMSGSIZE;
+ }
+ if (buflen < sizeof(struct ctdb_db_statistics) + offset) {
+ return EMSGSIZE;
+ }
+
+ val = talloc(mem_ctx, struct ctdb_db_statistics);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ /* The copied key.dptr values are sender pointers; replaced below */
+ memcpy(val, wire, sizeof(struct ctdb_db_statistics));
+
+ /* Second pass: give every hot key its own copy of the key bytes */
+ offset = 0;
+ for (i=0; i<wire->dbstats.num_hot_keys; i++) {
+ uint8_t *ptr;
+ size_t key_size;
+
+ key_size = val->hot_keys[i].key.dsize;
+ /* Parent on val so the error path below frees earlier keys */
+ ptr = talloc_memdup(val, &wire->hot_keys_wire[offset],
+ key_size);
+ if (ptr == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+ val->hot_keys[i].key.dptr = ptr;
+ offset += key_size;
+ }
+
+ *out = val;
+ return 0;
+}
+
+/* Marshalled size of a ctdb_election_message: the in-memory struct size. */
+static size_t ctdb_election_message_len_old(struct ctdb_election_message *in)
+{
+ return sizeof(struct ctdb_election_message);
+}
+
+/* Serialise by copying the struct's bytes verbatim to buf (unchecked). */
+static void ctdb_election_message_push_old(struct ctdb_election_message *in,
+ uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_election_message));
+}
+
+/*
+ * Unmarshal a ctdb_election_message by duplicating the leading bytes of
+ * buf onto mem_ctx. Returns 0 and sets *out on success, EMSGSIZE if buf is
+ * shorter than the struct, ENOMEM if the duplication fails.
+ */
+static int ctdb_election_message_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_election_message **out)
+{
+ struct ctdb_election_message *val;
+
+ if (buflen < sizeof(struct ctdb_election_message)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc_memdup(mem_ctx, buf,
+ sizeof(struct ctdb_election_message));
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ *out = val;
+ return 0;
+}
+
+/* Marshalled size of a ctdb_srvid_message: the in-memory struct size. */
+static size_t ctdb_srvid_message_len_old(struct ctdb_srvid_message *in)
+{
+ return sizeof(struct ctdb_srvid_message);
+}
+
+/* Serialise by copying the struct's bytes verbatim to buf (unchecked). */
+static void ctdb_srvid_message_push_old(struct ctdb_srvid_message *in,
+ uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_srvid_message));
+}
+
+/*
+ * Unmarshal a ctdb_srvid_message by duplicating the leading bytes of buf
+ * onto mem_ctx. Returns 0 and sets *out on success, EMSGSIZE if buf is
+ * shorter than the struct, ENOMEM if the duplication fails.
+ */
+static int ctdb_srvid_message_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_srvid_message **out)
+{
+ struct ctdb_srvid_message *val;
+
+ if (buflen < sizeof(struct ctdb_srvid_message)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc_memdup(mem_ctx, buf, sizeof(struct ctdb_srvid_message));
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ *out = val;
+ return 0;
+}
+
+/* Marshalled size of a ctdb_disable_message: the in-memory struct size. */
+static size_t ctdb_disable_message_len_old(struct ctdb_disable_message *in)
+{
+ return sizeof(struct ctdb_disable_message);
+}
+
+/* Serialise by copying the struct's bytes verbatim to buf (unchecked). */
+static void ctdb_disable_message_push_old(struct ctdb_disable_message *in,
+ uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_disable_message));
+}
+
+/*
+ * Unmarshal a ctdb_disable_message by duplicating the leading bytes of buf
+ * onto mem_ctx. Returns 0 and sets *out on success, EMSGSIZE if buf is
+ * shorter than the struct, ENOMEM if the duplication fails.
+ */
+static int ctdb_disable_message_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_disable_message **out)
+{
+ struct ctdb_disable_message *val;
+
+ if (buflen < sizeof(struct ctdb_disable_message)) {
+ return EMSGSIZE;
+ }
+
+ val = talloc_memdup(mem_ctx, buf, sizeof(struct ctdb_disable_message));
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ *out = val;
+ return 0;
+}
+
+/* Marshalled size of a ctdb_server_id: the in-memory struct size. */
+static size_t ctdb_server_id_len_old(struct ctdb_server_id *in)
+{
+ return sizeof(struct ctdb_server_id);
+}
+
+/* Serialise by copying the struct's bytes verbatim to buf (unchecked). */
+static void ctdb_server_id_push_old(struct ctdb_server_id *in, uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_server_id));
+}
+
+/*
+ * Deserialise a ctdb_server_id into caller-provided storage "out" (no
+ * allocation). Returns 0 on success, EMSGSIZE if buf is shorter than the
+ * struct.
+ */
+static int ctdb_server_id_pull_old(uint8_t *buf, size_t buflen,
+ struct ctdb_server_id *out)
+{
+ if (buflen < sizeof(struct ctdb_server_id)) {
+ return EMSGSIZE;
+ }
+
+ memcpy(out, buf, sizeof(struct ctdb_server_id));
+ return 0;
+}
+
+/* Marshalled size of a ctdb_g_lock: the in-memory struct size. */
+static size_t ctdb_g_lock_len_old(struct ctdb_g_lock *in)
+{
+ return sizeof(struct ctdb_g_lock);
+}
+
+/* Serialise by copying the struct's bytes verbatim to buf (unchecked). */
+static void ctdb_g_lock_push_old(struct ctdb_g_lock *in, uint8_t *buf)
+{
+ memcpy(buf, in, sizeof(struct ctdb_g_lock));
+}
+
+/*
+ * Deserialise a ctdb_g_lock into caller-provided storage "out" (no
+ * allocation). Returns 0 on success, EMSGSIZE if buf is shorter than the
+ * struct.
+ */
+static int ctdb_g_lock_pull_old(uint8_t *buf, size_t buflen,
+ struct ctdb_g_lock *out)
+{
+ if (buflen < sizeof(struct ctdb_g_lock)) {
+ return EMSGSIZE;
+ }
+
+ memcpy(out, buf, sizeof(struct ctdb_g_lock));
+ return 0;
+}
+
+/*
+ * Marshalled size of a g_lock list: in->num raw ctdb_g_lock structs with
+ * no count header (the count is implied by the buffer length).
+ */
+static size_t ctdb_g_lock_list_len_old(struct ctdb_g_lock_list *in)
+{
+ return in->num * sizeof(struct ctdb_g_lock);
+}
+
+/*
+ * Serialise a g_lock list into buf as in->num consecutive ctdb_g_lock
+ * structs. No count is written and buf is not bounds-checked.
+ */
+static void ctdb_g_lock_list_push_old(struct ctdb_g_lock_list *in,
+ uint8_t *buf)
+{
+ size_t offset = 0;
+ unsigned int i;
+
+ for (i=0; i<in->num; i++) {
+ ctdb_g_lock_push_old(&in->lock[i], &buf[offset]);
+ offset += sizeof(struct ctdb_g_lock);
+ }
+}
+
+/*
+ * Unmarshal a g_lock list. The entry count is derived from the buffer
+ * length (buflen / sizeof(struct ctdb_g_lock)); any trailing bytes that do
+ * not make up a whole entry are ignored.
+ *
+ * On success returns 0 and stores a new list (child of mem_ctx, lock array
+ * a child of the list) in *out. Returns ENOMEM on allocation failure or the
+ * error from an element pull.
+ */
+static int ctdb_g_lock_list_pull_old(uint8_t *buf, size_t buflen,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_g_lock_list **out)
+{
+ struct ctdb_g_lock_list *val;
+ unsigned count;
+ size_t offset;
+ unsigned int i;
+ int ret;
+
+ val = talloc_zero(mem_ctx, struct ctdb_g_lock_list);
+ if (val == NULL) {
+ return ENOMEM;
+ }
+
+ count = buflen / sizeof(struct ctdb_g_lock);
+ val->lock = talloc_array(val, struct ctdb_g_lock, count);
+ if (val->lock == NULL) {
+ talloc_free(val);
+ return ENOMEM;
+ }
+
+ offset = 0;
+ for (i=0; i<count; i++) {
+ ret = ctdb_g_lock_pull_old(&buf[offset], buflen-offset,
+ &val->lock[i]);
+ if (ret != 0) {
+ talloc_free(val);
+ return ret;
+ }
+ offset += sizeof(struct ctdb_g_lock);
+ }
+
+ /* Set only after every entry was pulled successfully */
+ val->num = count;
+
+ *out = val;
+ return 0;
+}
+
+/*
+ * Instantiate one compat test per data type. The COMPAT_TYPE1_TEST and
+ * COMPAT_TYPE3_TEST macros are defined outside this part of the file;
+ * presumably each expands to a function that checks the *_old marshalling
+ * above against the current implementation - confirm against the macro
+ * definitions. The generated functions are invoked through
+ * COMPAT_TEST_FUNC(NAME)() in protocol_types_compat_test().
+ */
+COMPAT_TYPE3_TEST(struct ctdb_statistics, ctdb_statistics);
+COMPAT_TYPE3_TEST(struct ctdb_vnn_map, ctdb_vnn_map);
+COMPAT_TYPE3_TEST(struct ctdb_dbid_map, ctdb_dbid_map);
+COMPAT_TYPE3_TEST(struct ctdb_pulldb, ctdb_pulldb);
+COMPAT_TYPE3_TEST(struct ctdb_pulldb_ext, ctdb_pulldb_ext);
+
+COMPAT_TYPE1_TEST(struct ctdb_ltdb_header, ctdb_ltdb_header);
+
+COMPAT_TYPE3_TEST(struct ctdb_rec_data, ctdb_rec_data);
+COMPAT_TYPE3_TEST(struct ctdb_rec_buffer, ctdb_rec_buffer);
+COMPAT_TYPE3_TEST(struct ctdb_traverse_start, ctdb_traverse_start);
+COMPAT_TYPE3_TEST(struct ctdb_traverse_all, ctdb_traverse_all);
+COMPAT_TYPE3_TEST(struct ctdb_traverse_start_ext, ctdb_traverse_start_ext);
+COMPAT_TYPE3_TEST(struct ctdb_traverse_all_ext, ctdb_traverse_all_ext);
+COMPAT_TYPE3_TEST(ctdb_sock_addr, ctdb_sock_addr);
+COMPAT_TYPE3_TEST(struct ctdb_connection, ctdb_connection);
+COMPAT_TYPE3_TEST(struct ctdb_tunable, ctdb_tunable);
+COMPAT_TYPE3_TEST(struct ctdb_node_flag_change, ctdb_node_flag_change);
+COMPAT_TYPE3_TEST(struct ctdb_var_list, ctdb_var_list);
+COMPAT_TYPE3_TEST(struct ctdb_tunable_list, ctdb_tunable_list);
+COMPAT_TYPE3_TEST(struct ctdb_tickle_list, ctdb_tickle_list);
+COMPAT_TYPE3_TEST(struct ctdb_addr_info, ctdb_addr_info);
+COMPAT_TYPE3_TEST(struct ctdb_transdb, ctdb_transdb);
+COMPAT_TYPE3_TEST(struct ctdb_uptime, ctdb_uptime);
+COMPAT_TYPE3_TEST(struct ctdb_public_ip, ctdb_public_ip);
+COMPAT_TYPE3_TEST(struct ctdb_public_ip_list, ctdb_public_ip_list);
+COMPAT_TYPE3_TEST(struct ctdb_node_and_flags, ctdb_node_and_flags);
+COMPAT_TYPE3_TEST(struct ctdb_node_map, ctdb_node_map);
+COMPAT_TYPE3_TEST(struct ctdb_script, ctdb_script);
+COMPAT_TYPE3_TEST(struct ctdb_script_list, ctdb_script_list);
+COMPAT_TYPE3_TEST(struct ctdb_ban_state, ctdb_ban_state);
+COMPAT_TYPE3_TEST(struct ctdb_notify_data, ctdb_notify_data);
+COMPAT_TYPE3_TEST(struct ctdb_iface, ctdb_iface);
+COMPAT_TYPE3_TEST(struct ctdb_iface_list, ctdb_iface_list);
+COMPAT_TYPE3_TEST(struct ctdb_public_ip_info, ctdb_public_ip_info);
+COMPAT_TYPE3_TEST(struct ctdb_statistics_list, ctdb_statistics_list);
+COMPAT_TYPE3_TEST(struct ctdb_key_data, ctdb_key_data);
+COMPAT_TYPE3_TEST(struct ctdb_db_statistics, ctdb_db_statistics);
+
+COMPAT_TYPE3_TEST(struct ctdb_election_message, ctdb_election_message);
+COMPAT_TYPE3_TEST(struct ctdb_srvid_message, ctdb_srvid_message);
+COMPAT_TYPE3_TEST(struct ctdb_disable_message, ctdb_disable_message);
+
+COMPAT_TYPE1_TEST(struct ctdb_server_id, ctdb_server_id);
+COMPAT_TYPE1_TEST(struct ctdb_g_lock, ctdb_g_lock);
+
+COMPAT_TYPE3_TEST(struct ctdb_g_lock_list, ctdb_g_lock_list);
+
+/*
+ * Run every compat test instantiated above, once each, in a fixed order.
+ * Passed as the callback to protocol_test_iterate() from main().
+ */
+static void protocol_types_compat_test(void)
+{
+ COMPAT_TEST_FUNC(ctdb_statistics)();
+ COMPAT_TEST_FUNC(ctdb_vnn_map)();
+ COMPAT_TEST_FUNC(ctdb_dbid_map)();
+ COMPAT_TEST_FUNC(ctdb_pulldb)();
+ COMPAT_TEST_FUNC(ctdb_pulldb_ext)();
+ COMPAT_TEST_FUNC(ctdb_ltdb_header)();
+ COMPAT_TEST_FUNC(ctdb_rec_data)();
+ COMPAT_TEST_FUNC(ctdb_rec_buffer)();
+ COMPAT_TEST_FUNC(ctdb_traverse_start)();
+ COMPAT_TEST_FUNC(ctdb_traverse_all)();
+ COMPAT_TEST_FUNC(ctdb_traverse_start_ext)();
+ COMPAT_TEST_FUNC(ctdb_traverse_all_ext)();
+ COMPAT_TEST_FUNC(ctdb_sock_addr)();
+ COMPAT_TEST_FUNC(ctdb_connection)();
+ COMPAT_TEST_FUNC(ctdb_tunable)();
+ COMPAT_TEST_FUNC(ctdb_node_flag_change)();
+ COMPAT_TEST_FUNC(ctdb_var_list)();
+ COMPAT_TEST_FUNC(ctdb_tunable_list)();
+ COMPAT_TEST_FUNC(ctdb_tickle_list)();
+ COMPAT_TEST_FUNC(ctdb_addr_info)();
+ COMPAT_TEST_FUNC(ctdb_transdb)();
+ COMPAT_TEST_FUNC(ctdb_uptime)();
+ COMPAT_TEST_FUNC(ctdb_public_ip)();
+ COMPAT_TEST_FUNC(ctdb_public_ip_list)();
+ COMPAT_TEST_FUNC(ctdb_node_and_flags)();
+ COMPAT_TEST_FUNC(ctdb_node_map)();
+ COMPAT_TEST_FUNC(ctdb_script)();
+ COMPAT_TEST_FUNC(ctdb_script_list)();
+ COMPAT_TEST_FUNC(ctdb_ban_state)();
+ COMPAT_TEST_FUNC(ctdb_notify_data)();
+ COMPAT_TEST_FUNC(ctdb_iface)();
+ COMPAT_TEST_FUNC(ctdb_iface_list)();
+ COMPAT_TEST_FUNC(ctdb_public_ip_info)();
+ COMPAT_TEST_FUNC(ctdb_statistics_list)();
+ COMPAT_TEST_FUNC(ctdb_key_data)();
+ COMPAT_TEST_FUNC(ctdb_db_statistics)();
+
+ COMPAT_TEST_FUNC(ctdb_election_message)();
+ COMPAT_TEST_FUNC(ctdb_srvid_message)();
+ COMPAT_TEST_FUNC(ctdb_disable_message)();
+ COMPAT_TEST_FUNC(ctdb_server_id)();
+ COMPAT_TEST_FUNC(ctdb_g_lock)();
+ COMPAT_TEST_FUNC(ctdb_g_lock_list)();
+}
+
+/*
+ * Entry point: hand the command line and the compat test callback to
+ * protocol_test_iterate() (defined elsewhere) and exit with status 0.
+ */
+int main(int argc, const char *argv[])
+{
+ protocol_test_iterate(argc, argv, protocol_types_compat_test);
+ return 0;
+}
diff --git a/ctdb/tests/src/protocol_types_test.c b/ctdb/tests/src/protocol_types_test.c
new file mode 100644
index 0000000..f4a3048
--- /dev/null
+++ b/ctdb/tests/src/protocol_types_test.c
@@ -0,0 +1,194 @@
+/*
+ protocol types tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <assert.h>
+
+#include "protocol/protocol_basic.c"
+#include "protocol/protocol_types.c"
+#include "protocol/protocol_sock.c"
+
+#include "tests/src/protocol_common.h"
+
+/*
+ * Instantiate one test function per protocol data type. The
+ * PROTOCOL_TYPE1/2/3_TEST macros are defined outside this part of the file
+ * (presumably in tests/src/protocol_common.h, included above - confirm).
+ * The generated functions are invoked through TEST_FUNC(NAME)() in
+ * protocol_types_test().
+ */
+PROTOCOL_TYPE2_TEST(TDB_DATA, ctdb_tdb_data);
+PROTOCOL_TYPE2_TEST(TDB_DATA, ctdb_tdb_datan);
+PROTOCOL_TYPE1_TEST(struct ctdb_latency_counter, ctdb_latency_counter);
+
+PROTOCOL_TYPE3_TEST(struct ctdb_statistics, ctdb_statistics);
+PROTOCOL_TYPE3_TEST(struct ctdb_vnn_map, ctdb_vnn_map);
+PROTOCOL_TYPE3_TEST(struct ctdb_dbid, ctdb_dbid);
+PROTOCOL_TYPE3_TEST(struct ctdb_dbid_map, ctdb_dbid_map);
+PROTOCOL_TYPE3_TEST(struct ctdb_pulldb, ctdb_pulldb);
+PROTOCOL_TYPE3_TEST(struct ctdb_pulldb_ext, ctdb_pulldb_ext);
+PROTOCOL_TYPE3_TEST(struct ctdb_db_vacuum, ctdb_db_vacuum);
+PROTOCOL_TYPE3_TEST(struct ctdb_echo_data, ctdb_echo_data);
+PROTOCOL_TYPE1_TEST(struct ctdb_ltdb_header, ctdb_ltdb_header);
+PROTOCOL_TYPE3_TEST(struct ctdb_rec_data, ctdb_rec_data);
+PROTOCOL_TYPE3_TEST(struct ctdb_rec_buffer, ctdb_rec_buffer);
+PROTOCOL_TYPE3_TEST(struct ctdb_traverse_start, ctdb_traverse_start);
+PROTOCOL_TYPE3_TEST(struct ctdb_traverse_all, ctdb_traverse_all);
+PROTOCOL_TYPE3_TEST(struct ctdb_traverse_start_ext, ctdb_traverse_start_ext);
+PROTOCOL_TYPE3_TEST(struct ctdb_traverse_all_ext, ctdb_traverse_all_ext);
+PROTOCOL_TYPE3_TEST(ctdb_sock_addr, ctdb_sock_addr);
+PROTOCOL_TYPE3_TEST(struct ctdb_connection, ctdb_connection);
+PROTOCOL_TYPE3_TEST(struct ctdb_connection_list, ctdb_connection_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_tunable, ctdb_tunable);
+PROTOCOL_TYPE3_TEST(struct ctdb_node_flag_change, ctdb_node_flag_change);
+PROTOCOL_TYPE3_TEST(struct ctdb_var_list, ctdb_var_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_tunable_list, ctdb_tunable_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_tickle_list, ctdb_tickle_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_addr_info, ctdb_addr_info);
+PROTOCOL_TYPE3_TEST(struct ctdb_transdb, ctdb_transdb);
+PROTOCOL_TYPE3_TEST(struct ctdb_uptime, ctdb_uptime);
+PROTOCOL_TYPE3_TEST(struct ctdb_public_ip, ctdb_public_ip);
+PROTOCOL_TYPE3_TEST(struct ctdb_public_ip_list, ctdb_public_ip_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_node_and_flags, ctdb_node_and_flags);
+PROTOCOL_TYPE3_TEST(struct ctdb_node_map, ctdb_node_map);
+PROTOCOL_TYPE3_TEST(struct ctdb_script, ctdb_script);
+PROTOCOL_TYPE3_TEST(struct ctdb_script_list, ctdb_script_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_ban_state, ctdb_ban_state);
+PROTOCOL_TYPE3_TEST(struct ctdb_notify_data, ctdb_notify_data);
+PROTOCOL_TYPE3_TEST(struct ctdb_iface, ctdb_iface);
+PROTOCOL_TYPE3_TEST(struct ctdb_iface_list, ctdb_iface_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_public_ip_info, ctdb_public_ip_info);
+PROTOCOL_TYPE3_TEST(struct ctdb_statistics_list, ctdb_statistics_list);
+PROTOCOL_TYPE3_TEST(struct ctdb_key_data, ctdb_key_data);
+PROTOCOL_TYPE3_TEST(struct ctdb_db_statistics, ctdb_db_statistics);
+PROTOCOL_TYPE3_TEST(struct ctdb_pid_srvid, ctdb_pid_srvid);
+PROTOCOL_TYPE3_TEST(struct ctdb_election_message, ctdb_election_message);
+PROTOCOL_TYPE3_TEST(struct ctdb_srvid_message, ctdb_srvid_message);
+PROTOCOL_TYPE3_TEST(struct ctdb_disable_message, ctdb_disable_message);
+PROTOCOL_TYPE1_TEST(struct ctdb_server_id, ctdb_server_id);
+PROTOCOL_TYPE1_TEST(struct ctdb_g_lock, ctdb_g_lock);
+PROTOCOL_TYPE3_TEST(struct ctdb_g_lock_list, ctdb_g_lock_list);
+
+PROTOCOL_TYPE1_TEST(struct sock_packet_header, sock_packet_header);
+
+/*
+ * File round-trip test for ctdb_rec_buffer: generate 100 record buffers,
+ * write them back to back into a scratch file, rewind, read them all back
+ * and verify each one against its original. The scratch file is unlinked
+ * straight after opening, so only the open descriptor keeps it alive.
+ */
+static void test_ctdb_rec_buffer_read_write(void)
+{
+ const char *filename = "ctdb_rec_buffer_test.dat";
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ struct ctdb_rec_buffer *written;
+ struct ctdb_rec_buffer **readback;
+ int count = 100;
+ off_t pos;
+ int fd;
+ int ret;
+ int n;
+
+ /* Build the input records */
+ written = talloc_array(mem_ctx, struct ctdb_rec_buffer, count);
+ assert(written != NULL);
+ for (n = 0; n < count; n++) {
+ fill_ctdb_rec_buffer(mem_ctx, &written[n]);
+ }
+
+ fd = open(filename, O_RDWR|O_CREAT, 0600);
+ assert(fd != -1);
+ unlink(filename);
+
+ /* Write phase */
+ for (n = 0; n < count; n++) {
+ ret = ctdb_rec_buffer_write(&written[n], fd);
+ assert(ret == 0);
+ }
+
+ /* Rewind by seeking back exactly as far as was written */
+ pos = lseek(fd, 0, SEEK_CUR);
+ assert(pos != -1);
+ pos = lseek(fd, -pos, SEEK_CUR);
+ assert(pos == 0);
+
+ /* Read phase */
+ readback = talloc_array(mem_ctx, struct ctdb_rec_buffer *, count);
+ assert(readback != NULL);
+
+ for (n = 0; n < count; n++) {
+ ret = ctdb_rec_buffer_read(fd, mem_ctx, &readback[n]);
+ assert(ret == 0);
+ }
+
+ close(fd);
+
+ /* Compare every record read back with what was written */
+ for (n = 0; n < count; n++) {
+ verify_ctdb_rec_buffer(&written[n], readback[n]);
+ }
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * Run every protocol type test instantiated above, once each, in a fixed
+ * order, followed by the rec_buffer file round-trip test. Passed as the
+ * callback to protocol_test_iterate() from main().
+ */
+static void protocol_types_test(void)
+{
+ TEST_FUNC(ctdb_tdb_data)();
+ TEST_FUNC(ctdb_tdb_datan)();
+ TEST_FUNC(ctdb_latency_counter)();
+
+ TEST_FUNC(ctdb_statistics)();
+ TEST_FUNC(ctdb_vnn_map)();
+ TEST_FUNC(ctdb_dbid)();
+ TEST_FUNC(ctdb_dbid_map)();
+ TEST_FUNC(ctdb_pulldb)();
+ TEST_FUNC(ctdb_pulldb_ext)();
+ TEST_FUNC(ctdb_db_vacuum)();
+ TEST_FUNC(ctdb_echo_data)();
+ TEST_FUNC(ctdb_ltdb_header)();
+ TEST_FUNC(ctdb_rec_data)();
+ TEST_FUNC(ctdb_rec_buffer)();
+ TEST_FUNC(ctdb_traverse_start)();
+ TEST_FUNC(ctdb_traverse_all)();
+ TEST_FUNC(ctdb_traverse_start_ext)();
+ TEST_FUNC(ctdb_traverse_all_ext)();
+ TEST_FUNC(ctdb_sock_addr)();
+ TEST_FUNC(ctdb_connection)();
+ TEST_FUNC(ctdb_connection_list)();
+ TEST_FUNC(ctdb_tunable)();
+ TEST_FUNC(ctdb_node_flag_change)();
+ TEST_FUNC(ctdb_var_list)();
+ TEST_FUNC(ctdb_tunable_list)();
+ TEST_FUNC(ctdb_tickle_list)();
+ TEST_FUNC(ctdb_addr_info)();
+ TEST_FUNC(ctdb_transdb)();
+ TEST_FUNC(ctdb_uptime)();
+ TEST_FUNC(ctdb_public_ip)();
+ TEST_FUNC(ctdb_public_ip_list)();
+ TEST_FUNC(ctdb_node_and_flags)();
+ TEST_FUNC(ctdb_node_map)();
+ TEST_FUNC(ctdb_script)();
+ TEST_FUNC(ctdb_script_list)();
+ TEST_FUNC(ctdb_ban_state)();
+ TEST_FUNC(ctdb_notify_data)();
+ TEST_FUNC(ctdb_iface)();
+ TEST_FUNC(ctdb_iface_list)();
+ TEST_FUNC(ctdb_public_ip_info)();
+ TEST_FUNC(ctdb_statistics_list)();
+ TEST_FUNC(ctdb_key_data)();
+ TEST_FUNC(ctdb_db_statistics)();
+ TEST_FUNC(ctdb_pid_srvid)();
+ TEST_FUNC(ctdb_election_message)();
+ TEST_FUNC(ctdb_srvid_message)();
+ TEST_FUNC(ctdb_disable_message)();
+ TEST_FUNC(ctdb_server_id)();
+ TEST_FUNC(ctdb_g_lock)();
+ TEST_FUNC(ctdb_g_lock_list)();
+
+ TEST_FUNC(sock_packet_header)();
+
+ test_ctdb_rec_buffer_read_write();
+}
+
+/*
+ * Entry point: hand the command line and the protocol types test callback
+ * to protocol_test_iterate() (defined elsewhere) and exit with status 0.
+ */
+int main(int argc, const char *argv[])
+{
+ protocol_test_iterate(argc, argv, protocol_types_test);
+ return 0;
+}
diff --git a/ctdb/tests/src/protocol_util_test.c b/ctdb/tests/src/protocol_util_test.c
new file mode 100644
index 0000000..4ffe58c
--- /dev/null
+++ b/ctdb/tests/src/protocol_util_test.c
@@ -0,0 +1,417 @@
+/*
+ protocol utilities tests
+
+ Copyright (C) Martin Schwenke 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <assert.h>
+
+#include "protocol/protocol_basic.c"
+#include "protocol/protocol_types.c"
+#include "protocol/protocol_util.c"
+
+/*
+ * Test parsing of IPs, conversion to string
+ */
+
+static void test_sock_addr_to_string(const char *ip, bool with_port)
+{
+	ctdb_sock_addr sa;
+	const char *s;
+	int ret;
+	/* Round trip: parse ip, render it again, expect the same text back. */
+	ret = ctdb_sock_addr_from_string(ip, &sa, with_port);
+	assert(ret == 0);
+	s = ctdb_sock_addr_to_string(NULL, &sa, with_port);
+	assert(s != NULL && strcmp(ip, s) == 0); /* never hand NULL to strcmp() */
+	talloc_free(discard_const(s));
+}
+
+static void test_sock_addr_from_string_bad(const char *ip, bool with_port)
+{
+	ctdb_sock_addr parsed;
+	int rc = ctdb_sock_addr_from_string(ip, &parsed, with_port);
+
+	/* Malformed input must be rejected with EINVAL. */
+	assert(rc == EINVAL);
+}
+
+static void test_sock_addr_from_string_memcmp(const char *ip1,
+					      const char* ip2)
+{
+	ctdb_sock_addr first, second;
+	int rc;
+
+	/* Both spellings must parse, and to byte-identical structures. */
+	rc = ctdb_sock_addr_from_string(ip1, &first, false);
+	assert(rc == 0);
+	rc = ctdb_sock_addr_from_string(ip2, &second, false);
+	assert(rc == 0);
+	assert(memcmp(&first, &second, sizeof(ctdb_sock_addr)) == 0);
+}
+
+static void test_sock_addr_cmp(const char *ip1, const char *ip2,
+			       bool with_port, int res)
+{
+	ctdb_sock_addr lhs, rhs;
+	int rc, sign;
+
+	rc = ctdb_sock_addr_from_string(ip1, &lhs, with_port);
+	assert(rc == 0);
+	rc = ctdb_sock_addr_from_string(ip2, &rhs, with_port);
+	assert(rc == 0);
+
+	/*
+	 * Only the sign of the comparison is checked, so reduce the
+	 * result to -1, 0 or 1 before comparing it with res.
+	 */
+	rc = ctdb_sock_addr_cmp(&lhs, &rhs);
+	sign = (rc > 0) - (rc < 0);
+	assert(sign == res);
+}
+
+/*
+ * Test parsing of IP/mask, conversion to string
+ */
+
+static void test_sock_addr_mask_from_string(const char *ip_mask)
+{
+	ctdb_sock_addr sa;
+	unsigned mask;
+	const char *s, *t;
+	int ret;
+	/* Parse "addr/bits", rebuild that text and expect the input back. */
+	ret = ctdb_sock_addr_mask_from_string(ip_mask, &sa, &mask);
+	assert(ret == 0);
+	s = ctdb_sock_addr_to_string(NULL, &sa, false);
+	assert(s != NULL);
+	t = talloc_asprintf(s, "%s/%u", s, mask);
+	assert(t != NULL && strcmp(ip_mask, t) == 0); /* never hand NULL to strcmp() */
+	talloc_free(discard_const(s)); /* t was allocated on s, so this releases both */
+}
+
+static void test_sock_addr_mask_from_string_bad(const char *ip_mask)
+{
+	ctdb_sock_addr parsed;
+	unsigned bits;
+	int rc = ctdb_sock_addr_mask_from_string(ip_mask, &parsed, &bits);
+
+	/* Malformed address/mask input must be rejected with EINVAL. */
+	assert(rc == EINVAL);
+}
+
+/*
+ * Test parsing of connection, conversion to string
+ */
+
+static void test_connection_to_string(const char *conn_str)
+{
+ TALLOC_CTX *tmp_ctx;
+ struct ctdb_connection conn;
+ const char *s, *r;
+ int ret;
+ /* Runs conn_str through the parser and renderer, forwards and reversed. */
+ tmp_ctx = talloc_new(NULL);
+ assert(tmp_ctx != NULL);
+
+ /*
+ * Test non-reversed parse and render
+ */
+
+ ret = ctdb_connection_from_string(conn_str, false, &conn);
+ assert(ret == 0);
+
+ s = ctdb_connection_to_string(tmp_ctx, &conn, false);
+ assert(s != NULL);
+ ret = strcmp(conn_str, s);
+ assert(ret == 0);
+
+ talloc_free(discard_const(s)); /* s is assigned again further down */
+
+ /*
+ * Reversed render
+ */
+ r = ctdb_connection_to_string(tmp_ctx, &conn, true);
+ assert(r != NULL);
+ ret = strcmp(conn_str, r);
+ assert(ret != 0); /* the reversed rendering must differ from the input */
+
+ /*
+ * Reversed parse with forward render
+ */
+ ret = ctdb_connection_from_string(conn_str, true, &conn);
+ assert(ret == 0);
+
+ s = ctdb_connection_to_string(tmp_ctx, &conn, false);
+ assert(s != NULL);
+ ret = strcmp(r, s); /* must equal the reversed rendering kept in r */
+ assert(ret == 0);
+
+ talloc_free(discard_const(s));
+
+ /*
+ * Reversed parse and render
+ */
+ ret = ctdb_connection_from_string(conn_str, true, &conn);
+ assert(ret == 0);
+
+ s = ctdb_connection_to_string(tmp_ctx, &conn, true);
+ assert(s != NULL);
+ ret = strcmp(conn_str, s); /* reversing twice must give the input back */
+ assert(ret == 0);
+
+ talloc_free(tmp_ctx); /* also releases r and the last s, both allocated on tmp_ctx */
+}
+
+static void test_connection_from_string_bad(const char *conn_str)
+{
+	struct ctdb_connection parsed;
+	int rc = ctdb_connection_from_string(conn_str, false, &parsed);
+
+	/* Malformed connection strings must be rejected with EINVAL. */
+	assert(rc == EINVAL);
+}
+
+/*
+ * Test connection list utilities
+ */
+
+static void test_connection_list_read(const char *s1, const char *s2)
+{
+ TALLOC_CTX *tmp_ctx;
+ int pipefd[2];
+ pid_t pid;
+ struct ctdb_connection_list *conn_list = NULL;
+ const char *t;
+ int ret;
+ /* Reads s1 through a pipe fed by a child; the sorted list must render as s2. */
+ tmp_ctx = talloc_new(NULL);
+ assert(tmp_ctx != NULL);
+
+ ret = pipe(pipefd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) { /* child: write s1 into the pipe, then exit */
+ close(pipefd[0]); /* the child never reads */
+
+ ret = dup2(pipefd[1], STDOUT_FILENO); /* printf() below now writes to the pipe */
+ assert(ret != -1);
+
+ close(pipefd[1]);
+
+ printf("%s", s1);
+ fflush(stdout);
+
+ exit(0);
+ }
+ /* NOTE(review): the child is not reaped with wait()/waitpid() in this function. */
+ close(pipefd[1]); /* parent only reads; with this closed the read side can reach EOF */
+
+ ret = ctdb_connection_list_read(tmp_ctx, pipefd[0], false, &conn_list);
+ assert(ret == 0);
+
+ close(pipefd[0]);
+
+ ret = ctdb_connection_list_sort(conn_list);
+ assert(ret == 0);
+
+ t = ctdb_connection_list_to_string(tmp_ctx, conn_list, false);
+ assert(t != NULL);
+ ret = strcmp(t, s2);
+ assert(ret == 0);
+
+ talloc_free(tmp_ctx);
+}
+
+static void test_connection_list_read_bad(const char *s1)
+{
+ TALLOC_CTX *tmp_ctx;
+ int pipefd[2];
+ pid_t pid;
+ struct ctdb_connection_list *conn_list = NULL;
+ int ret;
+ /* Reads s1 through a pipe fed by a child; the read must fail with EINVAL. */
+ tmp_ctx = talloc_new(NULL);
+ assert(tmp_ctx != NULL);
+
+ ret = pipe(pipefd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) { /* child: write s1 into the pipe, then exit */
+ close(pipefd[0]); /* the child never reads */
+
+ ret = dup2(pipefd[1], STDOUT_FILENO); /* printf() below now writes to the pipe */
+ assert(ret != -1);
+
+ close(pipefd[1]);
+
+ printf("%s", s1);
+ fflush(stdout);
+
+ exit(0);
+ }
+ /* NOTE(review): the child is not reaped with wait()/waitpid() in this function. */
+ close(pipefd[1]); /* parent only reads; with this closed the read side can reach EOF */
+
+ ret = ctdb_connection_list_read(tmp_ctx, pipefd[0], false, &conn_list);
+ assert(ret == EINVAL);
+
+ close(pipefd[0]);
+
+ talloc_free(tmp_ctx);
+}
+
+/*
+ * Use macros for these to make them easy to concatenate
+ */
+
+#define CONN4 \
+"\
+127.0.0.1:12345 127.0.0.2:54321\n\
+127.0.0.2:12345 127.0.0.1:54322\n\
+127.0.0.1:12346 127.0.0.2:54323\n\
+127.0.0.2:12345 127.0.0.1:54324\n\
+127.0.0.1:12345 127.0.0.2:54325\n\
+"
+
+#define CONN4_SORT \
+"\
+127.0.0.1:12345 127.0.0.2:54321\n\
+127.0.0.1:12345 127.0.0.2:54325\n\
+127.0.0.1:12346 127.0.0.2:54323\n\
+127.0.0.2:12345 127.0.0.1:54322\n\
+127.0.0.2:12345 127.0.0.1:54324\n\
+"
+
+#define CONN6 \
+"\
+[fe80::6af7:28ff:fefa:d136]:12345 [fe80::6af7:28ff:fefa:d137]:54321\n\
+[fe80::6af7:28ff:fefa:d138]:12345 [fe80::6af7:28ff:fefa:d137]:54322\n\
+[fe80::6af7:28ff:fefa:d136]:12346 [fe80::6af7:28ff:fefa:d137]:54323\n\
+[fe80::6af7:28ff:fefa:d132]:12345 [fe80::6af7:28ff:fefa:d137]:54324\n\
+[fe80::6af7:28ff:fefa:d136]:12345 [fe80::6af7:28ff:fefa:d137]:54325\n\
+"
+
+#define CONN6_SORT \
+"\
+[fe80::6af7:28ff:fefa:d132]:12345 [fe80::6af7:28ff:fefa:d137]:54324\n\
+[fe80::6af7:28ff:fefa:d136]:12345 [fe80::6af7:28ff:fefa:d137]:54321\n\
+[fe80::6af7:28ff:fefa:d136]:12345 [fe80::6af7:28ff:fefa:d137]:54325\n\
+[fe80::6af7:28ff:fefa:d136]:12346 [fe80::6af7:28ff:fefa:d137]:54323\n\
+[fe80::6af7:28ff:fefa:d138]:12345 [fe80::6af7:28ff:fefa:d137]:54322\n\
+"
+
+int main(int argc, char *argv[])
+{
+ test_sock_addr_to_string("0.0.0.0", false);
+ test_sock_addr_to_string("127.0.0.1", false);
+ test_sock_addr_to_string("::1", false);
+ test_sock_addr_to_string("192.168.2.1", false);
+ test_sock_addr_to_string("fe80::6af7:28ff:fefa:d136", false);
+ /* The same round trip, now with a port */
+ test_sock_addr_to_string("0.0.0.0:0", true);
+ test_sock_addr_to_string("127.0.0.1:123", true);
+ test_sock_addr_to_string("[::1]:234", true);
+ test_sock_addr_to_string("192.168.2.1:123", true);
+ test_sock_addr_to_string("[fe80::6af7:28ff:fefa:d136]:234", true);
+ /* Inputs that must be rejected with EINVAL */
+ test_sock_addr_from_string_bad("0.0.0", false);
+ test_sock_addr_from_string_bad("0.0.0:0", true);
+ test_sock_addr_from_string_bad("fe80::6af7:28ff:fefa:d136", true);
+ test_sock_addr_from_string_bad("junk", false);
+ test_sock_addr_from_string_bad("0.0.0.0:0 trailing junk", true);
+ /* Pairs that must parse to byte-identical structures */
+ test_sock_addr_from_string_memcmp("127.0.0.1", "127.0.0.1");
+ test_sock_addr_from_string_memcmp("fe80::6af7:28ff:fefa:d136",
+ "fe80::6af7:28ff:fefa:d136");
+ test_sock_addr_from_string_memcmp("::ffff:192.0.2.128", "192.0.2.128");
+ /* Expected sign of ctdb_sock_addr_cmp() for addresses without ports */
+ test_sock_addr_cmp("127.0.0.1", "127.0.0.1" , false, 0);
+ test_sock_addr_cmp("127.0.0.1", "127.0.0.2" , false, -1);
+ test_sock_addr_cmp("127.0.0.2", "127.0.0.1" , false, 1);
+ test_sock_addr_cmp("127.0.1.2", "127.0.2.1" , false, -1);
+ test_sock_addr_cmp("127.0.2.1", "127.0.1.2" , false, 1);
+ test_sock_addr_cmp("fe80::6af7:28ff:fefa:d136", "127.0.1.2" , false, 1);
+ test_sock_addr_cmp("fe80::6af7:28ff:fefa:d136",
+ "fe80::6af7:28ff:fefa:d136" , false, 0);
+ test_sock_addr_cmp("fe80::6af7:28ff:fefa:d136",
+ "fe80::6af7:28ff:fefa:d137" , false, -1);
+ test_sock_addr_cmp("fe80::6af7:28ff:fefa:d136",
+ "fe80:0000:0000:0000:6af7:28ff:fefa:d136" ,
+ false, 0);
+ test_sock_addr_cmp("::ffff:192.0.2.128", "192.0.2.128", false, 0);
+ /* Same address, different port */
+ test_sock_addr_cmp("127.0.0.1:123", "127.0.0.1:124" , true, -1);
+ test_sock_addr_cmp("fe80::6af7:28ff:fefa:d136:123",
+ "fe80::6af7:28ff:fefa:d136:122" , true, 1);
+
+ /*
+ * Confirm equivalence of IPv6 sockets with and without
+ * square-brackets
+ */
+ test_sock_addr_cmp("[::1]:234", "::1:234", true, 0);
+ test_sock_addr_cmp("[fe80::6af7:28ff:fefa:d136]:234",
+ "fe80::6af7:28ff:fefa:d136:234",
+ true,
+ 0);
+ /* Check IPv4-mapped IPv6 addresses */
+ test_sock_addr_cmp("::ffff:172.16.0.27:977",
+ "172.16.0.27:977",
+ true,
+ 0);
+ test_sock_addr_cmp("[::ffff:172.16.0.27]:977",
+ "172.16.0.27:977",
+ true,
+ 0);
+ /* Address/mask parsing, including one input that must be rejected */
+ test_sock_addr_mask_from_string("127.0.0.1/8");
+ test_sock_addr_mask_from_string("::1/128");
+ test_sock_addr_mask_from_string("fe80::6af7:28ff:fefa:d136/64");
+ test_sock_addr_mask_from_string_bad("127.0.0.1");
+ /* Connection strings: parse and render in both directions */
+ test_connection_to_string("127.0.0.1:12345 127.0.0.2:54321");
+ test_connection_to_string("[fe80::6af7:28ff:fefa:d137]:12345 "
+ "[fe80::6af7:28ff:fefa:d138]:54321");
+ /* Connection strings that must be rejected with EINVAL */
+ test_connection_from_string_bad("127.0.0.1:12345 127.0.0.2:");
+ test_connection_from_string_bad("127.0.0.1:12345");
+ test_connection_from_string_bad("127.0.0.1:12345 "
+ "[fe80::6af7:28ff:fefa:d136]:122");
+ test_connection_from_string_bad("Junk!");
+ test_connection_from_string_bad("More junk");
+ /* Connection lists: read, sort, render and compare with the *_SORT text */
+ test_connection_list_read(CONN4, CONN4_SORT);
+ test_connection_list_read(CONN6, CONN6_SORT);
+ test_connection_list_read(CONN4 CONN6, CONN4_SORT CONN6_SORT);
+ test_connection_list_read(CONN4 "# Comment\n\n# Comment\n" CONN6,
+ CONN4_SORT CONN6_SORT);
+ /* Lists containing a line that cannot be parsed must fail to read */
+ test_connection_list_read_bad(CONN4 "# Comment\n\nJunk!!!\n" CONN6);
+ test_connection_list_read_bad(CONN4
+ "# Comment\n\n127.0.0.1: 127.0.0.1:124\n"
+ CONN6);
+
+ return 0;
+}
diff --git a/ctdb/tests/src/rb_test.c b/ctdb/tests/src/rb_test.c
new file mode 100644
index 0000000..d712c9a
--- /dev/null
+++ b/ctdb/tests/src/rb_test.c
@@ -0,0 +1,336 @@
+/*
+ simple rb test tool
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/time.h"
+
+#include <talloc.h>
+#include <assert.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "common/rb_tree.c"
+
+static struct timeval tp1,tp2;
+
+static void start_timer(void)
+{
+ gettimeofday(&tp1,NULL); /* record the start time in the file-scope tp1 */
+}
+
+static double end_timer(void)
+{
+ gettimeofday(&tp2,NULL); /* tp2 = now */
+ return (tp2.tv_sec + (tp2.tv_usec*1.0e-6)) - /* seconds between tp1 and tp2, as a double */
+ (tp1.tv_sec + (tp1.tv_usec*1.0e-6));
+}
+
+int num_records=5;
+
+static void *callback(void *p, void *d)
+{
+	/*
+	 * Bump the counter d points at, or the one p points at when d
+	 * is NULL, and return a pointer to it.  NOTE(review): d looks
+	 * like the entry already stored for the key - confirm.
+	 */
+	uint32_t *counter = (d != NULL) ? d : p;
+
+	*counter += 1;
+	return counter;
+}
+
+static void *random_add(void *p, void *d)
+{
+ return p; /* always hands back p; d is ignored */
+}
+
+static int traverse(void *p, void *d)
+{
+	uint32_t *data = (uint32_t *)d;
+	/* Print one stored value; the cast keeps %u correct for uint32_t. */
+	printf("traverse data:%u\n", (unsigned int)*data);
+	return 0;
+}
+
+static int random_traverse(void *p, void *d)
+{
+ printf("%s ",(char *)d); /* d is printed as a NUL-terminated string; p is unused */
+ return 0;
+}
+
+static uint32_t calc_checksum = 0;
+static int traverse_checksum(void *p, void *d)
+{
+	int i = 0, j = 0, k = 0;	/* fields sscanf() fails to match stay 0 */
+	/* Traverse callback: parse d as "i.j.k" and add i*100+j*10+k to calc_checksum. */
+	sscanf(d, "%d.%d.%d", &i, &j, &k);
+	calc_checksum += i*100+j*10+k;
+	return 0;
+}
+
+static int count_traverse(void *p, void *d)
+{
+	int *visited = p;	/* p points at the caller's visit counter */
+	*visited += 1;
+	return 0;		/* unlike count_traverse_abort(), does not return -1 */
+}
+
+static int count_traverse_abort(void *p, void *d)
+{
+	int *visited = p;	/* p points at the caller's visit counter */
+	*visited += 1;
+	return -1;		/* main() expects the walk to stop after this one visit */
+}
+
+/*
+ main program
+*/
+int main(int argc, const char *argv[])
+{
+ int traverse_count;
+ int i,j,k;
+ trbt_tree_t *tree;
+ uint32_t *data;
+ uint32_t key[3];
+ uint32_t key1[3] = {0,10,20};
+ uint32_t key2[3] = {0,10,21};
+ uint32_t key3[3] = {0,11,20};
+ uint32_t key4[3] = {2,10,20}; /* never inserted: used for the "not found" checks */
+ TALLOC_CTX *memctx;
+ uint32_t **u32array;
+ uint32_t checksum;
+
+ /* testing trbt_insert32_callback for num_records */
+ memctx = talloc_new(NULL);
+ assert(memctx != NULL);
+
+ u32array = talloc_array(memctx, uint32_t *, num_records);
+ assert(u32array != NULL);
+
+ tree = trbt_create(memctx, 0);
+ assert(tree != NULL);
+
+ for (i=0; i<num_records; i++) {
+ u32array[i] = talloc(u32array, uint32_t);
+ assert(u32array[i] != NULL);
+ *u32array[i] = 0;
+ trbt_insert32_callback(tree, i, callback, u32array[i]);
+ }
+ for (i=3; i<num_records; i++) { /* insert keys 3.. a second time, without a new counter */
+ trbt_insert32_callback(tree, i, callback, NULL);
+ }
+
+ /* first 3 keys should have data == 1
+ * the rest of the keys should have data == 2
+ */
+ for (i=0; i<num_records; i++) {
+ data = trbt_lookup32(tree, i);
+ assert(data != NULL);
+ if (i < 3) {
+ assert(*data == 1);
+ } else {
+ assert(*data == 2);
+ }
+ }
+
+ /* deleting key 2 */
+ talloc_free(u32array[2]);
+
+ /* deleting key 1 */
+ talloc_free(u32array[1]);
+
+ assert(talloc_total_size(memctx) == 212); /* NOTE(review): exact byte count presumably depends on struct layout - confirm it is portable */
+
+ /* freeing tree */
+ talloc_free(memctx);
+
+
+ printf("testing trbt_insertarray32_callback\n");
+ memctx = talloc_new(NULL);
+ assert(memctx != NULL);
+
+ tree = trbt_create(memctx, 0);
+ assert(tree != NULL);
+
+ u32array = talloc_array(memctx, uint32_t *, 4);
+ assert(u32array != NULL);
+
+ for (i=0;i<4;i++) {
+ u32array[i] = talloc(u32array, uint32_t);
+ assert(u32array[i] != NULL);
+ *u32array[i] = 0;
+ }
+ /* key1 is inserted three times, key2 twice, key3 once; the counters below must match */
+ trbt_insertarray32_callback(tree, 3, key1, callback, u32array[0]);
+ trbt_insertarray32_callback(tree, 3, key1, callback, u32array[0]);
+ trbt_insertarray32_callback(tree, 3, key2, callback, u32array[1]);
+ trbt_insertarray32_callback(tree, 3, key3, callback, u32array[2]);
+ trbt_insertarray32_callback(tree, 3, key2, callback, u32array[1]);
+ trbt_insertarray32_callback(tree, 3, key1, callback, u32array[0]);
+
+ data = trbt_lookuparray32(tree, 3, key1);
+ assert(data != NULL && *data == 3);
+ data = trbt_lookuparray32(tree, 3, key2);
+ assert(data != NULL && *data == 2);
+ data = trbt_lookuparray32(tree, 3, key3);
+ assert(data != NULL && *data == 1);
+ data = trbt_lookuparray32(tree, 3, key4);
+ assert(data == NULL);
+ trbt_traversearray32(tree, 3, traverse, NULL);
+
+ printf("\ndeleting key4\n");
+ talloc_free(trbt_lookuparray32(tree, 3, key4)); /* the lookup returned NULL just above, so this frees NULL */
+
+ data = trbt_lookuparray32(tree, 3, key1);
+ assert(data != NULL && *data == 3);
+ data = trbt_lookuparray32(tree, 3, key2);
+ assert(data != NULL && *data == 2);
+ data = trbt_lookuparray32(tree, 3, key3);
+ assert(data != NULL && *data == 1);
+ data = trbt_lookuparray32(tree, 3, key4);
+ assert(data == NULL);
+ trbt_traversearray32(tree, 3, traverse, NULL);
+
+ printf("\ndeleting key2\n");
+ talloc_free(trbt_lookuparray32(tree, 3, key2)); /* freeing the stored data removes the entry: the lookup below expects NULL */
+
+ data = trbt_lookuparray32(tree, 3, key1);
+ assert(data != NULL && *data == 3);
+ data = trbt_lookuparray32(tree, 3, key2);
+ assert(data == NULL);
+ data = trbt_lookuparray32(tree, 3, key3);
+ assert(data != NULL && *data == 1);
+ data = trbt_lookuparray32(tree, 3, key4);
+ assert(data == NULL);
+ trbt_traversearray32(tree, 3, traverse, NULL);
+
+ printf("\ndeleting key3\n");
+ talloc_free(trbt_lookuparray32(tree, 3, key3));
+
+ data = trbt_lookuparray32(tree, 3, key1);
+ assert(data != NULL && *data == 3);
+ data = trbt_lookuparray32(tree, 3, key2);
+ assert(data == NULL);
+ data = trbt_lookuparray32(tree, 3, key3);
+ assert(data == NULL);
+ data = trbt_lookuparray32(tree, 3, key4);
+ assert(data == NULL);
+ trbt_traversearray32(tree, 3, traverse, NULL);
+
+ printf("\ndeleting key1\n");
+ talloc_free(trbt_lookuparray32(tree, 3, key1));
+
+ data = trbt_lookuparray32(tree, 3, key1);
+ assert(data == NULL);
+ data = trbt_lookuparray32(tree, 3, key2);
+ assert(data == NULL);
+ data = trbt_lookuparray32(tree, 3, key3);
+ assert(data == NULL);
+ data = trbt_lookuparray32(tree, 3, key4);
+ assert(data == NULL);
+ trbt_traversearray32(tree, 3, traverse, NULL);
+
+ talloc_free(tree);
+ talloc_free(memctx);
+
+
+ printf("\nrun random insert and delete for 60 seconds\n");
+ memctx = talloc_new(NULL);
+ assert(memctx != NULL);
+
+ tree = trbt_create(memctx, 0);
+ assert(tree != NULL);
+
+ i=0;
+ start_timer();
+ checksum = 0;
+ /* Add and delete nodes from a 3 level tree for 60 seconds.
+ Each time a node is added or deleted, traverse the tree and
+ compute a checksum over the data stored in the tree and compare this
+ with a checksum we keep which contains what the checksum should be
+ */
+ while(end_timer() < 60.0){
+ char *str;
+
+ i++;
+ key[0]=random()%10;
+ key[1]=random()%10;
+ key[2]=random()%10;
+
+ if (random()%2) {
+ if (trbt_lookuparray32(tree, 3, key) == NULL) {
+ /* this node does not yet exist, add it to the
+ tree and update the checksum
+ */
+ str=talloc_asprintf(memctx, "%d.%d.%d", key[0],key[1],key[2]); /* NOTE(review): str is not checked for NULL before it is inserted */
+ trbt_insertarray32_callback(tree, 3, key, random_add, str);
+ checksum += key[0]*100+key[1]*10+key[2];
+ }
+ } else {
+ if ((str=trbt_lookuparray32(tree, 3, key)) != NULL) {
+ /* this node does exist in the tree, delete
+ it and update the checksum accordingly
+ */
+ talloc_free(str);
+ checksum -= key[0]*100+key[1]*10+key[2];
+ }
+ }
+ /* traverse all nodes in the tree and calculate the checksum
+ it better match the one we keep track of in
+ 'checksum'
+ */
+ calc_checksum = 0;
+ trbt_traversearray32(tree, 3, traverse_checksum, NULL);
+ assert(checksum == calc_checksum);
+ }
+
+ /*
+ printf("\niterations passed:%d\n", i);
+ trbt_traversearray32(tree, 3, random_traverse, NULL);
+ printf("\n");
+ printf("first node: %s\n", (char *)trbt_findfirstarray32(tree, 3));
+ */
+
+ traverse_count = 0;
+ trbt_traversearray32(tree, 3, count_traverse, &traverse_count);
+ assert(traverse_count > 0); /* the random phase is expected to leave at least one node */
+
+ traverse_count = 0;
+ trbt_traversearray32(tree, 3, count_traverse_abort, &traverse_count);
+ assert(traverse_count == 1); /* a callback returning -1 must end the walk after one node */
+
+ printf("\ndeleting all entries\n");
+ for(i=0;i<10;i++){
+ for(j=0;j<10;j++){
+ for(k=0;k<10;k++){
+ key[0]=i;
+ key[1]=j;
+ key[2]=k;
+ talloc_free(trbt_lookuparray32(tree, 3, key)); /* every possible key is tried, present or not */
+ }
+ }
+ }
+ trbt_traversearray32(tree, 3, random_traverse, NULL);
+
+ assert(talloc_total_size(memctx) == 16); /* NOTE(review): exact byte count presumably depends on struct layout - confirm it is portable */
+
+ return 0;
+}
diff --git a/ctdb/tests/src/reqid_test.c b/ctdb/tests/src/reqid_test.c
new file mode 100644
index 0000000..2a0828c
--- /dev/null
+++ b/ctdb/tests/src/reqid_test.c
@@ -0,0 +1,89 @@
+/*
+ reqid tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+#include <assert.h>
+
+#include "common/reqid.c"
+
+
+int main(void)
+{
+	struct reqid_context *reqid_ctx;
+	TALLOC_CTX *mem_ctx;
+	int i, ret;
+	uint32_t reqid;
+	int *data, *tmp;
+
+	mem_ctx = talloc_new(NULL);
+	assert(mem_ctx != NULL);
+	ret = reqid_init(mem_ctx, INT_MAX-200, &reqid_ctx);
+	assert(ret == 0);
+
+	data = talloc_zero(mem_ctx, int);
+	assert(data != NULL);
+
+	/* Allocate 1024*1024 ids; none may come back as REQID_INVALID. */
+	for (i=0; i<1024*1024; i++) {
+		reqid = reqid_new(reqid_ctx, data);
+		assert(reqid != REQID_INVALID);
+	}
+	/* Ids 0..1023 are expected to be among those handed out above. */
+	for (i=0; i<1024; i++) {
+		tmp = reqid_find(reqid_ctx, i, int);
+		assert(tmp == data);
+	}
+	/* Removing each of them once must succeed ... */
+	for (i=0; i<1024; i++) {
+		ret = reqid_remove(reqid_ctx, i);
+		assert(ret == 0);
+	}
+	/* ... after which they can no longer be found ... */
+	for (i=0; i<1024; i++) {
+		tmp = reqid_find(reqid_ctx, i, int);
+		assert(tmp == NULL);
+	}
+	/* ... and removing them a second time reports ENOENT. */
+	for (i=0; i<1024; i++) {
+		ret = reqid_remove(reqid_ctx, i);
+		assert(ret == ENOENT);
+	}
+
+	talloc_free(reqid_ctx);
+	assert(talloc_get_size(mem_ctx) == 0);
+	/* Fresh context: ids must run INT_MAX, 0, then 1 once 0 has been removed. */
+	ret = reqid_init(mem_ctx, INT_MAX-1, &reqid_ctx);
+	assert(ret == 0);
+
+	reqid = reqid_new(reqid_ctx, data);
+	assert(reqid == INT_MAX);
+	reqid = reqid_new(reqid_ctx, data);
+	assert(reqid == 0);
+
+	reqid_remove(reqid_ctx, 0);
+	reqid = reqid_new(reqid_ctx, data);
+	assert(reqid == 1);	/* the id just freed is not handed out again at once */
+
+	talloc_free(reqid_ctx);
+	talloc_free(mem_ctx);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/run_event_test.c b/ctdb/tests/src/run_event_test.c
new file mode 100644
index 0000000..9454864
--- /dev/null
+++ b/ctdb/tests/src/run_event_test.c
@@ -0,0 +1,251 @@
+/*
+ run_event test wrapper
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/db_hash.c"
+#include "common/run_proc.c"
+#include "common/event_script.c"
+#include "common/run_event.c"
+
+static void usage(const char *prog)
+{
+ fprintf(stderr, "Usage: %s <scriptdir> run|list|enable|disable <options>\n", prog); /* synopsis first, then one line per sub-command; all on stderr */
+ fprintf(stderr, " %s <scriptdir> run <timeout> <event> [<args>]\n", prog);
+ fprintf(stderr, " %s <scriptdir> list\n", prog);
+ fprintf(stderr, " %s <scriptdir> enable <scriptname>\n", prog);
+ fprintf(stderr, " %s <scriptdir> disable <scriptname>\n", prog);
+}
+
+static char *compact_args(const char **argv, int argc, int from)
+{
+	char *joined = NULL;	/* returned as-is (NULL) when from >= argc */
+	int idx = from;
+
+	while (idx < argc) {
+		/* each argument is appended followed by a single space */
+		joined = talloc_asprintf_append(joined, "%s ", argv[idx++]);
+		if (joined == NULL) {
+			fprintf(stderr, "talloc_asprintf_append() failed\n");
+			exit(1);
+		}
+	}
+	return joined;
+}
+
+static void run_done(struct tevent_req *req)
+{
+	struct run_event_script_list **result_out =
+		tevent_req_callback_data_void(req);
+	int rc;
+
+	/* result_out is the pointer registered along with this callback. */
+	if (!run_event_recv(req, &rc, NULL, result_out)) {
+		/* A failure is only reported on stderr. */
+		fprintf(stderr, "run_event_recv() failed, ret=%d\n", rc);
+	}
+}
+
+static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct run_event_context *run_ctx,
+ int argc, const char **argv)
+{
+ struct tevent_req *req;
+ struct timeval timeout;
+ struct run_event_script_list *script_list = NULL;
+ char *arg_str;
+ unsigned int i;
+ int t;
+ bool wait_for_signal = false;
+ /* "run" sub-command: argv[3] is the timeout, argv[4] the event, argv[5..] its arguments. */
+ if (argc < 5) {
+ usage(argv[0]);
+ exit(1);
+ }
+
+ t = atoi(argv[3]);
+ if (t > 0) {
+ timeout = tevent_timeval_current_ofs(t, 0);
+ } else {
+ timeout = tevent_timeval_zero(); /* t <= 0, including non-numeric input, selects a zero timeval */
+ }
+
+ arg_str = compact_args(argv, argc, 5); /* NULL when no extra arguments were given */
+
+ req = run_event_send(mem_ctx,
+ ev,
+ run_ctx,
+ argv[4],
+ arg_str,
+ timeout,
+ false);
+ if (req == NULL) {
+ fprintf(stderr, "run_event_send() failed\n");
+ return;
+ }
+
+ tevent_req_set_callback(req, run_done, &script_list); /* run_done() passes &script_list to run_event_recv() */
+
+ tevent_req_poll(req, ev); /* NOTE(review): the return value is not checked */
+
+ if (script_list == NULL || script_list->num_scripts == 0) {
+ printf("No event scripts found\n");
+ return;
+ }
+
+ printf("Event %s completed with result=%d\n",
+ argv[4], script_list->summary);
+ for (i=0; i<script_list->num_scripts; i++) {
+ struct run_event_script *s = &script_list->script[i];
+ printf("%s result=%d\n", s->name, s->summary);
+
+ if (s->summary == -ETIMEDOUT) {
+ wait_for_signal = true; /* some script timed out: linger for 10 seconds below */
+ }
+ }
+
+ TALLOC_FREE(script_list);
+ TALLOC_FREE(req);
+
+ if (!wait_for_signal) {
+ return;
+ }
+ /* NOTE(review): presumably this keeps the process alive while the timed-out script is dealt with - confirm. */
+ req = tevent_wakeup_send(
+ ev, ev, tevent_timeval_current_ofs(10, 0));
+ if (req == NULL) {
+ fprintf(stderr, "Could not wait for signal\n");
+ return;
+ }
+
+ tevent_req_poll(req, ev);
+ TALLOC_FREE(req);
+}
+
+static void do_list(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct run_event_context *run_ctx,
+ int argc, const char **argv)
+{
+ struct run_event_script_list *script_list = NULL;
+ unsigned int i;
+ int ret;
+ /* "list" sub-command: print one script name per line; ev, argc and argv are unused. */
+ ret = run_event_list(run_ctx, mem_ctx, &script_list);
+ if (ret != 0) {
+ printf("Script list failed with result=%d\n", ret); /* reported on stdout, not stderr */
+ return;
+ }
+
+ if (script_list == NULL || script_list->num_scripts == 0) {
+ printf("No event scripts found\n");
+ return;
+ }
+
+ for (i=0; i<script_list->num_scripts; i++) {
+ printf("%s\n", script_list->script[i].name);
+ }
+}
+
+static void do_enable(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      struct run_event_context *run_ctx,
+		      int argc, const char **argv)
+{
+	const char *script;
+
+	if (argc != 4) {	/* exactly one argument, the script name, is required */
+		usage(argv[0]);
+		exit(1);
+	}
+	script = argv[3];
+	printf("Script enable %s completed with result=%d\n",
+	       script, run_event_script_enable(run_ctx, script));
+}
+
+static void do_disable(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		       struct run_event_context *run_ctx,
+		       int argc, const char **argv)
+{
+	const char *script;
+
+	if (argc != 4) {	/* exactly one argument, the script name, is required */
+		usage(argv[0]);
+		exit(1);
+	}
+	script = argv[3];
+	printf("Script disable %s completed with result=%d\n",
+	       script, run_event_script_disable(run_ctx, script));
+}
+
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *mem_ctx;
+	struct tevent_context *ev;
+	struct run_proc_context *run_proc_ctx = NULL;
+	struct run_event_context *run_ctx = NULL;
+	int ret;
+
+	if (argc < 3) {
+		usage(argv[0]);
+		exit(1);
+	}
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "talloc_new() failed\n");
+		exit(1);
+	}
+
+	ev = tevent_context_init(mem_ctx);
+	if (ev == NULL) {
+		fprintf(stderr, "tevent_context_init() failed\n");
+		exit(1);
+	}
+
+	ret = run_proc_init(mem_ctx, ev, &run_proc_ctx);
+	if (ret != 0) {
+		fprintf(stderr, "run_proc_init() failed, ret=%d\n", ret);
+		exit(1);
+	}
+
+	ret = run_event_init(mem_ctx, run_proc_ctx, argv[1], NULL, &run_ctx);
+	if (ret != 0) {
+		fprintf(stderr, "run_event_init() failed, ret=%d\n", ret);
+		exit(1);
+	}
+	/* Dispatch on the sub-command in argv[2]; ret is 0 here and becomes the exit status. */
+	if (strcmp(argv[2], "run") == 0) {
+		do_run(mem_ctx, ev, run_ctx, argc, argv);
+	} else if (strcmp(argv[2], "list") == 0) {
+		do_list(mem_ctx, ev, run_ctx, argc, argv);
+	} else if (strcmp(argv[2], "enable") == 0) {
+		do_enable(mem_ctx, ev, run_ctx, argc, argv);
+	} else if (strcmp(argv[2], "disable") == 0) {
+		do_disable(mem_ctx, ev, run_ctx, argc, argv);
+	} else {
+		fprintf(stderr, "Invalid command %s\n", argv[2]);
+		usage(argv[0]);
+		ret = 1;	/* an unknown sub-command is a usage error, not a success */
+	}
+	talloc_free(mem_ctx);
+	exit(ret);
+}
+
diff --git a/ctdb/tests/src/run_proc_test.c b/ctdb/tests/src/run_proc_test.c
new file mode 100644
index 0000000..7cfb870
--- /dev/null
+++ b/ctdb/tests/src/run_proc_test.c
@@ -0,0 +1,111 @@
+/*
+ run_proc test wrapper
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/db_hash.c"
+#include "common/run_proc.c"
+
+int main(int argc, const char **argv)
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct tevent_req *req;
+ struct run_proc_context *run_ctx;
+ struct timeval tv;
+ char *output;
+ struct run_proc_result result;
+ pid_t pid;
+ int timeout, ret, fd;
+ bool status;
+ /* Runs argv[3..] via run_proc_send() and prints how the process ended. */
+ if (argc < 4) {
+ fprintf(stderr,
+ "Usage: %s <timeout> <stdin-fd> <program> <args>\n",
+ argv[0]);
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "talloc_new() failed\n");
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "tevent_context_init() failed\n");
+ exit(1);
+ }
+
+ timeout = atoi(argv[1]);
+ if (timeout <= 0) {
+ tv = tevent_timeval_zero(); /* non-positive or non-numeric timeout: pass a zero timeval */
+ } else {
+ tv = tevent_timeval_current_ofs(timeout, 0);
+ }
+
+ fd = atoi(argv[2]);
+ if (fd < 0) {
+ fd = -1; /* any negative value is normalised to -1 */
+ }
+
+ ret = run_proc_init(mem_ctx, ev, &run_ctx);
+ if (ret != 0) {
+ fprintf(stderr, "run_proc_init() failed, ret=%d\n", ret);
+ exit(1);
+ }
+ /* argv[3] is passed both as the program and as the first element of its argument vector */
+ req = run_proc_send(mem_ctx, ev, run_ctx, argv[3], &argv[3], fd, tv);
+ if (req == NULL) {
+ fprintf(stderr, "run_proc_send() failed\n");
+ exit(1);
+ }
+
+ tevent_req_poll(req, ev); /* NOTE(review): the return value is not checked */
+
+ status = run_proc_recv(req, &ret, &result, &pid, mem_ctx, &output);
+ if (! status) {
+ fprintf(stderr, "run_proc_recv() failed, ret=%d\n", ret);
+ exit(1);
+ }
+ /* Report the first of signal, error, exit status that applies, in that order */
+ if (result.sig > 0) {
+ printf("Process exited with signal %d\n", result.sig);
+ } else if (result.err > 0) {
+ printf("Process exited with error %d\n", result.err);
+ } else {
+ printf("Process exited with status %d\n", result.status);
+ }
+
+ if (pid != -1) {
+ printf("Child = %d\n", pid);
+ }
+
+ if (output != NULL) {
+ printf("Output = (%s)\n", output);
+ }
+
+ talloc_free(mem_ctx);
+
+ exit(0);
+}
diff --git a/ctdb/tests/src/sigcode.c b/ctdb/tests/src/sigcode.c
new file mode 100644
index 0000000..9e5ed81
--- /dev/null
+++ b/ctdb/tests/src/sigcode.c
@@ -0,0 +1,120 @@
+/*
+ Portability layer for signal codes
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * These signals are as listed in POSIX standard
+ * IEEE Std 1003.1-2017 (Revision of IEEE Std 1003.1-2008)
+ */
+
+#include "replace.h"
+#include "system/wait.h"
+
+/*
+ * Table pairing each POSIX signal name with its numeric value on the
+ * build platform.  It is searched linearly in both directions (name to
+ * code and code to name), so every label must spell the macro it is
+ * paired with exactly.
+ */
+struct {
+	const char *label;
+	int code;
+} sig_codes[] = {
+	{ "SIGABRT", SIGABRT },
+	{ "SIGALRM", SIGALRM },
+	{ "SIGBUS", SIGBUS },
+	{ "SIGCHLD", SIGCHLD },
+	{ "SIGCONT", SIGCONT },
+	{ "SIGFPE", SIGFPE },
+	{ "SIGHUP", SIGHUP },
+	{ "SIGILL", SIGILL },
+	{ "SIGINT", SIGINT },
+	{ "SIGKILL", SIGKILL },
+	{ "SIGPIPE", SIGPIPE },
+	{ "SIGQUIT", SIGQUIT },
+	{ "SIGSEGV", SIGSEGV },
+	{ "SIGSTOP", SIGSTOP },
+	{ "SIGTERM", SIGTERM },
+	{ "SIGTSTP", SIGTSTP },
+	{ "SIGTTIN", SIGTTIN },
+	{ "SIGTTOU", SIGTTOU },
+	{ "SIGUSR1", SIGUSR1 },
+	{ "SIGUSR2", SIGUSR2 },
+	{ "SIGTRAP", SIGTRAP },
+	{ "SIGURG", SIGURG },
+	{ "SIGXCPU", SIGXCPU },
+	{ "SIGXFSZ", SIGXFSZ },
+};
+
+/* Print every table entry as a "<label> <code>" line on stdout. */
+static void dump(void)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(sig_codes)) {
+		printf("%s %d\n", sig_codes[idx].label, sig_codes[idx].code);
+		idx++;
+	}
+}
+
+/*
+ * Print the numeric code of the signal named by str, compared without
+ * regard to case, or -1 when the name is not in the table.
+ */
+static void match_label(const char *str)
+{
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(sig_codes); idx++) {
+		if (strcasecmp(sig_codes[idx].label, str) != 0) {
+			continue;
+		}
+
+		/* First match wins */
+		printf("%d\n", sig_codes[idx].code);
+		return;
+	}
+
+	printf("%d\n", -1);
+}
+
+/*
+ * Print the label of the first table entry whose value equals code, or
+ * "UNKNOWN" when no entry matches.
+ */
+static void match_code(int code)
+{
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(sig_codes); idx++) {
+		if (sig_codes[idx].code != code) {
+			continue;
+		}
+
+		printf("%s\n", sig_codes[idx].label);
+		return;
+	}
+
+	printf("%s\n", "UNKNOWN");
+}
+
+/*
+ * Usage: sigcode dump|<sigcode>
+ *
+ * "dump" prints the whole table.  Any other argument is looked up as a
+ * signal number when it parses completely as an integer (strtol with
+ * base 0), and as a signal name otherwise.
+ *
+ * Exits with status 1 on a usage error, 0 in every other case.
+ */
+int main(int argc, const char **argv)
+{
+	long int code;
+	char *endptr;
+
+	if (argc != 2) {
+		fprintf(stderr, "Usage: %s dump|<sigcode>\n", argv[0]);
+		exit(1);
+	}
+
+	if (strcmp(argv[1], "dump") == 0) {
+		dump();
+		exit(0);
+	}
+
+	code = strtol(argv[1], &endptr, 0);
+
+	/*
+	 * strtol() converts nothing from an empty string and leaves endptr
+	 * at the terminator, so checking *endptr alone would treat "" as
+	 * signal number 0.  Require at least one converted character.
+	 */
+	if (endptr != argv[1] && *endptr == '\0') {
+		match_code(code);
+	} else {
+		match_label(argv[1]);
+	}
+
+	exit(0);
+}
diff --git a/ctdb/tests/src/sock_daemon_test.c b/ctdb/tests/src/sock_daemon_test.c
new file mode 100644
index 0000000..acafc9f
--- /dev/null
+++ b/ctdb/tests/src/sock_daemon_test.c
@@ -0,0 +1,1980 @@
+/*
+ sock daemon tests
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <assert.h>
+
+#include "common/logging.c"
+#include "common/pkt_read.c"
+#include "common/pkt_write.c"
+#include "common/comm.c"
+#include "common/pidfile.c"
+#include "common/sock_daemon.c"
+#include "common/sock_io.c"
+
+/* The request carries no state of its own */
+struct dummy_wait_state {
+};
+
+/*
+ * wait_send hook that checks the socket and then completes at once.
+ *
+ * private_data is a path that must exist and be a socket; the assertions
+ * abort the test otherwise.  Returns NULL only when the request cannot
+ * be created.
+ */
+static struct tevent_req *dummy_wait_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  void *private_data)
+{
+	const char *path = (const char *)private_data;
+	struct dummy_wait_state *wait_state;
+	struct tevent_req *wait_req;
+	struct stat sb;
+	int rc;
+
+	rc = stat(path, &sb);
+	assert(rc == 0);
+	assert(S_ISSOCK(sb.st_mode));
+
+	wait_req = tevent_req_create(mem_ctx, &wait_state,
+				     struct dummy_wait_state);
+	if (wait_req == NULL) {
+		return NULL;
+	}
+
+	/* Nothing to wait for: mark the request finished straight away */
+	tevent_req_done(wait_req);
+	return tevent_req_post(wait_req, ev);
+}
+
+/* wait_recv hook that always reports success; perr is left untouched. */
+static bool dummy_wait_recv(struct tevent_req *req, int *perr)
+{
+ return true;
+}
+
+/* startup hook that always fails by returning a non-zero value. */
+static int test1_startup_fail(void *private_data)
+{
+ return 1;
+}
+
+/*
+ * startup hook that asserts the socket path cannot be stat()ed yet.
+ *
+ * private_data is the socket path.  Always returns 0.
+ */
+static int test1_startup(void *private_data)
+{
+	const char *path = (const char *)private_data;
+	struct stat sb;
+	int rc;
+
+	rc = stat(path, &sb);
+	assert(rc == -1);
+
+	return 0;
+}
+
+/* The request carries no state of its own */
+struct test1_startup_state {
+};
+
+/*
+ * startup_send hook that fails immediately with error code 2.
+ *
+ * Returns NULL only when the request cannot be created.
+ */
+static struct tevent_req *test1_startup_send(TALLOC_CTX *mem_ctx,
+					     struct tevent_context *ev,
+					     void *private_data)
+{
+	struct test1_startup_state *startup_state;
+	struct tevent_req *startup_req;
+
+	startup_req = tevent_req_create(mem_ctx, &startup_state,
+					struct test1_startup_state);
+	if (startup_req == NULL) {
+		return NULL;
+	}
+
+	tevent_req_error(startup_req, 2);
+	return tevent_req_post(startup_req, ev);
+}
+
+/*
+ * startup_recv hook: returns false when the request carries a unix
+ * error (tevent_req_is_unix_error() is given perr directly), true
+ * otherwise.
+ */
+static bool test1_startup_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/* read_send hook that never creates a request: it always returns NULL. */
+static struct tevent_req *dummy_read_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *client,
+ uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ return NULL;
+}
+
+/*
+ * read_recv hook that always fails.  When perr is non-NULL it is set to
+ * EINVAL.
+ */
+static bool dummy_read_recv(struct tevent_req *req, int *perr)
+{
+	if (perr == NULL) {
+		return false;
+	}
+
+	*perr = EINVAL;
+	return false;
+}
+
+/*
+ * Socket callbacks for tests that only need a listening socket: the
+ * read handler never creates a request.
+ */
+static struct sock_socket_funcs dummy_socket_funcs = {
+ .read_send = dummy_read_send,
+ .read_recv = dummy_read_recv,
+};
+
+/*
+ * test1
+ *
+ * Check setup without actually running daemon
+ */
+
+/*
+ * Exercise sock_daemon_setup()/sock_daemon_run() in-process, without
+ * forking, in three stages: a failing synchronous startup hook, a
+ * failing asynchronous startup hook, and a successful startup followed
+ * by a wait hook that finishes at once.
+ *
+ * No PID file is passed to sock_daemon_run() in any stage.  mem_ctx is
+ * freed before returning.
+ */
+static void test1(TALLOC_CTX *mem_ctx, const char *pidfile,
+ const char *sockpath)
+{
+ struct tevent_context *ev;
+ struct sock_daemon_context *sockd;
+ struct sock_daemon_funcs test1_funcs;
+ struct stat st;
+ int ret;
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ /* Stage 1: synchronous startup hook fails, run is expected to give EIO */
+ test1_funcs = (struct sock_daemon_funcs){
+ .startup = test1_startup_fail,
+ };
+
+ ret = sock_daemon_setup(mem_ctx, "test1", "file:", "NOTICE",
+ &test1_funcs, NULL, &sockd);
+ assert(ret == 0);
+ assert(sockd != NULL);
+
+ /* Setup alone must not have created the PID file */
+ ret = stat(pidfile, &st);
+ assert(ret == -1);
+
+ ret = sock_daemon_run(ev, sockd, NULL, false, false, -1);
+ assert(ret == EIO);
+ talloc_free(sockd);
+
+ /* Stage 2: asynchronous startup hook fails, run is expected to give EIO */
+ test1_funcs = (struct sock_daemon_funcs){
+ .startup_send = test1_startup_send,
+ .startup_recv = test1_startup_recv,
+ };
+
+ ret = sock_daemon_setup(mem_ctx, "test1", "file:", "NOTICE",
+ &test1_funcs, NULL, &sockd);
+ assert(ret == 0);
+ assert(sockd != NULL);
+
+ ret = stat(pidfile, &st);
+ assert(ret == -1);
+
+ ret = sock_daemon_run(ev, sockd, NULL, false, false, -1);
+ assert(ret == EIO);
+ talloc_free(sockd);
+
+ /*
+ * Stage 3: startup asserts the socket is absent, the wait hook asserts
+ * it is present and completes immediately, so run returns 0
+ */
+ test1_funcs = (struct sock_daemon_funcs){
+ .startup = test1_startup,
+ .wait_send = dummy_wait_send,
+ .wait_recv = dummy_wait_recv,
+ };
+
+ ret = sock_daemon_setup(mem_ctx, "test1", "file:", "NOTICE",
+ &test1_funcs, discard_const(sockpath), &sockd);
+ assert(ret == 0);
+ assert(sockd != NULL);
+
+ ret = sock_daemon_add_unix(sockd, sockpath, &dummy_socket_funcs, NULL);
+ assert(ret == 0);
+
+ /* Registering the socket must not create it on disk yet */
+ ret = stat(sockpath, &st);
+ assert(ret == -1);
+
+ ret = sock_daemon_run(ev, sockd, NULL, false, false, -1);
+ assert(ret == 0);
+
+ talloc_free(mem_ctx);
+}
+
+/*
+ * test2
+ *
+ * Start daemon, check PID file, sock daemon functions, termination,
+ * exit code
+ */
+
+/*
+ * startup hook: write the int value 1 to the descriptor that
+ * private_data points at, so the reader knows startup has run.
+ *
+ * Always returns 0.
+ */
+static int test2_startup(void *private_data)
+{
+	int pipe_fd = *(int *)private_data;
+	int token = 1;
+	ssize_t written;
+
+	written = write(pipe_fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+
+	return 0;
+}
+
+/*
+ * reconfigure hook: write the int value 2 to the descriptor that
+ * private_data points at.
+ *
+ * Returns 1 on the first call in a process and 0 on every later call,
+ * so both outcomes are exercised.
+ */
+static int test2_reconfigure(void *private_data)
+{
+	static bool first_time = true;
+	int pipe_fd = *(int *)private_data;
+	int token = 2;
+	ssize_t written;
+
+	written = write(pipe_fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+
+	if (!first_time) {
+		return 0;
+	}
+
+	first_time = false;
+	return 1;
+}
+
+/* Carries the notification descriptor from _send to _recv */
+struct test2_reconfigure_state {
+	int fd;
+};
+
+/*
+ * reconfigure_send hook: the request finishes immediately, with error
+ * code 2 on the first call in a process and with success afterwards.
+ *
+ * private_data points at the descriptor that the recv side writes to.
+ * Returns NULL only when the request cannot be created.
+ */
+static struct tevent_req *test2_reconfigure_send(TALLOC_CTX *mem_ctx,
+						 struct tevent_context *ev,
+						 void *private_data)
+{
+	static bool first_time = true;
+	struct test2_reconfigure_state *reconf_state;
+	struct tevent_req *reconf_req;
+
+	reconf_req = tevent_req_create(mem_ctx, &reconf_state,
+				       struct test2_reconfigure_state);
+	if (reconf_req == NULL) {
+		return NULL;
+	}
+
+	reconf_state->fd = *(int *)private_data;
+
+	if (!first_time) {
+		tevent_req_done(reconf_req);
+		return tevent_req_post(reconf_req, ev);
+	}
+
+	first_time = false;
+	tevent_req_error(reconf_req, 2);
+	return tevent_req_post(reconf_req, ev);
+}
+
+/*
+ * reconfigure_recv hook: write the int value 2 to the descriptor stored
+ * in the request state, then report the request's outcome.
+ *
+ * Returns false when the request carries a unix error, true otherwise.
+ */
+static bool test2_reconfigure_recv(struct tevent_req *req, int *perr)
+{
+	struct test2_reconfigure_state *reconf_state = tevent_req_data(
+		req, struct test2_reconfigure_state);
+	int token = 2;
+	ssize_t written;
+
+	/* Notify before checking the result so both outcomes are visible */
+	written = write(reconf_state->fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * reopen_logs hook: write the int value 4 to the descriptor that
+ * private_data points at.
+ *
+ * Returns 1 on the first call in a process and 0 on every later call.
+ */
+static int test2_reopen_logs(void *private_data)
+{
+	static bool first_time = true;
+	int pipe_fd = *(int *)private_data;
+	int token = 4;
+	ssize_t written;
+
+	written = write(pipe_fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+
+	if (!first_time) {
+		return 0;
+	}
+
+	first_time = false;
+	return 1;
+}
+
+/* Carries the notification descriptor from _send to _recv */
+struct test2_reopen_logs_state {
+	int fd;
+};
+
+/*
+ * reopen_logs_send hook: the request finishes immediately, with error
+ * code 2 on the first call in a process and with success afterwards.
+ *
+ * private_data points at the descriptor that the recv side writes to.
+ * Returns NULL only when the request cannot be created.
+ */
+static struct tevent_req *test2_reopen_logs_send(TALLOC_CTX *mem_ctx,
+						 struct tevent_context *ev,
+						 void *private_data)
+{
+	static bool first_time = true;
+	struct test2_reopen_logs_state *reopen_state;
+	struct tevent_req *reopen_req;
+
+	reopen_req = tevent_req_create(mem_ctx, &reopen_state,
+				       struct test2_reopen_logs_state);
+	if (reopen_req == NULL) {
+		return NULL;
+	}
+
+	reopen_state->fd = *(int *)private_data;
+
+	if (!first_time) {
+		tevent_req_done(reopen_req);
+		return tevent_req_post(reopen_req, ev);
+	}
+
+	first_time = false;
+	tevent_req_error(reopen_req, 2);
+	return tevent_req_post(reopen_req, ev);
+}
+
+/*
+ * reopen_logs_recv hook: write the int value 4 to the descriptor stored
+ * in the request state, then report the request's outcome.
+ *
+ * Returns false when the request carries a unix error, true otherwise.
+ */
+static bool test2_reopen_logs_recv(struct tevent_req *req, int *perr)
+{
+	struct test2_reopen_logs_state *reopen_state = tevent_req_data(
+		req, struct test2_reopen_logs_state);
+	int token = 4;
+	ssize_t written;
+
+	written = write(reopen_state->fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * shutdown hook: write the int value 3 to the descriptor that
+ * private_data points at.
+ */
+static void test2_shutdown(void *private_data)
+{
+	int pipe_fd = *(int *)private_data;
+	int token = 3;
+	ssize_t written;
+
+	written = write(pipe_fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+}
+
+/* Carries the notification descriptor from _send to _recv */
+struct test2_shutdown_state {
+	int fd;
+};
+
+/*
+ * shutdown_send hook: record the descriptor from private_data and
+ * finish the request successfully at once.
+ *
+ * Returns NULL only when the request cannot be created.
+ */
+static struct tevent_req *test2_shutdown_send(TALLOC_CTX *mem_ctx,
+					      struct tevent_context *ev,
+					      void *private_data)
+{
+	struct test2_shutdown_state *shutdown_state;
+	struct tevent_req *shutdown_req;
+
+	shutdown_req = tevent_req_create(mem_ctx, &shutdown_state,
+					 struct test2_shutdown_state);
+	if (shutdown_req == NULL) {
+		return NULL;
+	}
+
+	shutdown_state->fd = *(int *)private_data;
+
+	tevent_req_done(shutdown_req);
+	return tevent_req_post(shutdown_req, ev);
+}
+
+/*
+ * shutdown_recv hook: write the int value 3 to the descriptor stored in
+ * the request state.
+ */
+static void test2_shutdown_recv(struct tevent_req *req)
+{
+	struct test2_shutdown_state *shutdown_state = tevent_req_data(
+		req, struct test2_shutdown_state);
+	int token = 3;
+	ssize_t written;
+
+	written = write(shutdown_state->fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+}
+
+/*
+ * Fork a daemon and drive it with signals, checking the tokens the
+ * hooks write to a pipe: 1 = startup, 2 = reconfigure (SIGUSR1),
+ * 4 = reopen logs (SIGHUP), 3 = shutdown (SIGTERM).
+ *
+ * The scenario runs twice: first with the synchronous hooks, then with
+ * the asynchronous _send/_recv hooks.  SIGUSR1 and SIGHUP are each sent
+ * twice because the hooks fail on their first call and succeed after.
+ * The first run also checks the PID file contents and that the PID file
+ * and socket are gone once the daemon has exited.
+ */
+static void test2(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	struct stat st;
+	int fd[2];
+	pid_t pid, pid2;
+	int ret;
+	ssize_t n;
+	int pidfile_fd;
+	char pidstr[20] = { 0 };
+
+	/* ---- Run 1: synchronous hooks ---- */
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+		struct sock_daemon_funcs test2_funcs = {
+			.startup = test2_startup,
+			.reconfigure = test2_reconfigure,
+			.reopen_logs = test2_reopen_logs,
+			.shutdown = test2_shutdown,
+		};
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test2", "file:", "NOTICE",
+					&test2_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_add_unix(sockd, sockpath,
+					   &dummy_socket_funcs, NULL);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	/* Wait for the startup token */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	/* The PID file must be a regular file holding the child's PID */
+	pidfile_fd = open(pidfile, O_RDONLY, 0644);
+	assert(pidfile_fd != -1);
+	ret = fstat(pidfile_fd, &st);
+	assert(ret == 0);
+	assert(S_ISREG(st.st_mode));
+	n = read(pidfile_fd, pidstr, sizeof(pidstr)-1);
+	assert(n != -1);
+	pid2 = (pid_t)atoi(pidstr);
+	assert(pid == pid2);
+	close(pidfile_fd);
+
+	ret = kill(pid, SIGUSR1);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 2);
+
+	ret = kill(pid, SIGUSR1);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 2);
+
+	ret = kill(pid, SIGHUP);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 4);
+
+	ret = kill(pid, SIGHUP);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 4);
+
+	ret = kill(pid, SIGTERM);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 3);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/*
+	 * WEXITSTATUS() is only meaningful after a normal exit; without
+	 * this check a child aborted by a failed assert would be read as
+	 * exit status 0.
+	 */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	close(fd[0]);
+
+	/* The daemon must have removed its PID file and socket */
+	ret = stat(pidfile, &st);
+	assert(ret == -1);
+
+	ret = stat(sockpath, &st);
+	assert(ret == -1);
+
+	/* ---- Run 2: asynchronous hooks ---- */
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+		struct sock_daemon_funcs test2_funcs = {
+			.startup = test2_startup,
+			.reconfigure_send = test2_reconfigure_send,
+			.reconfigure_recv = test2_reconfigure_recv,
+			.reopen_logs_send = test2_reopen_logs_send,
+			.reopen_logs_recv = test2_reopen_logs_recv,
+			.shutdown_send = test2_shutdown_send,
+			.shutdown_recv = test2_shutdown_recv,
+		};
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test2", "file:", "NOTICE",
+					&test2_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_add_unix(sockd, sockpath,
+					   &dummy_socket_funcs, NULL);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	ret = kill(pid, SIGUSR1);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 2);
+
+	ret = kill(pid, SIGUSR1);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 2);
+
+	ret = kill(pid, SIGHUP);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 4);
+
+	ret = kill(pid, SIGHUP);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 4);
+
+	ret = kill(pid, SIGTERM);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 3);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	close(fd[0]);
+}
+
+/*
+ * test3
+ *
+ * Start daemon, test watching of (parent) PID
+ */
+
+/*
+ * Check that the daemon stops when the process it watches goes away.
+ *
+ * One child simply sleeps for 10 seconds and exits; a second child runs
+ * the daemon with the first child's PID as the PID to watch and expects
+ * sock_daemon_run() to return ESRCH.  Afterwards neither the PID file
+ * nor the socket may exist.
+ */
+static void test3(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	struct stat st;
+	pid_t pid_watch, pid, pid2;
+	int ret;
+
+	pid_watch = fork();
+	assert(pid_watch != -1);
+
+	if (pid_watch == 0) {
+		sleep(10);
+		exit(0);
+	}
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test3", "file:", "NOTICE",
+					NULL, NULL, &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_add_unix(sockd, sockpath,
+					   &dummy_socket_funcs, NULL);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, NULL, false, false, pid_watch);
+		assert(ret == ESRCH);
+
+		exit(0);
+	}
+
+	/*
+	 * WEXITSTATUS() is only meaningful after a normal exit; check
+	 * WIFEXITED() first so that a child aborted by a failed assert is
+	 * not read as exit status 0.
+	 */
+	pid2 = waitpid(pid_watch, &ret, 0);
+	assert(pid2 == pid_watch);
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	ret = stat(pidfile, &st);
+	assert(ret == -1);
+
+	ret = stat(sockpath, &st);
+	assert(ret == -1);
+}
+
+/*
+ * test4
+ *
+ * Start daemon, test termination via wait_send function
+ */
+
+/* The request carries no state of its own */
+struct test4_wait_state {
+};
+
+static void test4_wait_done(struct tevent_req *subreq);
+
+/*
+ * wait_send hook that completes once a 10 second wakeup timer fires.
+ *
+ * Returns NULL when the request cannot be created; when the timer
+ * sub-request cannot be created the request is posted as failed.
+ */
+static struct tevent_req *test4_wait_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  void *private_data)
+{
+	struct test4_wait_state *wait_state;
+	struct tevent_req *wait_req;
+	struct tevent_req *timer_req;
+	struct timeval wakeup_time;
+
+	wait_req = tevent_req_create(mem_ctx, &wait_state,
+				     struct test4_wait_state);
+	if (wait_req == NULL) {
+		return NULL;
+	}
+
+	wakeup_time = tevent_timeval_current_ofs(10, 0);
+	timer_req = tevent_wakeup_send(wait_state, ev, wakeup_time);
+	if (tevent_req_nomem(timer_req, wait_req)) {
+		return tevent_req_post(wait_req, ev);
+	}
+	tevent_req_set_callback(timer_req, test4_wait_done, wait_req);
+
+	return wait_req;
+}
+
+/*
+ * Timer callback for test4_wait_send(): finish the parent request, with
+ * EIO if the wakeup sub-request reports failure.
+ */
+static void test4_wait_done(struct tevent_req *subreq)
+{
+	struct tevent_req *wait_req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	bool woke_up;
+
+	woke_up = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+
+	if (woke_up) {
+		tevent_req_done(wait_req);
+		return;
+	}
+
+	tevent_req_error(wait_req, EIO);
+}
+
+/*
+ * wait_recv hook: returns true on success.  On failure returns false
+ * and, when perr is non-NULL, stores the request's unix error in it.
+ */
+static bool test4_wait_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (!tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/* Daemon callbacks for test4: only the timer-based wait hook is set */
+static struct sock_daemon_funcs test4_funcs = {
+ .wait_send = test4_wait_send,
+ .wait_recv = test4_wait_recv,
+};
+
+/*
+ * Check that the daemon terminates by itself when its wait hook
+ * completes.
+ *
+ * The child runs a daemon whose wait hook finishes after a 10 second
+ * timer and expects sock_daemon_run() to return 0.  Afterwards neither
+ * the PID file nor the socket path may exist.
+ */
+static void test4(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	struct stat st;
+	pid_t pid, pid2;
+	int ret;
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test4", "file:", "NOTICE",
+					&test4_funcs, NULL, &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == 0);
+
+		exit(0);
+	}
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/*
+	 * WEXITSTATUS() is only meaningful after a normal exit; without
+	 * this check a child aborted by a failed assert would be read as
+	 * exit status 0.
+	 */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	ret = stat(pidfile, &st);
+	assert(ret == -1);
+
+	ret = stat(sockpath, &st);
+	assert(ret == -1);
+}
+
+/*
+ * test5
+ *
+ * Start daemon, multiple client connects, requests, disconnects
+ */
+
+/* Number of simultaneous clients the test5 server accepts */
+#define TEST5_VALID_CLIENTS 10
+/* Total number of clients test5 starts */
+#define TEST5_MAX_CLIENTS 100
+
+/* Request/reply packet; len is checked against sizeof(struct test5_pkt) */
+struct test5_pkt {
+ uint32_t len;
+ int data;
+};
+
+/* Per-client state shared with the client's read callback */
+struct test5_client_state {
+ int id;
+ int fd; /* pipe end the callback writes its result to */
+ bool done; /* set by the callback to stop the client's event loop */
+};
+
+/*
+ * Queue callback for a test5 client.
+ *
+ * Writes one int to the client's pipe: the data field of the received
+ * packet, or 0 when called with a NULL buffer.  Then marks the client
+ * as done.
+ */
+static void test5_client_callback(uint8_t *buf, size_t buflen,
+				  void *private_data)
+{
+	struct test5_client_state *client_state =
+		(struct test5_client_state *)private_data;
+	int value = 0;
+	ssize_t written;
+
+	if (buf == NULL) {
+		assert(buflen == 0);
+	} else {
+		struct test5_pkt *reply;
+
+		assert(buflen == sizeof(struct test5_pkt));
+		reply = (struct test5_pkt *)buf;
+		assert(reply->len == sizeof(struct test5_pkt));
+
+		value = reply->data;
+	}
+
+	assert(client_state->fd != -1);
+
+	written = write(client_state->fd, (void *)&value, sizeof(int));
+	assert(written == sizeof(int));
+
+	client_state->done = true;
+}
+
+/*
+ * Fork one test5 client and return the value it reports.
+ *
+ * The child connects to sockpath, sends a single request and passes the
+ * int produced by test5_client_callback() back through a pipe.  It then
+ * stays alive, keeping its connection, until pid_server no longer
+ * exists (or until it is killed by the caller).
+ *
+ * The child's PID is stored in *client_pid.  Returns the reported value,
+ * or 0 when the child closed the pipe without writing anything.
+ */
+static int test5_client(const char *sockpath, int id, pid_t pid_server,
+			pid_t *client_pid)
+{
+	pid_t pid;
+	int fd[2];
+	int ret;
+	ssize_t n;
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		struct tevent_context *ev;
+		struct test5_client_state state;
+		struct sock_queue *queue;
+		struct test5_pkt pkt;
+		int conn;
+
+		close(fd[0]);
+
+		ev = tevent_context_init(NULL);
+		assert(ev != NULL);
+
+		conn = sock_connect(sockpath);
+		assert(conn != -1);
+
+		state.id = id;
+		state.fd = fd[1];
+		state.done = false;
+
+		queue = sock_queue_setup(ev, ev, conn,
+					 test5_client_callback, &state);
+		assert(queue != NULL);
+
+		/*
+		 * The length field must equal the number of bytes written
+		 * below, so derive it from the struct, not a literal.
+		 */
+		pkt.len = sizeof(struct test5_pkt);
+		pkt.data = 0xbaba;
+
+		ret = sock_queue_write(queue, (uint8_t *)&pkt,
+				       sizeof(struct test5_pkt));
+		assert(ret == 0);
+
+		while (! state.done) {
+			tevent_loop_once(ev);
+		}
+
+		close(fd[1]);
+		state.fd = -1;
+
+		/* Linger until the server process is gone */
+		while (kill(pid_server, 0) == 0 || errno != ESRCH) {
+			sleep(1);
+		}
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	ret = 0;
+	n = read(fd[0], &ret, sizeof(ret));
+	if (n == 0) {
+		fprintf(stderr, "client id %d read 0 bytes\n", id);
+	}
+	assert(n == 0 || n == sizeof(ret));
+
+	close(fd[0]);
+
+	*client_pid = pid;
+	return ret;
+}
+
+/* Server-side bookkeeping: number of currently accepted clients */
+struct test5_server_state {
+	int num_clients;
+};
+
+/*
+ * connect hook: accept the client and count it, unless
+ * TEST5_VALID_CLIENTS clients are already connected.
+ *
+ * Returns true when the client is accepted, false otherwise.
+ */
+static bool test5_connect(struct sock_client_context *client,
+			  pid_t pid,
+			  void *private_data)
+{
+	struct test5_server_state *server_state =
+		(struct test5_server_state *)private_data;
+	bool accepted = (server_state->num_clients != TEST5_VALID_CLIENTS);
+
+	if (accepted) {
+		server_state->num_clients += 1;
+		assert(server_state->num_clients <= TEST5_VALID_CLIENTS);
+	}
+
+	return accepted;
+}
+
+/*
+ * disconnect hook: drop one client from the count, which must never
+ * become negative.
+ */
+static void test5_disconnect(struct sock_client_context *client,
+			     void *private_data)
+{
+	struct test5_server_state *server_state =
+		(struct test5_server_state *)private_data;
+
+	server_state->num_clients -= 1;
+	assert(server_state->num_clients >= 0);
+}
+
+/* Holds the reply while the write to the client is in flight */
+struct test5_read_state {
+	struct test5_pkt reply;
+};
+
+static void test5_read_done(struct tevent_req *subreq);
+
+/*
+ * read_send hook: validate the request packet and reply with the
+ * current number of connected clients.
+ *
+ * The request must be exactly one struct test5_pkt carrying 0xbaba; the
+ * assertions abort the server otherwise.  The returned request finishes
+ * in test5_read_done() once the reply has been written.
+ */
+static struct tevent_req *test5_read_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct sock_client_context *client,
+					  uint8_t *buf, size_t buflen,
+					  void *private_data)
+{
+	struct test5_server_state *server_state =
+		(struct test5_server_state *)private_data;
+	struct test5_read_state *read_state;
+	struct tevent_req *read_req;
+	struct tevent_req *write_req;
+	struct test5_pkt *request;
+
+	read_req = tevent_req_create(mem_ctx, &read_state,
+				     struct test5_read_state);
+	assert(read_req != NULL);
+
+	assert(buflen == sizeof(struct test5_pkt));
+
+	request = (struct test5_pkt *)buf;
+	assert(request->data == 0xbaba);
+
+	read_state->reply = (struct test5_pkt) {
+		.len = sizeof(struct test5_pkt),
+		.data = server_state->num_clients,
+	};
+
+	write_req = sock_socket_write_send(read_state, ev, client,
+					   (uint8_t *)&read_state->reply,
+					   read_state->reply.len);
+	assert(write_req != NULL);
+
+	tevent_req_set_callback(write_req, test5_read_done, read_req);
+
+	return read_req;
+}
+
+/*
+ * Completion callback for the reply write started in test5_read_send():
+ * pass a write failure on to the parent request, otherwise finish it.
+ */
+static void test5_read_done(struct tevent_req *subreq)
+{
+	struct tevent_req *read_req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool written;
+
+	written = sock_socket_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (written) {
+		tevent_req_done(read_req);
+		return;
+	}
+
+	tevent_req_error(read_req, err);
+}
+
+/*
+ * read_recv hook: returns true on success.  On failure returns false
+ * and, when perr is non-NULL, stores the request's unix error in it.
+ */
+static bool test5_read_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (!tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/* Socket callbacks for test5: client counting plus the request handler */
+static struct sock_socket_funcs test5_client_funcs = {
+ .connect = test5_connect,
+ .disconnect = test5_disconnect,
+ .read_send = test5_read_send,
+ .read_recv = test5_read_recv,
+};
+
+/* The request carries no state of its own */
+struct test5_wait_state {
+};
+
+/*
+ * wait_send hook: write the int value 1 to the descriptor that
+ * private_data points at, close that descriptor, and return a request
+ * that this function never marks as finished.
+ *
+ * Returns NULL when the request cannot be created.
+ */
+static struct tevent_req *test5_wait_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  void *private_data)
+{
+	struct test5_wait_state *wait_state;
+	int pipe_fd = *(int *)private_data;
+	int token = 1;
+	ssize_t written;
+
+	written = write(pipe_fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+	close(pipe_fd);
+
+	/* tevent_req_create() yields NULL on failure, which is passed on */
+	return tevent_req_create(mem_ctx, &wait_state,
+				 struct test5_wait_state);
+}
+
+/* wait_recv hook that always reports success; perr is left untouched. */
+static bool test5_wait_recv(struct tevent_req *req, int *perr)
+{
+ return true;
+}
+
+/* Daemon callbacks for test5: only the wait hook is set */
+static struct sock_daemon_funcs test5_funcs = {
+ .wait_send = test5_wait_send,
+ .wait_recv = test5_wait_recv,
+};
+
+/*
+ * Start a server that accepts at most TEST5_VALID_CLIENTS clients, then
+ * start TEST5_MAX_CLIENTS clients one after another.
+ *
+ * Each of the first TEST5_VALID_CLIENTS clients must be told the client
+ * count at the time of its request (i + 1); every later client must
+ * report 0.  All clients are then killed and reaped, and the server is
+ * terminated with SIGTERM and must exit with status 0.
+ */
+static void test5(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	pid_t pid_server, pid;
+	int fd[2], ret, i;
+	ssize_t n;
+	pid_t client_pid[TEST5_MAX_CLIENTS];
+
+	/* The server watches this (parent) process */
+	pid = getpid();
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid_server = fork();
+	assert(pid_server != -1);
+
+	if (pid_server == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+		struct test5_server_state state;
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test5", "file:", "NOTICE",
+					&test5_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		state.num_clients = 0;
+
+		ret = sock_daemon_add_unix(sockd, sockpath,
+					   &test5_client_funcs, &state);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, pid);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	/* Wait until the server's wait hook has run */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	close(fd[0]);
+
+	for (i=0; i<TEST5_MAX_CLIENTS; i++) {
+		ret = test5_client(sockpath, i, pid_server, &client_pid[i]);
+		if (i < TEST5_VALID_CLIENTS) {
+			assert(ret == i+1);
+		} else {
+			assert(ret == 0);
+		}
+	}
+
+	/* Clients are killed on purpose, so only reaping is checked */
+	for (i=TEST5_MAX_CLIENTS-1; i>=0; i--) {
+		kill(client_pid[i], SIGKILL);
+
+		pid = wait(&ret);
+		assert(pid != -1);
+	}
+
+	ret = kill(pid_server, SIGTERM);
+	assert(ret == 0);
+
+	pid = waitpid(pid_server, &ret, 0);
+	assert(pid == pid_server);
+	/*
+	 * WEXITSTATUS() is only meaningful after a normal exit; without
+	 * this check a server aborted by a failed assert would be read as
+	 * exit status 0.
+	 */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+}
+
+/*
+ * test6
+ *
+ * Start daemon, test client connects, requests, replies, disconnects
+ */
+
+/* Request/reply packet; len is checked against sizeof(struct test6_pkt) */
+struct test6_pkt {
+ uint32_t len;
+ uint32_t data;
+};
+
+/* Client state: done is set once the expected reply has been received */
+struct test6_client_state {
+ bool done;
+};
+
+/*
+ * Queue callback for the test6 client: the reply must be exactly one
+ * struct test6_pkt carrying 0xffeeddcc.  Marks the client as done.
+ */
+static void test6_client_callback(uint8_t *buf, size_t buflen,
+				  void *private_data)
+{
+	struct test6_client_state *client_state =
+		(struct test6_client_state *)private_data;
+	struct test6_pkt *reply;
+
+	assert(buflen == sizeof(struct test6_pkt));
+	reply = (struct test6_pkt *)buf;
+	assert(reply->len == sizeof(struct test6_pkt));
+	assert(reply->data == 0xffeeddcc);
+
+	client_state->done = true;
+}
+
+/*
+ * In-process client for test6: connect to sockpath, send one request
+ * carrying 0xaabbccdd and run the event loop until the callback has
+ * validated the reply.
+ */
+static void test6_client(const char *sockpath)
+{
+	struct tevent_context *ev;
+	struct test6_client_state state;
+	struct sock_queue *queue;
+	struct test6_pkt pkt;
+	int conn, ret;
+
+	ev = tevent_context_init(NULL);
+	assert(ev != NULL);
+
+	conn = sock_connect(sockpath);
+	assert(conn != -1);
+
+	state.done = false;
+
+	queue = sock_queue_setup(ev, ev, conn,
+				 test6_client_callback, &state);
+	assert(queue != NULL);
+
+	/*
+	 * The length field must equal the number of bytes written below,
+	 * so derive it from the struct, not a literal.
+	 */
+	pkt.len = sizeof(struct test6_pkt);
+	pkt.data = 0xaabbccdd;
+
+	ret = sock_queue_write(queue, (uint8_t *)&pkt,
+			       sizeof(struct test6_pkt));
+	assert(ret == 0);
+
+	while (! state.done) {
+		tevent_loop_once(ev);
+	}
+
+	talloc_free(ev);
+}
+
+/* Server state shared by the test6 wait hook and request handler */
+struct test6_server_state {
+ struct sock_daemon_context *sockd;
+ /* fd: startup notification pipe; done: set to 1 once a reply was sent */
+ int fd, done;
+};
+
+/* Holds the reply while the write to the client is in flight */
+struct test6_read_state {
+ struct test6_server_state *server_state;
+ struct test6_pkt reply;
+};
+
+static void test6_read_done(struct tevent_req *subreq);
+
+/*
+ * read_send hook: validate the request packet and reply with
+ * 0xffeeddcc.
+ *
+ * The request must be exactly one struct test6_pkt carrying 0xaabbccdd;
+ * the assertions abort the server otherwise.  The returned request
+ * finishes in test6_read_done() once the reply has been written.
+ */
+static struct tevent_req *test6_read_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct sock_client_context *client,
+					  uint8_t *buf, size_t buflen,
+					  void *private_data)
+{
+	struct test6_read_state *read_state;
+	struct tevent_req *read_req;
+	struct tevent_req *write_req;
+	struct test6_pkt *request;
+
+	read_req = tevent_req_create(mem_ctx, &read_state,
+				     struct test6_read_state);
+	assert(read_req != NULL);
+
+	read_state->server_state = (struct test6_server_state *)private_data;
+
+	assert(buflen == sizeof(struct test6_pkt));
+
+	request = (struct test6_pkt *)buf;
+	assert(request->data == 0xaabbccdd);
+
+	read_state->reply.len = sizeof(struct test6_pkt);
+	read_state->reply.data = 0xffeeddcc;
+
+	write_req = sock_socket_write_send(read_state, ev, client,
+					   (uint8_t *)&read_state->reply,
+					   read_state->reply.len);
+	assert(write_req != NULL);
+
+	tevent_req_set_callback(write_req, test6_read_done, read_req);
+
+	return read_req;
+}
+
+/*
+ * Completion callback for the reply write started in test6_read_send().
+ *
+ * A write failure is passed on to the parent request.  On success the
+ * server state is flagged as done before the request is finished.
+ */
+static void test6_read_done(struct tevent_req *subreq)
+{
+	struct tevent_req *read_req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct test6_read_state *read_state = tevent_req_data(
+		read_req, struct test6_read_state);
+	int err;
+	bool written;
+
+	written = sock_socket_write_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (written) {
+		read_state->server_state->done = 1;
+		tevent_req_done(read_req);
+		return;
+	}
+
+	tevent_req_error(read_req, err);
+}
+
+/*
+ * read_recv hook: returns true on success.  On failure returns false
+ * and, when perr is non-NULL, stores the request's unix error in it.
+ */
+static bool test6_read_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (!tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/* Socket callbacks for test6: only the request handler is set */
+static struct sock_socket_funcs test6_client_funcs = {
+ .read_send = test6_read_send,
+ .read_recv = test6_read_recv,
+};
+
+/* Gives the timer callback access to the server state */
+struct test6_wait_state {
+	struct test6_server_state *server_state;
+};
+
+static void test6_wait_done(struct tevent_req *subreq);
+
+/*
+ * wait_send hook: write the int value 1 to the server state's
+ * descriptor, close it, and arm a 10 second wakeup timer that is
+ * handled in test6_wait_done().
+ *
+ * Returns NULL when the request cannot be created; when the timer
+ * sub-request cannot be created the request is posted as failed.
+ */
+static struct tevent_req *test6_wait_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  void *private_data)
+{
+	struct test6_server_state *server_state =
+		(struct test6_server_state *)private_data;
+	struct test6_wait_state *wait_state;
+	struct tevent_req *wait_req;
+	struct tevent_req *timer_req;
+	struct timeval wakeup_time;
+	int token = 1;
+	ssize_t written;
+
+	written = write(server_state->fd, &token, sizeof(token));
+	assert(written == sizeof(token));
+	close(server_state->fd);
+	server_state->fd = -1;
+
+	wait_req = tevent_req_create(mem_ctx, &wait_state,
+				     struct test6_wait_state);
+	if (wait_req == NULL) {
+		return NULL;
+	}
+
+	wait_state->server_state = server_state;
+
+	wakeup_time = tevent_timeval_current_ofs(10, 0);
+	timer_req = tevent_wakeup_send(wait_state, ev, wakeup_time);
+	if (tevent_req_nomem(timer_req, wait_req)) {
+		return tevent_req_post(wait_req, ev);
+	}
+	tevent_req_set_callback(timer_req, test6_wait_done, wait_req);
+
+	return wait_req;
+}
+
+/*
+ * Timer callback for test6_wait_send(): fail the parent request with
+ * EIO if the wakeup failed or no reply had been sent by the time the
+ * timer fired; otherwise finish it successfully.
+ */
+static void test6_wait_done(struct tevent_req *subreq)
+{
+	struct tevent_req *wait_req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct test6_wait_state *wait_state = tevent_req_data(
+		wait_req, struct test6_wait_state);
+	bool woke_up;
+
+	woke_up = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+
+	if (!woke_up || wait_state->server_state->done == 0) {
+		tevent_req_error(wait_req, EIO);
+		return;
+	}
+
+	tevent_req_done(wait_req);
+}
+
+/*
+ * wait_recv hook: returns true on success.  On failure returns false
+ * and, when perr is non-NULL, stores the request's unix error in it.
+ */
+static bool test6_wait_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (!tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/* Daemon callbacks for test6: only the timer-based wait hook is set */
+static struct sock_daemon_funcs test6_funcs = {
+ .wait_send = test6_wait_send,
+ .wait_recv = test6_wait_recv,
+};
+
+/*
+ * Start a server, run one request/reply exchange against it from this
+ * process, and check that the server exits with status 0.
+ *
+ * The server's wait hook signals readiness through a pipe and finishes
+ * successfully only if a reply has been sent before its timer fires, so
+ * sock_daemon_run() is expected to return 0 in the child.
+ */
+static void test6(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	pid_t pid_server, pid;
+	int fd[2], ret;
+	ssize_t n;
+
+	/* The server watches this (parent) process */
+	pid = getpid();
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid_server = fork();
+	assert(pid_server != -1);
+
+	if (pid_server == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+		struct test6_server_state server_state = { 0 };
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		server_state.fd = fd[1];
+
+		ret = sock_daemon_setup(mem_ctx, "test6", "file:", "NOTICE",
+					&test6_funcs, &server_state,
+					&sockd);
+		assert(ret == 0);
+
+		server_state.sockd = sockd;
+		server_state.done = 0;
+
+		ret = sock_daemon_add_unix(sockd, sockpath,
+					   &test6_client_funcs, &server_state);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, pid);
+		assert(ret == 0);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	/* Wait until the server's wait hook has run */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	close(fd[0]);
+
+	test6_client(sockpath);
+
+	pid = waitpid(pid_server, &ret, 0);
+	assert(pid == pid_server);
+	/*
+	 * WEXITSTATUS() is only meaningful after a normal exit; without
+	 * this check a server aborted by a failed assert would be read as
+	 * exit status 0.
+	 */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+}
+
+/*
+ * test7
+ *
+ * Start daemon twice, confirm PID file contention
+ */
+
+/*
+ * Check PID file contention: while a child daemon holds the PID file, a
+ * second sock_daemon_run() on the same file in the parent must fail
+ * with EEXIST.  The child is then stopped with SIGTERM and must report
+ * shutdown (token 3) and exit with status 0.
+ */
+static void test7(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	struct sock_daemon_funcs test7_funcs;
+	struct stat st;
+	int fd[2];
+	pid_t pid, pid2;
+	int ret;
+	struct tevent_context *ev;
+	struct sock_daemon_context *sockd;
+	ssize_t n;
+
+	/* Reuse test2 funcs for the startup synchronisation */
+	test7_funcs = (struct sock_daemon_funcs) {
+		.startup = test2_startup,
+		.reconfigure = test2_reconfigure,
+		.shutdown = test2_shutdown,
+	};
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test7", "file:", "NOTICE",
+					&test7_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	/* Wait for the child's startup token */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	ret = stat(pidfile, &st);
+	assert(ret == 0);
+	assert(S_ISREG(st.st_mode));
+
+	ev = tevent_context_init(mem_ctx);
+	assert(ev != NULL);
+
+	/*
+	 * fd[1] is already closed in the parent; this relies on the run
+	 * failing with EEXIST, as asserted below.
+	 * NOTE(review): presumably the startup hook is not reached in that
+	 * case - confirm against sock_daemon_run().
+	 */
+	ret = sock_daemon_setup(mem_ctx, "test7-parent", "file:", "NOTICE",
+				&test7_funcs, &fd[1], &sockd);
+	assert(ret == 0);
+
+	ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+	assert(ret == EEXIST);
+
+	ret = kill(pid, SIGTERM);
+	assert(ret == 0);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 3);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/*
+	 * WEXITSTATUS() is only meaningful after a normal exit; without
+	 * this check a child aborted by a failed assert would be read as
+	 * exit status 0.
+	 */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	close(fd[0]);
+}
+
+/*
+ * test8
+ *
+ * Start daemon, confirm that create_session argument works as expected
+ */
+
+/*
+ * Run the daemon twice, first with create_session false and then with
+ * create_session true, and compare the child's session ID with its PID.
+ *
+ * mem_ctx  - talloc parent for event contexts and daemons
+ * pidfile  - PID file path handed to sock_daemon_run()
+ * sockpath - unused by this test
+ */
+static void test8(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	int fd[2];
+	pid_t pid, pid2, sid;
+	int ret;
+	struct tevent_context *ev;
+	struct sock_daemon_context *sockd;
+	ssize_t n;
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		/* Reuse test2 funcs for the startup synchronisation */
+		struct sock_daemon_funcs test8_funcs = {
+			.startup = test2_startup,
+		};
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test8", "file:", "NOTICE",
+					&test8_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	/* Wait for the child to report 1 on the pipe */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	/* create_session false above, so pid != sid */
+	sid = getsid(pid);
+	assert(pid != sid);
+
+	ret = kill(pid, SIGTERM);
+	assert(ret == 0);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/* Only a normal exit makes WEXITSTATUS() meaningful */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	close(fd[0]);
+
+	/* Second round: same daemon, but with create_session set */
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		/* Reuse test2 funcs for the startup synchronisation */
+		struct sock_daemon_funcs test8_funcs = {
+			.startup = test2_startup,
+		};
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test8", "file:", "NOTICE",
+					&test8_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, true, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	/* create_session true above, so pid == sid */
+	sid = getsid(pid);
+	assert(pid == sid);
+
+	ret = kill(pid, SIGTERM);
+	assert(ret == 0);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/* Only a normal exit makes WEXITSTATUS() meaningful */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	close(fd[0]);
+}
+
+/*
+ * test9
+ *
+ * Confirm that do_fork causes the daemon to be forked as a separate child
+ */
+
+/*
+ * Run the daemon once with do_fork false and once with do_fork true.
+ * In the second round the forked child is expected to exit while the
+ * daemon keeps running under the PID recorded in the PID file.
+ *
+ * mem_ctx  - talloc parent for event contexts and daemons
+ * pidfile  - PID file path handed to sock_daemon_run()
+ * sockpath - only used for the final stat() check
+ */
+static void test9(TALLOC_CTX *mem_ctx, const char *pidfile,
+		  const char *sockpath)
+{
+	int fd[2];
+	pid_t pid, pid2;
+	int ret;
+	struct tevent_context *ev;
+	struct sock_daemon_context *sockd;
+	ssize_t n;
+	int pidfile_fd;
+	char pidstr[20] = { 0 };
+	struct stat st;
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		/* Reuse test2 funcs for the startup synchronisation */
+		struct sock_daemon_funcs test9_funcs = {
+			.startup = test2_startup,
+		};
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test9", "file:", "NOTICE",
+					&test9_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	/* Wait for the child to report 1 on the pipe */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	/* do_fork false above, so pid should be active */
+	ret = kill(pid, 0);
+	assert(ret == 0);
+
+	ret = kill(pid, SIGTERM);
+	assert(ret == 0);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/* Only a normal exit makes WEXITSTATUS() meaningful */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	close(fd[0]);
+
+	/* Second round: let sock_daemon_run() fork the daemon itself */
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		/* Reuse test2 funcs for the startup synchronisation */
+		struct sock_daemon_funcs test9_funcs = {
+			.startup = test2_startup,
+			.shutdown = test2_shutdown,
+		};
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test9", "file:", "NOTICE",
+					&test9_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, true, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	/* do_fork true above, so pid should have exited */
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/* Only a normal exit makes WEXITSTATUS() meaningful */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	/* The PID file must name the daemon, not the process we forked */
+	pidfile_fd = open(pidfile, O_RDONLY, 0644);
+	assert(pidfile_fd != -1);
+	n = read(pidfile_fd, pidstr, sizeof(pidstr)-1);
+	assert(n != -1);
+	pid2 = (pid_t)atoi(pidstr);
+	assert(pid != pid2);
+	close(pidfile_fd);
+
+	ret = kill(pid2, SIGTERM);
+	assert(ret == 0);
+
+	/* The daemon reports 3 on the pipe while shutting down */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 3);
+
+	/*
+	 * pid2 isn't our child, so can't call waitpid().  kill(pid2, 0)
+	 * is unreliable - pid2 may have been recycled.  Above indicates
+	 * that the shutdown function was called, so just do 1 final
+	 * check on the filesystem.
+	 *
+	 * NOTE(review): this was described as a check that the pidfile
+	 * has been removed, but the code stats sockpath, and this test
+	 * never adds a socket.  Checking pidfile here instead would
+	 * presumably race with the daemon's cleanup after its shutdown
+	 * hook - confirm the intent before changing it.
+	 */
+	ret = stat(sockpath, &st);
+	assert(ret == -1);
+
+	close(fd[0]);
+}
+
+/*
+ * Shutdown hook for test10: writes the integer 3 to the file descriptor
+ * that private_data points at and asserts the write was complete.
+ */
+static void test10_shutdown(void *private_data)
+{
+	int *fdp = (int *)private_data;
+	int token = 3;
+	ssize_t n;
+
+	n = write(*fdp, &token, sizeof(token));
+	assert(n == sizeof(token));
+}
+
+/* Request state type for test10_wait_send(); it currently has no members */
+struct test10_wait_state {
+};
+
+static void test10_wait_done(struct tevent_req *subreq);
+
+/*
+ * Wait hook for test10: starts a 10 second wakeup timer and then
+ * writes the integer 1 to the file descriptor private_data points at.
+ *
+ * Returns the new request, or NULL if it could not be created.  If the
+ * timer cannot be allocated the request is failed and posted.
+ */
+static struct tevent_req *test10_wait_send(TALLOC_CTX *mem_ctx,
+					   struct tevent_context *ev,
+					   void *private_data)
+{
+	int fd = *(int *)private_data;
+	struct tevent_req *req, *subreq;
+	struct test10_wait_state *state;
+	/* write() returns ssize_t; an unsigned type would hide -1 */
+	ssize_t nwritten;
+	int ret = 1;
+
+	req = tevent_req_create(mem_ctx, &state, struct test10_wait_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	subreq = tevent_wakeup_send(state, ev,
+				    tevent_timeval_current_ofs(10, 0));
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, test10_wait_done, req);
+
+	/* Report 1 on the pipe only once the timer is armed */
+	nwritten = write(fd, &ret, sizeof(ret));
+	assert(nwritten == sizeof(ret));
+
+	return req;
+}
+
+/*
+ * Completion callback for the wakeup timer started in
+ * test10_wait_send(): finishes the parent request, with EIO if the
+ * wakeup did not complete successfully.
+ */
+static void test10_wait_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+
+	if (tevent_wakeup_recv(subreq)) {
+		tevent_req_done(req);
+		return;
+	}
+
+	tevent_req_error(req, EIO);
+}
+
+/*
+ * Collect the result of the test10 wait request.
+ *
+ * Returns true when the request carries no unix error.  Otherwise
+ * returns false and, if perr is not NULL, stores the error code there.
+ */
+static bool test10_wait_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	/* Success path first: nothing to report back */
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	/* Hand the error code back only if the caller asked for it */
+	if (perr != NULL) {
+		*perr = err;
+	}
+
+	return false;
+}
+
+/*
+ * Daemon callbacks for test10: a shutdown hook plus the wait_send/
+ * wait_recv pair; all other members are left zero-initialised.
+ */
+static struct sock_daemon_funcs test10_funcs = {
+ .shutdown = test10_shutdown,
+ .wait_send = test10_wait_send,
+ .wait_recv = test10_wait_recv,
+};
+
+/*
+ * test10
+ *
+ * Confirm that the daemon starts successfully if there is a stale socket
+ */
+
+/*
+ * mem_ctx  - talloc parent for event contexts and daemons
+ * pidfile  - PID file path handed to sock_daemon_run()
+ * sockpath - unix socket path; left behind stale by the first daemon
+ */
+static void test10(TALLOC_CTX *mem_ctx, const char *pidfile,
+		   const char *sockpath)
+{
+	struct stat st;
+	int fd[2];
+	pid_t pid, pid2;
+	int ret;
+	ssize_t n;
+	int pidfile_fd;
+	char pidstr[20] = { 0 };
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test10", "file:", "NOTICE",
+					&test10_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_add_unix(sockd, sockpath,
+					   &dummy_socket_funcs, NULL);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	/* test10_wait_send() reports 1 once the daemon is waiting */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	/* KILL will leave PID file and socket behind */
+	ret = kill(pid, SIGKILL);
+	assert(ret == 0);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/*
+	 * The child did not exit, it was terminated by SIGKILL, so
+	 * check the termination signal instead of an exit status.
+	 */
+	assert(WIFSIGNALED(ret));
+	assert(WTERMSIG(ret) == SIGKILL);
+
+	/* The stale socket must still be there for the second daemon */
+	ret = stat(sockpath, &st);
+	assert(ret == 0);
+
+	close(fd[0]);
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		struct tevent_context *ev;
+		struct sock_daemon_context *sockd;
+
+		close(fd[0]);
+
+		ev = tevent_context_init(mem_ctx);
+		assert(ev != NULL);
+
+		ret = sock_daemon_setup(mem_ctx, "test10", "file:", "NOTICE",
+					&test10_funcs, &fd[1], &sockd);
+		assert(ret == 0);
+
+		ret = sock_daemon_add_unix(sockd, sockpath,
+					   &dummy_socket_funcs, NULL);
+		assert(ret == 0);
+
+		ret = sock_daemon_run(ev, sockd, pidfile, false, false, -1);
+		assert(ret == EINTR);
+
+		exit(0);
+	}
+
+	close(fd[1]);
+
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 1);
+
+	/* The PID file must now name the second daemon */
+	pidfile_fd = open(pidfile, O_RDONLY, 0644);
+	assert(pidfile_fd != -1);
+	n = read(pidfile_fd, pidstr, sizeof(pidstr)-1);
+	assert(n != -1);
+	pid2 = (pid_t)atoi(pidstr);
+	assert(pid == pid2);
+	close(pidfile_fd);
+
+	ret = kill(pid, SIGTERM);
+	assert(ret == 0);
+
+	/* test10_shutdown() reports 3 on the pipe */
+	n = read(fd[0], &ret, sizeof(ret));
+	assert(n == sizeof(ret));
+	assert(ret == 3);
+
+	pid2 = waitpid(pid, &ret, 0);
+	assert(pid2 == pid);
+	/* Only a normal exit makes WEXITSTATUS() meaningful */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+
+	close(fd[0]);
+
+	/* After a clean shutdown neither file may be left behind */
+	ret = stat(pidfile, &st);
+	assert(ret == -1);
+
+	ret = stat(sockpath, &st);
+	assert(ret == -1);
+}
+
+/*
+ * Usage: <prog> <pidfile> <sockpath> <testnum>
+ *
+ * Runs the test selected by testnum (1-10).  Returns 0 after running
+ * a test, exits with 1 on wrong usage and returns 1 for an unknown
+ * test number.
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *mem_ctx;
+	const char *pidfile, *sockpath;
+	int num;
+
+	if (argc != 4) {
+		fprintf(stderr, "%s <pidfile> <sockpath> <testnum>\n", argv[0]);
+		exit(1);
+	}
+
+	pidfile = argv[1];
+	sockpath = argv[2];
+	num = atoi(argv[3]);
+
+	mem_ctx = talloc_new(NULL);
+	assert(mem_ctx != NULL);
+
+	switch (num) {
+	case 1:
+		test1(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 2:
+		test2(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 3:
+		test3(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 4:
+		test4(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 5:
+		test5(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 6:
+		test6(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 7:
+		test7(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 8:
+		test8(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 9:
+		test9(mem_ctx, pidfile, sockpath);
+		break;
+
+	case 10:
+		test10(mem_ctx, pidfile, sockpath);
+		break;
+
+	default:
+		/* Nothing was tested, so this must not count as a pass */
+		fprintf(stderr, "Unknown test number %d\n", num);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/ctdb/tests/src/sock_io_test.c b/ctdb/tests/src/sock_io_test.c
new file mode 100644
index 0000000..ba4b637
--- /dev/null
+++ b/ctdb/tests/src/sock_io_test.c
@@ -0,0 +1,283 @@
+/*
+ sock I/O tests
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <assert.h>
+
+#include "common/sock_io.c"
+
+/*
+ * Create a listening AF_UNIX stream socket bound to sockpath.
+ *
+ * Every failure (path too long for sun_path, socket(), bind(),
+ * listen()) trips an assert.  Returns the listening descriptor.
+ */
+static int socket_init(const char *sockpath)
+{
+	struct sockaddr_un un_addr;
+	size_t pathlen;
+	int sock, rc;
+
+	memset(&un_addr, 0, sizeof(un_addr));
+	un_addr.sun_family = AF_UNIX;
+
+	/* strlcpy() returns the source length, so >= size means truncation */
+	pathlen = strlcpy(un_addr.sun_path, sockpath,
+			  sizeof(un_addr.sun_path));
+	assert(pathlen < sizeof(un_addr.sun_path));
+
+	sock = socket(AF_UNIX, SOCK_STREAM, 0);
+	assert(sock != -1);
+
+	rc = bind(sock, (struct sockaddr *)&un_addr, sizeof(un_addr));
+	assert(rc != -1);
+
+	rc = listen(sock, 10);
+	assert(rc != -1);
+
+	return sock;
+}
+
+/*
+ * Write 99 packets to fd.  Packet k (k = 1..99) is 10*k payload bytes
+ * plus a uint32_t header; it is filled with the byte value k and its
+ * first four bytes are then overwritten with the total packet length.
+ */
+static void test1_writer(int fd)
+{
+	uint8_t pkt[1024];
+	int fill;
+
+	for (fill = 1; fill < 100; fill++) {
+		uint32_t pktlen = (uint32_t)(fill * 10) + sizeof(uint32_t);
+		ssize_t n;
+
+		memset(pkt, fill, pktlen);
+		memcpy(pkt, &pktlen, sizeof(uint32_t));
+
+		n = sys_write(fd, pkt, pktlen);
+		assert(n == pktlen);
+	}
+}
+
+/* State shared between test1() and its test1_reader() callback */
+struct test1_reader_state {
+ size_t pkt_len; /* length the next packet must have; grows by 10 per packet */
+ bool done; /* set once the reader is called with a zero-length buffer */
+};
+
+/*
+ * Queue read callback for test1.  Each non-empty packet must have
+ * exactly the expected length, which then grows by 10.  A zero-length
+ * buffer marks the state as done.
+ */
+static void test1_reader(uint8_t *buf, size_t buflen, void *private_data)
+{
+	struct test1_reader_state *rs = private_data;
+
+	if (buflen != 0) {
+		assert(buflen == rs->pkt_len);
+		rs->pkt_len += 10;
+		return;
+	}
+
+	rs->done = true;
+}
+
+/*
+ * test1: a forked child listens on sockpath and writes packets with
+ * test1_writer(); the parent connects and reads them through a
+ * sock_queue, checking packet lengths in test1_reader().
+ *
+ * mem_ctx  - talloc parent for the event context and the queue
+ * sockpath - unix socket path used by the child
+ */
+static void test1(TALLOC_CTX *mem_ctx, const char *sockpath)
+{
+	struct test1_reader_state state;
+	struct tevent_context *ev;
+	struct sock_queue *queue;
+	pid_t pid;
+	int pfd[2], fd, ret;
+	ssize_t n;
+
+	ret = pipe(pfd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		int newfd;
+
+		close(pfd[0]);
+
+		fd = socket_init(sockpath);
+		assert(fd != -1);
+
+		/* Tell the parent the socket is ready for connections */
+		ret = 1;
+		n = sys_write(pfd[1], &ret, sizeof(int));
+		assert(n == sizeof(int));
+
+		newfd = accept(fd, NULL, NULL);
+		assert(newfd != -1);
+
+		test1_writer(newfd);
+		close(newfd);
+		unlink(sockpath);
+
+		exit(0);
+	}
+
+	close(pfd[1]);
+
+	n = sys_read(pfd[0], &ret, sizeof(int));
+	assert(n == sizeof(int));
+	assert(ret == 1);
+
+	close(pfd[0]);
+
+	fd = sock_connect(sockpath);
+	assert(fd != -1);
+
+	ev = tevent_context_init(mem_ctx);
+	assert(ev != NULL);
+
+	/* First packet: 10 payload bytes plus the length header */
+	state.pkt_len = 10 + sizeof(uint32_t);
+	state.done = false;
+
+	queue = sock_queue_setup(mem_ctx, ev, fd, test1_reader, &state);
+	assert(queue != NULL);
+
+	while (! state.done) {
+		tevent_loop_once(ev);
+	}
+
+	talloc_free(queue);
+	talloc_free(ev);
+
+	pid = wait(&ret);
+	assert(pid != -1);
+	/*
+	 * Make child failures visible: a failed assert in the child
+	 * kills it with SIGABRT, which must not count as success.
+	 */
+	assert(WIFEXITED(ret));
+	assert(WEXITSTATUS(ret) == 0);
+}
+
+/*
+ * Read from fd until end-of-file.  Every read must return exactly the
+ * expected packet size, which starts at 10 bytes plus a uint32_t
+ * header and grows by 10 with each packet.
+ */
+static void test2_reader(int fd)
+{
+	uint8_t buf[1024];
+	size_t expected;
+	ssize_t nread;
+
+	for (expected = 10 + sizeof(uint32_t); ; expected += 10) {
+		nread = sys_read(fd, buf, sizeof(buf));
+		assert(nread != -1);
+
+		/* Zero bytes means the peer closed the connection */
+		if (nread == 0) {
+			break;
+		}
+
+		assert((size_t)nread == expected);
+	}
+}
+
+/* Read callback for test2's queue: aborts if it is ever called */
+static void test2_dummy_reader(uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ abort();
+}
+
+/*
+ * Submit 99 packets to the queue.  Packet k (k = 1..99) is 10*k
+ * payload bytes plus a uint32_t header; it is filled with the byte
+ * value k and its first four bytes are then overwritten with the total
+ * packet length.  Every sock_queue_write() call must return 0.
+ */
+static void test2_writer(struct sock_queue *queue)
+{
+	uint8_t pkt[1024];
+	int fill;
+
+	for (fill = 1; fill < 100; fill++) {
+		uint32_t pktlen = (uint32_t)(fill * 10) + sizeof(uint32_t);
+		int rc;
+
+		memset(pkt, fill, pktlen);
+		memcpy(pkt, &pktlen, sizeof(uint32_t));
+
+		rc = sock_queue_write(queue, pkt, pktlen);
+		assert(rc == 0);
+	}
+}
+
+/*
+ * test2: a forked child listens on sockpath and reads with
+ * test2_reader(); the parent connects and submits packets through a
+ * sock_queue with test2_writer().
+ *
+ * mem_ctx - talloc parent for the event context and the queue
+ * sockpath - unix socket path used by the child
+ */
+static void test2(TALLOC_CTX *mem_ctx, const char *sockpath)
+{
+ struct tevent_context *ev;
+ struct sock_queue *queue;
+ pid_t pid;
+ int pfd[2], fd, ret;
+ ssize_t n;
+
+ ret = pipe(pfd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) {
+ int newfd;
+
+ close(pfd[0]);
+
+ fd = socket_init(sockpath);
+ assert(fd != -1);
+
+ /* Tell the parent the socket is ready for connections */
+ ret = 1;
+ n = sys_write(pfd[1], &ret, sizeof(int));
+ assert(n == sizeof(int));
+
+ newfd = accept(fd, NULL, NULL);
+ assert(newfd != -1);
+
+ test2_reader(newfd);
+ close(newfd);
+ unlink(sockpath);
+
+ exit(0);
+ }
+
+ close(pfd[1]);
+
+ /* Do not connect before the child has reported 1 on the pipe */
+ n = sys_read(pfd[0], &ret, sizeof(int));
+ assert(n == sizeof(int));
+ assert(ret == 1);
+
+ close(pfd[0]);
+
+ fd = sock_connect(sockpath);
+ assert(fd != -1);
+
+ ev = tevent_context_init(mem_ctx);
+ assert(ev != NULL);
+
+ /* The read callback aborts: the child never sends anything back */
+ queue = sock_queue_setup(mem_ctx, ev, fd, test2_dummy_reader, NULL);
+ assert(queue != NULL);
+
+ test2_writer(queue);
+
+ talloc_free(queue);
+ talloc_free(ev);
+
+ /*
+ * NOTE(review): only the wait() call itself is checked; the child's
+ * exit status in ret is never inspected, so a failed assert in
+ * test2_reader() goes unnoticed here - confirm whether intended.
+ */
+ pid = wait(&ret);
+ assert(pid != -1);
+}
+
+/*
+ * Usage: <prog> <sockpath>
+ *
+ * Runs test1 and then test2 against the same socket path.  Exits with
+ * 1 on wrong usage, returns 0 otherwise.
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *ctx;
+
+	if (argc != 2) {
+		fprintf(stderr, "%s <sockpath>\n", argv[0]);
+		exit(1);
+	}
+
+	ctx = talloc_new(NULL);
+	assert(ctx != NULL);
+
+	test1(ctx, argv[1]);
+	test2(ctx, argv[1]);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/srvid_test.c b/ctdb/tests/src/srvid_test.c
new file mode 100644
index 0000000..2367c6c
--- /dev/null
+++ b/ctdb/tests/src/srvid_test.c
@@ -0,0 +1,105 @@
+/*
+ srvid tests
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <assert.h>
+
+#include "common/db_hash.c"
+#include "common/srvid.c"
+
+#define TEST_SRVID 0xBE11223344556677
+
+/*
+ * srvid handler used by the test: bumps the int counter that
+ * private_data points at; srvid and data are ignored.
+ */
+static void test_handler(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	int *counter = private_data;
+
+	*counter += 1;
+}
+
+/*
+ * Exercise srvid register / exists / dispatch / deregister, including
+ * removal of a handler by freeing the talloc context it was
+ * registered with.  Every step is checked with assert().
+ */
+int main(void)
+{
+	struct srvid_context *srv = NULL;
+	TALLOC_CTX *mem_ctx;
+	TALLOC_CTX *tmp_ctx;
+	int ret;
+	int count = 0;
+
+	/* Check both allocations, as is done when tmp_ctx is re-created */
+	mem_ctx = talloc_new(NULL);
+	assert(mem_ctx != NULL);
+
+	tmp_ctx = talloc_new(NULL);
+	assert(tmp_ctx != NULL);
+
+	/* Registering before srvid_init() must be rejected */
+	ret = srvid_register(srv, tmp_ctx, TEST_SRVID, test_handler, &count);
+	assert(ret == EINVAL);
+
+	ret = srvid_init(mem_ctx, &srv);
+	assert(ret == 0);
+
+	ret = srvid_deregister(srv, TEST_SRVID, &count);
+	assert(ret == ENOENT);
+
+	ret = srvid_register(srv, tmp_ctx, TEST_SRVID, test_handler, &count);
+	assert(ret == 0);
+
+	ret = srvid_exists(srv, TEST_SRVID, NULL);
+	assert(ret == 0);
+
+	ret = srvid_exists(srv, TEST_SRVID, &count);
+	assert(ret == 0);
+
+	/* The handler must run whichever of the two IDs matches */
+	ret = srvid_dispatch(srv, TEST_SRVID, 0, tdb_null);
+	assert(ret == 0);
+	assert(count == 1);
+
+	ret = srvid_dispatch(srv, 0, TEST_SRVID, tdb_null);
+	assert(ret == 0);
+	assert(count == 2);
+
+	/* Deregistration only matches the registered private data */
+	ret = srvid_deregister(srv, TEST_SRVID, NULL);
+	assert(ret == ENOENT);
+
+	ret = srvid_deregister(srv, TEST_SRVID, &count);
+	assert(ret == 0);
+
+	ret = srvid_register(srv, tmp_ctx, TEST_SRVID, test_handler, &count);
+	assert(ret == 0);
+
+	/* Freeing the registration context must remove the handler */
+	talloc_free(tmp_ctx);
+	ret = srvid_exists(srv, TEST_SRVID, NULL);
+	assert(ret == ENOENT);
+
+	ret = srvid_dispatch(srv, TEST_SRVID, 0, tdb_null);
+	assert(ret == ENOENT);
+
+	tmp_ctx = talloc_new(NULL);
+	assert(tmp_ctx != NULL);
+
+	ret = srvid_register(srv, tmp_ctx, TEST_SRVID, test_handler, NULL);
+	assert(ret == 0);
+	ret = srvid_exists(srv, TEST_SRVID, &count);
+	assert(ret == ENOENT);
+
+	ret = srvid_register(srv, tmp_ctx, TEST_SRVID, test_handler, &count);
+	assert(ret == 0);
+	ret = srvid_exists(srv, TEST_SRVID, &count);
+	assert(ret == 0);
+
+	talloc_free(srv);
+	assert(talloc_get_size(mem_ctx) == 0);
+	assert(talloc_get_size(tmp_ctx) == 0);
+
+	talloc_free(mem_ctx);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/system_socket_test.c b/ctdb/tests/src/system_socket_test.c
new file mode 100644
index 0000000..436f52a
--- /dev/null
+++ b/ctdb/tests/src/system_socket_test.c
@@ -0,0 +1,266 @@
+/*
+ Raw socket (un) marshalling tests
+
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <assert.h>
+
+/* For ether_aton() */
+#ifdef _AIX
+#include <arpa/inet.h>
+#endif
+#ifdef __FreeBSD__
+#include <net/ethernet.h>
+#endif
+#ifdef linux
+#include <netinet/ether.h>
+#endif
+
+#include "common/system_socket.c"
+
+#include "protocol/protocol_util.h"
+
+#include "tests/src/test_backtrace.h"
+
+/*
+ * Print len bytes of buf to stdout as two-digit hex values, 16 per
+ * row.  Each row starts with its offset as six hex digits, and a final
+ * line prints the total length in the same format.
+ */
+static void hexdump(uint8_t *buf, size_t len)
+{
+	size_t off;
+
+	for (off = 0; off < len; off++) {
+		bool row_start = (off % 16 == 0);
+
+		/* Finish the previous row, except before the very first one */
+		if (row_start && off != 0) {
+			printf("\n");
+		}
+		if (row_start) {
+			printf("%06zx", off);
+		}
+
+		printf(" %02x", buf[off]);
+	}
+
+	printf("\n%06zx\n", off);
+}
+
+/*
+ * Assert that a struct made of a struct ip followed by a struct tcphdr
+ * has no padding, i.e. its size is the sum of the two member sizes.
+ */
+static void test_types(void)
+{
+ /*
+ * We use this struct in the code but don't pack it due to
+ * portability concerns. It should have no padding.
+ */
+ struct {
+ struct ip ip;
+ struct tcphdr tcp;
+ } ip4pkt;
+
+ assert(sizeof(ip4pkt) == sizeof(struct ip) + sizeof(struct tcphdr));
+}
+
+#ifdef HAVE_PACKETSOCKET
+
+/*
+ * Build a packet for the given address and hardware address and
+ * hexdump it: arp_build() for IPv4 (honouring reply) or ip6_na_build()
+ * for IPv6.  Any other address family aborts.
+ *
+ * addr_str   - IP address string
+ * hwaddr_str - hardware address string, parsed with ether_aton()
+ * reply      - passed to arp_build(); not used for IPv6
+ */
+static void test_arp(const char *addr_str, const char *hwaddr_str, bool reply)
+{
+	ctdb_sock_addr addr;
+	struct ether_addr *hw, *dhw;
+	uint8_t pkt[512];
+	size_t pktlen;
+	int ret;
+
+	ret = ctdb_sock_addr_from_string(addr_str, &addr, false);
+	assert(ret == 0);
+
+	hw = ether_aton(hwaddr_str);
+	assert(hw != NULL);
+
+	if (addr.ip.sin_family == AF_INET) {
+		ret = arp_build(pkt, sizeof(pkt), &addr.ip, hw, reply,
+				&dhw, &pktlen);
+	} else if (addr.ip.sin_family == AF_INET6) {
+		ret = ip6_na_build(pkt, sizeof(pkt), &addr.ip6, hw,
+				   &dhw, &pktlen);
+	} else {
+		abort();
+	}
+
+	assert(ret == 0);
+
+	hexdump(pkt, pktlen);
+}
+
+#else /* HAVE_PACKETSOCKET */
+
+/* Variant built without HAVE_PACKETSOCKET: only reports that on stderr */
+static void test_arp(const char *addr_str, const char *hwaddr_str, bool reply)
+{
+ fprintf(stderr, "PACKETSOCKET not supported\n");
+}
+
+#endif /* HAVE_PACKETSOCKET */
+
+/*
+ * Build a TCP packet with tcp4_build()/tcp6_build(), hexdump it, feed
+ * it back through tcp4_extract()/tcp6_extract() and check that the
+ * extracted values match the inputs.
+ *
+ * src_str, dst_str - source and destination address strings
+ * seq_str, ack_str - decimal sequence and acknowledgement numbers
+ * rst_str          - non-zero to request the RST flag
+ */
+static void test_tcp(const char *src_str,
+		     const char *dst_str,
+		     const char *seq_str,
+		     const char *ack_str,
+		     const char *rst_str)
+{
+	ctdb_sock_addr src, dst;
+	uint32_t seq, ack;
+	int rst;
+	uint8_t buf[512];
+	struct ether_header *eth;
+	size_t expected_len, len;
+	char src_str_out[64], dst_str_out[64];
+	uint32_t seq_out, ack_out;
+	int rst_out = 0;
+	uint16_t window;
+	int ret;
+
+	ret = ctdb_sock_addr_from_string(src_str, &src, true);
+	assert(ret == 0);
+
+	ret = ctdb_sock_addr_from_string(dst_str, &dst, true);
+	assert(ret == 0);
+
+	/*
+	 * seq and ack are unsigned 32-bit values, so parse them with
+	 * strtoul(): atoi() is undefined for values above INT_MAX.
+	 */
+	seq = (uint32_t)strtoul(seq_str, NULL, 10);
+	ack = (uint32_t)strtoul(ack_str, NULL, 10);
+	rst = atoi(rst_str);
+
+	/* Need to fake this up */
+	eth = (struct ether_header *) buf;
+	memset(eth, 0, sizeof(*eth));
+
+	switch (src.ip.sin_family) {
+	case AF_INET:
+		eth->ether_type = htons(ETHERTYPE_IP);
+		expected_len = 40;
+		ret = tcp4_build(buf + sizeof(struct ether_header),
+				 sizeof(buf) - sizeof(struct ether_header),
+				 &src.ip,
+				 &dst.ip,
+				 seq,
+				 ack,
+				 rst,
+				 &len);
+		break;
+	case AF_INET6:
+		eth->ether_type = htons(ETHERTYPE_IP6);
+		expected_len = 60;
+		ret = tcp6_build(buf + sizeof(struct ether_header),
+				 sizeof(buf) - sizeof(struct ether_header),
+				 &src.ip6,
+				 &dst.ip6,
+				 seq,
+				 ack,
+				 rst,
+				 &len);
+		break;
+	default:
+		abort();
+	}
+
+	assert(ret == 0);
+	assert(len == expected_len);
+
+	hexdump(buf + sizeof(struct ether_header), len);
+
+	/* The extract calls overwrite src and dst from the packet */
+	switch (ntohs(eth->ether_type)) {
+	case ETHERTYPE_IP:
+		ret = tcp4_extract(buf + sizeof(struct ether_header),
+				   len,
+				   &src.ip,
+				   &dst.ip,
+				   &ack_out,
+				   &seq_out,
+				   &rst_out,
+				   &window);
+		break;
+	case ETHERTYPE_IP6:
+		ret = tcp6_extract(buf + sizeof(struct ether_header),
+				   len,
+				   &src.ip6,
+				   &dst.ip6,
+				   &ack_out,
+				   &seq_out,
+				   &rst_out,
+				   &window);
+		break;
+	default:
+		abort();
+	}
+
+	assert(ret == 0);
+
+	assert(seq == seq_out);
+	assert(ack == ack_out);
+	/* Only whether RST is set matters, not the exact non-zero value */
+	assert((rst != 0) == (rst_out != 0));
+	assert(window == htons(1234));
+
+	/* Extracted addresses must print back to the input strings */
+	ret = ctdb_sock_addr_to_buf(src_str_out, sizeof(src_str_out),
+				    &src, true);
+	assert(ret == 0);
+	ret = strcmp(src_str, src_str_out);
+	assert(ret == 0);
+
+	ret = ctdb_sock_addr_to_buf(dst_str_out, sizeof(dst_str_out),
+				    &dst, true);
+	assert(ret == 0);
+	ret = strcmp(dst_str, dst_str_out);
+	assert(ret == 0);
+}
+
+/* Print the command summary to stderr and exit with status 1 */
+static void usage(const char *prog)
+{
+	fprintf(stderr, "usage: %s <cmd> [<arg> ...]\n", prog);
+	fputs(" commands:\n", stderr);
+	fputs(" types\n", stderr);
+	fputs(" arp <ipaddr> <hwaddr> [reply]\n", stderr);
+	fputs(" tcp <src> <dst> <seq> <ack> <rst>\n", stderr);
+
+	exit(1);
+}
+
+/*
+ * Dispatch on the command in argv[1] ("types", "arp" or "tcp").  Any
+ * unknown command or wrong argument count goes to usage(), which
+ * exits with status 1.  Returns 0 after running a command.
+ */
+int main(int argc, char **argv)
+{
+	const char *cmd;
+
+	if (argc < 2) {
+		usage(argv[0]);
+	}
+
+	test_backtrace_setup();
+
+	cmd = argv[1];
+
+	if (strcmp(cmd, "types") == 0) {
+		test_types();
+		return 0;
+	}
+
+	if (strcmp(cmd, "arp") == 0) {
+		/*
+		 * Extra arg indicates that a reply should be
+		 * constructed for IPv4 - value is ignored
+		 */
+		bool want_reply = (argc == 5);
+
+		if (argc != 4 && !want_reply) {
+			usage(argv[0]);
+		}
+		test_arp(argv[2], argv[3], want_reply);
+		return 0;
+	}
+
+	if (strcmp(cmd, "tcp") == 0) {
+		if (argc != 7) {
+			usage(argv[0]);
+		}
+		test_tcp(argv[2], argv[3], argv[4], argv[5], argv[6]);
+		return 0;
+	}
+
+	usage(argv[0]);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/test_backtrace.c b/ctdb/tests/src/test_backtrace.c
new file mode 100644
index 0000000..aa3fc0c
--- /dev/null
+++ b/ctdb/tests/src/test_backtrace.c
@@ -0,0 +1,37 @@
+/*
+ Print a backtrace when a test aborts
+
+ Copyright (C) Martin Schwenke, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "lib/util/fault.h"
+#include "lib/util/signal.h"
+
+#include "tests/src/test_backtrace.h"
+
+/*
+ * SIGABRT handler: log a stack trace, restore the default SIGABRT
+ * disposition and then abort again.
+ */
+static void test_abort_backtrace_handler(int sig)
+{
+ log_stack_trace();
+ /* Back to SIG_DFL first, so the abort() below is not caught here again */
+ CatchSignal(SIGABRT, SIG_DFL);
+ abort();
+}
+
+/* Install test_abort_backtrace_handler() as the handler for SIGABRT */
+void test_backtrace_setup(void)
+{
+ CatchSignal(SIGABRT, test_abort_backtrace_handler);
+}
diff --git a/ctdb/tests/src/test_backtrace.h b/ctdb/tests/src/test_backtrace.h
new file mode 100644
index 0000000..a6089c9
--- /dev/null
+++ b/ctdb/tests/src/test_backtrace.h
@@ -0,0 +1,25 @@
+/*
+ Print a backtrace when a test aborts
+
+ Copyright (C) Martin Schwenke, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_TEST_BACKTRACE_H__
+#define __CTDB_TEST_BACKTRACE_H__
+
+/* Install a SIGABRT handler that logs a stack trace before aborting */
+void test_backtrace_setup(void);
+
+#endif /* __CTDB_TEST_BACKTRACE_H__ */
diff --git a/ctdb/tests/src/test_mutex_raw.c b/ctdb/tests/src/test_mutex_raw.c
new file mode 100644
index 0000000..8ebf77e
--- /dev/null
+++ b/ctdb/tests/src/test_mutex_raw.c
@@ -0,0 +1,434 @@
+/*
+ * Test the system robust mutex implementation
+ *
+ * Copyright (C) 2016 Amitay Isaacs
+ * Copyright (C) 2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*
+ * To run the test do the following:
+ *
+ * (a) Compile the test.
+ *
+ * gcc -O2 -g3 -o test-robust-mutex test-robust-mutex.c -lpthread
+ *
+ * (b) Start the "init" process.
+ *
+ * ./test-robust-mutex /tmp/shared-mutex init
+ *
+ * (c) Start any number of "worker" instances.
+ *
+ * ./test-robust-mutex <Shared memory file> worker <#> <Priority>
+ *
+ * <Shared memory file> e.g. /tmp/shared-mutex.
+ *
+ * <#> : Number of children processes.
+ *
+ * <Priority> : 0 - Normal, 1 - Realtime, 2 - Nice 20.
+ *
+ * For example:
+ *
+ * As non-root:
+ *
+ * $ while true ; do ./test-robust-mutex /tmp/foo worker 10 0 ; done;
+ *
+ * As root:
+ *
+ * while true ; do ./test-robust-mutex /tmp/foo worker 10 1 ; done;
+ *
+ * This creates 20 processes, 10 at normal priority and 10 at realtime
+ * priority, all taking the lock, being killed and recovering the lock.
+ *
+ * If while running (c) the processes block, it might mean that a futex wakeup
+ * was lost, or that the handoff of EOWNERDEAD did not happen correctly. In
+ * either case you can debug the resulting mutex like this:
+ *
+ * $ ./test-robust-mutex /tmp/shared-mutex debug
+ *
+ * This prints the PID of the process holding the mutex or nothing if
+ * the value was cleared by the kernel and now no process holds the mutex.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/wait.h>
+
+/* Define DEBUG to 1 to enable verbose debugging. */
+#define DEBUG 0
+
+/*
+ * Implement the worker.  The worker has to do the following things:
+ *
+ *  - Succeed at locking the mutex, including possible recovery.
+ *  - Kill itself (with SIGKILL) while still holding the mutex.
+ *
+ * Other workers are attempting exactly the same thing in order to
+ * test the loss and recovery of the robust mutex.
+ *
+ * filename: path of the file backing the shared robust mutex.  The file
+ * must already exist and be at least sizeof(pthread_mutex_t) bytes.
+ *
+ * Exits with EXIT_FAILURE if the file can not be opened or mapped, or
+ * if the mutex can not be marked consistent.
+ */
+static void worker (const char *filename)
+{
+	pthread_mutex_t *mutex;
+	void *addr;
+	int ret, fd;
+
+	/* Open the file and map the shared robust mutex. */
+	fd = open(filename, O_RDWR, 0600);
+	if (fd == -1) {
+		perror ("FAIL: open");
+		exit(EXIT_FAILURE);
+	}
+
+	addr = mmap(NULL,
+		    sizeof(pthread_mutex_t),
+		    PROT_READ|PROT_WRITE,
+		    MAP_SHARED|MAP_FILE,
+		    fd,
+		    0);
+	/* mmap() reports failure with MAP_FAILED, never with NULL. */
+	if (addr == MAP_FAILED) {
+		perror ("FAIL: mmap");
+		exit(EXIT_FAILURE);
+	}
+
+	mutex = (pthread_mutex_t *)addr;
+
+	/* Every process will lock once, and die once. */
+	printf("INFO: pid %u locking\n", getpid());
+	do {
+		ret = pthread_mutex_lock(mutex);
+
+#if DEBUG
+		fprintf(stderr,
+			"DEBUG: pid %u lock attempt, ret=%d\n",
+			getpid(),
+			ret);
+#endif
+
+		if (ret == EOWNERDEAD) {
+			int rc;
+
+			/*
+			 * The previous owner died holding the lock: mark
+			 * the mutex consistent and release it.  ret is
+			 * still EOWNERDEAD, so the loop locks again.
+			 */
+			rc = pthread_mutex_consistent(mutex);
+			if (rc == 0) {
+				pthread_mutex_unlock(mutex);
+			} else {
+				fprintf(stderr,
+					"FAIL: pthread_mutex_consistent "
+					"failed\n");
+				exit(EXIT_FAILURE);
+			}
+#if DEBUG
+			fprintf(stderr,
+				"DEBUG: pid %u recovery lock attempt, ret=%d\n",
+				getpid(),
+				ret);
+#endif
+			/* Will loop and try to lock again. */
+		}
+
+	} while (ret != 0);
+
+	/* Die while holding the lock so the next locker sees EOWNERDEAD. */
+	printf ("INFO: pid %u locked, now killing\n", getpid());
+	kill(getpid(), SIGKILL);
+}
+
+/* One of three priority modes. */
+#define PRIO_NORMAL 0
+#define PRIO_REALTIME 1
+#define PRIO_NICE_20 2
+
+/* One of three operation modes. */
+#define MODE_INIT 0
+#define MODE_WORKER 1
+#define MODE_DEBUG 2
+
+/*
+ * Print usage information to stderr and exit with EXIT_FAILURE.
+ *
+ * name: program name to show in the usage line (callers pass argv[0]).
+ */
+static void usage (const char *name)
+{
+ fprintf(stderr,
+ "Usage: %s <file> [init|worker|debug] [#] [0|1|2]\n",
+ name);
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * Set the process priority.
+ *
+ * priority: one of PRIO_NORMAL (no change), PRIO_REALTIME (SCHED_FIFO at
+ * priority 1) or PRIO_NICE_20 (nice(-20)).  Any other value is treated
+ * like PRIO_NORMAL.
+ *
+ * Failures are reported with perror() but are not fatal.
+ */
+static void set_priority (int priority)
+{
+	struct sched_param p;
+	int ret;
+
+	switch (priority) {
+	case PRIO_REALTIME:
+		/* Zero the whole struct: it may have more than one member. */
+		memset(&p, 0, sizeof(p));
+		p.sched_priority = 1;
+		ret = sched_setscheduler(0, SCHED_FIFO, &p);
+		if (ret == -1)
+			perror("FAIL: sched_setscheduler");
+		break;
+
+	case PRIO_NICE_20:
+		/*
+		 * nice() returns the new nice value, and -1 is a valid
+		 * one, so a failure can only be detected through errno.
+		 */
+		errno = 0;
+		ret = nice(-20);
+		if (ret == -1 && errno != 0)
+			perror("FAIL: nice");
+		break;
+
+	case PRIO_NORMAL:
+	default:
+		/* Normal priority is the default. */
+		break;
+	}
+}
+
+/*
+ * Entry point.  Usage: <file> init | worker <#> [<priority>] | debug
+ *
+ *  init:   size the backing file, initialise the process-shared robust
+ *          mutex in it and then sleep forever.
+ *  worker: take the mutex, fork <#> children that each lock the mutex
+ *          and kill themselves, release the mutex and reap the children.
+ *          <priority> is optional and defaults to PRIO_NORMAL.
+ *  debug:  try to lock the mutex, recovering it if its owner died, and
+ *          print the owner if it is busy.
+ *
+ * Exits with EXIT_FAILURE on a usage error or any failed system call.
+ */
+int main(int argc, const char **argv)
+{
+	int i, fd, ret, num_children, mode = -1, priority = PRIO_NORMAL;
+	const char *mode_str;
+	const char *file;
+	char *addr;
+	pthread_mutex_t *mutex;
+	pthread_mutexattr_t mattr;
+	pid_t pid;
+
+	/* One of three modes, init, worker, or debug. */
+	if (argc < 3 || argc > 5)
+		usage (argv[0]);
+
+	/*
+	 * The shared memory file.  Care should be taken here because if glibc
+	 * is upgraded between runs the internals of the robust mutex could
+	 * change.  See this blog post about the dangers:
+	 * https://developers.redhat.com/blog/2017/03/13/cc-library-upgrades-and-opaque-data-types-in-process-shared-memory/
+	 * and how to avoid problems inherent in this.
+	 */
+	file = argv[1];
+
+	/* Set the mode. */
+	mode_str = argv[2];
+	if (strcmp ("init", mode_str) == 0) {
+		mode = MODE_INIT;
+	} else if (strcmp ("worker", mode_str) == 0) {
+		mode = MODE_WORKER;
+	} else if (strcmp ("debug", mode_str) == 0) {
+		mode = MODE_DEBUG;
+	} else {
+		usage (argv[0]);
+	}
+
+	/* This is "worker" mode, so set the priority. */
+	if (mode == MODE_WORKER) {
+		/*
+		 * The number of children (argv[3]) is mandatory for a
+		 * worker.  The priority (argv[4]) is optional, so never
+		 * read it unless it was actually supplied.
+		 */
+		if (argc < 4)
+			usage (argv[0]);
+		if (argc > 4)
+			priority = atoi(argv[4]);
+		set_priority(priority);
+	}
+
+	/* All modes open the file. */
+	fd = open(file, O_CREAT|O_RDWR, 0600);
+	if (fd == -1) {
+		perror("FAIL: open");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = lseek(fd, 0, SEEK_SET);
+	if (ret != 0) {
+		perror("FAIL: lseek");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Truncate the file backing the mutex only in the init phase. */
+	if (mode == MODE_INIT) {
+		ret = ftruncate(fd, sizeof(pthread_mutex_t));
+		if (ret != 0) {
+			perror("FAIL: ftruncate");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	/* Map the robust mutex. */
+	addr = mmap(NULL,
+		    sizeof(pthread_mutex_t),
+		    PROT_READ|PROT_WRITE,
+		    MAP_SHARED|MAP_FILE,
+		    fd,
+		    0);
+	/* mmap() reports failure with MAP_FAILED, never with NULL. */
+	if (addr == MAP_FAILED) {
+		perror("FAIL: mmap");
+		exit(EXIT_FAILURE);
+	}
+
+	mutex = (pthread_mutex_t *)(void *)addr;
+
+	/*
+	 * In the debug mode we try to recover the mutex and print it.
+	 * WARNING: All other processes should be stuck, otherwise they may
+	 * change the value of the lock between trylock and the printing after
+	 * EBUSY.
+	 */
+	if (mode == MODE_DEBUG) {
+		ret = pthread_mutex_trylock(mutex);
+		if (ret == EOWNERDEAD) {
+			ret = pthread_mutex_consistent(mutex);
+			if (ret == 0) {
+				pthread_mutex_unlock(mutex);
+			} else {
+				fprintf(stderr,
+					"FAIL: pthread_mutex_consistent "
+					"failed\n");
+				exit (EXIT_FAILURE);
+			}
+		} else if (ret == EBUSY) {
+			/* Reads the owner field of the mutex internals. */
+			printf("INFO: pid=%u\n", mutex->__data.__owner);
+		} else if (ret == 0) {
+			pthread_mutex_unlock(mutex);
+		}
+		exit(EXIT_SUCCESS);
+	}
+
+	/*
+	 * Only the initializing process does initialization because it is
+	 * undefined behaviour to re-initialize an already initialized mutex
+	 * that was not destroyed.
+	 */
+	if (mode == MODE_INIT) {
+
+		ret = pthread_mutexattr_init(&mattr);
+		if (ret != 0) {
+			fprintf(stderr,
+				"FAIL: pthread_mutexattr_init failed\n");
+			exit(EXIT_FAILURE);
+		}
+
+		ret = pthread_mutexattr_settype(&mattr,
+						PTHREAD_MUTEX_ERRORCHECK);
+		if (ret != 0) {
+			fprintf(stderr,
+				"FAIL: pthread_mutexattr_settype failed\n");
+			exit(EXIT_FAILURE);
+		}
+
+		ret = pthread_mutexattr_setpshared(&mattr,
+						   PTHREAD_PROCESS_SHARED);
+		if (ret != 0) {
+			fprintf(stderr,
+				"FAIL: pthread_mutexattr_setpshared failed\n");
+			exit(EXIT_FAILURE);
+		}
+
+		ret = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST);
+		if (ret != 0) {
+			fprintf(stderr,
+				"FAIL: pthread_mutexattr_setrobust failed\n");
+			exit(EXIT_FAILURE);
+		}
+
+		ret = pthread_mutex_init(mutex, &mattr);
+		if (ret != 0) {
+			fprintf(stderr, "FAIL: pthread_mutex_init failed\n");
+			exit(EXIT_FAILURE);
+		}
+
+		printf ("INFO: init: Mutex initialization complete.\n");
+		/* Never exit. */
+		for (;;)
+			sleep (1);
+	}
+
+	/*
+	 * Acquire the mutex for the first time.  Might be dead.
+	 * Might also be concurrent with the high-priority threads.
+	 */
+	fprintf(stderr,
+		"INFO: parent: Acquiring mutex (pid = %d).\n",
+		getpid());
+	do {
+		ret = pthread_mutex_lock(mutex);
+
+		/* Not consistent? Try to make it so. */
+		if (ret == EOWNERDEAD) {
+			int rc;
+
+			rc = pthread_mutex_consistent(mutex);
+			if (rc == 0) {
+				pthread_mutex_unlock (mutex);
+			} else {
+				fprintf(stderr,
+					"FAIL: pthread_mutex_consistent "
+					"failed\n");
+				exit (EXIT_FAILURE);
+			}
+
+			/* Will loop and try to lock again. */
+			fprintf(stderr,
+				"INFO: parent: Unlock recovery ret = %d\n",
+				ret);
+		}
+
+	} while (ret != 0);
+
+	/*
+	 * Set the parent process into its own process group (hides the
+	 * children).
+	 */
+	setpgid(0, 0);
+
+	/* Create # of children. */
+	fprintf(stderr, "INFO: parent: Creating children\n");
+	num_children = atoi(argv[3]);
+
+	for (i = 0; i < num_children; i++) {
+		pid = fork();
+		if (pid < 0) {
+			fprintf(stderr, "FAIL: fork() failed\n");
+			exit(EXIT_FAILURE);
+		}
+		if (pid == 0) {
+			close(fd);
+			worker(file);
+			/* worker() kills itself, so this is a failure. */
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	fprintf(stderr, "INFO: parent: Waiting for children\n");
+
+	/* Unlock the recently acquired mutex or the old lost mutex. */
+	ret = pthread_mutex_unlock(mutex);
+	if (ret != 0) {
+		fprintf(stderr, "FAIL: pthread_mutex_unlock failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * All threads are running now, and each will take the lock and
+	 * die in turn.  When they are all dead we will exit and be started
+	 * again by the caller.
+	 */
+	for (i = 0; i < num_children; i++) {
+		int status;
+		pid = waitpid(-1, &status, 0);
+		if (pid <= 0) {
+			fprintf(stderr, "FAIL: waitpid() failed\n");
+			exit(EXIT_FAILURE);
+		}
+		fprintf(stderr,
+			"INFO: parent: Reaped %u\n",
+			(unsigned int) pid);
+	}
+
+	/* We never unlink fd. The file must be cleaned up by the caller. */
+	close(fd);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/ctdb/tests/src/test_options.c b/ctdb/tests/src/test_options.c
new file mode 100644
index 0000000..2c64404
--- /dev/null
+++ b/ctdb/tests/src/test_options.c
@@ -0,0 +1,245 @@
+/*
+ CTDB tests commandline options
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <assert.h>
+#include <popt.h>
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/path.h"
+
+#include "tests/src/test_options.h"
+
+static struct test_options _values;
+
+/*
+ * popt table for the basic test options.  Each entry stores its parsed
+ * value directly into the matching field of the file-static _values.
+ */
+static struct poptOption options_basic[] = {
+ {
+ .longName = "socket",
+ .shortName = 's',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &_values.socket,
+ .descrip = "CTDB socket path",
+ .argDescrip = "filename",
+ },
+ {
+ .longName = "timelimit",
+ .shortName = 't',
+ .argInfo = POPT_ARG_INT,
+ .arg = &_values.timelimit,
+ .descrip = "Time limit (in seconds)",
+ },
+ {
+ .longName = "num-nodes",
+ .shortName = 'n',
+ .argInfo = POPT_ARG_INT,
+ .arg = &_values.num_nodes,
+ .descrip = "Number of cluster nodes",
+ },
+ {
+ .longName = "debug",
+ .shortName = 'd',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &_values.debugstr,
+ .descrip = "Debug level",
+ },
+ {
+ .longName = "interactive",
+ .shortName = 'i',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &_values.interactive,
+ .val = 0,
+ .descrip = "Interactive output",
+ },
+ POPT_TABLEEND
+};
+
+/*
+ * Table entry that includes options_basic in another popt table.  The
+ * expansion ends with a comma, so it is used without a trailing comma.
+ */
+#define TEST_OPTIONS_BASIC \
+ { \
+ .argInfo = POPT_ARG_INCLUDE_TABLE, \
+ .arg = options_basic, \
+ .descrip = "General options:", \
+ },
+
+/*
+ * popt table for the database test options.  Each entry stores its parsed
+ * string directly into the matching field of the file-static _values.
+ */
+static struct poptOption options_database[] = {
+ {
+ .longName = "database",
+ .shortName = 'D',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &_values.dbname,
+ .descrip = "CTDB database name",
+ },
+ {
+ .longName = "key",
+ .shortName = 'k',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &_values.keystr,
+ .descrip = "Name of database key",
+ },
+ {
+ .longName = "value",
+ .shortName = 'v',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &_values.valuestr,
+ .descrip = "Value of database key",
+ },
+ {
+ .longName = "dbtype",
+ .shortName = 'T',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &_values.dbtype,
+ .descrip = "CTDB database type",
+ },
+ POPT_TABLEEND
+};
+
+/*
+ * Table entry that includes options_database in another popt table.  The
+ * expansion ends with a comma, so it is used without a trailing comma.
+ */
+#define TEST_OPTIONS_DATABASE \
+ { \
+ .argInfo = POPT_ARG_INCLUDE_TABLE, \
+ .arg = options_database, \
+ .descrip = "Database options:", \
+ },
+
+/*
+ * Fill in the defaults for the basic options: 10 second time limit, one
+ * node, debug level "ERR", non-interactive, and the socket path returned
+ * by path_socket() for "ctdbd".  Asserts that the socket path is not NULL.
+ */
+static void set_defaults_basic(struct test_options *opts)
+{
+	const char *sockpath;
+
+	/* Allocated on the NULL context and never freed (leaked) */
+	sockpath = path_socket(NULL, "ctdbd");
+	assert(sockpath != NULL);
+	opts->socket = sockpath;
+
+	opts->interactive = 0;
+	opts->debugstr = "ERR";
+	opts->num_nodes = 1;
+	opts->timelimit = 10;
+}
+
+/*
+ * Fill in the defaults for the database options: database type "volatile",
+ * and no database name, key or value.
+ */
+static void set_defaults_database(struct test_options *opts)
+{
+	opts->dbtype = "volatile";
+
+	/* These have no default */
+	opts->valuestr = NULL;
+	opts->keystr = NULL;
+	opts->dbname = NULL;
+}
+
+/*
+ * Validate the basic options.  Parses opts->debugstr and, on success,
+ * applies it with debuglevel_set().
+ *
+ * Returns true on success, false (after printing an error to stderr) if
+ * the debug string can not be parsed.
+ */
+static bool verify_options_basic(struct test_options *opts)
+{
+	int level;
+
+	if (debug_level_parse(opts->debugstr, &level)) {
+		debuglevel_set(level);
+		return true;
+	}
+
+	fprintf(stderr, "Error: Invalid debug string '%s'\n",
+		opts->debugstr);
+	return false;
+}
+
+/*
+ * Validate the database options: a database name and a key are
+ * required, and the database type must be one of "volatile",
+ * "persistent" or "replicated".
+ *
+ * Returns true if the options are valid, otherwise prints an error to
+ * stderr and returns false.
+ */
+static bool verify_options_database(struct test_options *opts)
+{
+	if (opts->dbname == NULL) {
+		fprintf(stderr, "Error: Please specify database\n");
+		return false;
+	}
+	if (opts->keystr == NULL) {
+		fprintf(stderr, "Error: Please specify key name\n");
+		return false;
+	}
+
+	if ((strcmp(opts->dbtype, "volatile") != 0) &&
+	    (strcmp(opts->dbtype, "persistent") != 0) &&
+	    (strcmp(opts->dbtype, "replicated") != 0)) {
+		/* Report this failure too, like the checks above */
+		fprintf(stderr, "Error: Invalid database type '%s'\n",
+			opts->dbtype);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Parse the command line in argv using the given popt table.
+ *
+ * Parsed values are written through the .arg pointers of the table.
+ * Any return from poptGetNextOpt() other than -1 (end of options) is
+ * treated as an error and reported on stderr.
+ *
+ * Returns true if all options were parsed, false otherwise.
+ */
+static bool process_options_common(int argc, const char **argv,
+				   struct poptOption *options)
+{
+	poptContext pc;
+	int opt;
+
+	pc = poptGetContext(argv[0], argc, argv, options,
+			    POPT_CONTEXT_KEEP_FIRST);
+	if (pc == NULL) {
+		fprintf(stderr, "Failed to set up option parsing\n");
+		return false;
+	}
+
+	opt = poptGetNextOpt(pc);
+	if (opt != -1) {
+		/* poptBadOption() points into pc, so print before freeing */
+		fprintf(stderr, "Invalid option %s: %s\n",
+			poptBadOption(pc, 0), poptStrerror(opt));
+		poptFreeContext(pc);
+		return false;
+	}
+
+	/*
+	 * Free the context on success too; it used to be leaked.
+	 * NOTE(review): assumes popt copies POPT_ARG_STRING values, so they
+	 * stay valid after the context is freed - confirm for the popt in use.
+	 */
+	poptFreeContext(pc);
+
+	return true;
+}
+
+/*
+ * Parse and validate the basic test options.
+ *
+ * Resets the file-static option values to their defaults, parses argv
+ * and verifies the result.  On success *opts points at the file-static
+ * values and true is returned.  On failure *opts is left untouched and
+ * false is returned.
+ */
+bool process_options_basic(int argc, const char **argv,
+			   const struct test_options **opts)
+{
+	struct poptOption options[] = {
+		POPT_AUTOHELP
+		TEST_OPTIONS_BASIC
+		POPT_TABLEEND
+	};
+	bool ok;
+
+	set_defaults_basic(&_values);
+
+	/* Verification only runs if parsing succeeded */
+	ok = process_options_common(argc, argv, options) &&
+	     verify_options_basic(&_values);
+	if (!ok) {
+		return false;
+	}
+
+	*opts = &_values;
+	return true;
+}
+
+/*
+ * Parse and validate the basic and database test options.
+ *
+ * Resets the file-static option values to their defaults, parses argv
+ * and verifies the basic options and then the database options.  On
+ * success *opts points at the file-static values and true is returned.
+ * On failure *opts is left untouched and false is returned.
+ */
+bool process_options_database(int argc, const char **argv,
+			      const struct test_options **opts)
+{
+	struct poptOption options[] = {
+		POPT_AUTOHELP
+		TEST_OPTIONS_BASIC
+		TEST_OPTIONS_DATABASE
+		POPT_TABLEEND
+	};
+	bool ok;
+
+	set_defaults_basic(&_values);
+	set_defaults_database(&_values);
+
+	/* Each step only runs if the previous one succeeded */
+	ok = process_options_common(argc, argv, options) &&
+	     verify_options_basic(&_values) &&
+	     verify_options_database(&_values);
+	if (!ok) {
+		return false;
+	}
+
+	*opts = &_values;
+	return true;
+}
diff --git a/ctdb/tests/src/test_options.h b/ctdb/tests/src/test_options.h
new file mode 100644
index 0000000..1e194c9
--- /dev/null
+++ b/ctdb/tests/src/test_options.h
@@ -0,0 +1,44 @@
+/*
+ CTDB tests commandline options
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __TEST_OPTIONS_H__
+#define __TEST_OPTIONS_H__
+
+/* Command line option values shared by the CTDB test programs */
+struct test_options {
+ /* Basic options */
+ const char *socket; /* CTDB socket path */
+ int timelimit; /* Time limit (in seconds) */
+ int num_nodes; /* Number of cluster nodes */
+ const char *debugstr; /* Debug level, as a string */
+ int interactive; /* Non-zero for interactive output */
+
+ /* Database options */
+ const char *dbname; /* CTDB database name */
+ const char *keystr; /* Name of database key */
+ const char *valuestr; /* Value of database key */
+ const char *dbtype; /* CTDB database type */
+};
+
+bool process_options_basic(int argc, const char **argv,
+ const struct test_options **opts);
+
+bool process_options_database(int argc, const char **argv,
+ const struct test_options **opts);
+
+#endif /* __TEST_OPTIONS_H__ */
diff --git a/ctdb/tests/src/tmon_ping_test.c b/ctdb/tests/src/tmon_ping_test.c
new file mode 100644
index 0000000..c0c0aae
--- /dev/null
+++ b/ctdb/tests/src/tmon_ping_test.c
@@ -0,0 +1,381 @@
+/*
+ Test trivial FD monitoring
+
+ Copyright (C) Martin Schwenke, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <assert.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "common/tmon.h"
+
+#include "tests/src/test_backtrace.h"
+
+/* State for the request created by test_send() */
+struct test_state {
+ /* Prefix for messages printed to stderr ("parent" or "child") */
+ const char *label;
+ /* Seconds to wait asynchronously before the blocking sleep */
+ unsigned long async_wait_time;
+ /* Seconds to sleep() after the asynchronous wait; 0 means no sleep */
+ unsigned long blocking_sleep_time;
+};
+
+static void test_tmon_ping_done(struct tevent_req *subreq);
+static void test_async_wait_done(struct tevent_req *subreq);
+
+/*
+ * Start a test request that runs two sub-requests in parallel: a
+ * tmon_ping request on fd, and a wakeup timer that fires after
+ * async_wait_time seconds (see test_async_wait_done()).
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *test_send(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    const char *label,
+				    int fd,
+				    int direction,
+				    unsigned long timeout,
+				    unsigned long interval,
+				    unsigned long async_wait_time,
+				    unsigned long blocking_sleep_time)
+{
+	struct tevent_req *req;
+	struct tevent_req *ping_req;
+	struct tevent_req *wait_req;
+	struct test_state *state;
+	struct timeval wakeup_time;
+
+	req = tevent_req_create(mem_ctx, &state, struct test_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->label = label;
+	state->async_wait_time = async_wait_time;
+	state->blocking_sleep_time = blocking_sleep_time;
+
+	/* Ping monitoring on fd */
+	ping_req = tmon_ping_send(state, ev, fd, direction, timeout, interval);
+	if (tevent_req_nomem(ping_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(ping_req, test_tmon_ping_done, req);
+
+	/* The timer is always scheduled; it is only announced if non-zero */
+	if (state->async_wait_time != 0) {
+		fprintf(stderr,
+			"%s: async wait start %lu\n",
+			state->label,
+			state->async_wait_time);
+	}
+	wakeup_time = tevent_timeval_current_ofs((uint32_t)async_wait_time, 0);
+	wait_req = tevent_wakeup_send(state, ev, wakeup_time);
+	if (tevent_req_nomem(wait_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(wait_req, test_async_wait_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the tmon_ping sub-request.  Reports the
+ * outcome on stderr and completes the test request with the same result.
+ */
+static void test_tmon_ping_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct test_state *state = tevent_req_data(req, struct test_state);
+	int err;
+	bool ok;
+
+	ok = tmon_ping_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		fprintf(stderr, "%s: done\n", state->label);
+		tevent_req_done(req);
+		return;
+	}
+
+	/* Failure: describe the error before passing it on */
+	if (err == EPIPE) {
+		fprintf(stderr, "%s: pipe closed\n", state->label);
+	} else if (err == ETIMEDOUT) {
+		fprintf(stderr, "%s: ping timeout\n", state->label);
+	} else {
+		fprintf(stderr, "%s: error (%d)\n", state->label, err);
+	}
+	tevent_req_error(req, err);
+}
+
+/*
+ * Completion callback for the wakeup timer.  Optionally blocks the
+ * process in sleep() for blocking_sleep_time seconds, then completes
+ * the test request.  An interrupted sleep fails the request with EINTR.
+ */
+static void test_async_wait_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct test_state *state = tevent_req_data(req, struct test_state);
+	bool woke;
+
+	woke = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (!woke) {
+		/* Reported but otherwise ignored */
+		fprintf(stderr,
+			"%s: tevent_wakeup_recv() failed\n",
+			state->label);
+	}
+	if (state->async_wait_time != 0) {
+		fprintf(stderr, "%s: async wait end\n", state->label);
+	}
+
+	if (state->blocking_sleep_time != 0) {
+		unsigned int remaining;
+
+		fprintf(stderr,
+			"%s: blocking sleep start %lu\n",
+			state->label,
+			state->blocking_sleep_time);
+		remaining = sleep((unsigned int)state->blocking_sleep_time);
+		fprintf(stderr,
+			"%s: blocking sleep end\n",
+			state->label);
+		if (remaining != 0) {
+			/* sleep() returned early */
+			tevent_req_error(req, EINTR);
+			return;
+		}
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a test request.  Returns true on success.  On
+ * failure returns false, with the error reported through perr by
+ * tevent_req_is_unix_error().
+ */
+static bool test_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * Run one side (parent or child) of a ping test.
+ *
+ * The child first blocks reading one NUL byte from sync_fd; the parent
+ * writes that byte to sync_fd once its own request has been created.
+ * Both then run the request to completion on a private event context.
+ *
+ * Returns 0 on success, ENOMEM if setup fails, EIO if tevent_req_poll()
+ * fails, or the error reported by the request.
+ *
+ * NOTE(review): mem_ctx is not freed before returning.
+ */
+static int test_one(bool is_parent,
+ int sync_fd,
+ int fd,
+ int direction,
+ unsigned long timeout,
+ unsigned long interval,
+ unsigned long async_wait_time,
+ unsigned long blocking_sleep_time)
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct tevent_req *req;
+ bool status;
+ char buf[1] = "";
+ ssize_t count;
+ int err;
+ int ret;
+
+ /* Child: wait for the parent's go-ahead byte */
+ if (!is_parent) {
+ count = read(sync_fd, buf, sizeof(buf));
+ assert(count == 1);
+ assert(buf[0] == '\0');
+ close(sync_fd);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ req = test_send(mem_ctx,
+ ev,
+ is_parent ? "parent" : "child",
+ fd,
+ direction,
+ timeout,
+ interval,
+ async_wait_time,
+ blocking_sleep_time);
+ if (req == NULL) {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ /* Parent: release the child now that the request exists */
+ if (is_parent) {
+ count = write(sync_fd, buf, sizeof(buf));
+ assert(count == 1);
+ }
+
+ status = tevent_req_poll(req, ev);
+ if (!status) {
+ ret = EIO;
+ goto done;
+ }
+
+ status = test_recv(req, &err);
+ ret = status ? 0 : err;
+
+done:
+ return ret;
+}
+
+/*
+ * Fork a child and run test_one() in both processes, connected by a
+ * socketpair, with a pipe used to synchronise startup.
+ *
+ * Asserts that the parent's test_one() result equals parent_result and,
+ * if the child exited normally, that its exit status equals child_result.
+ */
+static void test(unsigned long parent_timeout,
+ unsigned long parent_interval,
+ unsigned long parent_async_wait_time,
+ unsigned long parent_blocking_sleep_time,
+ int parent_result,
+ unsigned long child_timeout,
+ unsigned long child_interval,
+ unsigned long child_async_wait_time,
+ unsigned long child_blocking_sleep_time,
+ int child_result)
+{
+ int sync[2];
+ int fd[2];
+ pid_t pid;
+ int wstatus;
+ int ret;
+
+ /* Pipe for synchronisation */
+ ret = pipe(sync);
+ assert(ret == 0);
+
+ /* Socket pair monitored by the two sides */
+ ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
+ assert(ret == 0);
+
+ pid = fork();
+ assert(pid != -1);
+
+ if (pid == 0) {
+ /* child */
+ close(sync[1]);
+ close(fd[0]);
+
+ ret = test_one(false,
+ sync[0],
+ fd[1],
+ TMON_FD_BOTH,
+ child_timeout,
+ child_interval,
+ child_async_wait_time,
+ child_blocking_sleep_time);
+ /* The result becomes the exit status checked by the parent */
+ _exit(ret);
+ }
+
+ /* Parent */
+ close(sync[0]);
+ close(fd[1]);
+
+ ret = test_one(true,
+ sync[1],
+ fd[0],
+ TMON_FD_BOTH,
+ parent_timeout,
+ parent_interval,
+ parent_async_wait_time,
+ parent_blocking_sleep_time);
+ assert(ret == parent_result);
+
+ /* Close to mimic exit, so child status can be checked below */
+ close(fd[0]);
+
+ /* Abort if child failed */
+ /*
+ * NOTE(review): the waitpid() result is not checked, and a child that
+ * was terminated by a signal is not treated as a failure - confirm
+ * that this is intended.
+ */
+ waitpid(pid, &wstatus, 0);
+ if (WIFEXITED(wstatus)) {
+ assert(WEXITSTATUS(wstatus) == child_result);
+ }
+}
+
+/* Command line inputs for one side (parent or child) of the test */
+struct test_inputs {
+ /* Ping timeout, passed through to tmon_ping_send() */
+ unsigned int timeout;
+ /* Ping interval; "false"/"true" on the command line map to 0/1 */
+ unsigned int interval;
+ /* Seconds to wait asynchronously */
+ unsigned int async_wait_time;
+ /* Seconds to sleep() after the asynchronous wait */
+ unsigned int blocking_sleep_time;
+ /* Expected result of test_one() for this side */
+ int expected_result;
+};
+
+/*
+ * Parse the 5 command line arguments for one side of the test into
+ * inputs.  args[0] may be "false" (interval 0), "true" (interval 1) or
+ * a number; the remaining arguments are numbers parsed with strtoul().
+ */
+static void get_test_inputs(const char **args, struct test_inputs *inputs)
+{
+	const char *send_pings = args[0];
+
+	if (strcmp(send_pings, "true") == 0) {
+		inputs->interval = 1;
+	} else if (strcmp(send_pings, "false") == 0) {
+		inputs->interval = 0;
+	} else {
+		inputs->interval = strtoul(send_pings, NULL, 0);
+	}
+
+	inputs->timeout = strtoul(args[1], NULL, 0);
+	inputs->async_wait_time = (unsigned int)strtoul(args[2], NULL, 0);
+	inputs->blocking_sleep_time = (unsigned int)strtoul(args[3], NULL, 0);
+	inputs->expected_result = (int)strtoul(args[4], NULL, 0);
+}
+
+/*
+ * Print the usage message (5 arguments for the parent followed by 5 for
+ * the child) to stderr and exit with status 1.
+ */
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s "
+ "\\\n\t"
+ "<parent_send_pings> "
+ "<parent_ping_timeout> "
+ "<parent_async_wait_time> "
+ "<parent_blocking_sleep_time> "
+ "<parent_expected_result> "
+ "\\\n\t"
+ "<child_send_pings> "
+ "<child_ping_timeout> "
+ "<child_async_wait_time> "
+ "<child_blocking_sleep_time> "
+ "<child_expected_result> "
+ "\n",
+ prog);
+ exit(1);
+}
+
+/*
+ * Entry point: expects exactly 10 arguments, 5 describing the parent side
+ * followed by 5 describing the child side (see usage()).
+ */
+int main(int argc, const char **argv)
+{
+ struct test_inputs parent;
+ struct test_inputs child;
+
+ if (argc != 11) {
+ usage(argv[0]);
+ }
+
+ /* Log a backtrace if an assert() below aborts */
+ test_backtrace_setup();
+
+ /* argv[1..5] describe the parent, argv[6..10] the child */
+ get_test_inputs(&argv[1], &parent);
+ get_test_inputs(&argv[6], &child);
+
+ test(parent.timeout,
+ parent.interval,
+ parent.async_wait_time,
+ parent.blocking_sleep_time,
+ parent.expected_result,
+ child.timeout,
+ child.interval,
+ child.async_wait_time,
+ child.blocking_sleep_time,
+ child.expected_result);
+
+ return 0;
+}
diff --git a/ctdb/tests/src/tmon_test.c b/ctdb/tests/src/tmon_test.c
new file mode 100644
index 0000000..10eaa72
--- /dev/null
+++ b/ctdb/tests/src/tmon_test.c
@@ -0,0 +1,406 @@
+/*
+ Test trivial FD monitoring
+
+ Copyright (C) Martin Schwenke, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "common/tmon.h"
+
+#include "tests/src/test_backtrace.h"
+
+/* State for the request created by test_write_send() */
+struct test_write_state {
+ /* Script of characters consumed by test_write_callback() */
+ const char *write_data;
+ /* NOTE(review): not referenced by the code visible in this file */
+ size_t write_data_len;
+ /* Index of the next character of write_data to process */
+ unsigned int offset;
+ /* The tmon sub-request, kept so the callback can call tmon_write() */
+ struct tevent_req *req;
+};
+
+/*
+ * tmon write callback: turn the next character(s) of the write script
+ * into a packet.
+ *
+ *   '0'          EXIT packet
+ *   '1'-'9'      ERRNO packet with that value
+ *   '#nnn[;]'    ERRNO packet with decimal value nnn
+ *   '!'          PING packet
+ *   '.'          no packet this time (TMON_STATUS_SKIP)
+ *   other punctuation: failure (EDOM)
+ *   other non-space ASCII: ASCII packet containing the character
+ *   anything else: CUSTOM packet containing the character
+ *
+ * Returns 0 if pkt should be written, TMON_STATUS_SKIP to write
+ * nothing, TMON_STATUS_EXIT when the script is exhausted, EDOM if the
+ * packet could not be built, or EIO if tmon_write() fails in
+ * write-skip mode.
+ */
+static int test_write_callback(void *private_data, struct tmon_pkt *pkt)
+{
+	struct test_write_state *state = talloc_get_type_abort(
+		private_data, struct test_write_state);
+	bool status;
+	size_t len;
+	char *end;
+	int err;
+	char c;
+	unsigned char uc;
+	const char *t;
+
+	assert(state->write_data != NULL);
+
+	len = strlen(state->write_data);
+	if (state->offset >= len) {
+		return TMON_STATUS_EXIT;
+	}
+
+	c = state->write_data[state->offset];
+	state->offset++;
+
+	/*
+	 * The <ctype.h> classifiers are only defined for EOF and values
+	 * representable as unsigned char, so never pass a (possibly
+	 * negative) plain char.
+	 */
+	uc = (unsigned char)c;
+
+	if (isdigit(uc)) {
+		err = c - '0';
+
+		if (err == 0) {
+			status = tmon_set_exit(pkt);
+		} else {
+			status = tmon_set_errno(pkt, err);
+		}
+	} else if (ispunct(uc)) {
+		switch (c) {
+		case '.':
+			return TMON_STATUS_SKIP;
+		case '!':
+			status = tmon_set_ping(pkt);
+			break;
+		case '#':
+			/* Additional errno syntax: #nnn[;] */
+			t = &state->write_data[state->offset];
+			err = (int)strtol(t, &end, 10);
+			state->offset += (end - t);
+			if (state->write_data[state->offset] == ';') {
+				state->offset++;
+			}
+			status = tmon_set_errno(pkt, err);
+			break;
+		default:
+			status = false;
+		}
+	} else if (isascii(uc) && !isspace(uc)) {
+		status = tmon_set_ascii(pkt, c);
+	} else {
+		/* Conversion from plain char is kept as it was */
+		status = tmon_set_custom(pkt, (uint16_t)c);
+	}
+
+	if (!status) {
+		return EDOM;
+	}
+
+	t = getenv("CTDB_TEST_TMON_WRITE_SKIP_MODE");
+	if (t == NULL) {
+		return 0;
+	}
+
+	/*
+	 * This is write-skip mode: tmon_write() is called directly
+	 * here in the callback and TMON_STATUS_SKIP is returned.  This
+	 * allows tmon_write() to be exercised by reusing test cases
+	 * rather than writing extra test code and test cases.
+	 */
+
+	status = tmon_write(state->req, pkt);
+	if (!status) {
+		return EIO;
+	}
+
+	return TMON_STATUS_SKIP;
+}
+
+static void test_tmon_done(struct tevent_req *subreq);
+
+/*
+ * Start a request that monitors fd for writing with tmon, using
+ * test_write_callback() to generate packets from write_data.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *test_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd,
+ const char *write_data)
+{
+ struct tevent_req *req, *subreq;
+ struct test_write_state *state;
+ struct tmon_actions actions = {
+ .write_callback = test_write_callback,
+ };
+
+ req = tevent_req_create(mem_ctx, &state, struct test_write_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->write_data = write_data;
+ state->offset = 0;
+
+ /* Write-only monitoring: timeout 0, interval 1 */
+ subreq = tmon_send(state,
+ ev,
+ fd,
+ TMON_FD_WRITE,
+ 0,
+ 1,
+ &actions,
+ state);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, test_tmon_done, req);
+
+ /* Nasty hack, but OK to cheapen testing - see test_write_callback() */
+ state->req = subreq;
+
+ return req;
+}
+
+/*
+ * Completion callback for the tmon sub-request: complete the parent
+ * request with the same result.
+ */
+static void test_tmon_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err;
+	bool ok;
+
+	ok = tmon_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		tevent_req_done(req);
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of a write request.  Returns true on success.  On
+ * failure returns false, with the error reported through perr by
+ * tevent_req_is_unix_error().
+ */
+static bool test_write_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/* Timeout callback installed when <timeout_ok> is true: always returns 0 */
+static int test_timeout_ok_callback(void *private_data)
+{
+ return 0;
+}
+
+/*
+ * tmon read callback: print a line to stdout describing a received
+ * PING, ASCII or CUSTOM packet.  Any other packet is silently ignored.
+ * Always returns 0.
+ */
+static int test_read_callback(void *private_data, struct tmon_pkt *pkt)
+{
+	uint16_t custom;
+	char ch;
+
+	/* Try each parser in turn; the first match is printed */
+	if (tmon_parse_ping(pkt)) {
+		printf("PING\n");
+	} else if (tmon_parse_ascii(pkt, &ch)) {
+		printf("ASCII %c\n", ch);
+	} else if (tmon_parse_custom(pkt, &custom)) {
+		printf("CUSTOM 0x%"PRIx16"\n", custom);
+	} else {
+		/* Nothing printed, so nothing to flush */
+		return 0;
+	}
+
+	fflush(stdout);
+	return 0;
+}
+
+/* Close callback installed when <close_ok> is true: always returns 0 */
+static int test_close_ok_callback(void *private_data)
+{
+ return 0;
+}
+
+/* State for the request created by test_read_send(); it has no members */
+struct test_read_state {
+};
+
+/*
+ * Start a request that monitors fd for reading with tmon, printing
+ * received packets via test_read_callback().
+ *
+ * close_ok and timeout_ok select whether the "ok" close and timeout
+ * callbacks are installed; otherwise those actions are left unset.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *test_read_send(TALLOC_CTX *mem_ctx,
+					 struct tevent_context *ev,
+					 int fd,
+					 bool close_ok,
+					 unsigned long timeout,
+					 bool timeout_ok)
+{
+	struct tevent_req *req;
+	struct tevent_req *tmon_req;
+	struct test_read_state *state;
+	struct tmon_actions actions = {
+		.read_callback = test_read_callback,
+		.timeout_callback =
+			timeout_ok ? test_timeout_ok_callback : NULL,
+		.close_callback =
+			close_ok ? test_close_ok_callback : NULL,
+	};
+
+	req = tevent_req_create(mem_ctx, &state, struct test_read_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Read-only monitoring: given timeout, interval 0 */
+	tmon_req = tmon_send(state,
+			     ev,
+			     fd,
+			     TMON_FD_READ,
+			     timeout,
+			     0,
+			     &actions,
+			     state);
+	if (tevent_req_nomem(tmon_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(tmon_req, test_tmon_done, req);
+
+	return req;
+}
+
+/*
+ * Collect the result of a read request.  Returns true on success.  On
+ * failure returns false, with the error reported through perr by
+ * tevent_req_is_unix_error().
+ */
+static bool test_read_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/*
+ * Fork a reader child and run the writer in the parent, connected by a
+ * pipe.
+ *
+ * The child monitors the read end with test_read_send() and prints
+ * "READER OK" or "READER ERR=<n>"; the parent drives the write end from
+ * write_data and prints "WRITER OK" or "WRITER ERR=<n>".
+ *
+ * The child exits with the reader's error (0 on success).  It used to
+ * exit with the pipe() result, which is always 0.  The parent waits for
+ * the child but does not inspect its exit status.
+ */
+static void test(const char *write_data,
+		 bool close_ok,
+		 unsigned long timeout,
+		 bool timeout_ok)
+{
+	TALLOC_CTX *mem_ctx;
+	struct tevent_context *ev;
+	struct tevent_req *req;
+	int fd[2];
+	pid_t pid;
+	int wstatus;
+	bool status;
+	int err;
+	int ret;
+
+	mem_ctx = talloc_new(NULL);
+	assert(mem_ctx != NULL);
+
+	ev = tevent_context_init(mem_ctx);
+	assert(ev != NULL);
+
+	ret = pipe(fd);
+	assert(ret == 0);
+
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		/* child */
+		close(fd[1]);
+
+		req = test_read_send(mem_ctx,
+				     ev,
+				     fd[0],
+				     close_ok,
+				     timeout,
+				     timeout_ok);
+		assert(req != NULL);
+
+		status = tevent_req_poll(req, ev);
+		assert(status);
+
+		status = test_read_recv(req, &err);
+		if (status) {
+			err = 0;
+			printf("READER OK\n");
+		} else {
+			printf("READER ERR=%d\n", err);
+		}
+		fflush(stdout);
+
+		/* Exit with the reader's result */
+		_exit(err);
+	}
+
+	/* Parent */
+	close(fd[0]);
+
+	req = test_write_send(mem_ctx,
+			      ev,
+			      fd[1],
+			      write_data);
+	assert(req != NULL);
+
+	status = tevent_req_poll(req, ev);
+	assert(status);
+
+	status = test_write_recv(req, &err);
+	if (status) {
+		err = 0;
+		printf("WRITER OK\n");
+	} else {
+		printf("WRITER ERR=%d\n", err);
+	}
+	fflush(stdout);
+
+	/* Close to mimic exit, so the child sees the pipe close */
+	close(fd[1]);
+
+	waitpid(pid, &wstatus, 0);
+}
+
+/*
+ * Print the usage message, including the write script syntax handled
+ * by test_write_callback(), to stderr and exit with status 1.
+ */
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s <write_data> <close_ok> <timeout> <timeout_ok>\n\n"
+		" <write_data> is processed by test_write_callback(), "
+		"1 character per second:\n"
+		" 0: write EXIT\n"
+		" 1-9: write ERRNO 1-9\n"
+		" #nnn[;]: write ERRNO nnn\n"
+		" !: write PING\n"
+		" .: skip write\n"
+		" other <punct>: fail with EDOM\n"
+		" <space>: write CUSTOM containing <space>\n"
+		" other <ascii>: write ASCII containing <ascii>\n"
+		" other: write CUSTOM\n"
+		" See test_write_callback() for more details\n"
+		,
+		prog);
+	exit(1);
+}
+
+/*
+ * Entry point: <write_data> <close_ok> <timeout> <timeout_ok>.
+ *
+ * <close_ok> and <timeout_ok> are true only if given as "true".  A
+ * <timeout> that parses as 0 is replaced by a default of 20.
+ */
+int main(int argc, const char **argv)
+{
+	const char *write_data;
+	unsigned long timeout;
+	bool close_ok;
+	bool timeout_ok;
+
+	if (argc != 5) {
+		usage(argv[0]);
+	}
+
+	test_backtrace_setup();
+
+	write_data = argv[1];
+	close_ok = (strcmp(argv[2], "true") == 0);
+	timeout_ok = (strcmp(argv[4], "true") == 0);
+
+	timeout = strtoul(argv[3], NULL, 0);
+	if (timeout == 0) {
+		/*
+		 * Default timeout that should not come into play but
+		 * will cause tests to fail after a reasonable amount
+		 * of time, if something unexpected happens.
+		 */
+		timeout = 20;
+	}
+
+	test(write_data, close_ok, timeout, timeout_ok);
+
+	return 0;
+}
diff --git a/ctdb/tests/src/transaction_loop.c b/ctdb/tests/src/transaction_loop.c
new file mode 100644
index 0000000..c6bf35d
--- /dev/null
+++ b/ctdb/tests/src/transaction_loop.c
@@ -0,0 +1,419 @@
+/*
+ simple ctdb benchmark for persistent databases
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+/*
+ * State of the transaction loop computation; allocated by
+ * transaction_loop_send() and shared by all of its callbacks.
+ */
+struct transaction_loop_state {
+ /* Event context, client connection and database passed to _send() */
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct ctdb_db_context *ctdb_db;
+ /* Number of entries in counter[] / old_counter[] */
+ int num_nodes;
+ /* Seconds: run time of the loop and timeout of each transaction step */
+ int timelimit;
+ /* When 1, counters are printed every second (see each_second callback) */
+ int interactive;
+ /* Record key, pointing at the caller's keystr (not copied) */
+ TDB_DATA key;
+ /* Result of ctdb_client_pnn(); index of this node's counter */
+ uint32_t pnn;
+ /* Handle of the transaction currently in progress */
+ struct ctdb_transaction_handle *h;
+ /* Previously accepted counters and most recently written counters */
+ uint32_t *old_counter, *counter;
+ /* Transaction start/commit request recorded by the callbacks */
+ struct tevent_req *subreq;
+ /* Set by transaction_loop_finish() once the time limit has expired */
+ bool done;
+};
+
+static void transaction_loop_start(struct tevent_req *subreq);
+static void transaction_loop_started(struct tevent_req *subreq);
+static void transaction_loop_committed(struct tevent_req *subreq);
+static void transaction_loop_each_second(struct tevent_req *subreq);
+static bool transaction_loop_check_counters(struct tevent_req *req);
+static void transaction_loop_finish(struct tevent_req *subreq);
+
+/*
+ * Start the transaction loop computation.
+ *
+ * Records the parameters in a new transaction_loop_state, allocates the
+ * two per-node counter arrays (num_nodes entries each) and issues
+ * cluster_wait_send(); the loop itself begins in transaction_loop_start().
+ *
+ * keystr is referenced, not copied, so it must outlive the request.
+ *
+ * Returns NULL if the request cannot be allocated.  Otherwise returns
+ * the request; it is already completed with EINVAL if num_nodes is not
+ * positive or this node's PNN does not fit in num_nodes, and with an
+ * out-of-memory error if an allocation fails.
+ */
+static struct tevent_req *transaction_loop_send(
+				TALLOC_CTX *mem_ctx,
+				struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				struct ctdb_db_context *ctdb_db,
+				int num_nodes, int timelimit, int interactive,
+				const char *keystr)
+{
+	struct tevent_req *req, *subreq;
+	struct transaction_loop_state *state;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct transaction_loop_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->ctdb_db = ctdb_db;
+	state->num_nodes = num_nodes;
+	state->timelimit = timelimit;
+	state->interactive = interactive;
+	state->key.dptr = discard_const(keystr);
+	state->key.dsize = strlen(keystr);
+	state->pnn = ctdb_client_pnn(client);
+
+	/*
+	 * The counter arrays have num_nodes entries and the loop writes
+	 * counter[pnn], so reject a node count that cannot hold this
+	 * node's slot instead of writing past the end of the arrays.
+	 */
+	if (num_nodes <= 0 || state->pnn >= (uint32_t)num_nodes) {
+		fprintf(stderr,
+			"Invalid number of nodes %d for pnn %u\n",
+			num_nodes, state->pnn);
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
+	state->old_counter = talloc_zero_array(state, uint32_t, num_nodes);
+	if (tevent_req_nomem(state->old_counter, req)) {
+		return tevent_req_post(req, ev);
+	}
+	state->counter = talloc_zero_array(state, uint32_t, num_nodes);
+	if (tevent_req_nomem(state->counter, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	subreq = cluster_wait_send(state, state->ev, state->client,
+				   state->num_nodes);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, transaction_loop_start, req);
+
+	return req;
+}
+
+/*
+ * Callback for cluster_wait_send(): begin the benchmark.
+ *
+ * Starts the first transaction and arms the timers: a one-second
+ * repeating wakeup (only on PNN 0) and a single wakeup after timelimit
+ * seconds that ends the loop.
+ */
+static void transaction_loop_start(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct transaction_loop_state *state = tevent_req_data(
+ req, struct transaction_loop_state);
+ bool status;
+ int ret;
+
+ status = cluster_wait_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* First transaction; its timeout is timelimit seconds from now */
+ subreq = ctdb_transaction_start_send(state, state->ev, state->client,
+ tevent_timeval_current_ofs(
+ state->timelimit, 0),
+ state->ctdb_db, false);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, transaction_loop_started, req);
+ state->subreq = subreq;
+
+ /* Only PNN 0 runs the once-per-second callback */
+ if (ctdb_client_pnn(state->client) == 0) {
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(1, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, transaction_loop_each_second,
+ req);
+ }
+
+ /* Timer that marks the loop as done after timelimit seconds */
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(
+ state->timelimit, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, transaction_loop_finish, req);
+}
+
+/*
+ * Callback for ctdb_transaction_start_send(): perform one iteration.
+ *
+ * Fetches the record, treats it as an array of uint32_t counters,
+ * increments the entry for this node, stores the record back and sends
+ * the commit.  Any failure completes the request with an error.
+ */
+static void transaction_loop_started(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct transaction_loop_state *state = tevent_req_data(
+ req, struct transaction_loop_state);
+ TDB_DATA data;
+ int ret;
+ uint32_t *counter;
+
+ state->h = ctdb_transaction_start_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ state->subreq = NULL;
+ if (state->h == NULL) {
+ fprintf(stderr, "transaction start failed\n");
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* NOTE(review): data is allocated on state and not freed here - confirm */
+ ret = ctdb_transaction_fetch_record(state->h, state->key,
+ state, &data);
+ if (ret != 0) {
+ fprintf(stderr, "transaction fetch record failed\n");
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /*
+ * Record too short to hold one counter per node: replace it with
+ * a zeroed array of num_nodes counters.
+ */
+ if (data.dsize < state->num_nodes * sizeof(uint32_t)) {
+ TALLOC_FREE(data.dptr);
+
+ data.dsize = state->num_nodes * sizeof(uint32_t);
+ data.dptr = (uint8_t *)talloc_zero_array(state, uint32_t,
+ state->num_nodes);
+ if (tevent_req_nomem(data.dptr, req)) {
+ return;
+ }
+ }
+
+ counter = (uint32_t *)data.dptr;
+ /* NOTE(review): assumes state->pnn < state->num_nodes - confirm */
+ counter[state->pnn] += 1;
+ /* Keep a copy of the first num_nodes counters for checking/printing */
+ memcpy(state->counter, counter, state->num_nodes * sizeof(uint32_t));
+
+ ret = ctdb_transaction_store_record(state->h, state->key, data);
+ if (ret != 0) {
+ fprintf(stderr, "transaction store failed\n");
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ subreq = ctdb_transaction_commit_send(state, state->ev,
+ tevent_timeval_current_ofs(
+ state->timelimit, 0),
+ state->h);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, transaction_loop_committed, req);
+ state->subreq = subreq;
+}
+
+/*
+ * Callback for ctdb_transaction_commit_send(): finish one iteration.
+ *
+ * On PNN 0 the freshly written counters are checked against the
+ * previously accepted ones.  If the time limit has expired
+ * (state->done) the final counters are printed and the request
+ * completes; otherwise the next transaction is started.
+ *
+ * A failed commit or a counter that went backwards completes the
+ * request with an error.
+ */
+static void transaction_loop_committed(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct transaction_loop_state *state = tevent_req_data(
+		req, struct transaction_loop_state);
+	int ret;
+	bool status;
+
+	status = ctdb_transaction_commit_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	state->subreq = NULL;
+	if (! status) {
+		fprintf(stderr, "transaction commit failed - %s\n",
+			strerror(ret));
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	if (state->pnn == 0) {
+		if (! transaction_loop_check_counters(req)) {
+			/*
+			 * Fail the request explicitly.  Returning without
+			 * completing it would leave no pending transaction
+			 * and nothing to ever finish the request, so the
+			 * caller polling on it would hang.
+			 */
+			tevent_req_error(req, EIO);
+			return;
+		}
+	}
+
+	if (state->done) {
+		int i;
+
+		printf("Transaction[%u]: ", ctdb_client_pnn(state->client));
+		for (i=0; i<state->num_nodes; i++) {
+			printf("%6u ", state->counter[i]);
+		}
+		printf("\n");
+
+		tevent_req_done(req);
+
+		return;
+	}
+
+	/* Time limit not reached yet: go around the loop again */
+	subreq = ctdb_transaction_start_send(state, state->ev, state->client,
+					     tevent_timeval_current_ofs(
+						     state->timelimit, 0),
+					     state->ctdb_db, false);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, transaction_loop_started, req);
+	/* Record the in-flight request, as is done for the first start */
+	state->subreq = subreq;
+}
+
+/*
+ * Callback for the one-second wakeup: in interactive mode print the
+ * current counters, then re-arm the wakeup for one second later.
+ */
+static void transaction_loop_each_second(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct transaction_loop_state *state = tevent_req_data(
+ req, struct transaction_loop_state);
+ bool status;
+ int i;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ fprintf(stderr, "tevent wakeup failed\n");
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ /* Progress output only when interactive is exactly 1 */
+ if (state->interactive == 1) {
+ printf("Transaction[%u]: ", ctdb_client_pnn(state->client));
+ for (i=0; i<state->num_nodes; i++) {
+ printf("%6u ", state->counter[i]);
+ }
+ printf("\n");
+ fflush(stdout);
+ }
+
+ /* Schedule the next tick */
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(1, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, transaction_loop_each_second, req);
+}
+
+/*
+ * Check that no node's counter is lower than the value accepted by the
+ * previous check.
+ *
+ * On the first decrease found, a message is written to stderr and
+ * false is returned, leaving old_counter untouched.  Otherwise the
+ * current counters become the new reference values and true is
+ * returned.
+ */
+static bool transaction_loop_check_counters(struct tevent_req *req)
+{
+	struct transaction_loop_state *state = tevent_req_data(
+		req, struct transaction_loop_state);
+	int node;
+
+	for (node = 0; node < state->num_nodes; node++) {
+		uint32_t prev = state->old_counter[node];
+		uint32_t cur = state->counter[node];
+
+		if (cur < prev) {
+			fprintf(stderr,
+				"Counter reduced for node %d: %u -> %u\n",
+				node, prev, cur);
+			return false;
+		}
+	}
+
+	/* All counters held or advanced: remember them for next time */
+	memcpy(state->old_counter, state->counter,
+	       state->num_nodes * sizeof(uint32_t));
+
+	return true;
+}
+
+/*
+ * Callback for the timelimit wakeup: mark the loop as done.
+ *
+ * The request itself is completed by transaction_loop_committed() when
+ * it next sees state->done; here the request is only failed (EIO) if
+ * the wakeup itself reported failure.
+ */
+static void transaction_loop_finish(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct transaction_loop_state *state = tevent_req_data(
+ req, struct transaction_loop_state);
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+
+ /* Set unconditionally, before the wakeup status is examined */
+ state->done = true;
+
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+}
+
+/*
+ * Collect the result of the transaction loop request.
+ *
+ * Returns true on success.  On failure returns false and, when perr is
+ * not NULL, stores the unix error code in *perr.
+ */
+static bool transaction_loop_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Entry point of the transaction loop benchmark.
+ *
+ * Parses the options, connects to the ctdb socket, attaches to the
+ * named persistent or replicated database and runs the transaction
+ * loop until it completes.  Exits with status 1 on any failure,
+ * returns 0 on success.
+ */
+int main(int argc, const char *argv[])
+{
+ const struct test_options *opts;
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct ctdb_db_context *ctdb_db;
+ struct tevent_req *req;
+ uint8_t db_flags;
+ int ret;
+ bool status;
+
+ setup_logging("transaction_loop", DEBUG_STDERR);
+
+ status = process_options_database(argc, argv, &opts);
+ if (! status) {
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to initialize client, ret=%d\n", ret);
+ exit(1);
+ }
+
+ /* NOTE(review): message assumes the only failure is out of memory - confirm */
+ if (! ctdb_recovery_wait(ev, client)) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ /* Only these two database types are accepted */
+ if (strcmp(opts->dbtype, "persistent") == 0) {
+ db_flags = CTDB_DB_FLAGS_PERSISTENT;
+ } else if (strcmp(opts->dbtype, "replicated") == 0) {
+ db_flags = CTDB_DB_FLAGS_REPLICATED;
+ } else {
+ fprintf(stderr, "Database must be persistent or replicated\n");
+ exit(1);
+ }
+
+ ret = ctdb_attach(ev, client, tevent_timeval_zero(), opts->dbname,
+ db_flags, &ctdb_db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to persistent DB %s\n",
+ opts->dbname);
+ exit(1);
+ }
+
+ req = transaction_loop_send(mem_ctx, ev, client, ctdb_db,
+ opts->num_nodes, opts->timelimit,
+ opts->interactive, opts->keystr);
+ if (req == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ /* Run the event loop until the request finishes */
+ tevent_req_poll(req, ev);
+
+ status = transaction_loop_recv(req, &ret);
+ if (! status) {
+ fprintf(stderr, "transaction loop test failed, ret=%d\n", ret);
+ exit(1);
+ }
+
+ talloc_free(mem_ctx);
+ return 0;
+}
diff --git a/ctdb/tests/src/tunable_test.c b/ctdb/tests/src/tunable_test.c
new file mode 100644
index 0000000..ea94aec
--- /dev/null
+++ b/ctdb/tests/src/tunable_test.c
@@ -0,0 +1,71 @@
+/*
+ Test tunable handling
+
+ Copyright (C) Martin Schwenke, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <assert.h>
+
+#include "common/tunable.c"
+
+/*
+ * Load tunables from the file named on the command line and print
+ * every known tunable as "name=value", one per line.
+ *
+ * Returns 0 on success, 1 on a usage error, EINVAL if the file cannot
+ * be loaded and EIO if a listed tunable has no value.
+ */
+int main(int argc, const char **argv)
+{
+ TALLOC_CTX *mem_ctx;
+ struct ctdb_tunable_list tun_list;
+ struct ctdb_var_list *list;
+ bool status;
+ int ret = 0;
+ int i;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+ return 1;
+ }
+
+ mem_ctx = talloc_new(NULL);
+ assert(mem_ctx != NULL);
+
+ status = ctdb_tunable_load_file(mem_ctx, &tun_list, argv[1]);
+ if (!status) {
+ ret = EINVAL;
+ goto done;
+ }
+
+ list = ctdb_tunable_names(mem_ctx);
+ assert(list != NULL);
+
+ /* Look up each name in the loaded list and print its value */
+ for (i = 0; i < list->count; i++) {
+ const char *var = list->var[i];
+ uint32_t val;
+
+ status = ctdb_tunable_get_value(&tun_list, var, &val);
+ if (!status) {
+ ret = EIO;
+ goto done;
+ }
+
+ printf("%s=%"PRIu32"\n", var, val);
+ fflush(stdout);
+ }
+
+done:
+ talloc_free(mem_ctx);
+ return ret;
+}
diff --git a/ctdb/tests/src/tunnel_cmd.c b/ctdb/tests/src/tunnel_cmd.c
new file mode 100644
index 0000000..73a2297
--- /dev/null
+++ b/ctdb/tests/src/tunnel_cmd.c
@@ -0,0 +1,199 @@
+/*
+ CTDB tunnel test
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol_private.h"
+#include "client/client.h"
+
+#define TUNNEL_ID (CTDB_TUNNEL_TEST | 0xf0f0f0f0)
+
+/* Private data handed to listen_callback() by cmd_listen() */
+struct listen_state {
+ /* Context used to allocate each pulled message string */
+ TALLOC_CTX *mem_ctx;
+ /* Set to true once a "quit" message has been received */
+ bool done;
+};
+
+/*
+ * Tunnel callback for "listen": decode the payload as a string, print
+ * it to stderr prefixed with the source node, and flag completion when
+ * the message is exactly "quit".  Undecodable payloads are reported
+ * and ignored.
+ */
+static void listen_callback(struct ctdb_tunnel_context *tctx,
+ uint32_t srcnode, uint32_t reqid,
+ uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ struct listen_state *state = (struct listen_state *)private_data;
+ const char *msg;
+ size_t np;
+ int ret;
+
+ ret = ctdb_stringn_pull(buf, buflen, state->mem_ctx, &msg, &np);
+ if (ret != 0) {
+ fprintf(stderr, "Invalid tunnel message, ret=%d\n", ret);
+ return;
+ }
+
+ fprintf(stderr, "%u: %s\n", srcnode, msg);
+
+ if (strcmp(msg, "quit") == 0) {
+ state->done = true;
+ }
+
+ /* The string was allocated on state->mem_ctx by the pull above */
+ talloc_free(discard_const(msg));
+}
+
+/*
+ * Implement the "listen" command: set up the test tunnel with
+ * listen_callback(), wait on state.done, then destroy the tunnel.
+ *
+ * Returns 0 on success or the error returned by tunnel setup/destroy.
+ */
+static int cmd_listen(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client)
+{
+ struct ctdb_tunnel_context *tunnel;
+ struct listen_state state;
+ int ret;
+
+ state.mem_ctx = mem_ctx;
+ state.done = false;
+
+ ret = ctdb_tunnel_setup(mem_ctx, ev, client, TUNNEL_ID,
+ listen_callback, &state, &tunnel);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* presumably runs the event loop until state.done is set - confirm */
+ ctdb_client_wait(ev, &state.done);
+
+ ret = ctdb_tunnel_destroy(ev, tunnel);
+ if (ret != 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Tunnel callback for "send": only reports the source node and the
+ * payload length of any message received; the payload is not decoded.
+ */
+static void send_callback(struct ctdb_tunnel_context *tctx,
+ uint32_t srcnode, uint32_t reqid,
+ uint8_t *buf, size_t buflen, void *private_data)
+{
+ fprintf(stderr, "send received a message - %u: %zu\n", srcnode, buflen);
+}
+
+/*
+ * Implement the "send" command: set up the test tunnel, marshal msg as
+ * a length-prefixed string and send it to destnode, then destroy the
+ * tunnel.
+ *
+ * Returns 0 on success, ENOMEM if the buffer cannot be allocated, or
+ * the error returned by the failing tunnel call.  On an early error
+ * return the tunnel is not destroyed here.
+ */
+static int cmd_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ uint32_t destnode, const char *msg)
+{
+ struct ctdb_tunnel_context *tunnel;
+ uint8_t *buf;
+ size_t buflen, np;
+ int ret;
+
+ ret = ctdb_tunnel_setup(mem_ctx, ev, client, TUNNEL_ID,
+ send_callback, NULL, &tunnel);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Size the buffer for the marshalled string, then marshal into it */
+ buflen = ctdb_stringn_len(&msg);
+ buf = talloc_size(mem_ctx, buflen);
+ if (buf == NULL) {
+ return ENOMEM;
+ }
+ ctdb_stringn_push(&msg, buf, &np);
+
+ ret = ctdb_tunnel_request(mem_ctx, ev, tunnel, destnode,
+ tevent_timeval_zero(), buf, buflen, false);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = ctdb_tunnel_destroy(ev, tunnel);
+ if (ret != 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Print both accepted command forms to stderr; the caller exits. */
+static void usage(const char *cmd)
+{
+ fprintf(stderr, "usage: %s <ctdb-socket> listen\n", cmd);
+ fprintf(stderr, "usage: %s <ctdb-socket> send <pnn> <msg>\n", cmd);
+}
+
+/*
+ * Entry point: "<ctdb-socket> listen" or "<ctdb-socket> send <pnn> <msg>".
+ *
+ * Exits with status 1 on a usage error or if setup fails; otherwise
+ * returns the result of the selected command.
+ */
+int main(int argc, const char **argv)
+{
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ const char *socket = NULL, *msg = NULL;
+ uint32_t pnn = CTDB_UNKNOWN_PNN;
+ int ret;
+ bool do_listen = false;
+ bool do_send = false;
+
+ if (argc != 3 && argc != 5) {
+ usage(argv[0]);
+ exit(1);
+ }
+
+ socket = argv[1];
+
+ if (strcmp(argv[2], "listen") == 0) {
+ do_listen = true;
+ } else if (strcmp(argv[2], "send") == 0) {
+ if (argc != 5) {
+ usage(argv[0]);
+ exit(1);
+ }
+
+ /* atol() does no error checking; non-numeric input yields 0 */
+ pnn = atol(argv[3]);
+ msg = argv[4];
+ do_send = true;
+ } else {
+ usage(argv[0]);
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ talloc_free(mem_ctx);
+ exit(1);
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, socket, &client);
+ if (ret != 0) {
+ talloc_free(mem_ctx);
+ exit(1);
+ }
+
+ /* Exactly one of do_listen / do_send was set above */
+ if (do_listen) {
+ ret = cmd_listen(mem_ctx, ev, client);
+ }
+ if (do_send) {
+ ret = cmd_send(mem_ctx, ev, client, pnn, msg);
+ }
+
+ talloc_free(mem_ctx);
+
+ return ret;
+}
diff --git a/ctdb/tests/src/tunnel_test.c b/ctdb/tests/src/tunnel_test.c
new file mode 100644
index 0000000..a6d44ba
--- /dev/null
+++ b/ctdb/tests/src/tunnel_test.c
@@ -0,0 +1,480 @@
+/*
+ CTDB tunnel test
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol_private.h"
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+/* Payload exchanged over the test tunnels */
+struct test_data {
+ /* PNN of the node the message originated from */
+ uint32_t pnn;
+ /* Message counter of the originating node */
+ uint32_t count;
+};
+
+/* Return the number of bytes needed to marshal a test_data. */
+static size_t test_data_len(struct test_data *in)
+{
+	size_t len;
+
+	len = ctdb_uint32_len(&in->pnn);
+	len += ctdb_uint32_len(&in->count);
+
+	return len;
+}
+
+/*
+ * Marshal a test_data into buf: pnn first, then count.  The number of
+ * bytes written is stored in *npush.  buf must have room for
+ * test_data_len() bytes.
+ */
+static void test_data_push(struct test_data *in, uint8_t *buf, size_t *npush)
+{
+	size_t written = 0, n;
+
+	ctdb_uint32_push(&in->pnn, buf + written, &n);
+	written += n;
+
+	ctdb_uint32_push(&in->count, buf + written, &n);
+	written += n;
+
+	*npush = written;
+}
+
+/*
+ * Unmarshal a test_data from buf: pnn first, then count.
+ *
+ * Returns 0 and stores the number of bytes consumed in *npull on
+ * success.  If either field cannot be pulled, returns that error and
+ * leaves *npull untouched.
+ */
+static int test_data_pull(uint8_t *buf, size_t buflen, struct test_data *out,
+			  size_t *npull)
+{
+	uint32_t *fields[] = { &out->pnn, &out->count };
+	size_t consumed = 0, n, i;
+	int ret;
+
+	/* Pull the fields in wire order, advancing past each one */
+	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
+		ret = ctdb_uint32_pull(buf + consumed, buflen - consumed,
+				       fields[i], &n);
+		if (ret != 0) {
+			return ret;
+		}
+		consumed += n;
+	}
+
+	*npull = consumed;
+	return 0;
+}
+
+/*
+ * Set up 2 tunnels from each node - one to the next node and one to the
+ * previous node. The tunnel to the next node is used for sending data and
+ * tunnel to the previous node is used for receiving data.
+ */
+
+/*
+ * State of the tunnel test computation; allocated by
+ * tunnel_test_send() and shared by all of its callbacks.
+ */
+struct tunnel_test_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Size of the node ring used to compute next_node / prev_node */
+ int num_nodes;
+ /* Measurement period in seconds */
+ int timelimit;
+
+ /* This node's PNN and its neighbours, modulo num_nodes */
+ uint32_t pnn;
+ uint32_t next_node;
+ uint32_t prev_node;
+ /* Set when the time limit expires; later messages are ignored */
+ bool done;
+ /* Tunnel with id CTDB_TUNNEL_TEST | pnn, used for sending */
+ struct ctdb_tunnel_context *send_tunnel;
+ /* Tunnel with id CTDB_TUNNEL_TEST | prev_node, used for receiving */
+ struct ctdb_tunnel_context *recv_tunnel;
+ /* Counter for this node's own message; reported as msgs/sec */
+ uint32_t count;
+ /* Message buffer released in tunnel_test_msg_send_done() */
+ uint8_t *buf;
+};
+
+static void tunnel_test_send_tunnel_done(struct tevent_req *subreq);
+static void tunnel_test_recv_tunnel_done(struct tevent_req *subreq);
+static void tunnel_test_start(struct tevent_req *subreq);
+static void tunnel_test_msg_send(struct tevent_req *req,
+ struct test_data *tdata);
+static void tunnel_test_msg_send_done(struct tevent_req *subreq);
+static void tunnel_test_handler(struct ctdb_tunnel_context *tctx,
+ uint32_t srcnode, uint32_t reqid,
+ uint8_t *buf, size_t buflen,
+ void *private_data);
+static void tunnel_test_done(struct tevent_req *subreq);
+static void tunnel_test_finish(struct tevent_req *subreq);
+static void tunnel_test_send_tunnel_closed(struct tevent_req *subreq);
+static void tunnel_test_recv_tunnel_closed(struct tevent_req *subreq);
+
+/*
+ * Start the tunnel test computation.
+ *
+ * Records the parameters, derives the previous and next node in the
+ * ring and begins setting up the tunnel with id CTDB_TUNNEL_TEST | pnn
+ * (stored as send_tunnel by the callback).
+ *
+ * Returns NULL if the request cannot be allocated.
+ */
+static struct tevent_req *tunnel_test_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct ctdb_client_context *client,
+ int num_nodes, int timelimit)
+{
+ struct tevent_req *req, *subreq;
+ struct tunnel_test_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct tunnel_test_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->client = client;
+ state->num_nodes = num_nodes;
+ state->timelimit = timelimit;
+ state->pnn = ctdb_client_pnn(client);
+ /* num_nodes is added before subtracting so the value cannot wrap */
+ state->prev_node = (state->pnn + num_nodes - 1) % num_nodes;
+ state->next_node = (state->pnn + 1) % num_nodes;
+ state->done = false;
+
+ /* The handler receives req as its private data */
+ subreq = ctdb_tunnel_setup_send(state, state->ev, state->client,
+ CTDB_TUNNEL_TEST | state->pnn,
+ tunnel_test_handler, req);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, tunnel_test_send_tunnel_done, req);
+
+ return req;
+}
+
+/*
+ * Callback for the first tunnel setup: store the send tunnel and begin
+ * setting up the tunnel with id CTDB_TUNNEL_TEST | prev_node.
+ */
+static void tunnel_test_send_tunnel_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ int ret;
+ bool status;
+
+ status = ctdb_tunnel_setup_recv(subreq, &ret, &state->send_tunnel);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ subreq = ctdb_tunnel_setup_send(state, state->ev, state->client,
+ CTDB_TUNNEL_TEST | state->prev_node,
+ tunnel_test_handler, req);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, tunnel_test_recv_tunnel_done, req);
+}
+
+/*
+ * Callback for the second tunnel setup: store the receive tunnel and
+ * issue cluster_wait_send() before the test proper starts.
+ */
+static void tunnel_test_recv_tunnel_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ int ret;
+ bool status;
+
+ status = ctdb_tunnel_setup_recv(subreq, &ret, &state->recv_tunnel);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ subreq = cluster_wait_send(state, state->ev, state->client,
+ state->num_nodes);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, tunnel_test_start, req);
+}
+
+/*
+ * Callback for cluster_wait_send(): arm the timelimit timer and send
+ * this node's first message (its own PNN and current count) to the
+ * next node.
+ */
+static void tunnel_test_start(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ struct test_data tdata;
+ int ret;
+ bool status;
+
+ status = cluster_wait_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* Timer that ends the measurement after timelimit seconds */
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(state->timelimit, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, tunnel_test_done, req);
+
+ tdata.pnn = state->pnn;
+ tdata.count = state->count;
+ tunnel_test_msg_send(req, &tdata);
+}
+
+/*
+ * Marshal tdata and send it to the next node over the send tunnel.
+ *
+ * Each call uses its own buffer, owned by the request it creates, so
+ * several sends can be in flight at once (the handler forwards a
+ * message for every one it receives) without sharing or leaking a
+ * buffer.  The buffer is released when the subrequest is freed.
+ *
+ * On allocation failure the overall request is failed.
+ */
+static void tunnel_test_msg_send(struct tevent_req *req,
+				 struct test_data *tdata)
+{
+	struct tunnel_test_state *state = tevent_req_data(
+		req, struct tunnel_test_state);
+	struct tevent_req *subreq;
+	uint8_t *buf;
+	size_t buflen, np;
+
+	buflen = test_data_len(tdata);
+	buf = talloc_size(state, buflen);
+	if (tevent_req_nomem(buf, req)) {
+		return;
+	}
+	test_data_push(tdata, buf, &np);
+
+	subreq = ctdb_tunnel_request_send(state, state->ev,
+					  state->send_tunnel,
+					  state->next_node,
+					  tevent_timeval_zero(),
+					  buf, buflen, false);
+	if (tevent_req_nomem(subreq, req)) {
+		/* buf stays a child of state and is freed with it */
+		return;
+	}
+
+	/* Tie the buffer's lifetime to the request that uses it */
+	talloc_steal(subreq, buf);
+
+	tevent_req_set_callback(subreq, tunnel_test_msg_send_done, req);
+}
+
+/*
+ * Callback for a tunnel request: fail the overall request if the send
+ * failed, otherwise release whatever buffer state->buf refers to.
+ * No reply data is collected (all output pointers are NULL).
+ */
+static void tunnel_test_msg_send_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ int ret;
+ bool status;
+
+ status = ctdb_tunnel_request_recv(subreq, &ret, NULL, NULL, NULL);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ TALLOC_FREE(state->buf);
+}
+
+/*
+ * Handler registered on both tunnels; private_data is the test request.
+ *
+ * Messages are ignored once the time limit has passed.  Data arriving
+ * on the send tunnel is a protocol error.  A message that originated
+ * from this node must carry the expected count, which is then
+ * incremented; every message, whatever its origin, is forwarded to the
+ * next node.
+ */
+static void tunnel_test_handler(struct ctdb_tunnel_context *tctx,
+ uint32_t srcnode, uint32_t reqid,
+ uint8_t *buf, size_t buflen,
+ void *private_data)
+{
+ struct tevent_req *req = talloc_get_type_abort(
+ private_data, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ struct test_data tdata;
+ size_t np;
+ int ret;
+
+ if (state->done) {
+ return;
+ }
+
+ if (tctx == state->send_tunnel) {
+ fprintf(stderr, "pnn:%u Received data on send tunnel\n",
+ state->pnn);
+ tevent_req_error(req, EPROTO);
+ return;
+ }
+
+ ret = test_data_pull(buf, buflen, &tdata, &np);
+ if (ret != 0) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /* Our own message has come back around the ring */
+ if (tdata.pnn == state->pnn) {
+ if (tdata.count != state->count) {
+ tevent_req_error(req, EPROTO);
+ return;
+ }
+
+ state->count = tdata.count + 1;
+ tdata.count = state->count;
+ }
+
+ /* Forward the (possibly updated) message to the next node */
+ tunnel_test_msg_send(req, &tdata);
+}
+
+/*
+ * Callback for the timelimit wakeup: print this node's message rate,
+ * stop processing incoming messages and wait 3 more seconds before
+ * the tunnels are torn down.
+ */
+static void tunnel_test_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ /* Rate is the final count divided by the time limit in seconds */
+ printf("pnn[%u] %.1lf msgs/sec\n",
+ state->pnn, (double)state->count / state->timelimit);
+
+ /* From now on tunnel_test_handler() drops incoming messages */
+ state->done = true;
+
+ /* wait few more seconds */
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(3, 0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, tunnel_test_finish, req);
+}
+
+/*
+ * Callback for the 3-second grace wakeup: begin teardown by destroying
+ * the send tunnel.
+ */
+static void tunnel_test_finish(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ subreq = ctdb_tunnel_destroy_send(state, state->ev,
+ state->send_tunnel);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, tunnel_test_send_tunnel_closed, req);
+}
+
+/*
+ * Callback for destroying the send tunnel: clear the pointer and go on
+ * to destroy the receive tunnel.
+ */
+static void tunnel_test_send_tunnel_closed(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ int ret;
+ bool status;
+
+ status = ctdb_tunnel_destroy_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+ state->send_tunnel = NULL;
+
+ subreq = ctdb_tunnel_destroy_send(state, state->ev,
+ state->recv_tunnel);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, tunnel_test_recv_tunnel_closed, req);
+}
+
+/*
+ * Callback for destroying the receive tunnel: clear the pointer and
+ * complete the overall request successfully.
+ */
+static void tunnel_test_recv_tunnel_closed(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct tunnel_test_state *state = tevent_req_data(
+ req, struct tunnel_test_state);
+ int ret;
+ bool status;
+
+ status = ctdb_tunnel_destroy_recv(subreq, &ret);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, ret);
+ return;
+ }
+ state->recv_tunnel = NULL;
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of the tunnel test request.
+ *
+ * Returns true on success.  On failure returns false and, when perr is
+ * not NULL, stores the unix error code in *perr.
+ */
+static bool tunnel_test_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Entry point of the tunnel test.
+ *
+ * Parses the options, connects to the ctdb socket and runs the tunnel
+ * test until it completes.  Exits with status 1 on any failure,
+ * returns 0 on success.
+ */
+int main(int argc, const char *argv[])
+{
+ const struct test_options *opts;
+ TALLOC_CTX *mem_ctx;
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct tevent_req *req;
+ int ret;
+ bool status;
+
+ setup_logging("tunnel_test", DEBUG_STDERR);
+
+ status = process_options_basic(argc, argv, &opts);
+ if (! status) {
+ exit(1);
+ }
+
+ mem_ctx = talloc_new(NULL);
+ if (mem_ctx == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ev = tevent_context_init(mem_ctx);
+ if (ev == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ ret = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to initialize client, ret=%d\n", ret);
+ exit(1);
+ }
+
+ /* NOTE(review): message assumes the only failure is out of memory - confirm */
+ if (! ctdb_recovery_wait(ev, client)) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ req = tunnel_test_send(mem_ctx, ev, client, opts->num_nodes,
+ opts->timelimit);
+ if (req == NULL) {
+ fprintf(stderr, "Memory allocation error\n");
+ exit(1);
+ }
+
+ /* Run the event loop until the request finishes */
+ tevent_req_poll(req, ev);
+
+ status = tunnel_test_recv(req, &ret);
+ if (! status) {
+ fprintf(stderr, "tunnel test failed, ret=%d\n", ret);
+ exit(1);
+ }
+
+ talloc_free(mem_ctx);
+ return 0;
+}
diff --git a/ctdb/tests/src/update_record.c b/ctdb/tests/src/update_record.c
new file mode 100644
index 0000000..11b6050
--- /dev/null
+++ b/ctdb/tests/src/update_record.c
@@ -0,0 +1,236 @@
+/*
+ Update a record and increase its RSN
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol_api.h"
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+/* State shared by the asynchronous steps started in update_record_send() */
+struct update_record_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct ctdb_db_context *db;
+ /* Passed as the seconds offset of the update-record control timeout */
+ int timelimit;
+ /* Points at the caller's key string; the bytes are not copied */
+ TDB_DATA key;
+};
+
+static void update_record_fetch_done(struct tevent_req *subreq);
+static void update_record_update_done(struct tevent_req *subreq);
+
+/*
+ * Start an update of the record stored under keystr in db.
+ *
+ * This only issues ctdb_fetch_lock_send() for the key; the actual
+ * update is performed from update_record_fetch_done().  keystr is
+ * referenced rather than copied, so it has to stay valid for the
+ * lifetime of the request.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *update_record_send(TALLOC_CTX *mem_ctx,
+					     struct tevent_context *ev,
+					     struct ctdb_client_context *client,
+					     struct ctdb_db_context *db,
+					     const char *keystr,
+					     int timelimit)
+{
+	struct update_record_state *state;
+	struct tevent_req *req;
+	struct tevent_req *lock_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct update_record_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->key = (TDB_DATA) {
+		.dptr = (uint8_t *)discard_const(keystr),
+		.dsize = strlen(keystr),
+	};
+	state->timelimit = timelimit;
+	state->db = db;
+	state->client = client;
+	state->ev = ev;
+
+	lock_req = ctdb_fetch_lock_send(state, ev, client, db, state->key,
+					false);
+	if (tevent_req_nomem(lock_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(lock_req, update_record_fetch_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback of the fetch-lock started in update_record_send().
+ *
+ * Takes the record header returned by the fetch-lock, bumps its RSN by
+ * 10, builds a record buffer whose data is the new RSN printed as a
+ * decimal string, and sends it to the current node with an
+ * update-record control.  Any failure completes the request with an
+ * error.
+ */
+static void update_record_fetch_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct update_record_state *state = tevent_req_data(
+ req, struct update_record_state);
+ struct ctdb_record_handle *h;
+ struct ctdb_ltdb_header header;
+ struct ctdb_rec_buffer *recbuf;
+ struct ctdb_req_control request;
+ TDB_DATA data;
+ int ret;
+
+ /* Only the header is wanted; the record data is not requested */
+ h = ctdb_fetch_lock_recv(subreq, &header, NULL, NULL, &ret);
+ TALLOC_FREE(subreq);
+ if (h == NULL) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ /*
+ * The handle is dropped before the update is sent
+ * NOTE(review): presumably this releases the record lock - confirm
+ */
+ talloc_free(h);
+
+ header.rsn += 10;
+
+ recbuf = ctdb_rec_buffer_init(state, ctdb_db_id(state->db));
+ if (tevent_req_nomem(recbuf, req)) {
+ return;
+ }
+
+ /* New record data: the bumped RSN as text, without the trailing NUL */
+ data.dptr = (uint8_t *)talloc_asprintf(recbuf, "%"PRIu64, header.rsn);
+ if (tevent_req_nomem(data.dptr, req)) {
+ return;
+ }
+ data.dsize = strlen((char *)data.dptr);
+
+ ret = ctdb_rec_buffer_add(state, recbuf, 0, &header, state->key, data);
+ if (ret != 0) {
+ tevent_req_error(req, ret);
+ return;
+ }
+
+ ctdb_req_control_update_record(&request, recbuf);
+ subreq = ctdb_client_control_send(state, state->ev, state->client,
+ CTDB_CURRENT_NODE,
+ tevent_timeval_current_ofs(
+ state->timelimit, 0),
+ &request);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, update_record_update_done, req);
+
+ /*
+ * recbuf is released right after the control has been sent
+ * NOTE(review): assumes the send has already marshalled it - confirm
+ */
+ talloc_free(recbuf);
+}
+
+/*
+ * Completion callback of the update-record control.
+ *
+ * Finishes the request successfully when both the control transport
+ * and the update-record reply report success; otherwise finishes it
+ * with the reported error.
+ */
+static void update_record_update_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct update_record_state *state = tevent_req_data(
+		req, struct update_record_state);
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_recv(subreq, &err, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = ctdb_reply_control_update_record(reply);
+	if (err == 0) {
+		talloc_free(reply);
+		tevent_req_done(req);
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of an update_record request.
+ *
+ * Returns true on success.  On failure returns false and stores the
+ * unix error in *perr when perr is not NULL.
+ */
+static bool update_record_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return false;
+}
+
+/*
+ * Entry point of the update_record test.
+ *
+ * Parses the database test options, creates a client on opts->socket,
+ * calls ctdb_recovery_wait(), attaches to opts->dbname with flags 0 and
+ * runs one update_record request for opts->keystr.  Every failure
+ * prints a message and exits with status 1; success returns 0.
+ */
+int main(int argc, const char *argv[])
+{
+	const struct test_options *opts;
+	struct ctdb_client_context *client;
+	struct ctdb_db_context *ctdb_db;
+	struct tevent_context *ev;
+	struct tevent_req *req;
+	TALLOC_CTX *mem_ctx;
+	int err;
+
+	setup_logging("update_record", DEBUG_STDERR);
+
+	if (! process_options_database(argc, argv, &opts)) {
+		exit(1);
+	}
+
+	/* Everything below hangs off this context and is freed at the end */
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ev = tevent_context_init(mem_ctx);
+	if (ev == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+	if (err != 0) {
+		fprintf(stderr, "Failed to initialize client (%s), %s\n",
+			opts->socket, strerror(err));
+		exit(1);
+	}
+
+	if (! ctdb_recovery_wait(ev, client)) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	err = ctdb_attach(ev, client, tevent_timeval_zero(), opts->dbname,
+			  0, &ctdb_db);
+	if (err != 0) {
+		fprintf(stderr, "Failed to attach DB %s\n", opts->dbname);
+		exit(1);
+	}
+
+	req = update_record_send(mem_ctx, ev, client, ctdb_db,
+				 opts->keystr, opts->timelimit);
+	if (req == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	/* Run the event loop until the request completes */
+	tevent_req_poll(req, ev);
+
+	if (! update_record_recv(req, &err)) {
+		fprintf(stderr, "update record failed\n");
+		exit(1);
+	}
+
+	talloc_free(mem_ctx);
+	return 0;
+}
diff --git a/ctdb/tests/src/update_record_persistent.c b/ctdb/tests/src/update_record_persistent.c
new file mode 100644
index 0000000..2d6d21e
--- /dev/null
+++ b/ctdb/tests/src/update_record_persistent.c
@@ -0,0 +1,218 @@
+/*
+ Update a record in persistent database
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol_api.h"
+#include "client/client.h"
+#include "tests/src/test_options.h"
+#include "tests/src/cluster_wait.h"
+
+/* State of one persistent-database update_record request */
+struct update_record_state {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ struct ctdb_db_context *db;
+ /* Passed as the seconds offset of the update-record control timeout */
+ int timelimit;
+ /* Both point at the caller's strings; the bytes are not copied */
+ TDB_DATA key, data;
+};
+
+static void update_record_update_done(struct tevent_req *subreq);
+
+/*
+ * Start an update of the record keystr in db with the value valuestr.
+ *
+ * The current header is read with ctdb_ltdb_fetch(), its RSN is
+ * incremented by one, and the key/value pair is sent to the current
+ * node in an update-record control.  keystr and valuestr are referenced
+ * rather than copied.
+ *
+ * Returns the new request (already completed with an error if a step
+ * failed), or NULL if the request could not be created.
+ */
+static struct tevent_req *update_record_send(TALLOC_CTX *mem_ctx,
+					     struct tevent_context *ev,
+					     struct ctdb_client_context *client,
+					     struct ctdb_db_context *db,
+					     const char *keystr,
+					     const char *valuestr,
+					     int timelimit)
+{
+	struct update_record_state *state;
+	struct tevent_req *req;
+	struct tevent_req *ctl_req;
+	struct ctdb_req_control request;
+	struct ctdb_rec_buffer *recbuf;
+	struct ctdb_ltdb_header header;
+	int err;
+
+	req = tevent_req_create(mem_ctx, &state, struct update_record_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->key = (TDB_DATA) {
+		.dptr = (uint8_t *)discard_const(keystr),
+		.dsize = strlen(keystr),
+	};
+	state->data = (TDB_DATA) {
+		.dptr = (uint8_t *)discard_const(valuestr),
+		.dsize = strlen(valuestr),
+	};
+	state->timelimit = timelimit;
+	state->db = db;
+	state->client = client;
+	state->ev = ev;
+
+	/* Only the header is wanted; the record data is not requested */
+	err = ctdb_ltdb_fetch(db, state->key, &header, NULL, NULL);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return tevent_req_post(req, ev);
+	}
+
+	header.rsn += 1;
+
+	recbuf = ctdb_rec_buffer_init(state, ctdb_db_id(db));
+	if (tevent_req_nomem(recbuf, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	err = ctdb_rec_buffer_add(state, recbuf, 0, &header,
+				  state->key, state->data);
+	if (err != 0) {
+		tevent_req_error(req, err);
+		return tevent_req_post(req, ev);
+	}
+
+	ctdb_req_control_update_record(&request, recbuf);
+	ctl_req = ctdb_client_control_send(state, ev, client,
+					   CTDB_CURRENT_NODE,
+					   tevent_timeval_current_ofs(
+						   timelimit, 0),
+					   &request);
+	if (tevent_req_nomem(ctl_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(ctl_req, update_record_update_done, req);
+
+	/* The record buffer is released once the control has been sent */
+	talloc_free(recbuf);
+	return req;
+}
+
+/*
+ * Completion callback of the update-record control.
+ *
+ * Finishes the request successfully when both the control transport
+ * and the update-record reply report success; otherwise finishes it
+ * with the reported error.
+ */
+static void update_record_update_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct update_record_state *state = tevent_req_data(
+		req, struct update_record_state);
+	struct ctdb_reply_control *reply;
+	bool ok;
+	int err;
+
+	ok = ctdb_client_control_recv(subreq, &err, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, err);
+		return;
+	}
+
+	err = ctdb_reply_control_update_record(reply);
+	if (err == 0) {
+		talloc_free(reply);
+		tevent_req_done(req);
+		return;
+	}
+
+	tevent_req_error(req, err);
+}
+
+/*
+ * Collect the result of an update_record request.
+ *
+ * Returns true on success.  On failure returns false and stores the
+ * unix error in *perr when perr is not NULL.
+ */
+static bool update_record_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+	return false;
+}
+
+/*
+ * Entry point of the persistent update_record test.
+ *
+ * Parses the database test options (a value, -v, is mandatory), creates
+ * a client on opts->socket, calls ctdb_recovery_wait(), attaches to
+ * opts->dbname as a persistent database and runs one update_record
+ * request storing opts->valuestr under opts->keystr.  Every failure
+ * prints a message and exits with status 1; success returns 0.
+ */
+int main(int argc, const char *argv[])
+{
+	const struct test_options *opts;
+	TALLOC_CTX *mem_ctx;
+	struct tevent_context *ev;
+	struct ctdb_client_context *client;
+	struct ctdb_db_context *ctdb_db;
+	struct tevent_req *req;
+	int ret;
+	bool status;
+
+	/* Logging name fixed: it used to be misspelled "..._persistene" */
+	setup_logging("update_record_persistent", DEBUG_STDERR);
+
+	status = process_options_database(argc, argv, &opts);
+	if (! status) {
+		exit(1);
+	}
+
+	if (opts->valuestr == NULL) {
+		fprintf(stderr, "Error: please specify key value (-v)\n");
+		exit(1);
+	}
+
+	/* Everything below hangs off this context and is freed at the end */
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ev = tevent_context_init(mem_ctx);
+	if (ev == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ret = ctdb_client_init(mem_ctx, ev, opts->socket, &client);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to initialize client (%s), %s\n",
+			opts->socket, strerror(ret));
+		exit(1);
+	}
+
+	if (! ctdb_recovery_wait(ev, client)) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ret = ctdb_attach(ev, client, tevent_timeval_zero(), opts->dbname,
+			  CTDB_DB_FLAGS_PERSISTENT, &ctdb_db);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to attach DB %s\n", opts->dbname);
+		exit(1);
+	}
+
+	req = update_record_send(mem_ctx, ev, client, ctdb_db,
+				 opts->keystr, opts->valuestr,
+				 opts->timelimit);
+	if (req == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	/* Run the event loop until the request completes */
+	tevent_req_poll(req, ev);
+
+	status = update_record_recv(req, &ret);
+	if (! status) {
+		fprintf(stderr, "update record failed\n");
+		exit(1);
+	}
+
+	talloc_free(mem_ctx);
+	return 0;
+}
diff --git a/ctdb/tests/test_check_tcp_ports.sh b/ctdb/tests/test_check_tcp_ports.sh
new file mode 100755
index 0000000..1272d88
--- /dev/null
+++ b/ctdb/tests/test_check_tcp_ports.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+# Run ctdb_check_tcp_ports from the CTDB functions file for a dummy
+# service.  The ports to check are given as arguments; with no
+# arguments port 139 is checked.
+
+# Quote $0 so that a script path containing spaces still works
+DIRNAME=$(dirname "$0")
+
+CTDB_BASE="${DIRNAME}/../config"
+. "${CTDB_BASE}/functions"
+
+SERVICE="test-service"
+
+# Join all arguments into one space-separated list of ports
+PORTS="$*"
+
+if [ -z "${PORTS}" ] ; then
+	PORTS=139
+fi
+
+# PORTS is deliberately unquoted: each port becomes its own argument.
+# NOTE(review): assumes ctdb_check_tcp_ports returns non-zero when a
+# port check fails - confirm against the functions file.  Without the
+# exit, the success message below was printed unconditionally.
+ctdb_check_tcp_ports ${SERVICE} ${PORTS} || exit $?
+
+echo "Test for service '${SERVICE}' on tcp ports ${PORTS} succeeded!"
diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c
new file mode 100644
index 0000000..6e8185f
--- /dev/null
+++ b/ctdb/tools/ctdb.c
@@ -0,0 +1,6600 @@
+/*
+ CTDB control tool
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/time.h"
+#include "system/wait.h"
+#include "system/dir.h"
+
+#include <ctype.h>
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "version.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/smb_strtox.h"
+
+#include "common/db_hash.h"
+#include "common/logging.h"
+#include "common/path.h"
+#include "protocol/protocol.h"
+#include "protocol/protocol_basic.h"
+#include "protocol/protocol_api.h"
+#include "protocol/protocol_util.h"
+#include "common/system_socket.h"
+#include "client/client.h"
+#include "client/client_sync.h"
+
+#define TIMEOUT() timeval_current_ofs(options.timelimit, 0)
+
+#define SRVID_CTDB_TOOL (CTDB_SRVID_TOOL_RANGE | 0x0001000000000000LL)
+#define SRVID_CTDB_PUSHDB (CTDB_SRVID_TOOL_RANGE | 0x0002000000000000LL)
+
+#define NODE_FLAGS_UNKNOWN 0x00000040
+
+/*
+ * Tool-wide settings.
+ * NOTE(review): presumably filled in from the command line options -
+ * confirm against the option table.
+ */
+static struct {
+ const char *debuglevelstr;
+ /* Seconds; TIMEOUT() turns this into the timeout of each control */
+ int timelimit;
+ int pnn;
+ int machinereadable;
+ /* Field separator printed by the machine-readable output functions */
+ const char *sep;
+ int machineparsable;
+ int verbose;
+ int maxruntime;
+ int printemptyrecords;
+ int printdatasize;
+ int printlmaster;
+ int printhash;
+ int printrecordflags;
+} options;
+
+static poptContext pc;
+
+/* Per-invocation state handed to every command function */
+struct ctdb_context {
+ struct tevent_context *ev;
+ struct ctdb_client_context *client;
+ /* Cached by get_nodemap(); NULL until first fetched */
+ struct ctdb_node_map *nodemap;
+ /*
+ * pnn: node that controls are sent to by default (e.g. get_nodemap())
+ * cmd_pnn: node used when no node string is given (parse_nodestring())
+ * leader_pnn: updated by leader_handler()
+ */
+ uint32_t pnn, cmd_pnn, leader_pnn;
+ /* Last value handed out by next_srvid() */
+ uint64_t srvid;
+};
+
+static void usage(const char *command);
+
+static int disable_takeover_runs(TALLOC_CTX *mem_ctx,
+ struct ctdb_context *ctdb,
+ uint32_t timeout,
+ uint32_t *pnn_list,
+ int count);
+static int send_ipreallocated_control_to_nodes(TALLOC_CTX *mem_ctx,
+ struct ctdb_context *ctdb,
+ uint32_t *pnn_list,
+ int count);
+
+/*
+ * Utility Functions
+ */
+
+/* Return the time from *tv to *tv2 in seconds (negative if tv2 < tv) */
+static double timeval_delta(struct timeval *tv2, struct timeval *tv)
+{
+	double whole_secs = tv2->tv_sec - tv->tv_sec;
+	double micro_secs = tv2->tv_usec - tv->tv_usec;
+
+	return whole_secs + micro_secs * 1.0e-6;
+}
+
+/*
+ * Find the first entry of nodemap whose pnn equals the given one.
+ *
+ * Returns a pointer into nodemap->node, or NULL if there is no match.
+ */
+static struct ctdb_node_and_flags *get_node_by_pnn(
+					struct ctdb_node_map *nodemap,
+					uint32_t pnn)
+{
+	unsigned int idx = 0;
+
+	while (idx < nodemap->num) {
+		struct ctdb_node_and_flags *candidate = &nodemap->node[idx];
+
+		if (candidate->pnn == pnn) {
+			return candidate;
+		}
+		idx++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Render node flags as a '|'-separated list of flag names.
+ *
+ * Returns "OK" when none of the known flags is set, "OUT-OF-MEMORY"
+ * when building the string fails, otherwise a string allocated on
+ * mem_ctx.
+ */
+static const char *pretty_print_flags(TALLOC_CTX *mem_ctx, uint32_t flags)
+{
+	static const struct {
+		uint32_t flag;
+		const char *name;
+	} flag_names[] = {
+		{ NODE_FLAGS_DISCONNECTED,	    "DISCONNECTED" },
+		{ NODE_FLAGS_UNKNOWN,		    "UNKNOWN" },
+		{ NODE_FLAGS_PERMANENTLY_DISABLED,  "DISABLED" },
+		{ NODE_FLAGS_BANNED,		    "BANNED" },
+		{ NODE_FLAGS_UNHEALTHY,		    "UNHEALTHY" },
+		{ NODE_FLAGS_DELETED,		    "DELETED" },
+		{ NODE_FLAGS_STOPPED,		    "STOPPED" },
+		{ NODE_FLAGS_INACTIVE,		    "INACTIVE" },
+	};
+	char *joined = NULL;
+	size_t k;
+
+	for (k = 0; k < ARRAY_SIZE(flag_names); k++) {
+		const char *name = flag_names[k].name;
+
+		if ((flags & flag_names[k].flag) == 0) {
+			continue;
+		}
+
+		/* First name starts the string, later ones are appended */
+		joined = (joined == NULL) ?
+			talloc_asprintf(mem_ctx, "%s", name) :
+			talloc_asprintf_append(joined, "|%s", name);
+		if (joined == NULL) {
+			return "OUT-OF-MEMORY";
+		}
+	}
+
+	return (joined != NULL) ? joined : "OK";
+}
+
+/* Advance the srvid counter stored in ctdb and return the new value */
+static uint64_t next_srvid(struct ctdb_context *ctdb)
+{
+	return ++ctdb->srvid;
+}
+
+/*
+ * Get consistent nodemap information.
+ *
+ * If a nodemap is already cached on ctdb (and force is false), use that.
+ * If not, fetch it from ctdb->pnn and cache it.  If the node the nodemap
+ * was fetched from is flagged BANNED in that nodemap, retry with the
+ * next node that is neither DELETED nor DISCONNECTED.
+ *
+ * Returns the cached nodemap (owned by ctdb), or NULL on failure.
+ */
+static struct ctdb_node_map *get_nodemap(struct ctdb_context *ctdb, bool force)
+{
+	TALLOC_CTX *tmp_ctx;
+	struct ctdb_node_map *nodemap;
+	struct ctdb_node_and_flags *node;
+	uint32_t current_node;
+	int ret;
+
+	if (force) {
+		TALLOC_FREE(ctdb->nodemap);
+	}
+
+	if (ctdb->nodemap != NULL) {
+		return ctdb->nodemap;
+	}
+
+	tmp_ctx = talloc_new(ctdb);
+	if (tmp_ctx == NULL) {
+		return NULL;
+	}
+
+	current_node = ctdb->pnn;
+again:
+	ret = ctdb_ctrl_get_nodemap(tmp_ctx, ctdb->ev, ctdb->client,
+				    current_node, TIMEOUT(), &nodemap);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to get nodemap from node %u\n",
+			current_node);
+		goto failed;
+	}
+
+	node = get_node_by_pnn(nodemap, current_node);
+	if (node == NULL) {
+		/* The queried node is missing from its own nodemap */
+		fprintf(stderr, "Node %u not found in nodemap\n",
+			current_node);
+		goto failed;
+	}
+
+	if (node->flags & NODE_FLAGS_BANNED) {
+		/* Pick next node */
+		do {
+			current_node = (current_node + 1) % nodemap->num;
+			node = get_node_by_pnn(nodemap, current_node);
+			/* PNNs without an entry are skipped like dead nodes */
+			if (node != NULL &&
+			    ! (node->flags &
+			       (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED))) {
+				break;
+			}
+		} while (current_node != ctdb->pnn);
+
+		if (current_node == ctdb->pnn) {
+			/* Tried all nodes in the cluster */
+			fprintf(stderr, "Warning: All nodes are banned.\n");
+			goto failed;
+		}
+
+		goto again;
+	}
+
+	/*
+	 * Move the result out of tmp_ctx before freeing it, so that nodemaps
+	 * fetched by earlier attempts do not stay allocated on ctdb.
+	 */
+	ctdb->nodemap = talloc_steal(ctdb, nodemap);
+	talloc_free(tmp_ctx);
+	return nodemap;
+
+failed:
+	talloc_free(tmp_ctx);
+	return NULL;
+}
+
+/* Print a PNN on its own line; CTDB_UNKNOWN_PNN is shown as "UNKNOWN" */
+static void print_pnn(uint32_t pnn)
+{
+	if (pnn != CTDB_UNKNOWN_PNN) {
+		printf("%u\n", pnn);
+	} else {
+		printf("UNKNOWN\n");
+	}
+}
+
+/*
+ * Check that pnn names a usable node.
+ *
+ * Returns false when pnn is -1, when the nodemap cannot be obtained,
+ * when no node has that PNN, or when the node is flagged DISCONNECTED
+ * or DELETED.  The last two cases print a message to stderr.
+ */
+static bool verify_pnn(struct ctdb_context *ctdb, int pnn)
+{
+	struct ctdb_node_map *nodemap;
+	struct ctdb_node_and_flags *match;
+	uint32_t unusable = NODE_FLAGS_DISCONNECTED|NODE_FLAGS_DELETED;
+
+	if (pnn == -1) {
+		return false;
+	}
+
+	nodemap = get_nodemap(ctdb, false);
+	if (nodemap == NULL) {
+		return false;
+	}
+
+	match = get_node_by_pnn(nodemap, (uint32_t)pnn);
+	if (match == NULL) {
+		fprintf(stderr, "Node %u does not exist\n", pnn);
+		return false;
+	}
+
+	if (match->flags & unusable) {
+		fprintf(stderr, "Node %u has status %s\n", pnn,
+			pretty_print_flags(ctdb, match->flags));
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Allocate an empty node map on mem_ctx with room for as many nodes as
+ * nodemap holds.
+ *
+ * Only the storage is sized from nodemap: the new map is zeroed (so its
+ * num is 0) and no node entries are copied.  Returns NULL on allocation
+ * failure.
+ */
+static struct ctdb_node_map *talloc_nodemap(TALLOC_CTX *mem_ctx,
+					    struct ctdb_node_map *nodemap)
+{
+	struct ctdb_node_map *copy;
+
+	copy = talloc_zero(mem_ctx, struct ctdb_node_map);
+	if (copy == NULL) {
+		return NULL;
+	}
+
+	copy->node = talloc_array(copy, struct ctdb_node_and_flags,
+				  nodemap->num);
+	if (copy->node != NULL) {
+		return copy;
+	}
+
+	talloc_free(copy);
+	return NULL;
+}
+
+/*
+ * Get the number and the list of matching nodes
+ *
+ * nodestring := NULL | all | pnn,[pnn,...]
+ *
+ * If nodestring is NULL, use the current node (ctdb->cmd_pnn).
+ *
+ * On success returns true and stores a node map allocated on mem_ctx in
+ * *out.  On failure returns false, leaves *out untouched and frees
+ * everything allocated here.  A PNN may be listed more than once; each
+ * occurrence produces one entry in the result.
+ */
+static bool parse_nodestring(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			     const char *nodestring,
+			     struct ctdb_node_map **out)
+{
+	struct ctdb_node_map *nodemap, *nodemap2;
+	struct ctdb_node_and_flags *node;
+	char *ns = NULL;
+	unsigned int i;
+
+	nodemap = get_nodemap(ctdb, false);
+	if (nodemap == NULL) {
+		return false;
+	}
+
+	nodemap2 = talloc_nodemap(mem_ctx, nodemap);
+	if (nodemap2 == NULL) {
+		return false;
+	}
+
+	if (nodestring == NULL) {
+		node = get_node_by_pnn(nodemap, ctdb->cmd_pnn);
+		if (node == NULL) {
+			/* Do not hand back an uninitialised entry */
+			fprintf(stderr, "Node %u does not exist\n",
+				ctdb->cmd_pnn);
+			goto failed;
+		}
+		nodemap2->node[0] = *node;
+		nodemap2->num = 1;
+
+		goto done;
+	}
+
+	if (strcmp(nodestring, "all") == 0) {
+		for (i=0; i<nodemap->num; i++) {
+			nodemap2->node[i] = nodemap->node[i];
+		}
+		nodemap2->num = nodemap->num;
+
+		goto done;
+	} else {
+		/* Number of entries nodemap2->node currently has room for */
+		uint32_t capacity = nodemap->num;
+		char *tok;
+		int error = 0;
+
+		ns = talloc_strdup(mem_ctx, nodestring);
+		if (ns == NULL) {
+			goto failed;
+		}
+
+		tok = strtok(ns, ",");
+		while (tok != NULL) {
+			uint32_t pnn;
+
+			pnn = (uint32_t)smb_strtoul(tok,
+						    NULL,
+						    0,
+						    &error,
+						    SMB_STR_STANDARD);
+			if (error != 0) {
+				fprintf(stderr, "Invalid node %s\n", tok);
+				goto failed;
+			}
+
+			node = get_node_by_pnn(nodemap, pnn);
+			if (node == NULL) {
+				fprintf(stderr, "Node %u does not exist\n",
+					pnn);
+				goto failed;
+			}
+
+			/*
+			 * The list can be longer than the cluster (repeated
+			 * PNNs), so grow the array instead of writing past
+			 * its end.
+			 */
+			if (nodemap2->num >= capacity) {
+				struct ctdb_node_and_flags *grown;
+
+				grown = talloc_realloc(
+						nodemap2, nodemap2->node,
+						struct ctdb_node_and_flags,
+						capacity + 16);
+				if (grown == NULL) {
+					fprintf(stderr,
+						"Memory allocation error\n");
+					goto failed;
+				}
+				nodemap2->node = grown;
+				capacity += 16;
+			}
+
+			nodemap2->node[nodemap2->num] = *node;
+			nodemap2->num += 1;
+
+			tok = strtok(NULL, ",");
+		}
+	}
+
+done:
+	talloc_free(ns);
+	*out = nodemap2;
+	return true;
+
+failed:
+	talloc_free(ns);
+	talloc_free(nodemap2);
+	return false;
+}
+
+/*
+ * Remote nodes are initialised as UNHEALTHY in the daemon and their
+ * true status is updated after they are connected. However, there
+ * is a small window when a healthy node may be shown as unhealthy
+ * between connecting and the status update. Hide this for nodes
+ * that are not DISCONNECTED nodes by reporting them as UNKNOWN until
+ * the runstate passes FIRST_RECOVERY. Code paths where this is used
+ * do not make any control decisions depending upon unknown/unhealthy
+ * state.
+ */
+static struct ctdb_node_map *get_nodemap_unknown(
+	TALLOC_CTX *mem_ctx,
+	struct ctdb_context *ctdb,
+	struct ctdb_node_map *nodemap_in)
+{
+	struct ctdb_node_map *result;
+	enum ctdb_runstate runstate;
+	bool hide_health;
+	unsigned int i;
+	int ret;
+
+	ret = ctdb_ctrl_get_runstate(mem_ctx,
+				     ctdb->ev,
+				     ctdb->client,
+				     ctdb->cmd_pnn,
+				     TIMEOUT(),
+				     &runstate);
+	if (ret != 0 ) {
+		printf("Unable to get runstate");
+		return NULL;
+	}
+
+	result = talloc_nodemap(mem_ctx, nodemap_in);
+	if (result == NULL) {
+		printf("Unable to get nodemap");
+		return NULL;
+	}
+
+	/* Flags are only masked until the runstate passes FIRST_RECOVERY */
+	hide_health = (runstate <= CTDB_RUNSTATE_FIRST_RECOVERY);
+
+	result->num = nodemap_in->num;
+	for (i=0; i<result->num; i++) {
+		struct ctdb_node_and_flags *node = &result->node[i];
+
+		/* Start from an exact copy of the input entry */
+		*node = nodemap_in->node[i];
+
+		if (! hide_health) {
+			continue;
+		}
+		/* Deleted and disconnected nodes keep their real flags */
+		if (node->flags &
+		    (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED)) {
+			continue;
+		}
+		/* So does the node the command is run against */
+		if (node->pnn == ctdb->cmd_pnn) {
+			continue;
+		}
+
+		node->flags = NODE_FLAGS_UNKNOWN;
+	}
+
+	return result;
+}
+
+/*
+ * Compare the IP addresses of two socket addresses.
+ *
+ * Only the address bytes are compared.  Returns false when the address
+ * families differ or the family is neither AF_INET nor AF_INET6.
+ */
+static bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
+{
+	if (ip1->sa.sa_family != ip2->sa.sa_family) {
+		return false;
+	}
+
+	if (ip1->sa.sa_family == AF_INET) {
+		return memcmp(&ip1->ip.sin_addr, &ip2->ip.sin_addr,
+			      sizeof(struct in_addr)) == 0;
+	}
+
+	if (ip1->sa.sa_family == AF_INET6) {
+		return memcmp(&ip1->ip6.sin6_addr, &ip2->ip6.sin6_addr,
+			      sizeof(struct in6_addr)) == 0;
+	}
+
+	return false;
+}
+
+/*
+ * Append a node to a node map with given address and flags
+ *
+ * nstr is parsed as an IP address; the new node gets the next free
+ * index as its PNN.  Returns false, leaving nodemap unchanged, if the
+ * address is invalid or memory cannot be allocated.
+ */
+static bool node_map_add(struct ctdb_node_map *nodemap,
+			 const char *nstr, uint32_t flags)
+{
+	ctdb_sock_addr addr;
+	uint32_t num;
+	struct ctdb_node_and_flags *nodes;
+	struct ctdb_node_and_flags *n;
+	int ret;
+
+	ret = ctdb_sock_addr_from_string(nstr, &addr, false);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", nstr);
+		return false;
+	}
+
+	num = nodemap->num;
+	/*
+	 * Grow via a temporary so that a failed realloc does not replace
+	 * the existing array with NULL while num still counts its entries.
+	 */
+	nodes = talloc_realloc(nodemap, nodemap->node,
+			       struct ctdb_node_and_flags, num+1);
+	if (nodes == NULL) {
+		return false;
+	}
+	nodemap->node = nodes;
+
+	n = &nodemap->node[num];
+	n->addr = addr;
+	n->pnn = num;
+	n->flags = flags;
+
+	nodemap->num = num+1;
+	return true;
+}
+
+/*
+ * Read a nodes file into a node map
+ *
+ * Each remaining line yields one node whose PNN is its position in the
+ * file.  Trailing empty lines are dropped, surrounding blanks/tabs are
+ * stripped and lines that are empty after stripping are skipped.  A
+ * line starting with '#' becomes a DELETED node with address 0.0.0.0.
+ *
+ * Returns a node map allocated on mem_ctx, or NULL if the file cannot
+ * be loaded, a line is not a valid address or memory runs out.
+ */
+static struct ctdb_node_map *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx,
+						  const char *nlist)
+{
+	char **lines;
+	int nlines;
+	int i;
+	struct ctdb_node_map *nodemap;
+
+	nodemap = talloc_zero(mem_ctx, struct ctdb_node_map);
+	if (nodemap == NULL) {
+		return NULL;
+	}
+
+	lines = file_lines_load(nlist, &nlines, 0, mem_ctx);
+	if (lines == NULL) {
+		/* Do not leave the empty map behind on mem_ctx */
+		TALLOC_FREE(nodemap);
+		return NULL;
+	}
+
+	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
+		nlines--;
+	}
+
+	for (i=0; i<nlines; i++) {
+		char *node;
+		uint32_t flags;
+		size_t len;
+
+		node = lines[i];
+		/* strip leading spaces */
+		while((*node == ' ') || (*node == '\t')) {
+			node++;
+		}
+
+		len = strlen(node);
+
+		/* strip trailing spaces */
+		while ((len > 1) &&
+		       ((node[len-1] == ' ') || (node[len-1] == '\t')))
+		{
+			node[len-1] = '\0';
+			len--;
+		}
+
+		if (len == 0) {
+			continue;
+		}
+		if (*node == '#') {
+			/* A "deleted" node is a node that is
+			   commented out in the nodes file.  This is
+			   used instead of removing a line, which
+			   would cause subsequent nodes to change
+			   their PNN. */
+			flags = NODE_FLAGS_DELETED;
+			node = discard_const("0.0.0.0");
+		} else {
+			flags = 0;
+		}
+		if (! node_map_add(nodemap, node, flags)) {
+			talloc_free(lines);
+			TALLOC_FREE(nodemap);
+			return NULL;
+		}
+	}
+
+	talloc_free(lines);
+	return nodemap;
+}
+
+/*
+ * Load the node map from the "nodes" file in the CTDB base directory.
+ *
+ * The directory is taken from the CTDB_BASE environment variable and
+ * falls back to CTDB_ETCDIR.  The pnn argument is not used.  Returns
+ * NULL (after printing a message) on failure.
+ */
+static struct ctdb_node_map *read_nodes_file(TALLOC_CTX *mem_ctx, uint32_t pnn)
+{
+	struct ctdb_node_map *nodemap;
+	const char *nodes_list;
+	const char *basedir = getenv("CTDB_BASE");
+
+	nodes_list = talloc_asprintf(mem_ctx, "%s/nodes",
+				     (basedir != NULL) ? basedir : CTDB_ETCDIR);
+	if (nodes_list == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		return NULL;
+	}
+
+	nodemap = ctdb_read_nodes_file(mem_ctx, nodes_list);
+	if (nodemap != NULL) {
+		return nodemap;
+	}
+
+	fprintf(stderr, "Failed to read nodes file \"%s\"\n", nodes_list);
+	return NULL;
+}
+
+/*
+ * Find the database called db_name in dbmap.
+ *
+ * The name of every database in dbmap is requested from ctdb->pnn until
+ * one matches.  Returns a pointer into dbmap->dbs, or NULL when nothing
+ * matches or a name lookup fails.
+ */
+static struct ctdb_dbid *db_find(TALLOC_CTX *mem_ctx,
+				 struct ctdb_context *ctdb,
+				 struct ctdb_dbid_map *dbmap,
+				 const char *db_name)
+{
+	unsigned int i;
+	int ret;
+
+	for (i=0; i<dbmap->num; i++) {
+		const char *name = NULL;
+		bool match;
+
+		ret = ctdb_ctrl_get_dbname(mem_ctx, ctdb->ev, ctdb->client,
+					   ctdb->pnn, TIMEOUT(),
+					   dbmap->dbs[i].db_id, &name);
+		if (ret != 0) {
+			return NULL;
+		}
+
+		match = (strcmp(db_name, name) == 0);
+		/* Free every fetched name, not only the matching one */
+		talloc_free(discard_const(name));
+
+		if (match) {
+			return &dbmap->dbs[i];
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up a database given either its name or its id.
+ *
+ * db_arg is treated as a database id when it starts with "0x" and as a
+ * database name otherwise.  On success returns true and fills in those
+ * of db_id, db_name and db_flags that are not NULL; *db_name is a copy
+ * allocated on mem_ctx.  Returns false when the database map or name
+ * cannot be fetched, the id cannot be parsed, or nothing matches.
+ */
+static bool db_exists(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+		      const char *db_arg, uint32_t *db_id,
+		      const char **db_name, uint8_t *db_flags)
+{
+	struct ctdb_dbid_map *dbmap;
+	struct ctdb_dbid *found = NULL;
+	const char *name = NULL;
+	bool by_id = (strncmp(db_arg, "0x", 2) == 0);
+	uint32_t id = 0;
+	unsigned int i;
+	int ret;
+
+	ret = ctdb_ctrl_get_dbmap(mem_ctx, ctdb->ev, ctdb->client,
+				  ctdb->pnn, TIMEOUT(), &dbmap);
+	if (ret != 0) {
+		return false;
+	}
+
+	if (by_id) {
+		int parse_err = 0;
+
+		id = smb_strtoul(db_arg, NULL, 0, &parse_err,
+				 SMB_STR_STANDARD);
+		if (parse_err != 0) {
+			return false;
+		}
+		for (i=0; i<dbmap->num; i++) {
+			if (dbmap->dbs[i].db_id == id) {
+				found = &dbmap->dbs[i];
+				break;
+			}
+		}
+	} else {
+		found = db_find(mem_ctx, ctdb, dbmap, db_arg);
+	}
+
+	if (found == NULL) {
+		fprintf(stderr, "No database matching '%s' found\n", db_arg);
+		return false;
+	}
+
+	if (by_id) {
+		/* Only the id is known so far; ask the daemon for the name */
+		ret = ctdb_ctrl_get_dbname(mem_ctx, ctdb->ev, ctdb->client,
+					   ctdb->pnn, TIMEOUT(), id, &name);
+		if (ret != 0) {
+			return false;
+		}
+	} else {
+		name = db_arg;
+	}
+
+	if (db_id != NULL) {
+		*db_id = found->db_id;
+	}
+	if (db_name != NULL) {
+		*db_name = talloc_strdup(mem_ctx, name);
+	}
+	if (db_flags != NULL) {
+		*db_flags = found->flags;
+	}
+	return true;
+}
+
+/*
+ * Convert len hex digits at str into binary data allocated on mem_ctx.
+ *
+ * Returns 0 and stores the result in *out on success, EINVAL if len is
+ * odd or a digit pair is not valid hex, ENOMEM on allocation failure.
+ * *out is left untouched on failure.
+ */
+static int hex_to_data(const char *str, size_t len, TALLOC_CTX *mem_ctx,
+		       TDB_DATA *out)
+{
+	unsigned int i;
+	TDB_DATA data;
+
+	if (len & 0x01) {
+		fprintf(stderr, "Key (%s) contains odd number of hex digits\n",
+			str);
+		return EINVAL;
+	}
+
+	data.dsize = len / 2;
+	data.dptr = talloc_size(mem_ctx, data.dsize);
+	if (data.dptr == NULL) {
+		return ENOMEM;
+	}
+
+	for (i=0; i<data.dsize; i++) {
+		bool ok = hex_byte(&str[i*2], &data.dptr[i]);
+		if (!ok) {
+			fprintf(stderr, "Invalid hex: %s\n", &str[i*2]);
+			/* Do not leave the partial buffer on mem_ctx */
+			talloc_free(data.dptr);
+			return EINVAL;
+		}
+	}
+
+	*out = data;
+	return 0;
+}
+
+/*
+ * Convert a command line string of length len into data on mem_ctx.
+ *
+ * A string starting with "0x" is decoded as hex digits via
+ * hex_to_data(); anything else is copied verbatim (len bytes).
+ * Returns 0 on success or an errno value; *out is only written on
+ * success.
+ */
+static int str_to_data(const char *str, size_t len, TALLOC_CTX *mem_ctx,
+		       TDB_DATA *out)
+{
+	TDB_DATA copy;
+
+	if (strncmp(str, "0x", 2) == 0) {
+		/* Skip the "0x" prefix */
+		return hex_to_data(str+2, len-2, mem_ctx, out);
+	}
+
+	copy.dptr = talloc_memdup(mem_ctx, str, len);
+	if (copy.dptr == NULL) {
+		return ENOMEM;
+	}
+	copy.dsize = len;
+
+	*out = copy;
+	return 0;
+}
+
+/*
+ * Run the helper program at path and wait for it to finish.
+ *
+ * argv[0..argc-1] are passed as the helper's arguments after path
+ * itself; command is only used in error messages.
+ *
+ * Returns 0 on success, the helper's exit status, or an errno value.
+ * The child reports an exec failure by exiting with 64+errno, so exit
+ * statuses in the range 64..254 are reported as "status - 64".  A
+ * helper killed by a signal yields EINTR.
+ */
+static int run_helper(TALLOC_CTX *mem_ctx, const char *command,
+		      const char *path, int argc, const char **argv)
+{
+	pid_t pid;
+	int save_errno, status, ret;
+	const char **new_argv;
+	int i;
+
+	/* One extra slot for path and one for the NULL terminator */
+	new_argv = talloc_array(mem_ctx, const char *, argc + 2);
+	if (new_argv == NULL) {
+		return ENOMEM;
+	}
+
+	new_argv[0] = path;
+	for (i=0; i<argc; i++) {
+		new_argv[i+1] = argv[i];
+	}
+	new_argv[argc+1] = NULL;
+
+	pid = fork();
+	if (pid < 0) {
+		save_errno = errno;
+		talloc_free(new_argv);
+		fprintf(stderr, "Failed to fork %s (%s) - %s\n",
+			command, path, strerror(save_errno));
+		return save_errno;
+	}
+
+	if (pid == 0) {
+		ret = execv(path, discard_const(new_argv));
+		if (ret == -1) {
+			_exit(64+errno);
+		}
+		/* Should not happen */
+		_exit(64+ENOEXEC);
+	}
+
+	talloc_free(new_argv);
+
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		save_errno = errno;
+		fprintf(stderr, "waitpid() failed for %s - %s\n",
+			command, strerror(save_errno));
+		return save_errno;
+	}
+
+	/*
+	 * WIFEXITED and WIFSIGNALED are mutually exclusive, so the signal
+	 * case is only tested after the normal-exit case.
+	 */
+	if (WIFEXITED(status)) {
+		int pstatus = WEXITSTATUS(status);
+
+		if (pstatus >= 64 && pstatus < 255) {
+			fprintf(stderr, "%s failed with error %d\n",
+				command, pstatus-64);
+			return pstatus - 64;
+		}
+		return pstatus;
+	}
+
+	if (WIFSIGNALED(status)) {
+		fprintf(stderr, "%s terminated with signal %d\n",
+			command, WTERMSIG(status));
+		return EINTR;
+	}
+
+	return 0;
+}
+
+/*
+ * Message handler that records the leader PNN carried in data.
+ *
+ * private_data is the struct ctdb_context to update.  Messages that do
+ * not decode as a uint32 are ignored.
+ */
+static void leader_handler(uint64_t srvid,
+			   TDB_DATA data,
+			   void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type_abort(
+		private_data, struct ctdb_context);
+	uint32_t pnn;
+	size_t consumed;
+
+	if (ctdb_uint32_pull(data.dptr, data.dsize, &pnn, &consumed) != 0) {
+		/* Ignore packet */
+		return;
+	}
+
+	ctdb->leader_pnn = pnn;
+}
+
+/*
+ * Wait predicate for get_leader(): true once the context passed as
+ * private_data holds a leader PNN other than CTDB_UNKNOWN_PNN.
+ */
+static bool get_leader_done(void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type_abort(
+		private_data, struct ctdb_context);
+	bool known = (ctdb->leader_pnn != CTDB_UNKNOWN_PNN);
+
+	return known;
+}
+
+/*
+ * Wait (up to TIMEOUT()) for the leader PNN to become known and return
+ * it in *leader.
+ *
+ * Returns 0 on success, including the timeout case described below,
+ * otherwise the error from the wait.
+ */
+static int get_leader(TALLOC_CTX *mem_ctx,
+		      struct ctdb_context *ctdb,
+		      uint32_t *leader)
+{
+	int ret;
+
+	ret = ctdb_client_wait_func_timeout(ctdb->ev,
+					    get_leader_done,
+					    ctdb,
+					    TIMEOUT());
+	/*
+	 * ETIMEDOUT is not an error: assume there is no leader and
+	 * succeed, so whatever ctdb->leader_pnn currently holds is
+	 * returned
+	 */
+	if (ret != 0 && ret != ETIMEDOUT) {
+		fprintf(stderr, "Error getting leader\n");
+		return ret;
+	}
+
+	*leader = ctdb->leader_pnn;
+	return 0;
+}
+
+/*
+ * Command Functions
+ */
+
+/*
+ * Command: print SAMBA_VERSION_STRING on a line of its own.
+ * None of the arguments are used; always returns 0.
+ */
+static int control_version(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ int argc, const char **argv)
+{
+ printf("%s\n", SAMBA_VERSION_STRING);
+ return 0;
+}
+
+/*
+ * Tell whether a node with no flags set has an interface that is down.
+ *
+ * Returns true only if node->flags is 0 and at least one interface
+ * reported by the node has link_state 0.  Returns false when any flag
+ * is set or the interface list cannot be fetched.
+ */
+static bool partially_online(TALLOC_CTX *mem_ctx,
+			     struct ctdb_context *ctdb,
+			     struct ctdb_node_and_flags *node)
+{
+	struct ctdb_iface_list *ifaces;
+	unsigned int idx;
+	int ret;
+
+	if (node->flags != 0) {
+		return false;
+	}
+
+	ret = ctdb_ctrl_get_ifaces(mem_ctx, ctdb->ev, ctdb->client,
+				   node->pnn, TIMEOUT(), &ifaces);
+	if (ret != 0) {
+		return false;
+	}
+
+	for (idx = 0; idx < ifaces->num; idx++) {
+		if (ifaces->iface[idx].link_state == 0) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Print the node map in machine-readable form.
+ *
+ * Writes a header line followed by one line per node, skipping nodes
+ * flagged DELETED.  Every field is preceded and followed by
+ * options.sep.  Flag columns are printed as 0 or 1; the last column is
+ * 'Y' for the node whose PNN equals mypnn and 'N' otherwise.
+ */
+static void print_nodemap_machine(TALLOC_CTX *mem_ctx,
+ struct ctdb_context *ctdb,
+ struct ctdb_node_map *nodemap,
+ uint32_t mypnn)
+{
+ struct ctdb_node_and_flags *node;
+ unsigned int i;
+
+ /* Header: one leading separator, then 11 column names each followed by one */
+ printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ options.sep,
+ "Node", options.sep,
+ "IP", options.sep,
+ "Disconnected", options.sep,
+ "Unknown", options.sep,
+ "Banned", options.sep,
+ "Disabled", options.sep,
+ "Unhealthy", options.sep,
+ "Stopped", options.sep,
+ "Inactive", options.sep,
+ "PartiallyOnline", options.sep,
+ "ThisNode", options.sep);
+
+ for (i=0; i<nodemap->num; i++) {
+ node = &nodemap->node[i];
+ if (node->flags & NODE_FLAGS_DELETED) {
+ continue;
+ }
+
+ /* "!!" turns each masked flag into 0 or 1 for the %d columns */
+ printf("%s%u%s%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%c%s\n",
+ options.sep,
+ node->pnn, options.sep,
+ ctdb_sock_addr_to_string(mem_ctx, &node->addr, false),
+ options.sep,
+ !! (node->flags & NODE_FLAGS_DISCONNECTED), options.sep,
+ !! (node->flags & NODE_FLAGS_UNKNOWN), options.sep,
+ !! (node->flags & NODE_FLAGS_BANNED), options.sep,
+ !! (node->flags & NODE_FLAGS_PERMANENTLY_DISABLED),
+ options.sep,
+ !! (node->flags & NODE_FLAGS_UNHEALTHY), options.sep,
+ !! (node->flags & NODE_FLAGS_STOPPED), options.sep,
+ !! (node->flags & NODE_FLAGS_INACTIVE), options.sep,
+ partially_online(mem_ctx, ctdb, node), options.sep,
+ (node->pnn == mypnn)?'Y':'N', options.sep);
+ }
+
+}
+
+/*
+ * Print the node map in human-readable form.
+ *
+ * When print_header is true a "Number of nodes" line is printed first;
+ * it mentions the number of deleted nodes when there are any.  Then one
+ * line is printed per non-deleted node with its PNN, address and either
+ * "PARTIALLYONLINE" or the result of pretty_print_flags().  The node
+ * whose PNN equals mypnn is tagged " (THIS NODE)".
+ */
+static void print_nodemap(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  struct ctdb_node_map *nodemap, uint32_t mypnn,
+			  bool print_header)
+{
+	unsigned int idx;
+
+	if (print_header) {
+		int deleted = 0;
+
+		/* The deleted-node count is only needed for the header */
+		for (idx = 0; idx < nodemap->num; idx++) {
+			if (nodemap->node[idx].flags & NODE_FLAGS_DELETED) {
+				deleted += 1;
+			}
+		}
+
+		if (deleted != 0) {
+			printf("Number of nodes:%d "
+			       "(including %d deleted nodes)\n",
+			       nodemap->num, deleted);
+		} else {
+			printf("Number of nodes:%d\n", nodemap->num);
+		}
+	}
+
+	for (idx = 0; idx < nodemap->num; idx++) {
+		struct ctdb_node_and_flags *entry = &nodemap->node[idx];
+		const char *addr_str;
+		const char *state_str;
+		const char *self_str;
+
+		if (entry->flags & NODE_FLAGS_DELETED) {
+			continue;
+		}
+
+		addr_str = ctdb_sock_addr_to_string(mem_ctx,
+						    &entry->addr,
+						    false);
+
+		/* Flags are only pretty-printed when not partially online */
+		if (partially_online(mem_ctx, ctdb, entry)) {
+			state_str = "PARTIALLYONLINE";
+		} else {
+			state_str = pretty_print_flags(mem_ctx, entry->flags);
+		}
+
+		self_str = "";
+		if (entry->pnn == mypnn) {
+			self_str = " (THIS NODE)";
+		}
+
+		printf("pnn:%u %-16s %s%s\n",
+		       entry->pnn, addr_str, state_str, self_str);
+	}
+}
+
+/*
+ * Print the full human-readable status report: the node map (with
+ * header), the VNN map generation, size and hash-to-lmaster entries,
+ * the recovery mode and finally the leader (via print_pnn()).
+ */
+static void print_status(TALLOC_CTX *mem_ctx,
+			 struct ctdb_context *ctdb,
+			 struct ctdb_node_map *nodemap,
+			 uint32_t mypnn,
+			 struct ctdb_vnn_map *vnnmap,
+			 int recmode,
+			 uint32_t leader)
+{
+	const char *mode_name;
+	unsigned int slot;
+
+	print_nodemap(mem_ctx, ctdb, nodemap, mypnn, true);
+
+	if (vnnmap->generation != INVALID_GENERATION) {
+		printf("Generation:%u\n", vnnmap->generation);
+	} else {
+		printf("Generation:INVALID\n");
+	}
+
+	printf("Size:%d\n", vnnmap->size);
+	for (slot = 0; slot < vnnmap->size; slot++) {
+		printf("hash:%d lmaster:%d\n", slot, vnnmap->map[slot]);
+	}
+
+	/* Anything other than CTDB_RECOVERY_NORMAL is shown as RECOVERY */
+	mode_name = "RECOVERY";
+	if (recmode == CTDB_RECOVERY_NORMAL) {
+		mode_name = "NORMAL";
+	}
+	printf("Recovery mode:%s (%d)\n", mode_name, recmode);
+
+	printf("Leader:");
+	print_pnn(leader);
+}
+
+/*
+ * Command function for "status".
+ *
+ * Takes no arguments.  Fetches the node map and, in machine-readable
+ * mode, prints just that via print_nodemap_machine().  Otherwise it also
+ * fetches the VNN map, the recovery mode and the leader from the command
+ * node and prints everything with print_status().
+ *
+ * Returns 0 on success, 1 if a node map could not be obtained, or the
+ * error code of the first control that failed.
+ */
+static int control_status(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct ctdb_node_map *raw_map, *nodemap;
+	struct ctdb_vnn_map *vnnmap;
+	uint32_t leader;
+	int recmode;
+	int ret;
+
+	if (argc != 0) {
+		usage("status");
+	}
+
+	raw_map = get_nodemap(ctdb, false);
+	if (raw_map == NULL) {
+		return 1;
+	}
+
+	nodemap = get_nodemap_unknown(mem_ctx, ctdb, raw_map);
+	if (nodemap == NULL) {
+		return 1;
+	}
+
+	/* Machine-readable output consists of the node map only */
+	if (options.machinereadable == 1) {
+		print_nodemap_machine(mem_ctx, ctdb, nodemap, ctdb->cmd_pnn);
+		return 0;
+	}
+
+	/* Each step runs only if everything before it succeeded */
+	ret = ctdb_ctrl_getvnnmap(mem_ctx, ctdb->ev, ctdb->client,
+				  ctdb->cmd_pnn, TIMEOUT(), &vnnmap);
+	if (ret == 0) {
+		ret = ctdb_ctrl_get_recmode(mem_ctx, ctdb->ev, ctdb->client,
+					    ctdb->cmd_pnn, TIMEOUT(),
+					    &recmode);
+	}
+	if (ret == 0) {
+		ret = get_leader(mem_ctx, ctdb, &leader);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+
+	print_status(mem_ctx, ctdb, nodemap, ctdb->cmd_pnn,
+		     vnnmap, recmode, leader);
+	return 0;
+}
+
+/*
+ * Command function: print uptime information for the command node.
+ *
+ * Prints the node's current time, the ctdbd start time and the time the
+ * last recovery finished (the latter two together with the elapsed time
+ * since then as "days hh:mm:ss"), and the duration of the last recovery.
+ *
+ * Returns 0 on success or the error from ctdb_ctrl_uptime().
+ */
+static int control_uptime(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct ctdb_uptime *uptime;
+	int elapsed;
+	int ret;
+
+	ret = ctdb_ctrl_uptime(mem_ctx, ctdb->ev, ctdb->client,
+			       ctdb->cmd_pnn, TIMEOUT(), &uptime);
+	if (ret != 0) {
+		return ret;
+	}
+
+	printf("Current time of node %-4u : %s",
+	       ctdb->cmd_pnn, ctime(&uptime->current_time.tv_sec));
+
+	/* Seconds since ctdbd started, split into days/hours/min/sec */
+	elapsed = uptime->current_time.tv_sec -
+		  uptime->ctdbd_start_time.tv_sec;
+	printf("Ctdbd start time : (%03d %02d:%02d:%02d) %s",
+	       elapsed / 60 / 60 / 24,
+	       (elapsed / 60 / 60) % 24,
+	       (elapsed / 60) % 60,
+	       elapsed % 60,
+	       ctime(&uptime->ctdbd_start_time.tv_sec));
+
+	/* Same breakdown for the time since the last recovery finished */
+	elapsed = uptime->current_time.tv_sec -
+		  uptime->last_recovery_finished.tv_sec;
+	printf("Time of last recovery/failover: (%03d %02d:%02d:%02d) %s",
+	       elapsed / 60 / 60 / 24,
+	       (elapsed / 60 / 60) % 24,
+	       (elapsed / 60) % 60,
+	       elapsed % 60,
+	       ctime(&uptime->last_recovery_finished.tv_sec));
+
+	printf("Duration of last recovery/failover: %lf seconds\n",
+	       timeval_delta(&uptime->last_recovery_finished,
+			     &uptime->last_recovery_started));
+
+	return 0;
+}
+
+/*
+ * Command function: ping the command node.
+ *
+ * Prints the node's PNN, the elapsed time of the ping control and the
+ * client count it reported.  Returns 0 on success or the error from
+ * ctdb_ctrl_ping().
+ */
+static int control_ping(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			int argc, const char **argv)
+{
+	struct timeval started = timeval_current();
+	int num_clients;
+	int ret;
+
+	ret = ctdb_ctrl_ping(mem_ctx, ctdb->ev, ctdb->client,
+			     ctdb->cmd_pnn, TIMEOUT(), &num_clients);
+	if (ret == 0) {
+		printf("response from %u time=%.6f sec (%d clients)\n",
+		       ctdb->cmd_pnn, timeval_elapsed(&started), num_clients);
+	}
+
+	return ret;
+}
+
+const char *runstate_to_string(enum ctdb_runstate runstate);
+enum ctdb_runstate runstate_from_string(const char *runstate_str);
+
+/*
+ * Command function: print the run state of the command node.
+ *
+ * With no arguments the current run state is simply printed.  With
+ * arguments, each is parsed as a run state name in turn until one
+ * matches the current state; an unparsable name, or no match at all, is
+ * reported and 1 is returned.
+ *
+ * Returns 0 on success, 1 as described above, or the error from
+ * ctdb_ctrl_get_runstate().
+ */
+static int control_runstate(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	enum ctdb_runstate current;
+	bool matched;
+	int idx;
+	int ret;
+
+	ret = ctdb_ctrl_get_runstate(mem_ctx, ctdb->ev, ctdb->client,
+				     ctdb->cmd_pnn, TIMEOUT(), &current);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Without arguments every run state is acceptable */
+	matched = (argc <= 0);
+
+	/* Arguments after the first match are not examined */
+	for (idx = 0; idx < argc && !matched; idx++) {
+		enum ctdb_runstate wanted;
+
+		wanted = ctdb_runstate_from_string(argv[idx]);
+		if (wanted == CTDB_RUNSTATE_UNKNOWN) {
+			printf("Invalid run state (%s)\n", argv[idx]);
+			return 1;
+		}
+
+		matched = (wanted == current);
+	}
+
+	if (!matched) {
+		printf("CTDB not in required run state (got %s)\n",
+		       ctdb_runstate_to_string(current));
+		return 1;
+	}
+
+	printf("%s\n", ctdb_runstate_to_string(current));
+	return 0;
+}
+
+/*
+ * Command function for "getvar": print the value of one tunable.
+ *
+ * Takes exactly one argument, the tunable name.  The name is first
+ * checked (case-insensitively) against the list of tunables reported by
+ * the command node, then its value is fetched and printed.
+ *
+ * Returns 0 on success, 1 if the tunable is not in the list, or the
+ * error from the failing control.
+ */
+static int control_getvar(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct ctdb_var_list *known;
+	uint32_t value;
+	int pos;
+	int ret;
+
+	if (argc != 1) {
+		usage("getvar");
+	}
+
+	ret = ctdb_ctrl_list_tunables(mem_ctx, ctdb->ev, ctdb->client,
+				      ctdb->cmd_pnn, TIMEOUT(), &known);
+	if (ret != 0) {
+		fprintf(stderr,
+			"Failed to get list of variables from node %u\n",
+			ctdb->cmd_pnn);
+		return ret;
+	}
+
+	/* Stop at the first name that matches, ignoring case */
+	for (pos = 0; pos < known->count; pos++) {
+		if (strcasecmp(known->var[pos], argv[0]) == 0) {
+			break;
+		}
+	}
+
+	/* Running off the end of the list means there was no match */
+	if (pos >= known->count) {
+		printf("No such tunable %s\n", argv[0]);
+		return 1;
+	}
+
+	ret = ctdb_ctrl_get_tunable(mem_ctx, ctdb->ev, ctdb->client,
+				    ctdb->cmd_pnn, TIMEOUT(), argv[0], &value);
+	if (ret != 0) {
+		return ret;
+	}
+
+	printf("%-26s = %u\n", argv[0], value);
+	return 0;
+}
+
+/*
+ * Command function for "setvar": set one tunable on the command node.
+ *
+ * Takes exactly two arguments: the tunable name and its new value.  The
+ * name is checked (case-insensitively) against the node's tunable list
+ * and the value is parsed with smb_strtoul() before the set control is
+ * sent.  A result of 1 from the set control is reported as "obsolete
+ * tunable" on stderr and treated as success.
+ *
+ * Returns 0 on success, 1 if the tunable is not in the list, or the
+ * error from parsing or from the failing control.
+ */
+static int control_setvar(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct ctdb_var_list *known;
+	struct ctdb_tunable tunable;
+	int pos;
+	int ret = 0;
+
+	if (argc != 2) {
+		usage("setvar");
+	}
+
+	ret = ctdb_ctrl_list_tunables(mem_ctx, ctdb->ev, ctdb->client,
+				      ctdb->cmd_pnn, TIMEOUT(), &known);
+	if (ret != 0) {
+		fprintf(stderr,
+			"Failed to get list of variables from node %u\n",
+			ctdb->cmd_pnn);
+		return ret;
+	}
+
+	/* Stop at the first name that matches, ignoring case */
+	for (pos = 0; pos < known->count; pos++) {
+		if (strcasecmp(known->var[pos], argv[0]) == 0) {
+			break;
+		}
+	}
+
+	if (pos >= known->count) {
+		printf("No such tunable %s\n", argv[0]);
+		return 1;
+	}
+
+	tunable.name = argv[0];
+	tunable.value = smb_strtoul(argv[1], NULL, 0, &ret, SMB_STR_STANDARD);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_ctrl_set_tunable(mem_ctx, ctdb->ev, ctdb->client,
+				    ctdb->cmd_pnn, TIMEOUT(), &tunable);
+
+	/* A result of 1 is only worth a warning, not a failure */
+	if (ret == 1) {
+		fprintf(stderr,
+			"Setting obsolete tunable variable '%s'\n",
+			tunable.name);
+		return 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Command function for "listvars": print every tunable and its value.
+ *
+ * Takes no arguments.  The tunable list is fetched once from the command
+ * node and each value is then queried directly.  Going through
+ * control_getvar() for each entry would re-fetch and re-search the whole
+ * list every time, which is redundant because the names come from that
+ * very list.
+ *
+ * A tunable whose value cannot be fetched is skipped without failing the
+ * command, matching the previous best-effort behaviour.  The output
+ * format is the same as for "getvar".
+ *
+ * Returns 0 on success or the error from ctdb_ctrl_list_tunables().
+ */
+static int control_listvars(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	struct ctdb_var_list *tun_var_list;
+	int ret, i;
+
+	if (argc != 0) {
+		usage("listvars");
+	}
+
+	ret = ctdb_ctrl_list_tunables(mem_ctx, ctdb->ev, ctdb->client,
+				      ctdb->cmd_pnn, TIMEOUT(), &tun_var_list);
+	if (ret != 0) {
+		return ret;
+	}
+
+	for (i=0; i<tun_var_list->count; i++) {
+		const char *name = tun_var_list->var[i];
+		uint32_t value;
+
+		ret = ctdb_ctrl_get_tunable(mem_ctx, ctdb->ev, ctdb->client,
+					    ctdb->cmd_pnn, TIMEOUT(),
+					    name, &value);
+		if (ret != 0) {
+			/* Best effort: carry on with the remaining tunables */
+			continue;
+		}
+
+		printf("%-26s = %u\n", name, value);
+	}
+
+	return 0;
+}
+
+/*
+ * Table of the counters in struct ctdb_statistics that the statistics
+ * printers walk generically.  Each entry pairs the field's name (as
+ * spelled in the struct, including any "group." prefix) with its byte
+ * offset in the struct.  The printers read every listed field as a
+ * uint32_t at that offset.
+ */
+const struct {
+ const char *name;
+ uint32_t offset;
+} stats_fields[] = {
+/* Build one table entry from a (possibly nested) field designator */
+#define STATISTICS_FIELD(n) { #n, offsetof(struct ctdb_statistics, n) }
+ STATISTICS_FIELD(num_clients),
+ STATISTICS_FIELD(frozen),
+ STATISTICS_FIELD(recovering),
+ STATISTICS_FIELD(num_recoveries),
+ STATISTICS_FIELD(client_packets_sent),
+ STATISTICS_FIELD(client_packets_recv),
+ STATISTICS_FIELD(node_packets_sent),
+ STATISTICS_FIELD(node_packets_recv),
+ STATISTICS_FIELD(keepalive_packets_sent),
+ STATISTICS_FIELD(keepalive_packets_recv),
+ STATISTICS_FIELD(node.req_call),
+ STATISTICS_FIELD(node.reply_call),
+ STATISTICS_FIELD(node.req_dmaster),
+ STATISTICS_FIELD(node.reply_dmaster),
+ STATISTICS_FIELD(node.reply_error),
+ STATISTICS_FIELD(node.req_message),
+ STATISTICS_FIELD(node.req_control),
+ STATISTICS_FIELD(node.reply_control),
+ STATISTICS_FIELD(node.req_tunnel),
+ STATISTICS_FIELD(client.req_call),
+ STATISTICS_FIELD(client.req_message),
+ STATISTICS_FIELD(client.req_control),
+ STATISTICS_FIELD(client.req_tunnel),
+ STATISTICS_FIELD(timeouts.call),
+ STATISTICS_FIELD(timeouts.control),
+ STATISTICS_FIELD(timeouts.traverse),
+ STATISTICS_FIELD(locks.num_calls),
+ STATISTICS_FIELD(locks.num_current),
+ STATISTICS_FIELD(locks.num_pending),
+ STATISTICS_FIELD(locks.num_failed),
+ STATISTICS_FIELD(total_calls),
+ STATISTICS_FIELD(pending_calls),
+ STATISTICS_FIELD(childwrite_calls),
+ STATISTICS_FIELD(pending_childwrite_calls),
+ STATISTICS_FIELD(memory_used),
+ STATISTICS_FIELD(max_hop_count),
+ STATISTICS_FIELD(total_ro_delegations),
+ STATISTICS_FIELD(total_ro_revokes),
+};
+
+/*
+ * Mean of a latency counter: total divided by num, or 0.0 when num is
+ * zero so that an empty counter never divides by zero.
+ */
+#define LATENCY_AVG(v) ((v).num ? (v).total / (v).num : 0.0 )
+
+/*
+ * Print one statistics record as a single machine-readable row.
+ *
+ * Every field is followed by options.sep.  When show_header is true a
+ * header row naming the columns is printed first.  The data row must
+ * list its columns in exactly the same order as the header: protocol
+ * version, the two timestamps, every stats_fields[] entry, then
+ * num/min/avg/max for each of the five latency counters.
+ */
+static void print_statistics_machine(struct ctdb_statistics *s,
+ bool show_header)
+{
+ size_t i;
+
+ if (show_header) {
+ printf("CTDB version%s", options.sep);
+ printf("Current time of statistics%s", options.sep);
+ printf("Statistics collected since%s", options.sep);
+ /* One column per generic counter, named after the struct field */
+ for (i=0; i<ARRAY_SIZE(stats_fields); i++) {
+ printf("%s%s", stats_fields[i].name, options.sep);
+ }
+ printf("num_reclock_ctdbd_latency%s", options.sep);
+ printf("min_reclock_ctdbd_latency%s", options.sep);
+ printf("avg_reclock_ctdbd_latency%s", options.sep);
+ printf("max_reclock_ctdbd_latency%s", options.sep);
+
+ printf("num_reclock_recd_latency%s", options.sep);
+ printf("min_reclock_recd_latency%s", options.sep);
+ printf("avg_reclock_recd_latency%s", options.sep);
+ printf("max_reclock_recd_latency%s", options.sep);
+
+ printf("num_call_latency%s", options.sep);
+ printf("min_call_latency%s", options.sep);
+ printf("avg_call_latency%s", options.sep);
+ printf("max_call_latency%s", options.sep);
+
+ printf("num_lockwait_latency%s", options.sep);
+ printf("min_lockwait_latency%s", options.sep);
+ printf("avg_lockwait_latency%s", options.sep);
+ printf("max_lockwait_latency%s", options.sep);
+
+ printf("num_childwrite_latency%s", options.sep);
+ printf("min_childwrite_latency%s", options.sep);
+ printf("avg_childwrite_latency%s", options.sep);
+ printf("max_childwrite_latency%s", options.sep);
+ printf("\n");
+ }
+
+ /* The "CTDB version" column carries CTDB_PROTOCOL */
+ printf("%u%s", CTDB_PROTOCOL, options.sep);
+ /* Timestamps are printed as raw seconds, truncated to 32 bits */
+ printf("%u%s", (uint32_t)s->statistics_current_time.tv_sec, options.sep);
+ printf("%u%s", (uint32_t)s->statistics_start_time.tv_sec, options.sep);
+ for (i=0;i<ARRAY_SIZE(stats_fields);i++) {
+ /* Read the counter as a uint32_t at its recorded byte offset */
+ printf("%u%s",
+ *(uint32_t *)(stats_fields[i].offset+(uint8_t *)s),
+ options.sep);
+ }
+ printf("%u%s", s->reclock.ctdbd.num, options.sep);
+ printf("%.6f%s", s->reclock.ctdbd.min, options.sep);
+ printf("%.6f%s", LATENCY_AVG(s->reclock.ctdbd), options.sep);
+ printf("%.6f%s", s->reclock.ctdbd.max, options.sep);
+
+ printf("%u%s", s->reclock.recd.num, options.sep);
+ printf("%.6f%s", s->reclock.recd.min, options.sep);
+ printf("%.6f%s", LATENCY_AVG(s->reclock.recd), options.sep);
+ printf("%.6f%s", s->reclock.recd.max, options.sep);
+
+ printf("%d%s", s->call_latency.num, options.sep);
+ printf("%.6f%s", s->call_latency.min, options.sep);
+ printf("%.6f%s", LATENCY_AVG(s->call_latency), options.sep);
+ printf("%.6f%s", s->call_latency.max, options.sep);
+
+ /* The header calls this counter "lockwait" */
+ printf("%u%s", s->locks.latency.num, options.sep);
+ printf("%.6f%s", s->locks.latency.min, options.sep);
+ printf("%.6f%s", LATENCY_AVG(s->locks.latency), options.sep);
+ printf("%.6f%s", s->locks.latency.max, options.sep);
+
+ printf("%d%s", s->childwrite_latency.num, options.sep);
+ printf("%.6f%s", s->childwrite_latency.min, options.sep);
+ printf("%.6f%s", LATENCY_AVG(s->childwrite_latency), options.sep);
+ printf("%.6f%s", s->childwrite_latency.max, options.sep);
+ printf("\n");
+}
+
+/*
+ * Print one statistics record in human-readable form.
+ *
+ * Output is the protocol version, the record's current and start times
+ * (with the collection period as "days hh:mm:ss"), every counter from
+ * stats_fields[], the hop-count and lock buckets, and MIN/AVG/MAX plus
+ * sample count for each latency counter.  Counters whose name contains
+ * a '.' are grouped under a heading made of the part before the dot.
+ */
+static void print_statistics(struct ctdb_statistics *s)
+{
+ int tmp, days, hours, minutes, seconds;
+ size_t i;
+ const char *prefix = NULL;
+ int preflen = 0;
+
+ /* Length of the collection period, split into d/h/m/s */
+ tmp = s->statistics_current_time.tv_sec -
+ s->statistics_start_time.tv_sec;
+ seconds = tmp % 60; tmp /= 60;
+ minutes = tmp % 60; tmp /= 60;
+ hours = tmp % 24; tmp /= 24;
+ days = tmp;
+
+ printf("CTDB version %u\n", CTDB_PROTOCOL);
+ printf("Current time of statistics : %s",
+ ctime(&s->statistics_current_time.tv_sec));
+ printf("Statistics collected since : (%03d %02d:%02d:%02d) %s",
+ days, hours, minutes, seconds,
+ ctime(&s->statistics_start_time.tv_sec));
+
+ for (i=0; i<ARRAY_SIZE(stats_fields); i++) {
+ if (strchr(stats_fields[i].name, '.') != NULL) {
+ /* preflen covers the group name and the dot itself */
+ preflen = strcspn(stats_fields[i].name, ".") + 1;
+ /* Emit a heading whenever the group changes */
+ if (! prefix ||
+ strncmp(prefix, stats_fields[i].name, preflen) != 0) {
+ prefix = stats_fields[i].name;
+ printf(" %*.*s\n", preflen-1, preflen-1,
+ stats_fields[i].name);
+ }
+ } else {
+ preflen = 0;
+ }
+ /*
+ * Grouped fields get 4 columns of padding before the name
+ * (printed without its prefix); ungrouped fields get the same
+ * 4 columns after it.  The value is read as a uint32_t at the
+ * field's recorded byte offset.
+ */
+ printf(" %*s%-22s%*s%10u\n", preflen ? 4 : 0, "",
+ stats_fields[i].name+preflen, preflen ? 0 : 4, "",
+ *(uint32_t *)(stats_fields[i].offset+(uint8_t *)s));
+ }
+
+ printf(" hop_count_buckets:");
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ printf(" %d", s->hop_count_bucket[i]);
+ }
+ printf("\n");
+ printf(" lock_buckets:");
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ printf(" %d", s->locks.buckets[i]);
+ }
+ printf("\n");
+ /* Each latency line: minimum/average/maximum and sample count */
+ printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n",
+ "locks_latency MIN/AVG/MAX",
+ s->locks.latency.min, LATENCY_AVG(s->locks.latency),
+ s->locks.latency.max, s->locks.latency.num);
+
+ printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n",
+ "reclock_ctdbd MIN/AVG/MAX",
+ s->reclock.ctdbd.min, LATENCY_AVG(s->reclock.ctdbd),
+ s->reclock.ctdbd.max, s->reclock.ctdbd.num);
+
+ printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n",
+ "reclock_recd MIN/AVG/MAX",
+ s->reclock.recd.min, LATENCY_AVG(s->reclock.recd),
+ s->reclock.recd.max, s->reclock.recd.num);
+
+ printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n",
+ "call_latency MIN/AVG/MAX",
+ s->call_latency.min, LATENCY_AVG(s->call_latency),
+ s->call_latency.max, s->call_latency.num);
+
+ printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n",
+ "childwrite_latency MIN/AVG/MAX",
+ s->childwrite_latency.min,
+ LATENCY_AVG(s->childwrite_latency),
+ s->childwrite_latency.max, s->childwrite_latency.num);
+}
+
+/*
+ * Command function for "statistics": fetch the current statistics of
+ * the command node and print them, as a machine-readable row with a
+ * header or in human-readable form depending on options.machinereadable.
+ *
+ * Takes no arguments.  Returns 0 on success or the error from
+ * ctdb_ctrl_statistics().
+ */
+static int control_statistics(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			      int argc, const char **argv)
+{
+	struct ctdb_statistics *snapshot;
+	int ret;
+
+	if (argc != 0) {
+		usage("statistics");
+	}
+
+	ret = ctdb_ctrl_statistics(mem_ctx, ctdb->ev, ctdb->client,
+				   ctdb->cmd_pnn, TIMEOUT(), &snapshot);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (! options.machinereadable) {
+		print_statistics(snapshot);
+		return 0;
+	}
+
+	print_statistics_machine(snapshot, true);
+	return 0;
+}
+
+/*
+ * Command function for "statisticsreset": send the statistics-reset
+ * control to the command node.
+ *
+ * Takes no arguments.  Returns the result of the control (0 on success).
+ */
+static int control_statistics_reset(TALLOC_CTX *mem_ctx,
+				    struct ctdb_context *ctdb,
+				    int argc, const char **argv)
+{
+	if (argc != 0) {
+		usage("statisticsreset");
+	}
+
+	return ctdb_ctrl_statistics_reset(mem_ctx, ctdb->ev, ctdb->client,
+					  ctdb->cmd_pnn, TIMEOUT());
+}
+
+/*
+ * Command function for "stats": print the statistics history of the
+ * command node.
+ *
+ * Takes at most one argument, the maximum number of history records to
+ * print.  With no argument, or a value that atoi() turns into something
+ * <= 0, every record is printed.  History slots whose start time is
+ * zero are skipped and do not count towards the limit.  In
+ * machine-readable mode the header row is printed before the first
+ * record only.
+ *
+ * Returns 0 on success or the error from ctdb_ctrl_get_stat_history().
+ */
+static int control_stats(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 int argc, const char **argv)
+{
+	struct ctdb_statistics_list *slist;
+	int ret, count = 0, printed = 0, i;
+	bool show_header = true;
+
+	if (argc > 1) {
+		usage("stats");
+	}
+
+	if (argc == 1) {
+		count = atoi(argv[0]);
+	}
+
+	ret = ctdb_ctrl_get_stat_history(mem_ctx, ctdb->ev, ctdb->client,
+					 ctdb->cmd_pnn, TIMEOUT(), &slist);
+	if (ret != 0) {
+		return ret;
+	}
+
+	for (i=0; i<slist->num; i++) {
+		if (slist->stats[i].statistics_start_time.tv_sec == 0) {
+			continue;
+		}
+		if (options.machinereadable == 1) {
+			print_statistics_machine(&slist->stats[i],
+						 show_header);
+			show_header = false;
+		} else {
+			print_statistics(&slist->stats[i]);
+		}
+
+		/*
+		 * Count the records actually printed.  Comparing the slot
+		 * index with count after printing let one record too
+		 * many through.
+		 */
+		printed++;
+		if (count > 0 && printed >= count) {
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * qsort() comparison callback for struct ctdb_public_ip: orders two
+ * entries by their address using ctdb_sock_addr_cmp().
+ */
+static int ctdb_public_ip_cmp(const void *a, const void *b)
+{
+	return ctdb_sock_addr_cmp(&((const struct ctdb_public_ip *)a)->addr,
+				  &((const struct ctdb_public_ip *)b)->addr);
+}
+
+/*
+ * Print a list of public IPs and the node each is assigned to.
+ *
+ * ips is the list to print and ipinfo[i] the interface details for
+ * ips->ip[i] (NULL when no details are available); ipinfo is only
+ * consulted in verbose mode.  all_nodes only selects the wording of the
+ * human-readable heading.  In verbose mode each line additionally
+ * lists the active, available and configured interfaces.
+ */
+static void print_ip(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ struct ctdb_public_ip_list *ips,
+ struct ctdb_public_ip_info **ipinfo,
+ bool all_nodes)
+{
+ unsigned int i, j;
+ char *conf, *avail, *active;
+
+ /* Header row (machine-readable) or heading line (human-readable) */
+ if (options.machinereadable == 1) {
+ printf("%s%s%s%s%s", options.sep,
+ "Public IP", options.sep,
+ "Node", options.sep);
+ if (options.verbose == 1) {
+ printf("%s%s%s%s%s%s\n",
+ "ActiveInterfaces", options.sep,
+ "AvailableInterfaces", options.sep,
+ "ConfiguredInterfaces", options.sep);
+ } else {
+ printf("\n");
+ }
+ } else {
+ if (all_nodes) {
+ printf("Public IPs on ALL nodes\n");
+ } else {
+ printf("Public IPs on node %u\n", ctdb->cmd_pnn);
+ }
+ }
+
+ for (i = 0; i < ips->num; i++) {
+
+ /* Start of the line: address, plus the node in machine format */
+ if (options.machinereadable == 1) {
+ printf("%s%s%s%d%s", options.sep,
+ ctdb_sock_addr_to_string(
+ mem_ctx, &ips->ip[i].addr, false),
+ options.sep,
+ (int)ips->ip[i].pnn, options.sep);
+ } else {
+ printf("%s", ctdb_sock_addr_to_string(
+ mem_ctx, &ips->ip[i].addr, false));
+ }
+
+ /* Non-verbose: finish the line here and move on */
+ if (options.verbose == 0) {
+ if (options.machinereadable == 1) {
+ printf("\n");
+ } else {
+ printf(" %d\n", (int)ips->ip[i].pnn);
+ }
+ continue;
+ }
+
+ conf = NULL;
+ avail = NULL;
+ active = NULL;
+
+ /* No interface details: print the three lists as empty */
+ if (ipinfo[i] == NULL) {
+ goto skip_ipinfo;
+ }
+
+ for (j=0; j<ipinfo[i]->ifaces->num; j++) {
+ struct ctdb_iface *iface;
+
+ iface = &ipinfo[i]->ifaces->iface[j];
+ /* conf: comma-separated names of all interfaces */
+ if (conf == NULL) {
+ conf = talloc_strdup(mem_ctx, iface->name);
+ } else {
+ conf = talloc_asprintf_append(
+ conf, ",%s", iface->name);
+ }
+
+ /* active: the interface at index active_idx */
+ if (ipinfo[i]->active_idx == j) {
+ active = iface->name;
+ }
+
+ /* avail: only interfaces whose link_state is non-zero */
+ if (iface->link_state == 0) {
+ continue;
+ }
+
+ if (avail == NULL) {
+ avail = talloc_strdup(mem_ctx, iface->name);
+ } else {
+ avail = talloc_asprintf_append(
+ avail, ",%s", iface->name);
+ }
+ }
+
+ skip_ipinfo:
+
+ /* Lists that are still NULL are printed as empty strings */
+ if (options.machinereadable == 1) {
+ printf("%s%s%s%s%s%s\n",
+ active ? active : "", options.sep,
+ avail ? avail : "", options.sep,
+ conf ? conf : "", options.sep);
+ } else {
+ printf(" node[%d] active[%s] available[%s]"
+ " configured[%s]\n",
+ (int)ips->ip[i].pnn, active ? active : "",
+ avail ? avail : "", conf ? conf : "");
+ }
+ }
+}
+
+/*
+ * db_hash_traverse() callback: append the struct ctdb_public_ip stored
+ * in databuf to the struct ctdb_public_ip_list passed as private_data.
+ *
+ * The list's ip array must already be large enough; no bounds check is
+ * done here.  The key and the data length are not used.  Always
+ * returns 0.
+ */
+static int collect_ips(uint8_t *keybuf, size_t keylen, uint8_t *databuf,
+		       size_t datalen, void *private_data)
+{
+	struct ctdb_public_ip_list *list = talloc_get_type_abort(
+		private_data, struct ctdb_public_ip_list);
+
+	/* Copy the record into the next free slot and bump the count */
+	list->ip[list->num] = *(struct ctdb_public_ip *)databuf;
+	list->num += 1;
+
+	return 0;
+}
+
+/*
+ * Build the cluster-wide list of public IPs.
+ *
+ * Every active node is asked for its public IPs and the answers are
+ * merged in a temporary hash keyed by address:
+ *  - if the node reports itself as the host of an IP, that entry is
+ *    stored (replacing any earlier entry for the address);
+ *  - otherwise the IP is recorded with CTDB_UNKNOWN_PNN, but only when
+ *    no entry for the address exists yet.
+ * The merged entries are then copied into a newly allocated list.
+ *
+ * On success returns 0 and stores the list (allocated on mem_ctx) in
+ * *out.  Returns 1 on any failure, in which case *out is untouched.
+ */
+static int get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
+			      struct ctdb_public_ip_list **out)
+{
+	struct ctdb_node_map *nodemap;
+	struct ctdb_public_ip_list *ips;
+	/*
+	 * Must start as NULL: the failure path frees ipdb, and that path
+	 * is also taken when db_hash_init() itself fails, where ipdb is
+	 * not known to have been set.  talloc_free(NULL) is harmless.
+	 */
+	struct db_hash_context *ipdb = NULL;
+	uint32_t *pnn_list;
+	unsigned int j;
+	int ret, count, i;
+
+	nodemap = get_nodemap(ctdb, false);
+	if (nodemap == NULL) {
+		return 1;
+	}
+
+	ret = db_hash_init(mem_ctx, "ips", 101, DB_HASH_COMPLEX, &ipdb);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	count = list_of_active_nodes(nodemap, CTDB_UNKNOWN_PNN, mem_ctx,
+				     &pnn_list);
+	if (count <= 0) {
+		goto failed;
+	}
+
+	for (i=0; i<count; i++) {
+		ret = ctdb_ctrl_get_public_ips(mem_ctx, ctdb->ev, ctdb->client,
+					       pnn_list[i], TIMEOUT(),
+					       false, &ips);
+		if (ret != 0) {
+			goto failed;
+		}
+
+		for (j=0; j<ips->num; j++) {
+			struct ctdb_public_ip ip;
+
+			ip.pnn = ips->ip[j].pnn;
+			ip.addr = ips->ip[j].addr;
+
+			if (pnn_list[i] == ip.pnn) {
+				/* Node claims IP is hosted on it, so
+				 * save that information
+				 */
+				ret = db_hash_add(ipdb, (uint8_t *)&ip.addr,
+						  sizeof(ip.addr),
+						  (uint8_t *)&ip, sizeof(ip));
+				if (ret != 0) {
+					goto failed;
+				}
+			} else {
+				/* Node thinks IP is hosted elsewhere,
+				 * so overwrite with CTDB_UNKNOWN_PNN
+				 * if there's no existing entry
+				 */
+				ret = db_hash_exists(ipdb, (uint8_t *)&ip.addr,
+						     sizeof(ip.addr));
+				if (ret == ENOENT) {
+					ip.pnn = CTDB_UNKNOWN_PNN;
+					ret = db_hash_add(ipdb,
+							  (uint8_t *)&ip.addr,
+							  sizeof(ip.addr),
+							  (uint8_t *)&ip,
+							  sizeof(ip));
+					if (ret != 0) {
+						goto failed;
+					}
+				}
+			}
+		}
+
+		TALLOC_FREE(ips);
+	}
+
+	talloc_free(pnn_list);
+
+	/* First pass without a callback just counts the merged entries */
+	ret = db_hash_traverse(ipdb, NULL, NULL, &count);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	ips = talloc_zero(mem_ctx, struct ctdb_public_ip_list);
+	if (ips == NULL) {
+		goto failed;
+	}
+
+	ips->ip = talloc_array(ips, struct ctdb_public_ip, count);
+	if (ips->ip == NULL) {
+		goto failed;
+	}
+
+	/* Second pass copies each entry into the array sized above */
+	ret = db_hash_traverse(ipdb, collect_ips, ips, &count);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	/* Both passes must have seen the same number of entries */
+	if ((unsigned int)count != ips->num) {
+		goto failed;
+	}
+
+	talloc_free(ipdb);
+
+	*out = ips;
+	return 0;
+
+failed:
+	talloc_free(ipdb);
+	return 1;
+}
+
+/*
+ * Command function for "ip": list public IPs.
+ *
+ * With no argument the public IPs known to the command node are listed;
+ * with the single argument "all" the merged list from
+ * get_all_public_ips() is used instead.  The list is sorted by address,
+ * interface details are fetched for each entry (from the hosting node
+ * in "all" mode, else from the command node; skipped when the node is
+ * CTDB_UNKNOWN_PNN) and everything is printed with print_ip().
+ *
+ * Returns 0 on success, 1 on allocation failure, or the error of the
+ * failing step.
+ */
+static int control_ip(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+		      int argc, const char **argv)
+{
+	struct ctdb_public_ip_list *ips;
+	struct ctdb_public_ip_info **details;
+	bool do_all = false;
+	unsigned int idx;
+	int ret;
+
+	if (argc > 1) {
+		usage("ip");
+	}
+
+	/* The only accepted argument is "all" */
+	if (argc == 1) {
+		do_all = (strcmp(argv[0], "all") == 0);
+		if (!do_all) {
+			usage("ip");
+		}
+	}
+
+	if (!do_all) {
+		ret = ctdb_ctrl_get_public_ips(mem_ctx, ctdb->ev, ctdb->client,
+					       ctdb->cmd_pnn, TIMEOUT(),
+					       false, &ips);
+	} else {
+		ret = get_all_public_ips(ctdb, mem_ctx, &ips);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+
+	qsort(ips->ip, ips->num, sizeof(struct ctdb_public_ip),
+	      ctdb_public_ip_cmp);
+
+	details = talloc_array(mem_ctx, struct ctdb_public_ip_info *,
+			       ips->num);
+	if (details == NULL) {
+		return 1;
+	}
+
+	for (idx = 0; idx < ips->num; idx++) {
+		uint32_t pnn = do_all ? ips->ip[idx].pnn : ctdb->cmd_pnn;
+
+		/* Nobody to ask about this address */
+		if (pnn == CTDB_UNKNOWN_PNN) {
+			details[idx] = NULL;
+			continue;
+		}
+
+		ret = ctdb_ctrl_get_public_ip_info(mem_ctx, ctdb->ev,
+						   ctdb->client, pnn,
+						   TIMEOUT(),
+						   &ips->ip[idx].addr,
+						   &details[idx]);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	print_ip(mem_ctx, ctdb, ips, details, do_all);
+	return 0;
+}
+
+/*
+ * Command function for "ipinfo": show what the command node knows about
+ * one public IP.
+ *
+ * Takes exactly one argument, the IP address.  Prints the address, the
+ * node currently hosting it, and for each interface its name, link
+ * state and reference count, marking the interface at active_idx as
+ * active.
+ *
+ * Returns 0 on success, 1 if the address cannot be parsed, or the error
+ * from ctdb_ctrl_get_public_ip_info() (a result of -1 is additionally
+ * reported as the node not knowing the IP).
+ */
+static int control_ipinfo(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct ctdb_public_ip_info *info;
+	ctdb_sock_addr addr;
+	unsigned int idx;
+	int ret;
+
+	if (argc != 1) {
+		usage("ipinfo");
+	}
+
+	if (ctdb_sock_addr_from_string(argv[0], &addr, false) != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+
+	ret = ctdb_ctrl_get_public_ip_info(mem_ctx, ctdb->ev, ctdb->client,
+					   ctdb->cmd_pnn, TIMEOUT(), &addr,
+					   &info);
+	if (ret == -1) {
+		printf("Node %u does not know about IP %s\n",
+		       ctdb->cmd_pnn, argv[0]);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+
+	printf("Public IP[%s] info on node %u\n",
+	       ctdb_sock_addr_to_string(mem_ctx, &info->ip.addr, false),
+	       ctdb->cmd_pnn);
+
+	printf("IP:%s\nCurrentNode:%u\nNumInterfaces:%u\n",
+	       ctdb_sock_addr_to_string(mem_ctx, &info->ip.addr, false),
+	       info->ip.pnn, info->ifaces->num);
+
+	for (idx = 0; idx < info->ifaces->num; idx++) {
+		struct ctdb_iface *iface = &info->ifaces->iface[idx];
+		const char *link = "up";
+		const char *mark = "";
+
+		/* Force termination of the name before printing it */
+		iface->name[CTDB_IFACE_SIZE] = '\0';
+
+		if (iface->link_state == 0) {
+			link = "down";
+		}
+		if (idx == info->active_idx) {
+			mark = " (active)";
+		}
+
+		printf("Interface[%u]: Name:%s Link:%s References:%u%s\n",
+		       idx+1, iface->name, link, iface->references, mark);
+	}
+
+	return 0;
+}
+
+/*
+ * Command function for "ifaces": list the interfaces of the command
+ * node with their link state and reference count.
+ *
+ * Takes no arguments.  Prints a single "No interfaces configured" line
+ * when the list is empty; otherwise a header or heading followed by one
+ * line per interface, in machine-readable or human-readable form.
+ *
+ * Returns 0 on success or the error from ctdb_ctrl_get_ifaces().
+ */
+static int control_ifaces(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct ctdb_iface_list *list;
+	unsigned int idx;
+	int ret;
+
+	if (argc != 0) {
+		usage("ifaces");
+	}
+
+	ret = ctdb_ctrl_get_ifaces(mem_ctx, ctdb->ev, ctdb->client,
+				   ctdb->cmd_pnn, TIMEOUT(), &list);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (list->num == 0) {
+		printf("No interfaces configured on node %u\n",
+		       ctdb->cmd_pnn);
+		return 0;
+	}
+
+	if (options.machinereadable) {
+		/* Separator-delimited table; link state printed as a number */
+		printf("%s%s%s%s%s%s%s\n", options.sep,
+		       "Name", options.sep,
+		       "LinkStatus", options.sep,
+		       "References", options.sep);
+
+		for (idx = 0; idx < list->num; idx++) {
+			struct ctdb_iface *iface = &list->iface[idx];
+
+			printf("%s%s%s%u%s%u%s\n", options.sep,
+			       iface->name, options.sep,
+			       iface->link_state, options.sep,
+			       iface->references, options.sep);
+		}
+
+		return 0;
+	}
+
+	/* Human-readable listing; link state printed as up/down */
+	printf("Interfaces on node %u\n", ctdb->cmd_pnn);
+
+	for (idx = 0; idx < list->num; idx++) {
+		struct ctdb_iface *iface = &list->iface[idx];
+
+		printf("name:%s link:%s references:%u\n",
+		       iface->name,
+		       iface->link_state ? "up" : "down",
+		       iface->references);
+	}
+
+	return 0;
+}
+
+/*
+ * Command function for "setifacelink": set the link state of one
+ * interface on the command node.
+ *
+ * Takes two arguments: the interface name and "up" or "down".  The name
+ * must not be longer than CTDB_IFACE_SIZE and must match one of the
+ * interfaces reported by the node.  The matching entry is sent back with
+ * the new link state and its reference count set to 0.
+ *
+ * Returns 0 on success, 1 for a bad name or state argument, or the
+ * error from the failing control.
+ */
+static int control_setifacelink(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+				int argc, const char **argv)
+{
+	struct ctdb_iface_list *list;
+	struct ctdb_iface *target = NULL;
+	unsigned int idx;
+	bool want_up;
+	int ret;
+
+	if (argc != 2) {
+		usage("setifacelink");
+	}
+
+	if (strlen(argv[0]) > CTDB_IFACE_SIZE) {
+		fprintf(stderr, "Interface name '%s' too long\n", argv[0]);
+		return 1;
+	}
+
+	ret = ctdb_ctrl_get_ifaces(mem_ctx, ctdb->ev, ctdb->client,
+				   ctdb->cmd_pnn, TIMEOUT(), &list);
+	if (ret != 0) {
+		fprintf(stderr,
+			"Failed to get interface information from node %u\n",
+			ctdb->cmd_pnn);
+		return ret;
+	}
+
+	/* Find the first interface with exactly this name */
+	for (idx = 0; idx < list->num && target == NULL; idx++) {
+		if (strcmp(list->iface[idx].name, argv[0]) == 0) {
+			target = &list->iface[idx];
+		}
+	}
+
+	if (target == NULL) {
+		printf("Interface %s not configured on node %u\n",
+		       argv[0], ctdb->cmd_pnn);
+		return 1;
+	}
+
+	/* Only the literal strings "up" and "down" are accepted */
+	want_up = (strcmp(argv[1], "up") == 0);
+	if (!want_up && strcmp(argv[1], "down") != 0) {
+		usage("setifacelink");
+		return 1;
+	}
+
+	target->link_state = want_up ? 1 : 0;
+	target->references = 0;
+
+	return ctdb_ctrl_set_iface_link_state(mem_ctx, ctdb->ev,
+					      ctdb->client, ctdb->cmd_pnn,
+					      TIMEOUT(), target);
+}
+
+/*
+ * Command function for "process-exists": ask the command node whether a
+ * process exists.
+ *
+ * Takes a PID and optionally a SRVID.  With no SRVID (or a SRVID of 0)
+ * only the PID is checked; otherwise the PID/SRVID pair is checked.
+ * The verdict is printed, where a status of 0 means "exists".
+ *
+ * Returns the status reported by the node, or the error from parsing
+ * the SRVID or from the failing control.
+ */
+static int control_process_exists(TALLOC_CTX *mem_ctx,
+				  struct ctdb_context *ctdb,
+				  int argc, const char **argv)
+{
+	const char *verdict;
+	uint64_t srvid = 0;
+	pid_t pid;
+	int status;
+	int ret = 0;
+
+	if (argc != 1 && argc != 2) {
+		usage("process-exists");
+	}
+
+	pid = atoi(argv[0]);
+	if (argc == 2) {
+		srvid = smb_strtoull(argv[1], NULL, 0, &ret, SMB_STR_STANDARD);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	if (srvid != 0) {
+		struct ctdb_pid_srvid pid_srvid;
+
+		pid_srvid.pid = pid;
+		pid_srvid.srvid = srvid;
+
+		ret = ctdb_ctrl_check_pid_srvid(mem_ctx, ctdb->ev,
+						ctdb->client, ctdb->cmd_pnn,
+						TIMEOUT(), &pid_srvid,
+						&status);
+	} else {
+		ret = ctdb_ctrl_process_exists(mem_ctx, ctdb->ev, ctdb->client,
+					       ctdb->cmd_pnn, TIMEOUT(), pid,
+					       &status);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* A status of 0 means the process was found */
+	verdict = (status == 0 ? "exists" : "does not exist");
+
+	if (srvid != 0) {
+		printf("PID %d with SRVID 0x%"PRIx64" %s\n", pid, srvid,
+		       verdict);
+	} else {
+		printf("PID %d %s\n", pid, verdict);
+	}
+
+	return status;
+}
+
+/*
+ * Command function for "getdbmap": list the databases attached on the
+ * command node.
+ *
+ * Takes no arguments.  For every database in the map its name, path and
+ * health are fetched with separate controls, and one line is printed
+ * with the database ID, name, path and flags, in machine-readable or
+ * human-readable form.
+ *
+ * Returns 0 on success or the error from the first failing control.
+ */
+static int control_getdbmap(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ int argc, const char **argv)
+{
+ struct ctdb_dbid_map *dbmap;
+ unsigned int i;
+ int ret;
+
+ if (argc != 0) {
+ usage("getdbmap");
+ }
+
+ ret = ctdb_ctrl_get_dbmap(mem_ctx, ctdb->ev, ctdb->client,
+ ctdb->cmd_pnn, TIMEOUT(), &dbmap);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Header row (machine-readable) or database count (human-readable) */
+ if (options.machinereadable == 1) {
+ printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ options.sep,
+ "ID", options.sep,
+ "Name", options.sep,
+ "Path", options.sep,
+ "Persistent", options.sep,
+ "Sticky", options.sep,
+ "Unhealthy", options.sep,
+ "Readonly", options.sep,
+ "Replicated", options.sep);
+ } else {
+ printf("Number of databases:%d\n", dbmap->num);
+ }
+
+ for (i=0; i<dbmap->num; i++) {
+ const char *name;
+ const char *path;
+ const char *health;
+ bool persistent;
+ bool readonly;
+ bool sticky;
+ bool replicated;
+ uint32_t db_id;
+
+ db_id = dbmap->dbs[i].db_id;
+
+ ret = ctdb_ctrl_get_dbname(mem_ctx, ctdb->ev, ctdb->client,
+ ctdb->cmd_pnn, TIMEOUT(), db_id,
+ &name);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = ctdb_ctrl_getdbpath(mem_ctx, ctdb->ev, ctdb->client,
+ ctdb->cmd_pnn, TIMEOUT(), db_id,
+ &path);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = ctdb_ctrl_db_get_health(mem_ctx, ctdb->ev, ctdb->client,
+ ctdb->cmd_pnn, TIMEOUT(), db_id,
+ &health);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Reduce each flag bit to a bool */
+ persistent = dbmap->dbs[i].flags & CTDB_DB_FLAGS_PERSISTENT;
+ readonly = dbmap->dbs[i].flags & CTDB_DB_FLAGS_READONLY;
+ sticky = dbmap->dbs[i].flags & CTDB_DB_FLAGS_STICKY;
+ replicated = dbmap->dbs[i].flags & CTDB_DB_FLAGS_REPLICATED;
+
+ /* A non-NULL health string is reported as unhealthy */
+ if (options.machinereadable == 1) {
+ printf("%s0x%08X%s%s%s%s%s%d%s%d%s%d%s%d%s%d%s\n",
+ options.sep,
+ db_id, options.sep,
+ name, options.sep,
+ path, options.sep,
+ !! (persistent), options.sep,
+ !! (sticky), options.sep,
+ !! (health), options.sep,
+ !! (readonly), options.sep,
+ !! (replicated), options.sep);
+ } else {
+ printf("dbid:0x%08x name:%s path:%s%s%s%s%s%s\n",
+ db_id, name, path,
+ persistent ? " PERSISTENT" : "",
+ sticky ? " STICKY" : "",
+ readonly ? " READONLY" : "",
+ replicated ? " REPLICATED" : "",
+ health ? " UNHEALTHY" : "");
+ }
+
+ /* Release the per-database strings before the next iteration */
+ talloc_free(discard_const(name));
+ talloc_free(discard_const(path));
+ talloc_free(discard_const(health));
+ }
+
+ return 0;
+}
+
+/*
+ * Command function for "getdbstatus": print details of one database.
+ *
+ * Takes exactly one argument identifying the database, which is resolved
+ * with db_exists().  Prints the database ID, name and path, a yes/no
+ * line for each of the PERSISTENT, REPLICATED, STICKY and READONLY
+ * flags, and the health string ("OK" when none is returned).
+ *
+ * Returns 0 on success, 1 if the database does not exist, or the error
+ * from the failing control.
+ */
+static int control_getdbstatus(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			       int argc, const char **argv)
+{
+	const char *db_name;
+	const char *db_path;
+	const char *db_health;
+	uint32_t db_id;
+	uint8_t db_flags;
+	int ret;
+
+	if (argc != 1) {
+		usage("getdbstatus");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &db_id, &db_name, &db_flags)) {
+		return 1;
+	}
+
+	/* The health query only runs when the path query succeeded */
+	ret = ctdb_ctrl_getdbpath(mem_ctx, ctdb->ev, ctdb->client,
+				  ctdb->cmd_pnn, TIMEOUT(), db_id,
+				  &db_path);
+	if (ret == 0) {
+		ret = ctdb_ctrl_db_get_health(mem_ctx, ctdb->ev, ctdb->client,
+					      ctdb->cmd_pnn, TIMEOUT(), db_id,
+					      &db_health);
+	}
+	if (ret != 0) {
+		return ret;
+	}
+
+	printf("dbid: 0x%08x\nname: %s\npath: %s\n", db_id, db_name, db_path);
+	printf("PERSISTENT: %s\nREPLICATED: %s\nSTICKY: %s\nREADONLY: %s\n",
+	       (db_flags & CTDB_DB_FLAGS_PERSISTENT) != 0 ? "yes" : "no",
+	       (db_flags & CTDB_DB_FLAGS_REPLICATED) != 0 ? "yes" : "no",
+	       (db_flags & CTDB_DB_FLAGS_STICKY) != 0 ? "yes" : "no",
+	       (db_flags & CTDB_DB_FLAGS_READONLY) != 0 ? "yes" : "no");
+
+	if (db_health == NULL) {
+		db_health = "OK";
+	}
+	printf("HEALTH: %s\n", db_health);
+
+	return 0;
+}
+
+/* State handed to dump_record() through its private_data pointer */
+struct dump_record_state {
+ uint32_t count; /* incremented once for every record dumped */
+};
+
+/*
+ * True for characters dump_tdb_data() prints verbatim: printable ones
+ * other than the double quote and the backslash.  The argument is
+ * evaluated more than once, so it must not have side effects.
+ */
+#define ISASCII(x) (isprint(x) && ! strchr("\"\\", (x)))
+
+/*
+ * Print a TDB_DATA blob to stdout as: name(size) = "contents".
+ *
+ * Bytes accepted by ISASCII() are printed as they are; every other byte
+ * is printed as a backslash followed by two upper-case hex digits.
+ */
+static void dump_tdb_data(const char *name, TDB_DATA val)
+{
+	size_t pos = 0;
+
+	printf("%s(%zu) = \"", name, val.dsize);
+
+	while (pos < val.dsize) {
+		unsigned char byte = val.dptr[pos];
+
+		if (! ISASCII(byte)) {
+			printf("\\%02X", byte);
+		} else {
+			printf("%c", byte);
+		}
+		pos += 1;
+	}
+
+	printf("\"\n");
+}
+
+/*
+ * Print a record header to stdout: the dmaster, the rsn and the flags
+ * word in hex, followed on the same line by the name of every known
+ * flag that is set.
+ */
+static void dump_ltdb_header(struct ctdb_ltdb_header *header)
+{
+	/* Known flag bits and their labels, in the order they are printed */
+	const struct {
+		uint32_t bit;
+		const char *label;
+	} known_flags[] = {
+		{ CTDB_REC_FLAG_MIGRATED_WITH_DATA, " MIGRATED_WITH_DATA" },
+		{ CTDB_REC_FLAG_VACUUM_MIGRATED, " VACUUM_MIGRATED" },
+		{ CTDB_REC_FLAG_AUTOMATIC, " AUTOMATIC" },
+		{ CTDB_REC_RO_HAVE_DELEGATIONS, " RO_HAVE_DELEGATIONS" },
+		{ CTDB_REC_RO_HAVE_READONLY, " RO_HAVE_READONLY" },
+		{ CTDB_REC_RO_REVOKING_READONLY, " RO_REVOKING_READONLY" },
+		{ CTDB_REC_RO_REVOKE_COMPLETE, " RO_REVOKE_COMPLETE" },
+	};
+	size_t idx;
+
+	printf("dmaster: %u\n", header->dmaster);
+	printf("rsn: %" PRIu64 "\n", header->rsn);
+	printf("flags: 0x%08x", header->flags);
+
+	for (idx = 0; idx < ARRAY_SIZE(known_flags); idx++) {
+		if (header->flags & known_flags[idx].bit) {
+			fputs(known_flags[idx].label, stdout);
+		}
+	}
+
+	printf("\n");
+}
+
+/*
+ * Traverse callback: print one record (key, header, data) to stdout,
+ * followed by an empty line, and count it in the dump_record_state passed
+ * as private_data.  Always returns 0.
+ */
+static int dump_record(uint32_t reqid, struct ctdb_ltdb_header *header,
+		       TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct dump_record_state *counter = private_data;
+
+	counter->count++;
+
+	dump_tdb_data("key", key);
+	dump_ltdb_header(header);
+	dump_tdb_data("data", data);
+	fprintf(stdout, "\n");
+
+	return 0;
+}
+
+/*
+ * Command "catdb": attach to the named database and print every record
+ * delivered by ctdb_db_traverse() for node ctdb->cmd_pnn.
+ *
+ * Returns 1 if db_exists() fails, the attach error if attaching fails,
+ * otherwise the result of the traverse.  The record count is printed even
+ * when the traverse reports an error.
+ */
+static int control_catdb(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 int argc, const char **argv)
+{
+	struct dump_record_state state = { .count = 0 };
+	struct ctdb_db_context *db;
+	const char *name;
+	uint32_t id;
+	uint8_t flags;
+	int ret;
+
+	if (argc != 1) {
+		usage("catdb");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &id, &name, &flags)) {
+		return 1;
+	}
+
+	ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), name, flags,
+			  &db);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", name);
+		return ret;
+	}
+
+	ret = ctdb_db_traverse(mem_ctx, ctdb->ev, ctdb->client, db,
+			       ctdb->cmd_pnn, TIMEOUT(),
+			       dump_record, &state);
+
+	printf("Dumped %u records\n", state.count);
+
+	return ret;
+}
+
+/*
+ * Command "cattdb": attach to the named database and print every record
+ * delivered by ctdb_db_traverse_local().
+ *
+ * Returns 1 if db_exists() fails, the attach error if attaching fails,
+ * otherwise the result of the traverse.  The record count is printed even
+ * when the traverse reports an error.
+ */
+static int control_cattdb(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct dump_record_state state = { .count = 0 };
+	struct ctdb_db_context *db;
+	const char *name;
+	uint32_t id;
+	uint8_t flags;
+	int ret;
+
+	if (argc != 1) {
+		usage("cattdb");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &id, &name, &flags)) {
+		return 1;
+	}
+
+	ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), name, flags,
+			  &db);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", name);
+		return ret;
+	}
+
+	ret = ctdb_db_traverse_local(db, true, true, dump_record, &state);
+
+	printf("Dumped %u record(s)\n", state.count);
+
+	return ret;
+}
+
+/*
+ * Command "getcapabilities": report whether node ctdb->cmd_pnn has the
+ * CTDB_CAP_RECMASTER (printed as LEADER) and CTDB_CAP_LMASTER capabilities.
+ *
+ * Output is a separator-delimited header and value line when
+ * options.machinereadable is 1, and "NAME: YES|NO" lines otherwise.
+ * Returns 0 on success or the error of the failed control.
+ */
+static int control_getcapabilities(TALLOC_CTX *mem_ctx,
+				   struct ctdb_context *ctdb,
+				   int argc, const char **argv)
+{
+	bool leader, lmaster;
+	uint32_t caps;
+	int ret;
+
+	if (argc != 0) {
+		usage("getcapabilities");
+	}
+
+	ret = ctdb_ctrl_get_capabilities(mem_ctx, ctdb->ev, ctdb->client,
+					 ctdb->cmd_pnn, TIMEOUT(), &caps);
+	if (ret != 0) {
+		return ret;
+	}
+
+	leader = (caps & CTDB_CAP_RECMASTER) != 0;
+	lmaster = (caps & CTDB_CAP_LMASTER) != 0;
+
+	if (options.machinereadable != 1) {
+		printf("LEADER: %s\n", leader ? "YES" : "NO");
+		printf("LMASTER: %s\n", lmaster ? "YES" : "NO");
+		return 0;
+	}
+
+	printf("%s%s%s%s%s\n",
+	       options.sep,
+	       "LEADER", options.sep,
+	       "LMASTER", options.sep);
+	printf("%s%d%s%d%s\n", options.sep,
+	       leader, options.sep,
+	       lmaster, options.sep);
+
+	return 0;
+}
+
+/*
+ * Command "pnn": print the PNN reported by ctdb_client_pnn() for this
+ * client connection.  Always returns 0.
+ */
+static int control_pnn(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+		       int argc, const char **argv)
+{
+	uint32_t pnn = ctdb_client_pnn(ctdb->client);
+
+	printf("%u\n", pnn);
+
+	return 0;
+}
+
+/*
+ * Command "lvs": hand the single argument over to the LVS helper program.
+ *
+ * The helper path is taken from the CTDB_LVS_HELPER environment variable
+ * when set, otherwise it is CTDB_HELPER_BINDIR "/ctdb_lvs".  Returns 1 if
+ * the path cannot be allocated, otherwise the result of run_helper().
+ */
+static int control_lvs(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+		       int argc, const char **argv)
+{
+	char *override;
+	char *helper;
+
+	if (argc != 1) {
+		usage("lvs");
+	}
+
+	override = getenv("CTDB_LVS_HELPER");
+	if (override == NULL) {
+		helper = talloc_asprintf(mem_ctx, "%s/ctdb_lvs",
+					 CTDB_HELPER_BINDIR);
+	} else {
+		helper = talloc_strdup(mem_ctx, override);
+	}
+
+	if (helper == NULL) {
+		fprintf(stderr, "Unable to set LVS helper\n");
+		return 1;
+	}
+
+	return run_helper(mem_ctx, "LVS helper", helper, argc, argv);
+}
+
+/*
+ * Command "setdebug": set the debug level of node ctdb->cmd_pnn.
+ *
+ * argv[0] is parsed with debug_level_parse().  Returns 1 for an
+ * unrecognised level, otherwise the result of the setdebug control.
+ */
+static int control_setdebug(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	int level;
+
+	if (argc != 1) {
+		usage("setdebug");
+	}
+
+	if (! debug_level_parse(argv[0], &level)) {
+		fprintf(stderr,
+			"Invalid debug level '%s'. Valid levels are:\n",
+			argv[0]);
+		fprintf(stderr, "\tERROR | WARNING | NOTICE | INFO | DEBUG\n");
+		return 1;
+	}
+
+	return ctdb_ctrl_setdebug(mem_ctx, ctdb->ev, ctdb->client,
+				  ctdb->cmd_pnn, TIMEOUT(), level);
+}
+
+/*
+ * Command "getdebug": print the debug level of node ctdb->cmd_pnn as the
+ * string produced by debug_level_to_string().
+ *
+ * Returns 0 on success or the error of the failed control.
+ */
+static int control_getdebug(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	int level;
+	int ret;
+
+	if (argc != 0) {
+		usage("getdebug");
+	}
+
+	ret = ctdb_ctrl_getdebug(mem_ctx, ctdb->ev, ctdb->client,
+				 ctdb->cmd_pnn, TIMEOUT(), &level);
+	if (ret != 0) {
+		return ret;
+	}
+
+	printf("%s\n", debug_level_to_string(level));
+
+	return 0;
+}
+
+/*
+ * Command "attach": attach to database argv[0].
+ *
+ * An optional second argument selects the database type: "persistent",
+ * "readonly", "sticky" or "replicated".  Without it the flags are 0.
+ * Returns the result of ctdb_attach().
+ */
+static int control_attach(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	const struct {
+		const char *keyword;
+		uint8_t flags;
+	} db_types[] = {
+		{ "persistent", CTDB_DB_FLAGS_PERSISTENT },
+		{ "readonly", CTDB_DB_FLAGS_READONLY },
+		{ "sticky", CTDB_DB_FLAGS_STICKY },
+		{ "replicated", CTDB_DB_FLAGS_REPLICATED },
+	};
+	const size_t num_types = sizeof(db_types) / sizeof(db_types[0]);
+	uint8_t db_flags = 0;
+
+	if (argc < 1 || argc > 2) {
+		usage("attach");
+	}
+
+	if (argc == 2) {
+		size_t n;
+
+		for (n = 0; n < num_types; n++) {
+			if (strcmp(argv[1], db_types[n].keyword) == 0) {
+				db_flags = db_types[n].flags;
+				break;
+			}
+		}
+		if (n == num_types) {
+			/* Unknown database type keyword */
+			usage("attach");
+		}
+	}
+
+	return ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), argv[0],
+			   db_flags, NULL);
+}
+
+/*
+ * Command "detach": detach one or more volatile databases.
+ *
+ * Preconditions checked before anything is detached:
+ *  - recovery must not be active on node ctdb->cmd_pnn;
+ *  - no connected, non-deleted node may be inactive;
+ *  - the AllowClientDBAttach tunable must not be 1 on any such node.
+ *
+ * Each argument is then looked up with db_exists(); names for which the
+ * lookup fails are skipped.  A persistent or replicated database aborts
+ * the command.
+ *
+ * Returns 0 on success, 1 when a precondition fails, the error code of a
+ * failed control, or the error of the last ctdb_detach() call that failed.
+ */
+static int control_detach(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	const char *db_name;
+	uint32_t db_id;
+	uint8_t db_flags;
+	struct ctdb_node_map *nodemap;
+	int recmode;
+	unsigned int j;
+	int ret, ret2, i;
+
+	if (argc < 1) {
+		usage("detach");
+	}
+
+	ret = ctdb_ctrl_get_recmode(mem_ctx, ctdb->ev, ctdb->client,
+				    ctdb->cmd_pnn, TIMEOUT(), &recmode);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (recmode == CTDB_RECOVERY_ACTIVE) {
+		fprintf(stderr, "Database cannot be detached"
+				" when recovery is active\n");
+		return 1;
+	}
+
+	nodemap = get_nodemap(ctdb, false);
+	if (nodemap == NULL) {
+		return 1;
+	}
+
+	for (j=0; j<nodemap->num; j++) {
+		uint32_t value;
+
+		/* Disconnected and deleted nodes are not consulted */
+		if (nodemap->node[j].flags & NODE_FLAGS_DISCONNECTED) {
+			continue;
+		}
+		if (nodemap->node[j].flags & NODE_FLAGS_DELETED) {
+			continue;
+		}
+		if (nodemap->node[j].flags & NODE_FLAGS_INACTIVE) {
+			fprintf(stderr, "Database cannot be detached on"
+				" inactive (stopped or banned) node %u\n",
+				nodemap->node[j].pnn);
+			return 1;
+		}
+
+		ret = ctdb_ctrl_get_tunable(mem_ctx, ctdb->ev, ctdb->client,
+					    nodemap->node[j].pnn, TIMEOUT(),
+					    "AllowClientDBAttach", &value);
+		if (ret != 0) {
+			fprintf(stderr,
+				"Unable to get tunable AllowClientDBAttach"
+				" from node %u\n", nodemap->node[j].pnn);
+			return ret;
+		}
+
+		if (value == 1) {
+			/* Spell the tunable exactly as it is queried above */
+			fprintf(stderr,
+				"Database access is still active on node %u."
+				" Set AllowClientDBAttach=0 on all nodes.\n",
+				nodemap->node[j].pnn);
+			return 1;
+		}
+	}
+
+	ret2 = 0;
+	for (i=0; i<argc; i++) {
+		if (! db_exists(mem_ctx, ctdb, argv[i], &db_id, &db_name,
+				&db_flags)) {
+			continue;
+		}
+
+		if (db_flags &
+		    (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) {
+			fprintf(stderr,
+				"Only volatile databases can be detached\n");
+			return 1;
+		}
+
+		ret = ctdb_detach(ctdb->ev, ctdb->client, TIMEOUT(), db_id);
+		if (ret != 0) {
+			/* Remember the failure but keep detaching the rest */
+			fprintf(stderr, "Database %s detach failed\n", db_name);
+			ret2 = ret;
+		}
+	}
+
+	return ret2;
+}
+
+/*
+ * Command "dumpmemory": fetch the memory dump string from node
+ * ctdb->cmd_pnn and write it to file descriptor 1.
+ *
+ * Returns 0 on success, the control error, or 1 on a failed or short
+ * write.
+ */
+static int control_dumpmemory(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			      int argc, const char **argv)
+{
+	const char *summary;
+	size_t len;
+	ssize_t written;
+	int ret;
+
+	ret = ctdb_ctrl_dump_memory(mem_ctx, ctdb->ev, ctdb->client,
+				    ctdb->cmd_pnn, TIMEOUT(), &summary);
+	if (ret != 0) {
+		return ret;
+	}
+
+	len = strlen(summary);
+	written = write(1, summary, len);
+	if (written < 0 || (size_t)written != len) {
+		fprintf(stderr, "Failed to write talloc summary\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Message handler: write the received data, up to its first NUL byte, to
+ * file descriptor 1 and set the bool that private_data points to.
+ *
+ * A failed or short write is reported on stderr; the bool is set either
+ * way.
+ */
+static void dump_memory(uint64_t srvid, TDB_DATA data, void *private_data)
+{
+	bool *finished = private_data;
+	size_t nbytes = strnlen((const char *)data.dptr, data.dsize);
+	ssize_t written;
+
+	written = write(1, data.dptr, nbytes);
+	if (written < 0 || (size_t)written != nbytes) {
+		fprintf(stderr, "Failed to write talloc summary\n");
+	}
+
+	*finished = true;
+}
+
+/*
+ * Command "rddumpmemory": send a mem-dump message to node ctdb->cmd_pnn
+ * and print the reply received by dump_memory().
+ *
+ * The reply handler is registered with a pointer to a local variable, so
+ * it is removed again before returning, on the error path as well.
+ *
+ * Returns 0 on success or the error of the failed call.
+ */
+static int control_rddumpmemory(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+				int argc, const char **argv)
+{
+	struct ctdb_srvid_message msg = { 0 };
+	int ret;
+	bool done = false;
+
+	msg.pnn = ctdb->pnn;
+	msg.srvid = next_srvid(ctdb);
+
+	ret = ctdb_client_set_message_handler(ctdb->ev, ctdb->client,
+					      msg.srvid, dump_memory, &done);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_message_mem_dump(mem_ctx, ctdb->ev, ctdb->client,
+				    ctdb->cmd_pnn, &msg);
+	if (ret == 0) {
+		/* dump_memory() sets done once the reply has been printed */
+		ctdb_client_wait(ctdb->ev, &done);
+	}
+
+	/* Do not leave a handler behind that points at our stack */
+	ctdb_client_remove_message_handler(ctdb->ev, ctdb->client,
+					   msg.srvid, &done);
+	return ret;
+}
+
+/*
+ * Command "getpid": print the process ID obtained from node ctdb->cmd_pnn
+ * via ctdb_ctrl_get_pid().
+ *
+ * Returns 0 on success or the error of the failed control.
+ */
+static int control_getpid(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	pid_t pid;
+	int ret;
+
+	ret = ctdb_ctrl_get_pid(mem_ctx, ctdb->ev, ctdb->client,
+				ctdb->cmd_pnn, TIMEOUT(), &pid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* pid_t is a signed type: print it with a matching conversion */
+	printf("%d\n", (int)pid);
+	return 0;
+}
+
+/*
+ * Check whether node ctdb->cmd_pnn already is in the requested flag state.
+ *
+ * Returns 0 if the flag already has the value set_flag; a message using
+ * desc is then printed to stderr.  Returns 1 if the state differs or if
+ * the node map could not be obtained.
+ *
+ * The node map is indexed directly with ctdb->cmd_pnn.
+ */
+static int check_flags(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+		       const char *desc, uint32_t flag, bool set_flag)
+{
+	struct ctdb_node_map *nodemap = get_nodemap(ctdb, false);
+	bool currently_set;
+
+	if (nodemap == NULL) {
+		return 1;
+	}
+
+	currently_set = nodemap->node[ctdb->cmd_pnn].flags & flag;
+	if (currently_set != set_flag) {
+		return 1;
+	}
+
+	if (! set_flag) {
+		fprintf(stderr, "Node %u is not %s\n",
+			ctdb->cmd_pnn, desc);
+	} else {
+		fprintf(stderr, "Node %u is already %s\n",
+			ctdb->cmd_pnn, desc);
+	}
+
+	return 0;
+}
+
+/*
+ * Block until the given flag of node ctdb->cmd_pnn has the value set_flag.
+ *
+ * The node map is re-read once per second.  A failure to obtain it is
+ * reported on stderr and retried; there is no upper bound on the wait.
+ */
+static void wait_for_flags(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			   uint32_t flag, bool set_flag)
+{
+	for (;;) {
+		struct ctdb_node_map *nodemap = get_nodemap(ctdb, true);
+
+		if (nodemap != NULL) {
+			bool currently_set =
+				nodemap->node[ctdb->cmd_pnn].flags & flag;
+
+			if (currently_set == set_flag) {
+				return;
+			}
+		} else {
+			fprintf(stderr,
+				"Failed to get nodemap, trying again\n");
+		}
+
+		sleep(1);
+	}
+}
+
+/* Result slot shared between ipreallocate() and ipreallocate_handler(). */
+struct ipreallocate_state {
+ int status; /* value taken from the reply message */
+ bool done; /* set once a well-formed reply has arrived */
+};
+
+/*
+ * Message handler for the reply to a takeover-run request.
+ *
+ * A payload of exactly sizeof(int) bytes is stored as the status and marks
+ * the state as done; anything else is ignored.
+ */
+static void ipreallocate_handler(uint64_t srvid, TDB_DATA data,
+				 void *private_data)
+{
+	struct ipreallocate_state *result = private_data;
+
+	if (data.dsize == sizeof(int)) {
+		result->status = *(int *)data.dptr;
+		result->done = true;
+	}
+	/* Packets of any other size are ignored */
+}
+
+/*
+ * Request a takeover run from all connected nodes and wait for the reply.
+ *
+ * The request is broadcast to CTDB_BROADCAST_CONNECTED and sent again
+ * every time waiting for the reply fails.  Returns 0 if the
+ * reported status is non-negative, the negative status otherwise, or the
+ * error from registering the handler or sending the message.
+ */
+static int ipreallocate(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb)
+{
+	struct ctdb_srvid_message msg = { 0 };
+	struct ipreallocate_state state;
+	int ret;
+
+	msg.pnn = ctdb->pnn;
+	msg.srvid = next_srvid(ctdb);
+
+	state.done = false;
+	ret = ctdb_client_set_message_handler(ctdb->ev, ctdb->client,
+					      msg.srvid,
+					      ipreallocate_handler, &state);
+	if (ret != 0) {
+		return ret;
+	}
+
+	for (;;) {
+		ret = ctdb_message_takeover_run(mem_ctx, ctdb->ev,
+						ctdb->client,
+						CTDB_BROADCAST_CONNECTED,
+						&msg);
+		if (ret != 0) {
+			/* Sending failed: give up with this error */
+			break;
+		}
+
+		ret = ctdb_client_wait_timeout(ctdb->ev, &state.done,
+					       TIMEOUT());
+		if (ret == 0) {
+			ret = (state.status >= 0) ? 0 : state.status;
+			break;
+		}
+		/* Waiting failed: send the request again */
+	}
+
+	ctdb_client_remove_message_handler(ctdb->ev, ctdb->client,
+					   msg.srvid, &state);
+	return ret;
+}
+
+/*
+ * Command "disable": set NODE_FLAGS_PERMANENTLY_DISABLED on node
+ * ctdb->cmd_pnn, wait until the flag is visible in the node map and then
+ * trigger an IP reallocation.
+ *
+ * Returns 0 immediately if check_flags() reports the node as already
+ * disabled, the control error on failure, otherwise the result of
+ * ipreallocate().
+ */
+static int control_disable(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			   int argc, const char **argv)
+{
+	const uint32_t flag = NODE_FLAGS_PERMANENTLY_DISABLED;
+	int ret;
+
+	if (argc != 0) {
+		usage("disable");
+	}
+
+	/* check_flags() returns 0 when there is nothing to change */
+	if (check_flags(mem_ctx, ctdb, "disabled", flag, true) == 0) {
+		return 0;
+	}
+
+	ret = ctdb_ctrl_disable_node(mem_ctx, ctdb->ev, ctdb->client,
+				     ctdb->cmd_pnn, TIMEOUT());
+	if (ret != 0) {
+		fprintf(stderr, "Failed to disable node %u\n", ctdb->cmd_pnn);
+		return ret;
+	}
+
+	wait_for_flags(mem_ctx, ctdb, flag, true);
+	return ipreallocate(mem_ctx, ctdb);
+}
+
+/*
+ * Command "enable": clear NODE_FLAGS_PERMANENTLY_DISABLED on node
+ * ctdb->cmd_pnn, wait until the node map reflects it and then trigger an
+ * IP reallocation.
+ *
+ * Returns 0 immediately if check_flags() reports the node as not
+ * disabled, the control error on failure, otherwise the result of
+ * ipreallocate().
+ */
+static int control_enable(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	const uint32_t flag = NODE_FLAGS_PERMANENTLY_DISABLED;
+	int ret;
+
+	if (argc != 0) {
+		usage("enable");
+	}
+
+	/* check_flags() returns 0 when there is nothing to change */
+	if (check_flags(mem_ctx, ctdb, "disabled", flag, false) == 0) {
+		return 0;
+	}
+
+	ret = ctdb_ctrl_enable_node(mem_ctx, ctdb->ev, ctdb->client,
+				    ctdb->cmd_pnn, TIMEOUT());
+	if (ret != 0) {
+		fprintf(stderr, "Failed to enable node %u\n", ctdb->cmd_pnn);
+		return ret;
+	}
+
+	wait_for_flags(mem_ctx, ctdb, flag, false);
+	return ipreallocate(mem_ctx, ctdb);
+}
+
+/*
+ * Command "stop": stop node ctdb->cmd_pnn, wait until NODE_FLAGS_STOPPED
+ * is visible in the node map and then trigger an IP reallocation.
+ *
+ * Returns 0 immediately if check_flags() reports the node as already
+ * stopped, the control error on failure, otherwise the result of
+ * ipreallocate().
+ */
+static int control_stop(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			int argc, const char **argv)
+{
+	const uint32_t flag = NODE_FLAGS_STOPPED;
+	int ret;
+
+	if (argc != 0) {
+		usage("stop");
+	}
+
+	/* check_flags() returns 0 when there is nothing to change */
+	if (check_flags(mem_ctx, ctdb, "stopped", flag, true) == 0) {
+		return 0;
+	}
+
+	ret = ctdb_ctrl_stop_node(mem_ctx, ctdb->ev, ctdb->client,
+				  ctdb->cmd_pnn, TIMEOUT());
+	if (ret != 0) {
+		fprintf(stderr, "Failed to stop node %u\n", ctdb->cmd_pnn);
+		return ret;
+	}
+
+	wait_for_flags(mem_ctx, ctdb, flag, true);
+	return ipreallocate(mem_ctx, ctdb);
+}
+
+/*
+ * Command "continue": resume stopped node ctdb->cmd_pnn, wait until
+ * NODE_FLAGS_STOPPED is cleared in the node map and then trigger an IP
+ * reallocation.
+ *
+ * Returns 0 immediately if check_flags() reports the node as not stopped,
+ * the control error on failure, otherwise the result of ipreallocate().
+ */
+static int control_continue(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	const uint32_t flag = NODE_FLAGS_STOPPED;
+	int ret;
+
+	if (argc != 0) {
+		usage("continue");
+	}
+
+	/* check_flags() returns 0 when there is nothing to change */
+	if (check_flags(mem_ctx, ctdb, "stopped", flag, false) == 0) {
+		return 0;
+	}
+
+	ret = ctdb_ctrl_continue_node(mem_ctx, ctdb->ev, ctdb->client,
+				      ctdb->cmd_pnn, TIMEOUT());
+	if (ret != 0) {
+		fprintf(stderr, "Failed to continue stopped node %u\n",
+			ctdb->cmd_pnn);
+		return ret;
+	}
+
+	wait_for_flags(mem_ctx, ctdb, flag, false);
+	return ipreallocate(mem_ctx, ctdb);
+}
+
+/*
+ * Command "ban": ban node ctdb->cmd_pnn for the time given in argv[0],
+ * wait until NODE_FLAGS_BANNED is visible in the node map and then
+ * trigger an IP reallocation.
+ *
+ * Returns 0 immediately if check_flags() reports the node as already
+ * banned, the smb_strtoul() error for an unparsable time, EINVAL for a
+ * zero time, the control error on failure, otherwise the result of
+ * ipreallocate().
+ */
+static int control_ban(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+		       int argc, const char **argv)
+{
+	const uint32_t flag = NODE_FLAGS_BANNED;
+	struct ctdb_ban_state ban_state;
+	int ret = 0;
+
+	if (argc != 1) {
+		usage("ban");
+	}
+
+	/* check_flags() returns 0 when there is nothing to change */
+	if (check_flags(mem_ctx, ctdb, "banned", flag, true) == 0) {
+		return 0;
+	}
+
+	ban_state.pnn = ctdb->cmd_pnn;
+	ban_state.time = smb_strtoul(argv[0], NULL, 0, &ret, SMB_STR_STANDARD);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (ban_state.time == 0) {
+		fprintf(stderr, "Ban time cannot be zero\n");
+		return EINVAL;
+	}
+
+	ret = ctdb_ctrl_set_ban_state(mem_ctx, ctdb->ev, ctdb->client,
+				      ctdb->cmd_pnn, TIMEOUT(), &ban_state);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to ban node %u\n", ctdb->cmd_pnn);
+		return ret;
+	}
+
+	wait_for_flags(mem_ctx, ctdb, flag, true);
+	return ipreallocate(mem_ctx, ctdb);
+}
+
+/*
+ * Command "unban": lift the ban on node ctdb->cmd_pnn by setting a ban
+ * time of 0, wait until NODE_FLAGS_BANNED is cleared in the node map and
+ * then trigger an IP reallocation.
+ *
+ * Returns 0 immediately if check_flags() reports the node as not banned,
+ * the control error on failure, otherwise the result of ipreallocate().
+ */
+static int control_unban(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 int argc, const char **argv)
+{
+	const uint32_t flag = NODE_FLAGS_BANNED;
+	struct ctdb_ban_state ban_state;
+	int ret;
+
+	if (argc != 0) {
+		usage("unban");
+	}
+
+	/* check_flags() returns 0 when there is nothing to change */
+	if (check_flags(mem_ctx, ctdb, "banned", flag, false) == 0) {
+		return 0;
+	}
+
+	ban_state.time = 0;
+	ban_state.pnn = ctdb->cmd_pnn;
+
+	ret = ctdb_ctrl_set_ban_state(mem_ctx, ctdb->ev, ctdb->client,
+				      ctdb->cmd_pnn, TIMEOUT(), &ban_state);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to unban node %u\n", ctdb->cmd_pnn);
+		return ret;
+	}
+
+	wait_for_flags(mem_ctx, ctdb, flag, false);
+	return ipreallocate(mem_ctx, ctdb);
+}
+
+/* Disconnect callback: set the bool that private_data points to. */
+static void wait_for_shutdown(void *private_data)
+{
+	*(bool *)private_data = true;
+}
+
+/*
+ * Command "shutdown": shut down node ctdb->cmd_pnn.
+ *
+ * When the target is the node this client is connected to
+ * (ctdb->pnn == ctdb->cmd_pnn), a disconnect callback is installed first
+ * and the function waits for it to fire after the control succeeded.
+ *
+ * Returns 0 on success or the error of the shutdown control.
+ */
+static int control_shutdown(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	bool disconnected = false;
+	bool target_is_local;
+	int ret;
+
+	if (argc != 0) {
+		usage("shutdown");
+	}
+
+	target_is_local = (ctdb->pnn == ctdb->cmd_pnn);
+	if (target_is_local) {
+		ctdb_client_set_disconnect_callback(ctdb->client,
+						    wait_for_shutdown,
+						    &disconnected);
+	}
+
+	ret = ctdb_ctrl_shutdown(mem_ctx, ctdb->ev, ctdb->client,
+				 ctdb->cmd_pnn, TIMEOUT());
+	if (ret != 0) {
+		fprintf(stderr, "Unable to shutdown node %u\n", ctdb->cmd_pnn);
+		return ret;
+	}
+
+	if (target_is_local) {
+		ctdb_client_wait(ctdb->ev, &disconnected);
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch the current generation from the leader node.
+ *
+ * Retries once per second while the leader reports
+ * CTDB_RECOVERY_ACTIVE or while its VNN map carries INVALID_GENERATION.
+ * The leader is looked up again on every attempt.
+ *
+ * Returns 0 and stores the generation in *generation on success, or the
+ * error of the call that failed.
+ */
+static int get_generation(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  uint32_t *generation)
+{
+	for (;;) {
+		struct ctdb_vnn_map *vnnmap;
+		uint32_t leader;
+		uint32_t current;
+		int recmode;
+		int ret;
+
+		ret = get_leader(mem_ctx, ctdb, &leader);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to find leader\n");
+			return ret;
+		}
+
+		ret = ctdb_ctrl_get_recmode(mem_ctx, ctdb->ev, ctdb->client,
+					    leader, TIMEOUT(), &recmode);
+		if (ret != 0) {
+			fprintf(stderr,
+				"Failed to get recovery mode from node %u\n",
+				leader);
+			return ret;
+		}
+
+		if (recmode == CTDB_RECOVERY_ACTIVE) {
+			sleep(1);
+			continue;
+		}
+
+		ret = ctdb_ctrl_getvnnmap(mem_ctx, ctdb->ev, ctdb->client,
+					  leader, TIMEOUT(), &vnnmap);
+		if (ret != 0) {
+			fprintf(stderr,
+				"Failed to get generation from node %u\n",
+				leader);
+			return ret;
+		}
+
+		current = vnnmap->generation;
+		talloc_free(vnnmap);
+
+		if (current != INVALID_GENERATION) {
+			*generation = current;
+			return 0;
+		}
+
+		sleep(1);
+	}
+}
+
+
+/*
+ * Command "recover": set the recovery mode of node ctdb->cmd_pnn to
+ * CTDB_RECOVERY_ACTIVE and wait until get_generation() returns a value
+ * different from the one seen before.
+ *
+ * Returns 0 once the generation has changed, or the error of the call
+ * that failed.
+ */
+static int control_recover(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			   int argc, const char **argv)
+{
+	uint32_t before, now;
+	int ret;
+
+	if (argc != 0) {
+		usage("recover");
+	}
+
+	ret = get_generation(mem_ctx, ctdb, &before);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = ctdb_ctrl_set_recmode(mem_ctx, ctdb->ev, ctdb->client,
+				    ctdb->cmd_pnn, TIMEOUT(),
+				    CTDB_RECOVERY_ACTIVE);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to set recovery mode active\n");
+		return ret;
+	}
+
+	for (;;) {
+		ret = get_generation(mem_ctx, ctdb, &now);
+		if (ret != 0) {
+			fprintf(stderr,
+				"Failed to confirm end of recovery\n");
+			return ret;
+		}
+
+		if (now != before) {
+			return 0;
+		}
+
+		sleep(1);
+	}
+}
+
+/*
+ * Command "ipreallocate": takes no arguments and returns the result of
+ * ipreallocate().
+ */
+static int control_ipreallocate(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+				int argc, const char **argv)
+{
+	int ret;
+
+	if (argc != 0) {
+		usage("ipreallocate");
+	}
+
+	ret = ipreallocate(mem_ctx, ctdb);
+	return ret;
+}
+
+/*
+ * Command "gratarp": ask node ctdb->cmd_pnn to send a gratuitous ARP for
+ * address argv[0] on interface argv[1].
+ *
+ * Returns 0 on success, 1 for an unparsable address, or the error of the
+ * failed control.
+ */
+static int control_gratarp(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			   int argc, const char **argv)
+{
+	struct ctdb_addr_info info;
+	int ret;
+
+	if (argc != 2) {
+		usage("gratarp");
+	}
+
+	if (ctdb_sock_addr_from_string(argv[0], &info.addr, false) != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+	info.iface = argv[1];
+
+	ret = ctdb_ctrl_send_gratuitous_arp(mem_ctx, ctdb->ev, ctdb->client,
+					    ctdb->cmd_pnn, TIMEOUT(),
+					    &info);
+	if (ret != 0) {
+		fprintf(stderr, "Unable to send gratuitous arp from node %u\n",
+			ctdb->cmd_pnn);
+	}
+
+	return ret;
+}
+
+/*
+ * Command "tickle": send a TCP tickle via ctdb_sys_send_tcp().
+ *
+ * With two arguments, argv[0] is the source and argv[1] the destination
+ * address.  With no arguments, a connection list is read with
+ * ctdb_connection_list_read() (descriptor argument 0, presumably standard
+ * input) and one tickle is sent per connection.
+ *
+ * Returns 0 on success, 1 for an unparsable address or when at least one
+ * tickle from the list could not be sent, otherwise the error of the
+ * failed call.
+ */
+static int control_tickle(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	ctdb_sock_addr src, dst;
+	int ret;
+
+	if (argc != 0 && argc != 2) {
+		usage("tickle");
+	}
+
+	if (argc == 0) {
+		struct ctdb_connection_list *clist;
+		unsigned int i;
+		unsigned int num_failed;
+
+		/* Client first but the src/dst logic is confused */
+		ret = ctdb_connection_list_read(mem_ctx, 0, false, &clist);
+		if (ret != 0) {
+			return ret;
+		}
+
+		/* Keep going after a failure so every tickle is attempted */
+		num_failed = 0;
+		for (i = 0; i < clist->num; i++) {
+			ret = ctdb_sys_send_tcp(&clist->conn[i].src,
+						&clist->conn[i].dst,
+						0, 0, 0);
+			if (ret != 0) {
+				num_failed += 1;
+			}
+		}
+
+		TALLOC_FREE(clist);
+
+		if (num_failed > 0) {
+			/* num_failed is unsigned, so print it with %u */
+			fprintf(stderr, "Failed to send %u tickles\n",
+				num_failed);
+			return 1;
+		}
+
+		return 0;
+	}
+
+
+	ret = ctdb_sock_addr_from_string(argv[0], &src, true);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+
+	ret = ctdb_sock_addr_from_string(argv[1], &dst, true);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[1]);
+		return 1;
+	}
+
+	ret = ctdb_sys_send_tcp(&src, &dst, 0, 0, 0);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to send tickle ack\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Command "gettickles": list the connections node ctdb->cmd_pnn has
+ * registered for tickling for address argv[0] and optional port argv[1].
+ *
+ * Output is separator-delimited when options.machinereadable is set and
+ * human-readable otherwise.  Returns 0 on success, 1 for an unparsable
+ * address, the smb_strtoul() error for an unparsable port, or the error
+ * of the failed control.
+ */
+static int control_gettickles(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			      int argc, const char **argv)
+{
+	struct ctdb_tickle_list *tickles;
+	ctdb_sock_addr addr;
+	const char *src_str, *dst_str;
+	unsigned port = 0;
+	unsigned int n;
+	int ret = 0;
+
+	if (argc < 1 || argc > 2) {
+		usage("gettickles");
+	}
+
+	if (argc == 2) {
+		port = smb_strtoul(argv[1], NULL, 10, &ret, SMB_STR_STANDARD);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	if (ctdb_sock_addr_from_string(argv[0], &addr, false) != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+	ctdb_sock_addr_set_port(&addr, port);
+
+	ret = ctdb_ctrl_get_tcp_tickle_list(mem_ctx, ctdb->ev, ctdb->client,
+					    ctdb->cmd_pnn, TIMEOUT(), &addr,
+					    &tickles);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to get list of connections\n");
+		return ret;
+	}
+
+	if (! options.machinereadable) {
+		printf("Connections for IP: %s\n",
+		       ctdb_sock_addr_to_string(mem_ctx,
+						&tickles->addr, false));
+		printf("Num connections: %u\n", tickles->num);
+		for (n = 0; n < tickles->num; n++) {
+			src_str = ctdb_sock_addr_to_string(
+				mem_ctx, &tickles->conn[n].src, true);
+			dst_str = ctdb_sock_addr_to_string(
+				mem_ctx, &tickles->conn[n].dst, true);
+			printf("SRC: %s DST: %s\n", src_str, dst_str);
+		}
+	} else {
+		printf("%s%s%s%s%s%s%s%s%s\n",
+		       options.sep,
+		       "Source IP", options.sep,
+		       "Port", options.sep,
+		       "Destination IP", options.sep,
+		       "Port", options.sep);
+		for (n = 0; n < tickles->num; n++) {
+			/* Ports go in columns of their own here */
+			src_str = ctdb_sock_addr_to_string(
+				mem_ctx, &tickles->conn[n].src, false);
+			dst_str = ctdb_sock_addr_to_string(
+				mem_ctx, &tickles->conn[n].dst, false);
+			printf("%s%s%s%u%s%s%s%u%s\n", options.sep,
+			       src_str,
+			       options.sep,
+			       ntohs(tickles->conn[n].src.ip.sin_port),
+			       options.sep,
+			       dst_str,
+			       options.sep,
+			       ntohs(tickles->conn[n].dst.ip.sin_port),
+			       options.sep);
+		}
+	}
+
+	talloc_free(tickles);
+	return 0;
+}
+
+/* Fills in the control request to be sent for one connection. */
+typedef void (*clist_request_func)(struct ctdb_req_control *request,
+ struct ctdb_connection *conn);
+
+/* Checks one control reply; a non-zero result is counted as a failure. */
+typedef int (*clist_reply_func)(struct ctdb_reply_control *reply);
+
+/* State of a process_clist request: one control is sent per connection. */
+struct process_clist_state {
+ struct ctdb_connection_list *clist; /* connections being processed */
+ int count; /* not referenced by the process_clist functions */
+ unsigned int num_failed, num_total; /* failed / completed controls */
+ clist_reply_func reply_func; /* applied to every received reply */
+};
+
+static void process_clist_done(struct tevent_req *subreq);
+
+/*
+ * Start a request that sends one control to node ctdb->cmd_pnn for every
+ * connection in clist.
+ *
+ * request_func builds the control for a connection; reply_func is applied
+ * to each reply in process_clist_done().  The request completes once all
+ * controls have completed, or immediately if clist is empty.
+ *
+ * Returns the new request, or NULL if it could not be created.
+ */
+static struct tevent_req *process_clist_send(
+					TALLOC_CTX *mem_ctx,
+					struct ctdb_context *ctdb,
+					struct ctdb_connection_list *clist,
+					clist_request_func request_func,
+					clist_reply_func reply_func)
+{
+	struct tevent_req *req, *subreq;
+	struct process_clist_state *state;
+	struct ctdb_req_control request;
+	unsigned int i;
+
+	req = tevent_req_create(mem_ctx, &state, struct process_clist_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->clist = clist;
+	state->reply_func = reply_func;
+
+	/*
+	 * With no connections no sub-request would ever call
+	 * process_clist_done() and the request would never complete, so
+	 * finish it straight away.
+	 */
+	if (clist->num == 0) {
+		tevent_req_done(req);
+		return tevent_req_post(req, ctdb->ev);
+	}
+
+	for (i = 0; i < clist->num; i++) {
+		request_func(&request, &clist->conn[i]);
+		subreq = ctdb_client_control_send(state, ctdb->ev,
+						  ctdb->client, ctdb->cmd_pnn,
+						  TIMEOUT(), &request);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ctdb->ev);
+		}
+		tevent_req_set_callback(subreq, process_clist_done, req);
+	}
+
+	return req;
+}
+
+/*
+ * Completion callback for one control sent by process_clist_send().
+ *
+ * A control counts as failed if receiving it fails or if reply_func
+ * returns non-zero for its reply.  The parent request is marked done once
+ * as many controls have completed as there are connections in the list.
+ */
+static void process_clist_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct process_clist_state *state = tevent_req_data(
+		req, struct process_clist_state);
+	struct ctdb_reply_control *reply;
+	bool received;
+
+	received = ctdb_client_control_recv(subreq, NULL, state, &reply);
+	TALLOC_FREE(subreq);
+
+	/* reply_func is only consulted when a reply was received */
+	if (! received || state->reply_func(reply) != 0) {
+		state->num_failed += 1;
+	}
+
+	state->num_total += 1;
+	if (state->num_total == state->clist->num) {
+		tevent_req_done(req);
+	}
+}
+
+/* Return the number of failed controls recorded for a process_clist request. */
+static int process_clist_recv(struct tevent_req *req)
+{
+	struct process_clist_state *st;
+
+	st = tevent_req_data(req, struct process_clist_state);
+	return st->num_failed;
+}
+
+/*
+ * Command "addtickle": register TCP connections with node ctdb->cmd_pnn.
+ *
+ * With two arguments, argv[0] is the source and argv[1] the destination
+ * of a single connection.  With no arguments, a connection list is read
+ * with ctdb_connection_list_read() (descriptor argument 0, presumably
+ * standard input) and every entry is registered.
+ *
+ * Returns 0 on success, 1 for an unparsable address, when running the
+ * event loop fails or when at least one registration failed, ENOMEM if
+ * the request cannot be created, otherwise the error of the failed call.
+ */
+static int control_addtickle(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			     int argc, const char **argv)
+{
+	struct ctdb_connection conn;
+	int ret;
+
+	if (argc != 0 && argc != 2) {
+		usage("addtickle");
+	}
+
+	if (argc == 0) {
+		struct ctdb_connection_list *clist;
+		struct tevent_req *req;
+		bool status;
+
+		/* Client first but the src/dst logic is confused */
+		ret = ctdb_connection_list_read(mem_ctx, 0, false, &clist);
+		if (ret != 0) {
+			return ret;
+		}
+		if (clist->num == 0) {
+			return 0;
+		}
+
+		req = process_clist_send(mem_ctx, ctdb, clist,
+				 ctdb_req_control_tcp_add_delayed_update,
+				 ctdb_reply_control_tcp_add_delayed_update);
+		if (req == NULL) {
+			talloc_free(clist);
+			return ENOMEM;
+		}
+
+		/*
+		 * If the event loop fails the failure count is incomplete,
+		 * so do not report success on the strength of it.
+		 */
+		status = tevent_req_poll(req, ctdb->ev);
+		if (! status) {
+			fprintf(stderr, "Failed to process connection list\n");
+			TALLOC_FREE(req);
+			talloc_free(clist);
+			return 1;
+		}
+		talloc_free(clist);
+
+		ret = process_clist_recv(req);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to add %d tickles\n", ret);
+			return 1;
+		}
+
+		return 0;
+	}
+
+	ret = ctdb_sock_addr_from_string(argv[0], &conn.src, true);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+	ret = ctdb_sock_addr_from_string(argv[1], &conn.dst, true);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[1]);
+		return 1;
+	}
+
+	ret = ctdb_ctrl_tcp_add_delayed_update(mem_ctx, ctdb->ev,
+					       ctdb->client, ctdb->cmd_pnn,
+					       TIMEOUT(), &conn);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to register connection\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Command "deltickle": unregister TCP connections from node
+ * ctdb->cmd_pnn.
+ *
+ * With two arguments, argv[0] is the source and argv[1] the destination
+ * of a single connection.  With no arguments, a connection list is read
+ * with ctdb_connection_list_read() (descriptor argument 0, presumably
+ * standard input) and every entry is removed.
+ *
+ * Returns 0 on success, 1 for an unparsable address, when running the
+ * event loop fails or when at least one removal failed, ENOMEM if the
+ * request cannot be created, otherwise the error of the failed call.
+ */
+static int control_deltickle(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			     int argc, const char **argv)
+{
+	struct ctdb_connection conn;
+	int ret;
+
+	if (argc != 0 && argc != 2) {
+		usage("deltickle");
+	}
+
+	if (argc == 0) {
+		struct ctdb_connection_list *clist;
+		struct tevent_req *req;
+		bool status;
+
+		/* Client first but the src/dst logic is confused */
+		ret = ctdb_connection_list_read(mem_ctx, 0, false, &clist);
+		if (ret != 0) {
+			return ret;
+		}
+		if (clist->num == 0) {
+			return 0;
+		}
+
+		req = process_clist_send(mem_ctx, ctdb, clist,
+					 ctdb_req_control_tcp_remove,
+					 ctdb_reply_control_tcp_remove);
+		if (req == NULL) {
+			talloc_free(clist);
+			return ENOMEM;
+		}
+
+		/*
+		 * If the event loop fails the failure count is incomplete,
+		 * so do not report success on the strength of it.
+		 */
+		status = tevent_req_poll(req, ctdb->ev);
+		if (! status) {
+			fprintf(stderr, "Failed to process connection list\n");
+			TALLOC_FREE(req);
+			talloc_free(clist);
+			return 1;
+		}
+		talloc_free(clist);
+
+		ret = process_clist_recv(req);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to remove %d tickles\n", ret);
+			return 1;
+		}
+
+		return 0;
+	}
+
+	ret = ctdb_sock_addr_from_string(argv[0], &conn.src, true);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+	ret = ctdb_sock_addr_from_string(argv[1], &conn.dst, true);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[1]);
+		return 1;
+	}
+
+	ret = ctdb_ctrl_tcp_remove(mem_ctx, ctdb->ev, ctdb->client,
+				   ctdb->cmd_pnn, TIMEOUT(), &conn);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to unregister connection\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Command "listnodes": print the address of every node in the nodes file
+ * that is not flagged as deleted.
+ *
+ * In machine-readable mode each line also carries the PNN and is
+ * delimited by options.sep.  Returns 0 on success, 1 if the nodes file
+ * could not be read.
+ */
+static int control_listnodes(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			     int argc, const char **argv)
+{
+	struct ctdb_node_map *nodemap;
+	unsigned int n;
+
+	if (argc != 0) {
+		usage("listnodes");
+	}
+
+	nodemap = read_nodes_file(mem_ctx, CTDB_UNKNOWN_PNN);
+	if (nodemap == NULL) {
+		return 1;
+	}
+
+	for (n = 0; n < nodemap->num; n++) {
+		const char *addr_str;
+
+		if (nodemap->node[n].flags & NODE_FLAGS_DELETED) {
+			continue;
+		}
+
+		addr_str = ctdb_sock_addr_to_string(
+				mem_ctx, &nodemap->node[n].addr, false);
+
+		if (! options.machinereadable) {
+			printf("%s\n", addr_str);
+			continue;
+		}
+
+		printf("%s%u%s%s%s\n", options.sep,
+		       nodemap->node[n].pnn, options.sep,
+		       addr_str,
+		       options.sep);
+	}
+
+	return 0;
+}
+
+/*
+ * Compare two node maps entry by entry.
+ *
+ * They are identical when they have the same number of nodes and every
+ * pair of entries at the same index agrees in PNN, flags and IP address.
+ */
+static bool nodemap_identical(struct ctdb_node_map *nodemap1,
+			      struct ctdb_node_map *nodemap2)
+{
+	unsigned int idx;
+
+	if (nodemap1->num != nodemap2->num) {
+		return false;
+	}
+
+	for (idx = 0; idx < nodemap1->num; idx++) {
+		struct ctdb_node_and_flags *a = &nodemap1->node[idx];
+		struct ctdb_node_and_flags *b = &nodemap2->node[idx];
+		bool same;
+
+		same = (a->pnn == b->pnn) &&
+		       (a->flags == b->flags) &&
+		       ctdb_sock_addr_same_ip(&a->addr, &b->addr);
+		if (! same) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Compare the current node map (nm) with the node map read from the nodes
+ * file (fnm), entry by entry, and decide whether a reload is needed.
+ *
+ * Entries are compared by index.  Problems (a node missing from the file,
+ * a changed IP address, a node being deleted while still connected) are
+ * reported on stderr and make the function return 1.  Otherwise *reload
+ * is set to true if any node is newly deleted, undeleted or new, and 0 is
+ * returned.
+ *
+ * Note that *reload may already have been set to true when 1 is returned.
+ */
+static int check_node_file_changes(TALLOC_CTX *mem_ctx,
+ struct ctdb_node_map *nm,
+ struct ctdb_node_map *fnm,
+ bool *reload)
+{
+ unsigned int i;
+ bool check_failed = false;
+
+ *reload = false;
+
+ for (i=0; i<nm->num; i++) {
+ /* The file has fewer entries than the current node map */
+ if (i >= fnm->num) {
+ fprintf(stderr,
+ "Node %u (%s) missing from nodes file\n",
+ nm->node[i].pnn,
+ ctdb_sock_addr_to_string(
+ mem_ctx, &nm->node[i].addr, false));
+ check_failed = true;
+ continue;
+ }
+ if (nm->node[i].flags & NODE_FLAGS_DELETED &&
+ fnm->node[i].flags & NODE_FLAGS_DELETED) {
+ /* Node remains deleted */
+ continue;
+ }
+
+ if (! (nm->node[i].flags & NODE_FLAGS_DELETED) &&
+ ! (fnm->node[i].flags & NODE_FLAGS_DELETED)) {
+ /* Node not newly nor previously deleted */
+ if (! ctdb_same_ip(&nm->node[i].addr,
+ &fnm->node[i].addr)) {
+ fprintf(stderr,
+ "Node %u has changed IP address"
+ " (was %s, now %s)\n",
+ nm->node[i].pnn,
+ ctdb_sock_addr_to_string(
+ mem_ctx,
+ &nm->node[i].addr, false),
+ ctdb_sock_addr_to_string(
+ mem_ctx,
+ &fnm->node[i].addr, false));
+ check_failed = true;
+ } else {
+ /* Only a warning: this does not fail the check */
+ if (nm->node[i].flags & NODE_FLAGS_DISCONNECTED) {
+ fprintf(stderr,
+ "WARNING: Node %u is disconnected."
+ " You MUST fix this node manually!\n",
+ nm->node[i].pnn);
+ }
+ }
+ continue;
+ }
+
+ /* From here on exactly one of the two entries is deleted */
+ if (fnm->node[i].flags & NODE_FLAGS_DELETED) {
+ /* Node is being deleted */
+ printf("Node %u is DELETED\n", nm->node[i].pnn);
+ *reload = true;
+ if (! (nm->node[i].flags & NODE_FLAGS_DISCONNECTED)) {
+ fprintf(stderr,
+ "ERROR: Node %u is still connected\n",
+ nm->node[i].pnn);
+ check_failed = true;
+ }
+ continue;
+ }
+
+ if (nm->node[i].flags & NODE_FLAGS_DELETED) {
+ /* Node was previously deleted */
+ printf("Node %u is UNDELETED\n", nm->node[i].pnn);
+ *reload = true;
+ }
+ }
+
+ if (check_failed) {
+ fprintf(stderr,
+ "ERROR: Nodes will not be reloaded due to previous error\n");
+ return 1;
+ }
+
+ /* Leftover nodes in file are NEW */
+ for (; i < fnm->num; i++) {
+ printf("Node %u is NEW\n", fnm->node[i].pnn);
+ *reload = true;
+ }
+
+ return 0;
+}
+
+/* Bookkeeping shared by disable_recoveries() and its message handler. */
+struct disable_recoveries_state {
+ uint32_t *pnn_list; /* PNNs the request was sent to */
+ unsigned int node_count; /* number of entries in pnn_list and reply */
+ bool *reply; /* reply[i] is true once pnn_list[i] has answered */
+ int status; /* negative value taken from an error reply, else 0 */
+ bool done; /* all nodes have answered, or an error was reported */
+};
+
+/*
+ * Message handler for replies to a disable-recoveries request.
+ *
+ * The payload must be exactly one int; anything else is ignored.  A
+ * negative value is stored as the status and finishes the wait.  A
+ * non-negative value is the PNN of the replying node; the wait finishes
+ * once every node in the list has replied.
+ */
+static void disable_recoveries_handler(uint64_t srvid, TDB_DATA data,
+				       void *private_data)
+{
+	struct disable_recoveries_state *state = private_data;
+	bool all_replied = true;
+	unsigned int n;
+	int value;
+
+	if (data.dsize != sizeof(int)) {
+		/* Ignore packet */
+		return;
+	}
+
+	/* value will be a PNN (i.e. >=0) on success, or negative on error */
+	value = *(int *)data.dptr;
+	if (value < 0) {
+		state->status = value;
+		state->done = true;
+		return;
+	}
+
+	/* Tick off the node that has just answered */
+	for (n = 0; n < state->node_count; n++) {
+		if (state->pnn_list[n] == (uint32_t)value) {
+			state->reply[n] = true;
+			break;
+		}
+	}
+
+	/* Finished only when no node is left outstanding */
+	for (n = 0; n < state->node_count; n++) {
+		if (! state->reply[n]) {
+			all_replied = false;
+			break;
+		}
+	}
+	state->done = all_replied;
+}
+
+/*
+ * Ask the nodes in pnn_list (count entries) to disable recoveries for
+ * timeout seconds and wait until all of them have replied.
+ *
+ * Returns 0 if every node replied without reporting an error, 1 if a node
+ * reported an error, ETIME (after printing a message) if the replies did
+ * not arrive in time, ENOMEM if the reply array cannot be allocated, or
+ * the error of registering the handler, sending a message or waiting.
+ */
+static int disable_recoveries(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			      uint32_t timeout, uint32_t *pnn_list, int count)
+{
+	struct ctdb_disable_message disable = { 0 };
+	struct disable_recoveries_state state;
+	int ret, i;
+
+	disable.pnn = ctdb->pnn;
+	disable.srvid = next_srvid(ctdb);
+	disable.timeout = timeout;
+
+	state.pnn_list = pnn_list;
+	state.node_count = count;
+	state.done = false;
+	state.status = 0;
+	state.reply = talloc_zero_array(mem_ctx, bool, count);
+	if (state.reply == NULL) {
+		return ENOMEM;
+	}
+
+	ret = ctdb_client_set_message_handler(ctdb->ev, ctdb->client,
+					      disable.srvid,
+					      disable_recoveries_handler,
+					      &state);
+	if (ret != 0) {
+		TALLOC_FREE(state.reply);
+		return ret;
+	}
+
+	for (i=0; i<count; i++) {
+		ret = ctdb_message_disable_recoveries(mem_ctx, ctdb->ev,
+						      ctdb->client,
+						      pnn_list[i],
+						      &disable);
+		if (ret != 0) {
+			goto fail;
+		}
+	}
+
+	ret = ctdb_client_wait_timeout(ctdb->ev, &state.done, TIMEOUT());
+	if (ret == ETIME) {
+		fprintf(stderr, "Timed out waiting to disable recoveries\n");
+	} else if (ret == 0) {
+		/*
+		 * Only trust the status when the wait itself succeeded;
+		 * any other wait error is passed on unchanged.
+		 */
+		ret = (state.status >= 0 ? 0 : 1);
+	}
+
+fail:
+	ctdb_client_remove_message_handler(ctdb->ev, ctdb->client,
+					   disable.srvid, &state);
+	/* The handler is gone, so nothing refers to the array any more */
+	TALLOC_FREE(state.reply);
+	return ret;
+}
+
+/*
+ * Command handler: ask all connected nodes to reload their nodes file.
+ *
+ * The local nodes file is first compared with the nodes file of every
+ * connected node and then with the current node map.  If nothing has
+ * changed no reload is done.  Otherwise recoveries are disabled on all
+ * connected nodes, the reload control is sent, and recoveries are
+ * enabled again.
+ *
+ * Returns 0 on success or when no reload is needed, non-zero otherwise.
+ * A failed reload control is reported on stderr but does not change the
+ * return value.
+ */
+static int control_reloadnodes(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ int argc, const char **argv)
+{
+ struct ctdb_node_map *nodemap = NULL;
+ struct ctdb_node_map *file_nodemap;
+ struct ctdb_node_map *remote_nodemap;
+ struct ctdb_req_control request;
+ struct ctdb_reply_control **reply;
+ bool reload;
+ unsigned int i;
+ int count;
+ int ret;
+ uint32_t *pnn_list;
+
+ nodemap = get_nodemap(ctdb, false);
+ if (nodemap == NULL) {
+ return 1;
+ }
+
+ file_nodemap = read_nodes_file(mem_ctx, ctdb->pnn);
+ if (file_nodemap == NULL) {
+ return 1;
+ }
+
+ /* Every connected node must have the same nodes file as this one */
+ for (i=0; i<nodemap->num; i++) {
+ if (nodemap->node[i].flags & NODE_FLAGS_DISCONNECTED) {
+ continue;
+ }
+
+ ret = ctdb_ctrl_get_nodes_file(mem_ctx, ctdb->ev, ctdb->client,
+ nodemap->node[i].pnn, TIMEOUT(),
+ &remote_nodemap);
+ if (ret != 0) {
+ fprintf(stderr,
+ "ERROR: Failed to get nodes file from node %u\n",
+ nodemap->node[i].pnn);
+ return ret;
+ }
+
+ if (! nodemap_identical(file_nodemap, remote_nodemap)) {
+ fprintf(stderr,
+ "ERROR: Nodes file on node %u differs"
+ " from current node (%u)\n",
+ nodemap->node[i].pnn, ctdb->pnn);
+ return 1;
+ }
+ }
+
+ /* Sets reload if the file differs from the running node map */
+ ret = check_node_file_changes(mem_ctx, nodemap, file_nodemap, &reload);
+ if (ret != 0) {
+ return ret;
+ }
+
+ if (! reload) {
+ fprintf(stderr, "No change in nodes file,"
+ " skipping unnecessary reload\n");
+ return 0;
+ }
+
+ count = list_of_connected_nodes(nodemap, CTDB_UNKNOWN_PNN,
+ mem_ctx, &pnn_list);
+ if (count <= 0) {
+ fprintf(stderr, "Memory allocation error\n");
+ return 1;
+ }
+
+ /* Disable recoveries for twice the command time limit */
+ ret = disable_recoveries(mem_ctx, ctdb, 2*options.timelimit,
+ pnn_list, count);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to disable recoveries\n");
+ return ret;
+ }
+
+ ctdb_req_control_reload_nodes_file(&request);
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+ pnn_list, count, TIMEOUT(),
+ &request, NULL, &reply);
+ /*
+ * NOTE(review): reply is only inspected when the multi control
+ * reports failure.  Confirm that ctdb_client_control_multi() fills
+ * in reply on that path; otherwise this reads an uninitialised
+ * pointer.
+ */
+ if (ret != 0) {
+ bool failed = false;
+ int j;
+
+ for (j=0; j<count; j++) {
+ ret = ctdb_reply_control_reload_nodes_file(reply[j]);
+ if (ret != 0) {
+ fprintf(stderr,
+ "Node %u failed to reload nodes\n",
+ pnn_list[j]);
+ failed = true;
+ }
+ }
+ if (failed) {
+ fprintf(stderr,
+ "You MUST fix failed nodes manually!\n");
+ }
+ }
+
+ /* A timeout of 0 is used to enable recoveries again */
+ ret = disable_recoveries(mem_ctx, ctdb, 0, pnn_list, count);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to enable recoveries\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Make one attempt at moving public IP addr to node pnn.
+ *
+ * Checks that the target node can host the address, disables takeover
+ * runs on all connected nodes, releases the IP on the other active
+ * nodes, asks the target node to take it over, enables takeover runs
+ * again and finally sends the ipreallocated control to the connected
+ * nodes.
+ *
+ * Returns 0 on success, non-zero on failure.  On failure after takeover
+ * runs were disabled they are not explicitly enabled again here.
+ */
+static int moveip(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ ctdb_sock_addr *addr, uint32_t pnn)
+{
+ struct ctdb_public_ip_list *pubip_list;
+ struct ctdb_public_ip pubip;
+ struct ctdb_node_map *nodemap;
+ struct ctdb_req_control request;
+ uint32_t *pnn_list;
+ unsigned int i;
+ int ret, count;
+ uint32_t *connected_pnn = NULL;
+ int connected_count;
+
+ ret = ctdb_ctrl_get_public_ips(mem_ctx, ctdb->ev, ctdb->client,
+ pnn, TIMEOUT(), false, &pubip_list);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to get Public IPs from node %u\n",
+ pnn);
+ return ret;
+ }
+
+ /* The address must be one of the target node's public IPs */
+ for (i=0; i<pubip_list->num; i++) {
+ if (ctdb_same_ip(addr, &pubip_list->ip[i].addr)) {
+ break;
+ }
+ }
+
+ if (i == pubip_list->num) {
+ fprintf(stderr, "Node %u CANNOT host IP address %s\n",
+ pnn, ctdb_sock_addr_to_string(mem_ctx, addr, false));
+ return 1;
+ }
+
+ nodemap = get_nodemap(ctdb, false);
+ if (nodemap == NULL) {
+ return 1;
+ }
+
+ /* pnn_list: nodes that get the release control (pnn is passed as the node to leave out — confirm) */
+ count = list_of_active_nodes(nodemap, pnn, mem_ctx, &pnn_list);
+ if (count <= 0) {
+ fprintf(stderr, "Memory allocation error\n");
+ return 1;
+ }
+
+ connected_count = list_of_connected_nodes(nodemap,
+ CTDB_UNKNOWN_PNN,
+ mem_ctx,
+ &connected_pnn);
+ if (connected_count <= 0) {
+ fprintf(stderr, "Memory allocation error\n");
+ return 1;
+ }
+
+ /*
+ * Disable takeover runs on all connected nodes. A reply
+ * indicating success is needed from each node so all nodes
+ * will need to be active.
+ *
+ * A check could be added to not allow reloading of IPs when
+ * there are disconnected nodes. However, this should
+ * probably be left up to the administrator.
+ */
+ ret = disable_takeover_runs(mem_ctx,
+ ctdb,
+ 2*options.timelimit,
+ connected_pnn,
+ connected_count);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to disable takeover runs\n");
+ return ret;
+ }
+
+ pubip.pnn = pnn;
+ pubip.addr = *addr;
+ ctdb_req_control_release_ip(&request, &pubip);
+
+ /* Release the IP on the nodes in pnn_list ... */
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+ pnn_list, count, TIMEOUT(),
+ &request, NULL, NULL);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to release IP on nodes\n");
+ return ret;
+ }
+
+ /* ... then have the target node take it over */
+ ret = ctdb_ctrl_takeover_ip(mem_ctx, ctdb->ev, ctdb->client,
+ pnn, TIMEOUT(), &pubip);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to takeover IP on node %u\n", pnn);
+ return ret;
+ }
+
+ /*
+ * It isn't strictly necessary to wait until takeover runs are
+ * re-enabled but doing so can't hurt.
+ */
+ ret = disable_takeover_runs(mem_ctx,
+ ctdb,
+ 0,
+ connected_pnn,
+ connected_count);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to enable takeover runs\n");
+ return ret;
+ }
+
+ return send_ipreallocated_control_to_nodes(mem_ctx,
+ ctdb,
+ connected_pnn,
+ connected_count);
+}
+
+/*
+ * Command handler: move the public IP address in argv[0] to the node
+ * whose PNN is given in argv[1].
+ *
+ * moveip() is attempted up to 5 times, sleeping 3 seconds after each
+ * failed attempt.  Returns 0 on success, 1 if an argument cannot be
+ * parsed, otherwise the result of the last moveip() attempt.
+ */
+static int control_moveip(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	ctdb_sock_addr addr;
+	uint32_t pnn;
+	int attempt;
+	int ret = 0;
+
+	if (argc != 2) {
+		usage("moveip");
+	}
+
+	ret = ctdb_sock_addr_from_string(argv[0], &addr, false);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+
+	pnn = smb_strtoul(argv[1], NULL, 10, &ret, SMB_STR_STANDARD);
+	if (ret != 0 || pnn == CTDB_UNKNOWN_PNN) {
+		fprintf(stderr, "Invalid PNN %s\n", argv[1]);
+		return 1;
+	}
+
+	for (attempt = 0; attempt < 5; attempt++) {
+		ret = moveip(mem_ctx, ctdb, &addr, pnn);
+		if (ret == 0) {
+			return 0;
+		}
+
+		sleep(3);
+	}
+
+	/* Only reached when every attempt failed */
+	fprintf(stderr, "Failed to move IP %s to node %u\n",
+		argv[0], pnn);
+	return ret;
+}
+
+/*
+ * Broadcast a rebalance-node message for pnn to all connected nodes.
+ *
+ * Returns 0 on success, otherwise the error from sending the message
+ * (after reporting it on stderr).
+ */
+static int rebalancenode(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 uint32_t pnn)
+{
+	int ret = ctdb_message_rebalance_node(mem_ctx, ctdb->ev,
+					      ctdb->client,
+					      CTDB_BROADCAST_CONNECTED, pnn);
+
+	if (ret == 0) {
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Failed to ask leader to distribute IPs\n");
+	return ret;
+}
+
+/*
+ * Command handler: add a public IP to node ctdb->cmd_pnn.
+ *
+ * argv[0] is parsed as an address with mask, argv[1] is the interface.
+ * If the node already knows the address nothing is done and 0 is
+ * returned.  Otherwise the add control is tried up to 5 times (3 second
+ * sleep after each failure) and, on success, a rebalance of the node is
+ * requested.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int control_addip(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 int argc, const char **argv)
+{
+	ctdb_sock_addr addr;
+	struct ctdb_public_ip_list *known_ips;
+	struct ctdb_addr_info info;
+	unsigned int mask, idx;
+	int ret, attempt;
+
+	if (argc != 2) {
+		usage("addip");
+	}
+
+	ret = ctdb_sock_addr_mask_from_string(argv[0], &addr, &mask);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP/Mask %s\n", argv[0]);
+		return 1;
+	}
+
+	ret = ctdb_ctrl_get_public_ips(mem_ctx, ctdb->ev, ctdb->client,
+				       ctdb->cmd_pnn, TIMEOUT(),
+				       false, &known_ips);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to get Public IPs from node %u\n",
+			ctdb->cmd_pnn);
+		return 1;
+	}
+
+	/* Nothing to do if the node already has this address */
+	for (idx = 0; idx < known_ips->num; idx++) {
+		if (! ctdb_same_ip(&addr, &known_ips->ip[idx].addr)) {
+			continue;
+		}
+
+		fprintf(stderr, "Node already knows about IP %s\n",
+			ctdb_sock_addr_to_string(mem_ctx,
+						 &addr, false));
+		return 0;
+	}
+
+	info.addr = addr;
+	info.mask = mask;
+	info.iface = argv[1];
+
+	for (attempt = 0; attempt < 5; attempt++) {
+		ret = ctdb_ctrl_add_public_ip(mem_ctx, ctdb->ev, ctdb->client,
+					      ctdb->cmd_pnn, TIMEOUT(),
+					      &info);
+		if (ret == 0) {
+			break;
+		}
+
+		sleep(3);
+	}
+
+	if (ret != 0) {
+		fprintf(stderr, "Failed to add public IP to node %u."
+			" Giving up\n", ctdb->cmd_pnn);
+		return ret;
+	}
+
+	/*
+	 * CTDB_CONTROL_ADD_PUBLIC_IP will implicitly trigger
+	 * CTDB_SRVID_TAKEOVER_RUN broadcast to all connected nodes.
+	 *
+	 * That means CTDB_{CONTROL,EVENT,SRVID}_IPREALLOCATED is
+	 * triggered at the end of the takeover run...
+	 *
+	 * So we don't need to call ipreallocate() nor
+	 * send_ipreallocated_control_to_nodes() here; asking for a
+	 * rebalance is all that is left to do.
+	 */
+	return rebalancenode(mem_ctx, ctdb, ctdb->cmd_pnn);
+}
+
+/*
+ * Command handler: delete the public IP in argv[0] from node
+ * ctdb->cmd_pnn.
+ *
+ * If the node does not know the address this is reported and 0 is
+ * returned.  Returns 0 on success, 1 for a bad address or if the
+ * node's public IPs cannot be fetched, otherwise the error from the
+ * delete control.
+ */
+static int control_delip(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 int argc, const char **argv)
+{
+	ctdb_sock_addr addr;
+	struct ctdb_public_ip_list *known_ips;
+	struct ctdb_addr_info info;
+	unsigned int idx;
+	bool known = false;
+	int ret;
+
+	if (argc != 1) {
+		usage("delip");
+	}
+
+	ret = ctdb_sock_addr_from_string(argv[0], &addr, false);
+	if (ret != 0) {
+		fprintf(stderr, "Invalid IP address %s\n", argv[0]);
+		return 1;
+	}
+
+	ret = ctdb_ctrl_get_public_ips(mem_ctx, ctdb->ev, ctdb->client,
+				       ctdb->cmd_pnn, TIMEOUT(),
+				       false, &known_ips);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to get Public IPs from node %u\n",
+			ctdb->cmd_pnn);
+		return 1;
+	}
+
+	for (idx = 0; idx < known_ips->num; idx++) {
+		if (ctdb_same_ip(&addr, &known_ips->ip[idx].addr)) {
+			known = true;
+			break;
+		}
+	}
+
+	if (! known) {
+		fprintf(stderr, "Node does not know about IP address %s\n",
+			ctdb_sock_addr_to_string(mem_ctx, &addr, false));
+		return 0;
+	}
+
+	/* Only the address is filled in for a delete */
+	info.addr = addr;
+	info.mask = 0;
+	info.iface = NULL;
+
+	ret = ctdb_ctrl_del_public_ip(mem_ctx, ctdb->ev, ctdb->client,
+				      ctdb->cmd_pnn, TIMEOUT(), &info);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to delete public IP from node %u\n",
+			ctdb->cmd_pnn);
+		return ret;
+	}
+
+	/*
+	 * CTDB_CONTROL_DEL_PUBLIC_IP only marks the public ip
+	 * with pending_delete if it's still in use.
+	 *
+	 * Any later takeover run will really move the public ip
+	 * away from the local node and finally removes it.
+	 *
+	 * That means CTDB_{CONTROL,EVENT,SRVID}_IPREALLOCATED is
+	 * triggered at the end of the takeover run that actually
+	 * moves the public ip away.
+	 *
+	 * So we don't need to call ipreallocate() nor
+	 * send_ipreallocated_control_to_nodes() here...
+	 */
+
+	return 0;
+}
+
+/* Backup file format version; files with any other version are rejected */
+#define DB_VERSION 3
+/* Size of the name field in struct db_header */
+#define MAX_DB_NAME 64
+/*
+ * backup_handler() writes its record buffer to the backup file once
+ * ctdb_rec_buffer_len() reaches this value.
+ */
+#define MAX_REC_BUFFER_SIZE (100*1000)
+
+/*
+ * Header at the start of a database backup file.  It is written and
+ * read as the raw in-memory struct.
+ */
+struct db_header {
+ /* Must equal DB_VERSION */
+ unsigned long version;
+ /* time(NULL) taken when the backup was completed */
+ time_t timestamp;
+ /* Flags of the backed-up database; only the low 8 bits are used on restore */
+ unsigned long flags;
+ /* Number of record buffers following the header */
+ unsigned long nbuf;
+ /* Total number of records across all buffers */
+ unsigned long nrec;
+ /* Name of the backed-up database */
+ char name[MAX_DB_NAME];
+};
+
+/* State passed to backup_handler() while a database is traversed */
+struct backup_state {
+ /* talloc context the record buffer is allocated from */
+ TALLOC_CTX *mem_ctx;
+ /* Records collected since the last write; NULL when empty */
+ struct ctdb_rec_buffer *recbuf;
+ /* Database ID passed to ctdb_rec_buffer_init() */
+ uint32_t db_id;
+ /* Backup file descriptor */
+ int fd;
+ /* Number of buffers and records written to fd so far */
+ unsigned int nbuf, nrec;
+};
+
+/*
+ * Traverse callback used by control_backupdb(): append one record to
+ * the pending record buffer and write the buffer to the backup file
+ * once it has reached MAX_REC_BUFFER_SIZE.
+ *
+ * Returns 0 on success, ENOMEM if a buffer cannot be created, or the
+ * error from adding to / writing the buffer.
+ */
+static int backup_handler(uint32_t reqid, struct ctdb_ltdb_header *header,
+			  TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct backup_state *bs = private_data;
+	size_t buflen;
+	int ret;
+
+	/* Start a fresh buffer on first use and after every flush */
+	if (bs->recbuf == NULL) {
+		bs->recbuf = ctdb_rec_buffer_init(bs->mem_ctx, bs->db_id);
+		if (bs->recbuf == NULL) {
+			return ENOMEM;
+		}
+	}
+
+	ret = ctdb_rec_buffer_add(bs->recbuf, bs->recbuf, reqid,
+				  header, key, data);
+	if (ret != 0) {
+		return ret;
+	}
+
+	buflen = ctdb_rec_buffer_len(bs->recbuf);
+	if (buflen < MAX_REC_BUFFER_SIZE) {
+		/* Keep accumulating */
+		return 0;
+	}
+
+	ret = ctdb_rec_buffer_write(bs->recbuf, bs->fd);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to write records to backup file\n");
+		return ret;
+	}
+
+	bs->nbuf++;
+	bs->nrec += bs->recbuf->count;
+	TALLOC_FREE(bs->recbuf);
+
+	return 0;
+}
+
+/*
+ * Command handler: back up database argv[0] to file argv[1].
+ *
+ * An all-zero header is written first as a placeholder, then all
+ * records collected by backup_handler() are written, and finally the
+ * real header (version, timestamp, flags, buffer/record counts, name)
+ * is written over the placeholder.
+ *
+ * Returns 0 on success, 1 if the database does not exist, otherwise an
+ * error code (EIO for a short header write).
+ */
+static int control_backupdb(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	const char *db_name;
+	struct ctdb_db_context *db;
+	uint32_t db_id;
+	uint8_t db_flags;
+	struct backup_state state;
+	struct db_header db_hdr;
+	ssize_t n;
+	int fd, ret;
+
+	if (argc != 2) {
+		usage("backupdb");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &db_id, &db_name, &db_flags)) {
+		return 1;
+	}
+
+	ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+			  db_flags, &db);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+		return ret;
+	}
+
+	fd = open(argv[1], O_RDWR|O_CREAT, 0600);
+	if (fd == -1) {
+		ret = errno;
+		fprintf(stderr, "Failed to open file %s for writing\n",
+			argv[1]);
+		return ret;
+	}
+
+	/* Write empty header first */
+	ZERO_STRUCT(db_hdr);
+	n = write(fd, &db_hdr, sizeof(struct db_header));
+	if (n != (ssize_t)sizeof(struct db_header)) {
+		/* errno is only meaningful when write() itself failed */
+		ret = (n == -1) ? errno : EIO;
+		close(fd);
+		fprintf(stderr, "Failed to write header to file %s\n", argv[1]);
+		return ret;
+	}
+
+	state.mem_ctx = mem_ctx;
+	state.recbuf = NULL;
+	/* backup_handler() reads db_id when it creates a record buffer */
+	state.db_id = db_id;
+	state.fd = fd;
+	state.nbuf = 0;
+	state.nrec = 0;
+
+	ret = ctdb_db_traverse_local(db, true, false, backup_handler, &state);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to collect records from DB %s\n",
+			db_name);
+		close(fd);
+		return ret;
+	}
+
+	/* Flush whatever is left in the last, partially filled buffer */
+	if (state.recbuf != NULL) {
+		ret = ctdb_rec_buffer_write(state.recbuf, state.fd);
+		if (ret != 0) {
+			fprintf(stderr,
+				"Failed to write records to backup file\n");
+			close(fd);
+			return ret;
+		}
+
+		state.nbuf += 1;
+		state.nrec += state.recbuf->count;
+		TALLOC_FREE(state.recbuf);
+	}
+
+	db_hdr.version = DB_VERSION;
+	db_hdr.timestamp = time(NULL);
+	db_hdr.flags = db_flags;
+	db_hdr.nbuf = state.nbuf;
+	db_hdr.nrec = state.nrec;
+	/* db_hdr was zeroed above, so the name stays NUL terminated */
+	strncpy(db_hdr.name, db_name, MAX_DB_NAME-1);
+
+	/* Go back and replace the placeholder with the real header */
+	if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+		ret = errno;
+		close(fd);
+		fprintf(stderr, "Failed to write header to file %s\n", argv[1]);
+		return ret;
+	}
+
+	n = write(fd, &db_hdr, sizeof(struct db_header));
+	if (n != (ssize_t)sizeof(struct db_header)) {
+		ret = (n == -1) ? errno : EIO;
+		close(fd);
+		fprintf(stderr, "Failed to write header to file %s\n", argv[1]);
+		return ret;
+	}
+
+	close(fd);
+	printf("Database backed up to %s\n", argv[1]);
+	return 0;
+}
+
+/*
+ * Command handler: restore a database from the backup file argv[0].
+ * The optional argv[1] overrides the database name stored in the
+ * backup header.
+ *
+ * On all active nodes the database is frozen, a transaction is started
+ * and the database is wiped.  The record buffers from the file are then
+ * pushed to the nodes, each node's received record count is checked
+ * against the header, the database is marked healthy, the transaction
+ * is committed and the database is thawed.
+ *
+ * If any step after the node list is built fails, the recovery mode of
+ * the local node is set to CTDB_RECOVERY_ACTIVE before returning.
+ *
+ * Returns 0 on success, otherwise a non-zero error code (EINVAL for a
+ * truncated header or wrong version, EIO if a node received the wrong
+ * number of records).
+ */
+static int control_restoredb(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			     int argc, const char **argv)
+{
+	const char *db_name = NULL;
+	struct ctdb_db_context *db;
+	struct db_header db_hdr;
+	struct ctdb_node_map *nodemap;
+	struct ctdb_req_control request;
+	struct ctdb_reply_control **reply;
+	struct ctdb_transdb wipedb;
+	struct ctdb_pulldb_ext pulldb;
+	struct ctdb_rec_buffer *recbuf;
+	uint32_t generation;
+	uint32_t *pnn_list;
+	char timebuf[128];
+	ssize_t n;
+	int fd;
+	unsigned long i;
+	/* Signed, so a negative result from list_of_active_nodes() is caught */
+	int count, j;
+	int ret;
+	uint8_t db_flags;
+
+	if (argc < 1 || argc > 2) {
+		usage("restoredb");
+	}
+
+	fd = open(argv[0], O_RDONLY, 0600);
+	if (fd == -1) {
+		ret = errno;
+		fprintf(stderr, "Failed to open file %s for reading\n",
+			argv[0]);
+		return ret;
+	}
+
+	if (argc == 2) {
+		db_name = argv[1];
+	}
+
+	n = read(fd, &db_hdr, sizeof(struct db_header));
+	if (n != (ssize_t)sizeof(struct db_header)) {
+		/* A short read would leave db_hdr partly uninitialised */
+		ret = (n == -1) ? errno : EINVAL;
+		close(fd);
+		fprintf(stderr, "Failed to read db header from file %s\n",
+			argv[0]);
+		return ret;
+	}
+	db_hdr.name[sizeof(db_hdr.name)-1] = '\0';
+
+	if (db_hdr.version != DB_VERSION) {
+		fprintf(stderr,
+			"Wrong version of backup file, expected %u, got %lu\n",
+			DB_VERSION, db_hdr.version);
+		close(fd);
+		return EINVAL;
+	}
+
+	if (db_name == NULL) {
+		db_name = db_hdr.name;
+	}
+
+	strftime(timebuf, sizeof(timebuf)-1, "%Y/%m/%d %H:%M:%S",
+		 localtime(&db_hdr.timestamp));
+	printf("Restoring database %s from backup @ %s\n", db_name, timebuf);
+
+	db_flags = db_hdr.flags & 0xff;
+	ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+			  db_flags, &db);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+		close(fd);
+		return ret;
+	}
+
+	nodemap = get_nodemap(ctdb, false);
+	if (nodemap == NULL) {
+		fprintf(stderr, "Failed to get nodemap\n");
+		close(fd);
+		return ENOMEM;
+	}
+
+	ret = get_generation(mem_ctx, ctdb, &generation);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to get current generation\n");
+		close(fd);
+		return ret;
+	}
+
+	count = list_of_active_nodes(nodemap, CTDB_UNKNOWN_PNN, mem_ctx,
+				     &pnn_list);
+	if (count <= 0) {
+		close(fd);
+		return ENOMEM;
+	}
+
+	wipedb.db_id = ctdb_db_id(db);
+	wipedb.tid = generation;
+
+	ctdb_req_control_db_freeze(&request, wipedb.db_id);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev,
+					ctdb->client, pnn_list, count,
+					TIMEOUT(), &request, NULL, NULL);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	ctdb_req_control_db_transaction_start(&request, &wipedb);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+					pnn_list, count, TIMEOUT(),
+					&request, NULL, NULL);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	ctdb_req_control_wipe_database(&request, &wipedb);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+					pnn_list, count, TIMEOUT(),
+					&request, NULL, NULL);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	pulldb.db_id = ctdb_db_id(db);
+	pulldb.lmaster = 0;
+	pulldb.srvid = SRVID_CTDB_PUSHDB;
+
+	ctdb_req_control_db_push_start(&request, &pulldb);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+					pnn_list, count, TIMEOUT(),
+					&request, NULL, NULL);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	/* Send each record buffer from the file to all active nodes */
+	for (i=0; i<db_hdr.nbuf; i++) {
+		struct ctdb_req_message message;
+		TDB_DATA data;
+		size_t np;
+
+		ret = ctdb_rec_buffer_read(fd, mem_ctx, &recbuf);
+		if (ret != 0) {
+			goto failed;
+		}
+
+		data.dsize = ctdb_rec_buffer_len(recbuf);
+		data.dptr = talloc_size(mem_ctx, data.dsize);
+		if (data.dptr == NULL) {
+			/* ret is still 0 here; report the failure */
+			ret = ENOMEM;
+			goto failed;
+		}
+
+		ctdb_rec_buffer_push(recbuf, data.dptr, &np);
+
+		message.srvid = pulldb.srvid;
+		message.data.data = data;
+
+		ret = ctdb_client_message_multi(mem_ctx, ctdb->ev,
+						ctdb->client,
+						pnn_list, count,
+						&message, NULL);
+		if (ret != 0) {
+			goto failed;
+		}
+
+		talloc_free(recbuf);
+		talloc_free(data.dptr);
+	}
+
+	ctdb_req_control_db_push_confirm(&request, pulldb.db_id);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+					pnn_list, count, TIMEOUT(),
+					&request, NULL, &reply);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	/* Every node must have received exactly the backed-up records */
+	for (j=0; j<count; j++) {
+		uint32_t num_records;
+
+		ret = ctdb_reply_control_db_push_confirm(reply[j],
+							 &num_records);
+		if (ret != 0) {
+			fprintf(stderr, "Invalid response from node %u\n",
+				pnn_list[j]);
+			goto failed;
+		}
+
+		if (num_records != db_hdr.nrec) {
+			fprintf(stderr, "Node %u received %u of %lu records\n",
+				pnn_list[j], num_records, db_hdr.nrec);
+			/* ret is 0 here; make sure failure is reported */
+			ret = EIO;
+			goto failed;
+		}
+	}
+
+	ctdb_req_control_db_set_healthy(&request, wipedb.db_id);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+					pnn_list, count, TIMEOUT(),
+					&request, NULL, NULL);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	ctdb_req_control_db_transaction_commit(&request, &wipedb);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+					pnn_list, count, TIMEOUT(),
+					&request, NULL, NULL);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	ctdb_req_control_db_thaw(&request, wipedb.db_id);
+	ret = ctdb_client_control_multi(mem_ctx, ctdb->ev,
+					ctdb->client, pnn_list, count,
+					TIMEOUT(), &request, NULL, NULL);
+	if (ret != 0) {
+		goto failed;
+	}
+
+	printf("Database %s restored\n", db_name);
+	close(fd);
+	return 0;
+
+
+failed:
+	close(fd);
+	ctdb_ctrl_set_recmode(mem_ctx, ctdb->ev, ctdb->client,
+			      ctdb->pnn, TIMEOUT(), CTDB_RECOVERY_ACTIVE);
+	return ret;
+}
+
+/* State passed to dumpdbbackup_handler() for each record buffer */
+struct dumpdbbackup_state {
+ /* Called for every record once its ltdb header has been extracted */
+ ctdb_rec_parser_func_t parser;
+ /* Handed to parser as its private data */
+ struct dump_record_state sub_state;
+};
+
+/*
+ * Record-buffer traverse callback used by control_dumpdbbackup().
+ *
+ * The ltdb header is extracted from the record data itself (the header
+ * argument is not used) and the record is then handed to state->parser.
+ * Returns the extraction error, or whatever the parser returns.
+ */
+static int dumpdbbackup_handler(uint32_t reqid,
+				struct ctdb_ltdb_header *header,
+				TDB_DATA key, TDB_DATA data,
+				void *private_data)
+{
+	struct dumpdbbackup_state *st = private_data;
+	struct ctdb_ltdb_header extracted;
+	int ret;
+
+	ret = ctdb_ltdb_header_extract(&data, &extracted);
+	if (ret == 0) {
+		ret = st->parser(reqid, &extracted, key, data,
+				 &st->sub_state);
+	}
+
+	return ret;
+}
+
+/*
+ * Command handler: print the records stored in the backup file argv[0].
+ *
+ * The header is validated (complete and of version DB_VERSION), then
+ * each of the header's nbuf record buffers is read and every record is
+ * passed to dump_record() via dumpdbbackup_handler().
+ *
+ * Returns 0 on success, EINVAL for a truncated header or wrong version,
+ * otherwise the error from reading or dumping records.
+ */
+static int control_dumpdbbackup(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+				int argc, const char **argv)
+{
+	struct db_header db_hdr;
+	char timebuf[128];
+	struct dumpdbbackup_state state;
+	ssize_t n;
+	unsigned long i;
+	int fd, ret;
+
+	if (argc != 1) {
+		/* Use the real command name so its usage can be found */
+		usage("dumpdbbackup");
+	}
+
+	fd = open(argv[0], O_RDONLY, 0600);
+	if (fd == -1) {
+		ret = errno;
+		fprintf(stderr, "Failed to open file %s for reading\n",
+			argv[0]);
+		return ret;
+	}
+
+	n = read(fd, &db_hdr, sizeof(struct db_header));
+	if (n != (ssize_t)sizeof(struct db_header)) {
+		/* A short read would leave db_hdr partly uninitialised */
+		ret = (n == -1) ? errno : EINVAL;
+		close(fd);
+		fprintf(stderr, "Failed to read db header from file %s\n",
+			argv[0]);
+		return ret;
+	}
+	db_hdr.name[sizeof(db_hdr.name)-1] = '\0';
+
+	if (db_hdr.version != DB_VERSION) {
+		fprintf(stderr,
+			"Wrong version of backup file, expected %u, got %lu\n",
+			DB_VERSION, db_hdr.version);
+		close(fd);
+		return EINVAL;
+	}
+
+	strftime(timebuf, sizeof(timebuf)-1, "%Y/%m/%d %H:%M:%S",
+		 localtime(&db_hdr.timestamp));
+	printf("Dumping database %s from backup @ %s\n",
+	       db_hdr.name, timebuf);
+
+	state.parser = dump_record;
+	state.sub_state.count = 0;
+
+	for (i=0; i<db_hdr.nbuf; i++) {
+		struct ctdb_rec_buffer *recbuf;
+
+		ret = ctdb_rec_buffer_read(fd, mem_ctx, &recbuf);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to read records\n");
+			close(fd);
+			return ret;
+		}
+
+		ret = ctdb_rec_buffer_traverse(recbuf, dumpdbbackup_handler,
+					       &state);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to dump records\n");
+			close(fd);
+			return ret;
+		}
+	}
+
+	close(fd);
+	printf("Dumped %u record(s)\n", state.sub_state.count);
+	return 0;
+}
+
+/*
+ * Command handler: wipe database argv[0] on all active nodes.
+ *
+ * The controls are sent to every active node in this order: freeze,
+ * transaction start, wipe, set healthy, transaction commit, thaw.
+ * If any of them fails, the recovery mode of the local node is set to
+ * CTDB_RECOVERY_ACTIVE before returning the error.
+ *
+ * Returns 0 on success, 1 if the database does not exist, otherwise a
+ * non-zero error code.
+ */
+static int control_wipedb(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ int argc, const char **argv)
+{
+ const char *db_name;
+ struct ctdb_db_context *db;
+ uint32_t db_id;
+ uint8_t db_flags;
+ struct ctdb_node_map *nodemap;
+ struct ctdb_req_control request;
+ struct ctdb_transdb wipedb;
+ uint32_t generation;
+ uint32_t *pnn_list;
+ int count, ret;
+
+ if (argc != 1) {
+ usage("wipedb");
+ }
+
+ if (! db_exists(mem_ctx, ctdb, argv[0], &db_id, &db_name, &db_flags)) {
+ return 1;
+ }
+
+ ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+ db_flags, &db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+ return ret;
+ }
+
+ nodemap = get_nodemap(ctdb, false);
+ if (nodemap == NULL) {
+ fprintf(stderr, "Failed to get nodemap\n");
+ return ENOMEM;
+ }
+
+ /* The current generation is used as the transaction id below */
+ ret = get_generation(mem_ctx, ctdb, &generation);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to get current generation\n");
+ return ret;
+ }
+
+ count = list_of_active_nodes(nodemap, CTDB_UNKNOWN_PNN, mem_ctx,
+ &pnn_list);
+ if (count <= 0) {
+ return ENOMEM;
+ }
+
+ ctdb_req_control_db_freeze(&request, db_id);
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev,
+ ctdb->client, pnn_list, count,
+ TIMEOUT(), &request, NULL, NULL);
+ if (ret != 0) {
+ goto failed;
+ }
+
+ wipedb.db_id = db_id;
+ wipedb.tid = generation;
+
+ ctdb_req_control_db_transaction_start(&request, &wipedb);
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+ pnn_list, count, TIMEOUT(),
+ &request, NULL, NULL);
+ if (ret != 0) {
+ goto failed;
+ }
+
+ ctdb_req_control_wipe_database(&request, &wipedb);
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+ pnn_list, count, TIMEOUT(),
+ &request, NULL, NULL);
+ if (ret != 0) {
+ goto failed;
+ }
+
+ ctdb_req_control_db_set_healthy(&request, db_id);
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+ pnn_list, count, TIMEOUT(),
+ &request, NULL, NULL);
+ if (ret != 0) {
+ goto failed;
+ }
+
+ ctdb_req_control_db_transaction_commit(&request, &wipedb);
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+ pnn_list, count, TIMEOUT(),
+ &request, NULL, NULL);
+ if (ret != 0) {
+ goto failed;
+ }
+
+ ctdb_req_control_db_thaw(&request, db_id);
+ ret = ctdb_client_control_multi(mem_ctx, ctdb->ev,
+ ctdb->client, pnn_list, count,
+ TIMEOUT(), &request, NULL, NULL);
+ if (ret != 0) {
+ goto failed;
+ }
+
+ printf("Database %s wiped\n", db_name);
+ return 0;
+
+
+failed:
+ /* Put the local node into recovery mode; the result is not checked */
+ ctdb_ctrl_set_recmode(mem_ctx, ctdb->ev, ctdb->client,
+ ctdb->pnn, TIMEOUT(), CTDB_RECOVERY_ACTIVE);
+ return ret;
+}
+
+/*
+ * Command handler: print the PNN of the current leader.
+ *
+ * Returns 0 on success, otherwise the error from get_leader().
+ */
+static int control_leader(TALLOC_CTX *mem_ctx,
+			  struct ctdb_context *ctdb,
+			  int argc,
+			  const char **argv)
+{
+	uint32_t leader_pnn;
+	int ret = get_leader(mem_ctx, ctdb, &leader_pnn);
+
+	if (ret == 0) {
+		print_pnn(leader_pnn);
+	}
+
+	return ret;
+}
+
+/*
+ * Command handler: run the event daemon helper with the given
+ * arguments.
+ *
+ * The helper path comes from the CTDB_EVENT_HELPER environment variable
+ * if set, otherwise it is "ctdb-event" in CTDB_HELPER_BINDIR.
+ * Returns 1 if the path cannot be allocated, otherwise the result of
+ * run_helper().
+ */
+static int control_event(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 int argc, const char **argv)
+{
+	const char *override = getenv("CTDB_EVENT_HELPER");
+	char *helper_path;
+
+	if (override == NULL) {
+		helper_path = talloc_asprintf(mem_ctx, "%s/ctdb-event",
+					      CTDB_HELPER_BINDIR);
+	} else {
+		helper_path = talloc_strdup(mem_ctx, override);
+	}
+
+	if (helper_path == NULL) {
+		fprintf(stderr, "Unable to set event daemon helper\n");
+		return 1;
+	}
+
+	return run_helper(mem_ctx, "event daemon helper", helper_path,
+			  argc, argv);
+}
+
+/*
+ * Command handler: show script status by running control_event() with
+ * the arguments "status legacy <event>", where <event> is argv[0] or
+ * "monitor" when no argument is given.
+ *
+ * The result of control_event() is deliberately ignored; this always
+ * returns 0.
+ */
+static int control_scriptstatus(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+				int argc, const char **argv)
+{
+	const char *event = "monitor";
+	const char *new_argv[4];
+
+	if (argc > 1) {
+		usage("scriptstatus");
+	}
+
+	if (argc != 0) {
+		event = argv[0];
+	}
+
+	new_argv[0] = "status";
+	new_argv[1] = "legacy";
+	new_argv[2] = event;
+	new_argv[3] = NULL;
+
+	(void) control_event(mem_ctx, ctdb, 3, new_argv);
+	return 0;
+}
+
+/*
+ * Command handler: run the NAT gateway helper with the single given
+ * argument.
+ *
+ * The helper path comes from the CTDB_NATGW_HELPER environment variable
+ * if set, otherwise it is "ctdb_natgw" in CTDB_HELPER_BINDIR.
+ * Returns 1 if the path cannot be allocated, otherwise the result of
+ * run_helper().
+ */
+static int control_natgw(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			 int argc, const char **argv)
+{
+	const char *override;
+	char *helper_path;
+
+	if (argc != 1) {
+		usage("natgw");
+	}
+
+	override = getenv("CTDB_NATGW_HELPER");
+	if (override == NULL) {
+		helper_path = talloc_asprintf(mem_ctx, "%s/ctdb_natgw",
+					      CTDB_HELPER_BINDIR);
+	} else {
+		helper_path = talloc_strdup(mem_ctx, override);
+	}
+
+	if (helper_path == NULL) {
+		fprintf(stderr, "Unable to set NAT gateway helper\n");
+		return 1;
+	}
+
+	return run_helper(mem_ctx, "NAT gateway helper", helper_path,
+			  argc, argv);
+}
+
+/*
+ * Find the PNN of the current node.
+ *
+ * The nodes file is loaded and each non-deleted entry is checked with
+ * ctdb_sys_have_ip(); the first entry whose address is local wins.
+ * If pnn is not NULL it receives that entry's PNN.
+ *
+ * Returns true if a matching node was found, false otherwise (including
+ * when the nodes file cannot be read).
+ */
+static bool find_node_xpnn(TALLOC_CTX *mem_ctx, uint32_t *pnn)
+{
+	struct ctdb_node_map *nodes;
+	unsigned int idx;
+
+	nodes = read_nodes_file(mem_ctx, CTDB_UNKNOWN_PNN);
+	if (nodes == NULL) {
+		return false;
+	}
+
+	for (idx = 0; idx < nodes->num; idx++) {
+		if ((nodes->node[idx].flags & NODE_FLAGS_DELETED) != 0) {
+			continue;
+		}
+		if (! ctdb_sys_have_ip(&nodes->node[idx].addr)) {
+			continue;
+		}
+
+		if (pnn != NULL) {
+			*pnn = nodes->node[idx].pnn;
+		}
+		talloc_free(nodes);
+		return true;
+	}
+
+	fprintf(stderr, "Failed to detect PNN of the current node.\n");
+	talloc_free(nodes);
+	return false;
+}
+
+/*
+ * Command handler: print the recovery lock setting of node
+ * ctdb->cmd_pnn, if it has one.
+ *
+ * Returns 0 on success (nothing is printed when no reclock is set),
+ * otherwise the error from the control.
+ */
+static int control_getreclock(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			      int argc, const char **argv)
+{
+	const char *reclock_path;
+	int ret;
+
+	if (argc != 0) {
+		usage("getreclock");
+	}
+
+	ret = ctdb_ctrl_get_reclock_file(mem_ctx, ctdb->ev, ctdb->client,
+					 ctdb->cmd_pnn, TIMEOUT(),
+					 &reclock_path);
+	if (ret == 0 && reclock_path != NULL) {
+		printf("%s\n", reclock_path);
+	}
+
+	return ret;
+}
+
+/*
+ * Command handler: switch the lmaster role of node ctdb->cmd_pnn on or
+ * off.  argv[0] must be "on" or "off"; anything else shows the usage.
+ *
+ * Returns the result of the set-lmasterrole control.
+ */
+static int control_setlmasterrole(TALLOC_CTX *mem_ctx,
+				  struct ctdb_context *ctdb,
+				  int argc, const char **argv)
+{
+	uint32_t role = 0;
+
+	if (argc != 1) {
+		usage("setlmasterrole");
+	}
+
+	if (strcmp(argv[0], "on") == 0) {
+		role = 1;
+	} else if (strcmp(argv[0], "off") != 0) {
+		usage("setlmasterrole");
+	}
+
+	return ctdb_ctrl_set_lmasterrole(mem_ctx, ctdb->ev, ctdb->client,
+					 ctdb->cmd_pnn, TIMEOUT(), role);
+}
+
+/*
+ * Command handler: switch the leader role of node ctdb->cmd_pnn on or
+ * off.  argv[0] must be "on" or "off"; anything else shows the usage.
+ *
+ * Returns the result of ctdb_ctrl_set_recmasterrole().
+ */
+static int control_setleaderrole(TALLOC_CTX *mem_ctx,
+				 struct ctdb_context *ctdb,
+				 int argc,
+				 const char **argv)
+{
+	uint32_t role = 0;
+
+	if (argc != 1) {
+		usage("setleaderrole");
+	}
+
+	if (strcmp(argv[0], "on") == 0) {
+		role = 1;
+	} else if (strcmp(argv[0], "off") != 0) {
+		usage("setleaderrole");
+	}
+
+	return ctdb_ctrl_set_recmasterrole(mem_ctx,
+					   ctdb->ev,
+					   ctdb->client,
+					   ctdb->cmd_pnn,
+					   TIMEOUT(),
+					   role);
+}
+
+/*
+ * Command handler: set the READONLY property on database argv[0] on
+ * node ctdb->cmd_pnn.  Persistent and replicated databases are refused.
+ *
+ * Returns 1 if the database does not exist or is not volatile,
+ * otherwise the result of the control.
+ */
+static int control_setdbreadonly(TALLOC_CTX *mem_ctx,
+				 struct ctdb_context *ctdb,
+				 int argc, const char **argv)
+{
+	uint32_t db_id;
+	uint8_t db_flags;
+
+	if (argc != 1) {
+		usage("setdbreadonly");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &db_id, NULL, &db_flags)) {
+		return 1;
+	}
+
+	if ((db_flags &
+	     (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) != 0) {
+		fprintf(stderr, "READONLY can be set only on volatile DB\n");
+		return 1;
+	}
+
+	return ctdb_ctrl_set_db_readonly(mem_ctx, ctdb->ev, ctdb->client,
+					 ctdb->cmd_pnn, TIMEOUT(), db_id);
+}
+
+/*
+ * Command handler: set the STICKY property on database argv[0] on node
+ * ctdb->cmd_pnn.  Persistent and replicated databases are refused.
+ *
+ * Returns 1 if the database does not exist or is not volatile,
+ * otherwise the result of the control.
+ */
+static int control_setdbsticky(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			       int argc, const char **argv)
+{
+	uint32_t db_id;
+	uint8_t db_flags;
+
+	if (argc != 1) {
+		usage("setdbsticky");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &db_id, NULL, &db_flags)) {
+		return 1;
+	}
+
+	if ((db_flags &
+	     (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) != 0) {
+		fprintf(stderr, "STICKY can be set only on volatile DB\n");
+		return 1;
+	}
+
+	return ctdb_ctrl_set_db_sticky(mem_ctx, ctdb->ev, ctdb->client,
+				       ctdb->cmd_pnn, TIMEOUT(), db_id);
+}
+
+/*
+ * Command handler: fetch the record with key argv[1] from database
+ * argv[0] inside a transaction and print its value.
+ *
+ * Only persistent or replicated databases are accepted.  The
+ * transaction is always cancelled afterwards, never committed.
+ *
+ * Returns 0 on success, 1 if the database does not exist or does not
+ * support transactions, otherwise a non-zero error code.
+ */
+static int control_pfetch(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ int argc, const char **argv)
+{
+ const char *db_name;
+ struct ctdb_db_context *db;
+ struct ctdb_transaction_handle *h;
+ uint8_t db_flags;
+ TDB_DATA key, data;
+ int ret;
+
+ if (argc != 2) {
+ usage("pfetch");
+ }
+
+ if (! db_exists(mem_ctx, ctdb, argv[0], NULL, &db_name, &db_flags)) {
+ return 1;
+ }
+
+ if (! (db_flags &
+ (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED))) {
+ fprintf(stderr, "Transactions not supported on DB %s\n",
+ db_name);
+ return 1;
+ }
+
+ ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+ db_flags, &db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+ return ret;
+ }
+
+ ret = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &key);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+ return ret;
+ }
+
+ /* NOTE(review): the "true" argument is presumably a read-only flag — confirm */
+ ret = ctdb_transaction_start(mem_ctx, ctdb->ev, ctdb->client,
+ TIMEOUT(), db, true, &h);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to start transaction on db %s\n",
+ db_name);
+ return ret;
+ }
+
+ ret = ctdb_transaction_fetch_record(h, key, mem_ctx, &data);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to read record for key %s\n",
+ argv[1]);
+ ctdb_transaction_cancel(h);
+ return ret;
+ }
+
+ /* The value is printed with an explicit length (%.*s), so it is not assumed to be NUL terminated */
+ printf("%.*s\n", (int)data.dsize, data.dptr);
+
+ ctdb_transaction_cancel(h);
+ return 0;
+}
+
+/*
+ * Command handler: store value argv[2] under key argv[1] in database
+ * argv[0] using a transaction.
+ *
+ * Only persistent or replicated databases are accepted.  The
+ * transaction is cancelled if storing or committing fails.
+ *
+ * Returns 0 on success, 1 if the database does not exist or does not
+ * support transactions, otherwise a non-zero error code.
+ */
+static int control_pstore(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ int argc, const char **argv)
+{
+ const char *db_name;
+ struct ctdb_db_context *db;
+ struct ctdb_transaction_handle *h;
+ uint8_t db_flags;
+ TDB_DATA key, data;
+ int ret;
+
+ if (argc != 3) {
+ usage("pstore");
+ }
+
+ if (! db_exists(mem_ctx, ctdb, argv[0], NULL, &db_name, &db_flags)) {
+ return 1;
+ }
+
+ if (! (db_flags &
+ (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED))) {
+ fprintf(stderr, "Transactions not supported on DB %s\n",
+ db_name);
+ return 1;
+ }
+
+ ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+ db_flags, &db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+ return ret;
+ }
+
+ ret = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &key);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+ return ret;
+ }
+
+ ret = str_to_data(argv[2], strlen(argv[2]), mem_ctx, &data);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to parse value %s\n", argv[2]);
+ return ret;
+ }
+
+ /* NOTE(review): the "false" argument is presumably a read-only flag — confirm */
+ ret = ctdb_transaction_start(mem_ctx, ctdb->ev, ctdb->client,
+ TIMEOUT(), db, false, &h);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to start transaction on db %s\n",
+ db_name);
+ return ret;
+ }
+
+ ret = ctdb_transaction_store_record(h, key, data);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to store record for key %s\n",
+ argv[1]);
+ ctdb_transaction_cancel(h);
+ return ret;
+ }
+
+ ret = ctdb_transaction_commit(h);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to commit transaction on db %s\n",
+ db_name);
+ /* NOTE(review): confirm h is still valid after a failed commit */
+ ctdb_transaction_cancel(h);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Command handler: delete the record with key argv[1] from database
+ * argv[0] using a transaction.
+ *
+ * Only persistent or replicated databases are accepted.  The
+ * transaction is cancelled if deleting or committing fails.
+ *
+ * Returns 0 on success, 1 if the database does not exist or does not
+ * support transactions, otherwise a non-zero error code.
+ */
+static int control_pdelete(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+ int argc, const char **argv)
+{
+ const char *db_name;
+ struct ctdb_db_context *db;
+ struct ctdb_transaction_handle *h;
+ uint8_t db_flags;
+ TDB_DATA key;
+ int ret;
+
+ if (argc != 2) {
+ usage("pdelete");
+ }
+
+ if (! db_exists(mem_ctx, ctdb, argv[0], NULL, &db_name, &db_flags)) {
+ return 1;
+ }
+
+ if (! (db_flags &
+ (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED))) {
+ fprintf(stderr, "Transactions not supported on DB %s\n",
+ db_name);
+ return 1;
+ }
+
+ ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+ db_flags, &db);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+ return ret;
+ }
+
+ ret = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &key);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+ return ret;
+ }
+
+ /* NOTE(review): the "false" argument is presumably a read-only flag — confirm */
+ ret = ctdb_transaction_start(mem_ctx, ctdb->ev, ctdb->client,
+ TIMEOUT(), db, false, &h);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to start transaction on db %s\n",
+ db_name);
+ return ret;
+ }
+
+ ret = ctdb_transaction_delete_record(h, key);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to delete record for key %s\n",
+ argv[1]);
+ ctdb_transaction_cancel(h);
+ return ret;
+ }
+
+ ret = ctdb_transaction_commit(h);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to commit transaction on db %s\n",
+ db_name);
+ /* NOTE(review): confirm h is still valid after a failed commit */
+ ctdb_transaction_cancel(h);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse one double-quoted string starting at *ptr.
+ *
+ * Leading spaces and tabs are skipped.  On success *data holds the
+ * text between the quotes (converted by str_to_data(), or tdb_null for
+ * an empty pair of quotes) and *ptr is advanced past the closing quote.
+ *
+ * Returns 0 on success, 1 when the input does not start with a quote
+ * or the closing quote is missing, or the non-zero result of
+ * str_to_data().  *ptr is left untouched on failure.
+ */
+static int ptrans_parse_string(TALLOC_CTX *mem_ctx, const char **ptr, TDB_DATA *data)
+{
+	const char *body;
+	size_t len;
+	int rc;
+
+	*data = tdb_null;
+
+	/* Skip whitespace */
+	body = *ptr + strspn(*ptr, " \t");
+
+	if (body[0] != '"') {
+		fprintf(stderr, "Unsupported input format in %s\n", *ptr);
+		return 1;
+	}
+
+	/* Quoted ASCII string - no wide characters! */
+	body++;
+	len = strcspn(body, "\"");
+	if (body[len] != '"') {
+		fprintf(stderr, "Unmatched \" in input %s\n", *ptr);
+		return 1;
+	}
+
+	/* An empty "" leaves *data as tdb_null */
+	if (len > 0) {
+		rc = str_to_data(body, len, mem_ctx, data);
+		if (rc != 0) {
+			return rc;
+		}
+	}
+
+	*ptr = body + len + 1;
+	return 0;
+}
+
+#define MAX_LINE_SIZE 1024
+
+/*
+ * Read one line from file and parse it as two quoted strings: a key
+ * followed by a value.
+ *
+ * Returns false only when fgets() yields no more input.  In every
+ * other case true is returned; a line that could not be used is
+ * signalled to the caller by *key being set to tdb_null.
+ */
+static bool ptrans_get_key_value(TALLOC_CTX *mem_ctx, FILE *file,
+				 TDB_DATA *key, TDB_DATA *value)
+{
+	char buf[MAX_LINE_SIZE]; /* FIXME: make this more flexible? */
+	const char *cursor;
+	int rc;
+
+	cursor = fgets(buf, MAX_LINE_SIZE, file);
+	if (cursor == NULL) {
+		return false;
+	}
+
+	/* Get key */
+	rc = ptrans_parse_string(mem_ctx, &cursor, key);
+	if (rc != 0 || cursor == NULL || key->dptr == NULL) {
+		goto skip_line;
+	}
+
+	/* Get value */
+	rc = ptrans_parse_string(mem_ctx, &cursor, value);
+	if (rc != 0) {
+		/* The key was already allocated, release it again */
+		talloc_free(key->dptr);
+		goto skip_line;
+	}
+
+	return true;
+
+skip_line:
+	/* Line Ignored but not EOF */
+	*key = tdb_null;
+	return true;
+}
+
+/*
+ * "ctdb ptrans <dbname|dbid> [<file>]"
+ *
+ * Read key/value lines from <file> (or stdin when no file is given)
+ * and store all of them in a persistent or replicated database within
+ * one transaction.  Lines that ptrans_get_key_value() reports with an
+ * empty key are skipped.
+ *
+ * Returns 0 on success, 1 for an unknown or unsuitable database or an
+ * unreadable file, otherwise the return value of the failing call.
+ */
+static int control_ptrans(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	TDB_DATA rec_key = tdb_null, rec_val = tdb_null;
+	struct ctdb_transaction_handle *txn;
+	struct ctdb_db_context *db_ctx;
+	const char *name;
+	uint8_t flags;
+	FILE *in = stdin;
+	int rc;
+
+	if (argc < 1 || argc > 2) {
+		usage("ptrans");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], NULL, &name, &flags)) {
+		return 1;
+	}
+
+	if ((flags &
+	     (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) == 0) {
+		fprintf(stderr, "Transactions not supported on DB %s\n",
+			name);
+		return 1;
+	}
+
+	/* Input defaults to stdin unless a file name was supplied */
+	if (argc == 2) {
+		in = fopen(argv[1], "r");
+		if (in == NULL) {
+			fprintf(stderr, "Failed to open file %s\n", argv[1]);
+			return 1;
+		}
+	}
+
+	rc = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), name, flags,
+			 &db_ctx);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", name);
+		goto out;
+	}
+
+	rc = ctdb_transaction_start(mem_ctx, ctdb->ev, ctdb->client,
+				    TIMEOUT(), db_ctx, false, &txn);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to start transaction on db %s\n",
+			name);
+		goto out;
+	}
+
+	while (ptrans_get_key_value(mem_ctx, in, &rec_key, &rec_val)) {
+		if (rec_key.dsize == 0) {
+			/* Nothing usable on this line */
+			continue;
+		}
+
+		rc = ctdb_transaction_store_record(txn, rec_key, rec_val);
+		if (rc != 0) {
+			fprintf(stderr, "Failed to store record\n");
+			ctdb_transaction_cancel(txn);
+			goto out;
+		}
+
+		talloc_free(rec_key.dptr);
+		talloc_free(rec_val.dptr);
+	}
+
+	rc = ctdb_transaction_commit(txn);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to commit transaction on db %s\n",
+			name);
+		ctdb_transaction_cancel(txn);
+	}
+
+out:
+	/* Never close stdin, only a file opened above */
+	if (in != stdin) {
+		fclose(in);
+	}
+	return rc;
+}
+
+/*
+ * "ctdb tfetch <tdb-file> <key> [<file>]"
+ *
+ * Fetch the raw record for <key> directly from a TDB file, optionally
+ * write the raw record (header included) to <file>, then print the
+ * parsed ctdb_ltdb_header and the remaining data.
+ *
+ * The header and data are still printed when writing <file> fails, but
+ * the command then returns 1 so that the failure is visible to the
+ * caller.
+ *
+ * Returns 0 on success, 1 on any failure except a key parse error,
+ * for which the return value of str_to_data() is passed on.
+ */
+static int control_tfetch(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct tdb_context *tdb;
+	TDB_DATA key, data;
+	struct ctdb_ltdb_header header;
+	void *raw;
+	int status = 0;
+	int ret;
+
+	if (argc < 2 || argc > 3) {
+		usage("tfetch");
+	}
+
+	tdb = tdb_open(argv[0], 0, 0, O_RDWR, 0);
+	if (tdb == NULL) {
+		fprintf(stderr, "Failed to open TDB file %s\n", argv[0]);
+		return 1;
+	}
+
+	ret = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &key);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+		tdb_close(tdb);
+		return ret;
+	}
+
+	/* The database is not needed any more once the record is fetched */
+	data = tdb_fetch(tdb, key);
+	tdb_close(tdb);
+
+	if (data.dptr == NULL) {
+		fprintf(stderr, "No record for key %s\n", argv[1]);
+		return 1;
+	}
+
+	/*
+	 * tdb_fetch() hands back a malloc()ed buffer that the caller must
+	 * free.  Keep the original pointer: ctdb_ltdb_header_extract() is
+	 * given &data and so may change data.dptr before we get to free it.
+	 */
+	raw = data.dptr;
+
+	if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+		fprintf(stderr, "Invalid record for key %s\n", argv[1]);
+		free(raw);
+		return 1;
+	}
+
+	if (argc == 3) {
+		int fd;
+		ssize_t nwritten;
+
+		fd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0600);
+		if (fd == -1) {
+			fprintf(stderr, "Failed to open output file %s\n",
+				argv[2]);
+			status = 1;
+		} else {
+			nwritten = sys_write(fd, data.dptr, data.dsize);
+			if (nwritten == -1 ||
+			    (size_t)nwritten != data.dsize) {
+				fprintf(stderr,
+					"Failed to write record to file\n");
+				status = 1;
+			}
+			close(fd);
+		}
+	}
+
+	ret = ctdb_ltdb_header_extract(&data, &header);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to parse header from data\n");
+		free(raw);
+		return 1;
+	}
+
+	dump_ltdb_header(&header);
+	dump_tdb_data("data", data);
+
+	free(raw);
+	return status;
+}
+
+/*
+ * "ctdb tstore <tdb-file> <key> <data> [<rsn> <dmaster> <flags>]"
+ *
+ * Write a record directly into a TDB file.  The stored value is a
+ * ctdb_ltdb_header (zeroed, then filled from the optional <rsn>,
+ * <dmaster> and <flags> arguments) followed by <data>.
+ *
+ * Up to six arguments are accepted so that <flags>, which the usage
+ * text documents, can actually be supplied.
+ *
+ * Returns 0 on success, 1 when the TDB file cannot be opened, or the
+ * non-zero result of the failing parse/store call.
+ */
+static int control_tstore(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			  int argc, const char **argv)
+{
+	struct tdb_context *tdb;
+	TDB_DATA key, data[2], value;
+	struct ctdb_ltdb_header header;
+	uint8_t header_buf[sizeof(struct ctdb_ltdb_header)];
+	size_t np;
+	int ret = 0;
+
+	if (argc < 3 || argc > 6) {
+		usage("tstore");
+	}
+
+	tdb = tdb_open(argv[0], 0, 0, O_RDWR, 0);
+	if (tdb == NULL) {
+		fprintf(stderr, "Failed to open TDB file %s\n", argv[0]);
+		return 1;
+	}
+
+	ret = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &key);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+		tdb_close(tdb);
+		return ret;
+	}
+
+	ret = str_to_data(argv[2], strlen(argv[2]), mem_ctx, &value);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to parse value %s\n", argv[2]);
+		tdb_close(tdb);
+		return ret;
+	}
+
+	ZERO_STRUCT(header);
+
+	if (argc > 3) {
+		header.rsn = (uint64_t)smb_strtoull(argv[3],
+						    NULL,
+						    0,
+						    &ret,
+						    SMB_STR_STANDARD);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to parse RSN %s\n", argv[3]);
+			/* Do not leak the open database on this path */
+			tdb_close(tdb);
+			return ret;
+		}
+	}
+	if (argc > 4) {
+		header.dmaster = (uint32_t)atol(argv[4]);
+	}
+	if (argc > 5) {
+		header.flags = (uint32_t)atol(argv[5]);
+	}
+
+	ctdb_ltdb_header_push(&header, header_buf, &np);
+
+	/* Record = serialised header followed by the user supplied value */
+	data[0].dsize = np;
+	data[0].dptr = header_buf;
+
+	data[1].dsize = value.dsize;
+	data[1].dptr = value.dptr;
+
+	ret = tdb_storev(tdb, key, data, 2, TDB_REPLACE);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to write record %s to file %s\n",
+			argv[1], argv[0]);
+	}
+
+	tdb_close(tdb);
+
+	return ret;
+}
+
+/*
+ * "ctdb readkey <dbname|dbid> <key> [readonly]"
+ *
+ * Fetch-lock a record of a volatile database and print its size and
+ * contents.  With the optional "readonly" argument the readonly flag
+ * is passed to ctdb_fetch_lock().
+ *
+ * Returns 0 on success, 1 when db_exists() fails or the database is
+ * persistent/replicated, otherwise the return value of the failing
+ * call.
+ */
+static int control_readkey(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			   int argc, const char **argv)
+{
+	const char *db_name;
+	struct ctdb_db_context *db;
+	/*
+	 * Start from NULL: nothing visible here guarantees that
+	 * ctdb_fetch_lock() assigns the handle when it fails.
+	 */
+	struct ctdb_record_handle *h = NULL;
+	uint8_t db_flags;
+	TDB_DATA key, data;
+	bool readonly = false;
+	int ret;
+
+	if (argc < 2 || argc > 3) {
+		usage("readkey");
+	}
+
+	if (argc == 3) {
+		if (strcmp(argv[2], "readonly") == 0) {
+			readonly = true;
+		} else {
+			usage("readkey");
+		}
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], NULL, &db_name, &db_flags)) {
+		return 1;
+	}
+
+	if (db_flags & (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) {
+		fprintf(stderr, "DB %s is not a volatile database\n",
+			db_name);
+		return 1;
+	}
+
+	ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+			  db_flags, &db);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+		return ret;
+	}
+
+	ret = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &key);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+		return ret;
+	}
+
+	ret = ctdb_fetch_lock(mem_ctx, ctdb->ev, ctdb->client,
+			      db, key, readonly, &h, NULL, &data);
+	if (ret != 0) {
+		/* No handle was obtained, so there is nothing to release */
+		fprintf(stderr, "Failed to read record for key %s\n",
+			argv[1]);
+		return ret;
+	}
+
+	printf("Data: size:%zu ptr:[%.*s]\n", data.dsize,
+	       (int)data.dsize, data.dptr);
+
+	/* Freeing the handle releases the record */
+	talloc_free(h);
+	return 0;
+}
+
+/*
+ * "ctdb writekey <dbname|dbid> <key> <value>"
+ *
+ * Fetch-lock a record of a volatile database and store <value> in it.
+ *
+ * Returns 0 on success, 1 when db_exists() fails or the database is
+ * persistent/replicated, otherwise the return value of the failing
+ * call.
+ */
+static int control_writekey(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			    int argc, const char **argv)
+{
+	const char *db_name;
+	struct ctdb_db_context *db;
+	struct ctdb_record_handle *h;
+	uint8_t db_flags;
+	TDB_DATA key, data;
+	int ret;
+
+	if (argc != 3) {
+		usage("writekey");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], NULL, &db_name, &db_flags)) {
+		return 1;
+	}
+
+	if (db_flags & (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) {
+		fprintf(stderr, "DB %s is not a volatile database\n",
+			db_name);
+		return 1;
+	}
+
+	ret = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), db_name,
+			  db_flags, &db);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", db_name);
+		return ret;
+	}
+
+	ret = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &key);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+		return ret;
+	}
+
+	ret = str_to_data(argv[2], strlen(argv[2]), mem_ctx, &data);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to parse value %s\n", argv[2]);
+		return ret;
+	}
+
+	ret = ctdb_fetch_lock(mem_ctx, ctdb->ev, ctdb->client,
+			      db, key, false, &h, NULL, NULL);
+	if (ret != 0) {
+		/* argv[1] is the key; argv[0] is the database */
+		fprintf(stderr, "Failed to lock record for key %s\n", argv[1]);
+		return ret;
+	}
+
+	ret = ctdb_store_record(h, data);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to store record for key %s\n",
+			argv[1]);
+	}
+
+	/* Freeing the handle releases the record */
+	talloc_free(h);
+	return ret;
+}
+
+/*
+ * "ctdb deletekey <dbname|dbid> <key>"
+ *
+ * Fetch-lock a record of a volatile database and delete it.
+ *
+ * Returns 0 on success, 1 when db_exists() fails or the database is
+ * persistent/replicated, otherwise the return value of the failing
+ * call.
+ */
+static int control_deletekey(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			     int argc, const char **argv)
+{
+	struct ctdb_record_handle *rec;
+	struct ctdb_db_context *db_ctx;
+	const char *name;
+	uint8_t flags;
+	TDB_DATA rec_key, rec_val;
+	int rc;
+
+	if (argc != 2) {
+		usage("deletekey");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], NULL, &name, &flags)) {
+		return 1;
+	}
+
+	/* Record locking is only used on volatile databases */
+	if ((flags &
+	     (CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) != 0) {
+		fprintf(stderr, "DB %s is not a volatile database\n",
+			name);
+		return 1;
+	}
+
+	rc = ctdb_attach(ctdb->ev, ctdb->client, TIMEOUT(), name, flags,
+			 &db_ctx);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to attach to DB %s\n", name);
+		return rc;
+	}
+
+	rc = str_to_data(argv[1], strlen(argv[1]), mem_ctx, &rec_key);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to parse key %s\n", argv[1]);
+		return rc;
+	}
+
+	rc = ctdb_fetch_lock(mem_ctx, ctdb->ev, ctdb->client,
+			     db_ctx, rec_key, false, &rec, NULL, &rec_val);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to fetch record for key %s\n",
+			argv[1]);
+		return rc;
+	}
+
+	rc = ctdb_delete_record(rec);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to delete record for key %s\n",
+			argv[1]);
+	}
+
+	/* The handle is released whether or not the delete worked */
+	talloc_free(rec);
+	return rc;
+}
+
+/*
+ * "ctdb checktcpport <port>"
+ *
+ * Check whether a local TCP port can be bound.  A non-blocking IPv4
+ * socket is created and bound to the wildcard address on <port>.
+ *
+ * Returns 0 when the bind succeeds, otherwise the errno of the system
+ * call that failed.  errno is captured straight after the failing
+ * call because the following close()/fprintf() may overwrite it.
+ */
+static int control_checktcpport(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+				int argc, const char **argv)
+{
+	struct sockaddr_in sin;
+	unsigned int port;
+	int s, v;
+	int ret;
+	int saved_errno;
+
+	if (argc != 1) {
+		/* Must match the name in the command table */
+		usage("checktcpport");
+	}
+
+	port = atoi(argv[0]);
+
+	s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (s == -1) {
+		saved_errno = errno;
+		fprintf(stderr, "Failed to open local socket\n");
+		return saved_errno;
+	}
+
+	v = fcntl(s, F_GETFL, 0);
+	if (v == -1 || fcntl(s, F_SETFL, v | O_NONBLOCK)) {
+		saved_errno = errno;
+		fprintf(stderr, "Unable to set socket non-blocking\n");
+		close(s);
+		return saved_errno;
+	}
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(port);
+	ret = bind(s, (struct sockaddr *)&sin, sizeof(sin));
+	/* Only meaningful when bind() failed; read before close() */
+	saved_errno = errno;
+	close(s);
+	if (ret == -1) {
+		fprintf(stderr, "Failed to bind to TCP port %u\n", port);
+		return saved_errno;
+	}
+
+	return 0;
+}
+
+/*
+ * "ctdb getdbseqnum <dbname|dbid>"
+ *
+ * Ask the node selected by ctdb->cmd_pnn for the sequence number of a
+ * database and print it in hexadecimal.
+ *
+ * Returns 0 on success, 1 when db_exists() fails, otherwise the return
+ * value of ctdb_ctrl_get_db_seqnum().
+ */
+static int control_getdbseqnum(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			       int argc, const char **argv)
+{
+	const char *name;
+	uint64_t seq;
+	uint32_t id;
+	int rc;
+
+	if (argc != 1) {
+		usage("getdbseqnum");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &id, &name, NULL)) {
+		return 1;
+	}
+
+	rc = ctdb_ctrl_get_db_seqnum(mem_ctx, ctdb->ev, ctdb->client,
+				     ctdb->cmd_pnn, TIMEOUT(), id, &seq);
+	if (rc == 0) {
+		printf("0x%"PRIx64"\n", seq);
+		return 0;
+	}
+
+	fprintf(stderr, "Failed to get sequence number for DB %s\n",
+		name);
+	return rc;
+}
+
+/*
+ * "ctdb nodestatus [all|<pnn-list>]"
+ *
+ * Print the status of the selected nodes, in machine readable form
+ * when options.machinereadable is set.  A header is only requested
+ * from print_nodemap() when the argument is exactly "all".
+ *
+ * Returns 1 when the node string cannot be parsed or the node map
+ * cannot be obtained.  Otherwise the return value is the bitwise OR of
+ * the flags of every listed node that is not marked deleted, so 0
+ * means none of those nodes has any flag set.
+ */
+static int control_nodestatus(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			      int argc, const char **argv)
+{
+	struct ctdb_node_map *requested;
+	struct ctdb_node_map *current;
+	const char *nodestring = NULL;
+	bool want_header = false;
+	unsigned int idx;
+	int status = 0;
+
+	if (argc > 1) {
+		usage("nodestatus");
+	}
+
+	if (argc == 1) {
+		nodestring = argv[0];
+		want_header = (strcmp(nodestring, "all") == 0);
+	}
+
+	if (! parse_nodestring(mem_ctx, ctdb, nodestring, &requested)) {
+		return 1;
+	}
+
+	current = get_nodemap_unknown(mem_ctx, ctdb, requested);
+	if (current == NULL) {
+		return 1;
+	}
+
+	if (! options.machinereadable) {
+		print_nodemap(mem_ctx, ctdb, current, ctdb->cmd_pnn,
+			      want_header);
+	} else {
+		print_nodemap_machine(mem_ctx, ctdb, current, ctdb->cmd_pnn);
+	}
+
+	/* Accumulate the flags of all nodes that still exist */
+	for (idx = 0; idx < current->num; idx++) {
+		uint32_t flags = current->node[idx].flags;
+
+		if ((flags & NODE_FLAGS_DELETED) == 0) {
+			status |= flags;
+		}
+	}
+
+	return status;
+}
+
+/*
+ * Table of the counters of struct ctdb_db_statistics that
+ * print_dbstatistics() prints in its first loop.  Each entry pairs the
+ * member name, stringified by DBSTATISTICS_FIELD(), with the byte
+ * offset of that member inside the structure.  A '.' in the name is
+ * what print_dbstatistics() uses to group members under a prefix.
+ */
+const struct {
+ const char *name;
+ uint32_t offset;
+} db_stats_fields[] = {
+#define DBSTATISTICS_FIELD(n) { #n, offsetof(struct ctdb_db_statistics, n) }
+ DBSTATISTICS_FIELD(db_ro_delegations),
+ DBSTATISTICS_FIELD(db_ro_revokes),
+ DBSTATISTICS_FIELD(locks.num_calls),
+ DBSTATISTICS_FIELD(locks.num_current),
+ DBSTATISTICS_FIELD(locks.num_pending),
+ DBSTATISTICS_FIELD(locks.num_failed),
+};
+
+/*
+ * Print the statistics of one database to stdout.
+ *
+ * Output, in order: a title line with db_name, the counters listed in
+ * db_stats_fields[], the hop count and lock buckets, the lock and
+ * vacuum latencies, and finally the hot keys as hex strings.
+ */
+static void print_dbstatistics(const char *db_name,
+ struct ctdb_db_statistics *s)
+{
+ size_t i;
+ const char *prefix = NULL;
+ int preflen = 0;
+
+ printf("DB Statistics %s\n", db_name);
+
+ for (i=0; i<ARRAY_SIZE(db_stats_fields); i++) {
+ if (strchr(db_stats_fields[i].name, '.') != NULL) {
+ /* Length of "prefix." including the dot */
+ preflen = strcspn(db_stats_fields[i].name, ".") + 1;
+ /* Print the prefix once, when it first appears or changes */
+ if (! prefix ||
+ strncmp(prefix, db_stats_fields[i].name, preflen) != 0) {
+ prefix = db_stats_fields[i].name;
+ printf(" %*.*s\n", preflen-1, preflen-1,
+ db_stats_fields[i].name);
+ }
+ } else {
+ preflen = 0;
+ }
+ /*
+ * The member name is printed without its prefix; the value is
+ * read as a uint32_t at the recorded offset inside *s.
+ */
+ printf(" %*s%-22s%*s%10u\n", preflen ? 4 : 0, "",
+ db_stats_fields[i].name+preflen, preflen ? 0 : 4, "",
+ *(uint32_t *)(db_stats_fields[i].offset+(uint8_t *)s));
+ }
+
+ printf(" hop_count_buckets:");
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ printf(" %d", s->hop_count_bucket[i]);
+ }
+ printf("\n");
+
+ printf(" lock_buckets:");
+ for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+ printf(" %d", s->locks.buckets[i]);
+ }
+ printf("\n");
+
+ printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n",
+ "locks_latency MIN/AVG/MAX",
+ s->locks.latency.min, LATENCY_AVG(s->locks.latency),
+ s->locks.latency.max, s->locks.latency.num);
+
+ printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n",
+ "vacuum_latency MIN/AVG/MAX",
+ s->vacuum.latency.min, LATENCY_AVG(s->vacuum.latency),
+ s->vacuum.latency.max, s->vacuum.latency.num);
+
+ printf(" Num Hot Keys: %d\n", s->num_hot_keys);
+ for (i=0; i<s->num_hot_keys; i++) {
+ size_t j;
+ printf(" Count:%d Key:", s->hot_keys[i].count);
+ /* Keys are dumped byte by byte as two hex digits each */
+ for (j=0; j<s->hot_keys[i].key.dsize; j++) {
+ printf("%02x", s->hot_keys[i].key.dptr[j] & 0xff);
+ }
+ printf("\n");
+ }
+}
+
+/*
+ * "ctdb dbstatistics <dbname|dbid>"
+ *
+ * Fetch the statistics of a database from the node selected by
+ * ctdb->cmd_pnn and print them with print_dbstatistics().
+ *
+ * Returns 0 on success, 1 when db_exists() fails, otherwise the return
+ * value of ctdb_ctrl_get_db_statistics().
+ */
+static int control_dbstatistics(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+				int argc, const char **argv)
+{
+	struct ctdb_db_statistics *stats;
+	const char *name;
+	uint32_t id;
+	int rc;
+
+	if (argc != 1) {
+		usage("dbstatistics");
+	}
+
+	if (! db_exists(mem_ctx, ctdb, argv[0], &id, &name, NULL)) {
+		return 1;
+	}
+
+	rc = ctdb_ctrl_get_db_statistics(mem_ctx, ctdb->ev, ctdb->client,
+					 ctdb->cmd_pnn, TIMEOUT(), id,
+					 &stats);
+	if (rc == 0) {
+		print_dbstatistics(name, stats);
+		return 0;
+	}
+
+	fprintf(stderr, "Failed to get statistics for DB %s\n",
+		name);
+	return rc;
+}
+
+/*
+ * Bookkeeping shared between disable_takeover_runs() and its message
+ * handler disable_takeover_run_handler() while replies are collected.
+ */
+struct disable_takeover_runs_state {
+ /* PNNs of the nodes a reply is expected from */
+ uint32_t *pnn_list;
+ /* Number of entries in pnn_list and in reply */
+ unsigned int node_count;
+ /* reply[i] becomes true once pnn_list[i] has answered */
+ bool *reply;
+ /* Negative value received from a node, 0 if no error was reported */
+ int status;
+ /* Set when an error arrived or every node has answered */
+ bool done;
+};
+
+/*
+ * Message handler collecting the replies to a "disable takeover runs"
+ * request.
+ *
+ * private_data is the struct disable_takeover_runs_state of the
+ * request.  Packets whose size is not sizeof(int) are ignored.  A
+ * negative payload is recorded as an error and ends the wait; any
+ * other payload is taken as the PNN of the replying node.
+ */
+static void disable_takeover_run_handler(uint64_t srvid, TDB_DATA data,
+					 void *private_data)
+{
+	struct disable_takeover_runs_state *st = private_data;
+	unsigned int idx;
+	int reply;
+
+	if (data.dsize != sizeof(int)) {
+		/* Ignore packet */
+		return;
+	}
+
+	/* reply will be a PNN (i.e. >=0) on success, or negative on error */
+	reply = *(int *)data.dptr;
+	if (reply < 0) {
+		st->status = reply;
+		st->done = true;
+		return;
+	}
+
+	/* Tick off the node that sent this reply */
+	for (idx = 0; idx < st->node_count; idx++) {
+		if (st->pnn_list[idx] == (uint32_t)reply) {
+			st->reply[idx] = true;
+			break;
+		}
+	}
+
+	/* Finished only once no node is left without a reply */
+	for (idx = 0; idx < st->node_count; idx++) {
+		if (! st->reply[idx]) {
+			st->done = false;
+			return;
+		}
+	}
+	st->done = true;
+}
+
+/*
+ * Send a "disable takeover runs" message carrying the given timeout to
+ * every node in pnn_list and wait until all of them have replied, an
+ * error reply arrives, or TIMEOUT() expires.
+ *
+ * Returns 0 when every node replied without error, 1 when a node
+ * reported an error, ETIME on timeout, ENOMEM when the reply array
+ * cannot be allocated, or the error of the failing setup/send call.
+ */
+static int disable_takeover_runs(TALLOC_CTX *mem_ctx,
+				 struct ctdb_context *ctdb, uint32_t timeout,
+				 uint32_t *pnn_list, int count)
+{
+	struct ctdb_disable_message msg = { 0 };
+	struct disable_takeover_runs_state state = {
+		.pnn_list = pnn_list,
+		.node_count = count,
+		.status = 0,
+		.done = false,
+	};
+	int rc, idx;
+
+	msg.pnn = ctdb->pnn;
+	msg.srvid = next_srvid(ctdb);
+	msg.timeout = timeout;
+
+	state.reply = talloc_zero_array(mem_ctx, bool, count);
+	if (state.reply == NULL) {
+		return ENOMEM;
+	}
+
+	/* Replies come back on the srvid placed in the message */
+	rc = ctdb_client_set_message_handler(ctdb->ev, ctdb->client,
+					     msg.srvid,
+					     disable_takeover_run_handler,
+					     &state);
+	if (rc != 0) {
+		return rc;
+	}
+
+	for (idx = 0; idx < count; idx++) {
+		rc = ctdb_message_disable_takeover_runs(mem_ctx, ctdb->ev,
+							ctdb->client,
+							pnn_list[idx],
+							&msg);
+		if (rc != 0) {
+			goto out;
+		}
+	}
+
+	rc = ctdb_client_wait_timeout(ctdb->ev, &state.done, TIMEOUT());
+	if (rc != ETIME) {
+		rc = (state.status < 0) ? 1 : 0;
+	} else {
+		fprintf(stderr, "Timed out waiting to disable takeover runs\n");
+	}
+
+out:
+	/* state lives on this stack frame, so the handler must go */
+	ctdb_client_remove_message_handler(ctdb->ev, ctdb->client,
+					   msg.srvid, &state);
+	return rc;
+}
+
+/*
+ * Send the IPREALLOCATED control to every node in pnn_list, ignoring
+ * per-node errors and replies.
+ *
+ * Returns 0 on success, otherwise the return value of
+ * ctdb_client_control_multi().
+ */
+static int send_ipreallocated_control_to_nodes(TALLOC_CTX *mem_ctx,
+						struct ctdb_context *ctdb,
+						uint32_t *pnn_list,
+						int count)
+{
+	struct ctdb_req_control req;
+	int rc;
+
+	ctdb_req_control_ipreallocated(&req);
+
+	/* The last two arguments are perr_list and preply, both unused */
+	rc = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+				       pnn_list, count, TIMEOUT(),
+				       &req, NULL, NULL);
+	if (rc == 0) {
+		return 0;
+	}
+
+	fprintf(stderr, "Failed to send ipreallocated\n");
+	return rc;
+}
+
+/*
+ * "ctdb reloadips [all|<pnn-list>]"
+ *
+ * Make the selected nodes reload their public IP configuration:
+ *   1. disable takeover runs on all connected nodes,
+ *   2. send RELOAD_PUBLIC_IPS to the active nodes among those selected,
+ *   3. re-enable takeover runs (timeout 0),
+ *   4. run ipreallocate().
+ *
+ * A failure of step 2 is reported on stderr but does not stop the
+ * sequence.  Returns 1 when the node lists cannot be built, the error
+ * of step 1 or 3 when one of them fails, otherwise the result of
+ * ipreallocate().
+ */
+static int control_reloadips(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
+			     int argc, const char **argv)
+{
+	struct ctdb_node_map *cluster_map, *target_map;
+	uint32_t *connected_pnns, *target_pnns;
+	int num_connected, num_targets;
+	struct ctdb_req_control req;
+	const char *nodestring;
+	int rc;
+
+	if (argc > 1) {
+		usage("reloadips");
+	}
+
+	nodestring = (argc == 1) ? argv[0] : NULL;
+
+	cluster_map = get_nodemap(ctdb, false);
+	if (cluster_map == NULL) {
+		return 1;
+	}
+
+	if (! parse_nodestring(mem_ctx, ctdb, nodestring, &target_map)) {
+		return 1;
+	}
+
+	num_connected = list_of_connected_nodes(cluster_map,
+						CTDB_UNKNOWN_PNN,
+						mem_ctx, &connected_pnns);
+	if (num_connected <= 0) {
+		fprintf(stderr, "Memory allocation error\n");
+		return 1;
+	}
+
+	num_targets = list_of_active_nodes(target_map, CTDB_UNKNOWN_PNN,
+					   mem_ctx, &target_pnns);
+	if (num_targets <= 0) {
+		fprintf(stderr, "Memory allocation error\n");
+		return 1;
+	}
+
+	/*
+	 * Takeover runs are disabled on every connected node, not just
+	 * the selected ones.  Each node has to confirm, so all of them
+	 * need to be active.  Whether reloading should be refused while
+	 * nodes are disconnected is left to the administrator.
+	 */
+	rc = disable_takeover_runs(mem_ctx, ctdb, 2*options.timelimit,
+				   connected_pnns, num_connected);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to disable takeover runs\n");
+		return rc;
+	}
+
+	/*
+	 * Ask the selected nodes to reload their public IPs.  This is
+	 * attempted once; a failure is only reported.
+	 */
+	ctdb_req_control_reload_public_ips(&req);
+	rc = ctdb_client_control_multi(mem_ctx, ctdb->ev, ctdb->client,
+				       target_pnns, num_targets, TIMEOUT(),
+				       &req, NULL, NULL);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to reload IPs on some nodes.\n");
+	}
+
+	/*
+	 * Waiting for takeover runs to be re-enabled is not strictly
+	 * required, but it does no harm either.
+	 */
+	rc = disable_takeover_runs(mem_ctx, ctdb, 0, connected_pnns,
+				   num_connected);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to enable takeover runs\n");
+		return rc;
+	}
+
+	return ipreallocate(mem_ctx, ctdb);
+}
+
+
+/*
+ * Table of all ctdb tool sub-commands.
+ *
+ * match_command() looks a command up by exact name, process_command()
+ * uses without_daemon/remote to decide how fn is invoked, and
+ * usage()/usage_full() print name, args and msg.
+ */
+static const struct ctdb_cmd {
+ /* Command name as typed on the command line */
+ const char *name;
+ /* Handler, called with (mem_ctx, ctdb, argc, argv) of the arguments */
+ int (*fn)(TALLOC_CTX *, struct ctdb_context *, int, const char **);
+ bool without_daemon; /* can be run without daemon running ? */
+ bool remote; /* can be run on remote nodes */
+ /* One line description shown in the usage output */
+ const char *msg;
+ /* Argument synopsis shown in the usage output, NULL if none */
+ const char *args;
+} ctdb_commands[] = {
+ { "version", control_version, true, false,
+ "show version of ctdb", NULL },
+ { "status", control_status, false, true,
+ "show node status", NULL },
+ { "uptime", control_uptime, false, true,
+ "show node uptime", NULL },
+ { "ping", control_ping, false, true,
+ "ping a node", NULL },
+ { "runstate", control_runstate, false, true,
+ "get/check runstate of a node",
+ "[setup|first_recovery|startup|running]" },
+ { "getvar", control_getvar, false, true,
+ "get a tunable variable", "<name>" },
+ { "setvar", control_setvar, false, true,
+ "set a tunable variable", "<name> <value>" },
+ { "listvars", control_listvars, false, true,
+ "list tunable variables", NULL },
+ { "statistics", control_statistics, false, true,
+ "show ctdb statistics", NULL },
+ { "statisticsreset", control_statistics_reset, false, true,
+ "reset ctdb statistics", NULL },
+ { "stats", control_stats, false, true,
+ "show rolling statistics", "[count]" },
+ { "ip", control_ip, false, true,
+ "show public ips", "[all]" },
+ { "ipinfo", control_ipinfo, false, true,
+ "show public ip details", "<ip>" },
+ { "ifaces", control_ifaces, false, true,
+ "show interfaces", NULL },
+ { "setifacelink", control_setifacelink, false, true,
+ "set interface link status", "<iface> up|down" },
+ { "process-exists", control_process_exists, false, true,
+ "check if a process exists on a node", "<pid> [<srvid>]" },
+ { "getdbmap", control_getdbmap, false, true,
+ "show attached databases", NULL },
+ { "getdbstatus", control_getdbstatus, false, true,
+ "show database status", "<dbname|dbid>" },
+ { "catdb", control_catdb, false, false,
+ "dump cluster-wide ctdb database", "<dbname|dbid>" },
+ { "cattdb", control_cattdb, false, false,
+ "dump local ctdb database", "<dbname|dbid>" },
+ { "getcapabilities", control_getcapabilities, false, true,
+ "show node capabilities", NULL },
+ { "pnn", control_pnn, false, false,
+ "show the pnn of the current node", NULL },
+ { "lvs", control_lvs, false, false,
+ "show lvs configuration", "leader|list|status" },
+ { "setdebug", control_setdebug, false, true,
+ "set debug level", "ERROR|WARNING|NOTICE|INFO|DEBUG" },
+ { "getdebug", control_getdebug, false, true,
+ "get debug level", NULL },
+ { "attach", control_attach, false, false,
+ "attach a database", "<dbname> [persistent|replicated]" },
+ { "detach", control_detach, false, false,
+ "detach database(s)", "<dbname|dbid> ..." },
+ { "dumpmemory", control_dumpmemory, false, true,
+ "dump ctdbd memory map", NULL },
+ { "rddumpmemory", control_rddumpmemory, false, true,
+ "dump recoverd memory map", NULL },
+ { "getpid", control_getpid, false, true,
+ "get ctdbd process ID", NULL },
+ { "disable", control_disable, false, true,
+ "disable a node", NULL },
+ { "enable", control_enable, false, true,
+ "enable a node", NULL },
+ { "stop", control_stop, false, true,
+ "stop a node", NULL },
+ { "continue", control_continue, false, true,
+ "continue a stopped node", NULL },
+ { "ban", control_ban, false, true,
+ "ban a node", "<bantime>"},
+ { "unban", control_unban, false, true,
+ "unban a node", NULL },
+ { "shutdown", control_shutdown, false, true,
+ "shutdown ctdb daemon", NULL },
+ { "recover", control_recover, false, true,
+ "force recovery", NULL },
+ { "sync", control_ipreallocate, false, true,
+ "run ip reallocation (deprecated)", NULL },
+ { "ipreallocate", control_ipreallocate, false, true,
+ "run ip reallocation", NULL },
+ { "gratarp", control_gratarp, false, true,
+ "send a gratuitous arp", "<ip> <interface>" },
+ { "tickle", control_tickle, true, false,
+ "send a tcp tickle ack", "<srcip:port> <dstip:port>" },
+ { "gettickles", control_gettickles, false, true,
+ "get the list of tickles", "<ip> [<port>]" },
+ { "addtickle", control_addtickle, false, true,
+ "add a tickle", "<ip>:<port> <ip>:<port>" },
+ { "deltickle", control_deltickle, false, true,
+ "delete a tickle", "<ip>:<port> <ip>:<port>" },
+ { "listnodes", control_listnodes, true, true,
+ "list nodes in the cluster", NULL },
+ { "reloadnodes", control_reloadnodes, false, false,
+ "reload the nodes file all nodes", NULL },
+ { "moveip", control_moveip, false, false,
+ "move an ip address to another node", "<ip> <node>" },
+ { "addip", control_addip, false, true,
+ "add an ip address to a node", "<ip/mask> <iface>" },
+ { "delip", control_delip, false, true,
+ "delete an ip address from a node", "<ip>" },
+ { "backupdb", control_backupdb, false, false,
+ "backup a database into a file", "<dbname|dbid> <file>" },
+ { "restoredb", control_restoredb, false, false,
+ "restore a database from a file", "<file> [dbname]" },
+ { "dumpdbbackup", control_dumpdbbackup, true, false,
+ "dump database from a backup file", "<file>" },
+ { "wipedb", control_wipedb, false, false,
+ "wipe the contents of a database.", "<dbname|dbid>"},
+ { "leader", control_leader, false, true,
+ "show the pnn of the leader", NULL },
+ { "event", control_event, true, false,
+ "event and event script commands", NULL },
+ { "scriptstatus", control_scriptstatus, true, false,
+ "show event script status",
+ "[init|setup|startup|monitor|takeip|releaseip|ipreallocated]" },
+ { "natgw", control_natgw, false, false,
+ "show natgw configuration", "leader|list|status" },
+ { "getreclock", control_getreclock, false, true,
+ "get recovery lock file", NULL },
+ { "setlmasterrole", control_setlmasterrole, false, true,
+ "set LMASTER role", "on|off" },
+ { "setleaderrole", control_setleaderrole, false, true,
+ "set LEADER role", "on|off"},
+ { "setdbreadonly", control_setdbreadonly, false, true,
+ "enable readonly records", "<dbname|dbid>" },
+ { "setdbsticky", control_setdbsticky, false, true,
+ "enable sticky records", "<dbname|dbid>"},
+ { "pfetch", control_pfetch, false, false,
+ "fetch record from persistent database", "<dbname|dbid> <key>" },
+ { "pstore", control_pstore, false, false,
+ "write record to persistent database", "<dbname|dbid> <key> <value>" },
+ { "pdelete", control_pdelete, false, false,
+ "delete record from persistent database", "<dbname|dbid> <key>" },
+ { "ptrans", control_ptrans, false, false,
+ "update a persistent database (from file or stdin)", "<dbname|dbid> [<file>]" },
+ { "tfetch", control_tfetch, false, true,
+ "fetch a record", "<tdb-file> <key> [<file>]" },
+ { "tstore", control_tstore, false, true,
+ "store a record", "<tdb-file> <key> <data> [<rsn> <dmaster> <flags>]" },
+ { "readkey", control_readkey, false, false,
+ "read value of a database key", "<dbname|dbid> <key> [readonly]" },
+ { "writekey", control_writekey, false, false,
+ "write value for a database key", "<dbname|dbid> <key> <value>" },
+ { "deletekey", control_deletekey, false, false,
+ "delete a database key", "<dbname|dbid> <key>" },
+ { "checktcpport", control_checktcpport, true, false,
+ "check if a service is bound to a specific tcp port or not", "<port>" },
+ { "getdbseqnum", control_getdbseqnum, false, false,
+ "get database sequence number", "<dbname|dbid>" },
+ { "nodestatus", control_nodestatus, false, true,
+ "show and return node status", "[all|<pnn-list>]" },
+ { "dbstatistics", control_dbstatistics, false, true,
+ "show database statistics", "<dbname|dbid>" },
+ { "reloadips", control_reloadips, false, false,
+ "reload the public addresses file", "[all|<pnn-list>]" },
+};
+
+/*
+ * Look up a command by its exact name in ctdb_commands[].
+ *
+ * Returns the matching table entry, or NULL when there is none.
+ */
+static const struct ctdb_cmd *match_command(const char *command)
+{
+	const struct ctdb_cmd *entry = ctdb_commands;
+	const struct ctdb_cmd *end = ctdb_commands + ARRAY_SIZE(ctdb_commands);
+
+	for (; entry < end; entry++) {
+		if (strcmp(command, entry->name) == 0) {
+			return entry;
+		}
+	}
+
+	return NULL;
+}
+
+
+/**
+ * Print the popt option help followed by one line per known command
+ * (name, argument synopsis, description) to stdout.
+ */
+static void usage_full(void)
+{
+	const struct ctdb_cmd *entry = ctdb_commands;
+	const struct ctdb_cmd *end = ctdb_commands + ARRAY_SIZE(ctdb_commands);
+
+	poptPrintHelp(pc, stdout, 0);
+	printf("\nCommands:\n");
+
+	for (; entry < end; entry++) {
+		/* Commands without arguments get an empty synopsis column */
+		const char *synopsis = (entry->args != NULL) ? entry->args : "";
+
+		printf(" %-15s %-27s %s\n",
+		       entry->name, synopsis, entry->msg);
+	}
+}
+
+/*
+ * Print a usage message and terminate the process with status 1.
+ *
+ * For a known command only its own synopsis is shown after the popt
+ * usage line.  For NULL or an unknown name the full help from
+ * usage_full() is printed.  This function never returns.
+ */
+static void usage(const char *command)
+{
+	const struct ctdb_cmd *entry = NULL;
+
+	if (command != NULL) {
+		entry = match_command(command);
+	}
+
+	if (entry == NULL) {
+		usage_full();
+		exit(1);
+	}
+
+	poptPrintUsage(pc, stdout, 0);
+	printf("\nCommands:\n");
+	printf(" %-15s %-27s %s\n",
+	       entry->name, entry->args ? entry->args : "", entry->msg);
+
+	exit(1);
+}
+
+/*
+ * popt description of the global command line options.  Every option
+ * stores its value directly into a member of the global "options"
+ * structure; main() sets the defaults before parsing.
+ */
+struct poptOption cmdline_options[] = {
+ POPT_AUTOHELP
+ {
+ .longName = "debug",
+ .shortName = 'd',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &options.debuglevelstr,
+ .val = 0,
+ .descrip = "debug level",
+ },
+ {
+ .longName = "timelimit",
+ .shortName = 't',
+ .argInfo = POPT_ARG_INT,
+ .arg = &options.timelimit,
+ .val = 0,
+ .descrip = "timelimit (in seconds)",
+ },
+ {
+ .longName = "node",
+ .shortName = 'n',
+ .argInfo = POPT_ARG_INT,
+ .arg = &options.pnn,
+ .val = 0,
+ .descrip = "node specification - integer",
+ },
+ /* -Y has a short form only */
+ {
+ .longName = NULL,
+ .shortName = 'Y',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &options.machinereadable,
+ .val = 0,
+ .descrip = "enable machine readable output",
+ },
+ {
+ .longName = "separator",
+ .shortName = 'x',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &options.sep,
+ .val = 0,
+ .descrip = "specify separator for machine readable output",
+ .argDescrip = "CHAR",
+ },
+ /* -X has a short form only; main() turns it into machinereadable */
+ {
+ .shortName = 'X',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &options.machineparsable,
+ .val = 0,
+ .descrip = "enable machine parsable output with separator |",
+ },
+ {
+ .longName = "verbose",
+ .shortName = 'v',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &options.verbose,
+ .val = 0,
+ .descrip = "enable verbose output",
+ },
+ {
+ .longName = "maxruntime",
+ .shortName = 'T',
+ .argInfo = POPT_ARG_INT,
+ .arg = &options.maxruntime,
+ .val = 0,
+ .descrip = "die if runtime exceeds this limit (in seconds)",
+ },
+ POPT_TABLEEND
+};
+
+/*
+ * Run one command from ctdb_commands[].
+ *
+ * argv[0] is the command name; the handler receives the remaining
+ * arguments.  Commands flagged without_daemon are run with a NULL
+ * ctdb context and refuse a node option.  For all other commands a
+ * client connection to the local daemon is set up first, the target
+ * node is validated and a handler for leader broadcasts is installed.
+ *
+ * Returns the handler's result, or 1 when any setup step fails.
+ */
+static int process_command(const struct ctdb_cmd *cmd, int argc,
+			   const char **argv)
+{
+	struct ctdb_context *ctdb;
+	const char *socket_path;
+	uint64_t srvid_offset;
+	TALLOC_CTX *frame;
+	int result = 1;
+	int rc;
+
+	frame = talloc_new(NULL);
+	if (frame == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		goto done;
+	}
+
+	if (cmd->without_daemon) {
+		if (options.pnn != -1) {
+			fprintf(stderr,
+				"Cannot specify node for command %s\n",
+				cmd->name);
+			goto done;
+		}
+
+		result = cmd->fn(frame, NULL, argc-1, argv+1);
+		goto done;
+	}
+
+	ctdb = talloc_zero(frame, struct ctdb_context);
+	if (ctdb == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		goto done;
+	}
+
+	ctdb->ev = tevent_context_init(ctdb);
+	if (ctdb->ev == NULL) {
+		fprintf(stderr, "Failed to initialize tevent\n");
+		goto done;
+	}
+
+	socket_path = path_socket(ctdb, "ctdbd");
+	if (socket_path == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		goto done;
+	}
+
+	rc = ctdb_client_init(ctdb, ctdb->ev, socket_path, &ctdb->client);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to connect to CTDB daemon (%s)\n",
+			socket_path);
+
+		if (!find_node_xpnn(ctdb, NULL)) {
+			fprintf(stderr, "Is this node part of CTDB cluster?\n");
+		}
+		goto done;
+	}
+
+	ctdb->pnn = ctdb_client_pnn(ctdb->client);
+
+	/* The low 16 bits of the PID go into bits 16-31 of the srvid */
+	srvid_offset = getpid() & 0xFFFF;
+	ctdb->srvid = SRVID_CTDB_TOOL | (srvid_offset << 16);
+
+	/* Without a node option the command targets the local node */
+	if (options.pnn == -1) {
+		ctdb->cmd_pnn = ctdb->pnn;
+	} else {
+		if (! verify_pnn(ctdb, options.pnn)) {
+			goto done;
+		}
+		ctdb->cmd_pnn = options.pnn;
+	}
+
+	if (! cmd->remote && ctdb->pnn != ctdb->cmd_pnn) {
+		fprintf(stderr, "Node cannot be specified for command %s\n",
+			cmd->name);
+		goto done;
+	}
+
+	ctdb->leader_pnn = CTDB_UNKNOWN_PNN;
+	rc = ctdb_client_set_message_handler(ctdb->ev,
+					     ctdb->client,
+					     CTDB_SRVID_LEADER,
+					     leader_handler,
+					     ctdb);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to setup leader handler\n");
+		goto done;
+	}
+
+	result = cmd->fn(frame, ctdb, argc-1, argv+1);
+
+done:
+	/* Everything allocated above hangs off frame */
+	talloc_free(frame);
+	return result;
+}
+
+/*
+ * SIGTERM handler installed by alarm_handler(): report that the
+ * maximum runtime was exceeded.
+ *
+ * This runs in signal context, so the message is emitted with write()
+ * rather than stdio (fprintf() is not async-signal-safe), and errno is
+ * restored so interrupted code does not see it change.
+ */
+static void signal_handler(int sig)
+{
+	static const char msg[] = "Maximum runtime exceeded - exiting\n";
+	int saved_errno = errno;
+	ssize_t n;
+
+	/* Best effort: there is nothing useful to do if this fails */
+	n = write(STDERR_FILENO, msg, sizeof(msg) - 1);
+	(void)n;
+
+	errno = saved_errno;
+}
+
+/*
+ * SIGALRM handler, armed by main() with alarm(options.maxruntime).
+ *
+ * Sends SIGTERM to the whole process group and then exits with
+ * status 1.  signal_handler() is installed for SIGTERM first, so this
+ * process handles its own copy of the signal instead of taking the
+ * default action before _exit() is reached.
+ */
+static void alarm_handler(int sig)
+{
+ /* Kill any child processes */
+ signal(SIGTERM, signal_handler);
+ kill(0, SIGTERM);
+
+ _exit(1);
+}
+
+/*
+ * Entry point of the ctdb command line tool.
+ *
+ * Parses the global options, determines the maximum runtime, looks up
+ * the requested command, sets up logging and the runtime alarm, and
+ * finally hands over to process_command().  The exit status is the
+ * command's result, with -1 mapped to 1.
+ */
+int main(int argc, const char *argv[])
+{
+ int opt;
+ const char **extra_argv;
+ int extra_argc;
+ const struct ctdb_cmd *cmd;
+ const char *test_mode;
+ int loglevel;
+ bool ok;
+ int ret = 0;
+
+ setlinebuf(stdout);
+
+ /* Set default options */
+ options.debuglevelstr = NULL;
+ options.timelimit = 10;
+ options.sep = "|";
+ options.maxruntime = 0;
+ options.pnn = -1;
+
+ pc = poptGetContext(argv[0], argc, argv, cmdline_options,
+ POPT_CONTEXT_KEEP_FIRST);
+ /* No option uses a non-zero .val, so anything but -1 is an error */
+ while ((opt = poptGetNextOpt(pc)) != -1) {
+ fprintf(stderr, "Invalid option %s: %s\n",
+ poptBadOption(pc, 0), poptStrerror(opt));
+ exit(1);
+ }
+
+ /*
+ * Without -T the limit comes from the CTDB_TIMEOUT environment
+ * variable, or defaults to 120 seconds.
+ */
+ if (options.maxruntime == 0) {
+ const char *ctdb_timeout;
+
+ ctdb_timeout = getenv("CTDB_TIMEOUT");
+ if (ctdb_timeout != NULL) {
+ options.maxruntime = smb_strtoul(ctdb_timeout,
+ NULL,
+ 0,
+ &ret,
+ SMB_STR_STANDARD);
+ if (ret != 0) {
+ fprintf(stderr, "Invalid value CTDB_TIMEOUT\n");
+ exit(1);
+ }
+ } else {
+ options.maxruntime = 120;
+ }
+ }
+
+ /* -X implies machine readable output */
+ if (options.machineparsable) {
+ options.machinereadable = 1;
+ }
+
+ /* setup the remaining options for the commands */
+ extra_argc = 0;
+ extra_argv = poptGetArgs(pc);
+ if (extra_argv) {
+ /* Skip the first leftover argument - presumably the program
+ * name kept by POPT_CONTEXT_KEEP_FIRST (TODO confirm) */
+ extra_argv++;
+ while (extra_argv[extra_argc]) extra_argc++;
+ }
+
+ if (extra_argc < 1) {
+ usage(NULL);
+ }
+
+ cmd = match_command(extra_argv[0]);
+ if (cmd == NULL) {
+ fprintf(stderr, "Unknown command '%s'\n", extra_argv[0]);
+ exit(1);
+ }
+
+ /* Enable logging */
+ setup_logging("ctdb", DEBUG_STDERR);
+ ok = debug_level_parse(options.debuglevelstr, &loglevel);
+ if (!ok) {
+ loglevel = DEBUG_ERR;
+ }
+ debuglevel_set(loglevel);
+
+ /* Stop process group kill in alarm_handler() from killing tests */
+ test_mode = getenv("CTDB_TEST_MODE");
+ if (test_mode != NULL) {
+ const char *have_setpgid = getenv("CTDB_TOOL_SETPGID");
+ /* The variable is set below so that this is done only once */
+ if (have_setpgid == NULL) {
+ setpgid(0, 0);
+ setenv("CTDB_TOOL_SETPGID", "1", 1);
+ }
+ }
+
+ /* Enforce the maximum runtime, see alarm_handler() */
+ signal(SIGALRM, alarm_handler);
+ alarm(options.maxruntime);
+
+ ret = process_command(cmd, extra_argc, extra_argv);
+ if (ret == -1) {
+ ret = 1;
+ }
+
+ (void)poptFreeContext(pc);
+
+ return ret;
+}
diff --git a/ctdb/tools/ctdb_diagnostics b/ctdb/tools/ctdb_diagnostics
new file mode 100755
index 0000000..d16a71c
--- /dev/null
+++ b/ctdb/tools/ctdb_diagnostics
@@ -0,0 +1,346 @@
+#!/bin/sh
+# a script to test the basic setup of a CTDB/Samba install
+# tridge@samba.org September 2007
+# martin@meltin.net August 2010
+
+# Print the usage message to stderr and exit with status 1.
+usage ()
+{
+ cat >&2 <<EOF
+Usage: ctdb_diagnostics [OPTION] ...
+ options:
+ -n <nodes> Comma separated list of nodes to operate on
+ -c Ignore comment lines (starting with '#') in file comparisons
+ -w Ignore whitespace in file comparisons
+ --no-ads Do not use commands that assume an Active Directory Server
+EOF
+ exit 1
+
+}
+
+nodes=$(ctdb listnodes -X | cut -d'|' -f2)
+bad_nodes=""
+diff_opts=
+no_ads=false
+
+# Parse the command-line options, updating $nodes, $diff_opts and
+# $no_ads.  Calls usage (which exits) if getopt fails, for -h/--help,
+# for any unrecognised option, or if non-option arguments remain.
+parse_options ()
+{
+ temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@")
+
+ # No! Checking the exit code afterwards is actually clearer...
+ # shellcheck disable=SC2181
+ [ $? -eq 0 ] || usage
+
+ # Replace the positional parameters with getopt's normalised output
+ eval set -- "$temp"
+
+ while true ; do
+ case "$1" in
+ # Convert the comma-separated list into a space-separated one
+ -n) nodes=$(echo "$2" | sed -e 's@,@ @g') ; shift 2 ;;
+ -c) diff_opts="${diff_opts} -I ^#.*" ; shift ;;
+ -w) diff_opts="${diff_opts} -w" ; shift ;;
+ --no-ads) no_ads=true ; shift ;;
+ --) shift ; break ;;
+ -h|--help|*) usage ;;
+ esac
+ done
+
+ # No non-option arguments are accepted
+ [ $# -ne 0 ] && usage
+}
+
+parse_options "$@"
+
+# Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
+case "$EXTRA_SSH_OPTS" in
+ *ConnectTimeout=*) : ;;
+ *)
+ export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
+esac
+
+# Filter nodes. Remove any nodes we can't contact from $nodes and add
+# them to $bad_nodes.  A node counts as contactable if "onnode <node>
+# true" succeeds.
+_nodes=""
+for _i in $nodes ; do
+ if onnode "$_i" true >/dev/null 2>&1 ; then
+ _nodes="${_nodes}${_nodes:+ }${_i}"
+ else
+ bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
+ fi
+done
+nodes="$_nodes"
+
+# Comma-separated form of $nodes, as passed to onnode by show_all
+nodes_comma=$(echo "$nodes" | sed -e 's@[[:space:]]@,@g')
+
+PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"
+
+# list of config files that must exist and that we check are the same
+# on the nodes
+if [ -d /etc/sysconfig ] ; then
+ CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
+else
+ CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/default/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/default/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
+fi
+
+# list of config files that may exist and should be checked that they
+# are the same on the nodes
+CONFIG_FILES_MAY="/usr/local/etc/ctdb/public_addresses /usr/local/etc/ctdb/static-routes"
+
+# From here on stderr is merged into stdout so that everything ends
+# up in a single output stream
+exec 2>&1
+
+cat <<EOF
+--------------------------------------------------------------------
+ctdb_diagnostics starting. This script will gather information about
+your ctdb cluster. You should send the output of this script along
+with any ctdb or clustered Samba bug reports.
+--------------------------------------------------------------------
+EOF
+
+date
+
+# Report an error: print "ERROR: <message>", increment $NUM_ERRORS
+# and append a numbered copy of the message to the file $ERRORS.
+#   $1 - the error message
+error() {
+ msg="$1"
+ echo "ERROR: $msg"
+ NUM_ERRORS=$((NUM_ERRORS + 1))
+ echo " ERROR[$NUM_ERRORS]: $msg" >> "$ERRORS"
+}
+
+# Print the "ls -l" details and the contents of a file on the
+# current node, framed by separator lines.
+#   $1 - path of the file to show
+# Errors from ls and sed are included in the output.
+show_file() {
+ fname="$1"
+ _fdetails=$(ls -l "$fname" 2>&1)
+ echo " ================================"
+ echo " File: $fname"
+ echo " $_fdetails"
+ sed 's/^/ /' "$fname" 2>&1
+ echo " ================================"
+}
+
+# Run a command on all contactable nodes via onnode, preceded by
+# "hostname" and "date" on each node.
+#   $1 - the complete command line, as a single string
+# Only $1 is used: any further arguments are ignored.
+show_all() {
+ echo "running $1 on nodes $nodes_comma"
+ onnode "$nodes_comma" "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1
+}
+
+# Show each given file as found on the first node in $nodes and
+# check that every other node in $nodes has identical contents.
+#   $1   - printf format used for the error raised when a file is not
+#          readable on the first node; it is given the file name and
+#          the node as arguments
+#   $2.. - files to check
+# Copies of the files are fetched into $tmpdir for comparison.
+# Differences are reported via error() and shown as a unified diff.
+show_and_compare_files () {
+
+ fmt="$1" ; shift
+
+ for f ; do
+ _bf=$(basename "$f")
+ first=true
+
+ for n in $nodes ; do
+
+ if $first ; then
+ # The first node provides the reference copy.  If the
+ # file is not readable there then report it and skip
+ # to the next file without checking any other nodes.
+ onnode "$n" [ -r "$f" ] || {
+ # This function takes a format string
+ # shellcheck disable=SC2059
+ msg=$(printf "$fmt" "$f" "$n")
+ error "$msg"
+ continue 2;
+ }
+
+ fstf="${tmpdir}/${_bf}.node${n}"
+ onnode "$n" cat "$f" >"$fstf" 2>&1
+
+ _fdetails=$(onnode "$n" ls -l "$f" 2>&1)
+ echo " ================================"
+ echo " File (on node $n): $f"
+ echo " $_fdetails"
+ sed 's/^/ /' "$fstf"
+ echo " ================================"
+ first=false
+ else
+ echo "Testing for same config file $f on node $n"
+ tmpf="${tmpdir}/${_bf}.node${n}"
+ onnode "$n" cat "$f" >"$tmpf" 2>&1
+ # Intentional multi-word splitting on diff_opts
+ # shellcheck disable=SC2086
+ diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || {
+ error "File $f is different on node $n"
+ diff -u $diff_opts "$fstf" "$tmpf"
+ }
+ rm -f "$tmpf"
+ fi
+ done
+
+ # Remove the reference copy once all nodes have been compared
+ rm -f "$fstf"
+ done
+}
+
+# Scratch directory holding fetched copies of files and the error log
+if ! tmpdir=$(mktemp -d) ; then
+	echo "Unable to create a temporary directory"
+	exit 1
+fi
+# File that error() appends numbered error messages to
+ERRORS="${tmpdir}/diag_err"
+NUM_ERRORS=0
+
+cat <<EOF
+Diagnosis started on these nodes:
+$nodes_comma
+EOF
+
+if [ -n "$bad_nodes" ] ; then
+	cat <<EOF
+
+NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
+$bad_nodes
+EOF
+
+fi
+
+cat <<EOF
+
+For reference, here is the nodes file on the current node...
+EOF
+
+show_file /usr/local/etc/ctdb/nodes
+
+cat <<EOF
+--------------------------------------------------------------------
+Comparing critical config files on nodes $nodes_comma
+EOF
+
+# The node is formatted with %s rather than %d because nodes given
+# via -n are passed through verbatim and need not be plain integers.
+
+# Intentional multi-word splitting on CONFIG_FILES_MUST
+# shellcheck disable=SC2086
+show_and_compare_files \
+	"%s is missing on node %s" \
+	$CONFIG_FILES_MUST
+
+# Intentional multi-word splitting on CONFIG_FILES_MAY
+# shellcheck disable=SC2086
+show_and_compare_files \
+	"Optional file %s is not present on node %s" \
+	$CONFIG_FILES_MAY
+
+cat <<EOF
+--------------------------------------------------------------------
+Checking for clock drift
+EOF
+# Report any node whose clock differs from the local clock by more
+# than 30 seconds.
+for i in $nodes; do
+	# Sample the local clock for every node, so that the time spent
+	# contacting earlier nodes is not counted as drift for later ones
+	t=$(date +%s)
+	t2=$(onnode "$i" date +%s)
+	# Without a purely numeric reply the subtraction below would
+	# either fail or report a nonsensical drift
+	case "$t2" in
+	""|*[!0-9]*)
+		error "unable to get time from node $i"
+		continue
+		;;
+	esac
+	d=$((t2 - t))
+	if [ "$d" -gt 30 ] || [ "$d" -lt -30 ]; then
+		error "time on node $i differs by $d seconds"
+	fi
+done
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing software versions
+EOF
+show_all "uname -a"
+[ -x /bin/rpm ] && {
+ show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'"
+}
+[ -x /usr/bin/dpkg-query ] && {
+ show_all "/usr/bin/dpkg-query --show 'ctdb'"
+ show_all "/usr/bin/dpkg-query --show 'samba'"
+ #show_all "/usr/bin/dpkg-query --show 'gpfs'"
+}
+
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing ctdb status and recent log entries
+EOF
+show_all "ctdb status; ctdb ip"
+show_all "ctdb statistics"
+show_all "ctdb uptime"
+show_all "ctdb listvars"
+show_all "ctdb getdbmap"
+show_all "ctdb -X getdbmap | awk -F'|' 'NR > 1 {print \$3}' | sort | xargs -n 1 ctdb dbstatistics"
+
+echo "Showing log.ctdb"
+show_all "test -f /usr/local/var/log/log.ctdb && tail -100 /usr/local/var/log/log.ctdb"
+
+show_all "tail -200 /var/log/messages"
+show_all "ls -lRs /usr/local/var/lib/ctdb"
+show_all "ls -lRs /usr/local/etc/ctdb"
+
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing system and process status
+EOF
+show_all "df"
+show_all "df -i"
+show_all "mount"
+show_all "w"
+show_all "ps axfwu"
+show_all "dmesg"
+show_all "/sbin/lspci"
+show_all "dmidecode"
+show_all "cat /proc/partitions"
+show_all "cat /proc/cpuinfo"
+show_all "cat /proc/scsi/scsi"
+# The interface configuration only needs to be gathered once
+show_all "/sbin/ifconfig -a"
+show_all "cat /proc/net/dev"
+show_all "/sbin/ip addr list"
+show_all "/sbin/route -n"
+show_all "ss -s"
+show_all "free"
+show_all "crontab -l"
+show_all "sysctl -a"
+show_all "iptables -L -n"
+show_all "iptables -L -n -t nat"
+show_all "/usr/sbin/rpcinfo -p"
+show_all "/usr/sbin/showmount -a"
+show_all "/usr/sbin/showmount -e"
+show_all "/usr/sbin/nfsstat -v"
+# The optional tools below are detected on the current node only;
+# when present they are run on all nodes
+[ -x /sbin/multipath ] && {
+	show_all "/sbin/multipath -ll"
+}
+[ -x /sbin/chkconfig ] && {
+	show_all "/sbin/chkconfig --list"
+}
+[ -x /usr/sbin/getenforce ] && {
+	show_all "/usr/sbin/getenforce"
+}
+[ -d /proc/net/bonding ] && {
+	for f in /proc/net/bonding/*; do
+		show_all "cat $f"
+	done
+}
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing Samba status
+EOF
+show_all "smbstatus -n -B"
+if $no_ads ; then
+	echo
+	echo "Skipping \"net ads testjoin\" as requested"
+	echo
+else
+	show_all "net ads testjoin"
+fi
+show_all "net conf list"
+show_all "lsof -n | grep smbd"
+show_all "lsof -n | grep ctdbd"
+show_all "netstat -tan"
+if $no_ads ; then
+	echo
+	echo "Skipping \"net ads info\" as requested"
+	echo
+else
+	show_all "net ads info"
+fi
+show_all "date"
+show_all "smbclient -U% -L 127.0.0.1"
+WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
+# show_all only uses its first argument, so the whole command must be
+# passed as a single string - otherwise the user name is dropped
+show_all "id '$WORKGROUP/Administrator'"
+show_all "wbinfo -p"
+show_all "wbinfo --online-status"
+show_all "smbd -b"
+
+date
+echo "Diagnostics finished with $NUM_ERRORS errors"
+
+# Repeat all of the errors at the end of the output
+[ -r "$ERRORS" ] && {
+	cat "$ERRORS"
+	rm -f "$ERRORS"
+}
+
+rm -rf "$tmpdir"
+
+# The exit status is the number of errors.  Exit statuses are
+# truncated to 8 bits, so cap the value to ensure that a large error
+# count can never wrap around to 0 (success).
+if [ "$NUM_ERRORS" -gt 255 ] ; then
+	exit 255
+fi
+exit $NUM_ERRORS
+
diff --git a/ctdb/tools/ctdb_killtcp.c b/ctdb/tools/ctdb_killtcp.c
new file mode 100644
index 0000000..fd07f4b
--- /dev/null
+++ b/ctdb/tools/ctdb_killtcp.c
@@ -0,0 +1,418 @@
+/*
+ CTDB TCP connection killing utility
+
+ Copyright (C) Martin Schwenke <martin@meltin.net> 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_util.h"
+
+#include "common/db_hash.h"
+#include "common/system_socket.h"
+#include "common/logging.h"
+
+
+/* State of a reset_connections_send() request */
+struct reset_connections_state {
+ /* Event context, used to schedule the next batch */
+ struct tevent_context *ev;
+ /* Descriptor returned by ctdb_sys_open_capture_socket() */
+ int capture_fd;
+ /* Read event registered on capture_fd */
+ struct tevent_fd *fde;
+ /* Connections still to be reset, keyed by struct ctdb_connection */
+ struct db_hash_context *connections;
+ /*
+ * Opaque data set by ctdb_sys_open_capture_socket() and handed
+ * back to ctdb_sys_read_tcp_packet()
+ */
+ void *private_data;
+ /* Number of batches run so far and the limit on that number */
+ unsigned int attempts;
+ unsigned int max_attempts;
+ /* Delay between batches */
+ struct timeval retry_interval;
+ /* Connections visited in the current batch and the cap per batch */
+ unsigned int batch_count;
+ unsigned int batch_size;
+};
+
+
+static void reset_connections_capture_tcp_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data);
+static void reset_connections_batch(struct tevent_req *subreq);
+static int reset_connections_tickle_connection(
+ uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen,
+ void *private_data);
+
+/*
+ * Start an asynchronous request to reset the TCP connections in
+ * conn_list, capturing packets on interface iface.
+ *
+ * Returns NULL if the request can not be created.  Otherwise the
+ * result is collected with reset_connections_recv().  With an empty
+ * conn_list the request completes immediately.  Setup failures are
+ * reported as errors on the request: the error from db_hash, EIO if
+ * the capture socket can not be opened, or out of memory.
+ */
+static struct tevent_req *reset_connections_send(
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ const char *iface,
+ struct ctdb_connection_list *conn_list)
+{
+ struct tevent_req *req, *subreq;
+ struct reset_connections_state *state;
+ unsigned int i;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state,
+ struct reset_connections_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+
+ if (conn_list->num == 0) {
+ /* No connections, done! */
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ ret = db_hash_init(state, "connections", 2048, DB_HASH_SIMPLE,
+ &state->connections);
+ if (ret != 0) {
+ D_ERR("Failed to initialise connection hash (%s)\n",
+ strerror(ret));
+ tevent_req_error(req, ret);
+ return tevent_req_post(req, ev);
+ }
+
+ DBG_DEBUG("Adding %u connections to hash\n", conn_list->num);
+ for (i = 0; i < conn_list->num; i++) {
+ struct ctdb_connection *c = &conn_list->conn[i];
+
+ DBG_DEBUG("Adding connection to hash: %s\n",
+ ctdb_connection_to_string(conn_list, c, true));
+
+ /* Connection is stored as a key in the connections hash */
+ ret = db_hash_add(state->connections,
+ (uint8_t *)discard_const(c), sizeof(*c),
+ NULL, 0);
+ if (ret != 0) {
+ D_ERR("Error adding connection to hash (%s)\n",
+ strerror(ret));
+ tevent_req_error(req, ret);
+ return tevent_req_post(req, ev);
+ }
+ }
+
+ /* Up to 50 batches, 100ms apart, of at most 300 connections each */
+ state->attempts = 0;
+ state->max_attempts = 50;
+
+ state->retry_interval.tv_sec = 0;
+ state->retry_interval.tv_usec = 100 * 1000;
+
+ state->batch_count = 0;
+ state->batch_size = 300;
+
+ state->capture_fd =
+ ctdb_sys_open_capture_socket(iface, &state->private_data);
+ if (state->capture_fd == -1) {
+ D_ERR("Failed to open capture socket on iface '%s' (%s)\n",
+ iface, strerror(errno));
+ tevent_req_error(req, EIO);
+ return tevent_req_post(req, ev);
+ }
+
+ state->fde = tevent_add_fd(ev, state, state->capture_fd,
+ TEVENT_FD_READ,
+ reset_connections_capture_tcp_handler,
+ state);
+ if (tevent_req_nomem(state->fde, req)) {
+ return tevent_req_post(req, ev);
+ }
+ /* The capture socket is closed when the fd event is freed */
+ tevent_fd_set_auto_close(state->fde);
+
+ /* Run the first batch from the event loop, without delay */
+ subreq = tevent_wakeup_send(state, ev, tevent_timeval_current_ofs(0,0));
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, reset_connections_batch, req);
+
+ return req;
+}
+
+/*
+ * Called when we get a read event on the raw (capture) socket.
+ *
+ * Parses the captured packet.  If it belongs to one of the
+ * connections that is still waiting to be reset then that connection
+ * is removed from the hash and a TCP RST is sent for it, using the
+ * sequence numbers from the captured packet.  Packets that can not
+ * be parsed, packets that this tool sent and packets for unknown
+ * connections are ignored.
+ *
+ * private_data is the struct reset_connections_state of the request.
+ */
+static void reset_connections_capture_tcp_handler(struct tevent_context *ev,
+						  struct tevent_fd *fde,
+						  uint16_t flags,
+						  void *private_data)
+{
+	struct reset_connections_state *state = talloc_get_type_abort(
+		private_data, struct reset_connections_state);
+	struct ctdb_connection conn;
+	uint32_t ack_seq, seq;
+	int rst;
+	uint16_t window;
+	int ret;
+
+	/*
+	 * 0 the parts that don't get set by ctdb_sys_read_tcp_packet.
+	 * The raw bytes of conn are used as the hash key below, so any
+	 * byte left uninitialised could stop the lookup from matching
+	 * the connection that was added to the hash.
+	 */
+	memset(&conn, 0, sizeof(conn));
+
+	ret = ctdb_sys_read_tcp_packet(state->capture_fd,
+				       state->private_data,
+				       &conn.server, &conn.client,
+				       &ack_seq, &seq, &rst, &window);
+	if (ret != 0) {
+		/* Not a TCP-ACK?  Unexpected protocol? */
+		DBG_DEBUG("Failed to parse packet, errno=%d\n", ret);
+		return;
+	}
+
+	if (window == htons(1234) && (rst || seq == 0)) {
+		/* Ignore packets that we sent! */
+		DBG_DEBUG("Ignoring sent packet: %s, "
+			  "seq=%"PRIu32", ack_seq=%"PRIu32", "
+			  "rst=%d, window=%"PRIu16"\n",
+			  ctdb_connection_to_string(state, &conn, false),
+			  seq, ack_seq, rst, ntohs(window));
+		return;
+	}
+
+	/* Check if this connection is one being reset, if found then delete */
+	ret = db_hash_delete(state->connections,
+			     (uint8_t*)&conn, sizeof(conn));
+	if (ret == ENOENT) {
+		/* Packet for some other connection, ignore */
+		DBG_DEBUG("Ignoring packet for unknown connection: %s\n",
+			  ctdb_connection_to_string(state, &conn, true));
+		return;
+	}
+	if (ret != 0) {
+		DBG_WARNING("Internal error (%s)\n", strerror(ret));
+		return;
+	}
+
+	D_INFO("Sending a TCP RST for connection %s\n",
+	       ctdb_connection_to_string(state, &conn, true));
+
+	ret = ctdb_sys_send_tcp(&conn.server, &conn.client, ack_seq, seq, 1);
+	if (ret != 0) {
+		DBG_ERR("Error sending TCP RST for connection\n");
+	}
+}
+
+/*
+ * Called periodically until all sentenced connections have been reset
+ * or enough attempts have been made
+ *
+ * Each call sends tickle ACKs for up to batch_size of the connections
+ * remaining in the hash.  The request is then completed if the hash
+ * is empty or max_attempts batches have been run, otherwise the next
+ * call is scheduled after retry_interval.
+ */
+static void reset_connections_batch(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct reset_connections_state *state = tevent_req_data(
+ req, struct reset_connections_state);
+ bool status;
+ int count, ret;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+
+ if (! status) {
+ DBG_WARNING("Unexpected error on timer expiry\n");
+ /* Keep going... */
+ }
+
+ /* loop over up to batch_size connections sending tickle ACKs */
+ state->batch_count = 0;
+ ret = db_hash_traverse(state->connections,
+ reset_connections_tickle_connection,
+ state, NULL);
+ if (ret != 0) {
+ DBG_WARNING("Unexpected error traversing connections (%s)\n",
+ strerror(ret));
+ }
+
+ state->attempts++;
+
+ /*
+ * If there are no more connections to kill or we have tried
+ * too many times we're finished
+ */
+ /* Traverse without a callback, only to count what is left */
+ ret = db_hash_traverse(state->connections, NULL, NULL, &count);
+ if (ret != 0) {
+ /* What now? Try again until max_attempts reached */
+ DBG_WARNING("Unexpected error traversing connections (%s)\n",
+ strerror(ret));
+ count = 1;
+ }
+ /* Giving up after max_attempts also completes without an error */
+ if (count == 0 ||
+ state->attempts >= state->max_attempts) {
+ tevent_req_done(req);
+ return;
+ }
+
+ /* Schedule next attempt */
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(
+ state->retry_interval.tv_sec,
+ state->retry_interval.tv_usec));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, reset_connections_batch, req);
+}
+
+/*
+ * db_hash_traverse() callback: send a tickle ACK for one connection.
+ *
+ * keybuf/keylen hold the struct ctdb_connection that was used as the
+ * hash key, databuf/datalen are unused and private_data is the
+ * struct reset_connections_state.
+ *
+ * Returns 0 to continue the traverse, or 1 to terminate it once more
+ * than batch_size connections have been visited in this batch.
+ */
+static int reset_connections_tickle_connection(
+ uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen,
+ void *private_data)
+{
+ struct reset_connections_state *state = talloc_get_type_abort(
+ private_data, struct reset_connections_state);
+ struct ctdb_connection *conn;
+ int ret;
+
+ if (keylen != sizeof(*conn)) {
+ DBG_WARNING("Unexpected data in connection hash\n");
+ return 0;
+ }
+
+ conn = (struct ctdb_connection *)keybuf;
+
+ state->batch_count++;
+ if (state->batch_count > state->batch_size) {
+ /* Terminate the traverse */
+ return 1;
+ }
+
+ DBG_INFO("Sending tickle ACK for connection '%s'\n",
+ ctdb_connection_to_string(state, conn, true));
+ /* seq, ack and rst are all 0 for the tickle */
+ ret = ctdb_sys_send_tcp(&conn->server, &conn->client, 0, 0, 0);
+ if (ret != 0) {
+ DBG_ERR("Error sending tickle ACK\n");
+ /* continue */
+ }
+
+ return 0;
+}
+
+/*
+ * Collect the result of a reset_connections_send() request.
+ *
+ * Returns true if the request completed without an error.  Otherwise
+ * returns false and, if perr is not NULL, stores the error in *perr.
+ */
+static bool reset_connections_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	/* The caller may not be interested in the error code */
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return false;
+}
+
+/* Print a usage message for program name prog and exit with status 1 */
+static void usage(const char *prog)
+{
+ printf("usage: %s <interface> [ <srcip:port> <dstip:port> ]\n", prog);
+ exit(1);
+}
+
+/*
+ * Reset TCP connections using the capture socket on an interface.
+ *
+ * Usage: <prog> <interface> [ <srcip:port> <dstip:port> ]
+ *
+ * With 4 arguments the single connection given on the command line
+ * is reset.  With 2 arguments the list of connections is read from
+ * file descriptor 0.  The debug level is taken from the environment
+ * variable CTDB_DEBUGLEVEL, if set.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int main(int argc, char **argv)
+{
+	struct ctdb_connection conn;
+	struct tevent_context *ev = NULL;
+	TALLOC_CTX *mem_ctx = NULL;
+	struct ctdb_connection_list *conn_list = NULL;
+	const char *t;
+	struct tevent_req *req;
+	int debug_level;
+	bool status;
+	bool ok;
+	int ret;
+
+	/* Set the debug level */
+	t = getenv("CTDB_DEBUGLEVEL");
+	if (t != NULL) {
+		ok = debug_level_parse(t, &debug_level);
+		if (!ok) {
+			debug_level = DEBUG_ERR;
+		}
+		debuglevel_set(debug_level);
+	}
+
+	if (argc != 2 && argc != 4) {
+		usage(argv[0]);
+	}
+
+	/*
+	 * Create the memory context before anything is allocated on
+	 * it, so that the connection list really is owned (and freed)
+	 * by mem_ctx rather than hanging off the NULL context
+	 */
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+		goto fail;
+	}
+
+	if (argc == 4) {
+		ret = ctdb_sock_addr_from_string(argv[2], &conn.client, true);
+		if (ret != 0) {
+			D_ERR("Bad IP:port '%s'\n", argv[2]);
+			goto fail;
+		}
+
+		ret = ctdb_sock_addr_from_string(argv[3], &conn.server, true);
+		if (ret != 0) {
+			D_ERR("Bad IP:port '%s'\n", argv[3]);
+			goto fail;
+		}
+
+
+		conn_list = talloc_zero(mem_ctx, struct ctdb_connection_list);
+		if (conn_list == NULL) {
+			ret = ENOMEM;
+			DBG_ERR("Internal error (%s)\n", strerror(ret));
+			goto fail;
+		}
+		ret = ctdb_connection_list_add(conn_list, &conn);
+		if (ret != 0) {
+			DBG_ERR("Internal error (%s)\n", strerror(ret));
+			goto fail;
+		}
+	} else {
+		/* Read the connections from file descriptor 0 */
+		ret = ctdb_connection_list_read(mem_ctx, 0, true, &conn_list);
+		if (ret != 0) {
+			D_ERR("Unable to parse connections (%s)\n",
+			      strerror(ret));
+			goto fail;
+		}
+	}
+
+	ev = tevent_context_init(mem_ctx);
+	if (ev == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to initialise tevent\n"));
+		goto fail;
+	}
+
+	req = reset_connections_send(mem_ctx, ev, argv[1], conn_list);
+	if (req == NULL) {
+		goto fail;
+	}
+
+	/* Run the event loop until the request has finished */
+	tevent_req_poll(req, ev);
+
+	status = reset_connections_recv(req, &ret);
+	if (! status) {
+		D_ERR("Failed to kill connections (%s)\n", strerror(ret));
+		goto fail;
+	}
+
+	talloc_free(mem_ctx);
+
+	return 0;
+
+fail:
+	/* Safe even when mem_ctx was never created */
+	TALLOC_FREE(mem_ctx);
+	return -1;
+}
diff --git a/ctdb/tools/ctdb_lvs b/ctdb/tools/ctdb_lvs
new file mode 100755
index 0000000..d0249b9
--- /dev/null
+++ b/ctdb/tools/ctdb_lvs
@@ -0,0 +1,204 @@
+#!/bin/sh
+
+# Default location of the CTDB configuration if not set by the caller
+if [ -z "$CTDB_BASE" ] ; then
+ export CTDB_BASE="/usr/local/etc/ctdb"
+fi
+
+# Shared shell functions; load_script_options and die are not defined
+# in this script, so presumably they come from here
+. "${CTDB_BASE}/functions"
+
+load_script_options "failover" "91.lvs"
+
+# Default LVS nodes file location
+[ -n "$CTDB_LVS_NODES" ] || CTDB_LVS_NODES="${CTDB_BASE}/lvs_nodes"
+
+# Command used to run the ctdb tool, may be overridden via $CTDB
+if [ -z "$CTDB" ] ; then
+ CTDB=ctdb
+fi
+
+############################################################
+
+# Print the usage message to stdout and exit with status 1.
+usage ()
+{
+ cat <<EOF
+$0 <option>
+
+<option> is one of:
+ leader Display node number of leader node
+ list List node number and private IP address of usable nodes in group
+ status Show status of all nodes in LVS group
+EOF
+ exit 1
+}
+
+nodestatus_X=""
+# Fields are:
+# Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode
+# Fetch the machine-readable status of all nodes, with the first
+# line and the leading and trailing '|' of each line removed.
+get_nodestatus_X ()
+{
+ # Result is cached in global variable nodestatus_X
+ [ -n "$nodestatus_X" ] || \
+ nodestatus_X=$($CTDB -X nodestatus all |
+ sed -e '1d' -e 's@^|@@' -e 's@|$@@')
+}
+
+# Fetch the human-readable status of all nodes into $nodestatus.
+# Returns 0 if the exit status seen is 0 or 255, otherwise 1.
+# NOTE(review): the comment below says that 255 indicates failure,
+# yet 255 is mapped to success here - confirm this is intended.  The
+# caller in this script ignores the return value and checks whether
+# $nodestatus is empty instead.
+get_nodestatus ()
+{
+ # Result is cached in global variable nodestatus
+ [ -n "$nodestatus" ] || nodestatus=$($CTDB nodestatus all)
+ case $? in
+ # $CTDB nodestatus returns 255 on failure
+ 0|255) return 0 ;;
+ *) return 1 ;;
+ esac
+}
+
+# Read the LVS nodes file $CTDB_LVS_NODES into $lvs_nodes.
+# Each line is read as an IP address followed by optional options.
+# Returns 1 if the file is not readable.  Dies if a non-comment line
+# has options other than "follower-only".
+get_lvs_nodes ()
+{
+ # Result is cached in global variable lvs_nodes
+ if [ -n "$lvs_nodes" ] ; then
+ return
+ fi
+
+ if [ ! -r "$CTDB_LVS_NODES" ] ; then
+ return 1
+ fi
+
+ lvs_nodes=$(cat "$CTDB_LVS_NODES") || return 1
+
+ # Sanity check file contents here
+ while read _ip _options ; do
+ # Skip comments
+ case "$_ip" in
+ \#*) continue ;;
+ esac
+ case "$_options" in
+ follower-only|"") : ;;
+ *) die "${prog}: Invalid options \"${_options}\" in \"$CTDB_LVS_NODES\""
+ esac
+ done <<EOF
+$lvs_nodes
+EOF
+
+ return 0
+}
+
+# Print PNN and IP address of given nodes meeting the criteria for
+# usable LVS nodes. That is, either those that are healthy or, if no
+# healthy nodes, then nodes that are active and not-disabled.
+# Return codes: 0 = nodes found, 255 = no nodes found, 10 = error.
+#   $1 - @-delimited list of IP addresses to consider, with a
+#        leading and a trailing "@"
+filter_nodes ()
+{
+ # $_ns is an @-delimited list of nodes to be considered
+ _ns="$1"
+
+ get_nodestatus_X
+ [ -n "$nodestatus_X" ] || return 10
+
+ # Now filter by $_ns and by status of nodes...
+
+ # Note that the 2 awk invocations below have "||" between
+ # them, so the first to succeed will print the nodes.
+
+ # First try for a fully active and healthy node, so must not
+ # be UNKNOWN, DISABLED, UNHEALTHY or INACTIVE (last covers
+ # DISCONNECTED, BANNED or STOPPED)
+ # Fields: $1 = node, $2 = IP, $4 = Unknown, $6 = Disabled,
+ # $7 = Unhealthy, $9 = Inactive
+ awk -F '|' -v ns="$_ns" '
+ BEGIN { ret = 255 }
+ ns ~ "@" $2 "@" && $4 == 0 && $6 == 0 && $7 == 0 && $9 == 0 {
+ print $1, $2 ; ret=0
+ }
+ END { exit ret }
+ ' <<EOF ||
+$nodestatus_X
+EOF
+ # Not found? UNHEALTHY will do, so node must not be INACTIVE or
+ # DISABLED
+ awk -F '|' -v ns="$_ns" '
+ BEGIN { ret = 255 }
+ ns ~ "@" $2 "@" && $6 == 0 && $9 == 0 {
+ print $1, $2 ; ret=0
+ }
+ END { exit ret }
+ ' <<EOF
+$nodestatus_X
+EOF
+}
+
+# Print the PNN of the LVS leader node
+# The leader is the first usable node printed by filter_nodes for
+# the nodes listed without options in the LVS nodes file.  If
+# filter_nodes fails then its return code is returned.
+find_leader ()
+{
+ get_lvs_nodes || \
+ die "${prog}: LVS nodes file \"$CTDB_LVS_NODES\" not found"
+
+ # $_ms is an @-delimited list of nodes that are allowed to be the leader
+ _ms="@"
+ while read _ip _options ; do
+ case "$_options" in
+ "") _ms="${_ms}${_ip}@" ;;
+ esac
+ done <<EOF
+$lvs_nodes
+EOF
+
+ _leader_candidates=$(filter_nodes "$_ms") || return $?
+ # Keep only the text before the first space: the PNN of the
+ # first candidate
+ echo "${_leader_candidates%% *}"
+}
+
+# List all usable nodes in the LVS group
+# Prints the PNN and IP address of each usable node.  Exits with
+# status 0 without printing anything if there are no usable nodes,
+# or with status 10 if filter_nodes reports an error.
+nodes_list ()
+{
+ get_lvs_nodes || \
+ die "${prog}: LVS nodes file \"$CTDB_LVS_NODES\" not found"
+
+ # $_ns is a @-delimited list of nodes in the LVS group
+ _ns="@"
+ while read _ip _options ; do
+ _ns="${_ns}${_ip}@"
+ done <<EOF
+$lvs_nodes
+EOF
+
+ _usable_nodes=$(filter_nodes "$_ns")
+ case $? in
+ 0) : ;;
+ 255) exit 0 ;; # Return 0 even if no usable nodes
+ *) exit 10 ;;
+ esac
+
+ awk '{ print $1, $2 }'<<EOF
+$_usable_nodes
+EOF
+}
+
+# Print the status of all nodes in the LVS group, along with a count
+# Exits with status 10 if no node status could be obtained.  The
+# return value of get_nodestatus is not checked.
+nodes_status ()
+{
+ get_lvs_nodes || \
+ die "${prog}: LVS nodes file \"$CTDB_LVS_NODES\" not found"
+ get_nodestatus
+ [ -n "$nodestatus" ] || exit 10
+
+ # $_ns is a @-delimited list of nodes in the LVS group
+ _ns="@"
+ while read _ip _options ; do
+ _ns="${_ns}${_ip}@"
+ done <<EOF
+$lvs_nodes
+EOF
+
+ # Print status of nodes in $_ns, along with node count
+ # Lines are selected by matching their 2nd whitespace-separated
+ # field against $_ns
+ awk -v ns="$_ns" 'ns ~ "@" $2 "@" { print }' <<EOF
+$nodestatus
+EOF
+}
+
+# For backward compatibility
+# $prog is used as the prefix of the messages passed to die
+prog=$(basename "$0")
+cmd="$1"
+
+# Dispatch on the sub-command; anything unrecognised prints usage
+case "$cmd" in
+leader) find_leader ;;
+list) nodes_list ;;
+status) nodes_status ;;
+*) usage ;;
+esac
diff --git a/ctdb/tools/ctdb_natgw b/ctdb/tools/ctdb_natgw
new file mode 100755
index 0000000..728cd9c
--- /dev/null
+++ b/ctdb/tools/ctdb_natgw
@@ -0,0 +1,194 @@
+#!/bin/sh
+
+# Default location of the CTDB configuration if not set by the caller
+if [ -z "$CTDB_BASE" ] ; then
+ export CTDB_BASE="/usr/local/etc/ctdb"
+fi
+
+# Shared shell functions; load_script_options and die are not defined
+# in this script, so presumably they come from here
+. "${CTDB_BASE}/functions"
+
+load_script_options "failover" "11.natgw"
+
+# Default NAT gateway nodes file location
+[ -n "$CTDB_NATGW_NODES" ] || CTDB_NATGW_NODES="${CTDB_BASE}/natgw_nodes"
+
+# Command used to run the ctdb tool, may be overridden via $CTDB
+if [ -z "$CTDB" ] ; then
+ CTDB=ctdb
+fi
+
+############################################################
+
+# Print the usage message to stdout and exit with status 1.
+usage ()
+{
+cat <<EOF
+$0 <option>
+
+<option> is one of:
+ leader Display node number and private IP address of leader node
+ list List private IP addresses of nodes in group, annotate leader
+ status Show status of nodes in NAT gateway group
+EOF
+ exit 1
+}
+
+nodestatus_X=""
+# Fields are:
+# Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode
+# Fetch the machine-readable status of all nodes, with the first
+# line and the leading and trailing '|' of each line removed.
+# NOTE(review): when the status is fetched the return value is that
+# of the pipeline, so presumably of sed rather than of $CTDB - a
+# failure of $CTDB itself may go unnoticed by callers; confirm.
+get_nodestatus_X ()
+{
+ # Result is cached in global variable nodestatus_X
+ [ -n "$nodestatus_X" ] || \
+ nodestatus_X=$($CTDB -X nodestatus all |
+ sed -e '1d' -e 's@^|@@' -e 's@|$@@')
+}
+
+# Fetch the human-readable status of all nodes into $nodestatus.
+# Fails only if the exit status seen is 255; any other status is
+# treated as success.
+get_nodestatus ()
+{
+ # Result is cached in global variable nodestatus
+ [ -n "$nodestatus" ] || nodestatus=$($CTDB nodestatus all)
+ [ $? -ne 255 ] # ctdb nodestatus returns 255 on failure
+}
+
+# Read the NAT gateway nodes file $CTDB_NATGW_NODES into
+# $natgw_nodes.  Each line is read as an IP address followed by
+# optional options.
+# Returns 1 if the file is not readable.  Dies if a non-comment line
+# has options other than "follower-only".
+get_natgw_nodes ()
+{
+ # Result is cached in global variable natgw_nodes
+ if [ -n "$natgw_nodes" ] ; then
+ return
+ fi
+
+ if [ ! -r "$CTDB_NATGW_NODES" ] ; then
+ return 1
+ fi
+
+ natgw_nodes=$(cat "$CTDB_NATGW_NODES") || return 1
+
+ # Sanity check file contents here
+ while read _ip _options ; do
+ # Skip comments
+ case "$_ip" in
+ \#*) continue ;;
+ esac
+ case "$_options" in
+ follower-only|"") : ;;
+ *) die "${prog}: Invalid options \"${_options}\" in \"$CTDB_NATGW_NODES\""
+ esac
+ done <<EOF
+$natgw_nodes
+EOF
+
+ return 0
+}
+
+# Print the PNN and IP address of the NAT gateway leader node
+# Only nodes listed without options in the NAT gateway nodes file are
+# candidates.  The return value is that of the last awk run: 0 if a
+# leader was printed, 2 if no candidate qualified.
+find_leader ()
+{
+ get_natgw_nodes || \
+ die "${prog}: NAT gateway nodes file \"$CTDB_NATGW_NODES\" not found"
+ get_nodestatus_X || \
+ die "${prog}: Unable to get status of nodes"
+
+ # $_ms is an @-delimited list of nodes that are allowed to be the leader
+ _ms="@"
+ while read _ip _options ; do
+ case "$_options" in
+ "") _ms="${_ms}${_ip}@" ;;
+ esac
+ done <<EOF
+$natgw_nodes
+EOF
+
+ # Now filter by $_ms and by status of nodes...
+
+ # Note that the 3 awk invocations below have "||" between them, so
+ # the first to succeed will select the leader node.
+
+ # First try for a fully active and healthy node, so must not be
+ # UNKNOWN, DISABLED, UNHEALTHY or INACTIVE (last covers DISCONNECTED,
+ # BANNED or STOPPED)
+ # Fields: $1 = node, $2 = IP, $4 = Unknown, $6 = Disabled,
+ # $7 = Unhealthy, $9 = Inactive.  Only the first match is printed.
+ awk -F '|' -v ms="$_ms" \
+ 'BEGIN { ret = 2 }
+ ms ~ "@" $2 "@" &&
+ $4 == 0 && $6 == 0 && $7 == 0 && $9 == 0 { print $1, $2 ; ret=0 ; exit }
+ END { exit ret }' <<EOF ||
+$nodestatus_X
+EOF
+ # Not found? UNHEALTHY/BANNED will do, so node must not be
+ # DISCONNECTED, DISABLED or STOPPED
+ # Fields: $3 = Disconnected, $6 = Disabled, $8 = Stopped
+ awk -F '|' -v ms="$_ms" \
+ 'BEGIN { ret = 2 }
+ ms ~ "@" $2 "@" &&
+ $3 == 0 && $6 == 0 && $8 == 0 { print $1, $2 ; ret=0 ; exit }
+ END { exit ret }' <<EOF ||
+$nodestatus_X
+EOF
+ # Not found? STOPPED will do, so node must not be DISCONNECTED or
+ # DISABLED
+ awk -F '|' -v ms="$_ms" \
+ 'BEGIN { ret = 2 }
+ ms ~ "@" $2 "@" &&
+ $3 == 0 && $6 == 0 { print $1, $2 ; ret=0 ; exit }
+ END { exit ret }' <<EOF
+$nodestatus_X
+EOF
+}
+
+# List all nodes in the NAT gateway group, annotating the leader node
+# Prints one line per line of the nodes file: the IP address,
+# followed by a tab and the options if there are any.  "LEADER" is
+# added to the options of the leader node.
+nodes_list ()
+{
+ get_natgw_nodes || \
+ die "${prog}: NAT gateway nodes file \"$CTDB_NATGW_NODES\" not found"
+ # NOTE(review): the status tested by "||" below is presumably that
+ # of "set" rather than of find_leader, in which case a failure to
+ # find a leader never reaches die and the list is simply printed
+ # without a LEADER annotation - confirm the intended behaviour.
+ # Intentional word splitting here
+ # shellcheck disable=SC2046
+ set -- $(find_leader) || \
+ die "${prog}: Unable to determine NAT gateway leader node"
+ # find_leader prints "<pnn> <ip>", so $2 is the IP address
+ _leader_ip="$2"
+
+ # Annotate the leader node
+ while read _ip _options ; do
+ if [ "$_ip" = "$_leader_ip" ] ; then
+ _options="LEADER${_options:+,}${_options}"
+ fi
+ # There is no other way to do this and keep shellcheck happy.
+ # The tab character must be in the format string and the
+ # format string must contain no variables. Some shells will
+ # expand a tab if it is in an argument but others won't.
+ if [ -n "$_options" ] ; then
+ printf '%s\t%s\n' "$_ip" "$_options"
+ else
+ echo "$_ip"
+ fi
+ done <<EOF
+$natgw_nodes
+EOF
+}
+
+# Print the status of all nodes in the NAT gateway group, along with a count
+# Dies if the nodes file can not be read or if get_nodestatus fails.
+nodes_status ()
+{
+ get_natgw_nodes || \
+ die "${prog}: NAT gateway nodes file \"$CTDB_NATGW_NODES\" not found"
+ get_nodestatus || \
+ die "${prog}: Unable to get status of nodes"
+
+ # $_ns is a @-delimited list of nodes in the NAT gateway group
+ _ns="@"
+ while read _ip _options ; do
+ _ns="${_ns}${_ip}@"
+ done <<EOF
+$natgw_nodes
+EOF
+
+ # Print status of nodes in $_ns, along with node count
+ # Lines are selected by matching their 2nd whitespace-separated
+ # field against $_ns
+ awk -v ns="$_ns" 'ns ~ "@" $2 "@" { print $0 }' <<EOF
+$nodestatus
+EOF
+}
+
+# $prog is used as the prefix of the messages passed to die
+prog=$(basename "$0")
+cmd="$1"
+
+# Dispatch on the sub-command; anything unrecognised prints usage
+case "$cmd" in
+ leader) find_leader ;;
+ list) nodes_list ;;
+ status) nodes_status ;;
+ *) usage ;;
+esac
diff --git a/ctdb/tools/ltdbtool.c b/ctdb/tools/ltdbtool.c
new file mode 100644
index 0000000..98a1b51
--- /dev/null
+++ b/ctdb/tools/ltdbtool.c
@@ -0,0 +1,395 @@
+/*
+ * ctdb local tdb tool
+ *
+ * Copyright (C) Gregor Beck 2011
+ * Copyright (C) Michael Adam 2011
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/locale.h"
+
+#include <tdb.h>
+
+#include "protocol/protocol.h"
+
+/* Tool-wide constants */
+enum {
+ /* Largest header size handled; also the size of the uints[] view */
+ MAX_HEADER_SIZE=24,
+ /* Permissions and open flags for the output db; O_EXCL means an
+  * existing output file is never overwritten */
+ OUT_MODE = S_IRUSR | S_IWUSR,
+ OUT_FLAGS = O_EXCL|O_CREAT|O_RDWR,
+};
+
+/*
+ * A ctdb record header, viewable either as the structured header or as
+ * raw 32 bit words (used to inspect the trailing padding word).
+ */
+union ltdb_header {
+ struct ctdb_ltdb_header hdr;
+ uint32_t uints[MAX_HEADER_SIZE/4];
+};
+
+/* Header used as the starting value before record bytes are copied in;
+ * all fields are zero except dmaster */
+static const union ltdb_header DEFAULT_HDR = {
+ .hdr = {
+ .dmaster = -1,
+ }
+};
+
+/*
+ * Print the full option and command help to stdout.
+ *
+ * cmd is the program name substituted into the usage line.
+ * Always returns 0.
+ */
+static int help(const char* cmd)
+{
+ fprintf(stdout, ""
+"Usage: %s [options] <command>\n"
+"\n"
+"Options:\n"
+" -s {0|32|64} specify how to determine the ctdb record header size\n"
+" for the input database:\n"
+" 0: no ctdb header\n"
+" 32: ctdb header size of a 32 bit system (20 bytes)\n"
+" 64: ctdb header size of a 64 bit system (24 bytes)\n"
+" default: 32 or 64 depending on the system architecture\n"
+"\n"
+" -S <num> the number of bytes to interpret as ctdb record header\n"
+" for the input database (beware!)\n"
+"\n"
+" -o {0|32|64} specify how to determine the ctdb record header size\n"
+" for the output database\n"
+" 0: no ctdb header\n"
+" 32: ctdb header size of a 32 bit system (20 bytes)\n"
+" 64: ctdb header size of a 64 bit system (24 bytes)\n"
+" default: 32 or 64 depending on the system architecture\n"
+"\n"
+" -O <num> the number of bytes to interpret as ctdb record header\n"
+" for the output database (beware!)\n"
+"\n"
+" -e Include empty records, defaults to off\n"
+"\n"
+" -p print header (for the dump command), defaults to off\n"
+"\n"
+" -h print this help\n"
+"\n"
+"Commands:\n"
+" help print this help\n"
+" dump <db> dump the db to stdout\n"
+" convert <in_db> <out_db> convert the db\n\n", cmd);
+ return 0;
+}
+
+/*
+ * Print a short usage synopsis to stderr.
+ *
+ * cmd is the program name substituted into each synopsis line.
+ * Always returns -1 so callers can "return usage(...)" on bad input.
+ */
+static int usage(const char* cmd)
+{
+ fprintf(stderr,
+ "Usage: %s dump [-e] [-p] [-s{0|32|64}] <idb>\n"
+ " %s convert [-e] [-s{0|32|64}] [-o{0|32|64}] <idb> <odb>\n"
+ " %s {help|-h}\n"
+ , cmd, cmd, cmd);
+ return -1;
+}
+
+/*
+ * Traverse tdb, calling fn for each record with the leading hsize
+ * bytes of the value split off as the ctdb header.
+ */
+static int
+ltdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT*, TDB_DATA, TDB_DATA,
+ struct ctdb_ltdb_header*, void *),
+ void *state, size_t hsize, bool skip_empty);
+
+/* State handed to write_record() */
+struct write_record_ctx {
+ /* output database */
+ TDB_CONTEXT* tdb;
+ /* number of header bytes to write in front of each value */
+ size_t hsize;
+ /* flags passed to tdb_store()/tdb_storev() */
+ int tdb_store_flags;
+};
+
+/* ltdb_traverse() callback storing one record in the output db */
+static int
+write_record(TDB_CONTEXT* tdb, TDB_DATA key, TDB_DATA val,
+ struct ctdb_ltdb_header* hdr,
+ void* write_record_ctx);
+
+
+/* State handed to dump_record() */
+struct dump_record_ctx {
+ /* stream the dump is written to */
+ FILE* file;
+ /* formatter used for both keys and values */
+ void (*print_data)(FILE*, TDB_DATA);
+ /* header printer; a no-op unless header output was requested */
+ void (*dump_header)(struct dump_record_ctx*, struct ctdb_ltdb_header*);
+};
+
+/* ltdb_traverse() callback printing one record */
+static int dump_record(TDB_CONTEXT* tdb, TDB_DATA key, TDB_DATA val,
+ struct ctdb_ltdb_header* hdr,
+ void* dump_record_ctx);
+static void print_data_tdbdump(FILE* file, TDB_DATA data);
+static void dump_header_full(struct dump_record_ctx*, struct ctdb_ltdb_header*);
+/* Header printer used when no header output is wanted: does nothing */
+static void dump_header_nop(struct dump_record_ctx* c,
+ struct ctdb_ltdb_header* h)
+{}
+
+/*
+ * Dump the records of the tdb file iname to ofile.
+ *
+ * hsize is the number of leading value bytes treated as ctdb header,
+ * dump_header selects whether that header is printed, and empty
+ * selects whether records consisting of a header only are included.
+ *
+ * Returns -1 if the database cannot be opened, otherwise the result
+ * of ltdb_traverse().
+ */
+static int dump_db(const char* iname,
+		   FILE* ofile,
+		   size_t hsize,
+		   bool dump_header,
+		   bool empty)
+{
+	struct dump_record_ctx dump_ctx;
+	TDB_CONTEXT *in_db;
+	int rc;
+
+	in_db = tdb_open(iname, 0, TDB_DEFAULT, O_RDONLY, 0);
+	if (in_db == NULL) {
+		perror("tdbopen in");
+		return -1;
+	}
+
+	dump_ctx.file = ofile;
+	dump_ctx.print_data = &print_data_tdbdump;
+	if (dump_header) {
+		dump_ctx.dump_header = &dump_header_full;
+	} else {
+		dump_ctx.dump_header = &dump_header_nop;
+	}
+
+	/* Header-only records are skipped unless the caller wants them */
+	rc = ltdb_traverse(in_db, &dump_record, &dump_ctx, hsize, !empty);
+	tdb_close(in_db);
+
+	return rc;
+}
+
+/*
+ * Copy the records of the tdb file iname into a newly created tdb
+ * file oname, reading isize header bytes from each input value and
+ * writing osize header bytes in front of each output value.
+ *
+ * keep_empty selects whether header-only records are copied as well.
+ *
+ * Returns -1 if either database cannot be opened, otherwise the
+ * result of ltdb_traverse().
+ */
+static int conv_db(const char* iname, const char* oname, size_t isize,
+		   size_t osize, bool keep_empty)
+{
+	struct write_record_ctx ctx;
+	TDB_CONTEXT *in_db;
+	TDB_CONTEXT *out_db;
+	int rc;
+
+	in_db = tdb_open(iname, 0, TDB_DEFAULT, O_RDONLY, 0);
+	if (in_db == NULL) {
+		perror("tdbopen in");
+		return -1;
+	}
+
+	out_db = tdb_open(oname, 0, TDB_DEFAULT, OUT_FLAGS, OUT_MODE);
+	if (out_db == NULL) {
+		perror("tdbopen out");
+		tdb_close(in_db);
+		return -1;
+	}
+
+	ctx.tdb = out_db;
+	ctx.hsize = osize;
+	ctx.tdb_store_flags = TDB_REPLACE;
+
+	rc = ltdb_traverse(in_db, &write_record, &ctx, isize, !keep_empty);
+
+	/* Close in the same order as before: output first, then input */
+	tdb_close(out_db);
+	tdb_close(in_db);
+
+	return rc;
+}
+
+/*
+ * Parse a header size option argument.
+ *
+ * size: receives the header size in bytes, capped at MAX_HEADER_SIZE
+ * arg:  the option argument, a decimal number
+ * raw:  if true, arg is a byte count; if false, arg must be 0, 32 or
+ *       64 and is mapped to 0, 20 or 24 bytes respectively
+ *
+ * Returns false (leaving *size untouched) if arg is not a complete
+ * non-negative decimal number, is out of range, or is not one of the
+ * allowed values when raw is false.
+ */
+static bool parse_size(size_t* size, const char* arg, bool raw) {
+	char *end = NULL;
+	long val;
+
+	errno = 0;
+	val = strtol(arg, &end, 10);
+	if (errno != 0) {
+		return false;
+	}
+	/* Reject empty input and trailing garbage such as "abc" or "32x" */
+	if (end == arg || *end != '\0') {
+		return false;
+	}
+	/*
+	 * A negative value would wrap to a huge size_t below and make
+	 * the traverse step outside the record data.
+	 */
+	if (val < 0) {
+		return false;
+	}
+	if (!raw) {
+		switch(val) {
+		case 0:
+			break;
+		case 32:
+			val = 20;
+			break;
+		case 64:
+			val = 24;
+			break;
+		default:
+			return false;
+		}
+	}
+	*size = MIN(val, MAX_HEADER_SIZE);
+	return true;
+}
+
+
+/*
+ * Entry point: parse the options, then run the help, dump or convert
+ * command named by the first non-option argument.
+ *
+ * Returns 0 on success, the result of usage() (-1) on invalid
+ * invocation, or the negative result of dump_db()/conv_db().
+ */
+int main(int argc, char* argv[])
+{
+ /* Both header sizes default to this build's native header size */
+ size_t isize = sizeof(struct ctdb_ltdb_header);
+ size_t osize = sizeof(struct ctdb_ltdb_header);
+ bool print_header = false;
+ bool keep_empty = false;
+ int opt;
+ const char *cmd, *idb, *odb;
+
+ while ((opt = getopt(argc, argv, "s:o:S:O:phe")) != -1) {
+ switch (opt) {
+ /* Lower case takes {0|32|64}, upper case a raw byte count */
+ case 's':
+ case 'S':
+ if (!parse_size(&isize, optarg, isupper(opt))) {
+ return usage(argv[0]);
+ }
+ break;
+ case 'o':
+ case 'O':
+ if (!parse_size(&osize, optarg, isupper(opt))) {
+ return usage(argv[0]);
+ }
+ break;
+ case 'p':
+ print_header = true;
+ break;
+ case 'e':
+ keep_empty = true;
+ break;
+ case 'h':
+ return help(argv[0]);
+ default:
+ return usage(argv[0]);
+ }
+ }
+
+ /* A command word is mandatory */
+ if (argc - optind < 1) {
+ return usage(argv[0]);
+ }
+
+ cmd = argv[optind];
+
+ if (strcmp(cmd, "help") == 0) {
+ return help(argv[0]);
+ }
+ else if (strcmp(cmd, "dump") == 0) {
+ int ret;
+ /* dump takes exactly one database argument */
+ if (argc - optind != 2) {
+ return usage(argv[0]);
+ }
+ idb = argv[optind+1];
+ ret = dump_db(idb, stdout, isize, print_header, keep_empty);
+ /* Non-negative results are record counts, not errors */
+ return (ret >= 0) ? 0 : ret;
+ }
+ else if (strcmp(cmd, "convert") == 0) {
+ int ret;
+ /* convert takes an input and an output database */
+ if (argc - optind != 3) {
+ return usage(argv[0]);
+ }
+ idb = argv[optind+1];
+ odb = argv[optind+2];
+ ret = conv_db(idb, odb, isize, osize, keep_empty);
+ return (ret >= 0) ? 0 : ret;
+ }
+
+ return usage(argv[0]);
+}
+
+/* State shared between ltdb_traverse() and ltdb_traverse_fn() */
+struct ltdb_traverse_ctx {
+ /* per-record callback and its opaque argument */
+ int (*fn)(TDB_CONTEXT*,TDB_DATA,TDB_DATA,struct ctdb_ltdb_header*,void *);
+ void* state;
+ /* number of leading value bytes treated as ctdb header */
+ size_t hsize;
+ /* if true, records consisting of a header only are not passed to fn */
+ bool skip_empty;
+ /* number of records skipped because of skip_empty */
+ int nempty;
+};
+
+/*
+ * tdb_traverse() callback: split the ctdb header off the front of a
+ * raw record value and hand key, remaining data and decoded header to
+ * ctx->fn.
+ *
+ * Returns -1 if the value is shorter than the header size, 0 for a
+ * header-only record that is being skipped, otherwise the result of
+ * ctx->fn.
+ */
+static int
+ltdb_traverse_fn(TDB_CONTEXT* tdb, TDB_DATA key, TDB_DATA val,
+		 void* ltdb_traverse_ctx)
+{
+	struct ltdb_traverse_ctx* ctx =
+		(struct ltdb_traverse_ctx*)ltdb_traverse_ctx;
+	/* Start from the defaults so bytes beyond hsize are well defined */
+	union ltdb_header hdr = DEFAULT_HDR;
+
+	/* Never copy more than the header union can hold */
+	const size_t hsize = MIN(sizeof(hdr), ctx->hsize);
+	if (val.dsize < hsize) {
+		fprintf(stderr, "Value too short to contain a ctdb header: ");
+		print_data_tdbdump(stderr, key);
+		fprintf(stderr, " = ");
+		print_data_tdbdump(stderr, val);
+		fputc('\n', stderr);
+		return -1;
+	}
+	if (val.dsize == hsize && ctx->skip_empty) {
+		ctx->nempty++;
+		return 0;
+	}
+
+	memcpy(&hdr, val.dptr, hsize);
+
+	/* The last word is padding in a full size header */
+	if (hdr.uints[5] != 0) {
+		fprintf(stderr, "Warning: header padding isn't zero! Wrong header size?\n");
+	}
+
+	/*
+	 * Strip exactly the bytes that were validated and copied above.
+	 * Advancing by the unclamped ctx->hsize could move past the end
+	 * of the value and wrap val.dsize when it exceeds sizeof(hdr).
+	 */
+	val.dptr += hsize;
+	val.dsize -= hsize;
+	return ctx->fn(tdb, key, val, &hdr.hdr, ctx->state);
+}
+
+/*
+ * Traverse tdb and call fn for every record, with the first hsize
+ * bytes of each value decoded as ctdb header.
+ *
+ * If skip_empty is set, header-only records are not passed to fn and
+ * are subtracted from the returned count.
+ *
+ * Returns the negative result of tdb_traverse() on failure, otherwise
+ * the tdb_traverse() result minus the number of skipped records.
+ */
+static int ltdb_traverse(TDB_CONTEXT *tdb,
+			 int (*fn)(TDB_CONTEXT*, TDB_DATA, TDB_DATA,
+				   struct ctdb_ltdb_header*, void *),
+			 void *state, size_t hsize, bool skip_empty)
+{
+	struct ltdb_traverse_ctx ctx;
+	int visited;
+
+	ctx.fn = fn;
+	ctx.state = state;
+	ctx.hsize = hsize;
+	ctx.skip_empty = skip_empty;
+	ctx.nempty = 0;
+
+	visited = tdb_traverse(tdb, &ltdb_traverse_fn, &ctx);
+	if (visited < 0) {
+		return visited;
+	}
+
+	return visited - ctx.nempty;
+}
+
+/*
+ * ltdb_traverse() callback: store one record in the output database.
+ *
+ * With an output header size of 0 only val is stored; otherwise the
+ * first hsize bytes of hdr are written in front of val.
+ *
+ * Returns 0 on success, -1 (after printing the tdb error) on failure.
+ */
+static int write_record(TDB_CONTEXT* tdb, TDB_DATA key, TDB_DATA val,
+			struct ctdb_ltdb_header* hdr,
+			void* write_record_ctx)
+{
+	struct write_record_ctx *out = write_record_ctx;
+	int rc;
+
+	if (out->hsize != 0) {
+		/* Header and payload are stored as one value in two parts */
+		TDB_DATA parts[2] = {
+			{ .dptr = (uint8_t *)hdr, .dsize = out->hsize },
+			{ .dptr = val.dptr, .dsize = val.dsize },
+		};
+
+		rc = tdb_storev(out->tdb, key, parts, 2,
+				out->tdb_store_flags);
+	} else {
+		rc = tdb_store(out->tdb, key, val, out->tdb_store_flags);
+	}
+
+	if (rc != -1) {
+		return 0;
+	}
+
+	fprintf(stderr, "tdb_store: %s\n", tdb_errorstr(out->tdb));
+	return -1;
+}
+
+/*
+ * ltdb_traverse() callback: print one record as a brace delimited
+ * block containing the key, the header (via the configured header
+ * printer) and the data.
+ *
+ * Always returns 0.
+ */
+static int dump_record(TDB_CONTEXT* tdb, TDB_DATA key, TDB_DATA val,
+		       struct ctdb_ltdb_header* hdr,
+		       void* dump_record_ctx)
+{
+	struct dump_record_ctx *ctx = dump_record_ctx;
+	const int key_len = (int)key.dsize;
+	const int val_len = (int)val.dsize;
+
+	/* Opening brace and key */
+	fprintf(ctx->file, "{\nkey(%d) = ", key_len);
+	ctx->print_data(ctx->file, key);
+	fputc('\n', ctx->file);
+
+	/* Optional header, then the data and the closing brace */
+	ctx->dump_header(ctx, hdr);
+	fprintf(ctx->file, "data(%d) = ", val_len);
+	ctx->print_data(ctx->file, val);
+	fprintf(ctx->file, "\n}\n");
+
+	return 0;
+}
+
+/* Print the dmaster, rsn and flags fields of h to c->file, one per line */
+static void dump_header_full(struct dump_record_ctx* c,
+			     struct ctdb_ltdb_header* h)
+{
+	const int dmaster = (int)h->dmaster;
+	const unsigned long long rsn = (unsigned long long)h->rsn;
+
+	fprintf(c->file,
+		"dmaster: %d\nrsn: %llu\nflags: 0x%X\n",
+		dmaster, rsn, h->flags);
+}
+
+/*
+ * Print data to file as a double quoted string.  Printable bytes are
+ * written as is; everything else, as well as the double quote and
+ * the backslash, is written as a backslash followed by two upper
+ * case hex digits.
+ */
+static void print_data_tdbdump(FILE* file, TDB_DATA data)
+{
+	const unsigned char *bytes = data.dptr;
+	size_t i;
+
+	fputc('"', file);
+	for (i = 0; i < data.dsize; i++) {
+		const unsigned char ch = bytes[i];
+		const bool needs_escape =
+			(ch == '"') || (ch == '\\') || !isprint(ch);
+
+		if (needs_escape) {
+			fprintf(file, "\\%02X", ch);
+		} else {
+			fputc(ch, file);
+		}
+	}
+	fputc('"', file);
+}
+
diff --git a/ctdb/tools/onnode b/ctdb/tools/onnode
new file mode 100755
index 0000000..f04d33f
--- /dev/null
+++ b/ctdb/tools/onnode
@@ -0,0 +1,344 @@
+#!/usr/bin/env bash
+
+# Run commands on CTDB nodes.
+
+# See http://ctdb.samba.org/ for more information about CTDB.
+
+# Copyright (C) Martin Schwenke 2008
+
+# Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.
+
+# Copyright (C) Andrew Tridgell 2007
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# Name used as the prefix of error messages
+prog=$(basename "$0")
+
+# Print the usage message to stderr and exit with status 1
+usage ()
+{
+ cat >&2 <<EOF
+Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
+ options:
+ -c Run in current working directory on specified nodes.
+ -f Specify nodes file, overriding default.
+ -i Keep standard input open - the default is to close it.
+ -n Allow nodes to be specified by name.
+ -p Run command in parallel on specified nodes.
+ -P Push given files to nodes instead of running commands.
+ -q Do not print node addresses (overrides -v).
+ -v Print node address even for a single node.
+ <NODES> "all", "any", "ok" (or "healthy"), "con" (or "connected") ; or
+ a node number (0 base); or
+ a hostname (if -n is specified); or
+ list (comma separated) of <NODES>; or
+ range (hyphen separated) of node numbers.
+EOF
+ exit 1
+
+}
+
+invalid_nodespec ()
+{
+ echo "Invalid <nodespec>" >&2 ; echo >&2
+ usage
+}
+
+# Defaults.
+
+# Option flags, all off until parse_options says otherwise
+current=false
+parallel=false
+verbose=false
+quiet=false
+names_ok=false
+push=false
+stdin=false
+
+# Nodes file override; empty means use the default location
+ctdb_nodes_file=""
+
+# Fall back to the built-in base directory if none is set or it is empty
+: "${CTDB_BASE:=/usr/local/etc/ctdb}"
+
+# Parse the command line.
+#
+# Sets the option flag globals, then stores the first remaining
+# argument in $nodespec and the rest, joined into one string, in
+# $command.  Calls usage (which exits) for -h, for unknown options and
+# when fewer than two non-option arguments remain.
+parse_options ()
+{
+ local opt
+
+ while getopts "cf:hnpqvPi?" opt ; do
+ case "$opt" in
+ c) current=true ;;
+ f) ctdb_nodes_file="$OPTARG" ;;
+ n) names_ok=true ;;
+ p) parallel=true ;;
+ q) quiet=true ;;
+ v) verbose=true ;;
+ P) push=true ;;
+ i) stdin=true ;;
+ \?|h) usage ;;
+ esac
+ done
+ shift $((OPTIND - 1))
+
+ # Need at least a node specification and a command
+ if [ $# -lt 2 ] ; then
+ usage
+ fi
+
+ nodespec="$1" ; shift
+ command="$*"
+}
+
+echo_nth ()
+{
+ local n="$1" ; shift
+
+ # Note that this is 0-based
+ local node=""
+ if [ "$n" -le $# ] ; then
+ shift "$n"
+ node="$1"
+ fi
+
+ if [ -n "$node" ] && [ "$node" != "#DEAD" ] ; then
+ echo "$node"
+ else
+ echo "${prog}: \"node ${n}\" does not exist" >&2
+ exit 1
+ fi
+}
+
+# Expand the comma separated node specification in $1 into one item
+# per line: ranges are expanded with seq, the status keywords are
+# passed through, and anything else must be a non-negative number
+# unless names are allowed.  Invalid items end in invalid_nodespec.
+parse_nodespec ()
+{
+ # Subshell avoids hacks to restore $IFS.
+ (
+ IFS=","
+ for i in $1 ; do
+ case "$i" in
+ *-*) seq "${i%-*}" "${i#*-}" 2>/dev/null || invalid_nodespec ;;
+ all|any|ok|healthy|con|connected) echo "$i" ;;
+ *)
+ # The numeric test fails (quietly) for non-numbers, leaving
+ # the decision to $names_ok
+ [ "$i" -gt -1 ] 2>/dev/null || $names_ok || invalid_nodespec
+ echo "$i"
+ esac
+ done
+ )
+}
+
+ctdb_status_output="" # cache
+# Print the addresses of all nodes having the given status.
+#
+# $1 is the list of all node addresses (indexed by PNN via echo_nth),
+# $2 is "healthy" or "connected".  The output of "ctdb -X status" is
+# fetched on first use and kept in $ctdb_status_output; if the command
+# fails its output is printed to stderr and the shell exits.
+get_nodes_with_status ()
+{
+ local all_nodes="$1"
+ local status="$2"
+
+ if [ -z "$ctdb_status_output" ] ; then
+ ctdb_status_output=$(ctdb -X status 2>&1)
+ # No! Checking the exit code afterwards is actually clearer...
+ # shellcheck disable=SC2181
+ if [ $? -ne 0 ] ; then
+ echo "${prog}: unable to get status of CTDB nodes" >&2
+ echo "$ctdb_status_output" >&2
+ exit 1
+ fi
+ local nl="
+"
+ # Drop everything up to and including the first newline, i.e.
+ # the first line of the output
+ ctdb_status_output="${ctdb_status_output#*"${nl}"}"
+ fi
+
+ (
+ local i
+ # Subshell, so the extended IFS does not leak to the caller
+ IFS="${IFS}|"
+ while IFS="" read -r i ; do
+
+ # Intentional word splitting
+ # shellcheck disable=SC2086
+ set -- $i # split line on "|" (and whitespace)
+ shift # presumably the line starts with "|", so 1st field is empty
+ local pnn="$1" ; shift
+ shift # ignore IP address but need status bits below
+
+ case "$status" in
+ healthy)
+ # If any bit is 1, don't match this address.
+ local s
+ for s ; do
+ [ "$s" != "1" ] || continue 2
+ done
+ ;;
+ connected)
+ # If disconnected bit is not 0, don't match this address.
+ [ "$1" = "0" ] || continue
+ ;;
+ *)
+ invalid_nodespec
+ esac
+
+ # Intentional multi-word expansion
+ # shellcheck disable=SC2086
+ echo_nth "$pnn" $all_nodes
+ done <<<"$ctdb_status_output"
+ )
+}
+
+# Print the address of one node that answers "ctdb pnn".
+#
+# $1 is the list of all node addresses.  Returns 0 after printing the
+# first responding node, 1 if no node produced a numeric reply.
+get_any_available_node ()
+{
+	local all_nodes="$1"
+
+	# We do a recursive onnode to find which nodes are up and running.
+	local reply candidate
+	reply=$("$0" -pq all ctdb pnn 2>&1)
+	while read -r candidate ; do
+		case "$candidate" in
+		""|*[!0-9]*)
+			# Not a bare number, so this must be an error
+			# message from a down node.
+			continue
+			;;
+		esac
+
+		# Intentional multi-word expansion
+		# shellcheck disable=SC2086
+		echo_nth "$candidate" $all_nodes
+		return 0
+	done <<<"$reply"
+	return 1
+}
+
+# Print the addresses selected by the node specification in $1.
+#
+# The nodes file is $CTDB_BASE/nodes unless overridden by
+# $ctdb_nodes_file; a relative override that does not exist is looked
+# up in $CTDB_BASE.  Exits with status 1 if the file is unreadable or
+# a status lookup fails.
+get_nodes ()
+{
+ local all_nodes
+
+ local f="${CTDB_BASE}/nodes"
+ if [ -n "$ctdb_nodes_file" ] ; then
+ f="$ctdb_nodes_file"
+ if [ ! -e "$f" ] && [ "${f#/}" = "$f" ] ; then
+ # $f is relative, try in $CTDB_BASE
+ f="${CTDB_BASE}/${f}"
+ fi
+ fi
+
+ if [ ! -r "$f" ] ; then
+ echo "${prog}: unable to open nodes file \"${f}\"" >&2
+ exit 1
+ fi
+
+ # Strip comments and spaces; lines left empty become "#DEAD" so
+ # that every line keeps its position in the list
+ all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$f")
+
+ local n nodes
+ nodes=$(parse_nodespec "$1") || exit $?
+ for n in $nodes ; do
+ case "$n" in
+ all)
+ # Placeholders are blanked out rather than listed
+ echo "${all_nodes//#DEAD/}"
+ ;;
+ any)
+ get_any_available_node "$all_nodes" || exit 1
+ ;;
+ ok|healthy)
+ get_nodes_with_status "$all_nodes" "healthy" || exit 1
+ ;;
+ con|connected)
+ get_nodes_with_status "$all_nodes" "connected" || exit 1
+ ;;
+ [0-9]|[0-9][0-9]|[0-9][0-9][0-9])
+ # Node numbers of up to three digits are looked up in the list
+ # Intentional multi-word expansion
+ # shellcheck disable=SC2086
+ echo_nth "$n" $all_nodes
+ ;;
+ *)
+ # Anything else is only acceptable as a host name
+ $names_ok || invalid_nodespec
+ echo "$n"
+ esac
+ done
+}
+
+# Copy files to a node with rsync.  $1 is the node, $2 a whitespace
+# separated list of files; each file is copied to the same absolute
+# path on the node.
+# shellcheck disable=SC2317
+# push() called indirectly via $ONNODE_SSH
+push ()
+{
+	local host="$1"
+	local files="$2"
+
+	local path src
+	for path in $files ; do
+		if $verbose ; then
+			echo "Pushing $path"
+		fi
+
+		# Relative names are resolved against the current directory
+		if [ "${path#/}" != "$path" ] ; then
+			src="$path"
+		else
+			src="${PWD}/${path}"
+		fi
+
+		rsync "$src" "[${host}]:${src}"
+	done
+}
+
+######################################################################
+
+# Main: parse the command line, choose the remote command runner,
+# resolve the node list and run the command on each node.
+
+parse_options "$@"
+
+ssh_opts=
+if $push ; then
+ # In push mode the push function stands in for ssh; a configured
+ # ssh replacement is passed on to rsync instead
+ if [ -n "$ONNODE_SSH" ] ; then
+ export RSYNC_RSH="$ONNODE_SSH"
+ fi
+ ONNODE_SSH=push
+else
+ $current && command="cd $PWD && $command"
+
+ # Could "2>/dev/null || true" but want to see errors from typos in file.
+ [ -r "${CTDB_BASE}/onnode.conf" ] && . "${CTDB_BASE}/onnode.conf"
+ [ -n "$ONNODE_SSH" ] || ONNODE_SSH=ssh
+ # $ONNODE_SSH must accept the -n option - it can be ignored!
+ if $parallel || ! $stdin ; then
+ ssh_opts="-n"
+ fi
+fi
+
+######################################################################
+
+nodes=$(get_nodes "$nodespec") || exit $?
+
+if $quiet ; then
+ verbose=false
+else
+ # If $nodes contains a space or a newline then assume multiple nodes.
+ nl="
+"
+ [ "$nodes" != "${nodes%[ "${nl}"]*}" ] && verbose=true
+fi
+
+pids=""
+# Intentional multi-word expansion
+# shellcheck disable=SC2086
+trap 'kill -TERM $pids 2>/dev/null' INT TERM
+# There's a small race here where the kill can fail if no processes
+# have been added to $pids and the script is interrupted. However,
+# the part of the window where it matter is very small.
+retcode=0
+for n in $nodes ; do
+ # Make a failing remote command visible through the sed pipeline
+ set -o pipefail 2>/dev/null
+
+ ssh_cmd="$ONNODE_SSH $ssh_opts"
+ if $parallel ; then
+ # Run in the background; output is prefixed with the node when
+ # verbose so that interleaved lines can be told apart
+ if $verbose ; then
+ $ssh_cmd "$n" "$command" 2>&1 | sed -e "s@^@[$n] @"
+ else
+ $ssh_cmd "$n" "$command"
+ fi &
+ pids="${pids} $!"
+ else
+ if $verbose ; then
+ echo >&2 ; echo ">> NODE: $n <<" >&2
+ fi
+ {
+ $ssh_cmd "$n" "$command"
+ } || retcode=$?
+ fi
+done
+
+# In parallel mode collect the exit status of every background job;
+# the last failure seen becomes the exit status
+if $parallel ; then
+ for p in $pids; do
+ wait "$p" || retcode=$?
+ done
+fi
+
+exit $retcode
diff --git a/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
new file mode 100644
index 0000000..7d868a3
--- /dev/null
+++ b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
@@ -0,0 +1,457 @@
+/*
+ CTDB mutex helper using Ceph librados locks
+
+ Copyright (C) David Disseldorp 2016-2020
+
+ Based on ctdb_mutex_fcntl_helper.c, which is:
+ Copyright (C) Martin Schwenke 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "tevent.h"
+#include "talloc.h"
+#include "rados/librados.h"
+
+/* Name, cookie and description passed to the RADOS lock calls */
+#define CTDB_MUTEX_CEPH_LOCK_NAME "ctdb_reclock_mutex"
+#define CTDB_MUTEX_CEPH_LOCK_COOKIE CTDB_MUTEX_CEPH_LOCK_NAME
+#define CTDB_MUTEX_CEPH_LOCK_DESC "CTDB cluster lock"
+/*
+ * During failover it may take up to <lock duration> seconds before the
+ * newly elected recovery master can obtain the lock.
+ */
+#define CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT 10
+
+/* Single character status codes written to stdout by main() */
+#define CTDB_MUTEX_STATUS_HOLDING "0"
+#define CTDB_MUTEX_STATUS_CONTENDED "1"
+#define CTDB_MUTEX_STATUS_TIMEOUT "2"
+#define CTDB_MUTEX_STATUS_ERROR "3"
+
+/* Program name (argv[0]) used as the prefix of error messages */
+static char *progname = NULL;
+
+/*
+ * Connect to a Ceph cluster and open an I/O context for a pool.
+ *
+ * The cluster handle is created for ceph_cluster_name as
+ * ceph_auth_name, configured from the default config file locations
+ * and connected before the I/O context for pool_name is created.
+ *
+ * On success returns 0 and stores the handles in *_ceph_cluster and
+ * *_ioctx.  On failure prints a message to stderr, shuts the cluster
+ * handle down again if it was created, and returns the negative
+ * error from librados, leaving the output parameters untouched.
+ */
+static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
+				       const char *ceph_auth_name,
+				       const char *pool_name,
+				       rados_t *_ceph_cluster,
+				       rados_ioctx_t *_ioctx)
+{
+	rados_t cluster = NULL;
+	rados_ioctx_t pool_ctx = NULL;
+	int rc;
+
+	rc = rados_create2(&cluster, ceph_cluster_name, ceph_auth_name, 0);
+	if (rc < 0) {
+		/* Nothing to shut down yet */
+		fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
+			" - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
+			strerror(-rc));
+		return rc;
+	}
+
+	/* path=NULL tells librados to use default locations */
+	rc = rados_conf_read_file(cluster, NULL);
+	if (rc < 0) {
+		fprintf(stderr, "%s: failed to parse Ceph cluster config"
+			" - (%s)\n", progname, strerror(-rc));
+		goto fail_shutdown;
+	}
+
+	rc = rados_connect(cluster);
+	if (rc < 0) {
+		fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
+			" - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
+			strerror(-rc));
+		goto fail_shutdown;
+	}
+
+	rc = rados_ioctx_create(cluster, pool_name, &pool_ctx);
+	if (rc < 0) {
+		fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
+			" - (%s)\n", progname, pool_name, strerror(-rc));
+		goto fail_shutdown;
+	}
+
+	*_ceph_cluster = cluster;
+	*_ioctx = pool_ctx;
+
+	return 0;
+
+fail_shutdown:
+	rados_shutdown(cluster);
+	return rc;
+}
+
+/*
+ * Take (or, depending on flags, renew) the exclusive lock on the
+ * RADOS object oid.
+ *
+ * A lock_duration_s of 0 requests a lock without a duration.
+ *
+ * Returns 0 once the lock is held, -EEXIST or -EBUSY silently on
+ * contention, or another negative error after printing a message.
+ */
+static int ctdb_mutex_rados_lock(rados_ioctx_t *ioctx,
+				 const char *oid,
+				 uint64_t lock_duration_s,
+				 uint8_t flags)
+{
+	struct timeval duration = { lock_duration_s, 0 };
+	struct timeval *durationp = NULL;
+	int rc;
+
+	if (lock_duration_s != 0) {
+		durationp = &duration;
+	}
+
+	rc = rados_lock_exclusive(ioctx, oid,
+				  CTDB_MUTEX_CEPH_LOCK_NAME,
+				  CTDB_MUTEX_CEPH_LOCK_COOKIE,
+				  CTDB_MUTEX_CEPH_LOCK_DESC,
+				  durationp,
+				  flags);
+	if (rc >= 0) {
+		/* lock obtained */
+		return 0;
+	}
+
+	if ((rc != -EEXIST) && (rc != -EBUSY)) {
+		/* unexpected failure, as opposed to lock contention */
+		fprintf(stderr,
+			"%s: Failed to get lock on RADOS object '%s' - (%s)\n",
+			progname, oid, strerror(-rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Release the lock on the RADOS object oid.
+ *
+ * Returns 0 on success, or the negative librados error after
+ * printing a message to stderr.
+ */
+static int ctdb_mutex_rados_unlock(rados_ioctx_t *ioctx,
+				   const char *oid)
+{
+	int rc = rados_unlock(ioctx, oid,
+			      CTDB_MUTEX_CEPH_LOCK_NAME,
+			      CTDB_MUTEX_CEPH_LOCK_COOKIE);
+
+	if (rc >= 0) {
+		return 0;
+	}
+
+	fprintf(stderr,
+		"%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
+		progname, oid, strerror(-rc));
+	return rc;
+}
+
+/*
+ * State of the helper.  Allocated with talloc in main(); its
+ * destructor drops the lock and releases the Ceph handles.
+ */
+struct ctdb_mutex_rados_state {
+ /* true once the lock has been obtained */
+ bool holding_mutex;
+ /* command line arguments 1 to 4 */
+ const char *ceph_cluster_name;
+ const char *ceph_auth_name;
+ const char *pool_name;
+ const char *object;
+ /* lock duration in seconds; 0 disables periodic renewal */
+ uint64_t lock_duration_s;
+ /* parent process ID recorded at startup and polled by the timer */
+ int ppid;
+ /* event context and the events registered on it */
+ struct tevent_context *ev;
+ struct tevent_signal *sigterm_ev;
+ struct tevent_signal *sigint_ev;
+ struct tevent_timer *ppid_timer_ev;
+ struct tevent_timer *renew_timer_ev;
+ /* Ceph handles filled in by ctdb_mutex_rados_ctx_create() */
+ rados_t ceph_cluster;
+ rados_ioctx_t ioctx;
+};
+
+/*
+ * Signal handler for SIGTERM and SIGINT: free the state (whose
+ * destructor drops the lock) and exit.
+ *
+ * The exit status is 1 if the handler runs while the lock is not
+ * held, 0 otherwise.
+ */
+static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
+					struct tevent_signal *se,
+					int signum,
+					int count,
+					void *siginfo,
+					void *private_data)
+{
+	struct ctdb_mutex_rados_state *cmr_state = private_data;
+	int exit_code = 0;
+
+	if (!cmr_state->holding_mutex) {
+		fprintf(stderr, "Sigterm callback invoked without mutex!\n");
+		exit_code = 1;
+	}
+
+	talloc_free(cmr_state);
+	exit(exit_code);
+}
+
+/*
+ * Timer handler checking whether the parent process still exists.
+ *
+ * While the parent is around the timer is re-armed for another 5
+ * seconds.  Once the parent is gone the state is freed (dropping the
+ * lock via its destructor) and the process exits with status 0.  If
+ * the handler runs without the lock being held it exits with status 1.
+ */
+static void ctdb_mutex_rados_ppid_timer_cb(struct tevent_context *ev,
+					   struct tevent_timer *te,
+					   struct timeval current_time,
+					   void *private_data)
+{
+	struct ctdb_mutex_rados_state *cmr_state = private_data;
+	int exit_code = 0;
+
+	if (!cmr_state->holding_mutex) {
+		fprintf(stderr, "Timer callback invoked without mutex!\n");
+		exit_code = 1;
+	} else {
+		/* Only ESRCH from the probe means the parent has ended */
+		bool parent_gone = (kill(cmr_state->ppid, 0) != 0) &&
+				   (errno == ESRCH);
+
+		if (!parent_gone) {
+			/* parent still around, keep waiting */
+			cmr_state->ppid_timer_ev = tevent_add_timer(
+					cmr_state->ev,
+					cmr_state,
+					tevent_timeval_current_ofs(5, 0),
+					ctdb_mutex_rados_ppid_timer_cb,
+					cmr_state);
+			if (cmr_state->ppid_timer_ev == NULL) {
+				fprintf(stderr,
+					"Failed to create timer event\n");
+				/* rely on signal cb */
+			}
+			return;
+		}
+	}
+
+	/* drop lock (via destructor) and exit */
+	talloc_free(cmr_state);
+	exit(exit_code);
+}
+
+#define USECS_IN_SEC 1000000
+
+/*
+ * Timer handler renewing the lock.
+ *
+ * Re-requests the lock with LIBRADOS_LOCK_FLAG_RENEW and re-arms
+ * itself to run again after half the lock duration.  If the renewal
+ * or the re-arming fails, the state is freed (dropping the lock via
+ * its destructor) and the process exits with status 1.
+ */
+static void ctdb_mutex_rados_lock_renew_timer_cb(struct tevent_context *ev,
+						 struct tevent_timer *te,
+						 struct timeval current_time,
+						 void *private_data)
+{
+	struct ctdb_mutex_rados_state *cmr_state = private_data;
+	struct timeval tv;
+	int ret;
+
+	ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
+				    cmr_state->lock_duration_s,
+				    LIBRADOS_LOCK_FLAG_RENEW);
+	if (ret == -EBUSY) {
+		/* should never get -EEXIST on renewal */
+		fprintf(stderr, "Lock contention during renew: %d\n", ret);
+		goto err_ctx_cleanup;
+	} else if (ret < 0) {
+		fprintf(stderr, "Lock renew failed\n");
+		goto err_ctx_cleanup;
+	}
+
+	/*
+	 * Renew again after half the lock duration.  The interval is
+	 * split into whole seconds plus a half second remainder: the
+	 * product of the duration and half a second in usecs no longer
+	 * fits the 32 bit usec argument for long durations, which
+	 * would silently shorten the interval.
+	 */
+	tv = tevent_timeval_current_ofs(cmr_state->lock_duration_s / 2,
+		(cmr_state->lock_duration_s % 2) * (USECS_IN_SEC / 2));
+	cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
+						     cmr_state,
+						     tv,
+					ctdb_mutex_rados_lock_renew_timer_cb,
+						     cmr_state);
+	if (cmr_state->renew_timer_ev == NULL) {
+		fprintf(stderr, "Failed to create timer event\n");
+		goto err_ctx_cleanup;
+	}
+
+	return;
+
+err_ctx_cleanup:
+	/* drop lock (via destructor) and exit */
+	talloc_free(cmr_state);
+	exit(1);
+}
+
+/*
+ * talloc destructor for the helper state: drop the lock if it is
+ * held, then release the I/O context and the cluster handle if they
+ * were created.  Always returns 0.
+ */
+static int ctdb_mutex_rados_state_destroy(struct ctdb_mutex_rados_state *cmr_state)
+{
+ /* Unlock first, while the I/O context is still usable */
+ if (cmr_state->holding_mutex) {
+ ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
+ }
+ if (cmr_state->ioctx != NULL) {
+ rados_ioctx_destroy(cmr_state->ioctx);
+ }
+ if (cmr_state->ceph_cluster != NULL) {
+ rados_shutdown(cmr_state->ceph_cluster);
+ }
+ return 0;
+}
+
+/*
+ * register this host+service with ceph-mgr for visibility
+ *
+ * The service is registered as "ctdb" with an ID built from the
+ * helper name and the librados instance ID.
+ *
+ * Returns 0 on success, -ENAMETOOLONG if the ID does not fit its
+ * buffer, or the negative error from rados_service_register().
+ */
+static int ctdb_mutex_rados_mgr_reg(rados_t ceph_cluster)
+{
+	char service_id[128];
+	uint64_t guid = rados_get_instance_id(ceph_cluster);
+	int len;
+	int rc;
+
+	len = snprintf(service_id, sizeof(service_id), "%s:0x%016llx",
+		       "ctdb_mutex_ceph_rados_helper",
+		       (unsigned long long)guid);
+	if (len < 0 || len >= sizeof(service_id)) {
+		fprintf(stderr, "Ceph instance name too long\n");
+		return -ENAMETOOLONG;
+	}
+
+	rc = rados_service_register(ceph_cluster, "ctdb", service_id, "");
+	if (rc < 0) {
+		fprintf(stderr, "failed to register service with ceph-mgr\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Entry point.
+ *
+ * Arguments: <Ceph Cluster> <Ceph user> <RADOS pool> <RADOS object>
+ * [lock duration secs].  Sets up signal and timer events, connects to
+ * the cluster, takes the lock and reports the outcome as a single
+ * status character on stdout, then runs the event loop until one of
+ * the event handlers exits the process.
+ *
+ * Returns 0 if the last recorded result was 0, otherwise 1.
+ */
+int main(int argc, char *argv[])
+{
+ int ret;
+ struct ctdb_mutex_rados_state *cmr_state;
+
+ progname = argv[0];
+
+ if ((argc != 5) && (argc != 6)) {
+ fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
+ "<RADOS pool> <RADOS object> "
+ "[lock duration secs]\n",
+ progname);
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ /* The status character must not sit in a stdio buffer */
+ ret = setvbuf(stdout, NULL, _IONBF, 0);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
+ }
+
+ cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
+ if (cmr_state == NULL) {
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ /* From here on freeing cmr_state also drops the lock and handles */
+ talloc_set_destructor(cmr_state, ctdb_mutex_rados_state_destroy);
+ cmr_state->ceph_cluster_name = argv[1];
+ cmr_state->ceph_auth_name = argv[2];
+ cmr_state->pool_name = argv[3];
+ cmr_state->object = argv[4];
+ if (argc == 6) {
+ /* optional lock duration provided */
+ char *endptr = NULL;
+ cmr_state->lock_duration_s = strtoull(argv[5], &endptr, 0);
+ /* Reject empty input and trailing garbage */
+ if ((endptr == argv[5]) || (*endptr != '\0')) {
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ ret = -EINVAL;
+ goto err_ctx_cleanup;
+ }
+ } else {
+ cmr_state->lock_duration_s
+ = CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT;
+ }
+
+ cmr_state->ppid = getppid();
+ if (cmr_state->ppid == 1) {
+ /*
+ * The original parent is gone and the process has
+ * been reparented to init. This can happen if the
+ * helper is started just as the parent is killed
+ * during shutdown. The error message doesn't need to
+ * be stellar, since there won't be anything around to
+ * capture and log it...
+ */
+ fprintf(stderr, "%s: PPID == 1\n", progname);
+ ret = -EPIPE;
+ goto err_ctx_cleanup;
+ }
+
+ cmr_state->ev = tevent_context_init(cmr_state);
+ if (cmr_state->ev == NULL) {
+ fprintf(stderr, "tevent_context_init failed\n");
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ ret = -ENOMEM;
+ goto err_ctx_cleanup;
+ }
+
+ /* wait for sigterm */
+ cmr_state->sigterm_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGTERM, 0,
+ ctdb_mutex_rados_sigterm_cb,
+ cmr_state);
+ if (cmr_state->sigterm_ev == NULL) {
+ fprintf(stderr, "Failed to create term signal event\n");
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ ret = -ENOMEM;
+ goto err_ctx_cleanup;
+ }
+
+ /* SIGINT is handled the same way as SIGTERM */
+ cmr_state->sigint_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGINT, 0,
+ ctdb_mutex_rados_sigterm_cb,
+ cmr_state);
+ if (cmr_state->sigint_ev == NULL) {
+ fprintf(stderr, "Failed to create int signal event\n");
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ ret = -ENOMEM;
+ goto err_ctx_cleanup;
+ }
+
+ /* periodically check parent */
+ cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
+ tevent_timeval_current_ofs(5, 0),
+ ctdb_mutex_rados_ppid_timer_cb,
+ cmr_state);
+ if (cmr_state->ppid_timer_ev == NULL) {
+ fprintf(stderr, "Failed to create timer event\n");
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ ret = -ENOMEM;
+ goto err_ctx_cleanup;
+ }
+
+ ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
+ cmr_state->ceph_auth_name,
+ cmr_state->pool_name,
+ &cmr_state->ceph_cluster,
+ &cmr_state->ioctx);
+ if (ret < 0) {
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ goto err_ctx_cleanup;
+ }
+
+ ret = ctdb_mutex_rados_mgr_reg(cmr_state->ceph_cluster);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to register with ceph-mgr\n");
+ /* ignore: ceph-mgr service registration is informational */
+ }
+
+ /* Initial lock attempt: no flags, i.e. not a renewal */
+ ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
+ cmr_state->lock_duration_s,
+ 0);
+ if ((ret == -EEXIST) || (ret == -EBUSY)) {
+ fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
+ goto err_ctx_cleanup;
+ } else if (ret < 0) {
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ goto err_ctx_cleanup;
+ }
+ cmr_state->holding_mutex = true;
+
+ if (cmr_state->lock_duration_s != 0) {
+ /*
+ * renew (reobtain) the lock, using a period of half the lock
+ * duration. Convert to usecs to avoid rounding.
+ */
+ /*
+ * NOTE(review): the usec argument is presumably 32 bit wide, so
+ * the product below would be truncated for lock durations of
+ * more than about 8589 seconds - confirm against the tevent API.
+ */
+ struct timeval tv = tevent_timeval_current_ofs(0,
+ cmr_state->lock_duration_s * (USECS_IN_SEC / 2));
+ cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
+ cmr_state,
+ tv,
+ ctdb_mutex_rados_lock_renew_timer_cb,
+ cmr_state);
+ if (cmr_state->renew_timer_ev == NULL) {
+ fprintf(stderr, "Failed to create timer event\n");
+ fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+ ret = -ENOMEM;
+ goto err_ctx_cleanup;
+ }
+ }
+
+ fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);
+
+ /* wait for the signal / timer events to do their work */
+ ret = tevent_loop_wait(cmr_state->ev);
+ if (ret < 0) {
+ goto err_ctx_cleanup;
+ }
+err_ctx_cleanup:
+ /* Freeing the state runs the destructor, which drops the lock */
+ talloc_free(cmr_state);
+err_out:
+ return ret ? 1 : 0;
+}
diff --git a/ctdb/utils/ceph/test_ceph_rados_reclock.sh b/ctdb/utils/ceph/test_ceph_rados_reclock.sh
new file mode 100755
index 0000000..bfb9c32
--- /dev/null
+++ b/ctdb/utils/ceph/test_ceph_rados_reclock.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+# standalone test for ctdb_mutex_ceph_rados_helper
+#
+# Copyright (C) David Disseldorp 2016-2020
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# XXX The following parameters may require configuration:
+CLUSTER="ceph" # Name of the Ceph cluster under test
+USER="client.admin" # Ceph user - a keyring must exist
+POOL="rbd" # RADOS pool - must exist
+OBJECT="ctdb_reclock" # RADOS object: target for lock requests
+
+# test procedure:
+# - using ctdb_mutex_ceph_rados_helper, take a lock on the Ceph RADOS object at
+# $CLUSTER/$POOL/$OBJECT using the Ceph keyring for $USER
+# + confirm that lock is obtained, via ctdb_mutex_ceph_rados_helper "0" output
+# - check for ceph-mgr service registration
+# - check RADOS object lock state, using the "rados lock info" command
+# - attempt to obtain the lock again, using ctdb_mutex_ceph_rados_helper
+# + confirm that the lock is not successfully taken ("1" output=contention)
+# - tell the first locker to drop the lock and exit, via SIGTERM
+# - once the first locker has exited, attempt to get the lock again
+# + confirm that this attempt succeeds
+
+# print a "FAILED: ..." message built from all arguments, then abort the
+# test run with exit status 1
+function _fail() {
+	printf 'FAILED: %s\n' "$*"
+	exit 1
+}
+
+# this test requires the Ceph "rados" and "ceph" binaries, the "jq" json
+# parser and ctdb_mutex_ceph_rados_helper itself; bail out early if any of
+# them cannot be found in PATH. The lookup output is discarded so that only
+# test results reach stdout.
+which jq > /dev/null || exit 1
+which rados > /dev/null || exit 1
+which ceph > /dev/null || exit 1
+which ctdb_mutex_ceph_rados_helper > /dev/null || exit 1
+
+TMP_DIR="$(mktemp --directory)" || exit 1
+rados -p "$POOL" rm "$OBJECT"
+
+# explicitly disable lock expiry (duration=0), to ensure that we don't get
+# intermittent failures (due to renewal) from the lock state diff further down
+(ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" 0 \
+ > ${TMP_DIR}/first) &
+locker_pid=$!
+
+# TODO wait for ctdb_mutex_ceph_rados_helper to write one byte to stdout,
+# indicating lock acquisition success/failure
+sleep 1
+
+first_out=$(cat ${TMP_DIR}/first)
+[ "$first_out" == "0" ] \
+ || _fail "expected lock acquisition (0), but got $first_out"
+
+ceph service dump > ${TMP_DIR}/service_dump
+SERVICE_COUNT=$(jq -r '.services.ctdb.daemons | length' ${TMP_DIR}/service_dump)
+[ $SERVICE_COUNT -gt 0 ] || _fail "lock holder missing from ceph service dump"
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_first
+
+# echo "with lock: `cat ${TMP_DIR}/lock_state_first`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_first)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_first)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_first)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected locker cookie: $LOCKER_COOKIE"
+LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_DESC" == "CTDB cluster lock" ] \
+ || _fail "unexpected locker description: $LOCKER_DESC"
+LOCKER_EXP="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_EXP" == "0.000000" ] \
+ || _fail "unexpected locker expiration: $LOCKER_EXP"
+
+# second attempt while first is still holding the lock - expect failure
+ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" \
+ > ${TMP_DIR}/second
+second_out=$(cat ${TMP_DIR}/second)
+[ "$second_out" == "1" ] \
+ || _fail "expected lock contention (1), but got $second_out"
+
+# confirm lock state didn't change
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_second
+
+diff ${TMP_DIR}/lock_state_first ${TMP_DIR}/lock_state_second \
+ || _fail "unexpected lock state change"
+
+# tell first locker to drop the lock and terminate
+kill $locker_pid || exit 1
+
+wait $locker_pid &> /dev/null
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_third
+# echo "without lock: `cat ${TMP_DIR}/lock_state_third`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_third)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_third)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_third)"
+[ $LOCK_COUNT -eq 0 ] \
+ || _fail "didn\'t expect any locks in rados state, got $LOCK_COUNT"
+
+exec >${TMP_DIR}/third -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" &
+locker_pid=$!
+
+sleep 1
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_fourth
+# echo "with lock again: `cat ${TMP_DIR}/lock_state_fourth`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fourth)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected locker cookie: $LOCKER_COOKIE"
+LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCKER_DESC" == "CTDB cluster lock" ] \
+ || _fail "unexpected locker description: $LOCKER_DESC"
+
+kill $locker_pid || exit 1
+wait $locker_pid &> /dev/null
+
+third_out=$(cat ${TMP_DIR}/third)
+[ "$third_out" == "0" ] \
+ || _fail "expected lock acquisition (0), but got $third_out"
+
+# test renew / expire behaviour using a 1s expiry (update period = 500ms)
+exec >${TMP_DIR}/forth -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" \
+ "$POOL" "$OBJECT" 1 &
+locker_pid=$!
+
+sleep 1
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_fifth_a
+#echo "with lock fifth: `cat ${TMP_DIR}/lock_state_fifth_a`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fifth_a)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_EXP_A="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCKER_EXP_A" != "0.000000" ] \
+ || _fail "unexpected locker expiration: $LOCKER_EXP_A"
+sleep 1 # sleep until renewal
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_fifth_b
+LOCKER_EXP_B="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_b)"
+[ "$LOCKER_EXP_B" != "0.000000" ] \
+ || _fail "unexpected locker expiration: $LOCKER_EXP_B"
+#echo "lock expiration before renewal $LOCKER_EXP_A, after renewal $LOCKER_EXP_B"
+[ "$LOCKER_EXP_B" != "$LOCKER_EXP_A" ] \
+ || _fail "locker expiration matches: $LOCKER_EXP_B"
+
+# no chance to drop the lock, rely on expiry
+kill -KILL $locker_pid || exit 1
+wait $locker_pid &> /dev/null
+sleep 1 # sleep until lock expiry
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_sixth
+#echo "lock expiry sixth: `cat ${TMP_DIR}/lock_state_sixth`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_sixth)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_sixth)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_sixth)"
+[ $LOCK_COUNT -eq 0 ] || _fail "expected 0 locks in rados state, got $LOCK_COUNT"
+
+rm ${TMP_DIR}/*
+rmdir $TMP_DIR
+
+echo "$0: all tests passed"
diff --git a/ctdb/utils/etcd/ctdb_etcd_lock b/ctdb/utils/etcd/ctdb_etcd_lock
new file mode 100755
index 0000000..dac2436
--- /dev/null
+++ b/ctdb/utils/etcd/ctdb_etcd_lock
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Copyright (C) 2016 Jose A. Rivera <jarrpa@samba.org>
+# Copyright (C) 2016 Ira Cooper <ira@samba.org>
+"""CTDB mutex helper using etcd.
+
+This script is intended to be run as a mutex helper for CTDB. It will try to
+connect to an existing etcd cluster and grab an etcd.Lock() to function as
+CTDB's cluster lock. Please see ctdb/doc/cluster_mutex_helper.txt for
+details on what we're SUPPOSED to be doing. :) To use this, include
+the following line in the ctdb.conf:
+
+ cluster lock = !/path/to/script
+
+You can also pass "-v", "-vv", or "-vvv" to include verbose output in the
+CTDB log. Additional "v"s indicate increases in verbosity.
+
+This mutex helper expects the system Python interpreter to have access to the
+etcd Python module. It also expects an etcd cluster to be configured and
+running. To integrate with this, there is an optional config file of the
+following format:
+
+key = value
+
+The following configuration variables (and their defaults) are defined for
+use by this script:
+
+port = 2379 # connecting port for the etcd cluster
+lock_ttl = 9 # seconds for TTL
+lock_refresh = 2 # seconds between attempts to maintain lock
+locks_dir = _ctdb # where to store CTDB locks in etcd
+ # The final etcd directory for any given lock looks like:
+ # /_locks/{locks_dir}/{netbios name}/
+
+In addition, any keyword parameter that can be used to configure an etcd
+client may be specified and modified here. For more documentation on these
+parameters, see here: https://github.com/jplana/python-etcd/
+
+"""
+import signal
+import time
+import sys
+import os
+import argparse
+import logging
+import subprocess
+
+import etcd
+
+# Helper Functions ------------------------------------------------------------
+#
+
+
+def process_args():
+ '''Process command-line arguments and return them.
+ '''
+ parser = argparse.ArgumentParser(
+ description=__doc__,
+ epilog='',
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument('-v', '--verbose',
+ action='count',
+ help='Display verbose output to stderr. '
+ 'Default is no output.',
+ default=0,
+ )
+ parser.add_argument('-c', '--config',
+ action='store',
+ help='Configuration file to use. The default behavior '
+ 'is to look is the base CTDB configuration '
+ 'directory, which can be overwritten by setting '
+ 'the CTDB_BASE environment variable, for a file '
+ 'called \'etcd\'. Default value is %(default)s.',
+ default=os.path.join(os.getenv('CTDB_BASE',
+ '/usr/local/etc/ctdb'),
+ 'etcd'),
+ )
+ args = parser.parse_args()
+
+ return args
+
+
+def setup_logging(verbose):
+ '''Setup logging based on specified verbosity.
+ '''
+
+ log_levels = [logging.ERROR, logging.WARNING, logging.DEBUG]
+ logging.basicConfig(level=log_levels[min(verbose, len(log_levels)-1)])
+
+
+def sigterm_handler(signum, frame):
+ """Handler for SIGTERM signals.
+
+ Calls sys.exit(), which raises SystemExit in the interrupted code; main()
+ catches SystemExit and releases the lock if it is held. The signum and
+ frame arguments are supplied by the signal module and are not used.
+ """
+ sys.exit()
+
+
+def print_nonl(out):
+ """Dumb shortcut for printing to stdout with no newline.
+ """
+ sys.stdout.write(str(out))
+ sys.stdout.flush()
+
+
+def int_or_not(s):
+ """Try to convert input to an integer.
+ """
+ try:
+ return int(s)
+ except ValueError:
+ return s
+
+# Mainline --------------------------------------------------------------------
+#
+
+
+def main():
+ args = process_args()
+
+ setup_logging(args.verbose)
+
+ # etcd config defaults
+ etcd_config = {
+ 'port': 2379,
+ 'locks_dir': '_ctdb',
+ 'lock_ttl': 9,
+ 'lock_refresh': 2,
+ }
+ # Find and read etcd config file
+ etcd_client_params = (
+ 'host',
+ 'port',
+ 'srv_domain',
+ 'version_prefix',
+ 'read_timeout',
+ 'allow_redirect',
+ 'protocol',
+ 'cert',
+ 'ca_cert',
+ 'username',
+ 'password',
+ 'allow_reconnect',
+ 'use_proxies',
+ 'expected_cluster_id',
+ 'per_host_pool_size',
+ )
+ if os.path.isfile(args.config):
+ f = open(args.config, 'r')
+ for line in f:
+ (key, value) = line.split("=", 1)
+ etcd_config[key.strip()] = int_or_not(value.strip())
+
+ # Minor hack: call out to shell to retrieve CTDB netbios name and PNN.
+ tmp = subprocess.Popen("testparm -s --parameter-name 'netbios name'; \
+ ctdb pnn",
+ shell=True,
+ universal_newlines=True,
+ stdout=subprocess.PIPE
+ ).stdout.read().strip()
+ nb_name, pnn = tmp.split()
+
+ # Try to get and hold the lock
+ try:
+ client = etcd.Client(
+ **{k: etcd_config[k] for k in
+ set(etcd_client_params).intersection(etcd_config)})
+ lock = etcd.Lock(client, etcd_config['locks_dir'] + "/" + nb_name)
+ lock._uuid = lock._uuid + "_" + pnn
+ logging.debug("Updated lock UUID: %s", lock.uuid)
+ ppid = os.getppid()
+ while True:
+ lock.acquire(blocking=False, lock_ttl=etcd_config['lock_ttl'])
+ if lock.is_acquired:
+ print_nonl(0)
+ else:
+ locks = "No locks found."
+ if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
+ keys = client.read(lock.path, recursive=True)
+ if keys is not None:
+ locks = "Existing locks:\n "
+ locks += '\n '.join(
+ (child.key + ": " + child.value for child in
+ keys.children))
+ logging.debug("Lock contention. %s", locks)
+ print_nonl(1)
+ break
+ os.kill(ppid, 0)
+ time.sleep(etcd_config['lock_refresh'])
+ except (OSError, SystemExit):
+ if lock is not None and lock.is_acquired:
+ lock.release()
+ except Exception:
+ print_nonl(3)
+ if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
+ raise
+
+
+if __name__ == "__main__":
+ signal.signal(signal.SIGTERM, sigterm_handler)
+
+ main()
diff --git a/ctdb/utils/nagios/README b/ctdb/utils/nagios/README
new file mode 100644
index 0000000..99fa6dc
--- /dev/null
+++ b/ctdb/utils/nagios/README
@@ -0,0 +1,56 @@
+check_ctdb 0.4
+
+This nagios plugin is free software, and comes with ABSOLUTELY NO WARRANTY.
+It may be used, redistributed and/or modified under the terms of the GNU
+General Public Licence (see http://www.fsf.org/licensing/licenses/gpl.txt).
+
+CTDB plugin
+
+Usage: check_ctdb -i <info>
+ [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]
+ [ -H <host> ] [-s] [ -l <login_name> ]
+ [ -V ] [ -h ]
+
+ -?, --usage
+ Print usage information
+ -h, --help
+ Print detailed help screen
+ -V, --version
+ Print version information
+ --extra-opts=[section][@file]
+ Read options from an ini file. See http://nagiosplugins.org/extra-opts for usage
+ -i, --info=<info>
+ Information: One of scriptstatus or ping.
+ -H, --hostname=<host>
+ Host name or IP Address.
+ -s, --sudo
+ Use sudo.
+ -l, --login=<login_name>
+ The user to log in as on the remote machine.
+ -w, --warning=THRESHOLD
+ Warning threshold. See
+ http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
+ for the threshold format.
+ -c, --critical=THRESHOLD
+ Critical threshold. See
+ http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
+ for the threshold format.
+ -t, --timeout=INTEGER
+ Seconds before plugin times out (default: 30)
+ -v, --verbose
+ Show details for command-line debugging (can repeat up to 3 times)
+Supported commands:
+ * scriptstatus :
+ check the ctdb scriptstatus command and return CRITICAL if one of the
+ scripts fails.
+ Perfdata count the number of scripts by state (ok, disabled, error,
+ total).
+ * ping :
+ check the ctdb ping command.
+ Perfdata count the number of nodes, the total ping time and the number
+ of clients.
+ Thresholds are checked against the number of nodes.
+
+
+Copyright (c) 2011 Nantes Metropole
+
diff --git a/ctdb/utils/nagios/check_ctdb b/ctdb/utils/nagios/check_ctdb
new file mode 100755
index 0000000..7803f9a
--- /dev/null
+++ b/ctdb/utils/nagios/check_ctdb
@@ -0,0 +1,279 @@
+#!/usr/bin/perl -w
+# Nagios plugin to monitor CTDB (Clustered Trivial Database)
+#
+# License: GPL
+# Copyright (c) 2011 Nantes Metropole
+# Author: Mathieu Parent <math.parent@gmail.com>
+# Contributor(s): -
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+use strict;
+use warnings;
+use vars qw($PROGNAME $VERSION $output $values $result);
+use Nagios::Plugin;
+use File::Basename;
+
+$PROGNAME = basename($0);
+$VERSION = '0.4';
+
+my $np = Nagios::Plugin->new(
+ usage => "Usage: %s -i <info>\n"
+ . " [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]\n"
+ . " [ -H <host> ] [-s] [ -l <login_name> ]\n"
+ . ' [ -V ] [ -h ]',
+ version => $VERSION,
+ plugin => $PROGNAME,
+ shortname => uc($PROGNAME),
+ blurb => 'CTDB plugin',
+ extra => "Supported commands:\n"
+ . " * scriptstatus :\n"
+ . " check the ctdb scriptstatus command and return CRITICAL if one of the\n"
+ . " scripts fails.\n"
+ . " Perfdata count the number of scripts by state (ok, disabled, error,\n"
+ . " total).\n"
+ . " * ping :\n"
+ . " check the ctdb ping command.\n"
+ . " Perfdata count the number of nodes, the total ping time and the number\n"
+ . " of clients.\n"
+ . " Thresholds are checked against the number of nodes.\n"
+ . "\n\nCopyright (c) 2011 Nantes Metropole",
+ timeout => 30,
+);
+
+$np->add_arg(
+ spec => 'info|i=s',
+ help => "-i, --info=<info>\n"
+ . ' Information: One of scriptstatus or ping.',
+ required => 1,
+);
+
+# -H takes the remote host; the placeholder matches the usage line
+# ("[ -H <host> ]")
+$np->add_arg(
+    spec => 'hostname|H=s',
+    help => "-H, --hostname=<host>\n"
+      . '   Host name or IP Address.',
+    required => 0,
+);
+
+$np->add_arg(
+ spec => 'sudo|s',
+ help => "-s, --sudo\n"
+ . ' Use sudo.',
+ required => 0,
+);
+
+# -l takes the remote user; the placeholder matches the usage line
+# ("[ -l <login_name> ]")
+$np->add_arg(
+    spec => 'login|l=s',
+    help => "-l, --login=<login_name>\n"
+      . '   The user to log in as on the remote machine.',
+    required => 0,
+);
+
+$np->add_arg(
+ spec => 'warning|w=s',
+ help => "-w, --warning=THRESHOLD\n"
+ . " Warning threshold. See\n"
+ . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+ . ' for the threshold format.',
+ required => 0,
+);
+
+$np->add_arg(
+ spec => 'critical|c=s',
+ help => "-c, --critical=THRESHOLD\n"
+ . " Critical threshold. See\n"
+ . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+ . ' for the threshold format.',
+ required => 0,
+);
+
+$np->getopts;
+
+my $info = $np->opts->info;
+my $hostname = $np->opts->hostname;
+my $login = $np->opts->login;
+my $sudo = $np->opts->sudo;
+my $warning = $np->opts->warning;
+my $critical = $np->opts->critical;
+my $percw;
+my $percc;
+
+$output = "";
+
+if (defined($critical))
+{
+ ($percc, $critical) = check_percantage($critical);
+ $critical = undef if ($critical eq '');
+}
+
+if (defined($warning))
+{
+ ($percw, $warning) = check_percantage($warning);
+ $warning = undef if ($warning eq '');
+}
+
+$np->set_thresholds(critical => $critical, warning => $warning);
+
+my $stderr;
+
+# Run the given command (list form) and make its stdout readable via PIPE.
+#
+# The command is prefixed with "sudo" when -s was given and wrapped in
+# "ssh [-l <login>] <host>" when -H was given. While the command is open,
+# STDERR is redirected into the in-memory buffer $stderr; the real STDERR
+# is kept in OLDERR. On failure $result is set to CRITICAL, a message is
+# appended to $output and STDERR is restored here.
+sub safe_open_command {
+    unshift @_, "sudo" if $sudo;
+    if ($hostname) {
+        unshift @_, $hostname;
+        unshift @_, "-l", $login if $login;
+        unshift @_, "ssh";
+    }
+    open(OLDERR, ">&", \*STDERR) or die "Can't dup STDERR: $!";
+    $stderr = "";
+    close STDERR;
+    open(STDERR, ">>", \$stderr) or die "Can't open STDERR: $!";
+    if ($np->opts->verbose) {
+        print "Executing: @_\n";
+    }
+    if (!open(PIPE, '-|', @_)) {
+        $result = CRITICAL;
+        $output .= "Cannot open command '@_': $! ($stderr). ";
+        # restore STDERR: ">&" duplicates the saved handle, plain ">" would
+        # not re-attach STDERR to it
+        open(STDERR, ">&", \*OLDERR) or die "Can't dup OLDERR: $!";
+    }
+}
+
+# Close PIPE and translate the child's wait status ($?) into a result:
+#   - execution failure, death by signal or any exit value other than 0/255
+#     sets $result to CRITICAL and appends a message to $output;
+#   - exit value 255 sets $result to WARNING (message appended only when
+#     $output is still empty).
+# Finally STDERR is re-attached to the handle saved in OLDERR.
+sub safe_close_command {
+ close(PIPE);
+
+ if ($? == -1) {
+ $result = CRITICAL;
+ $output .= "failed to execute: $!. ";
+ } elsif ($? & 127) {
+ # low 7 bits hold the terminating signal, bit 128 the coredump flag
+ $result = CRITICAL;
+ $output .= sprintf("child died with signal %d, %s coredump. ",
+ ($? & 127), ($? & 128) ? 'with' : 'without');
+ } elsif ($? >> 8) {
+ # high byte holds the exit value
+ if (($? >> 8) == 255) {
+ # ctdb returns -1=255 if any node is disconnected
+ $result = WARNING;
+ $output .= sprintf("child exited with value %d. ", $? >> 8) if $output eq "";
+ } else {
+ $result = CRITICAL;
+ $output .= sprintf("child exited with value %d. ", $? >> 8);
+ }
+ }
+ # restore STDERR
+ open(STDERR, ">&OLDERR") or die "Can't dup OLDERR: $!";
+}
+
+# main :
+
+# Dispatch on the requested information type (-i):
+#   scriptstatus - parse "ctdb -X scriptstatus" and count scripts by state
+#   ping         - compare "ctdb -n all ping" responses with the node list
+# Every branch ends in nagios_exit().
+if ($info eq "scriptstatus") {
+    $result = OK;
+    safe_open_command('ctdb', '-X', 'scriptstatus');
+    if ($result == OK) {
+        my $script_count = 0;
+        my $ok_script_count = 0;
+        my $disabled_script_count = 0;
+        my $error_script_count = 0;
+        while (<PIPE>) {
+            next if $. == 1; # Header
+            $script_count++;
+            chop;
+            # split() takes a regex: an unescaped "|" is an empty alternation
+            # that matches between every character, so the field separator
+            # has to be escaped
+            my ($col0, $type, $name, $code, $status, $start, $end, @error) = split(/\|/);
+            if ($col0 ne '') {
+                # Old version, before 30 Aug 2011 and commit a779d83a6213
+                ($type, $name, $code, $status, $start, $end, @error) = ($col0, $type, $name, $code, $status, $start, $end, @error);
+            }
+            my $error = join(':', @error);
+            if ($error ne "") {
+                $output = "$output ;; " if $output;
+                $output = "$output$name ($status=$code): $error ";
+                if ($result != CRITICAL) {
+                    $result = WARNING;
+                }
+            }
+            if ($status eq "OK") {
+                $ok_script_count++;
+                next;
+            }
+            if ($status eq "DISABLED") {
+                $disabled_script_count++;
+                next;
+            }
+            # anything that is neither OK nor DISABLED counts as an error
+            $error_script_count++;
+            $result = WARNING;
+        }
+        safe_close_command();
+        $np->add_perfdata(label => "ok", value => $ok_script_count, uom => '',
+            min => 0, max => $script_count);
+        $np->add_perfdata(label => "disabled", value => $disabled_script_count, uom => '',
+            min => 0, max => $script_count);
+        $np->add_perfdata(label => "error", value => $error_script_count, uom => '',
+            min => 0, max => $script_count, warning => '0', critical => '0');
+        $np->add_perfdata(label => "total", value => $script_count, uom => '',
+            min => 0, max => $script_count);
+        if ($result == OK) {
+            $result = $np->check_threshold(check => $error_script_count, warning => '0', critical => '0');
+        }
+    }
+    $np->nagios_exit($result, $output);
+} elsif ($info eq "ping") {
+    # Get expected nodes count
+    $result = OK;
+    safe_open_command('cat', '/etc/ctdb/nodes');
+    # drain the pipe; afterwards $. holds the number of lines read
+    1 while( <PIPE> );
+    my $max_nodes_count = $.;
+    safe_close_command();
+    # ctdb ping
+    $result = OK;
+    safe_open_command('ctdb', '-n', 'all', 'ping');
+    if ($result == OK) {
+        my $nodes_count = 0;
+        my $time_total = 0.0;
+        my $clients_count = 0;
+        while (<PIPE>) {
+            chop;
+            if ($_ =~ /^response from (\d+) time=([0-9.]+) sec \((\d+) clients\)$/) {
+                my ($node_id, $time, $clients) = ($1,$2,$3);
+                $nodes_count += 1;
+                $time_total += $time;
+                $clients_count += $clients;
+            } elsif ($_ =~ /^Unable to get ping response from node (\d+)$/) {
+                # unreachable node: it is simply not counted
+            } else {
+                $result = CRITICAL;
+                $output .= "'$_' doesn't match regexp. "
+            }
+        }
+        $output .= sprintf("%d missing nodes. ", $max_nodes_count - $nodes_count) if $nodes_count < $max_nodes_count;
+        safe_close_command();
+        $np->add_perfdata(label => "nodes", value => $nodes_count, uom => '',
+            min => 0, max => $max_nodes_count, warning => $warning, critical => $critical);
+        $np->add_perfdata(label => "ping_time", value => $time_total, uom => 's',
+            min => 0, max => undef);
+        $np->add_perfdata(label => "clients", value => $clients_count, uom => '',
+            min => 0, max => undef);
+        if ($result == OK) {
+            $result = $np->check_threshold(check => $nodes_count);
+        }
+    }
+    $np->nagios_exit($result, $output);
+} else {
+    $np->nagios_exit(UNKNOWN, "Unknown command: '$info'");
+}
+
+# Strip the first "%" from a threshold string.
+# Returns (flag, value): flag is true when a "%" was removed, value is the
+# threshold without it.
+sub check_percantage
+{
+    my $value = shift;
+    my $had_percent = ($value =~ s/\%//);
+    return ($had_percent, $value);
+}
+
diff --git a/ctdb/utils/ping_pong/ping_pong.c b/ctdb/utils/ping_pong/ping_pong.c
new file mode 100644
index 0000000..3d28f34
--- /dev/null
+++ b/ctdb/utils/ping_pong/ping_pong.c
@@ -0,0 +1,303 @@
+/*
+ A ping-pong fcntl byte range lock test
+
+ Copyright (C) Andrew Tridgell 2002
+ Copyright (C) Michael Adam 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ This measures the ping-pong byte range lock latency. It is
+ especially useful on a cluster of nodes sharing a common lock
+ manager as it will give some indication of the lock managers
+ performance under stress.
+
+ tridge@samba.org, February 2002
+
+*/
+
+#define _XOPEN_SOURCE 500
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+
+static struct timeval tp1,tp2;
+
+static int do_reads, do_writes, use_mmap, do_check, do_brl_test;
+
+/* record the current time of day in tp1 as the start of a measurement */
+static void start_timer(void)
+{
+ gettimeofday(&tp1,NULL);
+}
+
+/*
+ * store the current time of day in tp2 and return the seconds elapsed
+ * since tp1 was last set
+ */
+static double end_timer(void)
+{
+	double started, stopped;
+
+	gettimeofday(&tp2,NULL);
+	stopped = tp2.tv_sec + (tp2.tv_usec*1.0e-6);
+	started = tp1.tv_sec + (tp1.tv_usec*1.0e-6);
+	return stopped - started;
+}
+
+/*
+ * take a write lock on len bytes starting at offset in an open file;
+ * blocks until the lock is granted when wait is true, otherwise fails
+ * immediately. Returns the fcntl() result.
+ */
+static int lock_range(int fd, int offset, int len, bool wait)
+{
+	struct flock request = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offset,
+		.l_len = len,
+		.l_pid = 0,
+	};
+	int cmd = wait ? F_SETLKW : F_SETLK;
+
+	return fcntl(fd, cmd, &request);
+}
+
+/*
+ * check whether we could place a lock
+ *
+ * Asks the kernel (F_GETLK) whether a write lock on len bytes at offset
+ * would be granted. Returns 0 if it would, 1 if a conflicting lock is held
+ * (its details are printed), and -1 if fcntl() itself failed.
+ */
+static int check_lock(int fd, int offset, int len)
+{
+	struct flock lock;
+	int ret;
+
+	lock.l_type = F_WRLCK;
+	lock.l_whence = SEEK_SET;
+	lock.l_start = offset;
+	lock.l_len = len;
+	lock.l_pid = 0;
+
+	ret = fcntl(fd, F_GETLK, &lock);
+	if (ret != 0) {
+		printf("error calling fcntl F_GETLK: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (lock.l_type == F_UNLCK) {
+		/* we would be able to place the lock */
+		return 0;
+	}
+
+	/*
+	 * we would not be able to place lock; l_start and l_len are off_t,
+	 * so print them as long long rather than truncating to int
+	 */
+	printf("check_lock failed: lock held: "
+	       "pid='%d', type='%d', start='%lld', len='%lld'\n",
+	       (int)lock.l_pid, (int)lock.l_type,
+	       (long long)lock.l_start, (long long)lock.l_len);
+	return 1;
+}
+
+/*
+ * release the lock on len bytes starting at offset in an open file.
+ * Returns the fcntl() result.
+ */
+static int unlock_range(int fd, int offset, int len)
+{
+	struct flock request = {
+		.l_type = F_UNLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offset,
+		.l_len = len,
+		.l_pid = 0,
+	};
+
+	return fcntl(fd, F_SETLKW, &request);
+}
+
+/*
+ * run the ping pong test on fd
+ *
+ * The file is sized to num_locks+1 bytes. The loop then walks the byte
+ * offsets 0..num_locks-1 in a ring: lock the next byte, optionally check,
+ * read and/or write the current byte, unlock the current byte, advance.
+ * About once per second the lock rate is printed. The loop only ends when
+ * the initial lock fails or check_lock() reports a problem.
+ */
+static void ping_pong(int fd, int num_locks)
+{
+	unsigned count = 0;
+	int i=0, loops=0;
+	unsigned char *val;
+	unsigned char incr=0, last_incr=0;
+	unsigned char *p = NULL;
+	int ret;
+
+	ret = ftruncate(fd, num_locks+1);
+	if (ret == -1) {
+		printf("ftruncate failed: %s\n", strerror(errno));
+		return;
+	}
+
+	if (use_mmap) {
+		p = mmap(NULL, num_locks+1, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+		if (p == MAP_FAILED) {
+			printf("mmap failed: %s\n", strerror(errno));
+			return;
+		}
+	}
+
+	/* val[] remembers the byte last seen at each offset */
+	val = (unsigned char *)calloc(num_locks+1, sizeof(unsigned char));
+	if (val == NULL) {
+		printf("calloc failed\n");
+		if (use_mmap) {
+			munmap(p, num_locks+1);
+		}
+		return;
+	}
+
+	start_timer();
+
+	ret = lock_range(fd, 0, 1, true);
+	if (ret != 0) {
+		printf("initial lock at 0 failed! - %s\n", strerror(errno));
+		goto done;
+	}
+
+	i = 0;
+
+	while (1) {
+		if (lock_range(fd, (i+1) % num_locks, 1, true) != 0) {
+			printf("lock at %d failed! - %s\n",
+			       (i+1) % num_locks, strerror(errno));
+		}
+		if (do_check) {
+			ret = check_lock(fd, i, 1);
+			if (ret != 0) {
+				goto done;
+			}
+		}
+		if (do_reads) {
+			/*
+			 * start from the remembered value so that a failed
+			 * pread() leaves incr and val[i] well-defined instead
+			 * of reading an uninitialized byte
+			 */
+			unsigned char c = val[i];
+			if (use_mmap) {
+				c = p[i];
+			} else if (pread(fd, &c, 1, i) != 1) {
+				printf("read failed at %d\n", i);
+			}
+			incr = c - val[i];
+			val[i] = c;
+		}
+		if (do_writes) {
+			char c = val[i] + 1;
+			if (use_mmap) {
+				p[i] = c;
+			} else if (pwrite(fd, &c, 1, i) != 1) {
+				printf("write failed at %d\n", i);
+			}
+		}
+		if (unlock_range(fd, i, 1) != 0) {
+			printf("unlock at %d failed! - %s\n",
+			       i, strerror(errno));
+		}
+		i = (i+1) % num_locks;
+		count++;
+		if (loops > num_locks && incr != last_incr) {
+			last_incr = incr;
+			printf("data increment = %u\n", incr);
+			fflush(stdout);
+		}
+		if (end_timer() > 1.0) {
+			printf("%8u locks/sec\r",
+			       (unsigned)(2*count/end_timer()));
+			fflush(stdout);
+			start_timer();
+			count=0;
+		}
+		loops++;
+	}
+
+done:
+	if (use_mmap) {
+		munmap(p, num_locks+1);
+	}
+	free(val);
+}
+
+/* print the command line synopsis and the option list to stdout */
+static void usage(void)
+{
+	fputs("ping_pong -rwmc <file> <num_locks>\n"
+	      "ping_pong -l <file>\n\n"
+	      "Options\n"
+	      " -r do reads\n"
+	      " -w do writes\n"
+	      " -m use mmap\n"
+	      " -c check locks\n"
+	      " -l test for working byte range locks\n",
+	      stdout);
+}
+
+/*
+ * parse the options, open (creating if needed) the test file and run either
+ * the byte range lock sanity test (-l) or the ping pong loop.
+ *
+ * Exits with status 1 on usage errors, on an unknown option, when the file
+ * cannot be opened, or when num_locks is not positive.
+ */
+int main(int argc, char *argv[])
+{
+	char *fname;
+	int fd, num_locks;
+	int c;
+
+	while ((c = getopt(argc, argv, "rwmcl")) != -1) {
+		switch (c){
+		case 'w':
+			do_writes = 1;
+			break;
+		case 'r':
+			do_reads = 1;
+			break;
+		case 'm':
+			use_mmap = 1;
+			break;
+		case 'c':
+			do_check = 1;
+			break;
+		case 'l':
+			do_brl_test = 1;
+			break;
+		default:
+			/*
+			 * getopt() returns '?' here; the character that was
+			 * actually rejected is in optopt
+			 */
+			fprintf(stderr, "Unknown option '%c'\n", optopt);
+			exit(1);
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+
+	if (argc < 1) {
+		usage();
+		exit(1);
+	}
+
+	fname = argv[0];
+
+	fd = open(fname, O_CREAT|O_RDWR, 0600);
+	if (fd == -1) {
+		/* say why we give up instead of exiting silently */
+		fprintf(stderr, "open of %s failed: %s\n",
+			fname, strerror(errno));
+		exit(1);
+	}
+
+	if (do_brl_test) {
+		/* len 0 means "to end of file": try to lock the whole file */
+		if (lock_range(fd, 0, 0, false) != 0) {
+			printf("file already locked, calling check_lock to tell us who has it locked:\n");
+			(void)check_lock(fd, 0, 0);
+			printf("Working POSIX byte range locks\n");
+			exit(0);
+		}
+
+		printf("Holding lock, press any key to continue...\n");
+		printf("You should run the same command on another node now.\n");
+		(void)getchar();
+		printf("Good bye.\n");
+		exit(0);
+	}
+
+	if (argc < 2) {
+		usage();
+		exit(1);
+	}
+
+	num_locks = atoi(argv[1]);
+	if (num_locks <= 0) {
+		printf("num_locks should be > 0\n");
+		exit(1);
+	}
+
+	ping_pong(fd, num_locks);
+
+	return 0;
+}
diff --git a/ctdb/utils/pmda/Install b/ctdb/utils/pmda/Install
new file mode 100644
index 0000000..a56a635
--- /dev/null
+++ b/ctdb/utils/pmda/Install
@@ -0,0 +1,36 @@
+#! /bin/sh
+#
+# Copyright (c) 1997 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Install the ctdb PMDA and/or PMNS
+#
+
+. $PCP_DIR/etc/pcp.env
+. $PCP_SHARE_DIR/lib/pmdaproc.sh
+
+iam=ctdb
+pmda_interface=2
+
+# runs as daemon and only supports pipe IPC
+daemon_opt=true
+dso_opt=false
+pipe_opt=true
+socket_opt=false
+
+pmdaSetup
+pmdaInstall
+exit 0
diff --git a/ctdb/utils/pmda/README b/ctdb/utils/pmda/README
new file mode 100644
index 0000000..f8dbbbc
--- /dev/null
+++ b/ctdb/utils/pmda/README
@@ -0,0 +1,84 @@
+CTDB PMDA
+===========
+
+This PMDA extracts metrics from the locally running ctdbd daemon for
+export to PMCD.
+
+Note:
+ This PMDA may be remade from source and hence requires IDO (or
+ more specifically a C compiler) to be installed.
+
+ Uses of make(1) may fail (without removing or clobbering files)
+ if the C compiler cannot be found. This is most likely to
+ happen when running the PMDA ./Install script.
+
+ The only remedial action is to install the C compiler, or
+ hand-craft changes to the Makefile.
+
+Metrics
+=======
+
+The file ./help contains descriptions for all of the metrics exported
+by this PMDA.
+
+Once the PMDA has been installed, the following command will list all
+the available metrics and their explanatory "help" text:
+
+ $ pminfo -fT ctdb
+
+Installation
+============
+
+ + # cd $PCP_PMDAS_DIR/ctdb
+
+ + Check that there is no clash in the Performance Metrics Domain
+ defined in ./domain.h and the other PMDAs currently in use (see
+ $PCP_PMCDCONF_PATH). If there is, edit ./domain.h to choose another
+ domain number.
+
+ + Then simply use
+
+ # ./Install
+
+ and choose both the "collector" and "monitor" installation
+ configuration options.
+
+ The ./Install script configures this PMDA to run as a daemon that
+ talks to PMCD over a pipe (the DSO and socket variants are disabled
+ in ./Install), so there is no implementation or IPC method to choose
+ -- everything else is automated
+
+De-installation
+===============
+
+ + Simply use
+
+ # cd $PCP_PMDAS_DIR/ctdb
+ # ./Remove
+
+Troubleshooting
+===============
+
+ + After installing or restarting the agent, the PMCD log file
+ ($PCP_LOG_DIR/pmcd/pmcd.log) and the PMDA log file
+ ($PCP_LOG_DIR/pmcd/pmda_ctdb.log) should be checked for any warnings
+ or errors.
+
+
+Adding a New Metric
+===================
+
+This section walks through the development task of adding a new metric to the
+CTDB PMDA.
+
+ + Define the metric in the pmns file with a unique metric id. See the pmns(4)
+ man page for details.
+
+ + Add a description of the metric to the help file.
+
+ + Taking note of the previously assigned metric id, add a new entry to the
+ metrictab structure in pmda_ctdb.c. See the pmdaInit(3) man page for
+ details.
+
+ + Ensure the counter is already a member of the ctdb_statistics structure.
+ Finally, add code to pmda_ctdb_fetch_cb() to handle fetch requests for the
+ newly defined metric.
diff --git a/ctdb/utils/pmda/Remove b/ctdb/utils/pmda/Remove
new file mode 100644
index 0000000..7d1c509
--- /dev/null
+++ b/ctdb/utils/pmda/Remove
@@ -0,0 +1,29 @@
+#! /bin/sh
+#
+# Copyright (c) 1997 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Remove the ctdb PMDA
+#
+
+. $PCP_DIR/etc/pcp.env
+. $PCP_SHARE_DIR/lib/pmdaproc.sh
+
+iam=ctdb
+
+pmdaSetup
+pmdaRemove
+exit 0
diff --git a/ctdb/utils/pmda/domain.h b/ctdb/utils/pmda/domain.h
new file mode 100644
index 0000000..0bed7fe
--- /dev/null
+++ b/ctdb/utils/pmda/domain.h
@@ -0,0 +1,19 @@
+/* domain.h
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * Performance Metrics Domain number of the CTDB PMDA.  pmda_ctdb.c passes
+ * it to pmdaDaemon() and the pmns file uses it as the CTDB:cluster:item
+ * prefix; change it here if it clashes with another installed PMDA.
+ */
+#define CTDB 110
diff --git a/ctdb/utils/pmda/help b/ctdb/utils/pmda/help
new file mode 100644
index 0000000..0e9984e
--- /dev/null
+++ b/ctdb/utils/pmda/help
@@ -0,0 +1,106 @@
+#
+# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# ctdb PMDA help file in the ASCII format
+#
+# lines beginning with a # are ignored
+# lines beginning @ introduce a new entry of the form
+# @ metric_name oneline-text
+# help text goes
+# here over multiple lines
+# ...
+#
+# the metric_name is decoded against the default PMNS -- as a special case,
+# a name of the form NNN.MM (for numeric NNN and MM) is interpreted as an
+# instance domain identification, and the text describes the instance domain
+#
+# blank lines before the @ line are ignored
+#
+
+@ ctdb.num_clients number of clients connected to ctdbd
+
+@ ctdb.frozen whether any databases are frozen
+
+@ ctdb.recovering whether recovery is active
+
+@ ctdb.client_packets_sent number of packets sent to all clients
+
+@ ctdb.client_packets_recv number of packets received from all clients
+
+@ ctdb.node_packets_sent number of packets sent to other nodes
+
+@ ctdb.node_packets_recv number of packets received from other nodes
+
+@ ctdb.keepalive_packets_sent number of keepalive packets sent to other nodes
+
+@ ctdb.keepalive_packets_recv number of keepalive packets received from other nodes
+
+@ ctdb.node.req_call number of node CTDB_REQ_CALL packets handled
+
+@ ctdb.node.reply_call number of node CTDB_REPLY_CALL packets handled
+
+@ ctdb.node.req_dmaster number of node CTDB_REQ_DMASTER packets handled
+
+@ ctdb.node.reply_dmaster number of node CTDB_REPLY_DMASTER packets handled
+
+@ ctdb.node.reply_error number of node CTDB_REPLY_ERROR packets handled
+
+@ ctdb.node.req_message number of node CTDB_REQ_MESSAGE packets handled
+
+@ ctdb.node.req_control number of node CTDB_REQ_CONTROL packets handled
+
+@ ctdb.node.reply_control number of node CTDB_REPLY_CONTROL packets handled
+
+@ ctdb.client.req_call number of client CTDB_REQ_CALL packets handled
+
+@ ctdb.client.req_message number of client CTDB_REQ_MESSAGE packets handled
+
+@ ctdb.client.req_control number of client CTDB_REQ_CONTROL packets handled
+
+@ ctdb.timeouts.call (counter not implemented) number of call timeouts
+
+@ ctdb.timeouts.control number of node control message request timeouts awaiting reply
+
+@ ctdb.timeouts.traverse number of database traversal timeouts
+
+@ ctdb.total_calls total number of client ctdb request calls received
+
+@ ctdb.pending_calls total number of client ctdb request calls in progress
+
+@ ctdb.lockwait_calls number of tdb chainlock lockwait calls
+
+@ ctdb.pending_lockwait_calls number of lockwait calls waiting for a lock
+
+@ ctdb.childwrite_calls number of childwrite calls
+
+@ ctdb.pending_childwrite_calls number of childwrite calls in progress
+
+@ ctdb.memory_used total size of the ctdbd null talloc pool
+
+@ ctdb.max_hop_count maximum hops performed by a CTDB_REQ_CALL packet
+
+@ ctdb.max_reclock_ctdbd maximum recovery lock latency during setrecmode
+
+@ ctdb.max_reclock_recd maximum recovery lock latency as reported by the recovery process
+
+@ ctdb.max_call_latency maximum time spent handling a client request call
+
+@ ctdb.max_lockwait_latency maximum time spent waiting for a tdb chainlock
+
+@ ctdb.max_childwrite_latency maximum time spent performing a childwrite
+
+@ ctdb.num_recoveries number of recoveries finished
diff --git a/ctdb/utils/pmda/pmda_ctdb.c b/ctdb/utils/pmda/pmda_ctdb.c
new file mode 100644
index 0000000..4f7933d
--- /dev/null
+++ b/ctdb/utils/pmda/pmda_ctdb.c
@@ -0,0 +1,559 @@
+/*
+ * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
+ *
+ * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2011 David Disseldorp
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/time.h"
+#include "lib/util/blocking.h"
+
+#include "client/client.h"
+#include "client/client_sync.h"
+
+#include <pcp/pmapi.h>
+#include <pcp/pmda.h>
+
+#ifdef HAVE___PMID_INT
+#include <pcp/impl.h>
+
+#define pmID_cluster(id) id->cluster
+#define pmID_item(id) id->item
+#define pmGetProgname() pmProgname
+#define pmSetProgname(a) __pmSetProgname(a)
+#endif
+
+#include "domain.h"
+
+/*
+ * CTDB PMDA
+ *
+ * This PMDA connects to the locally running ctdbd daemon and pulls
+ * statistics for export via PCP. The ctdbd Unix domain socket path can be
+ * specified with the CTDB_SOCKET environment variable, otherwise the default
+ * path is used.
+ */
+
+/*
+ * All metrics supported in this PMDA - one table entry for each.
+ * The 4th field specifies the serial number of the instance domain
+ * for the metric, and must be either PM_INDOM_NULL (denoting a
+ * metric that only ever has a single value), or the serial number
+ * of one of the instance domains declared in an instance domain table.
+ * This PMDA passes no instance domain table to pmdaInit(), so every
+ * entry below uses PM_INDOM_NULL.
+ */
+static pmdaMetric metrictab[] = {
+ /* num_clients */
+ { NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* frozen */
+ { NULL, { PMDA_PMID(0,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* recovering */
+ { NULL, { PMDA_PMID(0,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* client_packets_sent */
+ { NULL, { PMDA_PMID(0,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client_packets_recv */
+ { NULL, { PMDA_PMID(0,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node_packets_sent */
+ { NULL, { PMDA_PMID(0,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node_packets_recv */
+ { NULL, { PMDA_PMID(0,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* keepalive_packets_sent */
+ { NULL, { PMDA_PMID(0,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* keepalive_packets_recv */
+ { NULL, { PMDA_PMID(0,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* req_call */
+ { NULL, { PMDA_PMID(1,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* reply_call */
+ { NULL, { PMDA_PMID(1,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* req_dmaster */
+ { NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* reply_dmaster */
+ { NULL, { PMDA_PMID(1,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* reply_error */
+ { NULL, { PMDA_PMID(1,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* req_message */
+ { NULL, { PMDA_PMID(1,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* req_control */
+ { NULL, { PMDA_PMID(1,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* reply_control */
+ { NULL, { PMDA_PMID(1,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* req_call */
+ { NULL, { PMDA_PMID(2,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* req_message */
+ { NULL, { PMDA_PMID(2,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* req_control */
+ { NULL, { PMDA_PMID(2,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* call */
+ { NULL, { PMDA_PMID(3,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* control */
+ { NULL, { PMDA_PMID(3,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* traverse */
+ { NULL, { PMDA_PMID(3,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* total_calls */
+ { NULL, { PMDA_PMID(0,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* pending_calls */
+ { NULL, { PMDA_PMID(0,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* locks.num_calls */
+ { NULL, { PMDA_PMID(0,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* locks.num_pending */
+ { NULL, { PMDA_PMID(0,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* childwrite_calls */
+ { NULL, { PMDA_PMID(0,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* pending_childwrite_calls */
+ { NULL, { PMDA_PMID(0,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* memory_used */
+ { NULL, { PMDA_PMID(0,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, },
+ /* max_hop_count */
+ { NULL, { PMDA_PMID(0,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* reclock.ctdbd.max */
+ { NULL, { PMDA_PMID(0,17), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* reclock.recd.max */
+ { NULL, { PMDA_PMID(0,18), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* call_latency.max */
+ { NULL, { PMDA_PMID(0,19), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* locks.latency.max */
+ { NULL, { PMDA_PMID(0,20), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* childwrite_latency.max */
+ { NULL, { PMDA_PMID(0,21), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* num_recoveries */
+ { NULL, { PMDA_PMID(0,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+};
+
+static struct tevent_context *ev;
+static struct ctdb_client_context *client;
+static struct ctdb_statistics *stats;
+
+/*
+ * Disconnect callback registered on the client context by
+ * pmda_ctdb_daemon_connect().  Logs that ctdbd is gone and frees the
+ * client context; pmda_ctdb_fetch() reconnects when it finds
+ * 'client' == NULL.
+ * NOTE(review): relies on TALLOC_FREE() resetting 'client' to NULL and
+ * leaves the tevent context 'ev' allocated -- confirm that is intended.
+ * 'args' is unused (the callback is registered with NULL private data).
+ */
+static void
+pmda_ctdb_disconnected(void *args)
+{
+ fprintf(stderr, "ctdbd unreachable\n");
+ TALLOC_FREE(client);
+}
+
+
+/*
+ * Create a new tevent context and connect to the local ctdbd.
+ *
+ * The socket path is taken from the CTDB_SOCKET environment variable,
+ * falling back to the compiled-in CTDB_SOCKET default.  On success the
+ * globals 'ev' and 'client' are valid and the disconnect callback is
+ * registered on the client.
+ *
+ * Returns 0 on success and -1 on failure; after a failed connect both
+ * 'ev' and 'client' are NULL.
+ */
+static int
+pmda_ctdb_daemon_connect(void)
+{
+	const char *socket_name;
+	int ret;
+
+	ev = tevent_context_init(NULL);
+	if (ev == NULL) {
+		fprintf(stderr, "Failed to init event ctx\n");
+		return -1;
+	}
+
+	socket_name = getenv("CTDB_SOCKET");
+	if (socket_name == NULL) {
+		socket_name = CTDB_SOCKET;
+	}
+
+	/* The client context is allocated as a talloc child of ev */
+	ret = ctdb_client_init(ev, ev, socket_name, &client);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to connect to ctdb daemon via %s\n",
+			socket_name);
+		goto err_ev;
+	}
+
+	ctdb_client_set_disconnect_callback(client, pmda_ctdb_disconnected,
+					    NULL);
+
+	return 0;
+
+err_ev:
+	/*
+	 * TALLOC_FREE() also resets the global to NULL, so no dangling
+	 * pointer to the freed event context is left behind.
+	 */
+	TALLOC_FREE(ev);
+	client = NULL;
+	return -1;
+}
+
+/*
+ * Drop the connection to ctdbd: free the client context and the tevent
+ * context it was created on.  Both globals are reset to NULL so that
+ * neither is left pointing at freed memory until the next reconnect.
+ */
+static void
+pmda_ctdb_daemon_disconnect(void)
+{
+	TALLOC_FREE(client);
+	TALLOC_FREE(ev);
+}
+
+/*
+ * Copy the cluster 0 (top-level ctdb.*) metric selected by 'item' from
+ * the current statistics snapshot into 'atom'.
+ *
+ * Items 17-21 are latency maxima stored as doubles; all other items are
+ * stored as unsigned 32-bit values.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item number.
+ */
+static int
+fill_base(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0:
+		atom->ul = stats->num_clients;
+		return 0;
+	case 1:
+		atom->ul = stats->frozen;
+		return 0;
+	case 2:
+		atom->ul = stats->recovering;
+		return 0;
+	case 3:
+		atom->ul = stats->client_packets_sent;
+		return 0;
+	case 4:
+		atom->ul = stats->client_packets_recv;
+		return 0;
+	case 5:
+		atom->ul = stats->node_packets_sent;
+		return 0;
+	case 6:
+		atom->ul = stats->node_packets_recv;
+		return 0;
+	case 7:
+		atom->ul = stats->keepalive_packets_sent;
+		return 0;
+	case 8:
+		atom->ul = stats->keepalive_packets_recv;
+		return 0;
+	case 9:
+		atom->ul = stats->total_calls;
+		return 0;
+	case 10:
+		atom->ul = stats->pending_calls;
+		return 0;
+	case 11:
+		atom->ul = stats->locks.num_calls;
+		return 0;
+	case 12:
+		atom->ul = stats->locks.num_pending;
+		return 0;
+	case 13:
+		atom->ul = stats->childwrite_calls;
+		return 0;
+	case 14:
+		atom->ul = stats->pending_childwrite_calls;
+		return 0;
+	case 15:
+		atom->ul = stats->memory_used;
+		return 0;
+	case 16:
+		atom->ul = stats->max_hop_count;
+		return 0;
+	case 17:
+		atom->d = stats->reclock.ctdbd.max;
+		return 0;
+	case 18:
+		atom->d = stats->reclock.recd.max;
+		return 0;
+	case 19:
+		atom->d = stats->call_latency.max;
+		return 0;
+	case 20:
+		atom->d = stats->locks.latency.max;
+		return 0;
+	case 21:
+		atom->d = stats->childwrite_latency.max;
+		return 0;
+	case 22:
+		atom->ul = stats->num_recoveries;
+		return 0;
+	default:
+		break;
+	}
+
+	/* Not an item of this cluster */
+	return PM_ERR_PMID;
+}
+
+/*
+ * Copy the cluster 1 (ctdb.node.*) packet counter selected by 'item'
+ * from the current statistics snapshot into 'atom'.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item number.
+ */
+static int
+fill_node(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0:
+		atom->ul = stats->node.req_call;
+		return 0;
+	case 1:
+		atom->ul = stats->node.reply_call;
+		return 0;
+	case 2:
+		atom->ul = stats->node.req_dmaster;
+		return 0;
+	case 3:
+		atom->ul = stats->node.reply_dmaster;
+		return 0;
+	case 4:
+		atom->ul = stats->node.reply_error;
+		return 0;
+	case 5:
+		atom->ul = stats->node.req_message;
+		return 0;
+	case 6:
+		atom->ul = stats->node.req_control;
+		return 0;
+	case 7:
+		atom->ul = stats->node.reply_control;
+		return 0;
+	default:
+		break;
+	}
+
+	/* Not an item of this cluster */
+	return PM_ERR_PMID;
+}
+
+
+/*
+ * Copy the cluster 2 (ctdb.client.*) packet counter selected by 'item'
+ * from the current statistics snapshot into 'atom'.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item number.
+ */
+static int
+fill_client(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0:
+		atom->ul = stats->client.req_call;
+		return 0;
+	case 1:
+		atom->ul = stats->client.req_message;
+		return 0;
+	case 2:
+		atom->ul = stats->client.req_control;
+		return 0;
+	default:
+		break;
+	}
+
+	/* Not an item of this cluster */
+	return PM_ERR_PMID;
+}
+
+/*
+ * Copy the cluster 3 (ctdb.timeouts.*) counter selected by 'item' from
+ * the current statistics snapshot into 'atom'.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item number.
+ */
+static int
+fill_timeout(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0:
+		atom->ul = stats->timeouts.call;
+		return 0;
+	case 1:
+		atom->ul = stats->timeouts.control;
+		return 0;
+	case 2:
+		atom->ul = stats->timeouts.traverse;
+		return 0;
+	default:
+		break;
+	}
+
+	/* Not an item of this cluster */
+	return PM_ERR_PMID;
+}
+
+/*
+ * Per-metric fetch callback (registered with pmdaSetFetchCallBack()).
+ *
+ * Decodes the cluster and item numbers from the metric's PMID and lets
+ * the matching fill_*() helper store the value in 'atom'.
+ *
+ * Returns 0 on success, PM_ERR_INST if an instance is requested,
+ * PM_ERR_VALUE if no statistics snapshot is available, and PM_ERR_PMID
+ * for an unknown cluster or item.
+ */
+static int
+pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
+{
+#ifdef HAVE___PMID_INT
+	__pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
+#else
+	pmID id = *(pmID *)&(mdesc->m_desc.pmid);
+#endif
+
+	/* Every metric in metrictab is single-valued (PM_INDOM_NULL) */
+	if (inst != PM_IN_NULL) {
+		return PM_ERR_INST;
+	}
+
+	if (stats == NULL) {
+		fprintf(stderr, "stats not available\n");
+		return PM_ERR_VALUE;
+	}
+
+	/* The cluster number selects the group of statistics */
+	switch (pmID_cluster(id)) {
+	case 0:
+		return fill_base(pmID_item(id), atom);
+	case 1:
+		return fill_node(pmID_item(id), atom);
+	case 2:
+		return fill_client(pmID_item(id), atom);
+	case 3:
+		return fill_timeout(pmID_item(id), atom);
+	default:
+		break;
+	}
+
+	return PM_ERR_PMID;
+}
+
+/*
+ * This routine is called once for each pmFetch(3) operation, so is a
+ * good place to do once-per-fetch functions, such as value caching or
+ * instance domain evaluation.
+ *
+ * It reconnects to ctdbd if there is no client context, pulls one
+ * statistics snapshot into the global 'stats', lets pmdaFetch() serve
+ * the requested metrics from it (via pmda_ctdb_fetch_cb()) and then
+ * releases the snapshot again.
+ *
+ * Returns the result of pmdaFetch(), or PM_ERR_VALUE if ctdbd cannot be
+ * reached or the statistics control fails.
+ */
+static int
+pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
+{
+	int ret;
+
+	if (client == NULL) {
+		fprintf(stderr, "attempting reconnect to ctdbd\n");
+		ret = pmda_ctdb_daemon_connect();
+		if (ret < 0) {
+			fprintf(stderr, "reconnect failed\n");
+			return PM_ERR_VALUE;
+		}
+	}
+
+	/* Wait at most one second for the local daemon to answer */
+	ret = ctdb_ctrl_statistics(client, ev, client, CTDB_CURRENT_NODE,
+				   tevent_timeval_current_ofs(1,0), &stats);
+	if (ret != 0) {
+		fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
+		/* Tear down now; the next fetch attempts the reconnect */
+		pmda_ctdb_daemon_disconnect();
+		return PM_ERR_VALUE;
+	}
+
+	ret = pmdaFetch(numpmid, pmidlist, resp, pmda);
+
+	/*
+	 * Free the snapshot AND reset the global to NULL, so that the
+	 * "stats == NULL" guard in pmda_ctdb_fetch_cb() never sees a
+	 * pointer to already freed memory.
+	 */
+	TALLOC_FREE(stats);
+
+	return ret;
+}
+
+/* Forward declaration of the (non-static) agent initialiser */
+void pmda_ctdb_init(pmdaInterface *dp);
+
+/*
+ * Initialise the agent: install the per-fetch hook and the per-metric
+ * callback, then register the metric table (no instance domains) with
+ * pmdaInit().  Nothing is done if 'dp' already carries an error status.
+ */
+void
+pmda_ctdb_init(pmdaInterface *dp)
+{
+	const int num_metrics = sizeof(metrictab) / sizeof(metrictab[0]);
+
+	if (dp->status == 0) {
+		dp->version.two.fetch = pmda_ctdb_fetch;
+		pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb);
+
+		pmdaInit(dp, NULL, 0, metrictab, num_metrics);
+	}
+}
+
+/*
+ * Return the path of this PMDA's help file, "<PCP_PMDAS_DIR>/ctdb/help".
+ * The path is formatted into a static buffer on the first call and the
+ * same buffer is returned on every later call.
+ */
+static char *
+helpfile(void)
+{
+	static char path[MAXPATHLEN];
+
+	if (path[0] == '\0') {
+		snprintf(path, sizeof(path), "%s/ctdb/help",
+			 pmGetConfig("PCP_PMDAS_DIR"));
+	}
+
+	return path;
+}
+
+/*
+ * Print the command line synopsis and option summary on stderr, then
+ * terminate the process with exit status 1.
+ */
+static void
+usage(void)
+{
+	static const char options_text[] =
+		"Options:\n"
+		" -d domain use domain (numeric) for metrics domain of PMDA\n"
+		" -l logfile write log into logfile rather than using default log name\n"
+		"\nExactly one of the following options may appear:\n"
+		" -i port expect PMCD to connect on given inet port (number or name)\n"
+		" -p expect PMCD to supply stdin/stdout (pipe)\n"
+		" -u socket expect PMCD to connect on given unix domain socket\n";
+
+	fprintf(stderr, "Usage: %s [options]\n\n", pmGetProgname());
+	fputs(options_text, stderr);
+	exit(1);
+}
+
+/*
+ * Set up the agent if running as a daemon: initialise the dispatch
+ * table, parse the standard PMDA options, open the log, register the
+ * metrics, connect to PMCD and enter the PMDA main loop.
+ */
+int
+main(int argc, char **argv)
+{
+	pmdaInterface dispatch;
+	char log_file[] = "pmda_ctdb.log";
+	int err = 0;
+	int opt;
+
+	pmSetProgname(argv[0]);
+
+	pmdaDaemon(&dispatch, PMDA_INTERFACE_2, argv[0], CTDB,
+		   log_file, helpfile());
+
+	/*
+	 * Anything other than EOF means an option was left over that
+	 * pmdaGetOpt() did not handle itself; 'err' counts the errors it
+	 * found.  usage() does not return.
+	 */
+	opt = pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &err);
+	if (opt != EOF || err != 0) {
+		usage();
+	}
+
+	pmdaOpenLog(&dispatch);
+	pmda_ctdb_init(&dispatch);
+	pmdaConnect(&dispatch);
+	pmdaMain(&dispatch);
+
+	exit(0);
+}
+
diff --git a/ctdb/utils/pmda/pmns b/ctdb/utils/pmda/pmns
new file mode 100644
index 0000000..dc7e3ac
--- /dev/null
+++ b/ctdb/utils/pmda/pmns
@@ -0,0 +1,73 @@
+/*
+ * Metrics for CTDB PMDA
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2011 David Disseldorp
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+ctdb {
+ num_clients CTDB:0:0
+ frozen CTDB:0:1
+ recovering CTDB:0:2
+ client_packets_sent CTDB:0:3
+ client_packets_recv CTDB:0:4
+ node_packets_sent CTDB:0:5
+ node_packets_recv CTDB:0:6
+ keepalive_packets_sent CTDB:0:7
+ keepalive_packets_recv CTDB:0:8
+ node
+ client
+ timeouts
+ total_calls CTDB:0:9
+ pending_calls CTDB:0:10
+ lockwait_calls CTDB:0:11
+ pending_lockwait_calls CTDB:0:12
+ childwrite_calls CTDB:0:13
+ pending_childwrite_calls CTDB:0:14
+ memory_used CTDB:0:15
+ max_hop_count CTDB:0:16
+ max_reclock_ctdbd CTDB:0:17
+ max_reclock_recd CTDB:0:18
+ max_call_latency CTDB:0:19
+ max_lockwait_latency CTDB:0:20
+ max_childwrite_latency CTDB:0:21
+ num_recoveries CTDB:0:22
+}
+
+ctdb.node {
+ req_call CTDB:1:0
+ reply_call CTDB:1:1
+ req_dmaster CTDB:1:2
+ reply_dmaster CTDB:1:3
+ reply_error CTDB:1:4
+ req_message CTDB:1:5
+ req_control CTDB:1:6
+ reply_control CTDB:1:7
+}
+
+ctdb.client {
+ req_call CTDB:2:0
+ req_message CTDB:2:1
+ req_control CTDB:2:2
+}
+
+ctdb.timeouts {
+ call CTDB:3:0
+ control CTDB:3:1
+ traverse CTDB:3:2
+}
+
diff --git a/ctdb/utils/pmda/root b/ctdb/utils/pmda/root
new file mode 100644
index 0000000..ff036ed
--- /dev/null
+++ b/ctdb/utils/pmda/root
@@ -0,0 +1,10 @@
+/*
+ * fake "root" for validating the local PMNS subtree
+ */
+
+#include <stdpmid>
+
+root { ctdb }
+
+#include "pmns"
+
diff --git a/ctdb/utils/smnotify/smnotify.c b/ctdb/utils/smnotify/smnotify.c
new file mode 100644
index 0000000..5907bd6
--- /dev/null
+++ b/ctdb/utils/smnotify/smnotify.c
@@ -0,0 +1,151 @@
+/*
+ simple smnotify tool
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdlib.h>
+#include "smnotify.h"
+#include "popt.h"
+
+static char *client = NULL;
+static const char *ip = NULL;
+static char *server = NULL;
+static int stateval = 0;
+static int clientport = 0;
+static int sendport = 0;
+
+/*
+ * Print a one-line usage summary on stderr and terminate.
+ *
+ * This is only called after a missing argument has been reported, so
+ * the process exits with a non-zero status instead of claiming success.
+ */
+static void usage(void)
+{
+	fprintf(stderr,
+		"Usage: smnotify --client=<hostname/ip> --ip=<ip> "
+		"--server=<hostname/ip> --stateval=<integer> "
+		"[--clientport=<integer>] [--sendport=<integer>]\n");
+	exit(1);
+}
+
+/*
+ * Create a UDP socket bound to the local IPv4 address 'addr' (dotted
+ * quad) and 'port' (host byte order).
+ *
+ * Returns the socket descriptor.  Any failure -- socket creation, an
+ * unparsable address or bind -- prints a message and exits with
+ * status 10.
+ */
+static int create_socket(const char *addr, int port)
+{
+	int s;
+	struct sockaddr_in sock_in;
+
+	s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (s == -1) {
+		printf("Failed to open local socket\n");
+		exit(10);
+	}
+
+	bzero(&sock_in, sizeof(sock_in));
+	sock_in.sin_family = AF_INET;
+	sock_in.sin_port = htons(port);
+	/*
+	 * inet_aton() returns 0 for an invalid address.  Without this
+	 * check the zeroed sin_addr would make us bind to 0.0.0.0, which
+	 * defeats the purpose of choosing the source address.
+	 */
+	if (inet_aton(addr, &sock_in.sin_addr) == 0) {
+		printf("Invalid local address %s\n", addr);
+		exit(10);
+	}
+	if (bind(s, (struct sockaddr *)&sock_in, sizeof(sock_in)) == -1) {
+		printf("Failed to bind to local socket\n");
+		exit(10);
+	}
+
+	return s;
+}
+
+/*
+ * Send one SM_NOTIFY call (RPC program 100024, version 1) over UDP to
+ * 'client', using 'ip'/'sendport' as the local source address and
+ * announcing 'server' with state number 'stateval'.
+ *
+ * --client, --ip, --server and a non-zero --stateval are required; if
+ * one is missing an error is printed and usage() ends the program.
+ * No reply is awaited.  Socket, address and RPC client setup failures
+ * exit with status 10, an invalid option with status 1.
+ */
+int main(int argc, const char *argv[])
+{
+	struct poptOption popt_options[] = {
+		POPT_AUTOHELP
+		{ "client", 'c', POPT_ARG_STRING, &client, 0, "remote client to send the notify to", "hostname/ip" },
+		{ "clientport", 0, POPT_ARG_INT, &clientport, 0, "clientport", "integer" },
+		{ "ip", 'i', POPT_ARG_STRING, &ip, 0, "local ip address to send the notification from", "ip" },
+		{ "sendport", 0, POPT_ARG_INT, &sendport, 0, "port to send the notify from", "integer" },
+		{ "server", 's', POPT_ARG_STRING, &server, 0, "servername to use in the notification", "hostname/ip" },
+		{ "stateval", 0, POPT_ARG_INT, &stateval, 0, "stateval", "integer" },
+		POPT_TABLEEND
+	};
+	int opt;
+	poptContext pc;
+	CLIENT *clnt;
+	int s;
+	struct sockaddr_in sock_cl;
+	struct timeval w;
+	struct status st;
+
+	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
+
+	/*
+	 * No table entry has a non-zero 'val', so any value other than -1
+	 * returned here is a parse error.
+	 */
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		switch (opt) {
+		default:
+			fprintf(stderr, "Invalid option %s: %s\n",
+				poptBadOption(pc, 0), poptStrerror(opt));
+			exit(1);
+		}
+	}
+
+	if (client == NULL) {
+		printf("ERROR: client not specified\n");
+		usage();
+	}
+
+	if (ip == NULL) {
+		printf("ERROR: ip not specified\n");
+		usage();
+	}
+
+	if (server == NULL) {
+		printf("ERROR: server not specified\n");
+		usage();
+	}
+
+	if (stateval == 0) {
+		printf("ERROR: stateval not specified\n");
+		usage();
+	}
+
+
+	/* Since we want to control from which address these packets are
+	   sent we must create the socket ourself and use low-level rpc
+	   calls.
+	*/
+	s = create_socket(ip, sendport);
+
+	/* only wait for at most 3 seconds before giving up */
+	alarm(3);
+
+	/* Setup a sockaddr_in for the client we want to notify */
+	bzero(&sock_cl, sizeof(sock_cl));
+	sock_cl.sin_family = AF_INET;
+	sock_cl.sin_port = htons(clientport);
+	/*
+	 * inet_aton() only understands numeric IPv4 addresses and returns
+	 * 0 otherwise.  Refuse to continue in that case instead of sending
+	 * the notification to the zeroed address 0.0.0.0.
+	 */
+	if (inet_aton(client, &sock_cl.sin_addr) == 0) {
+		printf("ERROR: invalid client address %s\n", client);
+		exit(10);
+	}
+
+	w.tv_sec = 1;
+	w.tv_usec= 0;
+
+	clnt = clntudp_create(&sock_cl, 100024, 1, w, &s);
+	if (clnt == NULL) {
+		printf("ERROR: failed to connect to client\n");
+		exit(10);
+	}
+
+	/* we don't want to wait for any reply */
+	w.tv_sec = 0;
+	w.tv_usec = 0;
+	clnt_control(clnt, CLSET_TIMEOUT, (char *)&w);
+
+	st.mon_name=server;
+	st.state=stateval;
+	sm_notify_1(&st, clnt);
+
+	return 0;
+}
diff --git a/ctdb/utils/smnotify/smnotify.x b/ctdb/utils/smnotify/smnotify.x
new file mode 100644
index 0000000..94239f8
--- /dev/null
+++ b/ctdb/utils/smnotify/smnotify.x
@@ -0,0 +1,21 @@
+#ifdef RPC_HDR
+%#ifdef _AIX
+%#include <rpc/rpc.h>
+%#endif /* _AIX */
+#endif /* RPC_HDR */
+
+/* Upper bound on the length of the mon_name string below */
+const SM_MAXSTRLEN = 1024;
+
+/* Argument of SM_NOTIFY: a monitor name and a state number */
+struct status {
+ string mon_name<SM_MAXSTRLEN>;
+ int state;
+};
+
+
+/*
+ * RPC program 100024, version 1, with the single procedure SM_NOTIFY
+ * (procedure number 6), which takes a struct status and returns nothing.
+ * NOTE(review): 100024 looks like the NSM (statd) program number --
+ * confirm against the protocol definition.
+ */
+program SMNOTIFY {
+ version SMVERSION {
+ void SM_NOTIFY(struct status) = 6;
+ } = 1;
+} = 100024;
+
+
diff --git a/ctdb/utils/tdb/tdb_mutex_check.c b/ctdb/utils/tdb/tdb_mutex_check.c
new file mode 100644
index 0000000..440bd48
--- /dev/null
+++ b/ctdb/utils/tdb/tdb_mutex_check.c
@@ -0,0 +1,160 @@
+/*
+ Check the mutex lock information in tdb database
+
+ Copyright (C) Amitay Isaacs 2015-2021
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <errno.h>
+
+#ifndef USE_TDB_MUTEX_LOCKING
+#define USE_TDB_MUTEX_LOCKING 1
+#endif
+
+#include "lib/tdb/common/tdb_private.h"
+#include "lib/tdb/common/mutex.c"
+
+ /*
+  * Map one ASCII hex digit to its value (0-15).
+  * Returns -1 for any character that is not a hex digit.
+  */
+ static int hex_nibble(char c)
+ {
+ 	if (c >= '0' && c <= '9') {
+ 		return c - '0';
+ 	}
+ 	if (c >= 'a' && c <= 'f') {
+ 		return c - 'a' + 10;
+ 	}
+ 	if (c >= 'A' && c <= 'F') {
+ 		return c - 'A' + 10;
+ 	}
+ 	return -1;
+ }
+
+ /*
+  * Decode a hex string into a newly malloc()ed byte buffer.
+  *
+  * hex_in: NUL-terminated string of hex digit pairs.  As before, a trailing
+  *         unpaired character is ignored.
+  * plen:   set to the number of decoded bytes on success; left untouched
+  *         on failure.
+  *
+  * Returns the buffer (caller must free() it), or NULL if the input holds
+  * no complete pair, contains a non-hex character, or malloc() fails.
+  *
+  * Each digit is validated explicitly: the previous sscanf("%02X") call was
+  * never checked, so bad input stored an uninitialised value in the key.
+  */
+ static uint8_t *hex_decode(const char *hex_in, size_t *plen)
+ {
+ 	size_t i;
+ 	uint8_t *buffer;
+ 	size_t len;
+
+ 	len = strlen(hex_in) / 2;
+ 	if (len == 0) {
+ 		return NULL;
+ 	}
+
+ 	buffer = malloc(len);
+ 	if (buffer == NULL) {
+ 		return NULL;
+ 	}
+
+ 	for (i = 0; i < len; i++) {
+ 		int hi = hex_nibble(hex_in[i*2]);
+ 		int lo = hex_nibble(hex_in[i*2 + 1]);
+
+ 		if (hi < 0 || lo < 0) {
+ 			/* Not a hex key: reject it rather than hash garbage */
+ 			free(buffer);
+ 			return NULL;
+ 		}
+ 		buffer[i] = (uint8_t)((hi << 4) | lo);
+ 	}
+
+ 	*plen = len;
+
+ 	return buffer;
+ }
+
+ /*
+  * Work out which hash chain of the database a hex-encoded key falls into.
+  *
+  * Returns the chain index (Jenkins hash of the decoded key modulo the
+  * database hash size), or -1 if the key could not be decoded.
+  */
+ static int get_hash_chain(struct tdb_context *tdb, const char *hex_key)
+ {
+ 	size_t raw_len = 0;
+ 	uint8_t *raw = hex_decode(hex_key, &raw_len);
+ 	TDB_DATA key;
+ 	unsigned int digest;
+
+ 	if (raw == NULL || raw_len == 0) {
+ 		return -1;
+ 	}
+
+ 	key = (TDB_DATA) { .dptr = raw, .dsize = raw_len };
+ 	digest = tdb_jenkins_hash(&key);
+ 	/* The decoded key is only needed for hashing */
+ 	free(raw);
+
+ 	return digest % tdb_hash_size(tdb);
+ }
+
+ /*
+  * Probe the mutex of one hash chain with a trylock and report its state.
+  *
+  *  - lock obtained:  unlock again, print nothing
+  *  - EOWNERDEAD:     mark the mutex consistent, unlock it, print "cleaned"
+  *  - EBUSY:          print the owner recorded in the mutex
+  *  - anything else:  print the trylock error code
+  *
+  * NOTE(review): the mutex is taken from hashchains[chain+1]; presumably
+  * slot 0 is reserved for something else - confirm in lib/tdb/common/mutex.c.
+  */
+ static void check_one(struct tdb_mutexes *mutexes, int chain)
+ {
+ 	int pthread_mutex_consistent_np(pthread_mutex_t *);
+ 	pthread_mutex_t *mtx = &mutexes->hashchains[chain+1];
+ 	int rc = pthread_mutex_trylock(mtx);
+
+ 	switch (rc) {
+ 	case 0:
+ 		/* Nobody holds it: release and stay silent */
+ 		pthread_mutex_unlock(mtx);
+ 		break;
+
+ 	case EOWNERDEAD:
+ 		rc = pthread_mutex_consistent_np(mtx);
+ 		if (rc != 0) {
+ 			printf("[%6d] consistent failed (%d)\n", chain, rc);
+ 			break;
+ 		}
+ 		rc = pthread_mutex_unlock(mtx);
+ 		if (rc != 0) {
+ 			printf("[%6d] unlock failed (%d)\n", chain, rc);
+ 			break;
+ 		}
+ 		printf("[%6d] cleaned\n", chain);
+ 		break;
+
+ 	case EBUSY:
+ 		printf("[%6d] pid=%d\n", chain, mtx->__data.__owner);
+ 		break;
+
+ 	default:
+ 		printf("[%6d] trylock failed (%d)\n", chain, rc);
+ 		break;
+ 	}
+ }
+
+ /*
+  * Run check_one() on every hash chain, from chain 0 up to hash_size - 1.
+  */
+ static void check_all(struct tdb_mutexes *mutexes, unsigned int hash_size)
+ {
+ 	unsigned int chain = 0;
+
+ 	while (chain < hash_size) {
+ 		check_one(mutexes, chain);
+ 		chain++;
+ 	}
+ }
+
+ /*
+  * Usage: tdb_mutex_check <tdb file> [<key1> <key2> ...]
+  *
+  * Opens the database with mutex locking enabled.  With only a file name
+  * every hash chain mutex is checked; otherwise only the chains of the
+  * given hex-encoded keys are checked (keys that fail to decode are
+  * skipped).  Exits with status 1 on usage error, open failure or when the
+  * mutex area is not available.
+  */
+ int main(int argc, char **argv)
+ {
+ 	const uint32_t tdb_flags = TDB_MUTEX_LOCKING |
+ 				   TDB_INCOMPATIBLE_HASH |
+ 				   TDB_CLEAR_IF_FIRST;
+ 	const char *tdb_file;
+ 	TDB_CONTEXT *tdb;
+ 	int argi;
+
+ 	if (argc < 2) {
+ 		printf("Usage %s <tdb file> [<key1> <key2>]\n", argv[0]);
+ 		exit(1);
+ 	}
+
+ 	tdb_file = argv[1];
+ 	tdb = tdb_open(tdb_file, 0, tdb_flags, O_RDWR, 0);
+ 	if (tdb == NULL) {
+ 		printf("Error opening %s\n", tdb_file);
+ 		exit(1);
+ 	}
+
+ 	if (tdb->mutexes == NULL) {
+ 		printf("Mutexes are not mmapped\n");
+ 		exit(1);
+ 	}
+
+ 	if (argc == 2) {
+ 		/* No keys given: scan the whole hash table */
+ 		check_all(tdb->mutexes, tdb_hash_size(tdb));
+ 	}
+
+ 	/* Does nothing when no keys were given (argc == 2) */
+ 	for (argi = 2; argi < argc; argi++) {
+ 		int chain = get_hash_chain(tdb, argv[argi]);
+
+ 		if (chain != -1) {
+ 			check_one(tdb->mutexes, chain);
+ 		}
+ 	}
+
+ 	tdb_close(tdb);
+ 	return 0;
+ }
diff --git a/ctdb/wscript b/ctdb/wscript
new file mode 100644
index 0000000..a7b0454
--- /dev/null
+++ b/ctdb/wscript
@@ -0,0 +1,1329 @@
+#!/usr/bin/env python
+
+APPNAME = 'ctdb'
+
+import sys, os
+
+ # find the buildtools directory
+ # Starting at '.', keep appending '/..' until a directory containing
+ # 'buildtools' exists; the search gives up once 'top' has 5 path components
+ # (i.e. four levels up).
+ top = '.'
+ while not os.path.exists(top+'/buildtools') and len(top.split('/')) < 5:
+ top = top + '/..'
+ # Make the wafsamba helpers importable for the imports that follow.
+ sys.path.insert(0, top + '/buildtools/wafsamba')
+
+ out = 'bin'
+
+from waflib import Options, Logs, Errors, Context
+import wafsamba
+from wafsamba import samba_dist, samba_utils
+from samba_utils import MODE_644, MODE_744, MODE_755, MODE_777
+
+ # vdir is the directory holding the VERSION file: this directory or its
+ # parent.  It is used by get_version() and build().
+ if os.path.isfile('./VERSION'):
+ vdir = '.'
+ elif os.path.isfile('../VERSION'):
+ vdir = '..'
+ else:
+ # NOTE(review): only an error is logged here and vdir stays undefined, so
+ # later uses of vdir would raise NameError - confirm this is intended.
+ Logs.error("VERSION file not found")
+
+ default_prefix = Options.default_prefix = '/usr/local'
+
+samba_dist.DIST_DIRS('''ctdb:. lib/replace:lib/replace lib/talloc:lib/talloc
+ lib/tevent:lib/tevent lib/tdb:lib/tdb
+ third_party/socket_wrapper:third_party/socket_wrapper
+ third_party/popt:third_party/popt
+ lib/util:lib/util lib/tdb_wrap:lib/tdb_wrap
+ lib/ccan:lib/ccan libcli/util:libcli/util
+ lib/async_req:lib/async_req
+ lib/pthreadpool:lib/pthreadpool
+ lib/messaging:lib/messaging
+ buildtools:buildtools third_party/waf:third_party/waf''')
+
+ # Manpages that belong to a binary; build() attaches each one to its
+ # SAMBA_BINARY via the manpages= argument.
+ manpages_binary = [
+ 'ctdb.1',
+ 'ctdbd.1',
+ 'ltdbtool.1',
+ 'ping_pong.1'
+ ]
+
+ # Manpages without a matching SAMBA_BINARY; always part of the set that
+ # build() passes to bld.MANPAGES().
+ manpages_misc = [
+ 'ctdb_diagnostics.1',
+ 'onnode.1',
+ 'ctdb.conf.5',
+ 'ctdb-script.options.5',
+ 'ctdb.sysconfig.5',
+ 'ctdb.7',
+ 'ctdb-statistics.7',
+ 'ctdb-tunables.7',
+ ]
+
+ # Only added when the etcd recovery lock helper is enabled (env.etcd_reclock).
+ manpages_etcd = [
+ 'ctdb-etcd.7',
+ ]
+
+ # Only added when librados support is enabled (env.HAVE_LIBRADOS).
+ manpages_ceph = [
+ 'ctdb_mutex_ceph_rados_helper.7',
+ ]
+
+ # Cache for get_version_string(); empty until the version is first computed.
+ VERSION = ''
+
+def get_version():
+ import samba_version
+ env = samba_utils.LOAD_ENVIRONMENT()
+
+ return samba_version.samba_version_file('%s/VERSION' % vdir, vdir, env)
+
+def get_version_string():
+ if Context.g_module.VERSION:
+ return Context.g_module.VERSION
+ version = get_version()
+ Context.g_module.VERSION = version.STRING.replace('-', '.')
+ return Context.g_module.VERSION
+
+def options(opt):
+ opt.PRIVATE_EXTENSION_DEFAULT('ctdb')
+
+ opt.RECURSE('lib/replace')
+
+ opt.RECURSE('lib/util')
+
+ opt.RECURSE('lib/talloc')
+ opt.RECURSE('lib/tevent')
+ opt.RECURSE('lib/tdb')
+
+ opt.add_option('--enable-infiniband',
+ help=("Turn on infiniband support (default=no)"),
+ action="store_true", dest='ctdb_infiniband', default=False)
+ opt.add_option('--enable-pmda',
+ help=("Turn on PCP pmda support (default=no)"),
+ action="store_true", dest='ctdb_pmda', default=False)
+ opt.add_option('--enable-etcd-reclock',
+ help=("Enable etcd recovery lock helper (default=no)"),
+ action="store_true", dest='ctdb_etcd_reclock', default=False)
+ opt.add_option('--enable-pcap',
+ help=("Use pcap for packet capture (default=no)"),
+ action="store_true", dest='ctdb_pcap', default=False)
+
+ opt.add_option('--with-libcephfs',
+ help=("Directory under which libcephfs is installed"),
+ action="store", dest='libcephfs_dir', default=None)
+ opt.add_option('--enable-ceph-reclock',
+ help=("Enable Ceph CTDB recovery lock helper (default=no)"),
+ action="store_true", dest='ctdb_ceph_reclock', default=False)
+
+ opt.add_option('--with-logdir',
+ help=("Path to log directory"),
+ action="store", dest='ctdb_logdir', default=None)
+ opt.add_option('--with-socketpath',
+ help=("path to CTDB daemon socket"),
+ action="store", dest='ctdb_sockpath', default=None)
+
+
+ def configure(conf):
+ # Configure step: probes for the headers, functions and libraries CTDB
+ # uses, handles the options registered in options(), and records CTDB's
+ # install paths and feature flags in conf.env.
+ # No need to build python bindings for talloc/tevent/tdb
+ if conf.IN_LAUNCH_DIR():
+ conf.env.standalone_ctdb = True
+ Options.options.disable_python = True
+
+ conf.RECURSE('lib/replace')
+
+ conf.CHECK_HEADERS(headers='''sys/socket.h
+ netinet/in.h
+ netinet/if_ether.h
+ netinet/ip.h
+ netinet/ip6.h
+ netinet/icmp6.h''',
+ together=True)
+
+ conf.CHECK_CODE('int s = socket(AF_PACKET, SOCK_RAW, 0);',
+ define='HAVE_AF_PACKET',
+ headers='sys/socket.h linux/if_packet.h')
+
+ conf.CHECK_CODE('struct sockaddr_ll sall; sall.sll_family = AF_PACKET;',
+ define='HAVE_PACKETSOCKET',
+ headers='sys/socket.h linux/if_packet.h')
+
+ # Probe whether pthread_mutex_t exposes the internal __data.__owner field;
+ # build() only builds tdb_mutex_check when this define is set.
+ conf.CHECK_CODE('''pthread_mutex_t m;
+ int pid = 0;
+ m.__data.__owner = pid;
+ ''',
+ 'HAVE_PTHREAD_INTERNAL_MUTEX_OWNER',
+ headers='pthread.h',
+ msg='Checking for internal POSIX mutex owner field')
+ if not conf.env.HAVE_PTHREAD_INTERNAL_MUTEX_OWNER:
+ # This is unsupported - please see note in debug_locks.sh
+ Logs.info('Building without unsupported mutex debugging hack')
+
+ if conf.env.standalone_ctdb:
+ conf.SAMBA_CHECK_PERL(mandatory=True)
+
+ # This is just for consistency and to check the version for the
+ # build system, see Options.options.disable_python = True above
+ conf.SAMBA_CHECK_PYTHON()
+ conf.SAMBA_CHECK_PYTHON_HEADERS()
+
+ # We just want gnutls_rnd for rand subsystem
+ conf.CHECK_FUNCS_IN('gnutls_rnd', 'gnutls')
+
+
+ if conf.CHECK_FOR_THIRD_PARTY():
+ conf.RECURSE('third_party/popt')
+ if conf.env.standalone_ctdb or conf.CONFIG_GET('ENABLE_SELFTEST'):
+ conf.RECURSE('third_party/socket_wrapper')
+ conf.env.SOCKET_WRAPPER_SO_PATH = conf.CONFIG_GET('LIBSOCKET_WRAPPER_SO_PATH')
+ else:
+ if not conf.CHECK_POPT():
+ raise Errors.WafError('popt development packages have not been found\nIf third_party is installed, check that it is in the proper place.')
+ else:
+ conf.define('USING_SYSTEM_POPT', 1)
+ conf.env.SOCKET_WRAPPER_SO_PATH = ''
+
+
+ if conf.env.standalone_ctdb or conf.CONFIG_GET('ENABLE_SELFTEST'):
+ if not conf.CHECK_SOCKET_WRAPPER():
+ raise Errors.WafError('socket_wrapper package has not been found.\nIf third_party is installed, check that it is in the proper place.')
+ else:
+ conf.define('USING_SYSTEM_SOCKET_WRAPPER', 1)
+ conf.env.SOCKET_WRAPPER_SO_PATH = conf.CONFIG_GET('LIBSOCKET_WRAPPER_SO_PATH')
+
+ conf.RECURSE('lib/util')
+
+ conf.RECURSE('lib/talloc')
+ conf.RECURSE('lib/tevent')
+ conf.RECURSE('lib/tdb')
+
+ conf.CHECK_HEADERS('sched.h')
+ conf.CHECK_HEADERS('procinfo.h')
+ # A scheduler-setting function is mandatory: thread_setsched() is checked
+ # first on AIX, sched_setscheduler() otherwise.
+ if sys.platform.startswith('aix') and not conf.CHECK_FUNCS('thread_setsched'):
+ Logs.error('Need thread_setsched() on AIX')
+ sys.exit(1)
+ elif not conf.CHECK_FUNCS('sched_setscheduler'):
+ Logs.error('Need sched_setscheduler()')
+ sys.exit(1)
+ conf.CHECK_FUNCS('mlockall')
+ conf.CHECK_FUNCS('getrusage', headers="sys/time.h sys/resource.h")
+
+ # Fall back to ETIMEDOUT where errno.h does not provide ETIME.
+ if not conf.CHECK_VARIABLE('ETIME', headers='errno.h'):
+ conf.DEFINE('ETIME', 'ETIMEDOUT')
+
+ # pcap is used when requested with --enable-pcap and always on non-Linux
+ # platforms; otherwise the 'pcap' target is set to EMPTY.
+ if Options.options.ctdb_pcap or not sys.platform.startswith('linux'):
+ conf.DEFINE('ENABLE_PCAP', 1)
+ if not conf.env.ENABLE_PCAP:
+ conf.SET_TARGET_TYPE('pcap', 'EMPTY')
+ else:
+ conf.find_program('pcap-config', var='PCAP_CONFIG')
+ if conf.env.PCAP_CONFIG:
+ conf.CHECK_CFG(path=conf.env.PCAP_CONFIG,
+ args="--cflags --libs",
+ package="",
+ uselib_store="PCAP")
+ if not conf.CHECK_HEADERS('pcap.h'):
+ Logs.error('Need libpcap')
+ sys.exit(1)
+ if not conf.CHECK_FUNCS_IN('pcap_open_live', 'pcap', headers='pcap.h'):
+ Logs.error('Need libpcap')
+ sys.exit(1)
+ conf.CHECK_FUNCS_IN('pcap_set_immediate_mode', 'pcap', headers='pcap.h')
+
+ # Missing backtrace support is logged but does not abort configuration.
+ if not conf.CHECK_FUNCS_IN('backtrace backtrace_symbols', 'execinfo',
+ checklibc=True, headers='execinfo.h'):
+ Logs.error('backtrace support not available')
+
+ # --enable-pmda: all PCP checks must pass, otherwise configuration aborts.
+ have_pmda = False
+ if Options.options.ctdb_pmda:
+ pmda_support = True
+
+ if not conf.CHECK_HEADERS('pcp/pmapi.h pcp/impl.h pcp/pmda.h',
+ together=True):
+ pmda_support = False
+ if not conf.CHECK_FUNCS_IN('pmProgname', 'pcp'):
+ pmda_support = False
+ if not conf.CHECK_FUNCS_IN('pmdaDaemon', 'pcp_pmda'):
+ pmda_support = False
+ if pmda_support:
+ conf.CHECK_TYPE_IN('__pmID_int', 'pcp/pmapi.h pcp/impl.h')
+ have_pmda = True
+ else:
+ Logs.error("PMDA support not available")
+ sys.exit(1)
+ if have_pmda:
+ Logs.info('Building with PMDA support')
+ conf.define('HAVE_PMDA', 1)
+ conf.env.CTDB_PMDADIR = os.path.join(conf.env.LOCALSTATEDIR,
+ 'lib/pcp/pmdas/ctdb')
+
+ # --enable-infiniband: all verbs/rdmacm checks must pass, otherwise
+ # configuration aborts.
+ have_infiniband = False
+ if Options.options.ctdb_infiniband:
+ ib_support = True
+
+ if not conf.CHECK_HEADERS('infiniband/verbs.h rdma/rdma_cma.h'):
+ ib_support = False
+ if not conf.CHECK_FUNCS_IN('ibv_create_qp', 'ibverbs'):
+ ib_support = False
+ if not conf.CHECK_FUNCS_IN('rdma_connect', 'rdmacm'):
+ ib_support = False
+ if ib_support:
+ have_infiniband = True
+ else:
+ Logs.error("Infiniband support not available")
+ sys.exit(1)
+ if have_infiniband:
+ Logs.info('Building with Infiniband support')
+ conf.define('HAVE_INFINIBAND', 1)
+ conf.define('USE_INFINIBAND', 1)
+
+ # --enable-etcd-reclock: requires the python 'etcd' module.
+ have_etcd_reclock = False
+ if Options.options.ctdb_etcd_reclock:
+ try:
+ conf.check_python_module('etcd')
+ have_etcd_reclock = True
+ # NOTE(review): a bare except also catches SystemExit and
+ # KeyboardInterrupt; consider catching only the waf error that
+ # check_python_module raises.
+ except:
+ Logs.error('etcd support not available')
+ sys.exit(1)
+ if have_etcd_reclock:
+ Logs.info('Building with etcd support')
+ conf.env.etcd_reclock = have_etcd_reclock
+
+ # --with-libcephfs is still registered in options() but using it is a
+ # fatal error here.
+ if Options.options.libcephfs_dir:
+ Logs.error('''--with-libcephfs no longer supported, please use compiler
+ flags instead, e.g. GCC LIBRARY_PATH and C_INCLUDE_PATH''')
+ sys.exit(1)
+
+ if Options.options.ctdb_ceph_reclock:
+ if (conf.CHECK_HEADERS('rados/librados.h', False, False, 'rados') and
+ conf.CHECK_LIB('rados', shlib=True)):
+ Logs.info('Building with Ceph librados recovery lock support')
+ conf.define('HAVE_LIBRADOS', 1)
+ else:
+ Logs.error("Missing librados for Ceph recovery lock support")
+ sys.exit(1)
+
+ # Install locations, derived from the generic prefix directories.
+ conf.env.CTDB_BINDIR = os.path.join(conf.env.EXEC_PREFIX, 'bin')
+ conf.env.CTDB_DATADIR = os.path.join(conf.env.EXEC_PREFIX, 'share/ctdb')
+ conf.env.CTDB_ETCDIR = os.path.join(conf.env.SYSCONFDIR, 'ctdb')
+ conf.env.CTDB_VARDIR = os.path.join(conf.env.LOCALSTATEDIR, 'lib/ctdb')
+ conf.env.CTDB_RUNDIR = os.path.join(conf.env.LOCALSTATEDIR, 'run/ctdb')
+ conf.env.CTDB_HELPER_BINDIR = os.path.join(conf.env.LIBEXECDIR, 'ctdb')
+
+ # --with-logdir and --with-socketpath override the computed defaults.
+ if Options.options.ctdb_logdir:
+ conf.env.CTDB_LOGDIR = Options.options.ctdb_logdir
+ else:
+ conf.env.CTDB_LOGDIR = os.path.join(conf.env.LOCALSTATEDIR, 'log')
+
+ if Options.options.ctdb_sockpath:
+ conf.env.CTDB_SOCKPATH = Options.options.ctdb_sockpath
+ else:
+ conf.env.CTDB_SOCKPATH = os.path.join(conf.env.CTDB_RUNDIR,
+ 'ctdbd.socket')
+ conf.define('CTDB_SOCKET', conf.env.CTDB_SOCKPATH)
+
+ # Pass the install paths to the C code as quoted -D defines.
+ conf.ADD_CFLAGS('''-DCTDB_HELPER_BINDIR=\"%s\"
+ -DLOGDIR=\"%s\"
+ -DCTDB_DATADIR=\"%s\"
+ -DCTDB_ETCDIR=\"%s\"
+ -DCTDB_VARDIR=\"%s\"
+ -DCTDB_RUNDIR=\"%s\"''' % (
+ conf.env.CTDB_HELPER_BINDIR,
+ conf.env.CTDB_LOGDIR,
+ conf.env.CTDB_DATADIR,
+ conf.env.CTDB_ETCDIR,
+ conf.env.CTDB_VARDIR,
+ conf.env.CTDB_RUNDIR))
+
+ conf.env.CTDB_TEST_DATADIR = os.path.join(conf.env.CTDB_DATADIR, 'tests')
+ conf.env.CTDB_TEST_LIBEXECDIR = os.path.join(conf.env.LIBEXECDIR, 'ctdb/tests')
+
+ # Allow unified compilation and separate compilation of utilities
+ # to find includes
+ if not conf.env.standalone_ctdb:
+ conf.ADD_EXTRA_INCLUDES('#include/public #ctdb/include #ctdb')
+ else:
+ if Context.g_module.top == '.':
+ # Building from tarball
+ conf.ADD_EXTRA_INCLUDES('#include')
+ else:
+ # Building standalone CTDB from within Samba tree
+ conf.ADD_EXTRA_INCLUDES('#ctdb/include')
+ conf.ADD_EXTRA_INCLUDES('#ctdb')
+ conf.ADD_EXTRA_INCLUDES('#lib #lib/replace')
+
+ conf.DEFINE('HAVE_CONFIG_H', 1, add_to_cflags=True)
+ conf.DEFINE('SAMBA_UTIL_CORE_ONLY', 1, add_to_cflags=True)
+ conf.SAMBA_CONFIG_H()
+
+ # Manpages are generated when XSLTPROC_MANPAGES is set; otherwise record
+ # which pre-built manpages exist under doc/ so build() can install those.
+ if 'XSLTPROC_MANPAGES' in conf.env and conf.env['XSLTPROC_MANPAGES']:
+ conf.env.ctdb_generate_manpages = True
+ else:
+ conf.env.ctdb_generate_manpages = False
+
+ Logs.info("xsltproc unavailable, checking for pre-built manpages")
+ conf.env.ctdb_prebuilt_manpages = []
+ manpages = manpages_binary + manpages_misc
+ if conf.env.etcd_reclock:
+ manpages += manpages_etcd
+ if conf.env.HAVE_LIBRADOS:
+ manpages += manpages_ceph
+ for m in manpages:
+ if os.path.exists(os.path.join("doc", m)):
+ Logs.info(" %s: yes" % (m))
+ conf.env.ctdb_prebuilt_manpages.append(m)
+ else:
+ Logs.info(" %s: no" % (m))
+
+def build(bld):
+ if bld.env.standalone_ctdb:
+ # enable building of public headers in the build tree
+ bld.env.build_public_headers = 'include/public'
+
+ if bld.env.standalone_ctdb:
+ bld.SAMBA_MKVERSION('version.h', '%s/VERSION' % vdir)
+
+ bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig'
+
+ bld.RECURSE('lib/replace')
+ if bld.CHECK_FOR_THIRD_PARTY():
+ bld.RECURSE('third_party/popt')
+ if bld.env.standalone_ctdb or bld.CONFIG_GET('SOCKET_WRAPPER'):
+ bld.RECURSE('third_party/socket_wrapper')
+
+ bld.RECURSE('lib/tdb_wrap')
+ bld.RECURSE('lib/util')
+ bld.RECURSE('lib/async_req')
+ bld.RECURSE('lib/pthreadpool')
+ bld.RECURSE('lib/messaging')
+
+ bld.RECURSE('lib/talloc')
+ bld.RECURSE('lib/tevent')
+ bld.RECURSE('lib/tdb')
+
+ if bld.env.standalone_ctdb:
+ # If a combined build is implemented, CTDB will want to
+ # build against samba-util rather than samba-util-core.
+ # Similarly, other Samba subsystems expect samba-util. So,
+ # for a standalone build, just define a fake samba-util
+ # subsystem that pulls in samba-util-core.
+ bld.SAMBA_SUBSYSTEM('samba-util',
+ source='',
+ deps='samba-util-core')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-tcp',
+ source=bld.SUBDIR('tcp',
+ 'tcp_connect.c tcp_init.c tcp_io.c'),
+ includes='include',
+ deps='replace tdb talloc tevent')
+
+ ib_deps = ''
+ if bld.env.HAVE_INFINIBAND:
+ bld.SAMBA_SUBSYSTEM('ctdb-ib',
+ source=bld.SUBDIR('ib',
+ '''ibwrapper.c ibw_ctdb.c
+ ibw_ctdb_init.c'''),
+ includes='include',
+ deps='replace talloc tevent tdb')
+ ib_deps = ' ctdb-ib rdmacm ibverbs'
+
+ bld.SAMBA_SUBSYSTEM('ctdb-system',
+ source=bld.SUBDIR('common',
+ 'system_socket.c system.c'),
+ deps='replace talloc tevent tdb pcap samba-util')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-common',
+ source=bld.SUBDIR('common',
+ '''ctdb_io.c ctdb_util.c ctdb_ltdb.c
+ sock_io.c'''),
+ includes='include',
+ deps='''replace popt talloc tevent tdb popt ctdb-system
+ ctdb-protocol-util''')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-util',
+ source=bld.SUBDIR('common',
+ '''cmdline.c
+ comm.c
+ conf.c
+ db_hash.c
+ event_script.c
+ hash_count.c
+ line.c
+ logging.c
+ path.c
+ pidfile.c
+ pkt_read.c
+ pkt_write.c
+ rb_tree.c
+ reqid.c
+ run_event.c
+ run_proc.c
+ sock_client.c
+ srvid.c
+ tmon.c
+ tunable.c
+ '''),
+ deps='''samba-util
+ LIBASYNC_REQ
+ sys_rw
+ tevent-util
+ replace
+ talloc
+ tevent
+ tdb
+ popt
+ ''')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-logging-conf',
+ source='common/logging_conf.c',
+ deps='ctdb-util talloc')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-protocol-basic',
+ source=bld.SUBDIR('protocol', 'protocol_basic.c'),
+ deps='talloc tdb')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-protocol',
+ source=bld.SUBDIR('protocol',
+ '''protocol_header.c protocol_packet.c
+ protocol_types.c
+ protocol_call.c
+ protocol_message.c
+ protocol_control.c
+ protocol_keepalive.c
+ protocol_tunnel.c
+ protocol_client.c
+ protocol_debug.c
+ protocol_sock.c'''),
+ deps='ctdb-protocol-basic replace talloc tdb')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-protocol-util',
+ source='protocol/protocol_util.c',
+ deps='ctdb-util replace talloc tdb')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-client',
+ source=bld.SUBDIR('client',
+ '''client_connect.c client_call.c
+ client_message.c client_control.c
+ client_message_sync.c
+ client_control_sync.c
+ client_db.c client_util.c
+ client_tunnel.c
+ '''),
+ deps='''ctdb-protocol
+ ctdb-util
+ samba-util
+ replace
+ talloc
+ tevent
+ tdb
+ tdb-wrap
+ ''')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-server-util',
+ source=bld.SUBDIR('common',
+ '''sock_daemon.c'''),
+ deps='''samba-util ctdb-util ctdb-system tevent-util
+ LIBASYNC_REQ replace talloc tevent''')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-ipalloc',
+ source=bld.SUBDIR('server',
+ '''ipalloc_deterministic.c
+ ipalloc_nondeterministic.c
+ ipalloc_lcp2.c
+ ipalloc_common.c
+ ipalloc.c
+ '''),
+ includes='include',
+ deps='ctdb-protocol-util replace talloc tevent')
+
+ bld.SAMBA_BINARY('ctdb-path',
+ source='common/path_tool.c',
+ cflags='-DCTDB_PATH_TOOL',
+ deps='''ctdb-util samba-util talloc replace popt''',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-cluster-conf',
+ source='cluster/cluster_conf.c',
+ deps='ctdb-util')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-database-conf',
+ source='database/database_conf.c',
+ deps='ctdb-util')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-event-conf',
+ source='event/event_conf.c',
+ deps='ctdb-util')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-failover-conf',
+ source='failover/failover_conf.c',
+ deps='ctdb-util')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-legacy-conf',
+ source='server/legacy_conf.c',
+ deps='ctdb-util')
+
+ bld.SAMBA_BINARY('ctdb-config',
+ source='common/conf_tool.c',
+ cflags='-DCTDB_CONF_TOOL',
+ deps='''ctdb-logging-conf
+ ctdb-event-conf
+ ctdb-cluster-conf
+ ctdb-database-conf
+ ctdb-failover-conf
+ ctdb-legacy-conf
+ ctdb-util samba-util talloc replace popt''',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-event-protocol',
+ source=bld.SUBDIR('event',
+ '''event_protocol.c
+ event_protocol_util.c
+ '''),
+ deps='ctdb-protocol-basic')
+
+ bld.SAMBA_LIBRARY('ctdb-event-client',
+ source='event/event_client.c',
+ deps='ctdb-event-protocol ctdb-util tevent talloc',
+ private_library=True)
+
+ bld.SAMBA_BINARY('ctdb-eventd',
+ source=bld.SUBDIR('event',
+ '''event_cmd.c
+ event_config.c
+ event_context.c
+ event_daemon.c
+ event_request.c
+ '''),
+ deps='''ctdb-event-protocol
+ ctdb-event-conf ctdb-logging-conf
+ ctdb-server-util samba-util ctdb-util
+ talloc tevent replace popt''',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_BINARY('ctdb-event',
+ source='event/event_tool.c',
+ cflags='-DCTDB_EVENT_TOOL',
+ deps='''ctdb-event-client ctdb-event-protocol
+ ctdb-util samba-util talloc replace''',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_BINARY('ctdbd',
+ source='server/ctdbd.c ' +
+ bld.SUBDIR('server',
+ '''ctdb_daemon.c ctdb_recoverd.c
+ ctdb_recover.c ctdb_freeze.c
+ ctdb_tunables.c ctdb_monitor.c
+ ctdb_server.c ctdb_control.c
+ ctdb_call.c ctdb_ltdb_server.c
+ ctdb_traverse.c eventscript.c
+ ctdb_takeover.c
+ ctdb_persistent.c ctdb_keepalive.c
+ ctdb_cluster_mutex.c
+ ctdb_logging.c
+ ctdb_uptime.c
+ ctdb_vacuum.c ctdb_banning.c
+ ctdb_statistics.c
+ ctdb_update_record.c
+ ctdb_lock.c ctdb_fork.c
+ ctdb_tunnel.c ctdb_client.c
+ ctdb_config.c
+ '''),
+ includes='include',
+ deps='''ctdb-common ctdb-system ctdb-protocol
+ ctdb-tcp ctdb-util replace sys_rw popt
+ ctdb-logging-conf
+ ctdb-cluster-conf
+ ctdb-database-conf
+ ctdb-event-conf
+ ctdb-failover-conf
+ ctdb-legacy-conf
+ ctdb-event-protocol
+ talloc tevent tdb-wrap tdb talloc_report''' +
+ ib_deps,
+ install_path='${SBINDIR}',
+ manpages='ctdbd.1')
+
+ bld.SAMBA_BINARY('ctdb',
+ source='tools/ctdb.c',
+ deps='''ctdb-client ctdb-protocol ctdb-protocol-util
+ ctdb-util ctdb-system samba-util sys_rw popt''',
+ install_path='${BINDIR}',
+ manpages='ctdb.1')
+
+ bld.SAMBA_BINARY('ctdb_killtcp',
+ source='tools/ctdb_killtcp.c',
+ deps='''ctdb-protocol-util ctdb-util ctdb-system
+ samba-util replace''',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_BINARY('ltdbtool',
+ source='tools/ltdbtool.c',
+ includes='include',
+ deps='tdb',
+ install_path='${BINDIR}',
+ manpages='ltdbtool.1')
+
+ bld.SAMBA_BINARY('ctdb_lock_helper',
+ source='server/ctdb_lock_helper.c',
+ deps='''samba-util sys_rw ctdb-system tevent-util
+ talloc tevent tdb''',
+ includes='include',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_BINARY('ctdb_recovery_helper',
+ source='server/ctdb_recovery_helper.c',
+ deps='''ctdb-client ctdb-protocol ctdb-util
+ samba-util sys_rw replace tdb''',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_BINARY('ctdb_takeover_helper',
+ source='server/ctdb_takeover_helper.c',
+ deps='''ctdb-client ctdb-protocol ctdb-util
+ samba-util sys_rw replace ctdb-ipalloc popt''',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_BINARY('ctdb_mutex_fcntl_helper',
+ source='server/ctdb_mutex_fcntl_helper.c',
+ deps='''sys_rw ctdb-system tevent-util ctdb-util
+ talloc tevent
+ ''',
+ includes='include',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_GENERATOR('ctdb-smnotify-h',
+ source='utils/smnotify/smnotify.x',
+ target='utils/smnotify/smnotify.h',
+ rule='rpcgen -h ${SRC} > ${TGT}')
+
+ xdr_buf_hack = 'grep -Fv "register int32_t *buf;"'
+
+ bld.SAMBA_GENERATOR('ctdb-smnotify-x',
+ source='utils/smnotify/smnotify.x',
+ target='utils/smnotify/gen_xdr.c',
+ rule='rpcgen -c ${SRC} | ' + xdr_buf_hack + ' > ${TGT}')
+
+ bld.SAMBA_GENERATOR('ctdb-smnotify-c',
+ source='utils/smnotify/smnotify.x',
+ target='utils/smnotify/gen_smnotify.c',
+ rule='rpcgen -l ${SRC} > ${TGT}')
+
+ bld.SAMBA_BINARY('smnotify',
+ source=bld.SUBDIR('utils/smnotify',
+ 'smnotify.c gen_smnotify.c gen_xdr.c'),
+ deps='ctdb-smnotify-h ctdb-smnotify-c ctdb-smnotify-x popt tirpc',
+ includes='utils utils/smnotify',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ bld.SAMBA_BINARY('ping_pong',
+ source='utils/ping_pong/ping_pong.c',
+ deps='',
+ install_path='${BINDIR}',
+ manpages='ping_pong.1')
+
+ if bld.env.HAVE_PTHREAD_INTERNAL_MUTEX_OWNER:
+ bld.SAMBA_BINARY('tdb_mutex_check',
+ source='utils/tdb/tdb_mutex_check.c',
+ deps='tdb pthread',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ if bld.env.HAVE_PMDA:
+ bld.SAMBA_BINARY('pmdactdb',
+ source='utils/pmda/pmda_ctdb.c',
+ deps='''ctdb-client ctdb-protocol ctdb-util
+ samba-util pcp_pmda pcp''',
+ install_path='${CTDB_PMDADIR}')
+ bld.INSTALL_FILES('${CTDB_PMDADIR}', 'utils/pmda/Install',
+ destname='Install')
+ bld.INSTALL_FILES('${CTDB_PMDADIR}', 'utils/pmda/Remove',
+ destname='Remove')
+ bld.INSTALL_FILES('${CTDB_PMDADIR}', 'utils/pmda/pmns',
+ destname='pmns')
+ bld.INSTALL_FILES('${CTDB_PMDADIR}', 'utils/pmda/domain.h',
+ destname='domain.h')
+ bld.INSTALL_FILES('${CTDB_PMDADIR}', 'utils/pmda/help',
+ destname='help')
+ bld.INSTALL_FILES('${CTDB_PMDADIR}', 'utils/pmda/README',
+ destname='README')
+
+ if bld.env.HAVE_LIBRADOS:
+ bld.SAMBA_BINARY('ctdb_mutex_ceph_rados_helper',
+ source='utils/ceph/ctdb_mutex_ceph_rados_helper.c',
+ deps='talloc tevent rados',
+ includes='include',
+ install_path='${CTDB_HELPER_BINDIR}')
+
+ sed_expr1 = 's|/usr/local/var/lib/ctdb|%s|g' % (bld.env.CTDB_VARDIR)
+ sed_expr2 = 's|/usr/local/etc/ctdb|%s|g' % (bld.env.CTDB_ETCDIR)
+ sed_expr3 = 's|/usr/local/var/log|%s|g' % (bld.env.CTDB_LOGDIR)
+ sed_expr4 = 's|/usr/local/var/run/ctdb|%s|g' % (bld.env.CTDB_RUNDIR)
+ sed_expr5 = 's|/usr/local/sbin|%s|g' % (bld.env.SBINDIR)
+ sed_expr6 = 's|/usr/local/libexec/ctdb|%s|g' % (bld.env.CTDB_HELPER_BINDIR)
+ sed_expr7 = 's|/usr/local/bin|%s|g' % (bld.env.BINDIR)
+ sed_expr8 = 's|/usr/local/share/ctdb|%s|g' % (bld.env.CTDB_DATADIR)
+ sed_cmdline = '-e "%s" ' * 8 % \
+ (sed_expr1, sed_expr2, sed_expr3, sed_expr4, sed_expr5,
+ sed_expr6, sed_expr7, sed_expr8)
+
+ manpages_extra = list(manpages_misc)
+ if bld.env.etcd_reclock:
+ manpages_extra += manpages_etcd
+ if bld.env.HAVE_LIBRADOS:
+ manpages_extra += manpages_ceph
+ for f in manpages_binary + manpages_extra:
+ x = '%s.xml' % (f)
+ bld.SAMBA_GENERATOR(x,
+ source=os.path.join('doc', x),
+ target=x,
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+
+ if bld.env.ctdb_generate_manpages:
+ bld.MANPAGES(' '.join(manpages_extra), True)
+ else:
+ for m in bld.env.ctdb_prebuilt_manpages:
+ bld.SAMBA_GENERATOR(m,
+ source=os.path.join("doc", m),
+ target=m,
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES('${MANDIR}/man%s' % m[-1], m)
+
+ bld.SAMBA_GENERATOR('ctdb-onnode',
+ source='tools/onnode',
+ target='onnode',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES('${BINDIR}', 'onnode',
+ destname='onnode', chmod=MODE_755)
+
+ bld.SAMBA_GENERATOR('ctdb-diagnostics',
+ source='tools/ctdb_diagnostics',
+ target='ctdb_diagnostics',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES('${BINDIR}', 'ctdb_diagnostics',
+ destname='ctdb_diagnostics', chmod=MODE_755)
+
+ if bld.env.etcd_reclock:
+ bld.SAMBA_GENERATOR('ctdb-etcd-lock',
+ source='utils/etcd/ctdb_etcd_lock',
+ target='ctdb_etcd_lock',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES('${CTDB_HELPER_BINDIR}', 'ctdb_etcd_lock',
+ destname='ctdb_etcd_lock', chmod=MODE_744)
+
+ bld.SAMBA_GENERATOR('ctdb-natgw',
+ source='tools/ctdb_natgw',
+ target='ctdb_natgw',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES('${CTDB_HELPER_BINDIR}', 'ctdb_natgw',
+ destname='ctdb_natgw', chmod=MODE_755)
+
+ bld.SAMBA_GENERATOR('ctdb-lvs',
+ source='tools/ctdb_lvs',
+ target='ctdb_lvs',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES('${CTDB_HELPER_BINDIR}', 'ctdb_lvs',
+ destname='ctdb_lvs', chmod=MODE_755)
+
+ def SUBDIR_MODE_callback(arg, dirname, fnames):
+ # Record [path, mode] in arg['file_list'] for the entries of fnames found
+ # in dirname.  Directories and symlinks are skipped.  The mode is the
+ # lstat() mode masked with MODE_777; the path is made relative to
+ # arg['trim_path'] when that is set.
+ for f in fnames:
+ fl = os.path.join(dirname, f)
+ if os.path.isdir(fl) or os.path.islink(fl):
+ continue
+ mode = os.lstat(fl).st_mode & MODE_777
+ if arg['trim_path']:
+ # os is reached through the samba_utils module namespace here
+ fl = samba_utils.os.path.relpath(fl, arg['trim_path'])
+ arg['file_list'].append([fl, mode])
+
+ def SUBDIR_MODE(path, trim_path=None):
+ # Walk the tree below path with os.walk() and return the list of
+ # [file, mode] entries collected by SUBDIR_MODE_callback(); trim_path is
+ # passed through so the file names can be made relative to it.
+ pd = {'trim_path': trim_path, 'file_list': []}
+ for dirname, _subdirs, fnames in os.walk(path):
+ SUBDIR_MODE_callback(pd, dirname, fnames)
+ return pd['file_list']
+
+ event_script_subdirs = [
+ 'events/legacy',
+ ]
+
+ etc_subdirs = [
+ 'nfs-checks.d'
+ ]
+
+ if bld.env.standalone_ctdb:
+ configdir = 'config'
+ else:
+ configdir = 'ctdb/config'
+
+ for t in event_script_subdirs:
+ bld.INSTALL_DIR(os.path.join(bld.env.CTDB_ETCDIR, t))
+ files = SUBDIR_MODE('%s/%s' % (configdir, t), trim_path=configdir)
+ for fmode in files:
+ bld.INSTALL_FILES(bld.env.CTDB_DATADIR, 'config/%s' % fmode[0],
+ destname=fmode[0], chmod=fmode[1])
+
+ for t in etc_subdirs:
+ files = SUBDIR_MODE('%s/%s' % (configdir, t), trim_path=configdir)
+ for fmode in files:
+ bld.INSTALL_FILES(bld.env.CTDB_ETCDIR, 'config/%s' % fmode[0],
+ destname=fmode[0], chmod=fmode[1])
+
+ # If this is a direct install and there are no event scripts
+ # linked/enabled then enable some standard ones
+ if os.environ.get('DESTDIR') is None:
+ fmt = 'events/legacy/%s.script'
+ required_script = '00.ctdb'
+ required_path = os.path.join(bld.env.CTDB_ETCDIR,
+ fmt % (required_script))
+ if not os.path.islink(required_path) and \
+ not os.path.exists(required_path):
+ default_scripts = [ required_script,
+ '01.reclock',
+ '05.system',
+ '10.interface',
+ ]
+ for t in default_scripts:
+ tgt = os.path.join(bld.env.CTDB_DATADIR, fmt % (t))
+ name = os.path.join(bld.env.CTDB_ETCDIR, fmt % (t))
+ bld.symlink_as(name, tgt)
+
+ bld.SAMBA_GENERATOR('ctdb-functions',
+ source='config/functions',
+ target='functions',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES(bld.env.CTDB_ETCDIR, 'functions', destname='functions')
+
+ etc_scripts = [
+ 'ctdb-crash-cleanup.sh',
+ 'debug-hung-script.sh',
+ 'debug_locks.sh',
+ 'nfs-linux-kernel-callout',
+ 'notify.sh',
+ 'statd-callout'
+ ]
+
+ for t in etc_scripts:
+ bld.INSTALL_FILES(bld.env.CTDB_ETCDIR, 'config/%s' % t,
+ destname=t, chmod=MODE_755)
+
+ bld.SAMBA_GENERATOR('ctdb-sudoers',
+ source='config/ctdb.sudoers',
+ target='ctdb.sudoers',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES('${SYSCONFDIR}/sudoers.d', 'ctdb.sudoers',
+ destname='ctdb')
+
+ bld.INSTALL_FILES('${CTDB_ETCDIR}/events/notification',
+ 'config/notification.README',
+ destname='README')
+
+ bld.INSTALL_DIR(bld.env.CTDB_LOGDIR)
+ bld.INSTALL_DIR(bld.env.CTDB_RUNDIR)
+ bld.INSTALL_DIR(bld.env.CTDB_VARDIR)
+
+ for d in ['volatile', 'persistent', 'state']:
+ bld.INSTALL_DIR(os.path.join(bld.env.CTDB_VARDIR, d))
+
+ #
+ # Test-only below this point
+ #
+
+ if not bld.env.standalone_ctdb and not bld.CONFIG_GET('ENABLE_SELFTEST'):
+ return
+
+ bld.SAMBA_BINARY('errcode',
+ source='tests/src/errcode.c',
+ deps='replace',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('sigcode',
+ source='tests/src/sigcode.c',
+ deps='replace',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ # Unit tests
+ ctdb_util_tests = [
+ 'cmdline_test',
+ 'comm_client_test',
+ 'comm_server_test',
+ 'comm_test',
+ 'conf_test',
+ 'db_hash_test',
+ 'event_script_test',
+ 'hash_count_test',
+ 'line_test',
+ 'pidfile_test',
+ 'pkt_read_test',
+ 'pkt_write_test',
+ 'run_event_test',
+ 'run_proc_test',
+ 'sock_io_test',
+ 'srvid_test',
+ 'tunable_test',
+ ]
+
+ for target in ctdb_util_tests:
+ src = 'tests/src/' + target + '.c'
+
+ bld.SAMBA_BINARY(target,
+ source=src,
+ deps='''ctdb-tests-backtrace
+ LIBASYNC_REQ
+ samba-util
+ sys_rw
+ tevent-util
+ talloc
+ tevent
+ tdb
+ popt
+ ''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('reqid_test',
+ source='tests/src/reqid_test.c',
+ deps='samba-util talloc',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('rb_test',
+ source='tests/src/rb_test.c',
+ deps='samba-util talloc',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('ctdb_packet_parse',
+ source='tests/src/ctdb_packet_parse.c',
+ deps='talloc tevent tdb ctdb-protocol',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('system_socket_test',
+ source='tests/src/system_socket_test.c',
+ deps='''ctdb-tests-backtrace
+ talloc
+ ctdb-protocol-util
+ pcap
+ ''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('porting_tests',
+ source='tests/src/porting_tests.c',
+ deps='samba-util ctdb-system popt',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('sock_daemon_test',
+ source='tests/src/sock_daemon_test.c',
+ deps='''ctdb-system talloc tevent tevent-util
+ LIBASYNC_REQ samba-util sys_rw''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('ctdb_io_test',
+ source='tests/src/ctdb_io_test.c',
+ deps='''talloc tevent tdb samba-util sys_rw''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('ctdb-db-test',
+ source='tests/src/db_test_tool.c',
+ cflags='-DCTDB_DB_TEST_TOOL',
+ deps='''ctdb-client ctdb-protocol
+ ctdb-util samba-util talloc tevent replace''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ for target in ['tmon_ping_test', 'tmon_test']:
+ src = 'tests/src/' + target + '.c'
+
+ bld.SAMBA_BINARY(target,
+ source=src,
+ deps='''ctdb-util
+ ctdb-tests-backtrace
+ LIBASYNC_REQ
+ samba-util
+ sys_rw
+ tevent-util
+ talloc
+ tevent
+ tdb
+ popt
+ ''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-protocol-tests-basic',
+ source=bld.SUBDIR('tests/src',
+ 'protocol_common_basic.c'),
+ deps='samba-util replace talloc')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-protocol-tests-common',
+ source=bld.SUBDIR('tests/src',
+ '''protocol_common.c
+ protocol_common_ctdb.c
+ '''),
+ deps='ctdb-protocol-tests-basic replace talloc tdb')
+
+ bld.SAMBA_BINARY('protocol_basic_test',
+ source=bld.SUBDIR('tests/src', 'protocol_basic_test.c'),
+ deps='ctdb-protocol-tests-basic talloc',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ ctdb_protocol_tests = [
+ 'protocol_types_test',
+ 'protocol_ctdb_test',
+ 'protocol_util_test',
+ 'protocol_types_compat_test',
+ 'protocol_ctdb_compat_test',
+ ]
+
+ for target in ctdb_protocol_tests:
+ src = 'tests/src/' + target + '.c'
+
+ bld.SAMBA_BINARY(target,
+ source=src,
+ deps='''ctdb-protocol-tests-common
+ samba-util ctdb-util talloc tdb''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('event_protocol_test',
+ source='event/event_protocol_test.c',
+ deps='''ctdb-protocol-tests-basic
+ ctdb-protocol-basic talloc''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-tests-common',
+ source=bld.SUBDIR('tests/src',
+ '''cluster_wait.c
+ test_options.c
+ '''),
+ deps='''ctdb-client
+ samba-util
+ replace
+ popt
+ talloc
+ tevent
+ tdb''')
+
+ bld.SAMBA_SUBSYSTEM('ctdb-tests-backtrace',
+ source=bld.SUBDIR('tests/src',
+ 'test_backtrace.c'),
+ deps='''samba-util
+ replace''')
+
+ # Test binaries
+ ctdb_tests = [
+ 'g_lock_loop',
+ 'message_ring',
+ 'fetch_ring',
+ 'fetch_loop',
+ 'fetch_loop_key',
+ 'fetch_readonly',
+ 'fetch_readonly_loop',
+ 'transaction_loop',
+ 'update_record',
+ 'update_record_persistent',
+ 'lock_tdb',
+ 'dummy_client',
+ 'tunnel_test',
+ 'tunnel_cmd',
+ ]
+
+ for target in ctdb_tests:
+ src = 'tests/src/' + target + '.c'
+
+ bld.SAMBA_BINARY(target,
+ source=src,
+ includes='include',
+ deps='''ctdb-client ctdb-protocol ctdb-util
+ samba-util ctdb-tests-common''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('ctdb_takeover_tests',
+ source='''tests/src/ctdb_takeover_tests.c
+ tests/src/ipalloc_read_known_ips.c''',
+ deps='''replace popt tdb tevent talloc ctdb-system
+ samba-util tdb-wrap talloc_report
+ ctdb-ipalloc ctdb-protocol ctdb-util''',
+ includes='include',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('fake_ctdbd',
+ source='''tests/src/fake_ctdbd.c
+ tests/src/ipalloc_read_known_ips.c''',
+ deps='''ctdb-util ctdb-protocol ctdb-protocol-util
+ ctdb-system samba-util tevent-util
+ LIBASYNC_REQ popt''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ bld.SAMBA_BINARY('cluster_mutex_test',
+ source='tests/src/cluster_mutex_test.c',
+ deps='''ctdb-tests-backtrace
+ samba-util
+ talloc
+ tevent
+ ''',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ if bld.env.HAVE_INFINIBAND:
+ bld.SAMBA_BINARY('ibwrapper_test',
+ source='ib/ibwrapper_test.c',
+ includes='include',
+ deps='replace talloc ctdb-common sys_rw' +
+ ib_deps,
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ if bld.env.HAVE_ROBUST_MUTEXES and sys.platform.startswith('linux') and bld.env.DEVELOPER:
+ bld.SAMBA_BINARY('test_mutex_raw',
+ source='tests/src/test_mutex_raw.c',
+ deps='pthread',
+ install_path='${CTDB_TEST_LIBEXECDIR}')
+
+ test_subdirs = [
+ 'CLUSTER',
+ 'INTEGRATION',
+ 'UNIT',
+ 'etc-ctdb'
+ ]
+
+ if bld.env.standalone_ctdb:
+ testdir = 'tests'
+ else:
+ testdir = 'ctdb/tests'
+
+ for t in test_subdirs:
+ files = SUBDIR_MODE('%s/%s' % (testdir, t), trim_path=testdir)
+ for fmode in files:
+ bld.INSTALL_FILES(bld.env.CTDB_TEST_DATADIR, 'tests/%s' % fmode[0],
+ destname=fmode[0], chmod=fmode[1])
+
+ # Install tests/scripts directory, excluding files that need munging
+ test_scripts = [
+ 'cluster.bash',
+ 'common.sh',
+ 'integration.bash',
+ 'integration_local_daemons.bash',
+ 'integration_real_cluster.bash',
+ 'unit.sh'
+ ]
+
+ for t in test_scripts:
+ bld.INSTALL_FILES(bld.env.CTDB_TEST_DATADIR,
+ os.path.join('tests/scripts', t),
+ destname=os.path.join('scripts', t))
+
+ bld.INSTALL_FILES(bld.env.CTDB_TEST_DATADIR,
+ 'tests/scripts/test_wrap',
+ destname='scripts/test_wrap',
+ chmod=MODE_755)
+
+ bld.SAMBA_GENERATOR('ctdb-test-script-install-paths',
+ source='tests/scripts/script_install_paths.sh',
+ target='script_install_paths.sh',
+ rule='sed %s ${SRC} > ${TGT}' % (sed_cmdline))
+ bld.INSTALL_FILES(bld.env.CTDB_TEST_DATADIR+"/scripts",
+ 'script_install_paths.sh',
+ destname='script_install_paths.sh', chmod=MODE_644)
+
+ sed_expr1 = 's@^\\(%s\\)=.*@\\1=%s@' % (
+ 'CTDB_TEST_DIR', bld.env.CTDB_TEST_DATADIR)
+ sed_expr2 = 's@^\\(CTDB_TESTS_ARE_INSTALLED\\)=false@\\\\1=true@'
+ bld.SAMBA_GENERATOR('ctdb-test-runner',
+ source='tests/run_tests.sh',
+ target='ctdb_run_tests.sh',
+ rule='sed -e "%s" -e "%s" ${SRC} > ${TGT}' % (
+ sed_expr1, sed_expr2))
+ bld.INSTALL_FILES('${BINDIR}', 'ctdb_run_tests.sh',
+ destname='ctdb_run_tests', chmod=MODE_755)
+ bld.symlink_as(os.path.join(bld.env.BINDIR, 'ctdb_run_cluster_tests'),
+ 'ctdb_run_tests')
+
+ bld.SAMBA_GENERATOR('ctdb-local-daemons',
+ source='tests/local_daemons.sh',
+ target='ctdb_local_daemons.sh',
+ rule='sed -e "%s" -e "%s" ${SRC} > ${TGT}' % (
+ sed_expr1, sed_expr2))
+ bld.INSTALL_FILES('${BINDIR}', 'ctdb_local_daemons.sh',
+ destname='ctdb_local_daemons', chmod=MODE_755)
+
+
+def testonly(ctx):
+ cmd = 'tests/run_tests.sh'
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print('tests exited with exit status %d' % ret)
+ sys.exit(ret)
+
+
+def test(ctx):
+ Options.commands.append('build')
+ Options.commands.append('testonly')
+
+
+def autotest(ctx):
+ env = samba_utils.LOAD_ENVIRONMENT()
+ cmd = 'tests/run_tests.sh -eL -S %s' % env.SOCKET_WRAPPER_SO_PATH
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print('autotest exited with exit status %d' % ret)
+ sys.exit(ret)
+
+
+def show_version(ctx):
+ print(get_version_string())
+
+
+def manpages(ctx):
+ BASE_URL = 'http://docbook.sourceforge.net/release/xsl/current'
+ MAN_XSL = '%s/manpages/docbook.xsl' % BASE_URL
+ HTML_XSL = '%s/html/docbook.xsl' % BASE_URL
+ CMD_TEMPLATE = 'xsltproc --xinclude -o %s --nonet %s %s'
+ manpages = manpages_binary + manpages_misc + manpages_etcd + manpages_ceph
+ for t in manpages:
+ cmd = CMD_TEMPLATE % ('doc/%s' % t, MAN_XSL, 'doc/%s.xml' % t)
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print('Command %s failed with exit status %d' % (cmd, ret))
+ sys.exit(ret)
+
+ cmd = CMD_TEMPLATE % ('doc/%s.html' % t, HTML_XSL, 'doc/%s.xml' % t)
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print('Command %s failed with exit status %d' % (cmd, ret))
+ sys.exit(ret)
+
+
+def distonly(ctx):
+ samba_dist.DIST_FILES('VERSION:VERSION', extend=True)
+
+ t = 'ctdb.spec'
+ sed_expr1 = 's/@VERSION@/%s/g' % get_version_string()
+ sed_expr2 = 's/@RELEASE@/%s/g' % '1'
+ cmd = 'sed -e "%s" -e "%s" packaging/RPM/ctdb.spec.in > %s' % (
+ sed_expr1, sed_expr2, t)
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print('Command "%s" failed with exit status %d' % (cmd, ret))
+ sys.exit(ret)
+ samba_dist.DIST_FILES('ctdb/%s:%s' % (t, t), extend=True)
+
+ manpages = manpages_binary + manpages_misc + manpages_etcd + manpages_ceph
+ for t in manpages:
+ samba_dist.DIST_FILES('ctdb/doc/%s:doc/%s' % (t, t), extend=True)
+ samba_dist.DIST_FILES('ctdb/doc/%s.html:doc/%s.html' % (t, t),
+ extend=True)
+
+ samba_dist.dist()
+
+
+def dist():
+ Options.commands.append('manpages')
+ Options.commands.append('distonly')
+
+
+def rpmonly(ctx):
+ opts = os.getenv('RPM_OPTIONS') or ''
+ cmd = 'rpmbuild -ta --clean --rmsource %s ctdb-%s.tar.gz' % \
+ (opts, get_version_string())
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print('rpmbuild exited with exit status %d' % ret)
+ sys.exit(ret)
+
+
+def rpm(ctx):
+ Options.commands.append('manpages')
+ Options.commands.append('distonly')
+ Options.commands.append('rpmonly')
+
+
+def ctags(ctx):
+ "build 'tags' file using ctags"
+ source_root = os.path.dirname(Context.g_module.root_path)
+ cmd = 'ctags $(find %s -name "*.[ch]")' % source_root
+ print("Running: %s" % cmd)
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print('ctags failed with exit status %d' % ret)
+ sys.exit(ret)