#!/usr/bin/env bash
# Test that recovery correctly handles RSNs

# Recovery can under certain circumstances lead to old record copies being
# resurrected: recovery selects the newest copy of a record purely by RSN.
# At the end of a recovery the leader is the dmaster for all records in
# all (non-persistent) databases, and the other nodes hold complete local
# copies of those databases.  The bug is that the recovery process does
# not increment the RSN on the leader at the end of the recovery.  Clients
# acting directly on the leader will then change a record's content on the
# leader without a migration and hence without an RSN bump.  A subsequent
# recovery therefore cannot tell that the leader's copy is newer than the
# copies on the other nodes, since the RSNs are identical.  Hence, if the
# leader is not node 0 (more precisely, not the active node with the
# lowest node number), the recovery will choose the stale copies from the
# lower-numbered nodes and stick with those.

# 1. Create a test database
# 2. Add a record with value value1 on the leader
# 3. Force a recovery
# 4. Update the record with value value2 on the leader
# 5. Force a recovery
# 6. Confirm that the value is value2
. "${TEST_SCRIPTS_DIR}/integration.bash"
set -e
ctdb_test_init
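
# Debugging aid, not invoked by the test itself: a minimal sketch that
# dumps every record of a database from all nodes so the per-node RSNs can
# be compared by eye.  It assumes "ctdb catdb" prints each record's RSN;
# the exact output format may differ between CTDB versions.
dump_rsns ()
{
	local db="$1"

	ctdb_onnode all catdb "$db"
	cat "$outfile"
}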
#
# Main test
#

TESTDB="rec_test.tdb"

status=0

# Make sure node 0 is not the leader
echo "find out which node is leader"
ctdb_onnode 0 leader
leader="$out"

if [ "$leader" = "0" ]; then
echo "node 0 is leader, disable leader role on node 0"
#
# Note:
# It should be sufficient to run "ctdb setleaderrole off"
# on node 0 and wait for election and recovery to finish.
# But there were problems related to this in this automatic
# test, so for now use "ctdb stop" and "ctdb continue".
#
echo "stop node 0"
try_command_on_node 0 $CTDB stop
wait_until_node_has_status 0 stopped
echo "continue node 0"
try_command_on_node 0 $CTDB continue
wait_until_node_has_status 0 notstopped
ctdb_onnode 0 leader
leader="$out"
if [ "$leader" = "0" ]; then
echo "failed to move leader to different node"
exit 1
fi
fi
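
# Sketch of the preferred approach from the note above, kept as an
# uncalled helper because of the problems mentioned there: disable the
# leader role on node 0 and poll until another node has won the election.
# The polling loop and its timeout are assumptions, not part of the
# original test.
move_leader_off_node0 ()
{
	local i

	try_command_on_node 0 $CTDB setleaderrole off

	# Wait up to 30 seconds for another node to become leader
	for i in $(seq 1 30) ; do
		ctdb_onnode 0 leader
		leader="$out"
		if [ "$leader" != "0" ] ; then
			return 0
		fi
		sleep 1
	done

	return 1
}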
echo "Leader:${leader}"
# Create a temporary non-persistent database to test with
echo "create test database $TESTDB"
ctdb_onnode "$leader" attach "$TESTDB"
# Wipe Test database
echo "wipe test database"
ctdb_onnode "$leader" wipedb "$TESTDB"
# Add a record key=test1 data=value1
echo "store key(test1) data(value1)"
ctdb_onnode "$leader" writekey "$TESTDB" test1 value1
# Fetch a record key=test1
echo "read key(test1)"
ctdb_onnode "$leader" readkey "$TESTDB" test1
cat "$outfile"
# Do a recovery
echo "force recovery"
ctdb_onnode "$leader" recover
wait_until_node_has_status "$leader" recovered
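
# The bug window opens here: after the recovery the leader is dmaster for
# the record but its RSN was not bumped, so the next write on the leader
# happens without a migration.  To observe the per-node RSNs at this
# point, the dump_rsns sketch above could be called (left commented out so
# the test output stays unchanged):
# dump_rsns "$TESTDB"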

# Update the record key=test1 to data=value2
echo "store key(test1) data(value2)"
ctdb_onnode "$leader" writekey "$TESTDB" test1 value2

# Fetch the record key=test1
echo "read key(test1)"
ctdb_onnode "$leader" readkey "$TESTDB" test1
cat "$outfile"

# Do a recovery
echo "force recovery"
ctdb_onnode "$leader" recover
wait_until_node_has_status "$leader" recovered

# Verify the record key=test1
echo "read key(test1)"
ctdb_onnode "$leader" readkey "$TESTDB" test1
cat "$outfile"
if [ "$out" = "Data: size:6 ptr:[value2]" ]; then
echo "GOOD: Recovery did not corrupt database"
else
echo "BAD: Recovery corrupted database"
status=1
fi
exit $status