summaryrefslogtreecommitdiffstats
path: root/ctdb/config/events/legacy/13.per_ip_routing.script
blob: d7949c6dedb7ec0e68e9c4c0ea2322396d2749d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
#!/bin/sh

# Work out the CTDB base directory unless the caller already provided
# it: this is the parent of the (physical) directory containing this
# script.
if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
fi

# Helpers used below but not defined in this file (die() and friends)
# presumably come from here.
. "${CTDB_BASE}/functions"

load_script_options

service_name="per_ip_routing"

# Prefix for the routing table labels that this script owns.
table_id_prefix="ctdb."

# Do nothing if unconfigured
if [ -z "$CTDB_PER_IP_ROUTING_CONF" ] ; then
    exit 0
fi

# The rule preference has no default, so it must be configured.
if [ -z "$CTDB_PER_IP_ROUTING_RULE_PREF" ] ; then
    die "error: CTDB_PER_IP_ROUTING_RULE_PREF not configured"
fi

# Validate the configured routing table id range.
#
# Globals:   CTDB_PER_IP_ROUTING_TABLE_ID_LOW (read)
#            CTDB_PER_IP_ROUTING_TABLE_ID_HIGH (read)
# Returns:   0 if the range is usable.  Otherwise die() is called with
#            an explanatory message.
check_table_id_range ()
{
    # LOW must be strictly less than HIGH.  The 2>/dev/null hides the
    # complaint from [ when either value is unset or non-numeric; the
    # test still fails in that case.
    [ "$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" -lt "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] 2>/dev/null || \
	die "error: CTDB_PER_IP_ROUTING_TABLE_ID_LOW[$CTDB_PER_IP_ROUTING_TABLE_ID_LOW] and/or CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH] improperly configured"

    # Table ids 253-255 are off limits (in iproute2 these are the
    # default, main and local tables).  Reject the range if it
    # overlaps 253-255 at all, not only when it contains the whole of
    # it: LOW..HIGH intersects 253..255 exactly when LOW <= 255 and
    # 253 <= HIGH.
    if [ "$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" -le 255 ] &&  \
	   [ 253 -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; then
	die "error: range CTDB_PER_IP_ROUTING_TABLE_ID_LOW[$CTDB_PER_IP_ROUTING_TABLE_ID_LOW]..CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH] must not include 253-255"
    fi
}

check_table_id_range

# Succeed (return 0) if CTDB_PER_IP_ROUTING_CONF is set to the magic
# value "__auto_link_local__", otherwise return 1.
have_link_local_config ()
{
    case "$CTDB_PER_IP_ROUTING_CONF" in
	__auto_link_local__) return 0 ;;
	*) return 1 ;;
    esac
}

# Unless the magic link local value is in use, the configuration must
# name a readable file.
have_link_local_config || [ -r "$CTDB_PER_IP_ROUTING_CONF" ] || \
    die "error: CTDB_PER_IP_ROUTING_CONF=$CTDB_PER_IP_ROUTING_CONF file not found"

# This sets script_state_dir, which is where the rt_tables lock file
# is kept.
ctdb_setup_state_dir "failover" "$service_name"

######################################################################

# Check whether a string is a dotted-quad IPv4 address.
#
# Arguments: $1 - candidate address
# Returns:   0 if $1 is exactly 4 '.'-separated decimal fields, each
#            in the range 0-255; 1 otherwise
ipv4_is_valid_addr()
{
    _ip="$1"

    # Reject anything that isn't purely digits and dots, as well as
    # leading/trailing dots and empty fields.  Word splitting below
    # would silently drop some of these, and this also ensures that
    # the unquoted expansion can't contain glob characters.
    case "$_ip" in
	*[!0-9.]*|.*|*.|*..*) return 1 ;;
    esac

    _count=0
    # Get the shell to break up the address into 1 word per octet
    # Intentional word splitting here
    # shellcheck disable=SC2086
    for _o in $(export IFS="." ; echo $_ip) ; do
	# An octet is bad when it is NOT (0 <= octet <= 255), so both
	# range tests are grouped and negated together.  The
	# 2>/dev/null applies to the whole group and stops output
	# from failures where an "octet" is not a usable number.  The
	# test will still fail.
	if ! { [ 0 -le "$_o" ] && [ "$_o" -le 255 ] ; } 2>/dev/null ; then
	    return 1
	fi
	_count=$((_count + 1))
    done

    # A valid IPv4 address has 4 octets
    [ "$_count" -eq 4 ]
}

# Stop processing an event if the given address is not IPv4.
#
# Arguments: $1 - event name (only used in the message)
#            $2 - IP address to check
# Returns 0 if the address is valid.  Otherwise prints a message and
# exits with status 0, so the event is skipped rather than failed.
ensure_ipv4_is_valid_addr ()
{
    _event="$1"
    _ip="$2"

    if ipv4_is_valid_addr "$_ip" ; then
	return 0
    fi

    echo "$0: $_event not an ipv4 address skipping IP:$_ip"
    exit 0
}

# Print the network containing a host address, in "a.b.c.d/bits"
# form.
#
# Arguments: $1 - IPv4 host address (dotted quad)
#            $2 - number of mask bits
ipv4_host_addr_to_net ()
{
    _host="$1"
    _maskbits="$2"

    # Fold the octets into a single integer, most significant octet
    # first.
    _host_ul=0
    # Intentional word splitting here
    # shellcheck disable=SC2086
    for _o in $(export IFS="." ; echo $_host) ; do
	_host_ul=$(( _host_ul * 256 + _o ))
    done

    # Clear the host bits by applying the mask.
    _mask_ul=$(( 0xffffffff << (32 - _maskbits) ))
    _net_ul=$(( _host_ul & _mask_ul ))

    # Pick the 4 bytes back out of the result, most significant
    # first.
    _b1=$(( (_net_ul >> 24) & 255 ))
    _b2=$(( (_net_ul >> 16) & 255 ))
    _b3=$(( (_net_ul >> 8) & 255 ))
    _b4=$(( _net_ul & 255 ))
    _net="${_b1}.${_b2}.${_b3}.${_b4}"

    echo "${_net}/${_maskbits}"
}

######################################################################

# Set rt_tables and rt_tables_lock, and make sure that the rt_tables
# file exists.
#
# Globals: CTDB_SYS_ETCDIR, script_state_dir (read)
#          rt_tables, rt_tables_lock (written)
ensure_rt_tables ()
{
    # script_state_dir set by ctdb_setup_state_dir()
    # shellcheck disable=SC2154
    rt_tables_lock="${script_state_dir}/rt_tables_lock"
    rt_tables="$CTDB_SYS_ETCDIR/iproute2/rt_tables"

    # Normally the file is already there and there is nothing to do.
    [ -f "$rt_tables" ] && return 0

    # It should always exist, and adding a route would have created
    # it, but we might start up and immediately shut down.  Create
    # an empty one, along with its directory, to be sure.
    mkdir -p "${rt_tables%/*}" # dirname
    touch "$rt_tables"
}

# Setup a table id to use for the given IP.  We don't need to know it,
# it just needs to exist in /etc/iproute2/rt_tables.  Fail if no free
# table id could be found in the configured range.
#
# Arguments: $1 - IP address
# Globals:   table_id_prefix, CTDB_PER_IP_ROUTING_TABLE_ID_LOW,
#            CTDB_PER_IP_ROUTING_TABLE_ID_HIGH (read); rt_tables and
#            rt_tables_lock are set via ensure_rt_tables()
# Returns:   0 if a label for the IP is present in rt_tables (either
#            found or newly appended), non-zero if the lock could not
#            be taken or the range is exhausted
ensure_table_id_for_ip ()
{
    _ip=$1

    ensure_rt_tables

    # Maintain a table id for each IP address we've ever seen in
    # rt_tables.  We use a "ctdb." prefix on the label.
    _label="${table_id_prefix}${_ip}"

    # This finds either the table id corresponding to the label or a
    # new unused one (that is greater than all the used ones in the
    # range).  It runs in a subshell holding a lock on fd 9 so that
    # concurrent updates of rt_tables are serialised.
    (
	# Note that die() just gets us out of the subshell...
	flock --timeout 30 9 || \
	    die "ensure_table_id_for_ip: failed to lock file $rt_tables"

	_new="$CTDB_PER_IP_ROUTING_TABLE_ID_LOW"
	# -r: take each line literally, without backslash processing
	while read -r _t _l ; do
	    # Skip comments
	    case "$_t" in
		\#*) continue ;;
	    esac
	    # Found existing: done
	    if [ "$_l" = "$_label" ] ; then
		return 0
	    fi
	    # Potentially update the new table id to be used.  The
	    # tests are grouped so that the redirect covers both of
	    # them.  This stops error spam for a non-numeric or empty
	    # value, such as on a blank line.
	    if { [ "$_new" -le "$_t" ] && \
		     [ "$_t" -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; } \
		   2>/dev/null ; then
		_new=$((_t + 1))
	    fi
	done <"$rt_tables"

	# If the new table id is legal then add it to the file,
	# otherwise report that the range is exhausted.
	if [ "$_new" -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; then
	    printf '%d\t%s\n' "$_new" "$_label" >>"$rt_tables"
	    return 0
	else
	    return 1
	fi
    ) 9>"$rt_tables_lock"
}

# Clean up all the table ids that we might own.
#
# Globals:   table_id_prefix, CTDB_PER_IP_ROUTING_TABLE_ID_LOW,
#            CTDB_PER_IP_ROUTING_TABLE_ID_HIGH (read); rt_tables and
#            rt_tables_lock are set via ensure_rt_tables()
# Returns:   0 if rt_tables was rewritten, non-zero if the lock could
#            not be taken or the file could not be filtered/replaced
clean_up_table_ids ()
{
    ensure_rt_tables

    (
	# Note that die() just gets us out of the subshell...
	flock --timeout 30 9 || \
	    die "clean_up_table_ids: failed to lock file $rt_tables"

	# Delete any items from the file that have a table id in our
	# range or a label matching our label.  Preserve comments.
	_tmp="${rt_tables}.$$.ctdb"
	if awk -v min="$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" \
	       -v max="$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" \
	       -v pre="$table_id_prefix" \
	       '/^#/ ||
	        !(min <= $1 && $1 <= max) &&
	        !(index($2, pre) == 1) {
		   print $0 }' "$rt_tables" >"$_tmp" ; then
	    mv "$_tmp" "$rt_tables"
	else
	    # Only replace rt_tables with output that is known to be
	    # complete.  A failed filter must not clobber it.
	    rm -f "$_tmp"
	    die "clean_up_table_ids: failed to filter $rt_tables"
	fi
    ) 9>"$rt_tables_lock"
}

######################################################################

# Print the routing configuration for an IP, one line per route, each
# line starting with the IP itself.  Nothing is printed if there is no
# configuration for the IP.
#
# Normally the lines come from the CTDB_PER_IP_ROUTING_CONF file.  If
# the magic link local value is configured instead then a single
# route, for the IP's own network, is generated from public_addresses.
get_config_for_ip ()
{
    _ip="$1"

    if ! have_link_local_config ; then
	# Echo matching lines from the config file, with a tab
	# between the IP and the rest of the line.
	while read _i _rest ; do
	    [ "$_ip" != "$_i" ] || printf '%s\t%s\n' "$_ip" "$_rest"
	done <"$CTDB_PER_IP_ROUTING_CONF"
	return
    fi

    # Link local: also split public_addresses lines on '/', so that
    # the maskbits arrive as the 2nd item with no further parsing.
    while IFS="/$IFS" read _i _maskbits _x ; do
	[ "$_ip" = "$_i" ] || continue
	printf '%s ' "$_ip"
	ipv4_host_addr_to_net "$_ip" "$_maskbits"
    done <"${CTDB_BASE}/public_addresses"
}

# Succeed if get_config_for_ip() prints anything for the given IP.
#
# Arguments: $1 - IP address
# Returns:   0 if there is configuration for the IP, 1 if not
ip_has_configuration ()
{
    _ip="$1"

    _conf=$(get_config_for_ip "$_ip")
    if [ -z "$_conf" ] ; then
	return 1
    fi
    return 0
}

# Install the policy routing rule and the configured routes for an
# IP.
#
# Arguments: $1 - interface that hosts the IP
#            $2 - IP address
# Any existing rule/routes for the IP are removed first, so this can
# also be used to refresh them.
add_routing_for_ip ()
{
    _iface="$1"
    _ip="$2"

    # IPs without any configuration are silently left alone.
    if ! ip_has_configuration "$_ip" ; then
	return 0
    fi

    if ! ensure_table_id_for_ip "$_ip" ; then
	die "add_routing_for_ip: out of table ids in range $CTDB_PER_IP_ROUTING_TABLE_ID_LOW - $CTDB_PER_IP_ROUTING_TABLE_ID_HIGH"
    fi

    _table_id="${table_id_prefix}${_ip}"
    _pref="$CTDB_PER_IP_ROUTING_RULE_PREF"

    # Start from scratch.  Failure here just means that there was
    # nothing to remove, so the output is discarded.
    del_routing_for_ip "$_ip" >/dev/null 2>&1

    if ! ip rule add from "$_ip" pref "$_pref" table "$_table_id" ; then
	die "add_routing_for_ip: failed to add rule for $_ip"
    fi

    # Each config line for the IP becomes a route in the IP's table.
    # The gateway is optional.
    # NOTE(review): this loop is the tail of a pipeline, so in shells
    # that run it in a subshell die() only ends the loop and sets
    # this function's status - confirm that is what is wanted.
    get_config_for_ip "$_ip" |
    while read _i _dest _gw ; do
	_r="$_dest ${_gw:+via} $_gw dev $_iface table $_table_id"
	# Intentionally unquoted multi-word value here
	# shellcheck disable=SC2086
	if ! ip route add $_r ; then
	    die "add_routing_for_ip: failed to add route: $_r"
	fi
    done
}

# Remove the policy routing rule for an IP and flush the routes from
# the IP's table.
#
# Arguments: $1 - IP address
# Problems are reported on stdout as a warning but are otherwise not
# fatal.
del_routing_for_ip ()
{
    _ip="$1"

    _table_id="${table_id_prefix}${_ip}"
    _pref="$CTDB_PER_IP_ROUTING_RULE_PREF"

    # We own any matching table ids, so no need to check anything
    # before attempting the deletion.  Capture the output so that it
    # can be shown as part of a meaningful message on failure.
    _cmd="ip rule del from $_ip pref $_pref table $_table_id"
    if ! _out=$($_cmd 2>&1) ; then
	cat <<EOF
WARNING: Failed to delete policy routing rule
  Command "$_cmd" failed:
  $_out
EOF
    fi

    # Flushing should not usually fail, so the output isn't hidden.
    # It can fail for a rogue IP, which has no routes, but then the
    # rule deletion above should already have failed due to the
    # invalid table id.  Indent anything that is printed so that it
    # lines up under the warning above instead of looking like a
    # separate problem.
    ip route flush table "$_table_id" 2>&1 | sed -e 's@^.@  &@'
}

######################################################################

# Remove every policy routing rule that uses our configured
# preference, and flush the routing table that each such rule refers
# to.
flush_rules_and_routes ()
{
    ip rule show |
    while read _p _x _i _x _t ; do
	# The priority/preference is printed with a trailing colon.
	_p="${_p%:}"
	# Leave rules with any other priority/preference alone.
	if [ "$CTDB_PER_IP_ROUTING_RULE_PREF" != "$_p" ] ; then
	    continue
	fi

	echo "Removing ip rule for public address $_i for routing table $_t"
	ip rule del from "$_i" table "$_t" pref "$_p"
	ip route flush table "$_t" 2>/dev/null
    done
}

# Make sure that routing is in place for every address hosted on
# this node.  Routes can go missing if, for example, all IPs on the
# network were removed (possibly because the primary was removed).
#
# Arguments: $1 - optional; if "force" then routing is (re-)added for
#                 all hosted addresses, not just those with an empty
#                 routing table
# Exits with the status of the listing pipeline if that is non-zero.
add_missing_routes ()
{
    $CTDB ip -v -X | {
	# The first line is a header, so throw it away.
	read _x

	# Of the remaining '|'-separated columns only "IP" and
	# "ActiveInterface" are needed.  "ActiveInterface" is only
	# set for addresses local to this node.
	while IFS="|" read _x _ip _x _iface _x ; do
	    # Not hosted here: nothing to do.
	    if [ -z "$_iface" ] ; then
		continue
	    fi

	    # Populate the IP's routing table if it is empty (or
	    # unconditionally, when forced).
	    _table_id="${table_id_prefix}${_ip}"
	    _routes=$(ip route show table "$_table_id" 2>/dev/null)
	    if [ -z "$_routes" ] || [ "$1" = "force" ]  ; then
		add_routing_for_ip "$_iface" "$_ip"
	    fi
	done
    } || exit $?
}

# Remove rules/routes for addresses that this node is not hosting.
# These can be left behind if a releaseip event failed in an earlier
# script, since then this script never got the chance to clean up.
remove_bogus_routes ()
{
    # Get the IPs currently hosted by this node, one per line, each
    # wrapped in '@' so that whole addresses can be matched below.
    _ips=$($CTDB ip -v -X | awk -F'|' 'NR > 1 && $4 != "" {printf "@%s@\n", $2}')

    # x is intentionally ignored
    # shellcheck disable=SC2034
    ip rule show |
    while read _p _x _i _x _t ; do
	# The priority/preference is printed with a trailing colon.
	_p="${_p%:}"
	# Leave rules with any other priority/preference alone.
	if [ "$CTDB_PER_IP_ROUTING_RULE_PREF" != "$_p" ] ; then
	    continue
	fi
	# Keep the rule if "@${_i}@" appears in $_ips, that is, if we
	# host the IP.  Shell pattern matching is used instead of
	# grep to avoid unnecessary processes.
	case "$_ips" in
	    *"@${_i}@"*) continue ;;
	esac

	echo "Removing ip rule/routes for unhosted public address $_i"
	del_routing_for_ip "$_i"
    done
}

######################################################################

# Event dispatch.  $1 is the event name and the remaining arguments
# depend on the event.  Events that are not listed below do nothing
# and fall through to the final "exit 0".

# NOTE(review): ctdb_check_args is not defined in this file;
# presumably it validates the arguments for the event - confirm.
ctdb_check_args "$@"

case "$1" in
startup)
	# Start from a clean slate: remove any rules using our
	# preference, along with the routes in their tables.
	flush_rules_and_routes

	# make sure that we only respond to ARP messages from the NIC
	# where a particular ip address is associated.
	# The setting is only changed if it can be read first.
	get_proc sys/net/ipv4/conf/all/arp_filter >/dev/null 2>&1 && {
	    set_proc sys/net/ipv4/conf/all/arp_filter 1
	}
	;;

shutdown)
	# Remove our rules/routes and then the table ids/labels that
	# we own in rt_tables.
	flush_rules_and_routes
	clean_up_table_ids
	;;

takeip)
	# Arguments: <event> <interface> <ip> <maskbits>
	iface=$2
	ip=$3
	# maskbits included here so argument order is obvious
	# shellcheck disable=SC2034
	maskbits=$4

	# For a non-IPv4 address this exits the script with status 0,
	# so nothing below is run.
	ensure_ipv4_is_valid_addr "$1" "$ip"
	add_routing_for_ip "$iface" "$ip"

	# flush our route cache
	set_proc sys/net/ipv4/route/flush 1

	# NOTE(review): presumably announces the IP on the interface
	# via gratuitous ARP - confirm against the ctdb tool.
	$CTDB gratarp "$ip" "$iface"
	;;

updateip)
	# Arguments: <event> <old-interface> <new-interface> <ip> <maskbits>
	# oiface, maskbits included here so argument order is obvious
	# shellcheck disable=SC2034
	oiface=$2
	niface=$3
	ip=$4
	# shellcheck disable=SC2034
	maskbits=$5

	# For a non-IPv4 address this exits the script with status 0,
	# so nothing below is run.
	ensure_ipv4_is_valid_addr "$1" "$ip"
	# Routing is (re-)added against the new interface; this also
	# removes any existing rule/routes for the IP first.
	add_routing_for_ip "$niface" "$ip"

	# flush our route cache
	set_proc sys/net/ipv4/route/flush 1

	$CTDB gratarp "$ip" "$niface"
	# NOTE(review): tickle_tcp_connections is not defined in this
	# file; presumably it prompts clients to reset existing TCP
	# connections to the IP - confirm.
	tickle_tcp_connections "$ip"
	;;

releaseip)
	# Arguments: <event> <interface> <ip> <maskbits>
	iface=$2
	ip=$3
	# maskbits included here so argument order is obvious
	# shellcheck disable=SC2034
	maskbits=$4

	# For a non-IPv4 address this exits the script with status 0,
	# so nothing below is run.
	ensure_ipv4_is_valid_addr "$1" "$ip"
	del_routing_for_ip "$ip"
	;;

ipreallocated)
	# Bring rules/routes back in line with the addresses that are
	# currently hosted on this node.
	add_missing_routes
	remove_bogus_routes
	;;

reconfigure)
	echo "Reconfiguring service \"${service_name}\"..."

	# "force" causes routing to be re-added for all hosted
	# addresses, not only those whose table is empty.
	add_missing_routes "force"
	remove_bogus_routes

	# flush our route cache
	set_proc sys/net/ipv4/route/flush 1
	;;
esac

# Always report success for events that get this far.
exit 0