summaryrefslogtreecommitdiffstats
path: root/heartbeat/aws-vpc-route53.in
blob: 18ab157e8a7358e3c0b8e929866b00b9769f4a9e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
#!@BASH_SHELL@
#
#   Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved.
#   Licensed under the MIT License.
#
#  Copyright 2017 Amazon.com, Inc. and its affiliates

# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is furnished to do
# so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

#
# OCF resource agent to update a Route53 DNS A record for an AWS EC2 instance
# Written by Stefan Schneider (AWS), Martin Tegmeier (AWS)
# Based on code of Markus Guertler (SUSE)
#
# Mar. 15, 2017, vers 1.0.2


#######################################################################
# Initialization:

# Locate the OCF shell function library: OCF_FUNCTIONS_DIR is only defaulted
# (to ${OCF_ROOT}/lib/heartbeat) when it is unset, then ocf-shellfuncs is
# sourced from that directory.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Parameter defaults, each followed by the statement that applies it.
#
# Two expansion forms are used on purpose:
#   ${var=default}   assigns only when var is UNSET (an explicitly empty
#                    value is kept as-is)
#   ${var:=default}  assigns when var is unset OR empty

# awscli: path of the AWS CLI binary
OCF_RESKEY_awscli_default="/usr/bin/aws"
: "${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}"

# auth_type: "key" or "role"
OCF_RESKEY_auth_type_default="key"
: "${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}"

# profile: AWS CLI profile name
OCF_RESKEY_profile_default="default"
: "${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}"

# region: AWS region
OCF_RESKEY_region_default=""
: "${OCF_RESKEY_region=${OCF_RESKEY_region_default}}"

# hostedzoneid: Route53 hosted zone ID
OCF_RESKEY_hostedzoneid_default=""
: "${OCF_RESKEY_hostedzoneid:=${OCF_RESKEY_hostedzoneid_default}}"

# fullname: DNS name of the record
OCF_RESKEY_fullname_default=""
: "${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}}"

# ip: "local", "public" or an explicit IPv4 address
OCF_RESKEY_ip_default="local"
: "${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}}"

# ttl: TTL of the record
OCF_RESKEY_ttl_default=10
: "${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}}"

# Print the one-line synopsis of the actions accepted by this agent to stdout.
usage() {
	printf 'usage: %s {start|stop|status|monitor|validate-all|meta-data}\n' "$0"
}

# Print the OCF resource agent meta-data (XML) to stdout.
#
# The here-document is unquoted on purpose so that the
# ${OCF_RESKEY_*_default} values are expanded into the "default" attributes.
# Because of that, a literal backslash at the end of a line has to be written
# as "\\" (a single backslash followed by a newline would be swallowed as a
# line continuation).
metadata() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="aws-vpc-route53" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Update Route53 record of Amazon Webservices EC2 by updating an entry in a
hosted zone ID table.

AWS instances will require policies which allow them to update Route53 ARecords:
{
	"Version": "2012-10-17",
	"Statement": [
		{
			"Sid": "Stmt1471878724000",
			"Effect": "Allow",
			"Action": [
				"route53:ChangeResourceRecordSets",
				"route53:GetChange",
				"route53:ListResourceRecordSets"
			],
			"Resource": [
				"*"
			]
		}
	]
}

Example Cluster Configuration:

Use a configuration in "crm configure edit" which looks as follows. Replace
hostedzoneid, fullname and profile with the appropriate values:

primitive res_route53 ocf:heartbeat:aws-vpc-route53 \\
		params hostedzoneid=EX4MPL3EX4MPL3 fullname=service.cloud.example.corp. profile=cluster \\
		op start interval=0 timeout=180 \\
		op stop interval=0 timeout=180 \\
		op monitor interval=300 timeout=180 \\
		meta target-role=Started
</longdesc>
<shortdesc lang="en">Update Route53 VPC record for AWS EC2</shortdesc>

<parameters>
<parameter name="awscli">
<longdesc lang="en">
Path to command line tools for AWS
</longdesc>
<shortdesc lang="en">Path to AWS CLI tools</shortdesc>
<content type="string" default="${OCF_RESKEY_awscli_default}" />
</parameter>

<parameter name="auth_type">
<longdesc lang="en">
Authentication type "key" for AccessKey and SecretAccessKey set via "aws configure",
or "role" to use AWS Policies.
</longdesc>
<shortdesc lang="en">Authentication type</shortdesc>
<content type="string" default="${OCF_RESKEY_auth_type_default}" />
</parameter>

<parameter name="profile">
<longdesc lang="en">
The name of the AWS CLI profile of the root account. This
profile will have to use the "text" format for CLI output.
The file /root/.aws/config should have an entry which looks
like:

  [profile cluster]
	region = us-east-1
	output = text

"cluster" is the name which has to be used in the cluster
configuration. The region has to be the current one. The
output has to be "text".
</longdesc>
<shortdesc lang="en">AWS Profile Name</shortdesc>
<content type="string" default="${OCF_RESKEY_profile_default}" />
</parameter>

<parameter name="region" required="0">
<longdesc lang="en">
Region for AWS resource (required for role-based authentication)
</longdesc>
<shortdesc lang="en">Region</shortdesc>
<content type="string" default="${OCF_RESKEY_region_default}" />
</parameter>

<parameter name="hostedzoneid" required="1">
<longdesc lang="en">
Hosted zone ID of Route 53. This is the table of
the Route 53 record.
</longdesc>
<shortdesc lang="en">AWS hosted zone ID</shortdesc>
<content type="string" default="${OCF_RESKEY_hostedzoneid_default}" />
</parameter>

<parameter name="fullname" required="1">
<longdesc lang="en">
The full name of the service which will host the IP address.
Example: service.cloud.example.corp.
Note: The trailing dot is important to Route53!
</longdesc>
<shortdesc lang="en">Full service name</shortdesc>
<content type="string" default="${OCF_RESKEY_fullname_default}" />
</parameter>

<parameter name="ip" required="0">
<longdesc lang="en">
IP: local (default), public or a secondary private IP address (e.g. 10.0.0.1).

A secondary private IP can be setup with the awsvip agent.
</longdesc>
<shortdesc lang="en">Type of IP or secondary private IP address (local, public or e.g. 10.0.0.1)</shortdesc>
<content type="string" default="${OCF_RESKEY_ip_default}" />
</parameter>

<parameter name="ttl" required="0">
<longdesc lang="en">
Time to live for Route53 ARECORD
</longdesc>
<shortdesc lang="en">ARECORD TTL</shortdesc>
<content type="string" default="${OCF_RESKEY_ttl_default}" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="180s" />
<action name="stop" timeout="180s" />
<action name="monitor" depth="0" timeout="180s" interval="300s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}

# Validate the environment and the resource parameters.
#
# Checks that the required binaries exist, that fullname, hostedzoneid, ip,
# profile (for auth_type "key") and ttl carry usable values, and that the
# AWS CLI can be executed.
#
# Returns $OCF_SUCCESS when everything is fine. Every failed check terminates
# the agent via "exit" ($OCF_ERR_CONFIGURED for parameter problems,
# $OCF_ERR_INSTALLED when the AWS CLI cannot be run), so callers that do not
# inspect the return value are still stopped.
r53_validate() {
	local command

	ocf_log debug "function: validate"

	# Check for required binaries
	ocf_log debug "Checking for required binaries"
	for command in "${OCF_RESKEY_awscli}" curl dig; do
		check_binary "$command"
	done

	# Full name
	if [ -z "$OCF_RESKEY_fullname" ]; then
		ocf_log error "Full name parameter not set $OCF_RESKEY_fullname!"
		exit $OCF_ERR_CONFIGURED
	fi

	# Hosted Zone ID
	if [ -z "$OCF_RESKEY_hostedzoneid" ]; then
		ocf_log error "Hosted Zone ID parameter not set $OCF_RESKEY_hostedzoneid!"
		exit $OCF_ERR_CONFIGURED
	fi

	# Type of IP/secondary IP address
	case $OCF_RESKEY_ip in
		local|public|*.*.*.*)
			;;
		*)
			ocf_exit_reason "Invalid value for ip: ${OCF_RESKEY_ip}"
			exit $OCF_ERR_CONFIGURED
			;;
	esac

	# profile (only needed for key based authentication)
	if [ "x${OCF_RESKEY_auth_type}" = "xkey" ] && [ -z "$OCF_RESKEY_profile" ]; then
		ocf_exit_reason "profile parameter not set"
		exit $OCF_ERR_CONFIGURED
	fi

	# TTL: must be set, and numeric because it is written unquoted into the
	# JSON change batch
	if [ -z "$OCF_RESKEY_ttl" ]; then
		ocf_log error "TTL not set $OCF_RESKEY_ttl!"
		exit $OCF_ERR_CONFIGURED
	fi
	case $OCF_RESKEY_ttl in
		*[!0-9]*)
			ocf_exit_reason "Invalid value for ttl: ${OCF_RESKEY_ttl}"
			exit $OCF_ERR_CONFIGURED
			;;
	esac

	ocf_log debug "Testing aws command"
	if ! "$OCF_RESKEY_awscli" --version 2>&1; then
		ocf_log error "Error while executing aws command as user root! Please check if AWS CLI tools (Python flavor) are properly installed and configured."
		exit $OCF_ERR_INSTALLED
	fi
	ocf_log debug "ok"

	return $OCF_SUCCESS
}

# Start action: resolve this node's address and let r53_monitor bring the
# Route53 record in line with it.
# Returns $OCF_SUCCESS when r53_monitor reports $OCF_SUCCESS, otherwise
# $OCF_ERR_GENERIC.
r53_start() {
	local monitor_rc

	ocf_log info "Starting Route53 DNS update...."
	_get_ip

	r53_monitor
	monitor_rc=$?

	if [ $monitor_rc = $OCF_SUCCESS ]; then
		return $OCF_SUCCESS
	fi

	ocf_log info "Could not start agent - check configurations"
	return $OCF_ERR_GENERIC
}

# Stop action.
#
# Deliberately a no-op towards Route53: no API call is made and the DNS record
# is left in place, since removing it is neither required nor necessarily
# wanted. When the agent is started on another cluster node, the start and
# monitor actions repoint the record.
# Always returns $OCF_SUCCESS.
r53_stop() {
	ocf_log info "Bringing down Route53 agent. (Will NOT remove Route53 DNS record)"
	return $OCF_SUCCESS
}

# Query the Route53 API for the record set named $OCF_RESKEY_fullname.
# $1 is the AWS CLI command prefix (binary plus global options); it is
# expanded unquoted on purpose so that it splits into separate words.
# Stores the combined stdout/stderr of the CLI in CLIRES and returns the
# CLI exit status.
_r53_list_record() {
	CLIRES="$($1 route53 list-resource-record-sets --hosted-zone-id "$OCF_RESKEY_hostedzoneid" --query "ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" 2>&1)"
}

r53_monitor() {
	#
	# For every start action (and probe) the agent calls the Route53 API to check
	# the DNS record; otherwise it tries to get the result by querying DNS with
	# "dig". Due to complexity in some DNS architectures "dig" can fail, and if
	# this happens the monitor falls back to the Route53 API call.
	#
	# A failing recurring monitor never triggers failure, failover or restart of
	# the agent, hence that path only returns $OCF_SUCCESS. The start/probe path
	# returns $OCF_ERR_GENERIC on API errors, and a probe returns
	# $OCF_NOT_RUNNING when the record points to another address.
	#
	# If a wrong or non-existent Route53 DNS entry is detected, the function
	# tries to fix the existing one, or to create it again. The result of that
	# update is not reflected in the return code.
	#
	local awscli_cmd

	ARECORD=""
	IPREGEX="^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"
	r53_validate || return $?
	ocf_log debug "Checking Route53 record sets"
	#
	_get_ip
	#
	# Use the command line assembled by the main script (profile, region,
	# connect timeout); fall back to the bare CLI if it is not available.
	awscli_cmd="${AWSCLI_CMD:-$OCF_RESKEY_awscli $AWS_PROFILE_OPT}"
	#
	if [ "$__OCF_ACTION" = "start" ] || ocf_is_probe ; then
		#
		cmd="$awscli_cmd route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']"
		ocf_log info "Route53 Agent Starting or probing - executing monitoring API call: $cmd"
		_r53_list_record "$awscli_cmd"
		rc=$?
		ocf_log debug "awscli returned code: $rc"
		if [ $rc -ne 0 ]; then
			CLIRES=$(echo $CLIRES | grep -v '^$')
			ocf_log warn "Route53 API returned an error: $CLIRES"
			ocf_log warn "Skipping cluster action due to API call error"
			return $OCF_ERR_GENERIC
		fi
		# The unquoted echo joins the CLI text output into one line, which is
		# what makes the address the 5th field.
		ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }')
		#
		if ocf_is_probe; then
			#
			# Prevent R53 record change during probe
			#
			if [[ $ARECORD =~ $IPREGEX ]] && [ "$ARECORD" != "$IPADDRESS" ]; then
				ocf_log debug "Route53 DNS record $ARECORD found at probing, disregarding"
				return $OCF_NOT_RUNNING
			fi
		fi
	else
		#
		cmd="dig +retries=3 +time=5 +short $OCF_RESKEY_fullname"
		ocf_log info "executing monitoring command : $cmd"
		# The redirection has to be part of the command itself: stored inside
		# a variable it would be handed to dig as a literal argument.
		ARECORD="$(dig +retries=3 +time=5 +short "$OCF_RESKEY_fullname" 2>/dev/null)"
		rc=$?
		ocf_log debug "dig return code: $rc"
		#
		if  [[ ! $ARECORD =~ $IPREGEX ]] || [ $rc -ne 0 ]; then
			ocf_log info "Fallback to Route53 API query due to DNS resolution failure"
			cmd="$awscli_cmd route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']"
			ocf_log debug "executing monitoring API call: $cmd"
			_r53_list_record "$awscli_cmd"
			rc=$?
			ocf_log debug "awscli return code: $rc"
			if [ $rc -ne 0 ]; then
				CLIRES=$(echo $CLIRES | grep -v '^$')
				ocf_log warn "Route53 API returned an error: $CLIRES"
				ocf_log warn "Monitor skipping cluster action due to API call error"
				return $OCF_SUCCESS
			fi
			ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }')
		fi
		#
	fi
	ocf_log info "Route53 DNS record pointing $OCF_RESKEY_fullname to IP address $ARECORD"
	#
	# Without a usable IPv4 address for this node there is nothing to compare
	# against or to publish. A start must not report success in that case;
	# other actions keep the "never fail" policy described above.
	if [[ ! $IPADDRESS =~ $IPREGEX ]]; then
		ocf_log warn "Could not determine a valid IP address for this host (got: '$IPADDRESS'), skipping Route53 update"
		if [ "$__OCF_ACTION" = "start" ]; then
			return $OCF_ERR_GENERIC
		fi
		return $OCF_SUCCESS
	fi
	#
	if [ "$ARECORD" == "$IPADDRESS" ]; then
		ocf_log info "Route53 DNS record $ARECORD found"
		return $OCF_SUCCESS
	elif [[ $ARECORD =~ $IPREGEX ]]; then
		ocf_log info "Route53 DNS record points to a different host, setting DNS record on Route53 to this host"
		_update_record "UPSERT" "$IPADDRESS"
		return $OCF_SUCCESS
	else
		ocf_log info "No Route53 DNS record found, setting DNS record on Route53 to this host"
		_update_record "UPSERT" "$IPADDRESS"
		return $OCF_SUCCESS
	fi
}

_get_ip() {
	case $OCF_RESKEY_ip in
		local|public)
			TOKEN=$(curl -sX PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
			IPADDRESS=$(curl -s http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4 -H "X-aws-ec2-metadata-token: $TOKEN");;
		*.*.*.*)
			IPADDRESS="${OCF_RESKEY_ip}";;
	esac
}

_update_record() {
	#
	# Execute the Route53 API call that creates or changes the DNS A record.
	#
	# Arguments:
	#   $1 - change action placed in the change batch (callers pass "UPSERT")
	#   $2 - IPv4 address to publish for $OCF_RESKEY_fullname
	#
	# The API payload is written to a temporary JSON file which is removed
	# again as soon as the API call has returned, whether it succeeded or not.
	# After a successful submission the change is polled until it has left the
	# PENDING state.
	#
	# Returns $OCF_SUCCESS after a successful submission and $OCF_ERR_GENERIC
	# when the API call fails; exits with $OCF_ERR_GENERIC when no temporary
	# file can be created.
	#
	local awscli_cmd

	update_action="$1"
	IPADDRESS="$2"
	# Use the command line assembled by the main script (profile, region,
	# connect timeout); fall back to the bare CLI if it is not available.
	# Expanded unquoted below on purpose so that it splits into words.
	awscli_cmd="${AWSCLI_CMD:-$OCF_RESKEY_awscli $AWS_PROFILE_OPT}"
	ocf_log info "Updating Route53 $OCF_RESKEY_hostedzoneid with $IPADDRESS for $OCF_RESKEY_fullname"
	ROUTE53RECORD="$(maketempfile)"
	if [ $? -ne 0 ] || [ -z "$ROUTE53RECORD" ]; then
		ocf_exit_reason "Failed to create temporary file for record update"
		exit $OCF_ERR_GENERIC
	fi
	cat >>"$ROUTE53RECORD" <<-EOF
	{
		  "Comment": "Update record to reflect new IP address for a system ",
		  "Changes": [
			  {
				  "Action": "$update_action",
				  "ResourceRecordSet": {
					  "Name": "$OCF_RESKEY_fullname",
					  "Type": "A",
					  "TTL": $OCF_RESKEY_ttl,
					  "ResourceRecords": [
						  {
							  "Value": "$IPADDRESS"
						  }
					  ]
				  }
			  }
		  ]
	}
	EOF
	cmd="$awscli_cmd route53 change-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --change-batch file://$ROUTE53RECORD "
	ocf_log debug "Executing command: $cmd"
	CLIRES="$($awscli_cmd route53 change-resource-record-sets --hosted-zone-id "$OCF_RESKEY_hostedzoneid" --change-batch "file://$ROUTE53RECORD" 2>&1)"
	rc=$?
	# The payload is no longer needed; remove it before any early return so
	# that failed API calls do not leave temporary files behind.
	rmtempfile "$ROUTE53RECORD"
	ocf_log debug "awscli returned code: $rc"
	if [ $rc -ne 0 ]; then
		CLIRES=$(echo $CLIRES | grep -v '^$')
		ocf_log warn "Route53 API returned an error: $CLIRES"
		ocf_log warn "Skipping cluster action due to API call error"
		return $OCF_ERR_GENERIC
	fi
	# The unquoted echo joins the CLI text output into a single line; the
	# change id ("/change/<id>") is expected in the 12th field.
	CHANGEID=$(echo $CLIRES | awk '{ print $12 }')
	ocf_log debug "Change id: $CHANGEID"
	CHANGEID=$(echo $CHANGEID | cut -d'/' -f 3 | cut -d'"' -f 1 )
	ocf_log debug "Change id: $CHANGEID"
	if [ -z "$CHANGEID" ]; then
		# Nothing to poll for; the update itself was accepted by the API.
		ocf_log warn "Could not extract change id from Route53 API response, not waiting for the change to propagate"
		return $OCF_SUCCESS
	fi
	STATUS="PENDING"
	MYSECONDS=20
	# The loop also ends when the status query fails, because STATUS is then
	# empty.
	while [ "$STATUS" = 'PENDING' ]; do
		sleep $MYSECONDS
		STATUS="$($awscli_cmd route53 get-change --id "$CHANGEID" | grep CHANGEINFO | awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )"
		ocf_log debug "Waited for $MYSECONDS seconds and checked execution of Route 53 update status: $STATUS "
	done
	return $OCF_SUCCESS
}

###############################################################################
# Informational actions need neither configuration nor AWS access, so they
# are answered before anything else is evaluated.
case "$__OCF_ACTION" in
	meta-data)
		metadata
		exit $OCF_SUCCESS
		;;
	usage|help)
		usage
		exit $OCF_SUCCESS
		;;
esac

# Assemble the AWS CLI command line in AWSCLI_CMD: the binary, the profile for
# key based authentication, the region when one is configured (mandatory for
# role based authentication) and a connect timeout.
AWSCLI_CMD="${OCF_RESKEY_awscli}"
case "x${OCF_RESKEY_auth_type}" in
	xkey)
		AWSCLI_CMD="$AWSCLI_CMD --profile ${OCF_RESKEY_profile}"
		;;
	xrole)
		if [ -z "${OCF_RESKEY_region}" ]; then
			ocf_exit_reason "region needs to be set when using role-based authentication"
			exit $OCF_ERR_CONFIGURED
		fi
		;;
	*)
		ocf_exit_reason "Incorrect auth_type: ${OCF_RESKEY_auth_type}"
		exit $OCF_ERR_CONFIGURED
		;;
esac
if [ -n "${OCF_RESKEY_region}" ]; then
	AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
fi
AWSCLI_CMD="$AWSCLI_CMD --cli-connect-timeout 10"

# Dispatch the requested action; the status of the action handler becomes the
# exit status of the agent.
case "$__OCF_ACTION" in
	start)
		r53_validate || exit $?
		r53_start
		;;
	stop)
		r53_stop
		;;
	monitor)
		r53_monitor
		;;
	validate-all)
		r53_validate
		;;
	*)
		usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac

exit $?