summaryrefslogtreecommitdiffstats
path: root/pkg/retry/retry.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--pkg/retry/retry.go184
1 files changed, 184 insertions, 0 deletions
diff --git a/pkg/retry/retry.go b/pkg/retry/retry.go
new file mode 100644
index 0000000..da73943
--- /dev/null
+++ b/pkg/retry/retry.go
@@ -0,0 +1,184 @@
+package retry
+
+import (
+ "context"
+ "database/sql/driver"
+ "github.com/go-sql-driver/mysql"
+ "github.com/icinga/icingadb/pkg/backoff"
+ "github.com/lib/pq"
+ "github.com/pkg/errors"
+ "net"
+ "strings"
+ "syscall"
+ "time"
+)
+
+// RetryableFunc is a retryable function.
+// WithBackoff calls it with the context to use for the attempt and
+// treats a nil return value as success and any other error as a failed attempt.
+type RetryableFunc func(context.Context) error
+
+// IsRetryable checks whether a new attempt can be started based on the error passed.
+// WithBackoff calls it with the error of the attempt that just failed and
+// gives up as soon as it returns false.
+type IsRetryable func(error) bool
+
+// Settings aggregates optional settings for WithBackoff.
+// The zero value is valid: no timeout is applied and no callbacks are invoked.
+type Settings struct {
+ // Timeout lets WithBackoff give up once elapsed (if >0).
+ // It is applied by deriving a context with this timeout from the one passed to WithBackoff.
+ Timeout time.Duration
+ // OnError is called if an error occurs, i.e. after every failed attempt (if non-nil).
+ // elapsed is the time since WithBackoff started, attempt is the zero-based number of the failed attempt,
+ // err is the error of that attempt and lastErr is the error kept from the preceding attempt
+ // (nil for the first attempt).
+ OnError func(elapsed time.Duration, attempt uint64, err, lastErr error)
+ // OnSuccess is called once the operation succeeds (if non-nil).
+ // elapsed is the time since WithBackoff started, attempt is the zero-based number of the successful attempt
+ // and lastErr is the error kept from the preceding attempt (nil if the first attempt succeeded).
+ OnSuccess func(elapsed time.Duration, attempt uint64, lastErr error)
+}
+
+// WithBackoff retries the passed function if it fails and the error allows it to retry.
+// The specified backoff policy is used to determine how long to sleep between attempts:
+// b is called with the zero-based number of the attempt that just failed.
+//
+// If settings.Timeout is greater than zero, retryableFunc and the sleeps between attempts run under
+// a context derived from ctx that expires after that duration. settings.OnError (if set) is called
+// after every failed attempt and settings.OnSuccess (if set) once retryableFunc returns nil.
+//
+// WithBackoff returns nil as soon as retryableFunc succeeds. Otherwise it returns
+//   - the error wrapped with "can't retry" if retryable returns false for it or
+//     if the timeout expires while waiting for the next attempt, or
+//   - the error of ctx wrapped with "outer context canceled" if ctx itself is done while waiting.
+//
+// If an attempt fails with context.DeadlineExceeded or context.Canceled after an earlier attempt
+// has already failed, the earlier error is kept and reported instead of the context error.
+func WithBackoff(
+	ctx context.Context, retryableFunc RetryableFunc, retryable IsRetryable, b backoff.Backoff, settings Settings,
+) (err error) {
+	// Keep the caller's context to tell its cancellation apart from our own timeout.
+	parentCtx := ctx
+
+	if settings.Timeout > 0 {
+		var cancelCtx context.CancelFunc
+		ctx, cancelCtx = context.WithTimeout(ctx, settings.Timeout)
+		defer cancelCtx()
+	}
+
+	start := time.Now()
+	for attempt := uint64(0); ; /* true */ attempt++ {
+		prevErr := err
+
+		if err = retryableFunc(ctx); err == nil {
+			if settings.OnSuccess != nil {
+				settings.OnSuccess(time.Since(start), attempt, prevErr)
+			}
+
+			return nil
+		}
+
+		if settings.OnError != nil {
+			settings.OnError(time.Since(start), attempt, err, prevErr)
+		}
+
+		// Classify the error as returned by the function, before it is possibly replaced below.
+		isRetryable := retryable(err)
+
+		if prevErr != nil && (errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled)) {
+			err = prevErr
+		}
+
+		if !isRetryable {
+			return errors.Wrap(err, "can't retry")
+		}
+
+		// Use an explicit timer instead of time.After so that it can be stopped
+		// right away if the context ends before the backoff has elapsed.
+		timer := time.NewTimer(b(attempt))
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+
+			if outerErr := parentCtx.Err(); outerErr != nil {
+				return errors.Wrap(outerErr, "outer context canceled")
+			}
+
+			// Only our own timeout expired. err is always non-nil here as the attempt above failed.
+			return errors.Wrap(err, "can't retry")
+		case <-timer.C:
+		}
+	}
+}
+
+// Retryable reports whether err is one of the common errors for which a new attempt is worthwhile:
+// temporary and timeout errors, DNS errors, refused or reset connections, hosts and networks that are
+// down or unreachable, broken database connections and a selection of MySQL and PostgreSQL server errors.
+func Retryable(err error) bool {
+	// Errors that classify themselves as temporary or timed out.
+	var tempErr interface{ Temporary() bool }
+	if errors.As(err, &tempErr) && tempErr.Temporary() {
+		return true
+	}
+
+	var timeoutErr interface{ Timeout() bool }
+	if errors.As(err, &timeoutErr) && timeoutErr.Timeout() {
+		return true
+	}
+
+	// Any DNS error.
+	var dnsErr *net.DNSError
+	if errors.As(err, &dnsErr) {
+		return true
+	}
+
+	// OpError provides Temporary() and Timeout(), but not Unwrap(),
+	// so the underlying error is extracted by hand and all remaining checks are done against it.
+	cause := err
+	var opErr *net.OpError
+	if errors.As(err, &opErr) {
+		cause = opErr.Err
+	}
+
+	// Errnos that Go's own Temporary() and Timeout() do not cover.
+	retryableErrnos := []error{
+		syscall.ECONNREFUSED,
+		syscall.ENOENT,
+		// ECONNRESET is treated as a temporary error by Go only if it comes from calling accept.
+		syscall.ECONNRESET,
+		syscall.EHOSTDOWN,
+		syscall.EHOSTUNREACH,
+		syscall.ENETDOWN,
+		syscall.ENETUNREACH,
+	}
+	for _, errno := range retryableErrnos {
+		if errors.Is(cause, errno) {
+			return true
+		}
+	}
+
+	// Connections reported as unusable by database/sql or the MySQL driver.
+	if errors.Is(cause, driver.ErrBadConn) || errors.Is(cause, mysql.ErrInvalidConn) {
+		return true
+	}
+
+	// For MySQL server errors, the error number alone decides.
+	var mysqlErr *mysql.MySQLError
+	if errors.As(cause, &mysqlErr) {
+		switch mysqlErr.Number {
+		case 1053, // Server shutdown in progress
+			1205, // Lock wait timeout
+			1213, // Deadlock found when trying to get lock
+			2006: // MySQL server has gone away
+			return true
+		}
+
+		return false
+	}
+
+	// Everything that is not a PostgreSQL error at this point is not retryable.
+	var pqErr *pq.Error
+	if !errors.As(cause, &pqErr) {
+		return false
+	}
+
+	switch pqErr.Code {
+	case "08000", // connection_exception
+		"08006", // connection_failure
+		"08001", // sqlclient_unable_to_establish_sqlconnection
+		"08004", // sqlserver_rejected_establishment_of_sqlconnection
+		"40001", // serialization_failure
+		"40P01", // deadlock_detected
+		"54000", // program_limit_exceeded
+		"55006", // object_in_use
+		"55P03", // lock_not_available
+		"57P01", // admin_shutdown
+		"57P02", // crash_shutdown
+		"57P03", // cannot_connect_now
+		"58000", // system_error
+		"58030", // io_error
+		"XX000": // internal_error
+		return true
+	}
+
+	// Class 53 - Insufficient Resources
+	return strings.HasPrefix(string(pqErr.Code), "53")
+}