Diffstat (limited to 'fluent-bit/lib/librdkafka-2.1.0/packaging/nuget/cleanup-s3.py')
-rwxr-xr-x  fluent-bit/lib/librdkafka-2.1.0/packaging/nuget/cleanup-s3.py | 143
1 file changed, 143 insertions(+), 0 deletions(-)
diff --git a/fluent-bit/lib/librdkafka-2.1.0/packaging/nuget/cleanup-s3.py b/fluent-bit/lib/librdkafka-2.1.0/packaging/nuget/cleanup-s3.py
new file mode 100755
index 00000000..2093af0c
--- /dev/null
+++ b/fluent-bit/lib/librdkafka-2.1.0/packaging/nuget/cleanup-s3.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+#
+# Clean up test builds from librdkafka's S3 bucket.
+# This also covers Python builds.
+
+import argparse
+import re
+from datetime import datetime, timezone
+
+import boto3
+
+# Scans the S3 bucket for CI artifacts and deletes those that are old
+# enough and not associated with a release (or release candidate) tag.
+#
+# Artifact folders in the S3 bucket must use the following token
+# format:
+#  <token>-[<value>]__ (repeat)
+#
+# Recognized tokens (unrecognized tokens are ignored):
+# p - project (e.g., "confluent-kafka-python")
+# bld - builder (e.g., "travis")
+# plat - platform ("osx", "linux", ..)
+# arch - arch ("x64", ..)
+# tag - git tag
+# sha - git sha
+# bid - builder's build-id
+# bldtype - Release, Debug (appveyor)
+# lnk - std, static
+#
+# Example:
+# librdkafka/p-librdkafka__bld-travis__plat-linux__arch-x64__tag-v0.0.62__sha-d051b2c19eb0c118991cd8bc5cf86d8e5e446cde__bid-1562.1/librdkafka.tar.gz
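+#
+# For the example path above, the token regex in may_delete() yields
+# tag 'v0.0.62', which matches the release-tag pattern, so the
+# artifact is kept (not eligible for deletion).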
+
+
+s3_bucket = 'librdkafka-ci-packages'
+
+
+def may_delete(path):
+    """ Returns True if the S3 object path is eligible for deletion,
+        i.e. it has no tag, or a tag that is not a release/RC tag. """
+
+    # The path contains the tokens needed to perform
+    # matching of project, gitref, etc.
+    rinfo = re.findall(r'(?P<tag>[^-]+)-(?P<val>.*?)(?:__|$)', path)
+    if not rinfo:
+        print(f"Incorrect folder/file name format for {path}")
+        return False
+
+    info = dict(rinfo)
+
+    tag = info.get('tag', None)
+    if tag is not None and (len(tag) == 0 or tag.startswith('$(')):
+        # AppVeyor doesn't substitute $(APPVEYOR_REPO_TAG_NAME)
+        # with an empty value when the tag is not set; it leaves the
+        # token in the string, so translate that to no tag.
+        del info['tag']
+        tag = None
+
+    if tag is None:
+        return True
+
+    if re.match(r'^v?\d+\.\d+\.\d+(-?RC\d+)?$', tag,
+                flags=re.IGNORECASE) is None:
+        return True
+
+    return False
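+
+# Illustrative sanity checks for may_delete(), assuming the token
+# format documented above:
+#
+#   may_delete('p-librdkafka__tag-v0.0.62__bid-1562.1')        # False: release tag
+#   may_delete('p-librdkafka__tag-v0.0.62-RC1__bid-1562.1')    # False: RC tag
+#   may_delete('p-librdkafka__tag-v0.0.62-beta1__bid-1562.1')  # True: non-release tag
+#   may_delete('p-librdkafka__bld-travis__bid-1562.1')         # True: no tag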
+
+
+def collect_s3(s3, min_age_days=60):
+    """ Collect artifact object paths from S3 that are at least
+        min_age_days old and eligible for deletion. """
+    now = datetime.now(timezone.utc)
+    eligible = []
+    totcnt = 0
+    # Note: list_objects_v2 returns at most 1000 objects per call;
+    # use the continuation token to read the full listing.
+    cont_token = None
+    more = True
+    while more:
+        if cont_token is not None:
+            res = s3.list_objects_v2(Bucket=s3_bucket,
+                                     ContinuationToken=cont_token)
+        else:
+            res = s3.list_objects_v2(Bucket=s3_bucket)
+
+        if res.get('IsTruncated') is True:
+            cont_token = res.get('NextContinuationToken')
+        else:
+            more = False
+
+        # 'Contents' is absent when the listing is empty.
+        for item in res.get('Contents', []):
+            totcnt += 1
+            age = (now - item.get('LastModified')).days
+            path = item.get('Key')
+            if age >= min_age_days and may_delete(path):
+                eligible.append(path)
+
+    return (eligible, totcnt)
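+
+# An equivalent sketch using boto3's built-in pagination, as an
+# alternative to the manual continuation-token loop above:
+#
+#   paginator = s3.get_paginator('list_objects_v2')
+#   for page in paginator.paginate(Bucket=s3_bucket):
+#       for item in page.get('Contents', []):
+#           ...  # same age/tag checks as in collect_s3()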
+
+
+def chunk_list(lst, cnt):
+    """ Split lst into chunks of at most cnt items. """
+    for i in range(0, len(lst), cnt):
+        yield lst[i:i + cnt]
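+
+# e.g. list(chunk_list([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]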
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--delete",
+                        help="WARNING! Don't just check, actually delete "
+                        "S3 objects.",
+                        action="store_true")
+    parser.add_argument("--age", help="Minimum object age in days.",
+                        type=int, default=360)
+
+    args = parser.parse_args()
+    dry_run = not args.delete
+    min_age_days = args.age
+
+    if dry_run:
+        op = "Eligible for deletion"
+    else:
+        op = "Deleting"
+
+    s3 = boto3.client('s3')
+
+    # Collect eligible artifacts
+    eligible, totcnt = collect_s3(s3, min_age_days=min_age_days)
+    print(f"{len(eligible)}/{totcnt} eligible artifacts to delete")
+
+    # Delete in chunks of 1000 (the S3 API maximum per request)
+    for chunk in chunk_list(eligible, 1000):
+        print(op + ":\n" + '\n'.join(chunk))
+        if dry_run:
+            continue
+
+        res = s3.delete_objects(Bucket=s3_bucket,
+                                Delete={
+                                    'Objects': [{'Key': x} for x in chunk],
+                                    'Quiet': True
+                                })
+        errors = res.get('Errors', [])
+        if errors:
+            raise Exception(f"Delete failed: {errors}")
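+
+# Example invocation (dry run first, then actual deletion); boto3
+# picks up AWS credentials from the environment or ~/.aws/credentials:
+#
+#   $ ./cleanup-s3.py --age 360
+#   $ ./cleanup-s3.py --age 360 --delete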