summaryrefslogtreecommitdiffstats
path: root/admin/release-estimator/release-estimator.py
blob: bf005df3faf62901842bbd2bde055dbe1f664f57 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
#!/usr/bin/python3
#
# Release estimator for HAProxy
#
# A tool that monitors the HAProxy stable branches and calculates a proposed
# release date for the next minor release based on the bug fixes that are in
# the queue.
#
# Copyright 2020 HAProxy Technologies, Daniel Corbett <dcorbett@haproxy.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version
# 3 of the License, or (at your option) any later version.
#
#

from lxml import html
import requests
import traceback
import smtplib
import math
import copy
import time
import sys
import argparse
from datetime import datetime
from datetime import timedelta
from email.mime.text import MIMEText

# Versions whose first release is at least this many days old are not
# reported on.
MAX_VERSION_AGE = 1095  # days

# Non-LTS releases (odd-numbered ones) are not reported on once they are
# older than this many days.
MAX_VERSION_AGE_NONLTS = 547  # days

# Thresholds for each severity/issue type:
#   count - how many bugs of this issue type should be in the queue
#   time  - how many days should be added to the release date
THRESHOLDS = {
    'BUG': {'count': 1, 'time': 28},
    'BUILD': {'count': 1, 'time': 28},
    'MINOR': {'count': 1, 'time': 28},
    'MEDIUM': {'count': 1, 'time': 30},
    'MAJOR': {'count': 1, 'time': 14},
    'CRITICAL': {'count': 1, 'time': 2},
}

# Urgency levels: the urgency of a release increases as the estimated time
# (in days) gets closer. The empty value marks the level that has no limit.
RELEASE_URGENCY = {'WARNING': 7, 'NOTICE': 21, 'INFO': ''}

def search_set(s, f):
    """Tell whether any element of ``s`` contains ``f``.

    ``s`` is an iterable of containers (strings in this script) and ``f`` is
    the value looked for with the ``in`` operator inside each element.

    Returns True as soon as one element contains ``f`` and False otherwise
    (including when ``s`` is empty).
    """
    return any(f in t for t in s)

def check_for_email(s, parser):
    """Exit after printing the usage when ``s`` does not look like an e-mail.

    The only check performed is the presence of an "@" in ``s``. When it is
    missing, ``parser.print_help()`` is called and the program exits through
    ``sys.exit()``; otherwise the function simply returns.
    """
    if "@" in s:
        return
    parser.print_help()
    sys.exit()

def _changelog_release_date(changelog_lines, release):
    """Return the release date of ``release`` found in a CHANGELOG.

    The first line that contains ``release`` and whose first space-separated
    field parses as a ``YYYY/MM/DD`` date is used. Returns a ``datetime``,
    or None when no such line exists.
    """
    for line in changelog_lines:
        if release not in line:
            continue
        try:
            return datetime.strptime(line.split(' ')[0].strip(), '%Y/%m/%d')
        except ValueError:
            # Not a dated release line, keep looking.
            continue
    return None


def main():
    """Compute release estimates for the stable branches and report them.

    The command line is parsed first (--print, --send-mail, --to-email,
    --from-email); the usage is printed and the program exits when no option
    is given, when --send-mail lacks one of the addresses, or when an address
    has no "@". The list of versions is then read from
    http://www.haproxy.org/bugs/ and, for every version still within the
    configured age limits, the pending fixes are collected per severity and
    a proposed release date is computed from THRESHOLDS. The resulting
    message is printed and/or sent through the SMTP server on 127.0.0.1,
    but only if at least one version has a computed release date.
    """
    SEND_MAIL = False
    TO_EMAIL = None
    FROM_EMAIL = None
    VERSIONS = []
    issues = {}
    BUGQUEUE = {}
    BUGS = {"bugs": []}
    email_message = """Hi,

This is a friendly bot that watches fixes pending for the next haproxy-stable release!  One such e-mail is sent periodically once patches are waiting in the last maintenance branch, and an ideal release date is computed based on the severity of these fixes and their merge date.  Responses to this mail must be sent to the mailing list.

"""

    # Used to spell out the number of weeks in the message; larger values
    # fall back to digits.
    num_to_word = {
        1: 'one',
        2: 'two',
        3: 'three',
        4: 'four',
        5: 'five',
        6: 'six',
        7: 'seven',
        8: 'eight',
        9: 'nine',
        10: 'ten',
        11: 'eleven',
        12: 'twelve',
        13: 'thirteen',
    }

    parser = argparse.ArgumentParser(description='HAProxy Stable Release Estimator')
    parser.add_argument('--print', action="store_true",
                        help='Print email only')
    parser.add_argument('--to-email', nargs=1, required=False,
                        help='Send email to <email>')
    parser.add_argument('--from-email', nargs=1, required=False,
                        help='Send email from <email>')
    parser.add_argument('--send-mail', action="store_true",
                        help='Send email')
    args = parser.parse_args()

    if not args.print and not args.send_mail and not args.to_email and not args.from_email:
        parser.print_help()
        sys.exit()

    # Sending requires both addresses; past this point SEND_MAIL therefore
    # implies that TO_EMAIL and FROM_EMAIL are set.
    if args.send_mail and (not args.to_email or not args.from_email):
        parser.print_help()
        sys.exit()

    if args.to_email:
        check_for_email(args.to_email[0], parser)
        TO_EMAIL = args.to_email[0]

    if args.from_email:
        check_for_email(args.from_email[0], parser)
        FROM_EMAIL = args.from_email[0]

    if args.send_mail:
        SEND_MAIL = True

    #
    # Let's get the list of the current stable versions
    #

    page = requests.get('http://www.haproxy.org/bugs/')
    tree = html.fromstring(page.content)

    for x in (tree.xpath('//th')):
        link_text = x.xpath('./a/text()')
        if link_text:
            VERSIONS.append(link_text[0])

    #
    # For each version let's check its age. We'll apply the following logic:
    #  - Skip the release if it's:
    #    * older than MAX_VERSION_AGE days
    #    * older than MAX_VERSION_AGE_NONLTS days and an odd numbered release (1.9,2.1,2.3)
    #
    # For all other valid releases we will then collect the number of bug fixes
    # in queue for each of the defined severity levels:
    #  - BUG
    #  - BUILD
    #  - MINOR
    #  - MEDIUM
    #  - MAJOR
    #  - CRITICAL
    #
    # We'll then begin calculating the proposed release date based on the
    # first commit date of the first bug fix for the defined severity level.
    #
    # By default the proposed release dates use the following padding:
    #  (Can be modified in THRESHOLDS)
    #  - BUG/BUILD/MINOR - 28 days
    #  - MEDIUM - 30 days
    #  - MAJOR - 14 days
    #  - CRITICAL - 2 days
    #
    # After we have a proposed release date we will assign a release urgency
    # to it. As we get closer to the proposed release date the urgency level changes.
    # By default the urgency levels and their times are:
    #  - WARNING - proposed date is 7 days or less
    #  - NOTICE  - proposed date is 21 days or less
    #  - INFO    - proposed date is longer than the above
    #

    for version in VERSIONS:
        BUGQUEUE[version] = {"total": 0, "last": ""}
        VERSION_THRESHOLDS = copy.deepcopy(THRESHOLDS)
        print("Collecting information on %s" % (version))
        page = requests.get('http://www.haproxy.org/bugs/bugs-%s.html' % (version))
        tree = html.fromstring(page.content)

        # Registered before any "continue" so that the reporting loops after
        # this one find an entry for every version.
        issues[version] = {}
        issues_count = {}

        # parse out the CHANGELOG link
        CHANGELOG = tree.xpath('//a[contains(@href,"CHANGELOG")]/@href')[0]

        last_version = tree.xpath('//td[contains(text(), "last")]/../td/a/text()')[0]
        first_version = "%s.0" % (version)

        # Get CHANGELOG for release
        changelog_page = requests.get(CHANGELOG)
        try:
            changelog_lines = changelog_page.content.decode('utf-8').split('\n')
        except UnicodeDecodeError:
            print(traceback.format_exc())
            changelog_lines = []

        # Dates at which this branch was last and first released. They are
        # looked up again for every version so that a value found for a
        # previous version can never be reused by mistake.
        last_release_datetime = _changelog_release_date(changelog_lines, last_version)
        first_release_datetime = _changelog_release_date(changelog_lines, first_version)

        if last_release_datetime is None or first_release_datetime is None:
            # Without both dates neither the age of the version nor the
            # message can be computed.
            print("\t - Version: %s release dates not found in CHANGELOG, skipping" % (version))
            continue

        BUGQUEUE[version]['last'] = last_release_datetime.strftime('%Y/%m/%d')
        BUGQUEUE[version]['first'] = first_release_datetime

        # get unix timestamp for today and timestamp of first release date
        today_ts = datetime.today().timestamp()
        first_version_ts = first_release_datetime.timestamp()

        # calculate the age of this version in days and years
        version_age = math.ceil((today_ts - first_version_ts) / 86400)
        version_age_years = math.ceil(version_age / 365)

        # We do not want to monitor versions that are older
        # than MAX_VERSION_AGE or MAX_VERSION_AGE_NONLTS
        if version_age >= MAX_VERSION_AGE:
            print("\t - Version: %s is older than %d days, skipping" % (version, MAX_VERSION_AGE))
            continue

        if version_age > MAX_VERSION_AGE_NONLTS:
            if int(version.split('.')[1]) % 2 > 0:
                print("\t - Version: %s is not LTS and is older than %d days, skipping" % (version, MAX_VERSION_AGE_NONLTS))
                continue

        # If the release is older than 1 year let's increase the time until
        # a release is due. <base time threshold> * <version age years>
        if version_age_years > 1:
            for threshold in VERSION_THRESHOLDS.values():
                threshold['time'] *= int(version_age_years)

        # Let's capture the bug table which contains each bug & their severity
        bug_table = tree.xpath('//th[contains(text(), "Severity")]/ancestor::table[last()]')[0]

        # Loop through bug table and parse out the title of each bug
        # found within the links and their merge date.
        # Example is: 2020-10-19 BUG/MINOR: disable dynamic OCSP load with BoringSSL
        for x in bug_table.xpath('.//a[contains(@href,"commitdiff")]'):
            # Capture the bug label
            # Example: BUG/MINOR: disable dynamic OCSP load with BoringSSL
            issue_tmp = x.xpath('./text()')[0]
            # Capture the date
            # Example: 2020-10-19
            date_tmp = x.xpath('../preceding-sibling::td/text()')[0]

            # Split the bug into a severity
            if "/" in issue_tmp:
                bug_type = issue_tmp.split(':')[0].split('/')[1].strip()
            else:
                bug_type = issue_tmp.split(':')[0].strip()
            bug_text = ":".join(issue_tmp.split(':')[1:]).strip()
            issues[version].setdefault(bug_type, set()).add("%s|%s" % (date_tmp, bug_text))

        # For each severity (MINOR, MEDIUM, MAJOR, etc.) keep the number of
        # pending fixes and the timestamp of the oldest one, as the
        # calculations are based on the first time a severity was seen.
        for issue_type, entries in issues[version].items():
            issues_count[issue_type] = {
                'timestamp': min(
                    int(time.mktime(datetime.strptime(entry.split('|', 1)[0], "%Y-%m-%d").timetuple()))
                    for entry in entries
                ),
                'count': len(entries),
            }

        release_date = None
        total_count = 0

        # Let's check the count for each severity type and see if they
        # are greater than our thresholds count. This can be used to
        # hold off on calculating release estimates until a certain number of
        # MINOR bugs have accumulated.
        for issue_type, stats in issues_count.items():
            threshold = VERSION_THRESHOLDS.get(issue_type)
            if threshold is None:
                # Severity without a configured threshold: it cannot
                # influence the release date.
                continue
            if stats['count'] >= threshold['count']:
                # The candidate date is the first time this severity was
                # seen plus the number of days configured for it; the
                # earliest candidate across severities wins. The fixes of
                # this severity are also added to the total for the version.
                total_count += stats['count']
                issue_timestamp_delta = datetime.fromtimestamp(stats['timestamp']) + timedelta(days=int(threshold['time']))
                if release_date is None or release_date > issue_timestamp_delta:
                    release_date = issue_timestamp_delta

        if release_date is None:
            # Nothing to announce for this version.
            continue

        time_until_release = release_date - datetime.now()

        # Urgency level of the release.
        # NOTE: this value is currently not included in the generated e-mail.
        for k in sorted(RELEASE_URGENCY.keys()):
            if not RELEASE_URGENCY[k]:
                release_urgency_msg = k
            elif time_until_release.days <= RELEASE_URGENCY[k]:
                release_urgency_msg = k

        # Let's calculate how long in words until that release,
        # i.e. "in two weeks or less"
        rounded_week_time = math.ceil(time_until_release.days / 7.0)
        week_count = abs(rounded_week_time)
        week_word = 'weeks' if week_count > 1 else 'week'
        week_count_word = num_to_word.get(week_count, str(week_count))

        try:
            # We now have all of the required information for building
            # the email message.
            # TODO: Fix alignment
            email_message = "%s\n    Last release %s was issued on %s.  There are currently %d patches in the queue cut down this way:\n" % (email_message, last_version, last_release_datetime.strftime("%Y-%m-%d"), total_count)
            for issue_type in sorted(issues_count.keys()):
                email_message = "%s    - %d %s, first one merged on %s\n" % (email_message, issues_count[issue_type]['count'], issue_type, datetime.fromtimestamp(int(issues_count[issue_type]['timestamp'])).strftime("%Y-%m-%d"))
            # The next release is the last one with its final number incremented
            next_version = ".".join(last_version.split(".")[:-1]) + "." + str(int(last_version.split(".")[-1]) + 1)
            email_message = "%s\nThus the computed ideal release date for %s would be %s, " % (email_message, next_version, release_date.strftime("%Y-%m-%d"))
            if rounded_week_time < 0:
                email_message = "%swhich was %s %s ago.\n" % (email_message, week_count_word, week_word)
            elif rounded_week_time == 0:
                email_message = "%swhich was within the last week.\n" % (email_message)
            else:
                email_message = "%swhich is in %s %s or less.\n" % (email_message, week_count_word, week_word)
        except Exception:
            print(traceback.format_exc())
            sys.exit()

        # Total number of bugs fixed in this version
        # since last release.
        BUGQUEUE[version]['total'] = total_count

    email_subject = "stable-bot: Bugfixes waiting for a release "

    # Add each version & their number of bugs to the subject
    for k in sorted(BUGQUEUE.keys(), reverse=True):
        if BUGQUEUE[k]['total'] > 0:
            email_subject = "%s %s (%d)," % (email_subject, k, BUGQUEUE[k]['total'])

    email_subject = email_subject.rstrip(",")
    email_message = "%s\nThe current list of patches in the queue is:\n" % (email_message)
    uniq_issues = set()

    # Parse out unique issues across all versions so that we can
    # print them once with the list of affected versions.
    for k in BUGQUEUE:
        for issue_type, entries in issues[k].items():
            for issue in entries:
                # maxsplit keeps any "|" that is part of the bug title
                uniq_issues.add("%s|%s" % (issue_type, issue.split('|', 1)[1]))

    # Loop through the unique issues and determine which versions
    # are affected. The issues are visited in sorted order so that the
    # message is identical from one run to the next.
    for i in sorted(uniq_issues):
        severity, bug_text = i.split('|', 1)
        affected_versions = sorted(
            k for k in BUGQUEUE
            if search_set(issues[k].get(severity, ()), bug_text)
        )
        if affected_versions:
            BUGS["bugs"].append({"affected_versions": affected_versions, "bug": bug_text, "severity": severity})

    BUGS["bugs"] = sorted(BUGS["bugs"], key=lambda bug: bug['severity'])

    # Add each issue with affected versions to email message
    # Example:
    # - 1.8, 2.0, 2.1, 2.2 - MINOR   : stats: fix validity of the json schema
    for bug in BUGS["bugs"]:
        email_message = "%s - %s %s %s : %s\n" % (email_message, ", ".join(bug["affected_versions"]).ljust(14), "-".rjust(12), bug["severity"].ljust(7), bug["bug"])

    email_message = "%s\n-- \nThe haproxy stable-bot is freely provided by HAProxy Technologies to help improve the quality of each HAProxy release.  If you have any issue with these emails or if you want to suggest some improvements, please post them on the list so that the solutions suiting the most users can be found.\n" % (email_message)

    # If a message with actual issues exists let's either print it or send out
    # an email.
    if "first one merged on" in email_message:
        if args.print:
            print(email_subject)
            print(email_message)
        if SEND_MAIL:
            print('Send email to:%s from:%s' % (TO_EMAIL, FROM_EMAIL), end="")
            msg = MIMEText(email_message)
            msg['to'] = TO_EMAIL
            msg['from'] = FROM_EMAIL
            msg['subject'] = email_subject
            msg.add_header('reply-to', TO_EMAIL)
            try:
                # The context manager closes the connection on every path.
                with smtplib.SMTP('127.0.0.1', timeout=10) as server:
                    server.sendmail(msg['from'], [msg['to']], msg.as_string())
                    print(" - Email sent")
            except (ConnectionRefusedError, smtplib.SMTPConnectError):
                print("- Error: SMTP Connection Error")
                sys.exit()
            except smtplib.SMTPServerDisconnected:
                print('- Error: SMTP Server Disconnect (possible timeout)')
                sys.exit()
            except (smtplib.SMTPRecipientsRefused, smtplib.SMTPSenderRefused):
                print('- Error: Recipients or Sender Refused')
                sys.exit()
            except (smtplib.SMTPHeloError, smtplib.SMTPAuthenticationError):
                print('- Error: SMTP rejected HELO or requires Authentication')
                sys.exit()
            except Exception:
                print(traceback.format_exc())
                sys.exit()


if __name__ == "__main__":
    main()
    # Exit only when executed as a script: an unconditional module-level
    # sys.exit() would raise SystemExit whenever this file is imported.
    sys.exit()