diff options
Diffstat (limited to 'src/tools/git_changelog')
-rwxr-xr-x | src/tools/git_changelog | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/src/tools/git_changelog b/src/tools/git_changelog new file mode 100755 index 0000000..189cfc8 --- /dev/null +++ b/src/tools/git_changelog @@ -0,0 +1,419 @@ +#!/usr/bin/perl + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# +# src/tools/git_changelog +# +# Display all commits on active branches, merging together commits from +# different branches that occur close together in time and with identical +# log messages. +# +# By default, commits are annotated with branch and release info thus: +# Branch: REL8_3_STABLE Release: REL8_3_2 [92c3a8004] 2008-03-29 00:15:37 +0000 +# This shows that the commit on REL8_3_STABLE was released in 8.3.2. +# Commits on master will usually instead have notes like +# Branch: master Release: REL8_4_BR [6fc9d4272] 2008-03-29 00:15:28 +0000 +# showing that this commit is ancestral to release branches 8.4 and later. +# If no Release: marker appears, the commit hasn't yet made it into any +# release. +# +# The --brief option shortens that to a format like: +# YYYY-MM-DD [hash] abbreviated commit subject line +# Since the branch isn't shown, this is mainly useful in conjunction +# with --master-only. +# +# Most of the time, matchable commits occur in the same order on all branches, +# and we print them out in that order. However, if commit A occurs before +# commit B on branch X and commit B occurs before commit A on branch Y, then +# there's no ordering which is consistent with both branches. In such cases +# we sort a merged commit according to its timestamp on the newest branch +# it appears in. +# +# The default output of this script is meant for generating minor release +# notes, where we need to know which branches a merged commit affects. +# +# To generate major release notes, use: +# git_changelog --master-only --brief --oldest-first --since='start-date' +# To find the appropriate start date, use: +# git show --summary $(git merge-base REL_12_STABLE master) +# where the branch to mention is the previously forked-off branch. This +# shows the last commit before that branch was made. +# +# Note that --master-only is an imperfect filter, since it will not detect +# cases where a master patch was back-patched awhile later or with a slightly +# different commit message. To find such cases, it's a good idea to look +# through the output of +# git_changelog --non-master-only --oldest-first --since='start-date' +# and then remove anything from the --master-only output that would be +# duplicative. + + +use strict; +use warnings; +require Time::Local; +require Getopt::Long; +require IPC::Open2; + +# Adjust this list when the set of interesting branches changes. +# (We could get this from "git branches", but not worth the trouble.) +# NB: master must be first! +my @BRANCHES = qw(master + REL_14_STABLE REL_13_STABLE + REL_12_STABLE REL_11_STABLE REL_10_STABLE REL9_6_STABLE REL9_5_STABLE + REL9_4_STABLE REL9_3_STABLE REL9_2_STABLE REL9_1_STABLE REL9_0_STABLE + REL8_4_STABLE REL8_3_STABLE REL8_2_STABLE REL8_1_STABLE REL8_0_STABLE + REL7_4_STABLE REL7_3_STABLE REL7_2_STABLE REL7_1_STABLE REL7_0_PATCHES + REL6_5_PATCHES REL6_4); + +# Might want to make this parameter user-settable. +my $timestamp_slop = 24 * 60 * 60; + +my $brief = 0; +my $details_after = 0; +my $post_date = 0; +my $master_only = 0; +my $non_master_only = 0; +my $oldest_first = 0; +my $since; +my @output_buffer; +my $output_line = ''; + +Getopt::Long::GetOptions( + 'brief' => \$brief, + 'details-after' => \$details_after, + 'master-only' => \$master_only, + 'non-master-only' => \$non_master_only, + 'post-date' => \$post_date, + 'oldest-first' => \$oldest_first, + 'since=s' => \$since) || usage(); +usage() if @ARGV; + +my @git = qw(git log --format=fuller --date=iso); +push @git, '--since=' . $since if defined $since; + +# Collect the release tag data +my %rel_tags; + +{ + my $cmd = "git for-each-ref refs/tags"; + my $pid = IPC::Open2::open2(my $git_out, my $git_in, $cmd) + || die "can't run $cmd: $!"; + while (my $line = <$git_out>) + { + if ($line =~ m|^([a-f0-9]+)\s+commit\s+refs/tags/(\S+)|) + { + my $commit = $1; + my $tag = $2; + if ( $tag =~ /^REL_\d+_\d+$/ + || $tag =~ /^REL\d+_\d+$/ + || $tag =~ /^REL\d+_\d+_\d+$/) + { + $rel_tags{$commit} = $tag; + } + } + } + waitpid($pid, 0); + my $child_exit_status = $? >> 8; + die "$cmd failed" if $child_exit_status != 0; +} + +# Collect the commit data +my %all_commits; +my %all_commits_by_branch; + +# This remembers where each branch sprouted from master. Note the values +# will be wrong if --since terminates the log listing before the branch +# sprouts; but in that case it doesn't matter since we also won't reach +# the part of master where it would matter. +my %sprout_tags; + +for my $branch (@BRANCHES) +{ + my @cmd = @git; + if ($branch eq "master") + { + push @cmd, "origin/$branch"; + } + else + { + push @cmd, "--parents"; + push @cmd, "master..origin/$branch"; + } + my $pid = IPC::Open2::open2(my $git_out, my $git_in, @cmd) + || die "can't run @cmd: $!"; + my $last_tag = undef; + my $last_parent; + my %commit; + while (my $line = <$git_out>) + { + if ($line =~ /^commit\s+(\S+)/) + { + push_commit(\%commit) if %commit; + $last_tag = $rel_tags{$1} if defined $rel_tags{$1}; + %commit = ( + 'branch' => $branch, + 'commit' => $1, + 'last_tag' => $last_tag, + 'message' => '',); + if ($line =~ /^commit\s+\S+\s+(\S+)/) + { + $last_parent = $1; + } + else + { + $last_parent = undef; + } + } + elsif ($line =~ /^Author:\s+(.*)/) + { + $commit{'author'} = $1; + } + elsif ($line =~ /^CommitDate:\s+(.*)/) + { + $commit{'date'} = $1; + } + elsif ($line =~ /^\s\s/) + { + $commit{'message'} .= $line; + } + } + push_commit(\%commit) if %commit; + $sprout_tags{$last_parent} = $branch if defined $last_parent; + waitpid($pid, 0); + my $child_exit_status = $? >> 8; + die "@cmd failed" if $child_exit_status != 0; +} + +# Run through the master branch and apply tags. We already tagged the other +# branches, but master needs a separate pass after we've acquired the +# sprout_tags data. Also, in post-date mode we need to add phony entries +# for branches that sprouted after a particular master commit was made. +{ + my $last_tag = undef; + my %sprouted_branches; + for my $cc (@{ $all_commits_by_branch{'master'} }) + { + my $commit = $cc->{'commit'}; + my $c = $cc->{'commits'}->[0]; + $last_tag = $rel_tags{$commit} if defined $rel_tags{$commit}; + if (defined $sprout_tags{$commit}) + { + $last_tag = $sprout_tags{$commit}; + + # normalize branch names for making sprout tags + $last_tag =~ s/^(REL_\d+).*/$1_BR/; + $last_tag =~ s/^(REL\d+_\d+).*/$1_BR/; + } + $c->{'last_tag'} = $last_tag; + if ($post_date) + { + if (defined $sprout_tags{$commit}) + { + $sprouted_branches{ $sprout_tags{$commit} } = 1; + } + + # insert new commits between master and any other commits + my @new_commits = (shift @{ $cc->{'commits'} }); + for my $branch (reverse sort keys %sprouted_branches) + { + my $ccopy = { %{$c} }; + $ccopy->{'branch'} = $branch; + push @new_commits, $ccopy; + } + $cc->{'commits'} = [ @new_commits, @{ $cc->{'commits'} } ]; + } + } +} + +my %position; +for my $branch (@BRANCHES) +{ + $position{$branch} = 0; +} + +while (1) +{ + my $best_branch; + my $best_timestamp; + for my $branch (@BRANCHES) + { + my $leader = $all_commits_by_branch{$branch}->[ $position{$branch} ]; + next if !defined $leader; + if (!defined $best_branch + || $leader->{'timestamp'} > $best_timestamp) + { + $best_branch = $branch; + $best_timestamp = $leader->{'timestamp'}; + } + } + last if !defined $best_branch; + my $winner = + $all_commits_by_branch{$best_branch}->[ $position{$best_branch} ]; + + my $print_it = 1; + if ($master_only) + { + $print_it = (@{ $winner->{'commits'} } == 1) + && ($winner->{'commits'}[0]->{'branch'} eq 'master'); + } + elsif ($non_master_only) + { + foreach my $c (@{ $winner->{'commits'} }) + { + $print_it = 0 if ($c->{'branch'} eq 'master'); + } + } + + if ($print_it) + { + output_details($winner) if (!$details_after); + output_str("%s", $winner->{'message'} . "\n"); + output_details($winner) if ($details_after); + unshift(@output_buffer, $output_line) if ($oldest_first); + $output_line = ''; + } + + $winner->{'done'} = 1; + for my $branch (@BRANCHES) + { + my $leader = $all_commits_by_branch{$branch}->[ $position{$branch} ]; + if (defined $leader && $leader->{'done'}) + { + ++$position{$branch}; + redo; + } + } +} + +print @output_buffer if ($oldest_first); + +sub push_commit +{ + my ($c) = @_; + my $ht = hash_commit($c); + my $ts = parse_datetime($c->{'date'}); + my $cc; + + # Note that this code will never merge two commits on the same branch, + # even if they have the same hash (author/message) and nearby + # timestamps. This means that there could be multiple potential + # matches when we come to add a commit from another branch. Prefer + # the closest-in-time one. + for my $candidate (@{ $all_commits{$ht} }) + { + my $diff = abs($ts - $candidate->{'timestamp'}); + if ($diff < $timestamp_slop + && !exists $candidate->{'branch_position'}{ $c->{'branch'} }) + { + if (!defined $cc + || $diff < abs($ts - $cc->{'timestamp'})) + { + $cc = $candidate; + } + } + } + if (!defined $cc) + { + $cc = { + 'author' => $c->{'author'}, + 'message' => $c->{'message'}, + 'commit' => $c->{'commit'}, + 'commits' => [], + 'timestamp' => $ts + }; + push @{ $all_commits{$ht} }, $cc; + } + + # stash only the fields we'll need later + my $smallc = { + 'branch' => $c->{'branch'}, + 'commit' => $c->{'commit'}, + 'date' => $c->{'date'}, + 'last_tag' => $c->{'last_tag'} + }; + push @{ $cc->{'commits'} }, $smallc; + push @{ $all_commits_by_branch{ $c->{'branch'} } }, $cc; + $cc->{'branch_position'}{ $c->{'branch'} } = + -1 + @{ $all_commits_by_branch{ $c->{'branch'} } }; + return; +} + +sub hash_commit +{ + my ($c) = @_; + return $c->{'author'} . "\0" . $c->{'message'}; +} + +sub parse_datetime +{ + my ($dt) = @_; + $dt =~ + /^(\d\d\d\d)-(\d\d)-(\d\d)\s+(\d\d):(\d\d):(\d\d)\s+([-+])(\d\d)(\d\d)$/; + my $gm = Time::Local::timegm($6, $5, $4, $3, $2 - 1, $1); + my $tzoffset = ($8 * 60 + $9) * 60; + $tzoffset = -$tzoffset if $7 eq '-'; + return $gm - $tzoffset; +} + +sub output_str +{ + ($oldest_first) ? ($output_line .= sprintf(shift, @_)) : printf(@_); + return; +} + +sub output_details +{ + my $item = shift; + + if ($details_after) + { + $item->{'author'} =~ m{^(.*?)\s*<[^>]*>$}; + + # output only author name, not email address + output_str("(%s)\n", $1); + } + else + { + output_str("Author: %s\n", $item->{'author'}); + } + foreach my $c (@{ $item->{'commits'} }) + { + if ($brief) + { + $item->{'message'} =~ m/^\s*(.*)/; + + output_str( + "%s [%s] %s\n", + substr($c->{'date'}, 0, 10), + substr($c->{'commit'}, 0, 9), + substr($1, 0, 56)); + } + else + { + output_str("Branch: %s ", $c->{'branch'}) + if (!$master_only); + output_str("Release: %s ", $c->{'last_tag'}) + if (defined $c->{'last_tag'}); + output_str("[%s] %s\n", substr($c->{'commit'}, 0, 9), + $c->{'date'}); + } + } + output_str("\n"); + return; +} + +sub usage +{ + print STDERR <<EOM; +Usage: git_changelog [--brief/-b] [--details-after/-d] [--master-only/-m] [--non-master-only/-n] [--oldest-first/-o] [--post-date/-p] [--since=SINCE] + --brief Shorten commit descriptions, omitting branch identification + --details-after Show branch and author info after the commit description + --master-only Show only commits made just in the master branch + --non-master-only Show only commits made just in back branches + --oldest-first Show oldest commits first + --post-date Show branches made after a commit occurred + --since Show only commits dated since SINCE +EOM + exit 1; +} |