summaryrefslogtreecommitdiffstats
path: root/ziguard.awk
diff options
context:
space:
mode:
Diffstat (limited to 'ziguard.awk')
-rw-r--r--ziguard.awk386
1 files changed, 386 insertions, 0 deletions
diff --git a/ziguard.awk b/ziguard.awk
new file mode 100644
index 0000000..7a3404f
--- /dev/null
+++ b/ziguard.awk
@@ -0,0 +1,386 @@
+# Convert tzdata source into vanguard or rearguard form.
+
+# Contributed by Paul Eggert. This file is in the public domain.
+
+# This is not a general-purpose converter; it is designed for current tzdata.
+# It just converts from current source to main, vanguard, and rearguard forms.
+# Although it might be nice for it to be idempotent, or to be useful
+# for converting back and forth between vanguard and rearguard formats,
+# it does not do these nonessential tasks now.
+#
+# Although main and vanguard forms are currently equivalent,
+# this need not always be the case. When the two forms differ,
+# this script can convert either from main to vanguard form (needed then),
+# or from vanguard to main form (this conversion would be needed later,
+# after main became rearguard and vanguard became main).
+# There is no need to convert rearguard to other forms.
+#
+# When converting to vanguard form, the output can use the line
+# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
+#
+# When converting to vanguard form, the output can use negative SAVE
+# values.
+#
+# When converting to rearguard form, the output uses only nonnegative
+# SAVE values. The idea is for the output data to simulate the behavior
+# of the input data as best it can within the constraints of the
+# rearguard format.
+
+# Given a FIELD like "-0:30", return a minute count like -30.
+function get_minutes(field, \
+ sign, hours, minutes)
+{
+ sign = field ~ /^-/ ? -1 : 1
+ hours = +field
+ if (field ~ /:/) {
+ minutes = field
+ sub(/[^:]*:/, "", minutes)
+ }
+ return 60 * hours + sign * minutes
+}
+
+# Given an OFFSET, which is a minute count like 300 or 330,
+# return a %z-style abbreviation like "+05" or "+0530".
+function offset_abbr(offset, \
+ hours, minutes, sign)
+{
+ hours = int(offset / 60)
+ minutes = offset % 60
+ if (minutes) {
+ return sprintf("%+.4d", hours * 100 + minutes);
+ } else {
+ return sprintf("%+.2d", hours)
+ }
+}
+
+# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
+function round_to_second(timestamp, \
+ hh, mm, ss, seconds, dot_dddd, subseconds)
+{
+ dot_dddd = timestamp
+ if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
+ return timestamp
+ hh = mm = ss = timestamp
+ sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
+ sub(/^[-+]?[0-9]+:/, "", mm)
+ sub(/^[-+]?/, "", hh)
+ seconds = 3600 * hh + 60 * mm + ss
+ subseconds = +dot_dddd
+ seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
+ return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
+ seconds / 3600, seconds / 60 % 60, seconds % 60)
+}
+
+BEGIN {
+ dataform_type["vanguard"] = 1
+ dataform_type["main"] = 1
+ dataform_type["rearguard"] = 1
+
+ if (PACKRATLIST) {
+ while (getline <PACKRATLIST) {
+ if ($0 ~ /^#/) continue
+ packratlist[$3] = 1
+ }
+ }
+
+ # The command line should set DATAFORM.
+ if (!dataform_type[DATAFORM]) exit 1
+}
+
+$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
+ sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
+}
+
+/^Zone/ { zone = $2 }
+
+DATAFORM != "main" {
+ in_comment = $0 ~ /^#/
+ uncomment = comment_out = 0
+
+ # If this line should differ due to Czechoslovakia using negative SAVE values,
+ # uncomment the desired version and comment out the undesired one.
+ if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
+ && $0 ~ /1947 Feb 23/) {
+ if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
+ uncomment = in_comment
+ } else {
+ comment_out = !in_comment
+ }
+ }
+
+ # If this line should differ due to Ireland using negative SAVE values,
+ # uncomment the desired version and comment out the undesired one.
+ Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
+ Zone_Dublin_post_1968 \
+ = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
+ && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
+ if (Rule_Eire || Zone_Dublin_post_1968) {
+ if ((Rule_Eire \
+ || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
+ == (DATAFORM != "rearguard")) {
+ uncomment = in_comment
+ } else {
+ comment_out = !in_comment
+ }
+ }
+
+ # If this line should differ due to Namibia using negative SAVE values,
+ # uncomment the desired version and comment out the undesired one.
+ Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
+ Zone_using_Namibia_rule \
+ = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
+ && ($(in_comment + 2) == "Namibia" \
+ || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
+ && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
+ || in_comment + 3 == NF))))
+ if (Rule_Namibia || Zone_using_Namibia_rule) {
+ if ((Rule_Namibia \
+ ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
+ : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
+ == (DATAFORM != "rearguard")) {
+ uncomment = in_comment
+ } else {
+ comment_out = !in_comment
+ }
+ }
+
+ # If this line should differ due to Portugal benefiting from %z if supported,
+ # uncomment the desired version and comment out the undesired one.
+ if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
+ if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
+ uncomment = in_comment
+ } else {
+ comment_out = !in_comment
+ }
+ }
+
+ # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
+ # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
+ # This works around a bug in TZUpdater 2.3.2.
+ if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
+ if (($2 == "GMT") == (DATAFORM == "vanguard")) {
+ uncomment = in_comment
+ } else {
+ comment_out = !in_comment
+ }
+ }
+
+ if (uncomment) {
+ sub(/^#/, "")
+ }
+ if (comment_out) {
+ sub(/^/, "#")
+ }
+
+ # Prefer %z in vanguard form, explicit abbreviations otherwise.
+ if (DATAFORM == "vanguard") {
+ sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
+ "&CHANGE-TO-%z")
+ sub(/-00CHANGE-TO-%z/, "-00")
+ sub(/[-+][^\t ]+CHANGE-TO-/, "")
+ } else {
+ if ($0 ~ /^[^#]*%z/) {
+ stdoff_column = 2 * ($0 ~ /^Zone/) + 1
+ rules_column = stdoff_column + 1
+ stdoff = get_minutes($stdoff_column)
+ rules = $rules_column
+ stdabbr = offset_abbr(stdoff)
+ if (rules == "-") {
+ abbr = stdabbr
+ } else {
+ dstabbr_only = rules ~ /^[+0-9-]/
+ if (dstabbr_only) {
+ dstoff = get_minutes(rules)
+ } else {
+ # The DST offset is normally an hour, but there are special cases.
+ if (rules == "Morocco" && NF == 3) {
+ dstoff = -60
+ } else if (rules == "NBorneo") {
+ dstoff = 20
+ } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
+ || (rules == "Uruguay" \
+ && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
+ dstoff = 30
+ } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
+ dstoff = 90
+ } else {
+ dstoff = 60
+ }
+ }
+ dstabbr = offset_abbr(stdoff + dstoff)
+ if (dstabbr_only) {
+ abbr = dstabbr
+ } else {
+ abbr = stdabbr "/" dstabbr
+ }
+ }
+ sub(/%z/, abbr)
+ }
+ }
+
+ # Normally, prefer whole seconds. However, prefer subseconds
+ # if generating vanguard form and the otherwise-undocumented
+ # VANGUARD_SUBSECONDS environment variable is set.
+ # This relies on #STDOFF comment lines in the data.
+ # It is for hypothetical clients that support UT offsets that are
+ # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
+ # No known clients need this currently, and this experimental
+ # feature may be changed or withdrawn in future releases.
+ if ($1 == "#STDOFF") {
+ stdoff = $2
+ rounded_stdoff = round_to_second(stdoff)
+ if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
+ stdoff_subst[0] = rounded_stdoff
+ stdoff_subst[1] = stdoff
+ } else {
+ stdoff_subst[0] = stdoff
+ stdoff_subst[1] = rounded_stdoff
+ }
+ } else if (stdoff_subst[0]) {
+ stdoff_column = 2 * ($0 ~ /^Zone/) + 1
+ stdoff_column_val = $stdoff_column
+ if (stdoff_column_val == stdoff_subst[0]) {
+ sub(stdoff_subst[0], stdoff_subst[1])
+ } else if (stdoff_column_val != stdoff_subst[1]) {
+ stdoff_subst[0] = 0
+ }
+ }
+
+ # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
+ # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
+ if ($0 ~ /^Rule/ && $2 == "Japan") {
+ if (DATAFORM == "rearguard") {
+ if ($7 == "Sat>=8" && $8 == "25:00") {
+ sub(/Sat>=8/, "Sun>=9")
+ sub(/25:00/, " 1:00")
+ }
+ } else {
+ if ($7 == "Sun>=9" && $8 == "1:00") {
+ sub(/Sun>=9/, "Sat>=8")
+ sub(/ 1:00/, "25:00")
+ }
+ }
+ }
+
+ # In rearguard form, change the Morocco lines with negative SAVE values
+ # to use positive SAVE values.
+ if ($2 == "Morocco") {
+ if ($0 ~ /^Rule/) {
+ if ($4 ~ /^201[78]$/ && $6 == "Oct") {
+ if (DATAFORM == "rearguard") {
+ sub(/\t2018\t/, "\t2017\t")
+ } else {
+ sub(/\t2017\t/, "\t2018\t")
+ }
+ }
+
+ if (2019 <= $3) {
+ if ($8 == "2:00") {
+ if (DATAFORM == "rearguard") {
+ sub(/\t0\t/, "\t1:00\t")
+ } else {
+ sub(/\t1:00\t/, "\t0\t")
+ }
+ } else {
+ if (DATAFORM == "rearguard") {
+ sub(/\t-1:00\t/, "\t0\t")
+ } else {
+ sub(/\t0\t/, "\t-1:00\t")
+ }
+ }
+ }
+ }
+ if ($1 ~ /^[+0-9-]/ && NF == 3) {
+ if (DATAFORM == "rearguard") {
+ sub(/1:00\tMorocco/, "0:00\tMorocco")
+ sub(/\t\+01\/\+00$/, "\t+00/+01")
+ } else {
+ sub(/0:00\tMorocco/, "1:00\tMorocco")
+ sub(/\t\+00\/+01$/, "\t+01/+00")
+ }
+ }
+ }
+}
+
+/^Zone/ {
+ packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
+}
+{
+ if (packrat_ignored && $0 !~ /^Rule/) {
+ sub(/^/, "#")
+ }
+}
+
+# Return a link line resulting by changing OLDLINE to link to TARGET
+# from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
+# Align data columns the same as they were in OLDLINE.
+# Also, replace any existing white space followed by comment with COMMENT.
+function make_linkline(oldline, target, linkname, oldtarget, comment, \
+ oldprefix, oldprefixlen, oldtargettabs, \
+ replsuffix, targettabs)
+{
+ oldprefix = "Link\t" oldtarget "\t"
+ oldprefixlen = length(oldprefix)
+ if (substr(oldline, 1, oldprefixlen) == oldprefix) {
+ # Use tab stops to preserve LINKNAME's column.
+ replsuffix = substr(oldline, oldprefixlen + 1)
+ sub(/[\t ]*#.*/, "", replsuffix)
+ oldtargettabs = int(length(oldtarget) / 8) + 1
+ targettabs = int(length(target) / 8) + 1
+ for (; targettabs < oldtargettabs; targettabs++) {
+ replsuffix = "\t" replsuffix
+ }
+ for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
+ replsuffix = substr(replsuffix, 2)
+ }
+ } else {
+ # Odd format line; don't bother lining up its replacement nicely.
+ replsuffix = linkname
+ }
+ return "Link\t" target "\t" replsuffix comment
+}
+
+/^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
+ $0 = make_linkline($0, $5, $3, $2)
+}
+
+# If a Link line is followed by a Link or Zone line for the same data, comment
+# out the Link line. This can happen if backzone overrides a Link
+# with a Zone or a different Link.
+/^Zone/ {
+ sub(/^Link/, "#Link", line[linkline[$2]])
+}
+/^Link/ {
+ sub(/^Link/, "#Link", line[linkline[$3]])
+ linkline[$3] = NR
+ linktarget[$3] = $2
+}
+
+{ line[NR] = $0 }
+
+function cut_link_chains_short( \
+ l, linkname, t, target)
+{
+ for (linkname in linktarget) {
+ target = linktarget[linkname]
+ t = linktarget[target]
+ if (t) {
+ # TARGET is itself a link name. Replace the line "Link TARGET LINKNAME"
+ # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
+ # of links that LINKNAME points to.
+ while ((u = linktarget[t])) {
+ t = u
+ }
+ l = linkline[linkname]
+ line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
+ }
+ }
+}
+
+END {
+ if (DATAFORM != "vanguard") {
+ cut_link_chains_short()
+ }
+ for (i = 1; i <= NR; i++)
+ print line[i]
+}