diff options
Diffstat (limited to 'ziguard.awk')
-rw-r--r-- | ziguard.awk | 236 |
1 files changed, 205 insertions, 31 deletions
diff --git a/ziguard.awk b/ziguard.awk index 2be6d52..0728baa 100644 --- a/ziguard.awk +++ b/ziguard.awk @@ -9,7 +9,11 @@ # it does not do these nonessential tasks now. # # Although main and vanguard forms are currently equivalent, -# this need not always be the case. +# this need not always be the case. When the two forms differ, +# this script can convert either from main to vanguard form (needed then), +# or from vanguard to main form (this conversion would be needed later, +# after main became rearguard and vanguard became main). +# There is no need to convert rearguard to other forms. # # When converting to vanguard form, the output can use negative SAVE # values. @@ -19,14 +23,69 @@ # of the input data as best it can within the constraints of the # rearguard format. +# Given a FIELD like "-0:30", return a minute count like -30. +function get_minutes(field, \ + sign, hours, minutes) +{ + sign = field ~ /^-/ ? -1 : 1 + hours = +field + if (field ~ /:/) { + minutes = field + sub(/[^:]*:/, "", minutes) + } + return 60 * hours + sign * minutes +} + +# Given an OFFSET, which is a minute count like 300 or 330, +# return a %z-style abbreviation like "+05" or "+0530". +function offset_abbr(offset, \ + hours, minutes, sign) +{ + hours = int(offset / 60) + minutes = offset % 60 + if (minutes) { + return sprintf("%+.4d", hours * 100 + minutes); + } else { + return sprintf("%+.2d", hours) + } +} + +# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second. +function round_to_second(timestamp, \ + hh, mm, ss, seconds, dot_dddd, subseconds) +{ + dot_dddd = timestamp + if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd)) + return timestamp + hh = mm = ss = timestamp + sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss) + sub(/^[-+]?[0-9]+:/, "", mm) + sub(/^[-+]?/, "", hh) + seconds = 3600 * hh + 60 * mm + ss + subseconds = +dot_dddd + seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2)); + return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \ + seconds / 3600, seconds / 60 % 60, seconds % 60) +} + BEGIN { dataform_type["vanguard"] = 1 dataform_type["main"] = 1 dataform_type["rearguard"] = 1 + if (PACKRATLIST) { + while (getline <PACKRATLIST) { + if ($0 ~ /^#/) continue + packratlist[$3] = 1 + } + } + # The command line should set DATAFORM. if (!dataform_type[DATAFORM]) exit 1 - vanguard = DATAFORM == "vanguard" +} + +$1 == "#PACKRATLIST" && $2 == PACKRATLIST { + sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "") } /^Zone/ { zone = $2 } @@ -38,7 +97,7 @@ DATAFORM != "main" { # If this line should differ due to Czechoslovakia using negative SAVE values, # uncomment the desired version and comment out the undesired one. if (zone == "Europe/Prague" && /^#?[\t ]+[01]:00[\t ]/ && /1947 Feb 23/) { - if (($(in_comment + 2) != "-") == vanguard) { + if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) { uncomment = in_comment } else { comment_out = !in_comment @@ -54,7 +113,7 @@ DATAFORM != "main" { if (Rule_Eire || Zone_Dublin_post_1968) { if ((Rule_Eire \ || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \ - == vanguard) { + == (DATAFORM != "rearguard")) { uncomment = in_comment } else { comment_out = !in_comment @@ -71,11 +130,20 @@ DATAFORM != "main" { && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \ || in_comment + 3 == NF)))) if (Rule_Namibia || Zone_using_Namibia_rule) { - if ((Rule_Namibia \ - ? ($(in_comment + 9) ~ /^-/ \ - || ($(in_comment + 9) == 0 && $(in_comment + 10) == "CAT")) \ - : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \ - == vanguard) { + if ((Rule_Namibia \ + ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \ + : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \ + == (DATAFORM != "rearguard")) { + uncomment = in_comment + } else { + comment_out = !in_comment + } + } + + # If this line should differ due to Portugal benefiting from %z if supported, + # uncomment the desired version and comment out the undesired one. + if (/^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) { + if (/%z/ == (DATAFORM == "vanguard")) { uncomment = in_comment } else { comment_out = !in_comment @@ -89,37 +157,143 @@ DATAFORM != "main" { sub(/^/, "#") } - # In rearguard format, change the Japan rule line with "Sat>=8 25:00" - # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java. - if (!vanguard && $1 == "Rule" && $7 == "Sat>=8" && $8 == "25:00") { - sub(/Sat>=8/, "Sun>=9") - sub(/25:00/, " 1:00") + # Prefer %z in vanguard form, explicit abbreviations otherwise. + if (DATAFORM == "vanguard") { + sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \ + "&CHANGE-TO-%z") + sub(/-00CHANGE-TO-%z/, "-00") + sub(/[-+][^\t ]+CHANGE-TO-/, "") + } else { + if (/^[^#]*%z/) { + stdoff_column = 2 * /^Zone/ + 1 + rules_column = stdoff_column + 1 + stdoff = get_minutes($stdoff_column) + rules = $rules_column + stdabbr = offset_abbr(stdoff) + if (rules == "-") { + abbr = stdabbr + } else { + dstabbr_only = rules ~ /^[+0-9-]/ + if (dstabbr_only) { + dstoff = get_minutes(rules) + } else { + # The DST offset is normally an hour, but there are special cases. + if (rules == "Morocco" && NF == 3) { + dstoff = -60 + } else if (rules == "NBorneo") { + dstoff = 20 + } else if (((rules == "Cook" || rules == "LH") && NF == 3) \ + || (rules == "Uruguay" \ + && /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) { + dstoff = 30 + } else if (rules == "Uruguay" && /[\t ]1974 Mar 10$/) { + dstoff = 90 + } else { + dstoff = 60 + } + } + dstabbr = offset_abbr(stdoff + dstoff) + if (dstabbr_only) { + abbr = dstabbr + } else { + abbr = stdabbr "/" dstabbr + } + } + sub(/%z/, abbr) + } } - # In rearguard format, change the Morocco lines with negative SAVE values - # to use positive SAVE values. - if (!vanguard && $1 == "Rule" && $2 == "Morocco" && $4 == 2018 \ - && $6 == "Oct") { - sub(/\t2018\t/, "\t2017\t") + # Normally, prefer whole seconds. However, prefer subseconds + # if generating vanguard form and the otherwise-undocumented + # VANGUARD_SUBSECONDS environment variable is set. + # This relies on #STDOFF comment lines in the data. + # It is for hypothetical clients that support UT offsets that are + # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912). + # No known clients need this currently, and this experimental + # feature may be changed or withdrawn in future releases. + if ($1 == "#STDOFF") { + stdoff = $2 + rounded_stdoff = round_to_second(stdoff) + if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) { + stdoff_subst[0] = rounded_stdoff + stdoff_subst[1] = stdoff + } else { + stdoff_subst[0] = stdoff + stdoff_subst[1] = rounded_stdoff + } + } else if (stdoff_subst[0]) { + stdoff_column = 2 * /^Zone/ + 1 + stdoff_column_val = $stdoff_column + if (stdoff_column_val == stdoff_subst[0]) { + sub(stdoff_subst[0], stdoff_subst[1]) + } else if (stdoff_column_val != stdoff_subst[1]) { + stdoff_subst[0] = 0 + } } - if (!vanguard && $1 == "Rule" && $2 == "Morocco" && 2019 <= $3) { - if ($9 == "0") { - last_std_date = $3 " " $6 " " $7 " " $8 - sub(/\t0\t/, "\t1:00\t") + + # In rearguard form, change the Japan rule line with "Sat>=8 25:00" + # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java. + if (/^Rule/ && $2 == "Japan") { + if (DATAFORM == "rearguard") { + if ($7 == "Sat>=8" && $8 == "25:00") { + sub(/Sat>=8/, "Sun>=9") + sub(/25:00/, " 1:00") + } } else { - sub(/\t-1:00\t/, "\t0\t") + if ($7 == "Sun>=9" && $8 == "1:00") { + sub(/Sun>=9/, "Sat>=8") + sub(/ 1:00/, "25:00") + } } } - if (!vanguard && $1 == "1:00" && $2 == "Morocco" && $3 == "+01/+00") { - # This introduces a transition from 01:59:59 +00 to 03:00:00 +01 - # with both times being standard (i.e., a change to standard UT offset). - # This is rearguard's way to approximate the actual prediction, - # which is that of an ordinary transition from DST to standard time. - sub(/1:00\tMorocco\t\+01\/\+00$/, - "0:00\tMorocco\t+00/+01\t" last_std_date "\n\t\t\t 1:00\t-\t+01") + + # In rearguard form, change the Morocco lines with negative SAVE values + # to use positive SAVE values. + if ($2 == "Morocco") { + if (/^Rule/) { + if ($4 ~ /^201[78]$/ && $6 == "Oct") { + if (DATAFORM == "rearguard") { + sub(/\t2018\t/, "\t2017\t") + } else { + sub(/\t2017\t/, "\t2018\t") + } + } + + if (2019 <= $3) { + if ($8 == "2:00") { + if (DATAFORM == "rearguard") { + sub(/\t0\t/, "\t1:00\t") + } else { + sub(/\t1:00\t/, "\t0\t") + } + } else { + if (DATAFORM == "rearguard") { + sub(/\t-1:00\t/, "\t0\t") + } else { + sub(/\t0\t/, "\t-1:00\t") + } + } + } + } + if ($1 ~ /^[+0-9-]/ && NF == 3) { + if (DATAFORM == "rearguard") { + sub(/1:00\tMorocco/, "0:00\tMorocco") + sub(/\t\+01\/\+00$/, "\t+00/+01") + } else { + sub(/0:00\tMorocco/, "1:00\tMorocco") + sub(/\t\+00\/+01$/, "\t+01/+00") + } + } } } +/^Zone/ { + packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2]; +} +packrat_ignored && !/^Rule/ { + sub(/^/, "#") +} + # If a Link line is followed by a Link or Zone line for the same data, comment # out the Link line. This can happen if backzone overrides a Link # with a Zone or a different Link. |