diff options
author | Stuart Bishop <stuart@stuartbishop.net> | 2022-08-12 09:36:24 +1000 |
---|---|---|
committer | Stuart Bishop <stuart@stuartbishop.net> | 2022-08-12 09:36:24 +1000 |
commit | 301a880b1dd96605a98374576b0e210994a69f2d (patch) | |
tree | bf28f9116b188da1d70cb05b6e502ccdd39b9005 /ziguard.awk | |
parent | 9e15fadcb930d6781591d14a0fb20e1135bde9c0 (diff) | |
download | pytz-git-301a880b1dd96605a98374576b0e210994a69f2d.tar.gz |
Squashed 'tz/' changes from 95ecc37d2..32a4af56f
b61a7acb4 Release 2022b
39dc54c76 Use more-official URL for Chile change
cc9d7b92f Fix typo in previous commit.
711b46f8f Chile's DST is delayed by a week in September 2022
12be3e4ce Omit pacificnew from tailored vanguard tarballs
9b665ce9a Put 'leapseconds' etc. into tailored tarballs
35fa37fbb Finish duplicate-since-1970 moves
0b925f6d8 Move nine more zones to ‘backzone’
0f9ac4ff2 Improve zishrink after PACKRATLIST
3be5b73fe Add PACKRATLIST build-time option
406d29f4a Remove hack to work around circa-2006 zic issue
4000ea353 Simplify subseconds for Asia/Jakarta
4b78b8bb3 Don’t generate subseconds in vanguard form
ff6ead112 Add a couple of #STDOFF comments
6b32cae26 Simplify subsecond precision handling
26e522baa Check that ziguard substitutions revert
de89db718 Omit tiny 1906 change in Vietnam
4d39a0ee9 Apply subsecond precision to Amsterdam Mean Time
4f8ac112b Vanguard form now uses subsecond precision
4c7a2921c Omit tiny 1880 change in Dublin
4feb18cd4 * theory.html: Source data can do subseconds.
9cd5f5339 * europe (Atlantic/Madeira): Fix typo in vanguard section.
bffc26e90 * leapseconds.awk: no longer executable
42eda0722 Fix location of recently added posix_packrat line.
bd548f646 Remove obsolescent posix_packrat target
13a0921ac New ‘make’ target tailored_tarballs
52f8d925f * zic.8: Don’t pagebreak just after "EXTENDED EXAMPLE".
e4374a954 Fix minor glitches in .txt output
83a2b918a * tz-link.html: Link to the unsmear library.
dfe016bc5 No leap second on 2022-12-31
a249a0c64 Use %z in vanguard form
0deba9f3e * europe: Fix a few more "Kiev"s in comments.
729c2d34d Refactor ziguard.awk
e0d136f49 * europe, backzone: Correct and move Liechtenstein comment.
634b9361d * backward: Add backward-compatibility comment.
dd0a679c9 * NEWS: Mention LOCALTIME change.
172d43570 Change LOCALTIME default back to Factory
50df7d69f gmtime etc. now say "UTC", not "GMT"
bf8aa9414 Remove ancient asctime.c cruft
3da46e2af Avoid C macros when this is easy
4551a1ae6 Remove macros duplicated from private.h
a4095bda6 Improve #if indenting
918e10e89 * zic.8: fix minus typo
eaa6bf832 Add -R option to zic
a9bb98690 Chile's 1946/7 DST started early
0b094598d April Fool note
7ffb999b1 Chile’s 1946/7 time was DST
37c27ce43 Iran 1977 fallback was 10-20 not 09-23
93b40c75a Upgrade GCC_DEBUG_FLAGS to GCC 12
ce8774c37 Document that zic -r doesn’t necessarily shrink
5e7de49c3 Fix bug with zic -r cutoff before 1st transition
1fa2731ce Refactor by using max and min macros
0e8f0b06a strftime %s no longer worries about mktime failure
6f2e9b693 Stricter mktime -1 heuristic in strftime
d8655c6ae * asia: Commentary corrections from Michael Deckers.
52061c178 Correct Iran transitions in 1977 and 1978
ec42353f5 Fix bug uncovered by recent change to Iran history
6af4cda4f Refactor outzone
538c64e13 Iran DST changes in late 1970s
3d605c53f Iran switched to standard time in 1935
4742526b7 Prefer specified time for vestigial variables
9fa1a5395 * NEWS: Mention other Ukrainian locations.
8b6a387b1 Go back to 2021e Morocco rearguard workaround
fd03aae45 Add </p> to end element.
726ef41a5 Clarify old-version doc
1fd3bd4c2 Check that FORMAT has / only with rules
e18c7ac28 * zic.8: Say that STDOFF lacks suffix letters.
221bf5fe3 Simplify Asia/Dushanbe
6c018546a Remove already-obsolete link
66b18d983 Iran will stop DST in 2023
6f96f5590 Follow Shanks for Crimea 1994/1996
b5d501457 Simplify Europe/Simferopol
a012287a6 Avoid “traditional” and “popular”
e13e9c531 Rename Europe/Kiev to Europe/Kyiv
1f023d598 Cite 2022 Magallanes decree
ac377a1f0 Simplify field memory allocation in zic
65f616d2e zic now checks input bytes more carefully
ce5b21b84 Omit or paraphrase some quotes
b6a8aeca9 Improve Morocco 2087 rearguard workaround
a0f887648 Fix Dawson Creek latitude (thanks to Michael Ewert).
64c5d196f Fix spelling and grammar in commentary
32a9a151e Fix some typos in NEWS, africa and australasia.
git-subtree-dir: tz
git-subtree-split: 32a4af56f4afa83720840dc4e325636428283b84
Diffstat (limited to 'ziguard.awk')
-rw-r--r-- | ziguard.awk | 236 |
1 files changed, 205 insertions, 31 deletions
```diff
diff --git a/ziguard.awk b/ziguard.awk
index 2be6d52..0728baa 100644
--- a/ziguard.awk
+++ b/ziguard.awk
@@ -9,7 +9,11 @@
 # it does not do these nonessential tasks now.
 #
 # Although main and vanguard forms are currently equivalent,
-# this need not always be the case.
+# this need not always be the case. When the two forms differ,
+# this script can convert either from main to vanguard form (needed then),
+# or from vanguard to main form (this conversion would be needed later,
+# after main became rearguard and vanguard became main).
+# There is no need to convert rearguard to other forms.
 #
 # When converting to vanguard form, the output can use negative SAVE
 # values.
@@ -19,14 +23,69 @@
 # of the input data as best it can within the constraints of the
 # rearguard format.
 
+# Given a FIELD like "-0:30", return a minute count like -30.
+function get_minutes(field, \
+                     sign, hours, minutes)
+{
+  sign = field ~ /^-/ ? -1 : 1
+  hours = +field
+  if (field ~ /:/) {
+    minutes = field
+    sub(/[^:]*:/, "", minutes)
+  }
+  return 60 * hours + sign * minutes
+}
+
+# Given an OFFSET, which is a minute count like 300 or 330,
+# return a %z-style abbreviation like "+05" or "+0530".
+function offset_abbr(offset, \
+                     hours, minutes, sign)
+{
+  hours = int(offset / 60)
+  minutes = offset % 60
+  if (minutes) {
+    return sprintf("%+.4d", hours * 100 + minutes);
+  } else {
+    return sprintf("%+.2d", hours)
+  }
+}
+
+# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
+function round_to_second(timestamp, \
+                         hh, mm, ss, seconds, dot_dddd, subseconds)
+{
+  dot_dddd = timestamp
+  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
+    return timestamp
+  hh = mm = ss = timestamp
+  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
+  sub(/^[-+]?[0-9]+:/, "", mm)
+  sub(/^[-+]?/, "", hh)
+  seconds = 3600 * hh + 60 * mm + ss
+  subseconds = +dot_dddd
+  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
+  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
+                 seconds / 3600, seconds / 60 % 60, seconds % 60)
+}
+
 BEGIN {
   dataform_type["vanguard"] = 1
   dataform_type["main"] = 1
   dataform_type["rearguard"] = 1
 
+  if (PACKRATLIST) {
+    while (getline <PACKRATLIST) {
+      if ($0 ~ /^#/) continue
+      packratlist[$3] = 1
+    }
+  }
+
   # The command line should set DATAFORM.
   if (!dataform_type[DATAFORM]) exit 1
-  vanguard = DATAFORM == "vanguard"
+}
+
+$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
+  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
 }
 
 /^Zone/ { zone = $2 }
@@ -38,7 +97,7 @@ DATAFORM != "main" {
   # If this line should differ due to Czechoslovakia using negative SAVE values,
   # uncomment the desired version and comment out the undesired one.
   if (zone == "Europe/Prague" && /^#?[\t ]+[01]:00[\t ]/ && /1947 Feb 23/) {
-    if (($(in_comment + 2) != "-") == vanguard) {
+    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
       uncomment = in_comment
     } else {
       comment_out = !in_comment
@@ -54,7 +113,7 @@ DATAFORM != "main" {
   if (Rule_Eire || Zone_Dublin_post_1968) {
     if ((Rule_Eire \
          || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
-        == vanguard) {
+        == (DATAFORM != "rearguard")) {
       uncomment = in_comment
     } else {
       comment_out = !in_comment
@@ -71,11 +130,20 @@ DATAFORM != "main" {
                && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                    || in_comment + 3 == NF))))
   if (Rule_Namibia || Zone_using_Namibia_rule) {
-    if ((Rule_Namibia \
-         ? ($(in_comment + 9) ~ /^-/ \
-            || ($(in_comment + 9) == 0 && $(in_comment + 10) == "CAT")) \
-         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
-        == vanguard) {
+    if ((Rule_Namibia \
+         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
+         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
+        == (DATAFORM != "rearguard")) {
+      uncomment = in_comment
+    } else {
+      comment_out = !in_comment
+    }
+  }
+
+  # If this line should differ due to Portugal benefiting from %z if supported,
+  # uncomment the desired version and comment out the undesired one.
+  if (/^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
+    if (/%z/ == (DATAFORM == "vanguard")) {
       uncomment = in_comment
     } else {
       comment_out = !in_comment
@@ -89,37 +157,143 @@ DATAFORM != "main" {
     sub(/^/, "#")
   }
 
-  # In rearguard format, change the Japan rule line with "Sat>=8 25:00"
-  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
-  if (!vanguard && $1 == "Rule" && $7 == "Sat>=8" && $8 == "25:00") {
-    sub(/Sat>=8/, "Sun>=9")
-    sub(/25:00/, " 1:00")
+  # Prefer %z in vanguard form, explicit abbreviations otherwise.
+  if (DATAFORM == "vanguard") {
+    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
+        "&CHANGE-TO-%z")
+    sub(/-00CHANGE-TO-%z/, "-00")
+    sub(/[-+][^\t ]+CHANGE-TO-/, "")
+  } else {
+    if (/^[^#]*%z/) {
+      stdoff_column = 2 * /^Zone/ + 1
+      rules_column = stdoff_column + 1
+      stdoff = get_minutes($stdoff_column)
+      rules = $rules_column
+      stdabbr = offset_abbr(stdoff)
+      if (rules == "-") {
+        abbr = stdabbr
+      } else {
+        dstabbr_only = rules ~ /^[+0-9-]/
+        if (dstabbr_only) {
+          dstoff = get_minutes(rules)
+        } else {
+          # The DST offset is normally an hour, but there are special cases.
+          if (rules == "Morocco" && NF == 3) {
+            dstoff = -60
+          } else if (rules == "NBorneo") {
+            dstoff = 20
+          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
+                     || (rules == "Uruguay" \
+                         && /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
+            dstoff = 30
+          } else if (rules == "Uruguay" && /[\t ]1974 Mar 10$/) {
+            dstoff = 90
+          } else {
+            dstoff = 60
+          }
+        }
+        dstabbr = offset_abbr(stdoff + dstoff)
+        if (dstabbr_only) {
+          abbr = dstabbr
+        } else {
+          abbr = stdabbr "/" dstabbr
+        }
+      }
+      sub(/%z/, abbr)
+    }
   }
 
-  # In rearguard format, change the Morocco lines with negative SAVE values
-  # to use positive SAVE values.
-  if (!vanguard && $1 == "Rule" && $2 == "Morocco" && $4 == 2018 \
-      && $6 == "Oct") {
-    sub(/\t2018\t/, "\t2017\t")
+  # Normally, prefer whole seconds. However, prefer subseconds
+  # if generating vanguard form and the otherwise-undocumented
+  # VANGUARD_SUBSECONDS environment variable is set.
+  # This relies on #STDOFF comment lines in the data.
+  # It is for hypothetical clients that support UT offsets that are
+  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
+  # No known clients need this currently, and this experimental
+  # feature may be changed or withdrawn in future releases.
+  if ($1 == "#STDOFF") {
+    stdoff = $2
+    rounded_stdoff = round_to_second(stdoff)
+    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
+      stdoff_subst[0] = rounded_stdoff
+      stdoff_subst[1] = stdoff
+    } else {
+      stdoff_subst[0] = stdoff
+      stdoff_subst[1] = rounded_stdoff
+    }
+  } else if (stdoff_subst[0]) {
+    stdoff_column = 2 * /^Zone/ + 1
+    stdoff_column_val = $stdoff_column
+    if (stdoff_column_val == stdoff_subst[0]) {
+      sub(stdoff_subst[0], stdoff_subst[1])
+    } else if (stdoff_column_val != stdoff_subst[1]) {
+      stdoff_subst[0] = 0
+    }
   }
-  if (!vanguard && $1 == "Rule" && $2 == "Morocco" && 2019 <= $3) {
-    if ($9 == "0") {
-      last_std_date = $3 " " $6 " " $7 " " $8
-      sub(/\t0\t/, "\t1:00\t")
+
+  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
+  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
+  if (/^Rule/ && $2 == "Japan") {
+    if (DATAFORM == "rearguard") {
+      if ($7 == "Sat>=8" && $8 == "25:00") {
+        sub(/Sat>=8/, "Sun>=9")
+        sub(/25:00/, " 1:00")
+      }
     } else {
-      sub(/\t-1:00\t/, "\t0\t")
+      if ($7 == "Sun>=9" && $8 == "1:00") {
+        sub(/Sun>=9/, "Sat>=8")
+        sub(/ 1:00/, "25:00")
+      }
     }
   }
-  if (!vanguard && $1 == "1:00" && $2 == "Morocco" && $3 == "+01/+00") {
-    # This introduces a transition from 01:59:59 +00 to 03:00:00 +01
-    # with both times being standard (i.e., a change to standard UT offset).
-    # This is rearguard's way to approximate the actual prediction,
-    # which is that of an ordinary transition from DST to standard time.
-    sub(/1:00\tMorocco\t\+01\/\+00$/,
-        "0:00\tMorocco\t+00/+01\t" last_std_date "\n\t\t\t 1:00\t-\t+01")
+
+  # In rearguard form, change the Morocco lines with negative SAVE values
+  # to use positive SAVE values.
+  if ($2 == "Morocco") {
+    if (/^Rule/) {
+      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
+        if (DATAFORM == "rearguard") {
+          sub(/\t2018\t/, "\t2017\t")
+        } else {
+          sub(/\t2017\t/, "\t2018\t")
+        }
+      }
+
+      if (2019 <= $3) {
+        if ($8 == "2:00") {
+          if (DATAFORM == "rearguard") {
+            sub(/\t0\t/, "\t1:00\t")
+          } else {
+            sub(/\t1:00\t/, "\t0\t")
+          }
+        } else {
+          if (DATAFORM == "rearguard") {
+            sub(/\t-1:00\t/, "\t0\t")
+          } else {
+            sub(/\t0\t/, "\t-1:00\t")
+          }
+        }
+      }
+    }
+    if ($1 ~ /^[+0-9-]/ && NF == 3) {
+      if (DATAFORM == "rearguard") {
+        sub(/1:00\tMorocco/, "0:00\tMorocco")
+        sub(/\t\+01\/\+00$/, "\t+00/+01")
+      } else {
+        sub(/0:00\tMorocco/, "1:00\tMorocco")
+        sub(/\t\+00\/+01$/, "\t+01/+00")
+      }
+    }
   }
 }
 
+/^Zone/ {
+  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
+}
+packrat_ignored && !/^Rule/ {
+  sub(/^/, "#")
+}
+
 # If a Link line is followed by a Link or Zone line for the same data, comment
 # out the Link line. This can happen if backzone overrides a Link
 # with a Zone or a different Link.
```