summary refs log tree commit diff
path: root/ziguard.awk
diff options
context:
space:
mode:
author Stuart Bishop <stuart@stuartbishop.net> 2022-08-12 09:36:24 +1000
committer Stuart Bishop <stuart@stuartbishop.net> 2022-08-12 09:36:24 +1000
commit 301a880b1dd96605a98374576b0e210994a69f2d (patch)
tree bf28f9116b188da1d70cb05b6e502ccdd39b9005 /ziguard.awk
parent 9e15fadcb930d6781591d14a0fb20e1135bde9c0 (diff)
download pytz-git-301a880b1dd96605a98374576b0e210994a69f2d.tar.gz
Squashed 'tz/' changes from 95ecc37d2..32a4af56f
b61a7acb4 Release 2022b 39dc54c76 Use more-official URL for Chile change cc9d7b92f Fix typo in previous commit. 711b46f8f Chile's DST is delayed by a week in September 2022 12be3e4ce Omit pacificnew from tailored vanguard tarballs 9b665ce9a Put 'leapseconds' etc. into tailored tarballs 35fa37fbb Finish duplicate-since-1970 moves 0b925f6d8 Move nine more zones to ‘backzone’ 0f9ac4ff2 Improve zishrink after PACKRATLIST 3be5b73fe Add PACKRATLIST build-time option 406d29f4a Remove hack to work around circa-2006 zic issue 4000ea353 Simplify subseconds for Asia/Jakarta 4b78b8bb3 Don’t generate subseconds in vanguard form ff6ead112 Add a couple of #STDOFF comments 6b32cae26 Simplify subsecond precision handling 26e522baa Check that ziguard substitutions revert de89db718 Omit tiny 1906 change in Vietnam 4d39a0ee9 Apply subsecond precision to Amsterdam Mean Time 4f8ac112b Vanguard form now uses subsecond precision 4c7a2921c Omit tiny 1880 change in Dublin 4feb18cd4 * theory.html: Source data can do subseconds. 9cd5f5339 * europe (Atlantic/Madeira): Fix typo in vanguard section. bffc26e90 * leapseconds.awk: no longer executable 42eda0722 Fix location of recently added posix_packrat line. bd548f646 Remove obsolescent posix_packrat target 13a0921ac New ‘make’ target tailored_tarballs 52f8d925f * zic.8: Don’t pagebreak just after "EXTENDED EXAMPLE". e4374a954 Fix minor glitches in .txt output 83a2b918a * tz-link.html: Link to the unsmear library. dfe016bc5 No leap second on 2022-12-31 a249a0c64 Use %z in vanguard form 0deba9f3e * europe: Fix a few more "Kiev"s in comments. 729c2d34d Refactor ziguard.awk e0d136f49 * europe, backzone: Correct and move Liechtenstein comment. 634b9361d * backward: Add backward-compatibility comment. dd0a679c9 * NEWS: Mention LOCALTIME change. 172d43570 Change LOCALTIME default back to Factory 50df7d69f gmtime etc. 
now say "UTC", not "GMT" bf8aa9414 Remove ancient asctime.c cruft 3da46e2af Avoid C macros when this is easy 4551a1ae6 Remove macros duplicated from private.h a4095bda6 Improve #if indenting 918e10e89 * zic.8: fix minus typo eaa6bf832 Add -R option to zic a9bb98690 Chile's 1946/7 DST started early 0b094598d April Fool note 7ffb999b1 Chile’s 1946/7 time was DST 37c27ce43 Iran 1977 fallback was 10-20 not 09-23 93b40c75a Upgrade GCC_DEBUG_FLAGS to GCC 12 ce8774c37 Document that zic -r doesn’t necessarily shrink 5e7de49c3 Fix bug with zic -r cutoff before 1st transition 1fa2731ce Refactor by using max and min macros 0e8f0b06a strftime %s no longer worries about mktime failure 6f2e9b693 Stricter mktime -1 heuristic in strftime d8655c6ae * asia: Commentary corrections from Michael Deckers. 52061c178 Correct Iran transitions in 1977 and 1978 ec42353f5 Fix bug uncovered by recent change to Iran history 6af4cda4f Refactor outzone 538c64e13 Iran DST changes in late 1970s 3d605c53f Iran switched to standard time in 1935 4742526b7 Prefer specified time for vestigial variables 9fa1a5395 * NEWS: Mention other Ukrainian locations. 8b6a387b1 Go back to 2021e Morocco rearguard workaround fd03aae45 Add </p> to end element. 726ef41a5 Clarify old-version doc 1fd3bd4c2 Check that FORMAT has / only with rules e18c7ac28 * zic.8: Say that STDOFF lacks suffix letters. 221bf5fe3 Simplify Asia/Dushanbe 6c018546a Remove already-obsolete link 66b18d983 Iran will stop DST in 2023 6f96f5590 Follow Shanks for Crimea 1994/1996 b5d501457 Simplify Europe/Simferopol a012287a6 Avoid “traditional” and “popular” e13e9c531 Rename Europe/Kiev to Europe/Kyiv 1f023d598 Cite 2022 Magallanes decree ac377a1f0 Simplify field memory allocation in zic 65f616d2e zic now checks input bytes more carefully ce5b21b84 Omit or paraphrase some quotes b6a8aeca9 Improve Morocco 2087 rearguard workaround a0f887648 Fix Dawson Creek latitude (thanks to Michael Ewert). 
64c5d196f Fix spelling and grammar in commentary 32a9a151e Fix some typos in NEWS, africa and australasia. git-subtree-dir: tz git-subtree-split: 32a4af56f4afa83720840dc4e325636428283b84
Diffstat (limited to 'ziguard.awk')
-rw-r--r-- ziguard.awk 236
1 files changed, 205 insertions, 31 deletions
diff --git a/ziguard.awk b/ziguard.awk
index 2be6d52..0728baa 100644
--- a/ziguard.awk
+++ b/ziguard.awk
@@ -9,7 +9,11 @@
# it does not do these nonessential tasks now.
#
# Although main and vanguard forms are currently equivalent,
-# this need not always be the case.
+# this need not always be the case. When the two forms differ,
+# this script can convert either from main to vanguard form (needed then),
+# or from vanguard to main form (this conversion would be needed later,
+# after main became rearguard and vanguard became main).
+# There is no need to convert rearguard to other forms.
#
# When converting to vanguard form, the output can use negative SAVE
# values.
@@ -19,14 +23,69 @@
# of the input data as best it can within the constraints of the
# rearguard format.
+# Given a FIELD like "-0:30", return a minute count like -30.
+function get_minutes(field, \
+ sign, hours, minutes)
+{
+ sign = field ~ /^-/ ? -1 : 1
+ hours = +field
+ if (field ~ /:/) {
+ minutes = field
+ sub(/[^:]*:/, "", minutes)
+ }
+ return 60 * hours + sign * minutes
+}
+
+# Given an OFFSET, which is a minute count like 300 or 330,
+# return a %z-style abbreviation like "+05" or "+0530".
+function offset_abbr(offset, \
+ hours, minutes, sign)
+{
+ hours = int(offset / 60)
+ minutes = offset % 60
+ if (minutes) {
+ return sprintf("%+.4d", hours * 100 + minutes);
+ } else {
+ return sprintf("%+.2d", hours)
+ }
+}
+
+# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
+function round_to_second(timestamp, \
+ hh, mm, ss, seconds, dot_dddd, subseconds)
+{
+ dot_dddd = timestamp
+ if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
+ return timestamp
+ hh = mm = ss = timestamp
+ sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
+ sub(/^[-+]?[0-9]+:/, "", mm)
+ sub(/^[-+]?/, "", hh)
+ seconds = 3600 * hh + 60 * mm + ss
+ subseconds = +dot_dddd
+ seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
+ return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
+ seconds / 3600, seconds / 60 % 60, seconds % 60)
+}
+
BEGIN {
dataform_type["vanguard"] = 1
dataform_type["main"] = 1
dataform_type["rearguard"] = 1
+ if (PACKRATLIST) {
+ while (getline <PACKRATLIST) {
+ if ($0 ~ /^#/) continue
+ packratlist[$3] = 1
+ }
+ }
+
# The command line should set DATAFORM.
if (!dataform_type[DATAFORM]) exit 1
- vanguard = DATAFORM == "vanguard"
+}
+
+$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
+ sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
}
/^Zone/ { zone = $2 }
@@ -38,7 +97,7 @@ DATAFORM != "main" {
# If this line should differ due to Czechoslovakia using negative SAVE values,
# uncomment the desired version and comment out the undesired one.
if (zone == "Europe/Prague" && /^#?[\t ]+[01]:00[\t ]/ && /1947 Feb 23/) {
- if (($(in_comment + 2) != "-") == vanguard) {
+ if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
uncomment = in_comment
} else {
comment_out = !in_comment
@@ -54,7 +113,7 @@ DATAFORM != "main" {
if (Rule_Eire || Zone_Dublin_post_1968) {
if ((Rule_Eire \
|| (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
- == vanguard) {
+ == (DATAFORM != "rearguard")) {
uncomment = in_comment
} else {
comment_out = !in_comment
@@ -71,11 +130,20 @@ DATAFORM != "main" {
&& ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
|| in_comment + 3 == NF))))
if (Rule_Namibia || Zone_using_Namibia_rule) {
- if ((Rule_Namibia \
- ? ($(in_comment + 9) ~ /^-/ \
- || ($(in_comment + 9) == 0 && $(in_comment + 10) == "CAT")) \
- : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
- == vanguard) {
+ if ((Rule_Namibia \
+ ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
+ : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
+ == (DATAFORM != "rearguard")) {
+ uncomment = in_comment
+ } else {
+ comment_out = !in_comment
+ }
+ }
+
+ # If this line should differ due to Portugal benefiting from %z if supported,
+ # uncomment the desired version and comment out the undesired one.
+ if (/^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
+ if (/%z/ == (DATAFORM == "vanguard")) {
uncomment = in_comment
} else {
comment_out = !in_comment
@@ -89,37 +157,143 @@ DATAFORM != "main" {
sub(/^/, "#")
}
- # In rearguard format, change the Japan rule line with "Sat>=8 25:00"
- # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
- if (!vanguard && $1 == "Rule" && $7 == "Sat>=8" && $8 == "25:00") {
- sub(/Sat>=8/, "Sun>=9")
- sub(/25:00/, " 1:00")
+ # Prefer %z in vanguard form, explicit abbreviations otherwise.
+ if (DATAFORM == "vanguard") {
+ sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
+ "&CHANGE-TO-%z")
+ sub(/-00CHANGE-TO-%z/, "-00")
+ sub(/[-+][^\t ]+CHANGE-TO-/, "")
+ } else {
+ if (/^[^#]*%z/) {
+ stdoff_column = 2 * /^Zone/ + 1
+ rules_column = stdoff_column + 1
+ stdoff = get_minutes($stdoff_column)
+ rules = $rules_column
+ stdabbr = offset_abbr(stdoff)
+ if (rules == "-") {
+ abbr = stdabbr
+ } else {
+ dstabbr_only = rules ~ /^[+0-9-]/
+ if (dstabbr_only) {
+ dstoff = get_minutes(rules)
+ } else {
+ # The DST offset is normally an hour, but there are special cases.
+ if (rules == "Morocco" && NF == 3) {
+ dstoff = -60
+ } else if (rules == "NBorneo") {
+ dstoff = 20
+ } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
+ || (rules == "Uruguay" \
+ && /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
+ dstoff = 30
+ } else if (rules == "Uruguay" && /[\t ]1974 Mar 10$/) {
+ dstoff = 90
+ } else {
+ dstoff = 60
+ }
+ }
+ dstabbr = offset_abbr(stdoff + dstoff)
+ if (dstabbr_only) {
+ abbr = dstabbr
+ } else {
+ abbr = stdabbr "/" dstabbr
+ }
+ }
+ sub(/%z/, abbr)
+ }
}
- # In rearguard format, change the Morocco lines with negative SAVE values
- # to use positive SAVE values.
- if (!vanguard && $1 == "Rule" && $2 == "Morocco" && $4 == 2018 \
- && $6 == "Oct") {
- sub(/\t2018\t/, "\t2017\t")
+ # Normally, prefer whole seconds. However, prefer subseconds
+ # if generating vanguard form and the otherwise-undocumented
+ # VANGUARD_SUBSECONDS environment variable is set.
+ # This relies on #STDOFF comment lines in the data.
+ # It is for hypothetical clients that support UT offsets that are
+ # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
+ # No known clients need this currently, and this experimental
+ # feature may be changed or withdrawn in future releases.
+ if ($1 == "#STDOFF") {
+ stdoff = $2
+ rounded_stdoff = round_to_second(stdoff)
+ if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
+ stdoff_subst[0] = rounded_stdoff
+ stdoff_subst[1] = stdoff
+ } else {
+ stdoff_subst[0] = stdoff
+ stdoff_subst[1] = rounded_stdoff
+ }
+ } else if (stdoff_subst[0]) {
+ stdoff_column = 2 * /^Zone/ + 1
+ stdoff_column_val = $stdoff_column
+ if (stdoff_column_val == stdoff_subst[0]) {
+ sub(stdoff_subst[0], stdoff_subst[1])
+ } else if (stdoff_column_val != stdoff_subst[1]) {
+ stdoff_subst[0] = 0
+ }
}
- if (!vanguard && $1 == "Rule" && $2 == "Morocco" && 2019 <= $3) {
- if ($9 == "0") {
- last_std_date = $3 " " $6 " " $7 " " $8
- sub(/\t0\t/, "\t1:00\t")
+
+ # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
+ # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
+ if (/^Rule/ && $2 == "Japan") {
+ if (DATAFORM == "rearguard") {
+ if ($7 == "Sat>=8" && $8 == "25:00") {
+ sub(/Sat>=8/, "Sun>=9")
+ sub(/25:00/, " 1:00")
+ }
} else {
- sub(/\t-1:00\t/, "\t0\t")
+ if ($7 == "Sun>=9" && $8 == "1:00") {
+ sub(/Sun>=9/, "Sat>=8")
+ sub(/ 1:00/, "25:00")
+ }
}
}
- if (!vanguard && $1 == "1:00" && $2 == "Morocco" && $3 == "+01/+00") {
- # This introduces a transition from 01:59:59 +00 to 03:00:00 +01
- # with both times being standard (i.e., a change to standard UT offset).
- # This is rearguard's way to approximate the actual prediction,
- # which is that of an ordinary transition from DST to standard time.
- sub(/1:00\tMorocco\t\+01\/\+00$/,
- "0:00\tMorocco\t+00/+01\t" last_std_date "\n\t\t\t 1:00\t-\t+01")
+
+ # In rearguard form, change the Morocco lines with negative SAVE values
+ # to use positive SAVE values.
+ if ($2 == "Morocco") {
+ if (/^Rule/) {
+ if ($4 ~ /^201[78]$/ && $6 == "Oct") {
+ if (DATAFORM == "rearguard") {
+ sub(/\t2018\t/, "\t2017\t")
+ } else {
+ sub(/\t2017\t/, "\t2018\t")
+ }
+ }
+
+ if (2019 <= $3) {
+ if ($8 == "2:00") {
+ if (DATAFORM == "rearguard") {
+ sub(/\t0\t/, "\t1:00\t")
+ } else {
+ sub(/\t1:00\t/, "\t0\t")
+ }
+ } else {
+ if (DATAFORM == "rearguard") {
+ sub(/\t-1:00\t/, "\t0\t")
+ } else {
+ sub(/\t0\t/, "\t-1:00\t")
+ }
+ }
+ }
+ }
+ if ($1 ~ /^[+0-9-]/ && NF == 3) {
+ if (DATAFORM == "rearguard") {
+ sub(/1:00\tMorocco/, "0:00\tMorocco")
+ sub(/\t\+01\/\+00$/, "\t+00/+01")
+ } else {
+ sub(/0:00\tMorocco/, "1:00\tMorocco")
+ sub(/\t\+00\/+01$/, "\t+01/+00")
+ }
+ }
}
}
+/^Zone/ {
+ packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
+}
+packrat_ignored && !/^Rule/ {
+ sub(/^/, "#")
+}
+
# If a Link line is followed by a Link or Zone line for the same data, comment
# out the Link line. This can happen if backzone overrides a Link
# with a Zone or a different Link.