diff options
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | NEWS | 10 | ||||
-rw-r--r-- | zic.8 | 29 | ||||
-rw-r--r-- | zic.c | 200 |
4 files changed, 177 insertions, 65 deletions
@@ -362,6 +362,9 @@ LEAPSECONDS= zic= ./zic ZIC= $(zic) $(ZFLAGS) +# To shrink the size of installed TZif files, +# append "-r @N" to omit data before N-seconds-after-the-Epoch. +# See the zic man page for more about -r. ZFLAGS= # How to use zic to install TZif files. @@ -24,6 +24,16 @@ Unreleased, experimental changes which nowadays is typically a typo. (Problem reported by Isiah Meadows.) + Changes to code + + zic now has an -r option to limit the time range of output data. + For example, 'zic -r @1000000000' limits the output data to + timestamps starting 1000000000 seconds after the Epoch. + This helps shrink output size and can be useful for applications + not needing the full timestamp history, such as TZDIST truncation; + see Internet RFC 8536 section 5.1. (Inspired by a feature request + from Christopher Wong.) + Changes to documentation Mention Internet RFC 8536 (February 2019), which documents TZif. @@ -79,6 +79,30 @@ Read leap second information from the file with the given name. If this option is not used, no leap second information appears in output files. .TP +.BR "\*-r " "[\fB@\fP\fIlo\fP][\fB/@\fP\fIhi\fP]" +Reduce the size of output files by limiting their applicability +to timestamps in the range from +.I lo +through +.IR hi , +where +.I lo +and +.I hi +are possibly-signed decimal counts of seconds since the Epoch +(1970-01-01 00:00:00 UTC). +Omitted counts default to extreme values. +For example, +.q "zic \*-r @0" +omits data intended for negative timestamps (i.e., before the Epoch), and +.q "zic \*-r @0/@2147483647" +outputs data intended only for nonnegative timestamps that fit into +31-bit signed integers. +On platforms with GNU +.BR date , +.q "zic \-r @$(date +%s)" +omits data intended for past timestamps. +.TP .B \*-v Be more verbose, and complain about the following situations: .RS @@ -125,11 +149,6 @@ or it contains a file name component that contains more than 14 bytes or that starts with .q "\*-" . .RE -.TP -.B \*-s -Limit time values stored in output files to values that are the same -whether they're taken to be signed or unsigned. -You can use this option to generate SVVS-compatible files. .SH FILES Input files use the format described in this section; output files use .IR tzfile (5) @@ -575,7 +575,8 @@ usage(FILE *stream, int status) fprintf(stream, _("%s: usage is %s [ --version ] [ --help ] [ -v ] \\\n" "\t[ -l localtime ] [ -p posixrules ] [ -d directory ] \\\n" - "\t[ -t localtime-link ] [ -L leapseconds ] [ filename ... ]\n\n" + "\t[ -t localtime-link ] [ -L leapseconds ] [ -r '[@lo][/@hi]' ] \\\n" + "\t[ filename ... ]\n\n" "Report bugs to %s.\n"), progname, progname, REPORT_BUGS_TO); if (status == EXIT_SUCCESS) @@ -603,6 +604,38 @@ change_directory (char const *dir) } } +#define TIME_T_BITS_IN_FILE 64 +static zic_t const min_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE); +static zic_t const max_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE); +static zic_t lo_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE); +static zic_t hi_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE); + +/* Set the time range of the output to TIMERANGE. + Return true if successful. */ +static bool +timerange_option(char *timerange) +{ + intmax_t lo = min_time, hi = max_time; + char *lo_end = timerange, *hi_end; + bool badrange = false; + if (*timerange == '@') { + lo = strtoimax (timerange + 1, &lo_end, 10); + if (lo_end == timerange + 1) + badrange = true; + } + hi_end = lo_end; + if (lo_end[0] == '/' && lo_end[1] == '@') { + hi = strtoimax (lo_end + 2, &hi_end, 10); + if (hi_end == lo_end + 2) + badrange = true; + } + if (badrange || *hi_end || hi < lo) + return false; + lo_time = lo < min_time ? min_time : lo <= max_time ? lo : max_time; + hi_time = hi < min_time ? min_time : hi <= max_time ? hi : max_time; + return true; +} + static const char * psxrules; static const char * lcltime; static const char * directory; @@ -615,6 +648,7 @@ main(int argc, char **argv) { register int c, k; register ptrdiff_t i, j; + bool timerange_given = false; #ifdef S_IWGRP umask(umask(S_IWGRP | S_IWOTH) | (S_IWGRP | S_IWOTH)); @@ -640,7 +674,7 @@ main(int argc, char **argv) } else if (strcmp(argv[k], "--help") == 0) { usage(stdout, EXIT_SUCCESS); } - while ((c = getopt(argc, argv, "d:l:L:p:st:vy:")) != EOF && c != -1) + while ((c = getopt(argc, argv, "d:l:L:p:r:st:vy:")) != EOF && c != -1) switch (c) { default: usage(stderr, EXIT_FAILURE); @@ -708,6 +742,21 @@ _("%s: More than one -L option specified\n"), case 'v': noise = true; break; + case 'r': + if (timerange_given) { + fprintf(stderr, +_("%s: More than one -r option specified\n"), + progname); + return EXIT_FAILURE; + } + if (! timerange_option(optarg)) { + fprintf(stderr, +_("%s: invalid time range: %s\n"), + progname, optarg); + return EXIT_FAILURE; + } + timerange_given = true; + break; case 's': warning(_("-s ignored")); break; @@ -961,11 +1010,6 @@ dolink(char const *fromfield, char const *tofield, bool staysymlink) } } -#define TIME_T_BITS_IN_FILE 64 - -static zic_t const min_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE); -static zic_t const max_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE); - /* Return true if NAME is a directory. */ static bool itsdir(char const *name) @@ -1742,14 +1786,43 @@ swaptypes(int i, int j) { bool t = ttisgmts[i]; ttisgmts[i] = ttisgmts[j]; ttisgmts[j] = t; } } +struct timerange { + int defaulttype; + ptrdiff_t base, count; + int leapbase, leapcount; +}; + +static struct timerange +limitrange(struct timerange r, zic_t lo, zic_t hi, + zic_t const *ats, unsigned char const *types) +{ + while (0 < r.count && hi < ats[r.base + r.count - 1]) + r.count--; + while (1 < r.count && ats[r.base] < lo && ats[r.base + 1] <= lo) { + /* Discard too-low transitions, except keep any last too-low + transition if no transition is exactly at LO. The kept + transition will be output as a LO "transition" appropriate for + buggy clients that do not use time type 0 for timestamps before + the first transition; see "Output a LO_TIME transition" below. */ + r.count--; + r.base++; + r.defaulttype = types[r.base - (ats[r.base] == lo)]; + } + while (0 < r.leapcount && hi < trans[r.leapbase + r.leapcount - 1]) + r.leapcount--; + while (0 < r.leapcount && trans[r.leapbase] < lo) { + r.leapcount--; + r.leapbase++; + } + return r; +} + static void writezone(const char *const name, const char *const string, char version, int defaulttype) { register FILE * fp; register ptrdiff_t i, j; - register int leapcnt32, leapi32; - register ptrdiff_t timecnt32, timei32; register int pass; static const struct tzhead tzh0; static struct tzhead tzh; @@ -1764,6 +1837,7 @@ writezone(const char *const name, const char *const string, char version, _Alignof(zic_t))); void *typesptr = ats + nats; unsigned char *types = typesptr; + struct timerange rangeall, range32, range64; /* ** Sort. @@ -1839,32 +1913,13 @@ writezone(const char *const name, const char *const string, char version, timecnt++; } - /* - ** Figure out 32-bit-limited starts and counts. - */ - timecnt32 = timecnt; - timei32 = 0; - leapcnt32 = leapcnt; - leapi32 = 0; - while (0 < timecnt32 && INT32_MAX < ats[timecnt32 - 1]) - --timecnt32; - while (1 < timecnt32 && ats[timei32] < INT32_MIN - && ats[timei32 + 1] <= INT32_MIN) { - /* Discard too-low transitions, except keep any last too-low - transition if no transition is exactly at INT32_MIN. - The kept transition will be output as an INT32_MIN - "transition" appropriate for buggy 32-bit clients that do - not use time type 0 for timestamps before the first - transition; see below. */ - --timecnt32; - ++timei32; - } - while (0 < leapcnt32 && INT32_MAX < trans[leapcnt32 - 1]) - --leapcnt32; - while (0 < leapcnt32 && trans[leapi32] < INT32_MIN) { - --leapcnt32; - ++leapi32; - } + rangeall.defaulttype = defaulttype; + rangeall.base = rangeall.leapbase = 0; + rangeall.count = timecnt; + rangeall.leapcount = leapcnt; + range64 = limitrange(rangeall, lo_time, hi_time, ats, types); + range32 = limitrange(range64, INT32_MIN, INT32_MAX, ats, types); + /* ** Remove old file, if any, to snap links. */ @@ -1894,6 +1949,8 @@ writezone(const char *const name, const char *const string, char version, for (pass = 1; pass <= 2; ++pass) { register ptrdiff_t thistimei, thistimecnt, thistimelim; register int thisleapi, thisleapcnt, thisleaplim; + int thisdefaulttype; + zic_t lo; int old0; char omittype[TZ_MAX_TYPES]; int typemap[TZ_MAX_TYPES]; @@ -1904,33 +1961,47 @@ writezone(const char *const name, const char *const string, char version, int indmap[TZ_MAX_CHARS]; if (pass == 1) { - thistimei = timei32; - thistimecnt = timecnt32; + /* Arguably the default time type in the 32-bit data + should be range32.defaulttype, which is suited for + timestamps just before INT32_MIN. However, zic + traditionally used the time type of the indefinite + past instead. Internet RFC 8532 says readers should + ignore 32-bit data, so this discrepancy matters only + to obsolete readers where the traditional type might + be more appropriate even if it's "wrong". So, use + the historical zic value, unless -r specifies a low + cutoff that excludes some 32-bit timestamps. */ + thisdefaulttype = (lo_time <= INT32_MIN + ? range64.defaulttype + : range32.defaulttype); + + thistimei = range32.base; + thistimecnt = range32.count; toomanytimes = thistimecnt >> 31 >> 1 != 0; - thisleapi = leapi32; - thisleapcnt = leapcnt32; + thisleapi = range32.leapbase; + thisleapcnt = range32.leapcount; } else { - thistimei = 0; - thistimecnt = timecnt; + thistimei = range64.base; + thistimecnt = range64.count; toomanytimes = thistimecnt >> 31 >> 31 >> 2 != 0; - thisleapi = 0; - thisleapcnt = leapcnt; + thisleapi = range64.leapbase; + thisleapcnt = range64.leapcount; } if (toomanytimes) error(_("too many transition times")); thistimelim = thistimei + thistimecnt; thisleaplim = thisleapi + thisleapcnt; memset(omittype, true, typecnt); - omittype[defaulttype] = false; + omittype[thisdefaulttype] = false; for (i = thistimei; i < thistimelim; i++) omittype[types[i]] = false; - /* Reorder types to make DEFAULTTYPE type 0. - Use TYPEMAP to swap OLD0 and DEFAULTTYPE so that - DEFAULTTYPE appears as type 0 in the output instead + /* Reorder types to make THISDEFAULTTYPE type 0. + Use TYPEMAP to swap OLD0 and THISDEFAULTTYPE so that + THISDEFAULTTYPE appears as type 0 in the output instead of OLD0. TYPEMAP also omits unused types. */ old0 = strlen(omittype); - swaptypes(old0, defaulttype); + swaptypes(old0, thisdefaulttype); #ifndef LEAVE_SOME_PRE_2011_SYSTEMS_IN_THE_LURCH /* @@ -1982,8 +2053,8 @@ writezone(const char *const name, const char *const string, char version, thistypecnt = 0; for (i = old0; i < typecnt; i++) if (!omittype[i]) - typemap[i == old0 ? defaulttype - : i == defaulttype ? old0 : i] + typemap[i == old0 ? thisdefaulttype + : i == thisdefaulttype ? old0 : i] = thistypecnt++; for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i) @@ -2026,15 +2097,18 @@ writezone(const char *const name, const char *const string, char version, DO(tzh_typecnt); DO(tzh_charcnt); #undef DO - for (i = thistimei; i < thistimelim; ++i) - if (pass == 1) - /* - ** Output an INT32_MIN "transition" - ** if appropriate; see above. - */ - puttzcode(((ats[i] < INT32_MIN) ? - INT32_MIN : ats[i]), fp); - else puttzcode64(ats[i], fp); + /* Output a LO_TIME transition if needed; see limitrange. + But do not go below the minimum representable value + for this pass. */ + lo = pass == 1 && lo_time < INT32_MIN ? INT32_MIN : lo_time; + + for (i = thistimei; i < thistimelim; ++i) { + zic_t at = ats[i] < lo ? lo : ats[i]; + if (pass == 1) + puttzcode(at, fp); + else + puttzcode64(at, fp); + } for (i = thistimei; i < thistimelim; ++i) { unsigned char uc; @@ -2081,7 +2155,7 @@ writezone(const char *const name, const char *const string, char version, for (i = old0; i < typecnt; i++) if (!omittype[i]) putc(ttisgmts[i], fp); - swaptypes(old0, defaulttype); + swaptypes(old0, thisdefaulttype); } fprintf(fp, "\n%s\n", string); close_file(fp, directory, name); @@ -2301,6 +2375,12 @@ stringzone(char *result, struct zone const *zpfirst, ptrdiff_t zonecount) struct rule stdr, dstr; result[0] = '\0'; + + /* Internet RFC 8536 section 5.1 says to use an empty TZ string if + future timestamps are truncated. */ + if (hi_time < max_time) + return -1; + zp = zpfirst + zonecount - 1; stdrp = dstrp = NULL; for (i = 0; i < zp->z_nrules; ++i) { |