1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
|
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2014, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at http://curl.haxx.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
***************************************************************************/
/*
A brief summary of the date string formats this parser groks:
RFC 2616 3.3.1
Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
we support dates without week day name:
06 Nov 1994 08:49:37 GMT
06-Nov-94 08:49:37 GMT
Nov 6 08:49:37 1994
without the time zone:
06 Nov 1994 08:49:37
06-Nov-94 08:49:37
weird order:
1994 Nov 6 08:49:37 (GNU date fails)
GMT 08:49:37 06-Nov-94 Sunday
94 6 Nov 08:49:37 (GNU date fails)
time left out:
1994 Nov 6
06-Nov-94
Sun Nov 6 94
unusual separators:
1994.Nov.6
Sun/Nov/6/94/GMT
commonly used time zone names:
Sun, 06 Nov 1994 08:49:37 CET
06 Nov 1994 08:49:37 EST
time zones specified using RFC822 style:
Sun, 12 Sep 2004 15:05:58 -0700
Sat, 11 Sep 2004 21:32:11 +0200
compact numerical date strings:
20040912 15:05:58 -0700
20040911 +0200
*/
#include "parsedate.h"
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdbool.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
/* Single spelling for reading and writing errno. parsedate() uses the pair
   to save errno, clear it around strtol(), and put the saved value back. */
#define ERRNO (errno)
#define SET_ERRNO(x) (errno = (x))
/* Locale-independent upper-casing of a single character.
 *
 * Only the 26 unaccented letters 'a'..'z' are converted; every other value
 * is returned unchanged. The mapping is done by position in two parallel
 * alphabets instead of by arithmetic on character codes, so it does not
 * depend on the letters being contiguous (remember EBCDIC), and toupper()
 * is avoided because its behavior is altered by the current locale.
 */
char raw_toupper(char in)
{
  static const char lowercase[] = "abcdefghijklmnopqrstuvwxyz";
  static const char uppercase[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  const char *hit;

  /* strchr() would match the terminating NUL, so deal with it up front */
  if(in == '\0')
    return in;

  hit = strchr(lowercase, in);
  if(hit == NULL)
    return in;

  return uppercase[hit - lowercase];
}
/*
 * raw_equal() compares two NUL-terminated strings for equality, ignoring
 * the case of the letters a-z only. It is locale independent on purpose and
 * meant for strings we know are safe to compare this way. See
 * http://daniel.haxx.se/blog/2008/10/15/strcasecmp-in-turkish/ for why a
 * locale-aware comparison is not good enough.
 *
 * Returns 1 when the strings match in full, 0 otherwise. A string that is
 * merely a prefix of the other one does not match.
 */
int raw_equal(const char *first, const char *second)
{
  for(; *first && *second; first++, second++) {
    if(raw_toupper(*first) != raw_toupper(*second))
      return 0; /* differing characters, no need to look further */
  }

  /* at least one string has ended; it is a match only if both did */
  return (raw_toupper(*first) == raw_toupper(*second));
}
/* Wrappers for the <ctype.h> classifiers that are safe to call with a plain
   (possibly negative) char: the argument is first converted to unsigned char
   and then widened to int. The argument is parenthesized inside the cast so
   that a compound expression such as ISDIGIT(c + 1) converts the whole
   expression rather than only its first operand. */
#define ISSPACE(x) (isspace((int) ((unsigned char)(x))))
#define ISDIGIT(x) (isdigit((int) ((unsigned char)(x))))
#define ISALNUM(x) (isalnum((int) ((unsigned char)(x))))
#define ISALPHA(x) (isalpha((int) ((unsigned char)(x))))
/*
 * Define TRUE and FALSE in terms of the <stdbool.h> constants, unless
 * something seen earlier has already defined them. Original rationale: with
 * this change, 'bool found = 1' will give a warning on MIPSPro, but
 * 'bool found = TRUE' will not. Change tested on IRIX/MIPSPro,
 * AIX 5.1/Xlc, Tru64 5.1/cc, w/make test too.
 */
#ifndef TRUE
#define TRUE true
#endif
#ifndef FALSE
#define FALSE false
#endif
/*
** Convert a signed long to a signed int, saturating at the ends of the int
** range: values above INT_MAX yield INT_MAX, values below INT_MIN yield
** INT_MIN, and anything in between is returned unchanged.
*/
int clamp_to_int(long slnum)
{
  if(slnum > INT_MAX)
    return INT_MAX;

  if(slnum < INT_MIN)
    return INT_MIN;

  return (int)slnum;
}
/* Three-letter weekday names, Monday first. checkday() searches this table
   when the candidate name is at most three characters long and returns the
   matching index (0 = Monday ... 6 = Sunday). */
const char * const wkday[] =
{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
/* Full weekday names in the same Monday-first order; checkday() searches
   this table for candidate names longer than three characters. */
static const char * const weekday[] =
{ "Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday", "Sunday" };
/* Three-letter month names, January first. checkmonth() returns the index
   of the matching entry (0 = January ... 11 = December). */
const char * const month[]=
{ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
/* One entry of the time zone name table: a zone abbreviation and the
   number of minutes that checktz() converts to seconds and parsedate() then
   adds to the parsed time to arrive at GMT. */
struct tzinfo {
/* zone abbreviation; room for four characters plus the terminating NUL */
char name[5];
int offset; /* +/- in minutes */
};
/*
* parsedate()
*
* Forward declaration of the parser defined further down in this file,
* followed by its result codes. 'date' is the string to parse and 'output'
* receives the resulting time.
*
* Returns:
*
* PARSEDATE_OK - a fine conversion
* PARSEDATE_FAIL - failed to convert
* PARSEDATE_LATER - time overflow at the far end of time_t
* PARSEDATE_SOONER - time underflow at the low end of time_t
*/
static int parsedate(const char *date, time_t *output);
/* conversion succeeded, *output holds the result */
#define PARSEDATE_OK 0
/* the string could not be converted; parse_date() reports this as -1 */
#define PARSEDATE_FAIL -1
/* year beyond what is representable; parsedate() stores 0x7fffffff */
#define PARSEDATE_LATER 1
/* year before 1970; parsedate() stores 0 */
#define PARSEDATE_SOONER 2
/* Here's a bunch of frequently used time zone names. These were supported
by the old getdate parser.

Each entry pairs a name with an offset in minutes; for the named zones a
positive value means the zone is behind GMT (EST is 300) and a negative
value means it is ahead (CET is -60).

tDAYZONE is meant to be written directly after a base offset with no
operator in between: "300 tDAYZONE" expands to "300 -60", i.e. the standard
offset minus one hour. It is not usable as a stand-alone value in other
expressions without care, since it is not parenthesized. */
#define tDAYZONE -60 /* offset for daylight savings time */
static const struct tzinfo tz[]= {
{"GMT", 0}, /* Greenwich Mean */
{"UTC", 0}, /* Universal (Coordinated) */
{"WET", 0}, /* Western European */
{"BST", 0 tDAYZONE}, /* British Summer */
{"WAT", 60}, /* West Africa */
{"AST", 240}, /* Atlantic Standard */
{"ADT", 240 tDAYZONE}, /* Atlantic Daylight */
{"EST", 300}, /* Eastern Standard */
{"EDT", 300 tDAYZONE}, /* Eastern Daylight */
{"CST", 360}, /* Central Standard */
{"CDT", 360 tDAYZONE}, /* Central Daylight */
{"MST", 420}, /* Mountain Standard */
{"MDT", 420 tDAYZONE}, /* Mountain Daylight */
{"PST", 480}, /* Pacific Standard */
{"PDT", 480 tDAYZONE}, /* Pacific Daylight */
{"YST", 540}, /* Yukon Standard */
{"YDT", 540 tDAYZONE}, /* Yukon Daylight */
{"HST", 600}, /* Hawaii Standard */
{"HDT", 600 tDAYZONE}, /* Hawaii Daylight */
{"CAT", 600}, /* Central Alaska */
{"AHST", 600}, /* Alaska-Hawaii Standard */
{"NT", 660}, /* Nome */
{"IDLW", 720}, /* International Date Line West */
{"CET", -60}, /* Central European */
{"MET", -60}, /* Middle European */
{"MEWT", -60}, /* Middle European Winter */
{"MEST", -60 tDAYZONE}, /* Middle European Summer */
{"CEST", -60 tDAYZONE}, /* Central European Summer */
{"MESZ", -60 tDAYZONE}, /* Middle European Summer */
{"FWT", -60}, /* French Winter */
{"FST", -60 tDAYZONE}, /* French Summer */
{"EET", -120}, /* Eastern Europe, USSR Zone 1 */
{"WAST", -420}, /* West Australian Standard */
{"WADT", -420 tDAYZONE}, /* West Australian Daylight */
{"CCT", -480}, /* China Coast, USSR Zone 7 */
{"JST", -540}, /* Japan Standard, USSR Zone 8 */
{"EAST", -600}, /* Eastern Australian Standard */
{"EADT", -600 tDAYZONE}, /* Eastern Australian Daylight */
{"GST", -600}, /* Guam Standard, USSR Zone 9 */
{"NZT", -720}, /* New Zealand */
{"NZST", -720}, /* New Zealand Standard */
{"NZDT", -720 tDAYZONE}, /* New Zealand Daylight */
{"IDLE", -720}, /* International Date Line East */
/* Next up: Military timezone names. RFC822 allowed these, but (as noted in
RFC 1123) had their signs wrong. Here we use the correct signs to match
actual military usage.

NOTE(review): the named zones above store zones ahead of GMT as negative
values (CET is -60), while "A" below is stored as +60. If Alpha is meant to
denote the same one-hour-ahead zone as CET, the signs of the letter entries
are inverted relative to the rest of the table - TODO confirm against
RFC 1123 before relying on them.
*/
{"A", +1 * 60}, /* Alpha */
{"B", +2 * 60}, /* Bravo */
{"C", +3 * 60}, /* Charlie */
{"D", +4 * 60}, /* Delta */
{"E", +5 * 60}, /* Echo */
{"F", +6 * 60}, /* Foxtrot */
{"G", +7 * 60}, /* Golf */
{"H", +8 * 60}, /* Hotel */
{"I", +9 * 60}, /* India */
/* "J", Juliet is not used as a timezone, to indicate the observer's local
time */
{"K", +10 * 60}, /* Kilo */
{"L", +11 * 60}, /* Lima */
{"M", +12 * 60}, /* Mike */
{"N", -1 * 60}, /* November */
{"O", -2 * 60}, /* Oscar */
{"P", -3 * 60}, /* Papa */
{"Q", -4 * 60}, /* Quebec */
{"R", -5 * 60}, /* Romeo */
{"S", -6 * 60}, /* Sierra */
{"T", -7 * 60}, /* Tango */
{"U", -8 * 60}, /* Uniform */
{"V", -9 * 60}, /* Victor */
{"W", -10 * 60}, /* Whiskey */
{"X", -11 * 60}, /* X-ray */
{"Y", -12 * 60}, /* Yankee */
{"Z", 0}, /* Zulu, zero meridian, a.k.a. UTC */
};
/* Look up a weekday name, ignoring case.
 *
 * 'check' is the NUL-terminated candidate and 'len' its length. Names of
 * more than three characters are matched against the full weekday names,
 * shorter ones against the three-letter abbreviations.
 *
 * returns:
 *   -1 no day
 *   0 monday - 6 sunday
 */
static int checkday(const char *check, size_t len)
{
  const char * const *names = (len > 3) ? weekday : wkday;
  int day;

  for(day = 0; day < 7; day++) {
    if(raw_equal(check, names[day]))
      return day;
  }

  return -1;
}
static int checkmonth(const char *check)
{
int i;
const char * const *what;
bool found= FALSE;
what = &month[0];
for(i=0; i<12; i++) {
if(raw_equal(check, what[0])) {
found=TRUE;
break;
}
what++;
}
return found?i:-1; /* return the offset or -1, no real offset is -1 */
}
/* Look up a time zone name in the tz table, ignoring case.
 *
 * Returns the time zone offset between GMT and the named zone in number of
 * seconds (the table stores minutes), or -1 if the time zone wasn't
 * found/legal.
 */
static int checktz(const char *check)
{
  const size_t entries = sizeof(tz) / sizeof(tz[0]);
  size_t n;

  for(n = 0; n < entries; n++) {
    if(raw_equal(check, tz[n].name))
      return tz[n].offset * 60;
  }

  return -1;
}
/* Advance *date past everything that is neither a letter nor a digit. On
   return *date points at the first alphanumeric character, or at the
   terminating NUL if there is none. */
static void skip(const char **date)
{
  const char *cursor = *date;

  while(*cursor != '\0' && !ISALNUM(*cursor))
    cursor++;

  *date = cursor;
}
/* What parsedate() takes the next bare number to be. Parsing starts out
   expecting a day of month and switches to expecting a year once a number
   has been tried as a day. */
enum assume {
DATE_MDAY,
DATE_YEAR,
/* not referenced by the code visible in this file */
DATE_TIME
};
/* this is a clone of 'struct tm' but with all fields we don't need or use
   cut out */
struct my_tm {
  int tm_sec;  /* seconds */
  int tm_min;  /* minutes */
  int tm_hour; /* hours */
  int tm_mday; /* day of the month, 1 is the first day */
  int tm_mon;  /* month, 0 is January */
  int tm_year; /* years since 1900 */
};

/* struct tm to time since epoch in GMT time zone.
 * This is similar to the standard mktime function but for GMT only, and
 * doesn't suffer from the various bugs and portability problems that
 * some systems' implementations have.
 *
 * A tm_mon outside 0..11 is folded into the year first: month 12 is January
 * of the following year and month -1 is December of the year before.
 *
 * Returns the number of seconds since 1970-01-01 00:00:00, or -1 when the
 * (normalized) year is before 1970, since such dates would need a negative
 * return value.
 */
static time_t my_timegm(struct my_tm *tm)
{
  static const int month_days_cumulative[12] =
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
  int year = tm->tm_year + 1900;
  int mon = tm->tm_mon;
  int leap_days;

  if(mon < 0) {
    /* months before January belong to earlier years */
    year -= (11 - mon) / 12;
    mon = 11 - (11 - mon) % 12;
  }
  else if(mon >= 12) {
    /* months after December belong to later years */
    year += mon / 12;
    mon = mon % 12;
  }

  if(year < 1970)
    /* we don't support years before 1970 as they will cause this function
       to return a negative value */
    return -1;

  /* The leap day of 'year' itself only counts once February has passed, so
     for January and February count leap years up to the previous year.
     The constant terms remove the leap days that fall before 1970. */
  leap_days = year - (mon <= 1);
  leap_days = ((leap_days / 4) - (leap_days / 100) + (leap_days / 400)
               - (1969 / 4) + (1969 / 100) - (1969 / 400));

  return ((((time_t) (year - 1970) * 365
            + leap_days + month_days_cumulative[mon] + tm->tm_mday - 1) * 24
           + tm->tm_hour) * 60 + tm->tm_min) * 60 + tm->tm_sec;
}
/*
 * parsedate()
 *
 * Converts the date string 'date' to seconds since the epoch and stores the
 * result in *output.
 *
 * The string is read as a sequence of at most six parts separated by any
 * non-alphanumeric characters. A part made of letters is tried as a weekday
 * name, then a month name, then a time zone name. A part starting with a
 * digit is tried as HH:MM:SS, then HH:MM, then as a plain number that is
 * taken as a +/-HHMM zone offset, a YYYYMMDD date, a day of month or a year
 * depending on its shape and on what has been seen so far. Two-digit style
 * years are mapped so that values above 70 land in the 1900s and the rest
 * in the 2000s. A day, a month and a year are required; the time defaults
 * to 00:00:00 and the zone to GMT.
 *
 * Returns:
 *
 * PARSEDATE_OK     - a fine conversion
 * PARSEDATE_FAIL   - failed to convert
 * PARSEDATE_LATER  - time overflow at the far end of time_t
 * PARSEDATE_SOONER - time underflow at the low end of time_t
 */
static int parsedate(const char *date, time_t *output)
{
  time_t t = 0;
  int wdaynum = -1; /* day of the week number, 0-6 (mon-sun) */
  int monnum = -1;  /* month of the year number, 0-11 */
  int mdaynum = -1; /* day of month, 1 - 31 */
  int hournum = -1;
  int minnum = -1;
  int secnum = -1;
  int yearnum = -1;
  int tzoff = -1;
  struct my_tm tm;
  enum assume dignext = DATE_MDAY;
  const char *indate = date; /* save the original pointer */
  int part = 0; /* max 6 parts */

  while(*date && (part < 6)) {
    bool found = FALSE;

    skip(&date);

    if(ISALPHA(*date)) {
      /* a name coming up */
      char buf[32] = "";
      size_t len;

      if(sscanf(date, "%31[ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                "abcdefghijklmnopqrstuvwxyz]", buf))
        len = strlen(buf);
      else
        len = 0;

      if(wdaynum == -1) {
        wdaynum = checkday(buf, len);
        if(wdaynum != -1)
          found = TRUE;
      }
      if(!found && (monnum == -1)) {
        monnum = checkmonth(buf);
        if(monnum != -1)
          found = TRUE;
      }
      if(!found && (tzoff == -1)) {
        /* this just must be a time zone string */
        tzoff = checktz(buf);
        if(tzoff != -1)
          found = TRUE;
      }

      if(!found)
        return PARSEDATE_FAIL; /* bad string */

      date += len;
    }
    else if(ISDIGIT(*date)) {
      /* a digit */
      int val;
      char *end;
      /* The time fields are scanned into temporaries and the number of
         characters actually consumed is taken from %n. Fields may be a
         single digit ("8:49:37"), so advancing by a fixed 8 or 5 characters
         could step past the end of the string. */
      int hh = 0;
      int mm = 0;
      int ss = 0;
      int consumed = 0;

      if((secnum == -1) &&
         (3 == sscanf(date, "%02d:%02d:%02d%n", &hh, &mm, &ss, &consumed))) {
        /* time stamp! */
        hournum = hh;
        minnum = mm;
        secnum = ss;
        date += consumed;
      }
      else if((secnum == -1) &&
              (2 == sscanf(date, "%02d:%02d%n", &hh, &mm, &consumed))) {
        /* time stamp without seconds */
        hournum = hh;
        minnum = mm;
        secnum = 0;
        date += consumed;
      }
      else {
        long lval;
        int error;
        int old_errno;

        /* run strtol() with a clean errno, then restore the caller's */
        old_errno = ERRNO;
        SET_ERRNO(0);
        lval = strtol(date, &end, 10);
        error = ERRNO;
        if(error != old_errno)
          SET_ERRNO(old_errno);

        if(error)
          return PARSEDATE_FAIL;

#if LONG_MAX != INT_MAX
        if((lval > (long)INT_MAX) || (lval < (long)INT_MIN))
          return PARSEDATE_FAIL;
#endif

        val = clamp_to_int(lval);

        if((tzoff == -1) &&
           ((end - date) == 4) &&
           (val <= 1400) &&
           (indate < date) &&
           ((date[-1] == '+' || date[-1] == '-'))) {
          /* four digits and a value less than or equal to 1400 (to take into
             account all sorts of funny time zone diffs) and it is preceded
             with a plus or minus. This is a time zone indication. 1400 is
             picked since +1300 is frequently used and +1400 is mentioned as
             an edge number in the document "ISO C 200X Proposal: Timezone
             Functions" at http://david.tribble.com/text/c0xtimezone.html If
             anyone has a more authoritative source for the exact maximum time
             zone offsets, please speak up! */
          found = TRUE;
          tzoff = (val/100 * 60 + val%100)*60;

          /* the + and - prefix indicates the local time compared to GMT,
             this we need the reversed math to get what we want */
          tzoff = date[-1]=='+' ? -tzoff : tzoff;
        }

        if(((end - date) == 8) &&
           (yearnum == -1) &&
           (monnum == -1) &&
           (mdaynum == -1)) {
          /* 8 digits, no year, month or day yet. This is YYYYMMDD */
          found = TRUE;
          yearnum = val/10000;
          monnum = (val%10000)/100-1; /* month is 0 - 11 */
          mdaynum = val%100;
        }

        if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) {
          if((val > 0) && (val < 32)) {
            mdaynum = val;
            found = TRUE;
          }
          /* whether or not it was usable as a day, a year is tried next */
          dignext = DATE_YEAR;
        }

        if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) {
          yearnum = val;
          found = TRUE;
          if(yearnum < 1900) {
            if(yearnum > 70)
              yearnum += 1900;
            else
              yearnum += 2000;
          }
          if(mdaynum == -1)
            dignext = DATE_MDAY;
        }

        if(!found)
          return PARSEDATE_FAIL;

        date = end;
      }
    }

    part++;
  }

  if(-1 == secnum)
    secnum = minnum = hournum = 0; /* no time, make it zero */

  if((-1 == mdaynum) ||
     (-1 == monnum) ||
     (-1 == yearnum))
    /* lacks vital info, fail */
    return PARSEDATE_FAIL;

  /* NOTE(review): if SIZEOF_TIME_T is not defined by parsedate.h or the
     build, the preprocessor evaluates it as 0 and this clamp is compiled in
     regardless of the real width of time_t - TODO confirm it is defined. */
#if SIZEOF_TIME_T < 5
  /* 32 bit time_t can only hold dates to the beginning of 2038 */
  if(yearnum > 2037) {
    *output = 0x7fffffff;
    return PARSEDATE_LATER;
  }
#endif

  if(yearnum < 1970) {
    *output = 0;
    return PARSEDATE_SOONER;
  }

  if((mdaynum > 31) || (monnum > 11) ||
     (hournum > 23) || (minnum > 59) || (secnum > 60))
    return PARSEDATE_FAIL; /* clearly an illegal date */

  tm.tm_sec = secnum;
  tm.tm_min = minnum;
  tm.tm_hour = hournum;
  tm.tm_mday = mdaynum;
  tm.tm_mon = monnum;
  tm.tm_year = yearnum - 1900;

  /* my_timegm() returns a time_t. time_t is often 32 bits, even on many
     architectures that feature 64 bit 'long'.
     Some systems have 64 bit time_t and deal with years beyond 2038. However,
     even on some of the systems with 64 bit time_t mktime() returns -1 for
     dates beyond 03:14:07 UTC, January 19, 2038. (Such as AIX 5100-06)
  */
  t = my_timegm(&tm);

  /* time zone adjust. The error value is compared as a time_t: truncating t
     to int would also match valid wide time_t values whose low bits happen
     to be all ones, and (time_t)-1 works for an unsigned time_t as well. */
  if(t != (time_t)-1) {
    /* Add the time zone diff between local time zone and GMT. */
    long delta = (long)(tzoff != -1 ? tzoff : 0);

    if((delta > 0) && (t > LONG_MAX - delta))
      return PARSEDATE_FAIL; /* time_t overflow */

    t += delta;
  }

  *output = t;

  return PARSEDATE_OK;
}
/* Public entry point: parse the date string 'p' and return the time in
 * seconds since the epoch.
 *
 * Results that parsedate() flags as too late or too early are still
 * returned, using the boundary value parsedate() stored for them. Any other
 * outcome is a failure and yields -1.
 */
time_t parse_date(const char *p)
{
  time_t parsed;
  int rc = parsedate(p, &parsed);

  if(rc == PARSEDATE_OK || rc == PARSEDATE_LATER || rc == PARSEDATE_SOONER)
    return parsed;

  /* everything else is fail */
  return -1;
}
#ifdef __cplusplus
}
#endif
|