summaryrefslogtreecommitdiff
path: root/lib/parsedate.c
blob: 1662dd36b89ee1ed2a2eec5e50ecb6e77d4d4861 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * SPDX-License-Identifier: curl
 *
 ***************************************************************************/
/*
  A brief summary of the date string formats this parser groks:

  RFC 2616 3.3.1

  Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
  Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
  Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format

  we support dates without week day name:

  06 Nov 1994 08:49:37 GMT
  06-Nov-94 08:49:37 GMT
  Nov  6 08:49:37 1994

  without the time zone:

  06 Nov 1994 08:49:37
  06-Nov-94 08:49:37

  weird order:

  1994 Nov 6 08:49:37  (GNU date fails)
  GMT 08:49:37 06-Nov-94 Sunday
  94 6 Nov 08:49:37    (GNU date fails)

  time left out:

  1994 Nov 6
  06-Nov-94
  Sun Nov 6 94

  unusual separators:

  1994.Nov.6
  Sun/Nov/6/94/GMT

  commonly used time zone names:

  Sun, 06 Nov 1994 08:49:37 CET
  06 Nov 1994 08:49:37 EST

  time zones specified using RFC822 style:

  Sun, 12 Sep 2004 15:05:58 -0700
  Sat, 11 Sep 2004 21:32:11 +0200

  compact numerical date strings:

  20040912 15:05:58 -0700
  20040911 +0200

*/

#include "curl_setup.h"

#include <limits.h>

#include <curl/curl.h>
#include "strcase.h"
#include "warnless.h"
#include "parsedate.h"

/*
 * parsedate()
 *
 * Returns:
 *
 * PARSEDATE_OK     - a fine conversion
 * PARSEDATE_FAIL   - failed to convert
 * PARSEDATE_LATER  - time overflow at the far end of time_t
 * PARSEDATE_SOONER - time underflow at the low end of time_t
 */

static int parsedate(const char *date, time_t *output);

#define PARSEDATE_OK     0
#define PARSEDATE_FAIL   -1
#define PARSEDATE_LATER  1
#define PARSEDATE_SOONER 2

#if !defined(CURL_DISABLE_PARSEDATE) || !defined(CURL_DISABLE_FTP) || \
  !defined(CURL_DISABLE_FILE)
/* These names are also used by FTP and FILE code */
const char * const Curl_wkday[] =
{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
const char * const Curl_month[]=
{ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
#endif

#ifndef CURL_DISABLE_PARSEDATE
static const char * const weekday[] =
{ "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday" };

struct tzinfo {
  char name[5];
  int offset; /* +/- in minutes */
};

/* Here's a bunch of frequently used time zone names. These were supported
   by the old getdate parser. */
#define tDAYZONE -60       /* offset for daylight savings time */
static const struct tzinfo tz[]= {
  {"GMT", 0},              /* Greenwich Mean */
  {"UT",  0},              /* Universal Time */
  {"UTC", 0},              /* Universal (Coordinated) */
  {"WET", 0},              /* Western European */
  {"BST", 0 tDAYZONE},     /* British Summer */
  {"WAT", 60},             /* West Africa */
  {"AST", 240},            /* Atlantic Standard */
  {"ADT", 240 tDAYZONE},   /* Atlantic Daylight */
  {"EST", 300},            /* Eastern Standard */
  {"EDT", 300 tDAYZONE},   /* Eastern Daylight */
  {"CST", 360},            /* Central Standard */
  {"CDT", 360 tDAYZONE},   /* Central Daylight */
  {"MST", 420},            /* Mountain Standard */
  {"MDT", 420 tDAYZONE},   /* Mountain Daylight */
  {"PST", 480},            /* Pacific Standard */
  {"PDT", 480 tDAYZONE},   /* Pacific Daylight */
  {"YST", 540},            /* Yukon Standard */
  {"YDT", 540 tDAYZONE},   /* Yukon Daylight */
  {"HST", 600},            /* Hawaii Standard */
  {"HDT", 600 tDAYZONE},   /* Hawaii Daylight */
  {"CAT", 600},            /* Central Alaska */
  {"AHST", 600},           /* Alaska-Hawaii Standard */
  {"NT",  660},            /* Nome */
  {"IDLW", 720},           /* International Date Line West */
  {"CET", -60},            /* Central European */
  {"MET", -60},            /* Middle European */
  {"MEWT", -60},           /* Middle European Winter */
  {"MEST", -60 tDAYZONE},  /* Middle European Summer */
  {"CEST", -60 tDAYZONE},  /* Central European Summer */
  {"MESZ", -60 tDAYZONE},  /* Middle European Summer */
  {"FWT", -60},            /* French Winter */
  {"FST", -60 tDAYZONE},   /* French Summer */
  {"EET", -120},           /* Eastern Europe, USSR Zone 1 */
  {"WAST", -420},          /* West Australian Standard */
  {"WADT", -420 tDAYZONE}, /* West Australian Daylight */
  {"CCT", -480},           /* China Coast, USSR Zone 7 */
  {"JST", -540},           /* Japan Standard, USSR Zone 8 */
  {"EAST", -600},          /* Eastern Australian Standard */
  {"EADT", -600 tDAYZONE}, /* Eastern Australian Daylight */
  {"GST", -600},           /* Guam Standard, USSR Zone 9 */
  {"NZT", -720},           /* New Zealand */
  {"NZST", -720},          /* New Zealand Standard */
  {"NZDT", -720 tDAYZONE}, /* New Zealand Daylight */
  {"IDLE", -720},          /* International Date Line East */
  /* Next up: Military timezone names. RFC822 allowed these, but (as noted in
     RFC 1123) had their signs wrong. Here we use the correct signs to match
     actual military usage.
   */
  {"A",  1 * 60},         /* Alpha */
  {"B",  2 * 60},         /* Bravo */
  {"C",  3 * 60},         /* Charlie */
  {"D",  4 * 60},         /* Delta */
  {"E",  5 * 60},         /* Echo */
  {"F",  6 * 60},         /* Foxtrot */
  {"G",  7 * 60},         /* Golf */
  {"H",  8 * 60},         /* Hotel */
  {"I",  9 * 60},         /* India */
  /* "J", Juliet is not used as a timezone, to indicate the observer's local
     time */
  {"K", 10 * 60},         /* Kilo */
  {"L", 11 * 60},         /* Lima */
  {"M", 12 * 60},         /* Mike */
  {"N",  -1 * 60},         /* November */
  {"O",  -2 * 60},         /* Oscar */
  {"P",  -3 * 60},         /* Papa */
  {"Q",  -4 * 60},         /* Quebec */
  {"R",  -5 * 60},         /* Romeo */
  {"S",  -6 * 60},         /* Sierra */
  {"T",  -7 * 60},         /* Tango */
  {"U",  -8 * 60},         /* Uniform */
  {"V",  -9 * 60},         /* Victor */
  {"W", -10 * 60},         /* Whiskey */
  {"X", -11 * 60},         /* X-ray */
  {"Y", -12 * 60},         /* Yankee */
  {"Z", 0},                /* Zulu, zero meridian, a.k.a. UTC */
};

/* returns:
   -1 no day
   0 monday - 6 sunday
*/

static int checkday(const char *check, size_t len)
{
  int i;
  const char * const *what;
  if(len > 3)
    what = &weekday[0];
  else if(len == 3)
    what = &Curl_wkday[0];
  else
    return -1; /* too short */
  for(i = 0; i<7; i++) {
    size_t ilen = strlen(what[0]);
    if((ilen == len) &&
       strncasecompare(check, what[0], len))
      return i;
    what++;
  }
  return -1;
}

static int checkmonth(const char *check, size_t len)
{
  int i;
  const char * const *what = &Curl_month[0];
  if(len != 3)
    return -1; /* not a month */

  for(i = 0; i<12; i++) {
    if(strncasecompare(check, what[0], 3))
      return i;
    what++;
  }
  return -1; /* return the offset or -1, no real offset is -1 */
}

/* return the time zone offset between GMT and the input one, in number
   of seconds or -1 if the timezone wasn't found/legal */

static int checktz(const char *check, size_t len)
{
  unsigned int i;
  const struct tzinfo *what = tz;
  if(len > 4) /* longer than any valid timezone */
    return -1;

  for(i = 0; i< sizeof(tz)/sizeof(tz[0]); i++) {
    size_t ilen = strlen(what->name);
    if((ilen == len) &&
       strncasecompare(check, what->name, len))
      return what->offset*60;
    what++;
  }
  return -1;
}

static void skip(const char **date)
{
  /* skip everything that aren't letters or digits */
  while(**date && !ISALNUM(**date))
    (*date)++;
}

enum assume {
  DATE_MDAY,
  DATE_YEAR,
  DATE_TIME
};

/*
 * time2epoch: time stamp to seconds since epoch in GMT time zone.  Similar to
 * mktime but for GMT only.
 */
static time_t time2epoch(int sec, int min, int hour,
                         int mday, int mon, int year)
{
  static const int month_days_cumulative [12] =
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
  int leap_days = year - (mon <= 1);
  leap_days = ((leap_days / 4) - (leap_days / 100) + (leap_days / 400)
               - (1969 / 4) + (1969 / 100) - (1969 / 400));
  return ((((time_t) (year - 1970) * 365
            + leap_days + month_days_cumulative[mon] + mday - 1) * 24
           + hour) * 60 + min) * 60 + sec;
}

/* Returns the value of a single-digit or two-digit decimal number, return
   then pointer to after the number. The 'date' pointer is known to point to a
   digit. */
static int oneortwodigit(const char *date, const char **endp)
{
  int num = date[0] - '0';
  if(ISDIGIT(date[1])) {
    *endp = &date[2];
    return num*10 + (date[1] - '0');
  }
  *endp = &date[1];
  return num;
}


/* HH:MM:SS or HH:MM and accept single-digits too */
static bool match_time(const char *date,
                       int *h, int *m, int *s, char **endp)
{
  const char *p;
  int hh, mm, ss = 0;
  hh = oneortwodigit(date, &p);
  if((hh < 24) && (*p == ':') && ISDIGIT(p[1])) {
    mm = oneortwodigit(&p[1], &p);
    if(mm < 60) {
      if((*p == ':') && ISDIGIT(p[1])) {
        ss = oneortwodigit(&p[1], &p);
        if(ss <= 60) {
          /* valid HH:MM:SS */
          goto match;
        }
      }
      else {
        /* valid HH:MM */
        goto match;
      }
    }
  }
  return FALSE; /* not a time string */
  match:
  *h = hh;
  *m = mm;
  *s = ss;
  *endp = (char *)p;
  return TRUE;
}

/*
 * parsedate()
 *
 * Returns:
 *
 * PARSEDATE_OK     - a fine conversion
 * PARSEDATE_FAIL   - failed to convert
 * PARSEDATE_LATER  - time overflow at the far end of time_t
 * PARSEDATE_SOONER - time underflow at the low end of time_t
 */

/* Wednesday is the longest name this parser knows about */
#define NAME_LEN 12

static int parsedate(const char *date, time_t *output)
{
  time_t t = 0;
  int wdaynum = -1;  /* day of the week number, 0-6 (mon-sun) */
  int monnum = -1;   /* month of the year number, 0-11 */
  int mdaynum = -1; /* day of month, 1 - 31 */
  int hournum = -1;
  int minnum = -1;
  int secnum = -1;
  int yearnum = -1;
  int tzoff = -1;
  enum assume dignext = DATE_MDAY;
  const char *indate = date; /* save the original pointer */
  int part = 0; /* max 6 parts */

  while(*date && (part < 6)) {
    bool found = FALSE;

    skip(&date);

    if(ISALPHA(*date)) {
      /* a name coming up */
      size_t len = 0;
      const char *p = date;
      while(ISALPHA(*p) && (len < NAME_LEN)) {
        p++;
        len++;
      }

      if(len != NAME_LEN) {
        if(wdaynum == -1) {
          wdaynum = checkday(date, len);
          if(wdaynum != -1)
            found = TRUE;
        }
        if(!found && (monnum == -1)) {
          monnum = checkmonth(date, len);
          if(monnum != -1)
            found = TRUE;
        }

        if(!found && (tzoff == -1)) {
          /* this just must be a time zone string */
          tzoff = checktz(date, len);
          if(tzoff != -1)
            found = TRUE;
        }
      }
      if(!found)
        return PARSEDATE_FAIL; /* bad string */

      date += len;
    }
    else if(ISDIGIT(*date)) {
      /* a digit */
      int val;
      char *end;
      if((secnum == -1) &&
         match_time(date, &hournum, &minnum, &secnum, &end)) {
        /* time stamp */
        date = end;
      }
      else {
        long lval;
        int error;
        int old_errno;

        old_errno = errno;
        errno = 0;
        lval = strtol(date, &end, 10);
        error = errno;
        if(errno != old_errno)
          errno = old_errno;

        if(error)
          return PARSEDATE_FAIL;

#if LONG_MAX != INT_MAX
        if((lval > (long)INT_MAX) || (lval < (long)INT_MIN))
          return PARSEDATE_FAIL;
#endif

        val = curlx_sltosi(lval);

        if((tzoff == -1) &&
           ((end - date) == 4) &&
           (val <= 1400) &&
           (indate< date) &&
           ((date[-1] == '+' || date[-1] == '-'))) {
          /* four digits and a value less than or equal to 1400 (to take into
             account all sorts of funny time zone diffs) and it is preceded
             with a plus or minus. This is a time zone indication.  1400 is
             picked since +1300 is frequently used and +1400 is mentioned as
             an edge number in the document "ISO C 200X Proposal: Timezone
             Functions" at http://david.tribble.com/text/c0xtimezone.html If
             anyone has a more authoritative source for the exact maximum time
             zone offsets, please speak up! */
          found = TRUE;
          tzoff = (val/100 * 60 + val%100)*60;

          /* the + and - prefix indicates the local time compared to GMT,
             this we need their reversed math to get what we want */
          tzoff = date[-1]=='+'?-tzoff:tzoff;
        }

        if(((end - date) == 8) &&
           (yearnum == -1) &&
           (monnum == -1) &&
           (mdaynum == -1)) {
          /* 8 digits, no year, month or day yet. This is YYYYMMDD */
          found = TRUE;
          yearnum = val/10000;
          monnum = (val%10000)/100-1; /* month is 0 - 11 */
          mdaynum = val%100;
        }

        if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) {
          if((val > 0) && (val<32)) {
            mdaynum = val;
            found = TRUE;
          }
          dignext = DATE_YEAR;
        }

        if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) {
          yearnum = val;
          found = TRUE;
          if(yearnum < 100) {
            if(yearnum > 70)
              yearnum += 1900;
            else
              yearnum += 2000;
          }
          if(mdaynum == -1)
            dignext = DATE_MDAY;
        }

        if(!found)
          return PARSEDATE_FAIL;

        date = end;
      }
    }

    part++;
  }

  if(-1 == secnum)
    secnum = minnum = hournum = 0; /* no time, make it zero */

  if((-1 == mdaynum) ||
     (-1 == monnum) ||
     (-1 == yearnum))
    /* lacks vital info, fail */
    return PARSEDATE_FAIL;

#ifdef HAVE_TIME_T_UNSIGNED
  if(yearnum < 1970) {
    /* only positive numbers cannot return earlier */
    *output = TIME_T_MIN;
    return PARSEDATE_SOONER;
  }
#endif

#if (SIZEOF_TIME_T < 5)

#ifdef HAVE_TIME_T_UNSIGNED
  /* an unsigned 32 bit time_t can only hold dates to 2106 */
  if(yearnum > 2105) {
    *output = TIME_T_MAX;
    return PARSEDATE_LATER;
  }
#else
  /* a signed 32 bit time_t can only hold dates to the beginning of 2038 */
  if(yearnum > 2037) {
    *output = TIME_T_MAX;
    return PARSEDATE_LATER;
  }
  if(yearnum < 1903) {
    *output = TIME_T_MIN;
    return PARSEDATE_SOONER;
  }
#endif

#else
  /* The Gregorian calendar was introduced 1582 */
  if(yearnum < 1583)
    return PARSEDATE_FAIL;
#endif

  if((mdaynum > 31) || (monnum > 11) ||
     (hournum > 23) || (minnum > 59) || (secnum > 60))
    return PARSEDATE_FAIL; /* clearly an illegal date */

  /* time2epoch() returns a time_t. time_t is often 32 bits, sometimes even on
     architectures that feature 64 bit 'long' but ultimately time_t is the
     correct data type to use.
  */
  t = time2epoch(secnum, minnum, hournum, mdaynum, monnum, yearnum);

  /* Add the time zone diff between local time zone and GMT. */
  if(tzoff == -1)
    tzoff = 0;

  if((tzoff > 0) && (t > TIME_T_MAX - tzoff)) {
    *output = TIME_T_MAX;
    return PARSEDATE_LATER; /* time_t overflow */
  }

  t += tzoff;

  *output = t;

  return PARSEDATE_OK;
}
#else
/* disabled */
static int parsedate(const char *date, time_t *output)
{
  (void)date;
  *output = 0;
  return PARSEDATE_OK; /* a lie */
}
#endif

time_t curl_getdate(const char *p, const time_t *now)
{
  time_t parsed = -1;
  int rc = parsedate(p, &parsed);
  (void)now; /* legacy argument from the past that we ignore */

  if(rc == PARSEDATE_OK) {
    if(parsed == -1)
      /* avoid returning -1 for a working scenario */
      parsed++;
    return parsed;
  }
  /* everything else is fail */
  return -1;
}

/* Curl_getdate_capped() differs from curl_getdate() in that this will return
   TIME_T_MAX in case the parsed time value was too big, instead of an
   error. */

time_t Curl_getdate_capped(const char *p)
{
  time_t parsed = -1;
  int rc = parsedate(p, &parsed);

  switch(rc) {
  case PARSEDATE_OK:
    if(parsed == -1)
      /* avoid returning -1 for a working scenario */
      parsed++;
    return parsed;
  case PARSEDATE_LATER:
    /* this returns the maximum time value */
    return parsed;
  default:
    return -1; /* everything else is fail */
  }
  /* UNREACHABLE */
}

/*
 * Curl_gmtime() is a gmtime() replacement for portability. Do not use the
 * gmtime_r() or gmtime() functions anywhere else but here.
 *
 */

CURLcode Curl_gmtime(time_t intime, struct tm *store)
{
  const struct tm *tm;
#ifdef HAVE_GMTIME_R
  /* thread-safe version */
  tm = (struct tm *)gmtime_r(&intime, store);
#else
  /* !checksrc! disable BANNEDFUNC 1 */
  tm = gmtime(&intime);
  if(tm)
    *store = *tm; /* copy the pointed struct to the local copy */
#endif

  if(!tm)
    return CURLE_BAD_FUNCTION_ARGUMENT;
  return CURLE_OK;
}