diff options
author | Ævar Arnfjörð Bjarmason <avar@cpan.org> | 2012-02-11 22:31:03 +0000 |
---|---|---|
committer | Ævar Arnfjörð Bjarmason <avar@cpan.org> | 2012-02-11 22:49:05 +0000 |
commit | 0e582130ad8fc3afc6514f60b7a513c550379b7d (patch) | |
tree | c1c629113ce462fad2f0f9da0b41753fa0eb6c90 /ext/POSIX | |
parent | a748fe11f70695552294fe4e31343b2dacb59db2 (diff) | |
parent | 423a1dfc8c367cb58e7dcef73a81b4ec7a8b8810 (diff) | |
download | perl-0e582130ad8fc3afc6514f60b7a513c550379b7d.tar.gz |
Merge branch 'avar/POSIX-strptime' into blead
Merge my rebased version of Paul "LeoNerd" Evans's branch to blead
after I'd cherry-picked the unrelated a748fe1 commit out of it.
This may or may not be the perfect implementation of strptime, but it
seems to work well enough for me, the bugs that have been raised
against it have been addressed, and it's going to work a hell of a lot
better than not having any strptime support at all.
Diffstat (limited to 'ext/POSIX')
-rw-r--r-- | ext/POSIX/POSIX.xs | 147 | ||||
-rw-r--r-- | ext/POSIX/lib/POSIX.pm | 18 | ||||
-rw-r--r-- | ext/POSIX/lib/POSIX.pod | 59 | ||||
-rw-r--r-- | ext/POSIX/t/export.t | 4 | ||||
-rw-r--r-- | ext/POSIX/t/time.t | 82 |
5 files changed, 302 insertions, 8 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index 7e30a82839..89d543e5cb 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -13,6 +13,9 @@ #define PERL_NO_GET_CONTEXT +/* Solaris needs this in order not to zero out all the untouched fields in strptime() */ +#define _STRPTIME_DONTZERO + #include "EXTERN.h" #define PERLIO_NOT_STDIO 1 #include "perl.h" @@ -1842,6 +1845,150 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1) } void +strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, yday=-1, isdst=-1) + SV * str + SV * fmt + int sec + int min + int hour + int mday + int mon + int year + int wday + int yday + int isdst + PPCODE: + { + const char *str_c; + int returning_pos = 0; /* true if caller wants us to set pos() marker on str */ + SV *orig_str = NULL; /* caller's original SV* if we have had to regrade it */ + const U8 *orig_bytes; /* SvPV of orig_str */ + MAGIC *posmg = NULL; + STRLEN str_offset = 0; + struct tm tm; + char *remains; + + init_tm(&tm); /* XXX workaround - see init_tm() in core util.c */ + tm.tm_sec = sec; + tm.tm_min = min; + tm.tm_hour = hour; + tm.tm_mday = mday; + tm.tm_mon = mon; + tm.tm_year = year; + tm.tm_wday = wday; + tm.tm_yday = yday; + tm.tm_isdst = isdst; + + if(SvROK(str) && !SvOBJECT(SvRV(str))) { + SV *ref = SvRV(str); + + if(SvTYPE(ref) > SVt_PVMG || SvREADONLY(ref)) + croak("str is not a reference to a mutable scalar"); + + str = ref; + returning_pos = 1; + + if(SvTYPE(str) >= SVt_PVMG && SvMAGIC(str)) + posmg = mg_find(str, PERL_MAGIC_regex_global); + + if(posmg) + str_offset = posmg->mg_len; + } + else if(SvROK(str) && SvTYPE(SvRV(str)) == SVt_REGEXP) { + croak("str is not a reference to a mutable scalar"); + } + + /* If fmt and str differ in UTF-8ness then take a temporary copy + * of and regrade it to match fmt, taking care to update the + * offset in both cases. */ + if(!SvUTF8(str) && SvUTF8(fmt)) { + orig_str = str; + str = sv_mortalcopy(str); + sv_utf8_upgrade_nomg(str); + + str_c = SvPV_nolen(str); + + if(str_offset) { + str_offset = utf8_hop(str_c, str_offset) - (U8*)str_c; + } + } + else if(SvUTF8(str) && !SvUTF8(fmt)) { + orig_str = str; + str = sv_mortalcopy(str); + /* If downgrade fails then str must have contained characters + * that could not possibly be matched by fmt */ + if(!sv_utf8_downgrade(str, 1)) + XSRETURN(0); + + str_c = SvPV_nolen(str); + + if(str_offset) { + orig_bytes = SvPV_nolen(orig_str); + str_offset = utf8_distance(orig_bytes + str_offset, orig_bytes); + } + } + else { + /* else it doesn't matter if both or neither are, because they'll match */ + str_c = SvPV_nolen(str); + } + + remains = strptime(str_c + str_offset, SvPV_nolen(fmt), &tm); + + if(!remains) + /* failed parse */ + XSRETURN(0); + if(remains[0] && !returning_pos) + /* leftovers - without ref we can't signal this so this is a failure */ + XSRETURN(0); + + if(returning_pos) { + if(orig_str) { + if(SvUTF8(str)) + /* str is a UTF-8 upgraded copy of the original non-UTF-8 + * string the caller referred us to in orig_str */ + str_offset = utf8_distance(remains, str_c); + else + str_offset = utf8_hop(orig_bytes, remains - str_c) - orig_bytes; + + str = orig_str; + } + else { + str_offset = remains - str_c; + } + if(!posmg) + posmg = sv_magicext(str, NULL, PERL_MAGIC_regex_global, + &PL_vtbl_mglob, NULL, 0); + posmg->mg_len = str_offset; + } + + if(tm.tm_mday > -1 && tm.tm_mon > -1 && tm.tm_year > -1) { + /* if we leave sec/min/hour == -1, then these will be + * normalised to the previous day */ + int was_sec = tm.tm_sec; tm.tm_sec = 0; + int was_min = tm.tm_min; tm.tm_min = 0; + int was_hour = tm.tm_hour; tm.tm_hour = 0; + + if(mktime(&tm) == (time_t)-1) + XSRETURN(0); + + tm.tm_sec = was_sec; + tm.tm_min = was_min; + tm.tm_hour = was_hour; + } + + EXTEND(SP, 9); + PUSHs(tm.tm_sec != -1 ? sv_2mortal(newSViv(tm.tm_sec)) : &PL_sv_undef); + PUSHs(tm.tm_min != -1 ? sv_2mortal(newSViv(tm.tm_min)) : &PL_sv_undef); + PUSHs(tm.tm_hour != -1 ? sv_2mortal(newSViv(tm.tm_hour)) : &PL_sv_undef); + PUSHs(tm.tm_mday != -1 ? sv_2mortal(newSViv(tm.tm_mday)) : &PL_sv_undef); + PUSHs(tm.tm_mon != -1 ? sv_2mortal(newSViv(tm.tm_mon)) : &PL_sv_undef); + PUSHs(tm.tm_year != -1 ? sv_2mortal(newSViv(tm.tm_year)) : &PL_sv_undef); + PUSHs(tm.tm_wday != -1 ? sv_2mortal(newSViv(tm.tm_wday)) : &PL_sv_undef); + PUSHs(tm.tm_yday != -1 ? sv_2mortal(newSViv(tm.tm_yday)) : &PL_sv_undef); + PUSHs(tm.tm_isdst!= -1 ? sv_2mortal(newSViv(tm.tm_isdst)): &PL_sv_undef); + } + +void tzset() PPCODE: my_tzset(aTHX); diff --git a/ext/POSIX/lib/POSIX.pm b/ext/POSIX/lib/POSIX.pm index ec5c076294..e1ba950689 100644 --- a/ext/POSIX/lib/POSIX.pm +++ b/ext/POSIX/lib/POSIX.pm @@ -4,7 +4,7 @@ use warnings; our ($AUTOLOAD, %SIGRT); -our $VERSION = '1.28'; +our $VERSION = '1.29'; require XSLoader; @@ -360,7 +360,7 @@ our %EXPORT_TAGS = ( tcflow tcflush tcgetattr tcsendbreak tcsetattr )], time_h => [qw(CLK_TCK CLOCKS_PER_SEC NULL asctime clock ctime - difftime mktime strftime tzset tzname)], + difftime mktime strftime strptime tzset tzname)], unistd_h => [qw(F_OK NULL R_OK SEEK_CUR SEEK_END SEEK_SET STDERR_FILENO STDIN_FILENO STDOUT_FILENO W_OK X_OK @@ -386,13 +386,21 @@ our %EXPORT_TAGS = ( # De-duplicate the export list: my %export; @export{map {@$_} values %EXPORT_TAGS} = (); - # Doing the de-dup with a temporary hash has the advantage that the SVs in - # @EXPORT are actually shared hash key scalars, which will save some memory. - our @EXPORT = keys %export; our @EXPORT_OK = (qw(close lchown nice open pipe read sleep times write printf sprintf), grep {!exists $export{$_}} keys %reimpl, keys %replacement); + + # Symbols that should not be exported by default because they are recently + # added. It would upset too much of CPAN to export these by default + foreach (qw(strptime)) { + delete $export{$_}; + push @EXPORT_OK, $_; + } + + # Doing the de-dup with a temporary hash has the advantage that the SVs in + # @EXPORT are actually shared hash key scalars, which will save some memory. + our @EXPORT = keys %export; } require Exporter; diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod index f935ae0574..b24df0ab85 100644 --- a/ext/POSIX/lib/POSIX.pod +++ b/ext/POSIX/lib/POSIX.pod @@ -1349,6 +1349,65 @@ strncpy() is C-specific, use C<=> instead, see L<perlop>. strpbrk() is C-specific, use regular expressions instead, see L<perlre>. +=item strptime + +Parse date and time information from a string. Returns a 9-element list of +time and date information. + +Synopsis: + + (sec, min, hour, mday, mon, year, wday, yday, isdst) = + strptime(str, fmt, [@init]) + +Optionally, an existing 9-element list of time and date informaiton may be +passed to initialise the structure before parsing. Any fields not parsed by +the format will be left as initialised. + +The month (C<mon>), weekday (C<wday>), and yearday (C<yday>) begin at zero. +I.e. January is 0, not 1; Sunday is 0, not 1; January 1st is 0, not 1. The +year (C<year>) is given in years since 1900. I.e., the year 1995 is 95; the +year 2001 is 101. Consult your system's C<strftime()> manpage for details +about these and the other arguments. + +If you want your code to be portable, your format (C<fmt>) argument +should use only the conversion specifiers defined by the ANSI C +standard (C89, to play safe). These are C<aAbBcdHIjmMpSUwWxXyYZ%>. +But even then, the results of some of the conversion specifiers are +non-portable. For example, the specifiers C<aAbBcpZ> change according +to the locale settings of the user, and both how to set locales (the +locale names) and what output to expect are non-standard. +The specifier C<c> changes according to the timezone settings of the +user and the timezone computation rules of the operating system. +The C<Z> specifier is notoriously unportable since the names of +timezones are non-standard. Sticking to the numeric specifiers is the +safest route. + +The return values are made consistent as though by calling C<mktime()> +before they are returned, if all of the C<mday>, C<mon> and C<year> fields +are valid. + +The string for Tuesday, December 12, 1995. + + @time = POSIX::strptime( "Tuesday, December 12, 1995", + "%A, %B %d, %Y", 0, 0, 0 ); + + local $, = ", "; + print @time, "\n"; + +If the input string is not valid, or not consumed completely by the format, +then an error occurs; indicated by C<strptime()> returning an empty list. + +By passing a reference to a string as the value to parse, C<strptime()> will +use the C<pos()> position to start the parse, and to return the position where +it finished. In this situation, it is not an error if the entire input is not +consumed by the format. + + $str = "18:05:29 is the time"; + @time = POSIX::strptime( \$str, "%H:%M:%S" ); + local $, = ", "; + print @time[0..2], "\n"; + print pos($str) . "\n"; + =item strrchr strrchr() is C-specific, see L<perlfunc/rindex> instead. diff --git a/ext/POSIX/t/export.t b/ext/POSIX/t/export.t index 07d428eb1a..0753178f63 100644 --- a/ext/POSIX/t/export.t +++ b/ext/POSIX/t/export.t @@ -102,8 +102,8 @@ my %expect = ( getpgrp getppid getpwnam getpwuid gmtime kill lchown link localtime log mkdir nice open opendir pipe printf rand read readdir rename rewinddir rmdir sin sleep sprintf sqrt - srand stat system time times umask unlink utime wait - waitpid write)], + srand stat strptime system time times umask unlink utime + wait waitpid write)], ); plan (tests => 2 * keys %expect); diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t index 90b54caa47..f6954b3695 100644 --- a/ext/POSIX/t/time.t +++ b/ext/POSIX/t/time.t @@ -4,7 +4,7 @@ use strict; use Config; use POSIX; -use Test::More tests => 19; +use Test::More tests => 41; # go to UTC to avoid DST issues around the world when testing. SUS3 says that # null should get you UTC, but some environments want the explicit names. @@ -68,6 +68,86 @@ is(ord strftime($ss, POSIX::localtime(time)), 223, 'Format string has correct character'); unlike($ss, qr/\w/, 'Still not internally UTF-8 encoded'); +my @time = POSIX::strptime("2011-12-18 12:34:56", "%Y-%m-%d %H:%M:%S"); +is_deeply(\@time, [56, 34, 12, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all 6 fields'); + +@time = POSIX::strptime("2011-12-18", "%Y-%m-%d", 1, 23, 4); +is_deeply(\@time, [1, 23, 4, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all date fields with passed time'); + +@time = POSIX::strptime("2011-12-18", "%Y-%m-%d"); +is_deeply(\@time, [undef, undef, undef, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all date fields with no time'); + +# tm_year == 6 => 1906, which is a negative time_t. Lets use 106 as 2006 instead +@time = POSIX::strptime("12:34:56", "%H:%M:%S", 1, 2, 3, 4, 5, 106); +is_deeply(\@time, [56, 34, 12, 4, 5, 106, 0, 154, 1], 'strptime() all time fields with passed date'); + +@time = POSIX::strptime("July 4", "%b %d"); +is_deeply([@time[3,4]], [4, 7-1], 'strptime() partial yields correct mday/mon'); + +@time = POSIX::strptime("Foobar", "%H:%M:%S"); +is(scalar @time, 0, 'strptime() invalid input yields empty list'); + +my $str; +@time = POSIX::strptime(\($str = "01:02:03"), "%H:%M:%S", -1,-1,-1, 1,0,70); +is_deeply(\@time, [3, 2, 1, 1, 0, 70, 4, 0, 0], 'strptime() parses SCALAR ref'); +is(pos($str), 8, 'strptime() sets pos() magic on SCALAR ref'); + +$str = "Text with 2012-12-01 datestamp"; +pos($str) = 10; +@time = POSIX::strptime(\$str, "%Y-%m-%d", 0, 0, 0); +is_deeply(\@time, [0, 0, 0, 1, 12-1, 2012-1900, 6, 335, 0], 'strptime() starts SCALAR ref at pos()'); +is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref'); + +{ + # Latin-1 vs. UTF-8 strings + my $date = "2012\x{e9}02\x{e9}01"; + utf8::upgrade my $date_U = $date; + my $fmt = "%Y\x{e9}%m\x{e9}%d"; + utf8::upgrade my $fmt_U = $fmt; + + my @want = (undef, undef, undef, 1, 2-1, 2012-1900, 3, 31, 0); + + is_deeply([POSIX::strptime($date_U, $fmt )], \@want, 'strptime() UTF-8 date, legacy fmt'); + is_deeply([POSIX::strptime($date, $fmt_U)], \@want, 'strptime() legacy date, UTF-8 fmt'); + is_deeply([POSIX::strptime($date_U, $fmt_U)], \@want, 'strptime() UTF-8 date, UTF-8 fmt'); + + my $str = "\x{ea} $date \x{ea}"; + pos($str) = 2; + + is_deeply([POSIX::strptime(\$str, $fmt_U)], \@want, 'strptime() legacy data SCALAR ref, UTF-8 fmt'); + is(pos($str), 12, 'pos() of legacy data SCALAR after strptime() UTF-8 fmt'); + + utf8::upgrade my $str_U = $str; + pos($str_U) = 2; + + is_deeply([POSIX::strptime(\$str_U, $fmt)], \@want, 'strptime() UTF-8 data SCALAR ref, legacy fmt'); + is(pos($str_U), 12, 'pos() of UTF-8 data SCALAR after strptime() legacy fmt'); + + # High (>U+FF) strings + my $date_UU = "2012\x{1234}02\x{1234}01"; + my $fmt_UU = "%Y\x{1234}%m\x{1234}%d"; + + is_deeply([POSIX::strptime($date_UU, $fmt_UU)], \@want, 'strptime() on non-Latin-1 Unicode'); +} + +eval { POSIX::strptime({}, "format") }; +like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on HASH ref'); + +eval { POSIX::strptime(\"boo", "format") }; +like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on const literal ref'); + +eval { POSIX::strptime(qr/boo!/, "format") }; +like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on Regexp'); + +$str = bless [], "WithStringOverload"; +{ + package WithStringOverload; + use overload '""' => sub { return "2012-02-01" }; +} + +@time = POSIX::strptime($str, "%Y-%m-%d", 0, 0, 0); +is_deeply(\@time, [0, 0, 0, 1, 2-1, 2012-1900, 3, 31, 0], 'strptime() allows object with string overload'); + setlocale(LC_TIME, $orig_loc) || die "Cannot setlocale() back to orig: $!"; # clock() seems to have different definitions of what it does between POSIX |