diff options
author | SADAHIRO Tomoyuki <BQW10602@nifty.com> | 2004-01-16 13:13:00 +0900 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2004-01-23 13:24:41 +0000 |
commit | a6aa349da2cd706a05b205fa788c278b74c24bdc (patch) | |
tree | b1aa3db7f18a14a566d6b7b7e3df4d47418cd027 /doop.c | |
parent | e84ac4e2e047fe0bbb7415313afdde3e76eafca7 (diff) | |
download | perl-a6aa349da2cd706a05b205fa788c278b74c24bdc.tar.gz |
Re: [perl #24888] chomp ignores utf8
Message-Id: <20040116040355.A849.BQW10602@nifty.com>
Date: Fri, 16 Jan 2004 04:13:00 +0900
p4raw-id: //depot/perl@22196
Diffstat (limited to 'doop.c')
-rw-r--r-- | doop.c | 37 |
1 files changed, 34 insertions, 3 deletions
@@ -1009,6 +1009,7 @@ Perl_do_chomp(pTHX_ register SV *sv) STRLEN n_a; char *s; char *temp_buffer = NULL; + SV* svrecode = Nullsv; if (RsSNARF(PL_rs)) return 0; @@ -1044,6 +1045,18 @@ Perl_do_chomp(pTHX_ register SV *sv) if (SvREADONLY(sv)) Perl_croak(aTHX_ PL_no_modify); } + + if (PL_encoding) { + if (!SvUTF8(sv)) { + /* XXX, here sv is utf8-ized as a side-effect! + If encoding.pm is used properly, almost string-generating + operations, including literal strings, chr(), input data, etc. + should have been utf8-ized already, right? + */ + sv_recode_to_utf8(sv, PL_encoding); + } + } + s = SvPV(sv, len); if (s && len) { s += --len; @@ -1058,8 +1071,13 @@ Perl_do_chomp(pTHX_ register SV *sv) } } else { - STRLEN rslen; + STRLEN rslen, rs_charlen; char *rsptr = SvPV(PL_rs, rslen); + + rs_charlen = SvUTF8(PL_rs) + ? sv_len_utf8(PL_rs) + : rslen; + if (SvUTF8(PL_rs) != SvUTF8(sv)) { /* Assumption is that rs is shorter than the scalar. */ if (SvUTF8(PL_rs)) { @@ -1075,7 +1093,16 @@ Perl_do_chomp(pTHX_ register SV *sv) goto nope; } rsptr = temp_buffer; - } else { + } + else if (PL_encoding) { + /* RS is 8 bit, encoding.pm is used. + * Do not recode PL_rs as a side-effect. */ + svrecode = newSVpvn(rsptr, rslen); + sv_recode_to_utf8(svrecode, PL_encoding); + rsptr = SvPV(svrecode, rslen); + rs_charlen = sv_len_utf8(svrecode); + } + else { /* RS is 8 bit, scalar is utf8. */ temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); rsptr = temp_buffer; @@ -1093,7 +1120,7 @@ Perl_do_chomp(pTHX_ register SV *sv) s -= rslen - 1; if (memNE(s, rsptr, rslen)) goto nope; - count += rslen; + count += rs_charlen; } } s = SvPV_force(sv, n_a); @@ -1103,6 +1130,10 @@ Perl_do_chomp(pTHX_ register SV *sv) SvSETMAGIC(sv); } nope: + + if (svrecode) + SvREFCNT_dec(svrecode); + Safefree(temp_buffer); return count; } |