summaryrefslogtreecommitdiff
path: root/perl
diff options
context:
space:
mode:
authorFuji, Goro <gfuji@cpan.org>2010-10-05 17:10:10 +0900
committerFuji, Goro <gfuji@cpan.org>2010-10-05 17:10:10 +0900
commita4a04872a3e7d331c722938a719711cb3178a5c7 (patch)
tree2b8079f1f9b78aeaa1700bb12c1a67886606f535 /perl
parentf2d13cd6476a0d09525749803461743985ca766d (diff)
downloadmsgpack-python-a4a04872a3e7d331c722938a719711cb3178a5c7.tar.gz
perl: add $unpacker->utf8 mode, decoding strings as UTF-8.
Diffstat (limited to 'perl')
-rw-r--r--perl/lib/Data/MessagePack/PP.pm21
-rw-r--r--perl/lib/Data/MessagePack/Unpacker.pod11
-rw-r--r--perl/t/15_utf8.t27
-rw-r--r--perl/xs-src/MessagePack.c4
-rw-r--r--perl/xs-src/unpack.c28
5 files changed, 86 insertions, 5 deletions
diff --git a/perl/lib/Data/MessagePack/PP.pm b/perl/lib/Data/MessagePack/PP.pm
index 5dccc0b..00e58b9 100644
--- a/perl/lib/Data/MessagePack/PP.pm
+++ b/perl/lib/Data/MessagePack/PP.pm
@@ -248,6 +248,7 @@ sub _pack {
# UNPACK
#
+our $_utf8 = 0;
my $p; # position variables for speed.
sub unpack :method {
@@ -358,7 +359,9 @@ sub _unpack {
$num = $byte & ~0xa0;
$p += $num;
}
- return substr( $value, $p - $num, $num );
+ my $s = substr( $value, $p - $num, $num );
+ utf8::decode($s) if $_utf8;
+ return $s;
}
elsif ( $byte == 0xc0 ) { # nil
@@ -396,9 +399,19 @@ package
Data::MessagePack::PP::Unpacker;
sub new {
- bless { pos => 0 }, shift;
+ bless { pos => 0, utf8 => 0 }, shift;
}
+sub utf8 {
+ my $self = shift;
+ $self->{utf8} = (@_ ? shift : 1);
+ return $self;
+}
+
+sub get_utf8 {
+ my($self) = @_;
+ return $self->{utf8};
+}
sub execute_limit {
execute( @_ );
@@ -540,7 +553,9 @@ sub _count {
sub data {
- return Data::MessagePack->unpack( substr($_[0]->{ data }, 0, $_[0]->{pos}) );
+ my($self) = @_;
+ local $Data::MessagePack::PP::_utf8 = $self->{utf8};
+ return Data::MessagePack->unpack( substr($self->{ data }, 0, $self->{pos}) );
}
diff --git a/perl/lib/Data/MessagePack/Unpacker.pod b/perl/lib/Data/MessagePack/Unpacker.pod
index 2bc4549..37ab3db 100644
--- a/perl/lib/Data/MessagePack/Unpacker.pod
+++ b/perl/lib/Data/MessagePack/Unpacker.pod
@@ -24,6 +24,17 @@ This is a streaming deserializer for messagepack.
creates a new instance of stream deserializer.
+=item $up->utf8([$bool])
+
+sets utf8 mode. true if I<$bool> is omitted.
+returns I<$up> itself.
+
+If utf8 mode is enabled, strings will be decoded as UTF-8.
+
+=item my $ret = $up->get_utf8()
+
+returns the utf8 mode flag of I<$up>.
+
=item my $ret = $up->execute($data, $offset);
=item my $ret = $up->execute_limit($data, $offset, $limit)
diff --git a/perl/t/15_utf8.t b/perl/t/15_utf8.t
new file mode 100644
index 0000000..d7d17b8
--- /dev/null
+++ b/perl/t/15_utf8.t
@@ -0,0 +1,27 @@
+#!perl -w
+use strict;
+use Test::More;
+use Data::MessagePack;
+use utf8;
+
+my $data = [42, undef, 'foo', "\x{99f1}\x{99dd}"];
+my $packed = Data::MessagePack->pack($data);
+
+my $u = Data::MessagePack::Unpacker->new()->utf8();
+ok $u->get_utf8();
+$u->execute($packed);
+my $d = $u->data();
+$u->reset();
+is_deeply $d, $data, 'decoded';
+
+is $u->utf8(0), $u, 'utf8(0)';
+ok !$u->get_utf8();
+$u->execute($packed);
+$d = $u->data();
+$u->reset();
+my $s = $data->[3];
+utf8::encode($s);
+is_deeply $d->[3], $s, 'not decoded';
+
+done_testing;
+
diff --git a/perl/xs-src/MessagePack.c b/perl/xs-src/MessagePack.c
index 69337f4..0c3c0b1 100644
--- a/perl/xs-src/MessagePack.c
+++ b/perl/xs-src/MessagePack.c
@@ -7,6 +7,8 @@
XS(xs_pack);
XS(xs_unpack);
XS(xs_unpacker_new);
+XS(xs_unpacker_utf8);
+XS(xs_unpacker_get_utf8);
XS(xs_unpacker_execute);
XS(xs_unpacker_execute_limit);
XS(xs_unpacker_is_finished);
@@ -28,6 +30,8 @@ XS(boot_Data__MessagePack) {
newXS("Data::MessagePack::unpack", xs_unpack, __FILE__);
newXS("Data::MessagePack::Unpacker::new", xs_unpacker_new, __FILE__);
+ newXS("Data::MessagePack::Unpacker::utf8", xs_unpacker_utf8, __FILE__);
+ newXS("Data::MessagePack::Unpacker::get_utf8", xs_unpacker_get_utf8, __FILE__);
newXS("Data::MessagePack::Unpacker::execute", xs_unpacker_execute, __FILE__);
newXS("Data::MessagePack::Unpacker::execute_limit", xs_unpacker_execute_limit, __FILE__);
newXS("Data::MessagePack::Unpacker::is_finished", xs_unpacker_is_finished, __FILE__);
diff --git a/perl/xs-src/unpack.c b/perl/xs-src/unpack.c
index 065573a..f39d8c1 100644
--- a/perl/xs-src/unpack.c
+++ b/perl/xs-src/unpack.c
@@ -13,6 +13,7 @@ START_MY_CXT
typedef struct {
bool finished;
bool incremented;
+ bool utf8;
} unpack_user;
#include "msgpack/unpack_define.h"
@@ -237,6 +238,9 @@ STATIC_INLINE int template_callback_raw(unpack_user* u PERL_UNUSED_DECL, const c
dTHX;
/* newSVpvn(p, l) returns an undef if p == NULL */
*o = ((l==0) ? newSVpvs("") : newSVpvn(p, l));
+ if(u->utf8) {
+ sv_utf8_decode(*o);
+ }
return 0;
}
@@ -276,7 +280,7 @@ XS(xs_unpack) {
msgpack_unpack_t mp;
template_init(&mp);
- unpack_user const u = {false, false};
+ unpack_user const u = {false, false, false};
mp.user = u;
size_t from = 0;
@@ -303,7 +307,7 @@ XS(xs_unpack) {
STATIC_INLINE void _reset(SV* const self) {
dTHX;
- unpack_user const u = {false, false};
+ unpack_user const u = {false, false, false};
UNPACKER(self, mp);
template_init(mp);
@@ -328,6 +332,26 @@ XS(xs_unpacker_new) {
XSRETURN(1);
}
+XS(xs_unpacker_utf8) {
+ dXSARGS;
+ if (!(items == 1 || items == 2)) {
+ Perl_croak(aTHX_ "Usage: $unpacker->utf8([$bool)");
+ }
+ UNPACKER(ST(0), mp);
+ mp->user.utf8 = (items == 1 || sv_true(ST(1))) ? true : false;
+ XSRETURN(1); // returns $self
+}
+
+XS(xs_unpacker_get_utf8) {
+ dXSARGS;
+ if (items != 1) {
+ Perl_croak(aTHX_ "Usage: $unpacker->get_utf8()");
+ }
+ UNPACKER(ST(0), mp);
+ ST(0) = boolSV(mp->user.utf8);
+ XSRETURN(1);
+}
+
STATIC_INLINE size_t
_execute_impl(SV* const self, SV* const data, UV const offset, UV const limit) {
dTHX;