summaryrefslogtreecommitdiff
path: root/ruby
diff options
context:
space:
mode:
authorfrsyuki <frsyuki@users.sourceforge.jp>2010-08-31 06:30:16 +0900
committerfrsyuki <frsyuki@users.sourceforge.jp>2010-08-31 06:30:16 +0900
commitb5c78de2ddf82783a6f80a199b68927d1a1747ca (patch)
treeaf0605ac128cf76442f81bb17599e1950c7676cd /ruby
parenta1bd14e516a0baef6f96b441da70e29e5be7d175 (diff)
downloadmsgpack-python-b5c78de2ddf82783a6f80a199b68927d1a1747ca.tar.gz
ruby: converts encodings into UTF-8 on Ruby 1.9
Diffstat (limited to 'ruby')
-rw-r--r--ruby/encoding.h33
-rw-r--r--ruby/pack.c21
-rw-r--r--ruby/rbinit.c15
-rw-r--r--ruby/test/test_encoding.rb68
-rw-r--r--ruby/test/test_helper.rb4
-rw-r--r--ruby/unpack.c38
6 files changed, 141 insertions, 38 deletions
diff --git a/ruby/encoding.h b/ruby/encoding.h
new file mode 100644
index 0000000..2ad3fd7
--- /dev/null
+++ b/ruby/encoding.h
@@ -0,0 +1,33 @@
+/*
+ * MessagePack for Ruby
+ *
+ * Copyright (C) 2008-2010 FURUHASHI Sadayuki
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ENCODING_H__
+#define ENCODING_H__
+
+
+#ifdef HAVE_RUBY_ENCODING_H /* encoding support is only compiled in when ruby/encoding.h is available */
+#include "ruby/encoding.h"
+#define MSGPACK_RUBY_ENCODING /* feature switch tested by pack.c, unpack.c and rbinit.c */
+extern int s_enc_utf8; /* encoding index of UTF-8; defined in rbinit.c, set in Init_msgpack */
+extern int s_enc_ascii8bit; /* encoding index of ASCII-8BIT; defined in rbinit.c, set in Init_msgpack */
+extern int s_enc_usascii; /* encoding index of US-ASCII; defined in rbinit.c, set in Init_msgpack */
+extern VALUE s_enc_utf8_value; /* UTF-8 Encoding object, passed to rb_str_encode() as the target encoding */
+#endif
+
+
+#endif /* encoding.h */
+
diff --git a/ruby/pack.c b/ruby/pack.c
index bbeac4a..35878c7 100644
--- a/ruby/pack.c
+++ b/ruby/pack.c
@@ -16,6 +16,8 @@
* limitations under the License.
*/
#include "ruby.h"
+#include "encoding.h"
+
#include "msgpack/pack_define.h"
static ID s_to_msgpack;
@@ -131,7 +133,6 @@ static VALUE MessagePack_Fixnum_to_msgpack(int argc, VALUE *argv, VALUE self)
static VALUE MessagePack_Bignum_to_msgpack(int argc, VALUE *argv, VALUE self)
{
ARG_BUFFER(out, argc, argv);
- // FIXME bignum
if(RBIGNUM_SIGN(self)) { // positive
msgpack_pack_uint64(out, rb_big2ull(self));
} else { // negative
@@ -168,6 +169,14 @@ static VALUE MessagePack_Float_to_msgpack(int argc, VALUE *argv, VALUE self)
static VALUE MessagePack_String_to_msgpack(int argc, VALUE *argv, VALUE self)
{
ARG_BUFFER(out, argc, argv);
+#ifdef MSGPACK_RUBY_ENCODING
+ int enc = ENCODING_GET(self);
+ if(enc != s_enc_utf8 && enc != s_enc_ascii8bit && enc != s_enc_usascii) {
+ if(!ENC_CODERANGE_ASCIIONLY(self)) {
+ self = rb_str_encode(self, s_enc_utf8_value, 0, Qnil);
+ }
+ }
+#endif
msgpack_pack_raw(out, RSTRING_LEN(self));
msgpack_pack_raw_body(out, RSTRING_PTR(self), RSTRING_LEN(self));
return out;
@@ -184,12 +193,16 @@ static VALUE MessagePack_String_to_msgpack(int argc, VALUE *argv, VALUE self)
*/
static VALUE MessagePack_Symbol_to_msgpack(int argc, VALUE *argv, VALUE self)
{
+#ifdef MSGPACK_RUBY_ENCODING /* encoding-aware build: reuse the String packer */
+ return MessagePack_String_to_msgpack(argc, argv, rb_id2str(SYM2ID(self))); /* symbol name as a String, so it goes through the String packer's encoding handling */
+#else /* !MSGPACK_RUBY_ENCODING: pack the NUL-terminated symbol name as raw bytes */
ARG_BUFFER(out, argc, argv);
const char* name = rb_id2name(SYM2ID(self));
size_t len = strlen(name);
msgpack_pack_raw(out, len);
msgpack_pack_raw_body(out, name, len);
return out;
+#endif /* MSGPACK_RUBY_ENCODING */
}
@@ -205,7 +218,8 @@ static VALUE MessagePack_Symbol_to_msgpack(int argc, VALUE *argv, VALUE self)
static VALUE MessagePack_Array_to_msgpack(int argc, VALUE *argv, VALUE self)
{
ARG_BUFFER(out, argc, argv);
- msgpack_pack_array(out, RARRAY_LEN(self));
+ // FIXME check sizeof(long) > sizeof(unsigned int) && RARRAY_LEN(self) > UINT_MAX
+ msgpack_pack_array(out, (unsigned int)RARRAY_LEN(self));
VALUE* p = RARRAY_PTR(self);
VALUE* const pend = p + RARRAY_LEN(self);
for(;p != pend; ++p) {
@@ -239,7 +253,8 @@ static int MessagePack_Hash_to_msgpack_foreach(VALUE key, VALUE value, VALUE out
static VALUE MessagePack_Hash_to_msgpack(int argc, VALUE *argv, VALUE self)
{
ARG_BUFFER(out, argc, argv);
- msgpack_pack_map(out, RHASH_SIZE(self));
+ // FIXME check sizeof(st_index_t) > sizeof(unsigned int) && RHASH_SIZE(self) > UINT_MAX
+ msgpack_pack_map(out, (unsigned int)RHASH_SIZE(self));
rb_hash_foreach(self, MessagePack_Hash_to_msgpack_foreach, out);
return out;
}
diff --git a/ruby/rbinit.c b/ruby/rbinit.c
index 28a8bfe..4678159 100644
--- a/ruby/rbinit.c
+++ b/ruby/rbinit.c
@@ -17,9 +17,17 @@
*/
#include "pack.h"
#include "unpack.h"
+#include "encoding.h"
static VALUE mMessagePack;
+#ifdef MSGPACK_RUBY_ENCODING /* definitions of the globals declared extern in encoding.h */
+int s_enc_utf8; /* set from rb_utf8_encindex() in Init_msgpack */
+int s_enc_ascii8bit; /* set from rb_ascii8bit_encindex() in Init_msgpack */
+int s_enc_usascii; /* set from rb_usascii_encindex() in Init_msgpack */
+VALUE s_enc_utf8_value; /* set from rb_enc_from_encoding(rb_utf8_encoding()) in Init_msgpack */
+#endif
+
/**
* Document-module: MessagePack
*
@@ -46,6 +54,13 @@ void Init_msgpack(void)
rb_define_const(mMessagePack, "VERSION", rb_str_new2(MESSAGEPACK_VERSION));
+#ifdef MSGPACK_RUBY_ENCODING
+ s_enc_ascii8bit = rb_ascii8bit_encindex();
+ s_enc_utf8 = rb_utf8_encindex();
+ s_enc_usascii = rb_usascii_encindex();
+ s_enc_utf8_value = rb_enc_from_encoding(rb_utf8_encoding());
+#endif
+
Init_msgpack_unpack(mMessagePack);
Init_msgpack_pack(mMessagePack);
}
diff --git a/ruby/test/test_encoding.rb b/ruby/test/test_encoding.rb
new file mode 100644
index 0000000..2cf0767
--- /dev/null
+++ b/ruby/test/test_encoding.rb
@@ -0,0 +1,68 @@
+#!/usr/bin/env ruby
+require File.dirname(__FILE__)+'/test_helper'
+
+if RUBY_VERSION < "1.9"
+ exit
+end
+
+class MessagePackTestEncoding < Test::Unit::TestCase
+ def self.it(name, &block)
+ define_method("test_#{name}", &block)
+ end
+
+ it "US-ASCII" do
+ check_unpack "abc".force_encoding("US-ASCII")
+ end
+
+ it "UTF-8 ascii" do
+ check_unpack "abc".force_encoding("UTF-8")
+ end
+
+ it "UTF-8 mbstr" do
+ check_unpack "\xE3\x81\x82".force_encoding("UTF-8")
+ end
+
+ it "UTF-8 invalid" do
+ check_unpack "\xD0".force_encoding("UTF-8")
+ end
+
+ it "ASCII-8BIT" do
+ check_unpack "\xD0".force_encoding("ASCII-8BIT")
+ end
+
+ it "EUC-JP" do
+ x = "\xA4\xA2".force_encoding("EUC-JP")
+ check_unpack(x)
+ end
+
+ it "EUC-JP invalid" do
+ begin
+ "\xD0".force_encoding("EUC-JP").to_msgpack
+ assert(false)
+ rescue Encoding::InvalidByteSequenceError
+ assert(true)
+ end
+ end
+
+ private
+ def check_unpack(str)
+ if str.encoding.to_s == "ASCII-8BIT"
+ should_str = str.dup.force_encoding("UTF-8")
+ else
+ should_str = str.encode("UTF-8")
+ end
+
+ raw = str.to_msgpack
+ r = MessagePack.unpack(str.to_msgpack)
+ assert_equal(r.encoding.to_s, "UTF-8")
+ assert_equal(r, should_str.force_encoding("UTF-8"))
+
+ if str.valid_encoding?
+ sym = str.to_sym
+ r = MessagePack.unpack(sym.to_msgpack)
+ assert_equal(r.encoding.to_s, "UTF-8")
+ assert_equal(r, should_str.force_encoding("UTF-8"))
+ end
+ end
+end
+
diff --git a/ruby/test/test_helper.rb b/ruby/test/test_helper.rb
index 80d7806..4def861 100644
--- a/ruby/test/test_helper.rb
+++ b/ruby/test/test_helper.rb
@@ -5,4 +5,6 @@ rescue LoadError
require File.dirname(__FILE__) + '/../lib/msgpack'
end
-#GC.stress = true
+if ENV["GC_STRESS"] # opt-in: GC stress mode is enabled only when the GC_STRESS environment variable is set
+ GC.stress = true
+end
diff --git a/ruby/unpack.c b/ruby/unpack.c
index 0948151..3c5e350 100644
--- a/ruby/unpack.c
+++ b/ruby/unpack.c
@@ -16,17 +16,13 @@
* limitations under the License.
*/
#include "ruby.h"
+#include "encoding.h"
#include "msgpack/unpack_define.h"
static ID s_sysread;
static ID s_readpartial;
-#ifdef HAVE_RUBY_ENCODING_H
-#include "ruby/encoding.h"
-int s_ascii_8bit;
-#endif
-
struct unpack_buffer {
size_t size;
size_t free;
@@ -136,6 +132,9 @@ static inline int template_callback_raw(unpack_user* u, const char* b, const cha
} else {
*o = rb_str_substr(u->source, p - b, l);
}
+#ifdef MSGPACK_RUBY_ENCODING
+ ENCODING_SET(*o, s_enc_utf8);
+#endif
return 0;
}
@@ -163,16 +162,6 @@ static inline int template_callback_raw(unpack_user* u, const char* b, const cha
#endif
-#ifdef HAVE_RUBY_ENCODING_H
-static VALUE template_execute_rescue_enc(VALUE data)
-{
- rb_gc_enable();
- VALUE* resc = (VALUE*)data;
- rb_enc_set_index(resc[0], (int)resc[1]);
- RERAISE;
-}
-#endif
-
static VALUE template_execute_rescue(VALUE nouse)
{
rb_gc_enable();
@@ -203,31 +192,16 @@ static int template_execute_wrap(msgpack_unpack_t* mp,
(VALUE)from,
};
-#ifdef HAVE_RUBY_ENCODING_H
- int enc_orig = rb_enc_get_index(str);
- rb_enc_set_index(str, s_ascii_8bit);
-#endif
-
// FIXME mp->top is not updated while execute is running, so GC mark does not work
rb_gc_disable();
mp->user.source = str;
-#ifdef HAVE_RUBY_ENCODING_H
- VALUE resc[2] = {str, enc_orig};
- int ret = (int)rb_rescue(template_execute_do, (VALUE)args,
- template_execute_rescue_enc, (VALUE)resc);
-#else
int ret = (int)rb_rescue(template_execute_do, (VALUE)args,
template_execute_rescue, Qnil);
-#endif
rb_gc_enable();
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_set_index(str, enc_orig);
-#endif
-
return ret;
}
@@ -746,10 +720,6 @@ void Init_msgpack_unpack(VALUE mMessagePack)
s_sysread = rb_intern("sysread");
s_readpartial = rb_intern("readpartial");
-#ifdef HAVE_RUBY_ENCODING_H
- s_ascii_8bit = rb_enc_find_index("ASCII-8BIT");
-#endif
-
eUnpackError = rb_define_class_under(mMessagePack, "UnpackError", rb_eStandardError);
cUnpacker = rb_define_class_under(mMessagePack, "Unpacker", rb_cObject);
rb_define_alloc_func(cUnpacker, MessagePack_Unpacker_alloc);