diff options
author | kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-10-30 12:10:56 +0000 |
---|---|---|
committer | kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-10-30 12:10:56 +0000 |
commit | 994f066f76857a781f8819b8da2c2aeceedbf87b (patch) | |
tree | d89e9bcf6ac8d558367f888919de0c4894224063 /lib | |
parent | 767fe5170d97461be5c79936b467dee3d4eb7179 (diff) | |
download | ruby-994f066f76857a781f8819b8da2c2aeceedbf87b.tar.gz |
* lib/rexml/encoding.rb: use Ruby native encoding mechanism. [ruby-dev:42464]
* lib/rexml/encodings/: remove.
* lib/rexml/document.rb, lib/rexml/formatters/default.rb,
lib/rexml/output.rb, lib/rexml/parseexception.rb,
lib/rexml/parsers/baseparser.rb, lib/rexml/source.rb,
lib/rexml/xmldecl.rb: use Ruby's native Encoding object.
* test/rexml/, test/rss/: follow the above encoding changes.
* NEWS: add REXML's incompatible change about encoding.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29646 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib')
-rw-r--r-- | lib/rexml/document.rb | 5 | ||||
-rw-r--r-- | lib/rexml/encoding.rb | 116 | ||||
-rw-r--r-- | lib/rexml/encodings/CP-1252.rb | 103 | ||||
-rw-r--r-- | lib/rexml/encodings/EUC-JP.rb | 35 | ||||
-rw-r--r-- | lib/rexml/encodings/ICONV.rb | 22 | ||||
-rw-r--r-- | lib/rexml/encodings/ISO-8859-1.rb | 7 | ||||
-rw-r--r-- | lib/rexml/encodings/ISO-8859-15.rb | 72 | ||||
-rw-r--r-- | lib/rexml/encodings/SHIFT-JIS.rb | 37 | ||||
-rw-r--r-- | lib/rexml/encodings/SHIFT_JIS.rb | 1 | ||||
-rw-r--r-- | lib/rexml/encodings/UNILE.rb | 34 | ||||
-rw-r--r-- | lib/rexml/encodings/US-ASCII.rb | 30 | ||||
-rw-r--r-- | lib/rexml/encodings/UTF-16.rb | 35 | ||||
-rw-r--r-- | lib/rexml/encodings/UTF-8.rb | 18 | ||||
-rw-r--r-- | lib/rexml/formatters/default.rb | 2 | ||||
-rw-r--r-- | lib/rexml/output.rb | 2 | ||||
-rw-r--r-- | lib/rexml/parseexception.rb | 2 | ||||
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 6 | ||||
-rw-r--r-- | lib/rexml/source.rb | 4 | ||||
-rw-r--r-- | lib/rexml/xmldecl.rb | 13 |
19 files changed, 78 insertions, 466 deletions
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 0337553a2e..68a744d9e5 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -131,7 +131,8 @@ module REXML xml_decl().version end - # @return the XMLDecl encoding of this document as a String. + # @return the XMLDecl encoding of this document as an + # Encoding object. # If no XMLDecl has been set, returns the default encoding. def encoding xml_decl().encoding @@ -183,7 +184,7 @@ module REXML # that IE's limited abilities can handle. This hack inserts a space # before the /> on empty tags. Defaults to false def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) - if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) + if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output) output = Output.new( output, xml_decl.encoding ) end formatter = if indent > -1 diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index 3feffb80f4..0c4a88fbeb 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -1,71 +1,67 @@ # -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2 module REXML module Encoding - @encoding_methods = {} - def self.register(enc, &block) - @encoding_methods[enc] = block - end - def self.apply(obj, enc) - @encoding_methods[enc][obj] - end - def self.encoding_method(enc) - @encoding_methods[enc] - end - - # Native, default format is UTF-8, so it is declared here rather than in - # an encodings/ definition. - UTF_8 = 'UTF-8' - UTF_16 = 'UTF-16' - UNILE = 'UNILE' - - # ID ---> Encoding name - attr_reader :encoding - def encoding=( enc ) - old_verbosity = $VERBOSE - begin - $VERBOSE = false - enc = enc.nil? ? nil : enc.upcase - return false if defined? 
@encoding and enc == @encoding - if enc and enc != UTF_8 - @encoding = enc - raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ - @encoding.untaint - begin - require 'rexml/encodings/ICONV.rb' - Encoding.apply(self, "ICONV") - rescue LoadError, Exception - begin - enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) - require enc_file - Encoding.apply(self, @encoding) - rescue LoadError => err - puts err.message - raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv." - end - end - else - @encoding = UTF_8 - require 'rexml/encodings/UTF-8.rb' - Encoding.apply(self, @encoding) - end - ensure - $VERBOSE = old_verbosity + # ID ---> Encoding object + attr_reader :encoding + def encoding=(encoding) + if encoding.is_a?(String) + original_encoding = encoding + encoding = find_encoding(encoding) + unless encoding + raise ArgumentError, "Bad encoding name #{original_encoding}" + end + end + return false if defined?(@encoding) and encoding == @encoding + if encoding and encoding != ::Encoding::UTF_8 + @encoding = encoding + else + @encoding = ::Encoding::UTF_8 end true end - def check_encoding str + def check_encoding(xml) # We have to recognize UTF-16, LSB UTF-16, and UTF-8 - if str[0,2] == "\xfe\xff" - str[0,2] = "" - return UTF_16 - elsif str[0,2] == "\xff\xfe" - str[0,2] = "" - return UNILE - end - str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m - return $3.upcase if $3 - return UTF_8 + if xml[0, 2] == "\xfe\xff" + xml[0, 2] = "" + ::Encoding::UTF_16BE + elsif xml[0, 2] == "\xff\xfe" + xml[0, 2] = "" + ::Encoding::UTF_16LE + else + if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1 + \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml + encoding_name = $3 + if /\Autf-16\z/i =~ encoding_name + ::Encoding::UTF_16BE + else + find_encoding(encoding_name) + end + else + ::Encoding::UTF_8 + end + end end + + def encode(string) + string.encode(@encoding) + end + + def decode(string) + 
string.encode(::Encoding::UTF_8, @encoding) + end + + private + def find_encoding(name) + case name + when "UTF-16" + name = "UTF-16BE" + when /\Ashift-jis\z/i + name = "Shift_JIS" + when /\ACP-(\d+)\z/ + name = "CP#{$1}" + end + ::Encoding.find(name) + end end end diff --git a/lib/rexml/encodings/CP-1252.rb b/lib/rexml/encodings/CP-1252.rb deleted file mode 100644 index 587c5bdd68..0000000000 --- a/lib/rexml/encodings/CP-1252.rb +++ /dev/null @@ -1,103 +0,0 @@ -# -# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi -# -module REXML - module Encoding - register( "CP-1252" ) do |o| - class << o - alias encode encode_cp1252 - alias decode decode_cp1252 - end - end - - # Convert from UTF-8 - def encode_cp1252(content) - array_utf8 = content.unpack('U*') - array_enc = [] - array_utf8.each do |num| - case num - # shortcut first bunch basic characters - when 0..0xFF; array_enc << num - # characters added compared to iso-8859-1 - when 0x20AC; array_enc << 0x80 # 0xe2 0x82 0xac - when 0x201A; array_enc << 0x82 # 0xe2 0x82 0x9a - when 0x0192; array_enc << 0x83 # 0xc6 0x92 - when 0x201E; array_enc << 0x84 # 0xe2 0x82 0x9e - when 0x2026; array_enc << 0x85 # 0xe2 0x80 0xa6 - when 0x2020; array_enc << 0x86 # 0xe2 0x80 0xa0 - when 0x2021; array_enc << 0x87 # 0xe2 0x80 0xa1 - when 0x02C6; array_enc << 0x88 # 0xcb 0x86 - when 0x2030; array_enc << 0x89 # 0xe2 0x80 0xb0 - when 0x0160; array_enc << 0x8A # 0xc5 0xa0 - when 0x2039; array_enc << 0x8B # 0xe2 0x80 0xb9 - when 0x0152; array_enc << 0x8C # 0xc5 0x92 - when 0x017D; array_enc << 0x8E # 0xc5 0xbd - when 0x2018; array_enc << 0x91 # 0xe2 0x80 0x98 - when 0x2019; array_enc << 0x92 # 0xe2 0x80 0x99 - when 0x201C; array_enc << 0x93 # 0xe2 0x80 0x9c - when 0x201D; array_enc << 0x94 # 0xe2 0x80 0x9d - when 0x2022; array_enc << 0x95 # 0xe2 0x80 0xa2 - when 0x2013; array_enc << 0x96 # 0xe2 0x80 0x93 - when 0x2014; array_enc << 0x97 # 0xe2 0x80 0x94 - when 0x02DC; array_enc << 0x98 # 0xcb 0x9c - when 0x2122; 
array_enc << 0x99 # 0xe2 0x84 0xa2 - when 0x0161; array_enc << 0x9A # 0xc5 0xa1 - when 0x203A; array_enc << 0x9B # 0xe2 0x80 0xba - when 0x0152; array_enc << 0x9C # 0xc5 0x93 - when 0x017E; array_enc << 0x9E # 0xc5 0xbe - when 0x0178; array_enc << 0x9F # 0xc5 0xb8 - else - # all remaining basic characters can be used directly - if num <= 0xFF - array_enc << num - else - # Numeric entity (&#nnnn;); shard by Stefan Scholl - array_enc.concat "&\##{num};".unpack('C*') - end - end - end - array_enc.pack('C*') - end - - # Convert to UTF-8 - def decode_cp1252(str) - array_latin9 = str.unpack('C*') - array_enc = [] - array_latin9.each do |num| - case num - # characters that added compared to iso-8859-1 - when 0x80; array_enc << 0x20AC # 0xe2 0x82 0xac - when 0x82; array_enc << 0x201A # 0xe2 0x82 0x9a - when 0x83; array_enc << 0x0192 # 0xc6 0x92 - when 0x84; array_enc << 0x201E # 0xe2 0x82 0x9e - when 0x85; array_enc << 0x2026 # 0xe2 0x80 0xa6 - when 0x86; array_enc << 0x2020 # 0xe2 0x80 0xa0 - when 0x87; array_enc << 0x2021 # 0xe2 0x80 0xa1 - when 0x88; array_enc << 0x02C6 # 0xcb 0x86 - when 0x89; array_enc << 0x2030 # 0xe2 0x80 0xb0 - when 0x8A; array_enc << 0x0160 # 0xc5 0xa0 - when 0x8B; array_enc << 0x2039 # 0xe2 0x80 0xb9 - when 0x8C; array_enc << 0x0152 # 0xc5 0x92 - when 0x8E; array_enc << 0x017D # 0xc5 0xbd - when 0x91; array_enc << 0x2018 # 0xe2 0x80 0x98 - when 0x92; array_enc << 0x2019 # 0xe2 0x80 0x99 - when 0x93; array_enc << 0x201C # 0xe2 0x80 0x9c - when 0x94; array_enc << 0x201D # 0xe2 0x80 0x9d - when 0x95; array_enc << 0x2022 # 0xe2 0x80 0xa2 - when 0x96; array_enc << 0x2013 # 0xe2 0x80 0x93 - when 0x97; array_enc << 0x2014 # 0xe2 0x80 0x94 - when 0x98; array_enc << 0x02DC # 0xcb 0x9c - when 0x99; array_enc << 0x2122 # 0xe2 0x84 0xa2 - when 0x9A; array_enc << 0x0161 # 0xc5 0xa1 - when 0x9B; array_enc << 0x203A # 0xe2 0x80 0xba - when 0x9C; array_enc << 0x0152 # 0xc5 0x93 - when 0x9E; array_enc << 0x017E # 0xc5 0xbe - when 0x9F; array_enc << 0x0178 # 0xc5 
0xb8 - else - array_enc << num - end - end - array_enc.pack('U*') - end - end -end diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb deleted file mode 100644 index db37b6bf0d..0000000000 --- a/lib/rexml/encodings/EUC-JP.rb +++ /dev/null @@ -1,35 +0,0 @@ -module REXML - module Encoding - begin - require 'uconv' - - def decode_eucjp(str) - Uconv::euctou8(str) - end - - def encode_eucjp content - Uconv::u8toeuc(content) - end - rescue LoadError - require 'nkf' - - EUCTOU8 = '-Ewm0' - U8TOEUC = '-Wem0' - - def decode_eucjp(str) - NKF.nkf(EUCTOU8, str) - end - - def encode_eucjp content - NKF.nkf(U8TOEUC, content) - end - end - - register("EUC-JP") do |obj| - class << obj - alias decode decode_eucjp - alias encode encode_eucjp - end - end - end -end diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb deleted file mode 100644 index 172fba7cd1..0000000000 --- a/lib/rexml/encodings/ICONV.rb +++ /dev/null @@ -1,22 +0,0 @@ -require "iconv" -raise LoadError unless defined? 
Iconv - -module REXML - module Encoding - def decode_iconv(str) - Iconv.conv(UTF_8, @encoding, str) - end - - def encode_iconv(content) - Iconv.conv(@encoding, UTF_8, content) - end - - register("ICONV") do |obj| - Iconv.conv(UTF_8, obj.encoding, nil) - class << obj - alias decode decode_iconv - alias encode encode_iconv - end - end - end -end diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb deleted file mode 100644 index 2873d13bf0..0000000000 --- a/lib/rexml/encodings/ISO-8859-1.rb +++ /dev/null @@ -1,7 +0,0 @@ -require 'rexml/encodings/US-ASCII' - -module REXML - module Encoding - register("ISO-8859-1", &encoding_method("US-ASCII")) - end -end diff --git a/lib/rexml/encodings/ISO-8859-15.rb b/lib/rexml/encodings/ISO-8859-15.rb deleted file mode 100644 index 08a19cb755..0000000000 --- a/lib/rexml/encodings/ISO-8859-15.rb +++ /dev/null @@ -1,72 +0,0 @@ -# -# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi -# -module REXML - module Encoding - register("ISO-8859-15") do |o| - alias encode to_iso_8859_15 - alias decode from_iso_8859_15 - end - - # Convert from UTF-8 - def to_iso_8859_15(content) - array_utf8 = content.unpack('U*') - array_enc = [] - array_utf8.each do |num| - case num - # shortcut first bunch basic characters - when 0..0xA3; array_enc << num - # characters removed compared to iso-8859-1 - when 0xA4; array_enc << '¤' - when 0xA6; array_enc << '¦' - when 0xA8; array_enc << '¨' - when 0xB4; array_enc << '´' - when 0xB8; array_enc << '¸' - when 0xBC; array_enc << '¼' - when 0xBD; array_enc << '½' - when 0xBE; array_enc << '¾' - # characters added compared to iso-8859-1 - when 0x20AC; array_enc << 0xA4 # 0xe2 0x82 0xac - when 0x0160; array_enc << 0xA6 # 0xc5 0xa0 - when 0x0161; array_enc << 0xA8 # 0xc5 0xa1 - when 0x017D; array_enc << 0xB4 # 0xc5 0xbd - when 0x017E; array_enc << 0xB8 # 0xc5 0xbe - when 0x0152; array_enc << 0xBC # 0xc5 0x92 - when 0x0153; array_enc << 0xBD # 0xc5 0x93 - when 
0x0178; array_enc << 0xBE # 0xc5 0xb8 - else - # all remaining basic characters can be used directly - if num <= 0xFF - array_enc << num - else - # Numeric entity (&#nnnn;); shard by Stefan Scholl - array_enc.concat "&\##{num};".unpack('C*') - end - end - end - array_enc.pack('C*') - end - - # Convert to UTF-8 - def from_iso_8859_15(str) - array_latin9 = str.unpack('C*') - array_enc = [] - array_latin9.each do |num| - case num - # characters that differ compared to iso-8859-1 - when 0xA4; array_enc << 0x20AC - when 0xA6; array_enc << 0x0160 - when 0xA8; array_enc << 0x0161 - when 0xB4; array_enc << 0x017D - when 0xB8; array_enc << 0x017E - when 0xBC; array_enc << 0x0152 - when 0xBD; array_enc << 0x0153 - when 0xBE; array_enc << 0x0178 - else - array_enc << num - end - end - array_enc.pack('U*') - end - end -end diff --git a/lib/rexml/encodings/SHIFT-JIS.rb b/lib/rexml/encodings/SHIFT-JIS.rb deleted file mode 100644 index 9e0f4af20e..0000000000 --- a/lib/rexml/encodings/SHIFT-JIS.rb +++ /dev/null @@ -1,37 +0,0 @@ -module REXML - module Encoding - begin - require 'uconv' - - def decode_sjis content - Uconv::sjistou8(content) - end - - def encode_sjis(str) - Uconv::u8tosjis(str) - end - rescue LoadError - require 'nkf' - - SJISTOU8 = '-Swm0x' - U8TOSJIS = '-Wsm0x' - - def decode_sjis(str) - NKF.nkf(SJISTOU8, str) - end - - def encode_sjis content - NKF.nkf(U8TOSJIS, content) - end - end - - b = proc do |obj| - class << obj - alias decode decode_sjis - alias encode encode_sjis - end - end - register("SHIFT-JIS", &b) - register("SHIFT_JIS", &b) - end -end diff --git a/lib/rexml/encodings/SHIFT_JIS.rb b/lib/rexml/encodings/SHIFT_JIS.rb deleted file mode 100644 index e355704a7c..0000000000 --- a/lib/rexml/encodings/SHIFT_JIS.rb +++ /dev/null @@ -1 +0,0 @@ -require 'rexml/encodings/SHIFT-JIS' diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb deleted file mode 100644 index 1a18f0c932..0000000000 --- a/lib/rexml/encodings/UNILE.rb +++ /dev/null @@ 
-1,34 +0,0 @@ -module REXML - module Encoding - def encode_unile content - array_utf8 = content.unpack("U*") - array_enc = [] - array_utf8.each do |num| - if ((num>>16) > 0) - array_enc << ?? - array_enc << 0 - else - array_enc << (num & 0xFF) - array_enc << (num >> 8) - end - end - array_enc.pack('C*') - end - - def decode_unile(str) - array_enc=str.unpack('C*') - array_utf8 = [] - 0.step(array_enc.size-1, 2){|i| - array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100) - } - array_utf8.pack('U*') - end - - register(UNILE) do |obj| - class << obj - alias decode decode_unile - alias encode encode_unile - end - end - end -end diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb deleted file mode 100644 index fb4c217074..0000000000 --- a/lib/rexml/encodings/US-ASCII.rb +++ /dev/null @@ -1,30 +0,0 @@ -module REXML - module Encoding - # Convert from UTF-8 - def encode_ascii content - array_utf8 = content.unpack('U*') - array_enc = [] - array_utf8.each do |num| - if num <= 0x7F - array_enc << num - else - # Numeric entity (&#nnnn;); shard by Stefan Scholl - array_enc.concat "&\##{num};".unpack('C*') - end - end - array_enc.pack('C*') - end - - # Convert to UTF-8 - def decode_ascii(str) - str.unpack('C*').pack('U*') - end - - register("US-ASCII") do |obj| - class << obj - alias decode decode_ascii - alias encode encode_ascii - end - end - end -end diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb deleted file mode 100644 index 2ec058eed5..0000000000 --- a/lib/rexml/encodings/UTF-16.rb +++ /dev/null @@ -1,35 +0,0 @@ -module REXML - module Encoding - def encode_utf16 content - array_utf8 = content.unpack("U*") - array_enc = [] - array_utf8.each do |num| - if ((num>>16) > 0) - array_enc << 0 - array_enc << ?? 
- else - array_enc << (num >> 8) - array_enc << (num & 0xFF) - end - end - array_enc.pack('C*') - end - - def decode_utf16(str) - str = str[2..-1] if /^\376\377/n =~ str - array_enc=str.unpack('C*') - array_utf8 = [] - 0.step(array_enc.size-1, 2){|i| - array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100) - } - array_utf8.pack('U*') - end - - register(UTF_16) do |obj| - class << obj - alias decode decode_utf16 - alias encode encode_utf16 - end - end - end -end diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb deleted file mode 100644 index bb08f44100..0000000000 --- a/lib/rexml/encodings/UTF-8.rb +++ /dev/null @@ -1,18 +0,0 @@ -module REXML - module Encoding - def encode_utf8 content - content - end - - def decode_utf8(str) - str - end - - register(UTF_8) do |obj| - class << obj - alias decode decode_utf8 - alias encode encode_utf8 - end - end - end -end diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb index db44453e1e..ec4149047d 100644 --- a/lib/rexml/formatters/default.rb +++ b/lib/rexml/formatters/default.rb @@ -22,7 +22,7 @@ module REXML case node when Document - if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) + if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output) output = Output.new( output, node.xml_decl.encoding ) end write_document( node, output ) diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb index b7f17b9dff..752f6e1d40 100644 --- a/lib/rexml/output.rb +++ b/lib/rexml/output.rb @@ -10,7 +10,7 @@ module REXML @output = real_IO self.encoding = encd - @to_utf = encd == UTF_8 ? 
false : true + @to_utf = (@encoding != ::Encoding::UTF_8) end def <<( content ) diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb index 0481f72818..0c4d55abda 100644 --- a/lib/rexml/parseexception.rb +++ b/lib/rexml/parseexception.rb @@ -28,7 +28,7 @@ module REXML err << "\nLine: #{line}\n" err << "Position: #{position}\n" err << "Last 80 unconsumed characters:\n" - err << @source.buffer[0..80].gsub(/\n/, ' ') + err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ') end err diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index ade35d7921..ee8b160ce5 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -248,10 +248,8 @@ module REXML @document_status = :after_doctype @source.read if @source.buffer.size<2 md = @source.match(/\s*/um, true) - if @source.encoding == "UTF-8" - if @source.buffer.respond_to? :force_encoding - @source.buffer.force_encoding(Encoding::UTF_8) - end + if @source.encoding == ::Encoding::UTF_8 + @source.buffer.force_encoding(::Encoding::UTF_8) end end end diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 1206150b16..227b0c56c4 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -54,13 +54,13 @@ module REXML def encoding=(enc) return unless super @line_break = encode( '>' ) - if enc != UTF_8 + if @encoding != ::Encoding::UTF_8 @buffer = decode(@buffer) @to_utf = true else @to_utf = false if @buffer.respond_to? 
:force_encoding - @buffer.force_encoding Encoding::UTF_8 + @buffer.force_encoding ::Encoding::UTF_8 end end end diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index 361e4b7106..81d3057732 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -109,9 +109,20 @@ module REXML end private + def normalized_encoding_name(_encoding) + if _encoding == ::Encoding::UTF_16BE + "UTF-16" + else + return _encoding.name + end + end + def content(enc) rv = "version='#@version'" - rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i + if @writeencoding || enc.to_s !~ /\Autf-8\z/i + encoding_name = normalized_encoding_name(enc) + rv << " encoding='#{encoding_name}'" + end rv << " standalone='#@standalone'" if @standalone rv end |