diff options
author | Markus Reiter <me@reitermark.us> | 2017-04-24 03:18:58 +0200 |
---|---|---|
committer | Matt Brictson <mattbrictson@users.noreply.github.com> | 2017-04-23 18:18:58 -0700 |
commit | 39f3518a1a5424982d48e800056bed60c261f550 (patch) | |
tree | 31ffef83767f2c6eb4efefab39e0bc42c343042a | |
parent | 1867b05c3989a10e14d8fb8b12701d7bc55363b2 (diff) | |
download | plist-39f3518a1a5424982d48e800056bed60c261f550.tar.gz |
Fix ASCII/UTF-8 error. (#38)
* Add reproducible test for UTF-8/ASCII error.
* Change encoding according to `xml` tag.
* Add changelog entry.
* Add helper method to parse XML encoding.
-rw-r--r-- | CHANGELOG.rdoc | 1 | ||||
-rwxr-xr-x | lib/plist/parser.rb | 31 | ||||
-rw-r--r-- | test/assets/non-ascii-but-utf-8.plist | 8 | ||||
-rwxr-xr-x | test/test_parser.rb | 14 |
4 files changed, 48 insertions, 6 deletions
diff --git a/CHANGELOG.rdoc b/CHANGELOG.rdoc
index 4760115..d3dcc0f 100644
--- a/CHANGELOG.rdoc
+++ b/CHANGELOG.rdoc
@@ -5,6 +5,7 @@ https://github.com/patsplat/plist/compare/dece870...HEAD
 
 
 * Your contribution here!
+* Fix ASCII/UTF-8 error (https://github.com/patsplat/plist/pull/38).
 * Fix Fixnum, Bignum deprecations in Ruby 2.4
 * Fix unused variable `e` warning
 
diff --git a/lib/plist/parser.rb b/lib/plist/parser.rb
index 7df9802..4de13f8 100755
--- a/lib/plist/parser.rb
+++ b/lib/plist/parser.rb
@@ -73,10 +73,10 @@ module Plist
     end
 
     TEXT = /([^<]+)/
-    XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
-    DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
-    COMMENT_START = /\A<!--/u
-    COMMENT_END = /.*?-->/um
+    XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/m
+    DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/m
+    COMMENT_START = /\A<!--/
+    COMMENT_END = /.*?-->/m
 
 
     def parse
@@ -91,7 +91,14 @@ module Plist
         if @scanner.scan(COMMENT_START)
           @scanner.scan(COMMENT_END)
         elsif @scanner.scan(XMLDECL_PATTERN)
+          encoding = parse_encoding_from_xml_declaration(@scanner[1])
+          next if encoding.nil?
+
+          # use the specified encoding for the rest of the file
+          next unless String.method_defined?(:force_encoding)
+          @scanner.string = @scanner.rest.force_encoding(encoding)
         elsif @scanner.scan(DOCTYPE_PATTERN)
+          next
         elsif @scanner.scan(start_tag)
           @listener.tag_start(@scanner[1], nil)
           if (@scanner[2] =~ /\/$/)
@@ -106,6 +113,22 @@ module Plist
         end
       end
     end
+
+    private
+
+    def parse_encoding_from_xml_declaration(xml_declaration)
+      return unless defined?(Encoding)
+
+      xml_encoding = xml_declaration.match(/(?:\A|\s)encoding=(?:"(.*?)"|'(.*?)')(?:\s|\Z)/)
+
+      return if xml_encoding.nil?
+
+      begin
+        Encoding.find(xml_encoding[1])
+      rescue ArgumentError
+        nil
+      end
+    end
   end
 
   class PTag
diff --git a/test/assets/non-ascii-but-utf-8.plist b/test/assets/non-ascii-but-utf-8.plist
new file mode 100644
index 0000000..482470f
--- /dev/null
+++ b/test/assets/non-ascii-but-utf-8.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>non-ascii-but-utf8-character</key>
+  <string></string>
+</dict>
+</plist>
diff --git a/test/test_parser.rb b/test/test_parser.rb
index e096196..3614799 100755
--- a/test/test_parser.rb
+++ b/test/test_parser.rb
@@ -90,6 +90,16 @@ class TestParser < Test::Unit::TestCase
     assert_nil data
   end
 
-end
+  def test_filename_or_xml_is_encoded_with_ascii_8bit
+    # skip if Ruby version does not support String#force_encoding
+    return unless String.method_defined?(:force_encoding)
+
+    xml = File.read("test/assets/non-ascii-but-utf-8.plist")
+    xml.force_encoding("ASCII-8BIT")
 
-__END__
+    assert_nothing_raised do
+      data = Plist::parse_xml(xml)
+      assert_equal("\u0099", data["non-ascii-but-utf8-character"])
+    end
+  end
+end
|