summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Markus Reiter <me@reitermark.us>, 2017-04-24 03:18:58 +0200
committer: Matt Brictson <mattbrictson@users.noreply.github.com>, 2017-04-23 18:18:58 -0700
commit: 39f3518a1a5424982d48e800056bed60c261f550 (patch)
tree: 31ffef83767f2c6eb4efefab39e0bc42c343042a
parent: 1867b05c3989a10e14d8fb8b12701d7bc55363b2 (diff)
download: plist-39f3518a1a5424982d48e800056bed60c261f550.tar.gz
Fix ASCII/UTF-8 error. (#38)
* Add reproducible test for UTF-8/ASCII error.
* Change encoding according to `xml` tag.
* Add changelog entry.
* Add helper method to parse XML encoding.
-rw-r--r-- CHANGELOG.rdoc 1
-rwxr-xr-x lib/plist/parser.rb 31
-rw-r--r-- test/assets/non-ascii-but-utf-8.plist 8
-rwxr-xr-x test/test_parser.rb 14
4 files changed, 48 insertions, 6 deletions
diff --git a/CHANGELOG.rdoc b/CHANGELOG.rdoc
index 4760115..d3dcc0f 100644
--- a/CHANGELOG.rdoc
+++ b/CHANGELOG.rdoc
@@ -5,6 +5,7 @@
https://github.com/patsplat/plist/compare/dece870...HEAD
* Your contribution here!
+* Fix ASCII/UTF-8 error (https://github.com/patsplat/plist/pull/38).
* Fix Fixnum, Bignum deprecations in Ruby 2.4
* Fix unused variable `e` warning
diff --git a/lib/plist/parser.rb b/lib/plist/parser.rb
index 7df9802..4de13f8 100755
--- a/lib/plist/parser.rb
+++ b/lib/plist/parser.rb
@@ -73,10 +73,10 @@ module Plist
end
TEXT = /([^<]+)/
- XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- COMMENT_START = /\A<!--/u
- COMMENT_END = /.*?-->/um
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/m
+ DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/m
+ COMMENT_START = /\A<!--/
+ COMMENT_END = /.*?-->/m
def parse
@@ -91,7 +91,14 @@ module Plist
if @scanner.scan(COMMENT_START)
@scanner.scan(COMMENT_END)
elsif @scanner.scan(XMLDECL_PATTERN)
+ encoding = parse_encoding_from_xml_declaration(@scanner[1])
+ next if encoding.nil?
+
+ # use the specified encoding for the rest of the file
+ next unless String.method_defined?(:force_encoding)
+ @scanner.string = @scanner.rest.force_encoding(encoding)
elsif @scanner.scan(DOCTYPE_PATTERN)
+ next
elsif @scanner.scan(start_tag)
@listener.tag_start(@scanner[1], nil)
if (@scanner[2] =~ /\/$/)
@@ -106,6 +113,22 @@ module Plist
end
end
end
+
+ private
+
+ def parse_encoding_from_xml_declaration(xml_declaration)
+ return unless defined?(Encoding)
+
+ xml_encoding = xml_declaration.match(/(?:\A|\s)encoding=(?:"(.*?)"|'(.*?)')(?:\s|\Z)/)
+
+ return if xml_encoding.nil?
+
+ begin
+ Encoding.find(xml_encoding[1])
+ rescue ArgumentError
+ nil
+ end
+ end
end
class PTag
diff --git a/test/assets/non-ascii-but-utf-8.plist b/test/assets/non-ascii-but-utf-8.plist
new file mode 100644
index 0000000..482470f
--- /dev/null
+++ b/test/assets/non-ascii-but-utf-8.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>non-ascii-but-utf8-character</key>
+ <string>™</string>
+</dict>
+</plist>
diff --git a/test/test_parser.rb b/test/test_parser.rb
index e096196..3614799 100755
--- a/test/test_parser.rb
+++ b/test/test_parser.rb
@@ -90,6 +90,16 @@ class TestParser < Test::Unit::TestCase
assert_nil data
end
-end
+ def test_filename_or_xml_is_encoded_with_ascii_8bit
+ # skip if Ruby version does not support String#force_encoding
+ return unless String.method_defined?(:force_encoding)
+
+ xml = File.read("test/assets/non-ascii-but-utf-8.plist")
+ xml.force_encoding("ASCII-8BIT")
-__END__
+ assert_nothing_raised do
+ data = Plist::parse_xml(xml)
+ assert_equal("\u0099", data["non-ascii-but-utf8-character"])
+ end
+ end
+end