summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Shaun McCance <shaunm@gnome.org> 2013-09-21 16:24:35 -0400
committer: Shaun McCance <shaunm@gnome.org> 2013-09-21 16:24:35 -0400
commit: 2928d6f02a0f30415bd993490d1920fd990ce130 (patch)
tree: 8d235d6d36edbd1ab467200099588fe2f9af7630
parent: 309b2cee105430c5ed387d7862e9405d41e76088 (diff)
download: itstool-2928d6f02a0f30415bd993490d1920fd990ce130.tar.gz
Added an option to retain entity references
You still have to load the DTD if the entities are defined in the external subset, because libxml2 checks references even if it doesn't dereference them. It would be nice if this weren't necessary.
-rwxr-xr-x itstool.in 32
-rw-r--r-- tests/IT-keep-entities-1.ll.po 21
-rw-r--r-- tests/IT-keep-entities-1.ll.xml 7
-rw-r--r-- tests/IT-keep-entities-1.pot 21
-rw-r--r-- tests/IT-keep-entities-1.xml 7
-rw-r--r-- tests/IT-keep-entities-2.ll.po 21
-rw-r--r-- tests/IT-keep-entities-2.ll.xml 9
-rw-r--r-- tests/IT-keep-entities-2.pot 21
-rw-r--r-- tests/IT-keep-entities-2.xml 9
-rw-r--r-- tests/run_tests.py 20
10 files changed, 157 insertions, 11 deletions
diff --git a/itstool.in b/itstool.in
index aa5b17f..9dc4a0b 100755
--- a/itstool.in
+++ b/itstool.in
@@ -192,6 +192,10 @@ class Message (object):
if re.sub('\s+', ' ', text).strip() != '':
self._empty = False
+ def add_entity_ref (self, name):
+ self._message.append('&' + name + ';')
+ self._empty = False
+
def add_placeholder (self, node):
holder = Message.Placeholder(node)
self._placeholders.append(holder)
@@ -368,7 +372,7 @@ def fix_node_ns (node, nsdefs):
class Document (object):
- def __init__ (self, filename, messages, load_dtd=False):
+ def __init__ (self, filename, messages, load_dtd=False, keep_entities=False):
self._xml_err = ''
libxml2.registerErrorHandler(xml_error_catcher, self)
try:
@@ -377,9 +381,14 @@ class Document (object):
sys.stderr.write('Error: cannot open XML file %s\n' % filename)
sys.exit(1)
ctxt.lineNumbers(1)
+ self._load_dtd = load_dtd
+ self._keep_entities = keep_entities
if load_dtd:
ctxt.loadSubset(1)
- ctxt.replaceEntities(1)
+ if keep_entities:
+ ctxt.replaceEntities(0)
+ else:
+ ctxt.replaceEntities(1)
ctxt.parseDocument()
self._filename = filename
self._doc = ctxt.doc()
@@ -836,7 +845,11 @@ class Document (object):
nsdef = nsdef.next
reg_ns(node, nss)
nss['_'] = NS_BLANK
- blurb = '<' + node.name
+ try:
+ blurb = node.doc.intSubset().serialize('utf-8')
+ except:
+ blurb = ''
+ blurb += '<' + node.name
for nsname in nss.keys():
if nsname is None:
blurb += ' xmlns="%s"' % nss[nsname]
@@ -844,6 +857,8 @@ class Document (object):
blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname])
blurb += '>%s</%s>' % (trans.encode('utf-8'), node.name)
ctxt = libxml2.createDocParserCtxt(blurb)
+ if self._load_dtd:
+ ctxt.loadSubset(1)
ctxt.replaceEntities(0)
ctxt.parseDocument()
trnode = ctxt.doc().getRootElement()
@@ -920,6 +935,8 @@ class Document (object):
if node.type in ('text', 'cdata') and msg is not None:
msg.add_text(node.content)
return
+ if node.type == 'entity_ref':
+ msg.add_entity_ref(node.name);
if node.type != 'element':
return
if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
@@ -1121,6 +1138,11 @@ if __name__ == '__main__':
dest='load_dtd',
default=False,
help='Load external DTDs used by input XML')
+ options.add_option('-k', '--keep-entities',
+ action='store_true',
+ dest='keep_entities',
+ default=False,
+ help='Keep entity reference unexpanded')
options.add_option('-v', '--version',
action='store_true',
dest='version',
@@ -1135,7 +1157,7 @@ if __name__ == '__main__':
if opts.merge is None and opts.join is None:
messages = MessageList()
for filename in args[1:]:
- doc = Document(filename, messages, opts.load_dtd)
+ doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
doc.apply_its_rules()
if opts.itsfile is not None:
for itsfile in opts.itsfile:
@@ -1173,7 +1195,7 @@ if __name__ == '__main__':
sys.exit(1)
for filename in args[1:]:
messages = MessageList()
- doc = Document(filename, messages, load_dtd=opts.load_dtd)
+ doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
doc.apply_its_rules()
if opts.itsfile is not None:
for itsfile in opts.itsfile:
diff --git a/tests/IT-keep-entities-1.ll.po b/tests/IT-keep-entities-1.ll.po
new file mode 100644
index 0000000..5fac279
--- /dev/null
+++ b/tests/IT-keep-entities-1.ll.po
@@ -0,0 +1,21 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2012-08-29 09:51-0400\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Put one translator per line, in the form NAME <EMAIL>, YEAR1, YEAR2
+msgctxt "_"
+msgid "translator-credits"
+msgstr ""
+
+#. (itstool) path: bookinfo/title
+#: IT-keep-entities-1.xml:5
+msgid "The history of Leonard &ldquo;Bones&rdquo; McCoy"
+msgstr "La historia de Leonard &ldquo;Bones&rdquo; McCoy"
+
diff --git a/tests/IT-keep-entities-1.ll.xml b/tests/IT-keep-entities-1.ll.xml
new file mode 100644
index 0000000..2255308
--- /dev/null
+++ b/tests/IT-keep-entities-1.ll.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<book lang="test">
+ <bookinfo id="startrek">
+ <title>La historia de Leonard &ldquo;Bones&rdquo; McCoy</title>
+ </bookinfo>
+</book>
diff --git a/tests/IT-keep-entities-1.pot b/tests/IT-keep-entities-1.pot
new file mode 100644
index 0000000..1768341
--- /dev/null
+++ b/tests/IT-keep-entities-1.pot
@@ -0,0 +1,21 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2012-08-29 09:51-0400\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Put one translator per line, in the form NAME <EMAIL>, YEAR1, YEAR2
+msgctxt "_"
+msgid "translator-credits"
+msgstr ""
+
+#. (itstool) path: bookinfo/title
+#: tests/IT-keep-entities-1.xml:5
+msgid "The history of Leonard &ldquo;Bones&rdquo; McCoy"
+msgstr ""
+
diff --git a/tests/IT-keep-entities-1.xml b/tests/IT-keep-entities-1.xml
new file mode 100644
index 0000000..bb87ea8
--- /dev/null
+++ b/tests/IT-keep-entities-1.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="US-ASCII"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<book>
+ <bookinfo id="startrek">
+ <title>The history of Leonard &ldquo;Bones&rdquo; McCoy</title>
+ </bookinfo>
+</book>
diff --git a/tests/IT-keep-entities-2.ll.po b/tests/IT-keep-entities-2.ll.po
new file mode 100644
index 0000000..cab95b5
--- /dev/null
+++ b/tests/IT-keep-entities-2.ll.po
@@ -0,0 +1,21 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2013-09-16 12:04-0400\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. (itstool) path: test/p
+#: tests/IT-keep-entities-2.xml:7
+msgid "&first;"
+msgstr "[&first;]"
+
+#. (itstool) path: test/p
+#: tests/IT-keep-entities-2.xml:8
+msgid "&second;"
+msgstr "[&second;]"
+
diff --git a/tests/IT-keep-entities-2.ll.xml b/tests/IT-keep-entities-2.ll.xml
new file mode 100644
index 0000000..95af179
--- /dev/null
+++ b/tests/IT-keep-entities-2.ll.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE test [
+<!ENTITY first "hello">
+<!ENTITY second SYSTEM "world.xml">
+]>
+<test>
+ <p>[&first;]</p>
+ <p>[&second;]</p>
+</test>
diff --git a/tests/IT-keep-entities-2.pot b/tests/IT-keep-entities-2.pot
new file mode 100644
index 0000000..40f755e
--- /dev/null
+++ b/tests/IT-keep-entities-2.pot
@@ -0,0 +1,21 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2013-09-16 12:04-0400\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. (itstool) path: test/p
+#: tests/IT-keep-entities-2.xml:7
+msgid "&first;"
+msgstr ""
+
+#. (itstool) path: test/p
+#: tests/IT-keep-entities-2.xml:8
+msgid "&second;"
+msgstr ""
+
diff --git a/tests/IT-keep-entities-2.xml b/tests/IT-keep-entities-2.xml
new file mode 100644
index 0000000..0173e48
--- /dev/null
+++ b/tests/IT-keep-entities-2.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE test [
+<!ENTITY first "hello">
+<!ENTITY second SYSTEM "world.xml">
+]>
+<test>
+ <p>&first;</p>
+ <p>&second;</p>
+</test>
diff --git a/tests/run_tests.py b/tests/run_tests.py
index 165d71c..48bd25f 100644
--- a/tests/run_tests.py
+++ b/tests/run_tests.py
@@ -38,6 +38,9 @@ class ItstoolTests(unittest.TestCase):
'out' : os.path.join('tests', "test.pot"),
'in' : os.path.join('tests', start_file),
}, expected_status)
+ # If we expected a failure, don't keep checking stuff
+ if expected_status != 0:
+ return result
# If a reference pot file is present, test the output with this file
if reference_pot is None:
reference_pot = start_file_base + ".pot"
@@ -180,18 +183,23 @@ class ItstoolTests(unittest.TestCase):
res = self._test_pot_generation('IT-malformed.xml', expected_status=1)
#self.assertTrue("libxml2.parserError" in res['errors'])
- def test_IT_malformed(self):
+ def test_IT_translate_with_external_dtds_malformed(self):
""" Test that parsing XML requiring external DTD generates exception """
res = self._test_pot_generation('IT-uses-external-dtds.xml', expected_status=1)
- def test_IT_malformed(self):
- """ Test that parsing XML requiring external DTD generates exception """
- res = self._test_pot_generation('IT-uses-external-dtds.xml', expected_status=0,
- options='--load-dtd')
-
def test_IT_translate_with_external_dtds(self):
self._test_translation_process('IT-uses-external-dtds.xml', options='--load-dtd')
+ # FIXME: It would be nice to be able to do this without loading the
+ # external subset, but libxml2 seems to verify entity references even
+ # if it doesn't do substitution.
+ def test_IT_keep_entities_1(self):
+ self._test_translation_process('IT-keep-entities-1.xml',
+ options='--load-dtd --keep-entities')
+
+ def test_IT_keep_entities_2(self):
+ self._test_translation_process('IT-keep-entities-2.xml', options='--keep-entities')
+
def test_IT_join_1(self):
res = self._test_translation_join('IT-join-1.xml', ('cs', 'de', 'fr'))