summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2013-04-22 20:29:40 +0000
committer milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2013-04-22 20:29:40 +0000
commit 0297d105f5963cf4c74d6476629691bb23419eb2 (patch)
tree 42e705e82039fd759bc1d110de1b99435176a2f7
parent 854b2e9f1bfc86797b51a671f1f0b70090d1c8f6 (diff)
download docutils-0297d105f5963cf4c74d6476629691bb23419eb2.tar.gz
Fix for changes in 7650.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7651 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- docutils/utils/punctuation_chars.py 120
1 files changed, 61 insertions, 59 deletions
diff --git a/docutils/utils/punctuation_chars.py b/docutils/utils/punctuation_chars.py
index f30768197..6bff11b71 100644
--- a/docutils/utils/punctuation_chars.py
+++ b/docutils/utils/punctuation_chars.py
@@ -38,6 +38,33 @@ closers = ur"""\"\'\)\>\]\}༻༽᚜⁆⁾₎〉❩❫❭❯❱❳❵⟆⟧⟩
delimiters = ur"\-\/\:֊־᐀᠆‐‑‒–—―⸗⸚〜〰゠︱︲﹘﹣-¡·¿;·՚՛՜՝՞՟։׀׃׆׳״؉؊،؍؛؞؟٪٫٬٭۔܀܁܂܃܄܅܆܇܈܉܊܋܌܍߷߸߹࠰࠱࠲࠳࠴࠵࠶࠷࠸࠹࠺࠻࠼࠽࠾।॥॰෴๏๚๛༄༅༆༇༈༉༊་༌།༎༏༐༑༒྅࿐࿑࿒࿓࿔၊။၌၍၎၏჻፡።፣፤፥፦፧፨᙭᙮᛫᛬᛭᜵᜶។៕៖៘៙៚᠀᠁᠂᠃᠄᠅᠇᠈᠉᠊᥄᥅᧞᧟᨞᨟᪠᪡᪢᪣᪤᪥᪦᪨᪩᪪᪫᪬᪭᭚᭛᭜᭝᭞᭟᭠᰻᰼᰽᰾᰿᱾᱿᳓‖‗†‡•‣․‥…‧‰‱′″‴‵‶‷‸※‼‽‾⁁⁂⁃⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁓⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞⳹⳺⳻⳼⳾⳿⸀⸁⸆⸇⸈⸋⸎⸏⸐⸑⸒⸓⸔⸕⸖⸘⸙⸛⸞⸟⸪⸫⸬⸭⸮⸰⸱、。〃〽・꓾꓿꘍꘎꘏꙳꙾꛲꛳꛴꛵꛶꛷꡴꡵꡶꡷꣎꣏꣸꣹꣺꤮꤯꥟꧁꧂꧃꧄꧅꧆꧇꧈꧉꧊꧋꧌꧍꧞꧟꩜꩝꩞꩟꫞꫟꯫︐︑︒︓︔︕︖︙︰﹅﹆﹉﹊﹋﹌﹐﹑﹒﹔﹕﹖﹗﹟﹠﹡﹨﹪﹫!"#%&'*,./:;?@\。、・𐄀𐄁𐎟𐏐𐡗𐤟𐤿𐩐𐩑𐩒𐩓𐩔𐩕𐩖𐩗𐩘𐩿𐬹𐬺𐬻𐬼𐬽𐬾𐬿𑂻𑂼𑂾𑂿𑃀𑃁𒑰𒑱𒑲𒑳"
closing_delimiters = ur"\.\,\;\!\?"
+
+
+# Matching open/close quotes
+# --------------------------
+
+# Rule (5) requires determination of matching open/close pairs. However,
+# the pairing of open/close quotes is ambiguous due to different typographic
+# conventions in different languages.
+
+quote_pairs = {u'\xbb': u'\xbb', # Swedish
+ u'\u2018': u'\u201a', # Greek
+ u'\u2019': u'\u2019', # Swedish
+ u'\u201a': u'\u2018\u2019', # German, Polish
+ u'\u201c': u'\u201e', # German
+ u'\u201e': u'\u201c\u201d',
+ u'\u201d': u'\u201d', # Swedish
+ u'\u203a': u'\u203a', # Swedish
+ }
+
+def match_chars(c1, c2):
+ try:
+ i = openers.index(c1)
+ except ValueError: # c1 not in openers
+ return False
+ return c2 == closers[i] or c2 in quote_pairs.get(c1, '')
+
+
# Running this file as a standalone module checks the definitions against a
# re-calculation::
@@ -156,75 +183,50 @@ if __name__ == '__main__':
for chars in (openers, closers, delimiters, closing_delimiters)]
-# Matching open/close quotes
-# --------------------------
-
-# Rule (5) requires determination of matching open/close pairs. However,
-# the pairing of open/close quotes is ambigue due to different typographic
-# conventions in different languages.
-
- quote_pairs = {u'\xbb': u'\xbb', # Swedish
- u'\u2018': u'\u201a', # Greek
- u'\u2019': u'\u2019', # Swedish
- u'\u201a': u'\u2018\u2019', # German, Polish
- u'\u201c': u'\u201e', # German
- u'\u201e': u'\u201c\u201d',
- u'\u201d': u'\u201d', # Swedish
- u'\u203a': u'\u203a', # Swedish
- }
-
- def match_chars(c1, c2):
- try:
- i = openers.index(c1)
- except ValueError: # c1 not in openers
- return False
- return c2 == closers[i] or c2 in quote_pairs.get(c1, '')
-
-
# print results
# =============
# (re) create and compare the samples:
- (o, c, d, cd) = punctuation_samples()
- if o != openers:
- print '- openers = ur"""%s"""' % openers.encode('utf8')
- print '+ openers = ur"""%s"""' % o.encode('utf8')
- if c != closers:
- print '- closers = ur"""%s"""' % closers.encode('utf8')
- print '+ closers = ur"""%s"""' % c.encode('utf8')
- if d != delimiters:
- print '- delimiters = ur"%s"' % delimiters.encode('utf8')
- print '+ delimiters = ur"%s"' % d.encode('utf8')
- if cd != closing_delimiters:
- print '- closing_delimiters = ur"%s"' % closing_delimiters.encode('utf8')
- print '+ closing_delimiters = ur"%s"' % cd.encode('utf8')
+ (o, c, d, cd) = punctuation_samples()
+ if o != openers:
+ print '- openers = ur"""%s"""' % openers.encode('utf8')
+ print '+ openers = ur"""%s"""' % o.encode('utf8')
+ if c != closers:
+ print '- closers = ur"""%s"""' % closers.encode('utf8')
+ print '+ closers = ur"""%s"""' % c.encode('utf8')
+ if d != delimiters:
+ print '- delimiters = ur"%s"' % delimiters.encode('utf8')
+ print '+ delimiters = ur"%s"' % d.encode('utf8')
+ if cd != closing_delimiters:
+ print '- closing_delimiters = ur"%s"' % closing_delimiters.encode('utf8')
+ print '+ closing_delimiters = ur"%s"' % cd.encode('utf8')
# test prints
- # print 'openers = ', repr(openers)
- # print 'closers = ', repr(closers)
- # print 'delimiters = ', repr(delimiters)
- # print 'closing_delimiters = ', repr(closing_delimiters)
+ # print 'openers = ', repr(openers)
+ # print 'closers = ', repr(closers)
+ # print 'delimiters = ', repr(delimiters)
+ # print 'closing_delimiters = ', repr(closing_delimiters)
- # ucharlists = unicode_charlists(unicode_punctuation_categories)
- # for cat, chars in ucharlists.items():
- # # print cat, chars
- # # compact output (visible with a comprehensive font):
- # print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8')
+ # ucharlists = unicode_charlists(unicode_punctuation_categories)
+ # for cat, chars in ucharlists.items():
+ # # print cat, chars
+ # # compact output (visible with a comprehensive font):
+ # print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8')
# verbose print
- print 'openers:'
- for ch in openers:
- print ch.encode('utf8'), unicodedata.name(ch)
- print 'closers:'
- for ch in closers:
- print ch.encode('utf8'), unicodedata.name(ch)
- print 'delimiters:'
- for ch in delimiters:
- print ch.encode('utf8'), unicodedata.name(ch)
- print 'closing_delimiters:'
- for ch in closing_delimiters:
- print ch.encode('utf8'), unicodedata.name(ch)
+ print 'openers:'
+ for ch in openers:
+ print ch.encode('utf8'), unicodedata.name(ch)
+ print 'closers:'
+ for ch in closers:
+ print ch.encode('utf8'), unicodedata.name(ch)
+ print 'delimiters:'
+ for ch in delimiters:
+ print ch.encode('utf8'), unicodedata.name(ch)
+ print 'closing_delimiters:'
+ for ch in closing_delimiters:
+ print ch.encode('utf8'), unicodedata.name(ch)