diff options
author | Yu-Jie Lin <livibetter@gmail.com> | 2016-12-25 14:02:32 +0800 |
---|---|---|
committer | Yu-Jie Lin <livibetter@gmail.com> | 2016-12-25 14:02:32 +0800 |
commit | 3e3853d07b688a5fdeaddf3a1cdca237011ce3b5 (patch) | |
tree | eaa633bce33aeb4859296d01c4f77f11a93f6a33 | |
parent | 95e665d583fb7eb8fe1d9bf73ed7360022b6c2ad (diff) | |
download | smartypants-git-3e3853d07b688a5fdeaddf3a1cdca237011ce3b5.tar.gz |
add Attr.u (Unicode) and Attr.h (HTML named entities) output options (#6)
-rw-r--r-- | CHANGES.rst | 4 | ||||
-rwxr-xr-x | smartypants.py | 69 | ||||
-rw-r--r-- | tests/test.py | 16 |
3 files changed, 68 insertions, 21 deletions
diff --git a/CHANGES.rst b/CHANGES.rst index 2cf15cf..195f75c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -58,6 +58,10 @@ Development - drop fooBarXyz functions, such as ``smartyPants``, ``educateQuotes``, and ``processEscapes`` ++ add ``Attr.u`` and ``Attr.h`` for Unicode characters and HTML named entities + outputs, respectively. The ``stupefy_entities`` has become + ``convert_entities`` to support all three types of conversions. (#6) + * Makefile + test packages build in ``test_setup`` target diff --git a/smartypants.py b/smartypants.py index 975297b..7078547 100755 --- a/smartypants.py +++ b/smartypants.py @@ -92,13 +92,28 @@ class _Attr(object): regular quotes so SmartyPants can educate them. """ - s = 1 << 8 + u = 0 << 9 | 1 << 8 """ - Stupefy mode. Reverses the SmartyPants transformation process, turning - the HTML entities produced by SmartyPants into their ASCII equivalents. - E.g. ``&#8220;`` is turned into a simple double-quote ("), ``&#8212;`` is - turned into two dashes, etc. + Output Unicode characters instead of numeric character references, for + example, from ``&#8220;`` to left double quotation mark (``“``) (U+201C). + + .. seealso:: :func:`convert_entities` + """ + h = 1 << 9 | 0 << 8 + """ + Output HTML named entities instead of numeric character references, for + example, from ``&#8220;`` to ``&ldquo;``. + + .. seealso:: :func:`convert_entities` + """ + s = 1 << 9 | 1 << 8 """ + Output ASCII equivalents instead of numeric character references, for + example, from ``&#8212;`` to ``--``. + + .. seealso:: :func:`convert_entities` + """ + mask_o = u | h | s set0 = 0 "suppress all transformations. 
(Do nothing.)" @@ -183,7 +198,7 @@ def smartypants(text, attr=None): do_backticks = attr & Attr.mask_b do_dashes = attr & Attr.mask_d do_ellipses = attr & Attr.e - do_stupefy = attr & Attr.s + do_entities = attr & Attr.mask_o convert_quot = attr & Attr.w tokens = _tokenize(text) @@ -267,8 +282,12 @@ def smartypants(text, attr=None): # Normal case: t = convert_quotes(t) - if do_stupefy: - t = stupefy_entities(t) + if do_entities: + mode = (0 if do_entities == Attr.u else + 1 if do_entities == Attr.h else + 2 if do_entities == Attr.s else + 3) # would result in key error + t = convert_entities(t, mode) prev_token_last_char = last_char result.append(t) @@ -464,24 +483,34 @@ def convert_ellipses(text): return text -def stupefy_entities(text): +def convert_entities(text, mode): """ - Convert SmartyPants HTML entities in *text* into their ASCII counterparts. + Convert numeric character references to, if *mode* is + + - *0*: Unicode characters + - *1*: HTML named entities + - *2*: ASCII equivalents - >>> print(stupefy_entities('&#8220;Hello &#8212; world.&#8221;')) + >>> print(convert_entities('&#8216;', 0)) + ‘ + >>> print(convert_entities('&#8216;SmartyPants&#8217;', 1)) + &lsquo;SmartyPants&rsquo; + >>> print(convert_entities('&#8220;Hello &#8212; world.&#8221;', 2)) "Hello -- world." 
""" - text = re.sub('&#8211;', '-', text) # en-dash - text = re.sub('&#8212;', '--', text) # em-dash - - text = re.sub('&#8216;', "'", text) # open single quote - text = re.sub('&#8217;', "'", text) # close single quote - - text = re.sub('&#8220;', '"', text) # open double quote - text = re.sub('&#8221;', '"', text) # close double quote + CTBL = { + '&#8211;': ('–', '&ndash;', '-'), + '&#8212;': ('—', '&mdash;', '--'), + '&#8216;': ('‘', '&lsquo;', "'"), + '&#8217;': ('’', '&rsquo;', "'"), + '&#8220;': ('“', '&ldquo;', '"'), + '&#8221;': ('”', '&rdquo;', '"'), + '&#8230;': ('…', '&hellip;', '...'), + } - text = re.sub('&#8230;', '...', text) # ellipsis + for k, v in CTBL.items(): + text = text.replace(k, v[mode]) return text diff --git a/tests/test.py b/tests/test.py index c9451ac..a16178b 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,5 +1,6 @@ #!/usr/bin/env python -# Copyright (c) 2013 Yu-Jie Lin +# -*- coding: utf-8 -*- +# Copyright (c) 2013, 2016 Yu-Jie Lin # Licensed under the BSD License, for detailed license information, see COPYING import doctest @@ -133,6 +134,19 @@ document.write('<a href="' + href + '">' + linktext + "</a>"); self.assertEqual(sp('"Isn\'t this fun?"'), '&#8220;Isn&#8217;t this fun?&#8221;') + def test_convert_entities(self): + + self.assertEqual(sp('"quote here"', Attr.set1 | Attr.u), + '“quote here”') + self.assertEqual(sp('"quote–here"', Attr.set1 | Attr.u), + '“quote–here”') + + self.assertEqual(sp('"quote here"', Attr.set1 | Attr.h), + '&ldquo;quote here&rdquo;') + + self.assertEqual(sp('"quote here"', Attr.set1 | Attr.s), + '"quote here"') + def load_tests(loader, tests, pattern): |