diff options
author | Yu-Jie Lin <livibetter@gmail.com> | 2013-08-11 22:54:53 +0800 |
---|---|---|
committer | Yu-Jie Lin <livibetter@gmail.com> | 2013-08-11 22:54:53 +0800 |
commit | 1e1c267aa36619749a82cfbd62a4f7fa69e44cb1 (patch) | |
tree | ad885fe7339327cf0a83b2cdb1f31538eb20b8fa | |
parent | 7a6a404de3c0cdc15d9511c1596837e444c4a040 (diff) | |
download | smartypants-1e1c267aa36619749a82cfbd62a4f7fa69e44cb1.tar.gz |
tab to 4 spaces
-rwxr-xr-x | smartypants.py | 1074 |
1 files changed, 537 insertions, 537 deletions
diff --git a/smartypants.py b/smartypants.py index 783914a..cafc0e7 100755 --- a/smartypants.py +++ b/smartypants.py @@ -84,28 +84,28 @@ specify configuration options. Numeric values are the easiest way to configure SmartyPants' behavior: "0" - Suppress all transformations. (Do nothing.) + Suppress all transformations. (Do nothing.) "1" - Performs default SmartyPants transformations: quotes (including - \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash) - is used to signify an em-dash; there is no support for en-dashes. + Performs default SmartyPants transformations: quotes (including + \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash) + is used to signify an em-dash; there is no support for en-dashes. "2" - Same as smarty_pants="1", except that it uses the old-school typewriter - shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``" - (dash dash dash) - for em-dashes. + Same as smarty_pants="1", except that it uses the old-school typewriter + shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``" + (dash dash dash) + for em-dashes. "3" - Same as smarty_pants="2", but inverts the shorthand for dashes: - "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for - en-dashes. + Same as smarty_pants="2", but inverts the shorthand for dashes: + "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for + en-dashes. "-1" - Stupefy mode. Reverses the SmartyPants transformation process, turning - the HTML entities produced by SmartyPants into their ASCII equivalents. - E.g. "“" is turned into a simple double-quote ("), "—" is - turned into two dashes, etc. + Stupefy mode. Reverses the SmartyPants transformation process, turning + the HTML entities produced by SmartyPants into their ASCII equivalents. + E.g. "“" is turned into a simple double-quote ("), "—" is + turned into two dashes, etc. The following single-character attribute values can be combined to toggle @@ -116,41 +116,41 @@ example, to educate normal quotes and em-dashes, but not ellipses or ``py['smartypants_attributes'] = "1"`` "q" - Educates normal quote characters: (") and ('). + Educates normal quote characters: (") and ('). "b" - Educates \`\`backticks'' -style double quotes. + Educates \`\`backticks'' -style double quotes. "B" - Educates \`\`backticks'' -style double quotes and \`single' quotes. + Educates \`\`backticks'' -style double quotes and \`single' quotes. "d" - Educates em-dashes. + Educates em-dashes. "D" - Educates em-dashes and en-dashes, using old-school typewriter shorthand: - (dash dash) for en-dashes, (dash dash dash) for em-dashes. + Educates em-dashes and en-dashes, using old-school typewriter shorthand: + (dash dash) for en-dashes, (dash dash dash) for em-dashes. "i" - Educates em-dashes and en-dashes, using inverted old-school typewriter - shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes. + Educates em-dashes and en-dashes, using inverted old-school typewriter + shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes. "e" - Educates ellipses. + Educates ellipses. "w" - Translates any instance of ``"`` into a normal double-quote character. - This should be of no interest to most people, but of particular interest - to anyone who writes their posts using Dreamweaver, as Dreamweaver - inexplicably uses this entity to represent a literal double-quote - character. SmartyPants only educates normal quotes, not entities (because - ordinarily, entities are used for the explicit purpose of representing the - specific character they represent). The "w" option must be used in - conjunction with one (or both) of the other quote options ("q" or "b"). - Thus, if you wish to apply all SmartyPants transformations (quotes, en- - and em-dashes, and ellipses) and also translate ``"`` entities into - regular quotes so SmartyPants can educate them, you should pass the - following to the smarty_pants attribute: + Translates any instance of ``"`` into a normal double-quote character. + This should be of no interest to most people, but of particular interest + to anyone who writes their posts using Dreamweaver, as Dreamweaver + inexplicably uses this entity to represent a literal double-quote + character. SmartyPants only educates normal quotes, not entities (because + ordinarily, entities are used for the explicit purpose of representing the + specific character they represent). The "w" option must be used in + conjunction with one (or both) of the other quote options ("q" or "b"). + Thus, if you wish to apply all SmartyPants transformations (quotes, en- + and em-dashes, and ellipses) and also translate ``"`` entities into + regular quotes so SmartyPants can educate them, you should pass the + following to the smarty_pants attribute: The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for which no Smarty Pants rendering will occur. @@ -229,40 +229,40 @@ Version History =============== 1.5_1.7: Fri, 09 Aug 2013 07:34:16 -0400 - - Add HBS language translation. Patch by by Vera Djuraskovic from - Webhostinggeeks.com - - Add Python3 support. + - Add HBS language translation. Patch by by Vera Djuraskovic from + Webhostinggeeks.com + - Add Python3 support. 1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400 - - Fixed bug where blocks of precious unalterable text was instead - interpreted. Thanks to Le Roux and Dirk van Oosterbosch. - + - Fixed bug where blocks of precious unalterable text was instead + interpreted. Thanks to Le Roux and Dirk van Oosterbosch. + 1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400 - - Fix bogus magical quotation when there is no hint that the - user wants it, e.g., in "21st century". Thanks to Nathan Hamblen. - - Be smarter about quotes before terminating numbers in an en-dash'ed - range. + - Fix bogus magical quotation when there is no hint that the + user wants it, e.g., in "21st century". Thanks to Nathan Hamblen. + - Be smarter about quotes before terminating numbers in an en-dash'ed + range. 1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500 - - Fix a date-processing bug, as reported by jacob childress. - - Begin a test-suite for ensuring correct output. - - Removed import of "string", since I didn't really need it. - (This was my first every Python program. Sue me!) + - Fix a date-processing bug, as reported by jacob childress. + - Begin a test-suite for ensuring correct output. + - Removed import of "string", since I didn't really need it. + (This was my first every Python program. Sue me!) 1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400 - - Abort processing if the flavour is in forbidden-list. Default of - [ "rss" ] (Idea of Wolfgang SCHNERRING.) - - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING. + - Abort processing if the flavour is in forbidden-list. Default of + [ "rss" ] (Idea of Wolfgang SCHNERRING.) + - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING. 1.5_1.2: Mon, 24 May 2004 08:14:54 -0400 - - Some single quotes weren't replaced properly. Diff-tesuji played - by Benjamin GEIGER. + - Some single quotes weren't replaced properly. Diff-tesuji played + by Benjamin GEIGER. 1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500 - - Support upcoming pyblosxom 0.9 plugin verification feature. + - Support upcoming pyblosxom 0.9 plugin verification feature. 1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500 - - Initial release + - Initial release Version Information ------------------- @@ -305,66 +305,66 @@ Copyright and License SmartyPants_ license:: - Copyright (c) 2003 John Gruber - (http://daringfireball.net/) - All rights reserved. + Copyright (c) 2003 John Gruber + (http://daringfireball.net/) + All rights reserved. - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. - * Neither the name "SmartyPants" nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. + * Neither the name "SmartyPants" nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. - This software is provided by the copyright holders and contributors "as - is" and any express or implied warranties, including, but not limited - to, the implied warranties of merchantability and fitness for a - particular purpose are disclaimed. In no event shall the copyright - owner or contributors be liable for any direct, indirect, incidental, - special, exemplary, or consequential damages (including, but not - limited to, procurement of substitute goods or services; loss of use, - data, or profits; or business interruption) however caused and on any - theory of liability, whether in contract, strict liability, or tort - (including negligence or otherwise) arising in any way out of the use - of this software, even if advised of the possibility of such damage. + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. smartypants.py license:: - smartypants.py is a derivative work of SmartyPants. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: + smartypants.py is a derivative work of SmartyPants. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. - This software is provided by the copyright holders and contributors "as - is" and any express or implied warranties, including, but not limited - to, the implied warranties of merchantability and fitness for a - particular purpose are disclaimed. In no event shall the copyright - owner or contributors be liable for any direct, indirect, incidental, - special, exemplary, or consequential damages (including, but not - limited to, procurement of substitute goods or services; loss of use, - data, or profits; or business interruption) however caused and on any - theory of liability, whether in contract, strict liability, or tort - (including negligence or otherwise) arising in any way out of the use - of this software, even if advised of the possibility of such damage. + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. @@ -392,516 +392,516 @@ tags_to_skip_regex = re.compile(r"<(/)?(pre|code|kbd|script|math)[^>]*>", re.I) def verify_installation(request): - return 1 - # assert the plugin is functional + return 1 + # assert the plugin is functional def cb_story(args): - global default_smartypants_attr + global default_smartypants_attr - try: - forbidden_flavours = args["entry"]["smartypants_forbidden_flavours"] - except KeyError: - forbidden_flavours = [ "rss" ] + try: + forbidden_flavours = args["entry"]["smartypants_forbidden_flavours"] + except KeyError: + forbidden_flavours = [ "rss" ] - try: - attributes = args["entry"]["smartypants_attributes"] - except KeyError: - attributes = default_smartypants_attr + try: + attributes = args["entry"]["smartypants_attributes"] + except KeyError: + attributes = default_smartypants_attr - if attributes is None: - attributes = default_smartypants_attr + if attributes is None: + attributes = default_smartypants_attr - entryData = args["entry"].getData() + entryData = args["entry"].getData() - try: - if args["request"]["flavour"] in forbidden_flavours: - return - except KeyError: - if "<" in args["entry"]["body"][0:15]: # sniff the stream - return # abort if it looks like escaped HTML. FIXME + try: + if args["request"]["flavour"] in forbidden_flavours: + return + except KeyError: + if "<" in args["entry"]["body"][0:15]: # sniff the stream + return # abort if it looks like escaped HTML. FIXME - # FIXME: make these configurable, perhaps? - args["entry"]["body"] = smartyPants(entryData, attributes) - args["entry"]["title"] = smartyPants(args["entry"]["title"], attributes) + # FIXME: make these configurable, perhaps? + args["entry"]["body"] = smartyPants(entryData, attributes) + args["entry"]["title"] = smartyPants(args["entry"]["title"], attributes) ### interal functions below here def smartyPants(text, attr=default_smartypants_attr): - convert_quot = False # should we translate " entities into normal quotes? - - # Parse attributes: - # 0 : do nothing - # 1 : set all - # 2 : set all, using old school en- and em- dash shortcuts - # 3 : set all, using inverted old school en and em- dash shortcuts - # - # q : quotes - # b : backtick quotes (``double'' only) - # B : backtick quotes (``double'' and `single') - # d : dashes - # D : old school dashes - # i : inverted old school dashes - # e : ellipses - # w : convert " entities to " for Dreamweaver users - - skipped_tag_stack = [] - do_dashes = "0" - do_backticks = "0" - do_quotes = "0" - do_ellipses = "0" - do_stupefy = "0" - - if attr == "0": - # Do nothing. - return text - elif attr == "1": - do_quotes = "1" - do_backticks = "1" - do_dashes = "1" - do_ellipses = "1" - elif attr == "2": - # Do everything, turn all options on, use old school dash shorthand. - do_quotes = "1" - do_backticks = "1" - do_dashes = "2" - do_ellipses = "1" - elif attr == "3": - # Do everything, turn all options on, use inverted old school dash shorthand. - do_quotes = "1" - do_backticks = "1" - do_dashes = "3" - do_ellipses = "1" - elif attr == "-1": - # Special "stupefy" mode. - do_stupefy = "1" - else: - for c in attr: - if c == "q": do_quotes = "1" - elif c == "b": do_backticks = "1" - elif c == "B": do_backticks = "2" - elif c == "d": do_dashes = "1" - elif c == "D": do_dashes = "2" - elif c == "i": do_dashes = "3" - elif c == "e": do_ellipses = "1" - elif c == "w": convert_quot = "1" - else: - pass - # ignore unknown option - - tokens = _tokenize(text) - result = [] - in_pre = False - - prev_token_last_char = "" - # This is a cheat, used to get some context - # for one-character tokens that consist of - # just a quote char. What we do is remember - # the last character of the previous text - # token, to use as context to curl single- - # character quote tokens correctly. - - for cur_token in tokens: - if cur_token[0] == "tag": - # Don't mess with quotes inside some tags. This does not handle self <closing/> tags! - result.append(cur_token[1]) - skip_match = tags_to_skip_regex.match(cur_token[1]) - if skip_match is not None: - if not skip_match.group(1): - skipped_tag_stack.append(skip_match.group(2).lower()) - in_pre = True - else: - if len(skipped_tag_stack) > 0: - if skip_match.group(2).lower() == skipped_tag_stack[-1]: - skipped_tag_stack.pop() - else: - pass - # This close doesn't match the open. This isn't XHTML. We should barf here. - if len(skipped_tag_stack) == 0: - in_pre = False - else: - t = cur_token[1] - last_char = t[-1:] # Remember last char of this token before processing. - if not in_pre: - oldstr = t - t = processEscapes(t) - - if convert_quot != "0": - t = re.sub('"', '"', t) - - if do_dashes != "0": - if do_dashes == "1": - t = educateDashes(t) - if do_dashes == "2": - t = educateDashesOldSchool(t) - if do_dashes == "3": - t = educateDashesOldSchoolInverted(t) - - if do_ellipses != "0": - t = educateEllipses(t) - - # Note: backticks need to be processed before quotes. - if do_backticks != "0": - t = educateBackticks(t) - - if do_backticks == "2": - t = educateSingleBackticks(t) - - if do_quotes != "0": - if t == "'": - # Special case: single-character ' token - if re.match("\S", prev_token_last_char): - t = "’" - else: - t = "‘" - elif t == '"': - # Special case: single-character " token - if re.match("\S", prev_token_last_char): - t = "”" - else: - t = "“" - - else: - # Normal case: - t = educateQuotes(t) - - if do_stupefy == "1": - t = stupefyEntities(t) - - prev_token_last_char = last_char - result.append(t) - - return "".join(result) + convert_quot = False # should we translate " entities into normal quotes? + + # Parse attributes: + # 0 : do nothing + # 1 : set all + # 2 : set all, using old school en- and em- dash shortcuts + # 3 : set all, using inverted old school en and em- dash shortcuts + # + # q : quotes + # b : backtick quotes (``double'' only) + # B : backtick quotes (``double'' and `single') + # d : dashes + # D : old school dashes + # i : inverted old school dashes + # e : ellipses + # w : convert " entities to " for Dreamweaver users + + skipped_tag_stack = [] + do_dashes = "0" + do_backticks = "0" + do_quotes = "0" + do_ellipses = "0" + do_stupefy = "0" + + if attr == "0": + # Do nothing. + return text + elif attr == "1": + do_quotes = "1" + do_backticks = "1" + do_dashes = "1" + do_ellipses = "1" + elif attr == "2": + # Do everything, turn all options on, use old school dash shorthand. + do_quotes = "1" + do_backticks = "1" + do_dashes = "2" + do_ellipses = "1" + elif attr == "3": + # Do everything, turn all options on, use inverted old school dash shorthand. + do_quotes = "1" + do_backticks = "1" + do_dashes = "3" + do_ellipses = "1" + elif attr == "-1": + # Special "stupefy" mode. + do_stupefy = "1" + else: + for c in attr: + if c == "q": do_quotes = "1" + elif c == "b": do_backticks = "1" + elif c == "B": do_backticks = "2" + elif c == "d": do_dashes = "1" + elif c == "D": do_dashes = "2" + elif c == "i": do_dashes = "3" + elif c == "e": do_ellipses = "1" + elif c == "w": convert_quot = "1" + else: + pass + # ignore unknown option + + tokens = _tokenize(text) + result = [] + in_pre = False + + prev_token_last_char = "" + # This is a cheat, used to get some context + # for one-character tokens that consist of + # just a quote char. What we do is remember + # the last character of the previous text + # token, to use as context to curl single- + # character quote tokens correctly. + + for cur_token in tokens: + if cur_token[0] == "tag": + # Don't mess with quotes inside some tags. This does not handle self <closing/> tags! + result.append(cur_token[1]) + skip_match = tags_to_skip_regex.match(cur_token[1]) + if skip_match is not None: + if not skip_match.group(1): + skipped_tag_stack.append(skip_match.group(2).lower()) + in_pre = True + else: + if len(skipped_tag_stack) > 0: + if skip_match.group(2).lower() == skipped_tag_stack[-1]: + skipped_tag_stack.pop() + else: + pass + # This close doesn't match the open. This isn't XHTML. We should barf here. + if len(skipped_tag_stack) == 0: + in_pre = False + else: + t = cur_token[1] + last_char = t[-1:] # Remember last char of this token before processing. + if not in_pre: + oldstr = t + t = processEscapes(t) + + if convert_quot != "0": + t = re.sub('"', '"', t) + + if do_dashes != "0": + if do_dashes == "1": + t = educateDashes(t) + if do_dashes == "2": + t = educateDashesOldSchool(t) + if do_dashes == "3": + t = educateDashesOldSchoolInverted(t) + + if do_ellipses != "0": + t = educateEllipses(t) + + # Note: backticks need to be processed before quotes. + if do_backticks != "0": + t = educateBackticks(t) + + if do_backticks == "2": + t = educateSingleBackticks(t) + + if do_quotes != "0": + if t == "'": + # Special case: single-character ' token + if re.match("\S", prev_token_last_char): + t = "’" + else: + t = "‘" + elif t == '"': + # Special case: single-character " token + if re.match("\S", prev_token_last_char): + t = "”" + else: + t = "“" + + else: + # Normal case: + t = educateQuotes(t) + + if do_stupefy == "1": + t = stupefyEntities(t) + + prev_token_last_char = last_char + result.append(t) + + return "".join(result) def educateQuotes(str): - """ - Parameter: String. - - Returns: The string, with "educated" curly quote HTML entities. - - Example input: "Isn't this fun?" - Example output: “Isn’t this fun?” - """ - - oldstr = str - punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" - - # Special case if the very first character is a quote - # followed by punctuation at a non-word-break. Close the quotes by brute force: - str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""’""", str) - str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""”""", str) - - # Special case for double sets of quotes, e.g.: - # <p>He said, "'Quoted' words in a larger quote."</p> - str = re.sub(r""""'(?=\w)""", """“‘""", str) - str = re.sub(r"""'"(?=\w)""", """‘“""", str) - - # Special case for decade abbreviations (the '80s): - str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str) - - close_class = r"""[^\ \t\r\n\[\{\(\-]""" - dec_dashes = r"""–|—""" - - # Get most opening single quotes: - opening_single_quotes_regex = re.compile(r""" - ( - \s | # a whitespace char, or - | # a non-breaking space entity, or - -- | # dashes, or - &[mn]dash; | # named dash entities - %s | # or decimal entities - &\#x201[34]; # or hex - ) - ' # the quote - (?=\w) # followed by a word character - """ % (dec_dashes,), re.VERBOSE) - str = opening_single_quotes_regex.sub(r"""\1‘""", str) - - closing_single_quotes_regex = re.compile(r""" - (%s) - ' - (?!\s | s\b | \d) - """ % (close_class,), re.VERBOSE) - str = closing_single_quotes_regex.sub(r"""\1’""", str) - - closing_single_quotes_regex = re.compile(r""" - (%s) - ' - (\s | s\b) - """ % (close_class,), re.VERBOSE) - str = closing_single_quotes_regex.sub(r"""\1’\2""", str) - - # Any remaining single quotes should be opening ones: - str = re.sub(r"""'""", r"""‘""", str) - - # Get most opening double quotes: - opening_double_quotes_regex = re.compile(r""" - ( - \s | # a whitespace char, or - | # a non-breaking space entity, or - -- | # dashes, or - &[mn]dash; | # named dash entities - %s | # or decimal entities - &\#x201[34]; # or hex - ) - " # the quote - (?=\w) # followed by a word character - """ % (dec_dashes,), re.VERBOSE) - str = opening_double_quotes_regex.sub(r"""\1“""", str) - - # Double closing quotes: - closing_double_quotes_regex = re.compile(r""" - #(%s)? # character that indicates the quote should be closing - " - (?=\s) - """ % (close_class,), re.VERBOSE) - str = closing_double_quotes_regex.sub(r"""”""", str) - - closing_double_quotes_regex = re.compile(r""" - (%s) # character that indicates the quote should be closing - " - """ % (close_class,), re.VERBOSE) - str = closing_double_quotes_regex.sub(r"""\1”""", str) - - # Any remaining quotes should be opening ones. - str = re.sub(r'"', r"""“""", str) - - return str + """ + Parameter: String. + + Returns: The string, with "educated" curly quote HTML entities. + + Example input: "Isn't this fun?" + Example output: “Isn’t this fun?” + """ + + oldstr = str + punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" + + # Special case if the very first character is a quote + # followed by punctuation at a non-word-break. Close the quotes by brute force: + str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""’""", str) + str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""”""", str) + + # Special case for double sets of quotes, e.g.: + # <p>He said, "'Quoted' words in a larger quote."</p> + str = re.sub(r""""'(?=\w)""", """“‘""", str) + str = re.sub(r"""'"(?=\w)""", """‘“""", str) + + # Special case for decade abbreviations (the '80s): + str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str) + + close_class = r"""[^\ \t\r\n\[\{\(\-]""" + dec_dashes = r"""–|—""" + + # Get most opening single quotes: + opening_single_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + ' # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_single_quotes_regex.sub(r"""\1‘""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (?!\s | s\b | \d) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (\s | s\b) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’\2""", str) + + # Any remaining single quotes should be opening ones: + str = re.sub(r"""'""", r"""‘""", str) + + # Get most opening double quotes: + opening_double_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + " # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_double_quotes_regex.sub(r"""\1“""", str) + + # Double closing quotes: + closing_double_quotes_regex = re.compile(r""" + #(%s)? # character that indicates the quote should be closing + " + (?=\s) + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""”""", str) + + closing_double_quotes_regex = re.compile(r""" + (%s) # character that indicates the quote should be closing + " + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""\1”""", str) + + # Any remaining quotes should be opening ones. + str = re.sub(r'"', r"""“""", str) + + return str def educateBackticks(str): - """ - Parameter: String. - Returns: The string, with ``backticks'' -style double quotes - translated into HTML curly quote entities. - Example input: ``Isn't this fun?'' - Example output: “Isn't this fun?” - """ + """ + Parameter: String. + Returns: The string, with ``backticks'' -style double quotes + translated into HTML curly quote entities. + Example input: ``Isn't this fun?'' + Example output: “Isn't this fun?” + """ - str = re.sub(r"""``""", r"""“""", str) - str = re.sub(r"""''""", r"""”""", str) - return str + str = re.sub(r"""``""", r"""“""", str) + str = re.sub(r"""''""", r"""”""", str) + return str def educateSingleBackticks(str): - """ - Parameter: String. - Returns: The string, with `backticks' -style single quotes - translated into HTML curly quote entities. - - Example input: `Isn't this fun?' - Example output: ‘Isn’t this fun?’ - """ + """ + Parameter: String. + Returns: The string, with `backticks' -style single quotes + translated into HTML curly quote entities. + + Example input: `Isn't this fun?' + Example output: ‘Isn’t this fun?’ + """ - str = re.sub(r"""`""", r"""‘""", str) - str = re.sub(r"""'""", r"""’""", str) - return str + str = re.sub(r"""`""", r"""‘""", str) + str = re.sub(r"""'""", r"""’""", str) + return str def educateDashes(str): - """ - Parameter: String. - - Returns: The string, with each instance of "--" translated to - an em-dash HTML entity. - """ + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity. + """ - str = re.sub(r"""---""", r"""–""", str) # en (yes, backwards) - str = re.sub(r"""--""", r"""—""", str) # em (yes, backwards) - return str + str = re.sub(r"""---""", r"""–""", str) # en (yes, backwards) + str = re.sub(r"""--""", r"""—""", str) # em (yes, backwards) + return str def educateDashesOldSchool(str): - """ - Parameter: String. - - Returns: The string, with each instance of "--" translated to - an en-dash HTML entity, and each "---" translated to - an em-dash HTML entity. - """ + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an en-dash HTML entity, and each "---" translated to + an em-dash HTML entity. + """ - str = re.sub(r"""---""", r"""—""", str) # em (yes, backwards) - str = re.sub(r"""--""", r"""–""", str) # en (yes, backwards) - return str + str = re.sub(r"""---""", r"""—""", str) # em (yes, backwards) + str = re.sub(r"""--""", r"""–""", str) # en (yes, backwards) + return str def educateDashesOldSchoolInverted(str): - """ - Parameter: String. - - Returns: The string, with each instance of "--" translated to - an em-dash HTML entity, and each "---" translated to - an en-dash HTML entity. Two reasons why: First, unlike the - en- and em-dash syntax supported by - EducateDashesOldSchool(), it's compatible with existing - entries written before SmartyPants 1.1, back when "--" was - only used for em-dashes. Second, em-dashes are more - common than en-dashes, and so it sort of makes sense that - the shortcut should be shorter to type. (Thanks to Aaron - Swartz for the idea.) - """ - str = re.sub(r"""---""", r"""–""", str) # em - str = re.sub(r"""--""", r"""—""", str) # en - return str + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity, and each "---" translated to + an en-dash HTML entity. Two reasons why: First, unlike the + en- and em-dash syntax supported by + EducateDashesOldSchool(), it's compatible with existing + entries written before SmartyPants 1.1, back when "--" was + only used for em-dashes. Second, em-dashes are more + common than en-dashes, and so it sort of makes sense that + the shortcut should be shorter to type. (Thanks to Aaron + Swartz for the idea.) + """ + str = re.sub(r"""---""", r"""–""", str) # em + str = re.sub(r"""--""", r"""—""", str) # en + return str def educateEllipses(str): - """ - Parameter: String. - Returns: The string, with each instance of "..." translated to - an ellipsis HTML entity. - - Example input: Huh...? - Example output: Huh…? - """ + """ + Parameter: String. + Returns: The string, with each instance of "..." translated to + an ellipsis HTML entity. + + Example input: Huh...? + Example output: Huh…? + """ - str = re.sub(r"""\.\.\.""", r"""…""", str) - str = re.sub(r"""\. \. \.""", r"""…""", str) - return str + str = re.sub(r"""\.\.\.""", r"""…""", str) + str = re.sub(r"""\. \. \.""", r"""…""", str) + return str def stupefyEntities(str): - """ - Parameter: String. - Returns: The string, with each SmartyPants HTML entity translated to - its ASCII counterpart. + """ + Parameter: String. + Returns: The string, with each SmartyPants HTML entity translated to + its ASCII counterpart. - Example input: “Hello — world.” - Example output: "Hello -- world." - """ + Example input: “Hello — world.” + Example output: "Hello -- world." + """ - str = re.sub(r"""–""", r"""-""", str) # en-dash - str = re.sub(r"""—""", r"""--""", str) # em-dash + str = re.sub(r"""–""", r"""-""", str) # en-dash + str = re.sub(r"""—""", r"""--""", str) # em-dash - str = re.sub(r"""‘""", r"""'""", str) # open single quote - str = re.sub(r"""’""", r"""'""", str) # close single quote + str = re.sub(r"""‘""", r"""'""", str) # open single quote + str = re.sub(r"""’""", r"""'""", str) # close single quote - str = re.sub(r"""“""", r'''"''', str) # open double quote - str = re.sub(r"""”""", r'''"''', str) # close double quote + str = re.sub(r"""“""", r'''"''', str) # open double quote + str = re.sub(r"""”""", r'''"''', str) # close double quote - str = re.sub(r"""…""", r"""...""", str)# ellipsis + str = re.sub(r"""…""", r"""...""", str)# ellipsis - return str + return str def processEscapes(str): - r""" - Parameter: String. - Returns: The string, with after processing the following backslash - escape sequences. This is useful if you want to force a "dumb" - quote or other character to appear. - - Escape Value - ------ ----- - \\ \ - \" " - \' ' - \. . - \- - - \` ` - """ - str = re.sub(r"""\\\\""", r"""\""", str) - str = re.sub(r'''\\"''', r""""""", str) - str = re.sub(r"""\\'""", r"""'""", str) - str = re.sub(r"""\\\.""", r""".""", str) - str = re.sub(r"""\\-""", r"""-""", str) - str = re.sub(r"""\\`""", r"""`""", str) - - return str + r""" + Parameter: String. + Returns: The string, with after processing the following backslash + escape sequences. This is useful if you want to force a "dumb" + quote or other character to appear. + + Escape Value + ------ ----- + \\ \ + \" " + \' ' + \. . + \- - + \` ` + """ + str = re.sub(r"""\\\\""", r"""\""", str) + str = re.sub(r'''\\"''', r""""""", str) + str = re.sub(r"""\\'""", r"""'""", str) + str = re.sub(r"""\\\.""", r""".""", str) + str = re.sub(r"""\\-""", r"""-""", str) + str = re.sub(r"""\\`""", r"""`""", str) + + return str def _tokenize(str): - """ - Parameter: String containing HTML markup. - Returns: Reference to an array of the tokens comprising the input - string. Each token is either a tag (possibly with nested, - tags contained therein, such as <a href="<MTFoo>">, or a - run of text between tags. Each element of the array is a - two-element array; the first is either 'tag' or 'text'; - the second is the actual value. - - Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. - <http://www.bradchoate.com/past/mtregex.php> - """ + """ + Parameter: String containing HTML markup. + Returns: Reference to an array of the tokens comprising the input + string. Each token is either a tag (possibly with nested, + tags contained therein, such as <a href="<MTFoo>">, or a + run of text between tags. Each element of the array is a + two-element array; the first is either 'tag' or 'text'; + the second is the actual value. + + Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. + <http://www.bradchoate.com/past/mtregex.php> + """ - pos = 0 - length = len(str) - tokens = [] + pos = 0 + length = len(str) + tokens = [] - depth = 6 - nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) - #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments - # (?: <\? .*? \?> ) | # directives - # %s # nested tags """ % (nested_tags,) - tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") + depth = 6 + nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) + #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments + # (?: <\? .*? \?> ) | # directives + # %s # nested tags """ % (nested_tags,) + tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") - token_match = tag_soup.search(str) + token_match = tag_soup.search(str) - previous_end = 0 - while token_match is not None: - if token_match.group(1): - tokens.append(['text', token_match.group(1)]) + previous_end = 0 + while token_match is not None: + if token_match.group(1): + tokens.append(['text', token_match.group(1)]) - tokens.append(['tag', token_match.group(2)]) + tokens.append(['tag', token_match.group(2)]) - previous_end = token_match.end() - token_match = tag_soup.search(str, token_match.end()) + previous_end = token_match.end() + token_match = tag_soup.search(str, token_match.end()) - if previous_end < len(str): - tokens.append(['text', str[previous_end:]]) + if previous_end < len(str): + tokens.append(['text', str[previous_end:]]) - return tokens + return tokens if __name__ == "__main__": - import locale + import locale - try: - locale.setlocale(locale.LC_ALL, '') - except: - pass + try: + locale.setlocale(locale.LC_ALL, '') + except: + pass - from docutils.core import publish_string - docstring_html = publish_string(__doc__, writer_name='html') + from docutils.core import publish_string + docstring_html = publish_string(__doc__, writer_name='html') - print(docstring_html) + print(docstring_html) - # Unit test output goes out stderr. No worries. - import unittest - sp = smartyPants + # Unit test output goes out stderr. No worries. + import unittest + sp = smartyPants - class TestSmartypantsAllAttributes(unittest.TestCase): - # the default attribute is "1", which means "all". + class TestSmartypantsAllAttributes(unittest.TestCase): + # the default attribute is "1", which means "all". - def test_dates(self): - self.assertEqual(sp("1440-80's"), "1440-80’s") - self.assertEqual(sp("1440-'80s"), "1440-‘80s") - self.assertEqual(sp("1440---'80s"), "1440–‘80s") - self.assertEqual(sp("1960s"), "1960s") # no effect. - self.assertEqual(sp("1960's"), "1960’s") - self.assertEqual(sp("one two '60s"), "one two ‘60s") - self.assertEqual(sp("'60s"), "‘60s") + def test_dates(self): + self.assertEqual(sp("1440-80's"), "1440-80’s") + self.assertEqual(sp("1440-'80s"), "1440-‘80s") + self.assertEqual(sp("1440---'80s"), "1440–‘80s") + self.assertEqual(sp("1960s"), "1960s") # no effect. + self.assertEqual(sp("1960's"), "1960’s") + self.assertEqual(sp("one two '60s"), "one two ‘60s") + self.assertEqual(sp("'60s"), "‘60s") - def test_skip_tags(self): - self.assertEqual( - sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""), - """<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""") - self.assertEqual( - sp("""<p>He said "Let's write some code." This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>"""), - """<p>He said “Let’s write some code.” This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>""") + def test_skip_tags(self): + self.assertEqual( + sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""), + """<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""") + self.assertEqual( + sp("""<p>He said "Let's write some code." This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>"""), + """<p>He said “Let’s write some code.” This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>""") - def test_ordinal_numbers(self): - self.assertEqual(sp("21st century"), "21st century") # no effect. - self.assertEqual(sp("3rd"), "3rd") # no effect. + def test_ordinal_numbers(self): + self.assertEqual(sp("21st century"), "21st century") # no effect. + self.assertEqual(sp("3rd"), "3rd") # no effect. - def test_educated_quotes(self): - self.assertEqual(sp('''"Isn't this fun?"'''), '''“Isn’t this fun?”''') + def test_educated_quotes(self): + self.assertEqual(sp('''"Isn't this fun?"'''), '''“Isn’t this fun?”''') - unittest.main() + unittest.main() |