diff options
| author | goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2016-12-16 03:45:07 +0000 |
|---|---|---|
| committer | goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2016-12-16 03:45:07 +0000 |
| commit | 710aed2c2e714215e48ff0365d42888dfc5e013a (patch) | |
| tree | 90ff05e29e0d3f3e0000d2c886a9926c2c6a0484 /docutils/utils | |
| parent | 7323a5f7dfb095665c074795c782dad5c151944b (diff) | |
| download | docutils-710aed2c2e714215e48ff0365d42888dfc5e013a.tar.gz | |
Added functionality (plus tests & docs): escaped whitespace in URI contexts.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7998 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/utils')
| -rw-r--r-- | docutils/utils/__init__.py | 13 |
1 files changed, 12 insertions, 1 deletions
```diff
diff --git a/docutils/utils/__init__.py b/docutils/utils/__init__.py
index 57ce83abb..919722943 100644
--- a/docutils/utils/__init__.py
+++ b/docutils/utils/__init__.py
@@ -13,6 +13,7 @@ import sys
 import os
 import os.path
 import re
+import itertools
 import warnings
 import unicodedata
 from docutils import ApplicationError, DataError
@@ -575,7 +576,7 @@ def escape2null(text):
             parts.append('\x00' + text[found+1:found+2])
             start = found + 2  # skip character after escape
 
-def unescape(text, restore_backslashes=False):
+def unescape(text, restore_backslashes=False, respect_whitespace=False):
     """
     Return a string with nulls removed or restored to backslashes.
     Backslash-escaped spaces are also removed.
@@ -587,6 +588,16 @@ def unescape(text, restore_backslashes=False):
             text = ''.join(text.split(sep))
         return text
 
+def split_escaped_whitespace(text):
+    """
+    Split `text` on escaped whitespace (null+space or null+newline).
+    Return a list of strings.
+    """
+    strings = text.split('\x00 ')
+    strings = [string.split('\x00\n') for string in strings]
+    # flatten list of lists of strings to list of strings:
+    return list(itertools.chain(*strings))
+
 def strip_combining_chars(text):
     if isinstance(text, str) and sys.version_info < (3,0):
         return text
```
