summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-03-19 22:50:29 +0000
committer ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-03-19 22:50:29 +0000
commit f207b22ae55e457ca98b455804bcadf509cccd26 (patch)
tree 4fcec2571f7a538e7bd29148d6f147f28fdfe24c
parent 09ad6b4f0deccf6ae60e697117e3b24fc9cfc513 (diff)
download pyparsing-f207b22ae55e457ca98b455804bcadf509cccd26.tar.gz
Added default behavior to QuotedString to convert embedded '\t', '\n', etc. characters to their whitespace counterparts; added support in new init arg, convertWhitespaceEscapes.
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@330 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r-- src/CHANGES 4
-rw-r--r-- src/pyparsing.py 17
-rw-r--r-- src/update_pyparsing_timestamp.py 17
3 files changed, 36 insertions, 2 deletions
diff --git a/src/CHANGES b/src/CHANGES
index f615f4d..2a2bd57 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -19,6 +19,10 @@ Version 2.1.1 -
an ambiguous signature for the generated parse action, which fails in
PyPy. Reported by Evan Hubinger, thanks Evan!
+- Added default behavior to QuotedString to convert embedded '\t', '\n',
+ etc. characters to their whitespace counterparts. Found during Q&A
+ exchange on SO with Maxim.
+
Version 2.1.0 - February, 2016
------------------------------
diff --git a/src/pyparsing.py b/src/pyparsing.py
index b1d5598..0e62423 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.1"
-__versionTime__ = "5 Mar 2016 23:42"
+__versionTime__ = "19 Mar 2016 22:48 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -1941,7 +1941,7 @@ class Regex(Token):
class QuotedString(Token):
"""Token for matching strings that are delimited by quoting characters.
"""
- def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
+ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
"""
Defined with the following parameters:
- quoteChar - string of one or more characters defining the quote delimiting string
@@ -1950,6 +1950,7 @@ class QuotedString(Token):
- multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
- unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+ - convertWhitespaceEscapes - convert escaped whitespace ('\t', '\n', etc.) to actual whitespace (default=C{True})
"""
super(QuotedString,self).__init__()
@@ -1975,6 +1976,7 @@ class QuotedString(Token):
self.escChar = escChar
self.escQuote = escQuote
self.unquoteResults = unquoteResults
+ self.convertWhitespaceEscapes = convertWhitespaceEscapes
if multiline:
self.flags = re.MULTILINE | re.DOTALL
@@ -2028,6 +2030,17 @@ class QuotedString(Token):
ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
if isinstance(ret,basestring):
+ # replace escaped whitespace
+ if '\\' in ret and self.convertWhitespaceEscapes:
+ ws_map = {
+ r'\t' : '\t',
+ r'\n' : '\n',
+ r'\f' : '\f',
+ r'\r' : '\r',
+ }
+ for wslit,wschar in ws_map.items():
+ ret = ret.replace(wslit, wschar)
+
# replace escaped characters
if self.escChar:
ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)
diff --git a/src/update_pyparsing_timestamp.py b/src/update_pyparsing_timestamp.py
new file mode 100644
index 0000000..80ea3d4
--- /dev/null
+++ b/src/update_pyparsing_timestamp.py
@@ -0,0 +1,17 @@
+from pyparsing import quotedString  # script purpose: rewrite the quoted __versionTime__ value in pyparsing.py
+from datetime import datetime
+
+nw = datetime.utcnow()  # current time as a naive datetime expressed in UTC
+nowstring = '"%s"' % (nw.strftime("%d %b %Y %X")[:-3] + " UTC")  # day, month abbrev, year, %X time; last 3 chars sliced off (presumably ":SS" -- %X is locale-dependent), " UTC" appended, wrapped in double quotes
+print (nowstring)  # echo the new timestamp literal
+
+quoted_time = quotedString()  # expression obtained by calling quotedString; used to match the existing quoted value
+quoted_time.setParseAction(lambda: nowstring)  # zero-argument action returning the new literal (presumably substituted for the match by transformString -- confirm)
+
+version_time = "__versionTime__ = " + quoted_time  # the literal text '__versionTime__ = ' followed by a quoted string
+with open('pyparsing.py') as oldpp:  # relative path: resolved against the current working directory
+ new_code = version_time.transformString(oldpp.read())  # run the expression over the whole file text
+
+with open('pyparsing.py','w') as newpp:  # reopen the same path in write mode, truncating it
+ newpp.write(new_code)  # write the transformed source back in place
+