summary | refs | log | tree | commit | diff
path: root/cmd2/parsing.py
diff options
context:
space:
mode:
author: Kevin Van Brunt <kmvanbrunt@gmail.com>, 2019-03-01 18:34:39 -0500
committer: Kevin Van Brunt <kmvanbrunt@gmail.com>, 2019-03-01 18:34:39 -0500
commit: de5213605d04ded441783ad77e87e91539ceee68 (patch)
tree: cf340688db1d8df5c0a9602fc872a3af721196bd /cmd2/parsing.py
parent: de701086ff832bad0f0d97ffb10c2159d56ede7d (diff)
download: cmd2-git-de5213605d04ded441783ad77e87e91539ceee68.tar.gz
Removed support for c-style and embedded comments
Diffstat (limited to 'cmd2/parsing.py')
-rw-r--r-- cmd2/parsing.py 54
1 files changed, 10 insertions, 44 deletions
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
index d4f82ac9..bd3a6900 100644
--- a/cmd2/parsing.py
+++ b/cmd2/parsing.py
@@ -236,33 +236,6 @@ class StatementParser:
else:
self.shortcuts = shortcuts
- # this regular expression matches C-style comments and quoted
- # strings, i.e. stuff between single or double quote marks
- # it's used with _comment_replacer() to strip out the C-style
- # comments, while leaving C-style comments that are inside either
- # double or single quotes.
- #
- # this big regular expression can be broken down into 3 regular
- # expressions that are OR'ed together with a pipe character
- #
- # /\*.*\*/ Matches C-style comments (i.e. /* comment */)
- # does not match unclosed comments.
- # \'(?:\\.|[^\\\'])*\' Matches a single quoted string, allowing
- # for embedded backslash escaped single quote
- # marks.
- # "(?:\\.|[^\\"])*" Matches a double quoted string, allowing
- # for embedded backslash escaped double quote
- # marks.
- #
- # by way of reminder the (?:...) regular expression syntax is just
- # a non-capturing version of regular parenthesis. We need the non-
- # capturing syntax because _comment_replacer() looks at match
- # groups
- self.comment_pattern = re.compile(
- r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
- re.DOTALL | re.MULTILINE
- )
-
# commands have to be a word, so make a regular expression
# that matches the first word in the line. This regex has three
# parts:
@@ -315,6 +288,9 @@ class StatementParser:
if not word:
return False, 'cannot be an empty string'
+ if word.startswith(constants.COMMENT_CHAR):
+ return False, 'cannot start with the comment character'
+
for (shortcut, _) in self.shortcuts:
if word.startswith(shortcut):
# Build an error string with all shortcuts listed
@@ -338,24 +314,23 @@ class StatementParser:
def tokenize(self, line: str) -> List[str]:
"""Lex a string into a list of tokens.
- Comments are removed, and shortcuts and aliases are expanded.
+ shortcuts and aliases are expanded and comments are removed
Raises ValueError if there are unclosed quotation marks.
"""
- # strip C-style comments
- # shlex will handle the python/shell style comments for us
- line = re.sub(self.comment_pattern, self._comment_replacer, line)
-
# expand shortcuts and aliases
line = self._expand(line)
+ # check if this line is a comment
+ if line.strip().startswith(constants.COMMENT_CHAR):
+ return []
+
# split on whitespace
- lexer = shlex.shlex(line, posix=False)
- lexer.whitespace_split = True
+ tokens = shlex.split(line, comments=False, posix=False)
# custom lexing
- tokens = self._split_on_punctuation(list(lexer))
+ tokens = self._split_on_punctuation(tokens)
return tokens
def parse(self, line: str) -> Statement:
@@ -610,15 +585,6 @@ class StatementParser:
return command, args
- @staticmethod
- def _comment_replacer(match):
- matched_string = match.group(0)
- if matched_string.startswith('/'):
- # the matched string was a comment, so remove it
- return ''
- # the matched string was a quoted string, return the match
- return matched_string
-
def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
"""Further splits tokens from a command line using punctuation characters