Removed support for C-style and embedded comments

author: Kevin Van Brunt <kmvanbrunt@gmail.com> 2019-03-01 18:34:39 -0500
committer: Kevin Van Brunt <kmvanbrunt@gmail.com> 2019-03-01 18:34:39 -0500
commit: de5213605d04ded441783ad77e87e91539ceee68 (patch)
tree: cf340688db1d8df5c0a9602fc872a3af721196bd /cmd2/parsing.py
parent: de701086ff832bad0f0d97ffb10c2159d56ede7d (diff)
download: cmd2-git-de5213605d04ded441783ad77e87e91539ceee68.tar.gz
1 files changed, 10 insertions, 44 deletions
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
index d4f82ac9..bd3a6900 100644
--- a/cmd2/parsing.py
+++ b/cmd2/parsing.py
@@ -236,33 +236,6 @@ class StatementParser:
         else:
             self.shortcuts = shortcuts
 
-        # this regular expression matches C-style comments and quoted
-        # strings, i.e. stuff between single or double quote marks
-        # it's used with _comment_replacer() to strip out the C-style
-        # comments, while leaving C-style comments that are inside either
-        # double or single quotes.
-        #
-        # this big regular expression can be broken down into 3 regular
-        # expressions that are OR'ed together with a pipe character
-        #
-        # /\*.*\*/               Matches C-style comments (i.e. /* comment */)
-        #                        does not match unclosed comments.
-        # \'(?:\\.|[^\\\'])*\'   Matches a single quoted string, allowing
-        #                        for embedded backslash escaped single quote
-        #                        marks.
-        # "(?:\\.|[^\\"])*"      Matches a double quoted string, allowing
-        #                        for embedded backslash escaped double quote
-        #                        marks.
-        #
-        # by way of reminder the (?:...) regular expression syntax is just
-        # a non-capturing version of regular parenthesis. We need the non-
-        # capturing syntax because _comment_replacer() looks at match
-        # groups
-        self.comment_pattern = re.compile(
-            r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
-            re.DOTALL | re.MULTILINE
-        )
-
         # commands have to be a word, so make a regular expression
         # that matches the first word in the line. This regex has three
         # parts:
@@ -315,6 +288,9 @@ class StatementParser:
         if not word:
             return False, 'cannot be an empty string'
 
+        if word.startswith(constants.COMMENT_CHAR):
+            return False, 'cannot start with the comment character'
+
         for (shortcut, _) in self.shortcuts:
             if word.startswith(shortcut):
                 # Build an error string with all shortcuts listed
@@ -338,24 +314,23 @@ class StatementParser:
     def tokenize(self, line: str) -> List[str]:
         """Lex a string into a list of tokens.
 
-        Comments are removed, and shortcuts and aliases are expanded.
+        shortcuts and aliases are expanded and comments are removed
 
         Raises ValueError if there are unclosed quotation marks.
         """
 
-        # strip C-style comments
-        # shlex will handle the python/shell style comments for us
-        line = re.sub(self.comment_pattern, self._comment_replacer, line)
-
         # expand shortcuts and aliases
         line = self._expand(line)
 
+        # check if this line is a comment
+        if line.strip().startswith(constants.COMMENT_CHAR):
+            return []
+
         # split on whitespace
-        lexer = shlex.shlex(line, posix=False)
-        lexer.whitespace_split = True
+        tokens = shlex.split(line, comments=False, posix=False)
 
         # custom lexing
-        tokens = self._split_on_punctuation(list(lexer))
+        tokens = self._split_on_punctuation(tokens)
         return tokens
 
     def parse(self, line: str) -> Statement:
@@ -610,15 +585,6 @@ class StatementParser:
 
         return command, args
 
-    @staticmethod
-    def _comment_replacer(match):
-        matched_string = match.group(0)
-        if matched_string.startswith('/'):
-            # the matched string was a comment, so remove it
-            return ''
-        # the matched string was a quoted string, return the match
-        return matched_string
-
     def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
         """Further splits tokens from a command line using punctuation characters
author	Kevin Van Brunt <kmvanbrunt@gmail.com>	2019-03-01 18:34:39 -0500
committer	Kevin Van Brunt <kmvanbrunt@gmail.com>	2019-03-01 18:34:39 -0500
commit	de5213605d04ded441783ad77e87e91539ceee68 (patch)
tree	cf340688db1d8df5c0a9602fc872a3af721196bd /cmd2/parsing.py
parent	de701086ff832bad0f0d97ffb10c2159d56ede7d (diff)
download	cmd2-git-de5213605d04ded441783ad77e87e91539ceee68.tar.gz