diff options
Diffstat (limited to 'Lib/reconvert.py')
-rwxr-xr-x | Lib/reconvert.py | 192 |
1 files changed, 0 insertions, 192 deletions
diff --git a/Lib/reconvert.py b/Lib/reconvert.py deleted file mode 100755 index 64bab5b272..0000000000 --- a/Lib/reconvert.py +++ /dev/null @@ -1,192 +0,0 @@ -#! /usr/bin/env python - -r"""Convert old ("regex") regular expressions to new syntax ("re"). - -When imported as a module, there are two functions, with their own -strings: - - convert(s, syntax=None) -- convert a regex regular expression to re syntax - - quote(s) -- return a quoted string literal - -When used as a script, read a Python string literal (or any other -expression evaluating to a string) from stdin, and write the -translated expression to stdout as a string literal. Unless stdout is -a tty, no trailing \n is written to stdout. This is done so that it -can be used with Emacs C-U M-| (shell-command-on-region with argument -which filters the region through the shell command). - -No attempt has been made at coding for performance. - -Translation table... - - \( ( (unless RE_NO_BK_PARENS set) - \) ) (unless RE_NO_BK_PARENS set) - \| | (unless RE_NO_BK_VBAR set) - \< \b (not quite the same, but alla...) - \> \b (not quite the same, but alla...) - \` \A - \' \Z - -Not translated... - - . - ^ - $ - * - + (unless RE_BK_PLUS_QM set, then to \+) - ? (unless RE_BK_PLUS_QM set, then to \?) - \ - \b - \B - \w - \W - \1 ... \9 - -Special cases... - - Non-printable characters are always replaced by their 3-digit - escape code (except \t, \n, \r, which use mnemonic escapes) - - Newline is turned into | when RE_NEWLINE_OR is set - -XXX To be done... - - [...] (different treatment of backslashed items?) - [^...] (different treatment of backslashed items?) - ^ $ * + ? (in some error contexts these are probably treated differently) - \vDD \DD (in the regex docs but only works when RE_ANSI_HEX set) - -""" - - -import warnings -warnings.filterwarnings("ignore", ".* regex .*", DeprecationWarning, __name__, - append=1) - -import regex -from regex_syntax import * # RE_* - -__all__ = ["convert","quote"] - -# Default translation table -mastertable = { - r'\<': r'\b', - r'\>': r'\b', - r'\`': r'\A', - r'\'': r'\Z', - r'\(': '(', - r'\)': ')', - r'\|': '|', - '(': r'\(', - ')': r'\)', - '|': r'\|', - '\t': r'\t', - '\n': r'\n', - '\r': r'\r', -} - - -def convert(s, syntax=None): - """Convert a regex regular expression to re syntax. - - The first argument is the regular expression, as a string object, - just like it would be passed to regex.compile(). (I.e., pass the - actual string object -- string quotes must already have been - removed and the standard escape processing has already been done, - e.g. by eval().) - - The optional second argument is the regex syntax variant to be - used. This is an integer mask as passed to regex.set_syntax(); - the flag bits are defined in regex_syntax. When not specified, or - when None is given, the current regex syntax mask (as retrieved by - regex.get_syntax()) is used -- which is 0 by default. - - The return value is a regular expression, as a string object that - could be passed to re.compile(). (I.e., no string quotes have - been added -- use quote() below, or repr().) - - The conversion is not always guaranteed to be correct. More - syntactical analysis should be performed to detect borderline - cases and decide what to do with them. For example, 'x*?' is not - translated correctly. - - """ - table = mastertable.copy() - if syntax is None: - syntax = regex.get_syntax() - if syntax & RE_NO_BK_PARENS: - del table[r'\('], table[r'\)'] - del table['('], table[')'] - if syntax & RE_NO_BK_VBAR: - del table[r'\|'] - del table['|'] - if syntax & RE_BK_PLUS_QM: - table['+'] = r'\+' - table['?'] = r'\?' - table[r'\+'] = '+' - table[r'\?'] = '?' - if syntax & RE_NEWLINE_OR: - table['\n'] = '|' - res = "" - - i = 0 - end = len(s) - while i < end: - c = s[i] - i = i+1 - if c == '\\': - c = s[i] - i = i+1 - key = '\\' + c - key = table.get(key, key) - res = res + key - else: - c = table.get(c, c) - res = res + c - return res - - -def quote(s, quote=None): - """Convert a string object to a quoted string literal. - - This is similar to repr() but will return a "raw" string (r'...' - or r"...") when the string contains backslashes, instead of - doubling all backslashes. The resulting string does *not* always - evaluate to the same string as the original; however it will do - just the right thing when passed into re.compile(). - - The optional second argument forces the string quote; it must be - a single character which is a valid Python string quote. - - """ - if quote is None: - q = "'" - altq = "'" - if q in s and altq not in s: - q = altq - else: - assert quote in ('"', "'", '"""', "'''") - q = quote - res = q - for c in s: - if c == q: c = '\\' + c - elif c < ' ' or c > '~': c = "\\%03o" % ord(c) - res = res + c - res = res + q - if '\\' in res: - res = 'r' + res - return res - - -def main(): - """Main program -- called when run as a script.""" - import sys - s = eval(sys.stdin.read()) - sys.stdout.write(quote(convert(s))) - if sys.stdout.isatty(): - sys.stdout.write("\n") - - -if __name__ == '__main__': - main() |