summaryrefslogtreecommitdiff
path: root/Doc/lib/libreconvert.tex
blob: 29c6e528b21df886f0dfaf6676251f1e4f03e491 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
\section{\module{reconvert} ---
         Convert regular expressions from regex to re form}
\declaremodule{standard}{reconvert}
\moduleauthor{Andrew M. Kuchling}{amk@amk.ca}
\sectionauthor{Skip Montanaro}{skip@pobox.com}


\modulesynopsis{Convert regex-, emacs- or sed-style regular expressions
to re-style syntax.}


This module provides a facility to convert regular expressions from the
syntax used by the deprecated \module{regex} module to that used by the
newer \module{re} module.  Because the regular expression syntax of
\code{sed(1)} and \code{emacs(1)} is similar to that of the
\module{regex} module, it is also helpful for converting patterns written
for those tools to \module{re} patterns.

When used as a script, a Python string literal (or any other expression
evaluating to a string) is read from stdin, and the translated expression is
written to stdout as a string literal.  Unless stdout is a tty, no trailing
newline is written to stdout.  This is done so that it can be used with
Emacs \code{C-U M-|} (\code{shell-command-on-region}), which filters the
region through the shell command.

\begin{seealso}
  \seetitle{Mastering Regular Expressions}{Book on regular expressions
            by Jeffrey Friedl, published by O'Reilly.  The second 
            edition of the book no longer covers Python at all, 
            but the first edition covered writing good regular expression
            patterns in great detail.}
\end{seealso}

\subsection{Module Contents}
\nodename{Contents of Module reconvert}

The module defines two functions and a handful of constants.

\begin{funcdesc}{convert}{pattern\optional{, syntax=None}}
 Convert a \var{pattern} representing a \module{regex}-style regular
 expression into a \module{re}-style regular expression.  The optional
 \var{syntax} parameter is a bitwise-or'd set of flags that control what
 constructs are converted.  See below for a description of the various
 constants.
\end{funcdesc}

\begin{funcdesc}{quote}{s\optional{, quote=None}}
 Convert a string object to a quoted string literal.

 This is similar to \function{repr()} but will return a ``raw'' string
 (\code{r'...'} or \code{r"..."}) when the string contains backslashes,
 instead of doubling all backslashes.  The resulting string does not always
 evaluate to the same string as the original; however, it will do just the
 right thing when passed into \function{re.compile()}.

 The optional second argument forces the string quote; it must be a single
 character which is a valid Python string quote.  Note that prior to Python
 2.5 this would not accept triple-quoted string delimiters.
\end{funcdesc}

\begin{datadesc}{RE_NO_BK_PARENS}
 Suppress paren conversion.  This should be omitted when converting
 \code{sed}-style or \code{emacs}-style regular expressions.
\end{datadesc}

\begin{datadesc}{RE_NO_BK_VBAR}
 Suppress vertical bar conversion.  This should be omitted when converting
 \code{sed}-style or \code{emacs}-style regular expressions.
\end{datadesc}

\begin{datadesc}{RE_BK_PLUS_QM}
 Enable conversion of \code{+} and \code{?} characters.  This should be
 added to the \var{syntax} arg of \function{convert} when converting
 \code{sed}-style regular expressions and omitted when converting
 \code{emacs}-style regular expressions.
\end{datadesc}

\begin{datadesc}{RE_NEWLINE_OR}
 When set, newline characters are replaced by \code{|}.
\end{datadesc}