From 7060380d577690a40ebc201c0725076349e977cd Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 14 Oct 2017 14:21:59 +0900 Subject: bpo-31672: Fix string.Template accidentally matched non-ASCII identifiers (GH-3872) The pattern `[a-z]` with the `IGNORECASE` flag can match some non-ASCII characters. The straightforward solution is to use the `IGNORECASE | ASCII` flags. But users may subclass `Template` and override only `idpattern`, so we want to avoid changing `Template.flags`. Instead, this commit uses the local flag `-i` for `idpattern` and changes `[a-z]` to `[a-zA-Z]`. (cherry picked from commit b22273ec5d1992b0cbe078b887427ae9977dfb78) --- Lib/string.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Lib/string.py') diff --git a/Lib/string.py b/Lib/string.py index c902007643..670c1951a8 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -78,7 +78,11 @@ class Template(metaclass=_TemplateMetaclass): """A string class for supporting $-substitutions.""" delimiter = '$' - idpattern = r'[_a-z][_a-z0-9]*' + # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, + # but without ASCII flag. We can't add re.ASCII to flags because of + # backward compatibility. So we use local -i flag and [a-zA-Z] pattern. + # See https://bugs.python.org/issue31672 + idpattern = r'(?-i:[_a-zA-Z][_a-zA-Z0-9]*)' flags = _re.IGNORECASE def __init__(self, template): -- cgit v1.2.1