diff options
author | Sergey Fedoseev <fedoseev.sergey@gmail.com> | 2018-09-12 03:47:59 +0500 |
---|---|---|
committer | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2018-09-11 15:47:59 -0700 |
commit | ec014a101a7f6243b95dfc08acfe1542b9fa5d39 (patch) | |
tree | 62668429576eae74dd178a7bd0a718482500be28 | |
parent | d13e59c1b512069d90efe7ee9b613d3913e79c56 (diff) | |
download | cpython-git-ec014a101a7f6243b95dfc08acfe1542b9fa5d39.tar.gz |
bpo-34636: Use fast path for more chars in SRE category macros. (GH-9170)
When handling \s, \d, or \w (and their inverse) escapes in bytes regexes, this is a small but measurable performance improvement.
<!-- issue-number: [bpo-34636](https://www.bugs.python.org/issue34636) -->
https://bugs.python.org/issue34636
<!-- /issue-number -->
-rw-r--r-- | Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst | 2 | ||||
-rw-r--r-- | Modules/_sre.c | 6 |
2 files changed, 5 insertions, 3 deletions
diff --git a/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst new file mode 100644 index 0000000000..c982b0a4cd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst @@ -0,0 +1,2 @@ +Speed up re scanning of many non-matching characters for \s \w and \d within +bytes objects. (microoptimization) diff --git a/Modules/_sre.c b/Modules/_sre.c index d67083037e..483cf5e9ff 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -87,13 +87,13 @@ static const char copyright[] = /* search engine state */ #define SRE_IS_DIGIT(ch)\ - ((ch) < 128 && Py_ISDIGIT(ch)) + ((ch) <= '9' && Py_ISDIGIT(ch)) #define SRE_IS_SPACE(ch)\ - ((ch) < 128 && Py_ISSPACE(ch)) + ((ch) <= ' ' && Py_ISSPACE(ch)) #define SRE_IS_LINEBREAK(ch)\ ((ch) == '\n') #define SRE_IS_WORD(ch)\ - ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_')) + ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) static unsigned int sre_lower_ascii(unsigned int ch) { |