summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey Fedoseev <fedoseev.sergey@gmail.com> 2018-09-12 03:47:59 +0500
committer: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2018-09-11 15:47:59 -0700
commit: ec014a101a7f6243b95dfc08acfe1542b9fa5d39 (patch)
tree: 62668429576eae74dd178a7bd0a718482500be28
parent: d13e59c1b512069d90efe7ee9b613d3913e79c56 (diff)
download: cpython-git-ec014a101a7f6243b95dfc08acfe1542b9fa5d39.tar.gz
bpo-34636: Use fast path for more chars in SRE category macros. (GH-9170)
When handling \s, \d, or \w (and their inverses) escapes in bytes regexes, this is a small but measurable performance improvement. <!-- issue-number: [bpo-34636](https://www.bugs.python.org/issue34636) --> https://bugs.python.org/issue34636 <!-- /issue-number -->
-rw-r--r-- Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst 2
-rw-r--r-- Modules/_sre.c 6
2 files changed, 5 insertions, 3 deletions
diff --git a/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst
new file mode 100644
index 0000000000..c982b0a4cd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst
@@ -0,0 +1,2 @@
+Speed up re scanning of many non-matching characters for \s \w and \d within
+bytes objects. (microoptimization)
diff --git a/Modules/_sre.c b/Modules/_sre.c
index d67083037e..483cf5e9ff 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -87,13 +87,13 @@ static const char copyright[] =
/* search engine state */
#define SRE_IS_DIGIT(ch)\
- ((ch) < 128 && Py_ISDIGIT(ch))
+ ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
- ((ch) < 128 && Py_ISSPACE(ch))
+ ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
((ch) == '\n')
#define SRE_IS_WORD(ch)\
- ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
+ ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
static unsigned int sre_lower_ascii(unsigned int ch)
{