diff options
| author | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-24 22:04:37 +0300 | 
|---|---|---|
| committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-24 22:04:37 +0300 | 
| commit | 8b150ecfc9a57fb2d564381464bb04c9a94ee053 (patch) | |
| tree | 51011ee81a175f761a8ebb10a593f6d792945ba2 | |
| parent | e5019d5183041f4f75cf4a30b2dc84eed347425e (diff) | |
| parent | be80fc9a843e3c51d1030d3eab52d6287e5aef3a (diff) | |
| download | cpython-git-8b150ecfc9a57fb2d564381464bb04c9a94ee053.tar.gz | |
Issue #19327: Fixed the handling of regular expressions with very large character sets.
| -rw-r--r-- | Lib/sre_compile.py | 2 | ||||
| -rw-r--r-- | Lib/test/test_re.py | 3 | ||||
| -rw-r--r-- | Misc/NEWS | 2 | ||||
| -rw-r--r-- | Modules/_sre.c | 4 | 
4 files changed, 8 insertions, 3 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index d6dc60c73e..e08ec661aa 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -339,7 +339,7 @@ def _optimize_unicode(charset, fixup):      else:          code = 'I'      # Convert block indices to byte array of 256 bytes -    mapping = array.array('b', mapping).tobytes() +    mapping = array.array('B', mapping).tobytes()      # Convert byte array to word array      mapping = array.array(code, mapping)      assert mapping.itemsize == _sre.CODESIZE diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 841d3a38c5..5e68585065 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -482,6 +482,9 @@ class ReTests(unittest.TestCase):                                    "\u2222").group(1), "\u2222")          self.assertEqual(re.match("([\u2222\u2223])",                                    "\u2222", re.UNICODE).group(1), "\u2222") +        r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255))) +        self.assertEqual(re.match(r, +                                  "\uff01", re.UNICODE).group(), "\uff01")      def test_big_codesize(self):          # Issue #1160 @@ -19,6 +19,8 @@ Core and Builtins  Library  ------- +- Issue #19327: Fixed the working of regular expressions with too big charset. +  - Issue #17400: New 'is_global' attribute for ipaddress to tell if an address    is allocated by IANA for global or private networks. 
diff --git a/Modules/_sre.c b/Modules/_sre.c index 84330ef715..dd9d6645d7 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -447,7 +447,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)              count = *(set++);              if (sizeof(SRE_CODE) == 2) { -                block = ((char*)set)[ch >> 8]; +                block = ((unsigned char*)set)[ch >> 8];                  set += 128;                  if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))                      return ok; @@ -457,7 +457,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)                  /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids                   * warnings when c's type supports only numbers < N+1 */                  if (!(ch & ~65535)) -                    block = ((char*)set)[ch >> 8]; +                    block = ((unsigned char*)set)[ch >> 8];                  else                      block = -1;                  set += 64;  | 
