diff options
-rwxr-xr-x | Lib/tarfile.py | 30 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst | 1 |
2 files changed, 13 insertions, 18 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 59f044cc5a..ba3e95f281 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -513,21 +513,10 @@ class _Stream:
             raise StreamError("seeking backwards is not allowed")
         return self.pos
 
-    def read(self, size=None):
-        """Return the next size number of bytes from the stream.
-           If size is not defined, return all bytes of the stream
-           up to EOF.
-        """
-        if size is None:
-            t = []
-            while True:
-                buf = self._read(self.bufsize)
-                if not buf:
-                    break
-                t.append(buf)
-            buf = b"".join(t)
-        else:
-            buf = self._read(size)
+    def read(self, size):
+        """Return the next size number of bytes from the stream."""
+        assert size is not None
+        buf = self._read(size)
         self.pos += len(buf)
         return buf
 
@@ -540,9 +529,14 @@ class _Stream:
         c = len(self.dbuf)
         t = [self.dbuf]
         while c < size:
-            buf = self.__read(self.bufsize)
-            if not buf:
-                break
+            # Skip underlying buffer to avoid unaligned double buffering.
+            if self.buf:
+                buf = self.buf
+                self.buf = b""
+            else:
+                buf = self.fileobj.read(self.bufsize)
+                if not buf:
+                    break
             try:
                 buf = self.cmp.decompress(buf)
             except self.exception:
diff --git a/Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst b/Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst
new file mode 100644
index 0000000000..c035ba7275
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst
@@ -0,0 +1 @@
+Optimize tarfile uncompress performance about 15% when gzip is used.