summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x Lib/tarfile.py 30
-rw-r--r-- Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst 1
2 files changed, 13 insertions, 18 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 59f044cc5a..ba3e95f281 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -513,21 +513,10 @@ class _Stream:
raise StreamError("seeking backwards is not allowed")
return self.pos
- def read(self, size=None):
- """Return the next size number of bytes from the stream.
- If size is not defined, return all bytes of the stream
- up to EOF.
- """
- if size is None:
- t = []
- while True:
- buf = self._read(self.bufsize)
- if not buf:
- break
- t.append(buf)
- buf = b"".join(t)
- else:
- buf = self._read(size)
+ def read(self, size):
+ """Return the next size number of bytes from the stream."""
+ assert size is not None
+ buf = self._read(size)
self.pos += len(buf)
return buf
@@ -540,9 +529,14 @@ class _Stream:
c = len(self.dbuf)
t = [self.dbuf]
while c < size:
- buf = self.__read(self.bufsize)
- if not buf:
- break
+ # Skip underlying buffer to avoid unaligned double buffering.
+ if self.buf:
+ buf = self.buf
+ self.buf = b""
+ else:
+ buf = self.fileobj.read(self.bufsize)
+ if not buf:
+ break
try:
buf = self.cmp.decompress(buf)
except self.exception:
diff --git a/Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst b/Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst
new file mode 100644
index 0000000000..c035ba7275
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-07-04-21-14-35.bpo-34043.0YJNq9.rst
@@ -0,0 +1 @@
+Optimize tarfile uncompress performance about 15% when gzip is used.