summaryrefslogtreecommitdiff
path: root/vendor/Twisted-10.0.0/twisted/python/zipstream.py
blob: ad014c9046e01c44128539c375396b46e5dd1b72 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
# -*- test-case-name: twisted.python.test.test_zipstream -*-
# Copyright (c) 2001-2008 Twisted Matrix Laboratories.
# See LICENSE for details.

"""
An incremental approach to unzipping files.  This allows you to unzip a little
bit of a file at a time, which means you can report progress as a file unzips.
"""

import warnings
import zipfile
import os.path
import zlib
import struct

_fileHeaderSize = struct.calcsize(zipfile.structFileHeader)

class ChunkingZipFile(zipfile.ZipFile):
    """
    A ZipFile object which, with readfile(), also gives you access to a
    filelike object for each entry.
    """

    def readfile(self, name):
        """
        Return file-like object for name.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)

        self.fp.seek(zinfo.header_offset, 0)

        fheader = self.fp.read(_fileHeaderSize)
        if fheader[0:4] != zipfile.stringFileHeader:
            raise zipfile.BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(zipfile.structFileHeader, fheader)
        fname = self.fp.read(fheader[zipfile._FH_FILENAME_LENGTH])

        if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]:
            self.fp.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise zipfile.BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        if zinfo.compress_type == zipfile.ZIP_STORED:
            return ZipFileEntry(self, zinfo.compress_size)
        elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
            return DeflatedZipFileEntry(self, zinfo.compress_size)
        else:
            raise zipfile.BadZipfile(
                "Unsupported compression method %d for file %s" %
                    (zinfo.compress_type, name))



class _FileEntry(object):
    """
    Abstract superclass of both compressed and uncompressed variants of
    file-like objects within a zip archive.

    @ivar chunkingZipFile: a chunking zip file.
    @type chunkingZipFile: L{ChunkingZipFile}

    @ivar length: The number of bytes within the zip file that represent this
    file.  (This is the size on disk, not the number of decompressed bytes
    which will result from reading it.)

    @ivar fp: the underlying file object (that contains pkzip data).  Do not
    touch this, please.  It will quite likely move or go away.

    @ivar closed: File-like 'closed' attribute; True before this file has been
    closed, False after.
    @type closed: L{bool}

    @ivar finished: An older, broken synonym for 'closed'.  Do not touch this,
    please.
    @type finished: L{int}
    """
    def __init__(self, chunkingZipFile, length):
        """
        Create a L{_FileEntry} from a L{ChunkingZipFile}.
        """
        self.chunkingZipFile = chunkingZipFile
        self.fp = self.chunkingZipFile.fp
        self.length = length
        self.finished = 0
        self.closed = False


    def isatty(self):
        """
        Returns false because zip files should not be ttys
        """
        return False


    def close(self):
        """
        Close self (file-like object)
        """
        self.closed = True
        self.finished = 1
        del self.fp


    def readline(self):
        """
        Read a line.
        """
        bytes = ""
        for byte in iter(lambda : self.read(1), ""):
            bytes += byte
            if byte == "\n":
                break
        return bytes


    def next(self):
        """
        Implement next as file does (like readline, except raises StopIteration
        at EOF)
        """
        nextline = self.readline()
        if nextline:
            return nextline
        raise StopIteration()


    def readlines(self):
        """
        Returns a list of all the lines
        """
        return list(self)


    def xreadlines(self):
        """
        Returns an iterator (so self)
        """
        return self


    def __iter__(self):
        """
        Returns an iterator (so self)
        """
        return self



class ZipFileEntry(_FileEntry):
    """
    File-like object used to read an uncompressed entry in a ZipFile
    """

    def __init__(self, chunkingZipFile, length):
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.readBytes = 0


    def tell(self):
        return self.readBytes


    def read(self, n=None):
        if n is None:
            n = self.length - self.readBytes
        if n == 0 or self.finished:
            return ''
        data = self.chunkingZipFile.fp.read(
            min(n, self.length - self.readBytes))
        self.readBytes += len(data)
        if self.readBytes == self.length or len(data) <  n:
            self.finished = 1
        return data



class DeflatedZipFileEntry(_FileEntry):
    """
    File-like object used to read a deflated entry in a ZipFile
    """

    def __init__(self, chunkingZipFile, length):
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.returnedBytes = 0
        self.readBytes = 0
        self.decomp = zlib.decompressobj(-15)
        self.buffer = ""


    def tell(self):
        return self.returnedBytes


    def read(self, n=None):
        if self.finished:
            return ""
        if n is None:
            result = [self.buffer,]
            result.append(
                self.decomp.decompress(
                    self.chunkingZipFile.fp.read(
                        self.length - self.readBytes)))
            result.append(self.decomp.decompress("Z"))
            result.append(self.decomp.flush())
            self.buffer = ""
            self.finished = 1
            result = "".join(result)
            self.returnedBytes += len(result)
            return result
        else:
            while len(self.buffer) < n:
                data = self.chunkingZipFile.fp.read(
                    min(n, 1024, self.length - self.readBytes))
                self.readBytes += len(data)
                if not data:
                    result = (self.buffer
                              + self.decomp.decompress("Z")
                              + self.decomp.flush())
                    self.finished = 1
                    self.buffer = ""
                    self.returnedBytes += len(result)
                    return result
                else:
                    self.buffer += self.decomp.decompress(data)
            result = self.buffer[:n]
            self.buffer = self.buffer[n:]
            self.returnedBytes += len(result)
            return result



def unzip(filename, directory=".", overwrite=0):
    """
    Unzip the file

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
    extracted
    @param overwrite: if on, overwrite files when they exist.  You can
    still get an error if you try to create a directory over a file
    with the same name or vice-versa.
    """
    for i in unzipIter(filename, directory, overwrite):
        pass

DIR_BIT = 16

def unzipIter(filename, directory='.', overwrite=0):
    """
    Return a generator for the zipfile.  This implementation will yield
    after every file.

    The value it yields is the number of files left to unzip.
    """
    zf = zipfile.ZipFile(filename, 'r')
    names = zf.namelist()
    if not os.path.exists(directory):
        os.makedirs(directory)
    remaining = len(zf.namelist())
    for entry in names:
        remaining -= 1
        isdir = zf.getinfo(entry).external_attr & DIR_BIT
        f = os.path.join(directory, entry)
        if isdir:
            # overwrite flag only applies to files
            if not os.path.exists(f):
                os.makedirs(f)
        else:
            # create the directory the file will be in first,
            # since we can't guarantee it exists
            fdir = os.path.split(f)[0]
            if not os.path.exists(fdir):
                os.makedirs(fdir)
            if overwrite or not os.path.exists(f):
                outfile = file(f, 'wb')
                outfile.write(zf.read(entry))
                outfile.close()
        yield remaining


def countZipFileChunks(filename, chunksize):
    """
    Predict the number of chunks that will be extracted from the entire
    zipfile, given chunksize blocks.
    """
    totalchunks = 0
    zf = ChunkingZipFile(filename)
    for info in zf.infolist():
        totalchunks += countFileChunks(info, chunksize)
    return totalchunks


def countFileChunks(zipinfo, chunksize):
    """
    Count the number of chunks that will result from the given L{ZipInfo}.

    @param zipinfo: a L{zipfile.ZipInfo} instance describing an entry in a zip
    archive to be counted.

    @return: the number of chunks present in the zip file.  (Even an empty file
    counts as one chunk.)
    @rtype: L{int}
    """
    count, extra = divmod(zipinfo.file_size, chunksize)
    if extra > 0:
        count += 1
    return count or 1


def countZipFileEntries(filename):
    """
    Count the number of entries in a zip archive.  (Don't use this function.)

    @param filename: The filename of a zip archive.
    @type filename: L{str}
    """
    warnings.warn("countZipFileEntries is deprecated.",
                  DeprecationWarning, 2)
    zf = zipfile.ZipFile(filename)
    return len(zf.namelist())


def unzipIterChunky(filename, directory='.', overwrite=0,
                    chunksize=4096):
    """
    Return a generator for the zipfile.  This implementation will yield after
    every chunksize uncompressed bytes, or at the end of a file, whichever
    comes first.

    The value it yields is the number of chunks left to unzip.
    """
    czf = ChunkingZipFile(filename, 'r')
    if not os.path.exists(directory):
        os.makedirs(directory)
    remaining = countZipFileChunks(filename, chunksize)
    names = czf.namelist()
    infos = czf.infolist()

    for entry, info in zip(names, infos):
        isdir = info.external_attr & DIR_BIT
        f = os.path.join(directory, entry)
        if isdir:
            # overwrite flag only applies to files
            if not os.path.exists(f):
                os.makedirs(f)
            remaining -= 1
            yield remaining
        else:
            # create the directory the file will be in first,
            # since we can't guarantee it exists
            fdir = os.path.split(f)[0]
            if not os.path.exists(fdir):
                os.makedirs(fdir)
            if overwrite or not os.path.exists(f):
                outfile = file(f, 'wb')
                fp = czf.readfile(entry)
                if info.file_size == 0:
                    remaining -= 1
                    yield remaining
                while fp.tell() < info.file_size:
                    hunk = fp.read(chunksize)
                    outfile.write(hunk)
                    remaining -= 1
                    yield remaining
                outfile.close()
            else:
                remaining -= countFileChunks(info, chunksize)
                yield remaining