summaryrefslogtreecommitdiff
path: root/debian/foo/debfile.py
blob: a728a77ef66525edb6c9692d6fe6421b221a7d13 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# DebFile: a Python representation of Debian .deb binary packages.
# Copyright (C) 2007-2008   Stefano Zacchiroli  <zack@debian.org>
# Copyright (C) 2007        Filippo Giunchedi   <filippo@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import, print_function

import gzip
import tarfile
import sys

from debian.arfile import ArFile, ArError
from debian.changelog import Changelog
from debian.deb822 import Deb822

DATA_PART = 'data.tar'      # w/o extension
CTRL_PART = 'control.tar'
PART_EXTS = ['gz', 'bz2']   # possible extensions
INFO_PART = 'debian-binary'
MAINT_SCRIPTS = ['preinst', 'postinst', 'prerm', 'postrm', 'config']

CONTROL_FILE = 'control'
CHANGELOG_NATIVE = 'usr/share/doc/%s/changelog.gz'  # with package stem
CHANGELOG_DEBIAN = 'usr/share/doc/%s/changelog.Debian.gz'
MD5_FILE = 'md5sums'


class DebError(ArError):
    pass


class DebPart(object):
    """'Part' of a .deb binary package.
    
    A .deb package is considered as made of 2 parts: a 'data' part
    (corresponding to the 'data.tar.gz' archive embedded in a .deb) and a
    'control' part (the 'control.tar.gz' archive). Each of them is represented
    by an instance of this class. Each archive should be a compressed tar
    archive; supported compression formats are: .tar.gz, .tar.bz2 .

    When referring to file members of the underlying .tar.gz archive, file
    names can be specified in one of 3 formats "file", "./file", "/file". In
    all cases the file is considered relative to the root of the archive. For
    the control part the preferred mechanism is the first one (as in
    deb.control.get_content('control') ); for the data part the preferred
    mechanism is the third one (as in deb.data.get_file('/etc/vim/vimrc') ).
    """

    def __init__(self, member):
        self.__member = member  # arfile.ArMember file member
        self.__tgz = None

    def tgz(self):
        """Return a TarFile object corresponding to this part of a .deb
        package.
        
        Despite the name, this method gives access to various kind of
        compressed tar archives, not only gzipped ones.
        """

        if self.__tgz is None:
            name = self.__member.name
            if name.endswith('.gz'):
                gz = gzip.GzipFile(fileobj=self.__member, mode='r')
                self.__tgz = tarfile.TarFile(fileobj=gz, mode='r')
            elif name.endswith('.bz2'):
                # Tarfile's __init__ doesn't allow for r:bz2 modes, but the
                # open() classmethod does ...
                self.__tgz = tarfile.open(fileobj=self.__member, mode='r:bz2')
            else:
                raise DebError("part '%s' has unexpected extension" % name)
        return self.__tgz

    @staticmethod
    def __normalize_member(fname):
        """ try (not so hard) to obtain a member file name in a form relative
        to the .tar.gz root and with no heading '.' """

        if fname.startswith('./'):
            fname = fname[2:]
        elif fname.startswith('/'):
            fname = fname[1:]
        return fname

    # XXX in some of the following methods, compatibility among >= 2.5 and <<
    # 2.5 python versions had to be taken into account. TarFile << 2.5 indeed
    # was buggied and returned member file names with an heading './' only for
    # the *first* file member. TarFile >= 2.5 fixed this and has the heading
    # './' for all file members.

    def has_file(self, fname):
        """Check if this part contains a given file name."""

        fname = DebPart.__normalize_member(fname)
        names = self.tgz().getnames()
        return (('./' + fname in names) \
                or (fname in names)) # XXX python << 2.5 TarFile compatibility

    def get_file(self, fname, encoding=None, errors=None):
        """Return a file object corresponding to a given file name.

        If encoding is given, then the file object will return Unicode data;
        otherwise, it will return binary data.
        """

        fname = DebPart.__normalize_member(fname)
        try:
            fobj = self.tgz().extractfile('./' + fname)
        except KeyError:    # XXX python << 2.5 TarFile compatibility
            fobj = self.tgz().extractfile(fname)
        if encoding is not None:
            if sys.version >= '3':
                import io
                if not hasattr(fobj, 'flush'):
                    # XXX http://bugs.python.org/issue13815
                    fobj.flush = lambda: None
                return io.TextIOWrapper(fobj, encoding=encoding, errors=errors)
            else:
                import codecs
                if errors is None:
                    errors = 'strict'
                return codecs.EncodedFile(fobj, encoding, errors=errors)
        else:
            return fobj

    def get_content(self, fname, encoding=None, errors=None):
        """Return the string content of a given file, or None (e.g. for
        directories).

        If encoding is given, then the content will be a Unicode object;
        otherwise, it will contain binary data.
        """

        f = self.get_file(fname, encoding=encoding, errors=errors)
        content = None
        if f:   # can be None for non regular or link files
            content = f.read()
            f.close()
        return content

    # container emulation

    def __iter__(self):
        return iter(self.tgz().getnames())

    def __contains__(self, fname):
        return self.has_file(fname)

    if sys.version < '3':
        def has_key(self, fname):
            return self.has_file(fname)

    def __getitem__(self, fname):
        return self.get_content(fname)

    def close(self):
        self.__member.close()


class DebData(DebPart):

    pass


class DebControl(DebPart):

    def scripts(self):
        """ Return a dictionary of maintainer scripts (postinst, prerm, ...)
        mapping script names to script text. """

        scripts = {}
        for fname in MAINT_SCRIPTS:
            if self.has_file(fname):
                scripts[fname] = self.get_content(fname)

        return scripts

    def debcontrol(self):
        """ Return the debian/control as a Deb822 (a Debian-specific dict-like
        class) object.
        
        For a string representation of debian/control try
        .get_content('control') """

        return Deb822(self.get_content(CONTROL_FILE))

    def md5sums(self, encoding=None, errors=None):
        """ Return a dictionary mapping filenames (of the data part) to
        md5sums. Fails if the control part does not contain a 'md5sum' file.

        Keys of the returned dictionary are the left-hand side values of lines
        in the md5sums member of control.tar.gz, usually file names relative to
        the file system root (without heading '/' or './').

        The returned keys are Unicode objects if an encoding is specified,
        otherwise binary. The returned values are always Unicode."""

        if not self.has_file(MD5_FILE):
            raise DebError("'%s' file not found, can't list MD5 sums" %
                    MD5_FILE)

        md5_file = self.get_file(MD5_FILE, encoding=encoding, errors=errors)
        sums = {}
        if encoding is None:
            newline = b'\r\n'
        else:
            newline = '\r\n'
        for line in md5_file.readlines():
            # we need to support spaces in filenames, .split() is not enough
            md5, fname = line.rstrip(newline).split(None, 1)
            if sys.version >= '3' and isinstance(md5, bytes):
                sums[fname] = md5.decode()
            else:
                sums[fname] = md5
        md5_file.close()
        return sums


class DebFile(ArFile):
    """Representation of a .deb file (a Debian binary package)

    DebFile objects have the following (read-only) properties:
        - version       debian .deb file format version (not related with the
                        contained package version), 2.0 at the time of writing
                        for all .deb packages in the Debian archive
        - data          DebPart object corresponding to the data.tar.gz (or
                        other compressed tar) archive contained in the .deb
                        file
        - control       DebPart object corresponding to the control.tar.gz (or
                        other compressed tar) archive contained in the .deb
                        file
    """

    def __init__(self, filename=None, mode='r', fileobj=None):
        ArFile.__init__(self, filename, mode, fileobj)
        actual_names = set(self.getnames())

        def compressed_part_name(basename):
            global PART_EXTS
            candidates = [ '%s.%s' % (basename, ext) for ext in PART_EXTS ]
            parts = actual_names.intersection(set(candidates))
            if not parts:
                raise DebError("missing required part in given .deb" \
                        " (expected one of: %s)" % candidates)
            elif len(parts) > 1:
                raise DebError("too many parts in given .deb" \
                        " (was looking for only one of: %s)" % candidates)
            else:   # singleton list
                return list(parts)[0]

        if not INFO_PART in actual_names:
            raise DebError("missing required part in given .deb" \
                    " (expected: '%s')" % INFO_PART)

        self.__parts = {}
        self.__parts[CTRL_PART] = DebControl(self.getmember(
                compressed_part_name(CTRL_PART)))
        self.__parts[DATA_PART] = DebData(self.getmember(
                compressed_part_name(DATA_PART)))
        self.__pkgname = None   # updated lazily by __updatePkgName

        f = self.getmember(INFO_PART)
        self.__version = f.read().strip()
        f.close()

    def __updatePkgName(self):
        self.__pkgname = self.debcontrol()['package']

    version = property(lambda self: self.__version)
    data = property(lambda self: self.__parts[DATA_PART])
    control = property(lambda self: self.__parts[CTRL_PART])

    # proxy methods for the appropriate parts

    def debcontrol(self):
        """ See .control.debcontrol() """
        return self.control.debcontrol()

    def scripts(self):
        """ See .control.scripts() """
        return self.control.scripts()

    def md5sums(self, encoding=None, errors=None):
        """ See .control.md5sums() """
        return self.control.md5sums(encoding=encoding, errors=errors)

    def changelog(self):
        """ Return a Changelog object for the changelog.Debian.gz of the
        present .deb package. Return None if no changelog can be found. """

        if self.__pkgname is None:
            self.__updatePkgName()

        for fname in [ CHANGELOG_DEBIAN % self.__pkgname,
                CHANGELOG_NATIVE % self.__pkgname ]:
            if self.data.has_file(fname):
                gz = gzip.GzipFile(fileobj=self.data.get_file(fname))
                raw_changelog = gz.read()
                gz.close()
                return Changelog(raw_changelog)
        return None

    def close(self):
        self.control.close()
        self.data.close()


if __name__ == '__main__':
    import sys
    deb = DebFile(filename=sys.argv[1])
    tgz = deb.control.tgz()
    print(tgz.getmember('control'))