1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
|
# DebFile: a Python representation of Debian .deb binary packages.
# Copyright (C) 2007-2008 Stefano Zacchiroli <zack@debian.org>
# Copyright (C) 2007 Filippo Giunchedi <filippo@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import, print_function
import gzip
import tarfile
import sys
from debian.arfile import ArFile, ArError
from debian.changelog import Changelog
from debian.deb822 import Deb822
# Names of the ar members that make up a .deb archive.
INFO_PART = 'debian-binary'  # member holding the .deb format version
CTRL_PART = 'control.tar'  # control archive name, compression suffix omitted
DATA_PART = 'data.tar'  # data archive name, compression suffix omitted
PART_EXTS = ['gz', 'bz2']  # compression suffixes tried for the two archives

# Well-known members of the control archive.
CONTROL_FILE = 'control'
MD5_FILE = 'md5sums'
MAINT_SCRIPTS = ['preinst', 'postinst', 'prerm', 'postrm', 'config']

# Changelog paths inside the data archive; '%s' is filled with the package
# name.
CHANGELOG_DEBIAN = 'usr/share/doc/%s/changelog.Debian.gz'
CHANGELOG_NATIVE = 'usr/share/doc/%s/changelog.gz'
class DebError(ArError):
    """Error raised when a .deb package is malformed or cannot be handled.

    Used in this module for missing or duplicated archive parts, parts with
    an unexpected compression extension, and a missing md5sums file.
    """
class DebPart(object):
    """'Part' of a .deb binary package.

    A .deb package is considered as made of 2 parts: a 'data' part
    (corresponding to the 'data.tar.gz' archive embedded in a .deb) and a
    'control' part (the 'control.tar.gz' archive). Each of them is represented
    by an instance of this class. Each archive should be a compressed tar
    archive; supported compression formats are: .tar.gz, .tar.bz2 .

    When referring to file members of the underlying .tar.gz archive, file
    names can be specified in one of 3 formats "file", "./file", "/file". In
    all cases the file is considered relative to the root of the archive. For
    the control part the preferred mechanism is the first one (as in
    deb.control.get_content('control') ); for the data part the preferred
    mechanism is the third one (as in deb.data.get_file('/etc/vim/vimrc') ).
    """

    def __init__(self, member):
        """Wrap `member`, a file-like object with a `name` attribute that
        holds the compressed tar archive (an arfile.ArMember in DebFile)."""
        self.__member = member
        self.__tgz = None  # TarFile, created on first call to tgz()

    def tgz(self):
        """Return a TarFile object corresponding to this part of a .deb
        package.

        Despite the name, this method gives access to various kind of
        compressed tar archives, not only gzipped ones. The TarFile is
        created on first use and cached for subsequent calls.

        Raises DebError if the member name ends neither in '.gz' nor in
        '.bz2'.
        """
        if self.__tgz is None:
            name = self.__member.name
            if name.endswith('.gz'):
                gz = gzip.GzipFile(fileobj=self.__member, mode='r')
                self.__tgz = tarfile.TarFile(fileobj=gz, mode='r')
            elif name.endswith('.bz2'):
                # Tarfile's __init__ doesn't allow for r:bz2 modes, but the
                # open() classmethod does ...
                self.__tgz = tarfile.open(fileobj=self.__member, mode='r:bz2')
            else:
                raise DebError("part '%s' has unexpected extension" % name)
        return self.__tgz

    @staticmethod
    def __normalize_member(fname):
        """Try (not so hard) to obtain a member file name in a form relative
        to the .tar.gz root and with no leading './' or '/'."""
        if fname.startswith('./'):
            fname = fname[2:]
        elif fname.startswith('/'):
            fname = fname[1:]
        return fname

    # XXX In some of the following methods, compatibility between Python
    # >= 2.5 and << 2.5 had to be taken into account. TarFile << 2.5 was
    # buggy and returned member file names with a leading './' only for the
    # *first* file member. TarFile >= 2.5 fixed this and has the leading './'
    # for all file members. Hence both spellings are always tried.

    def has_file(self, fname):
        """Check if this part contains a given file name."""
        fname = DebPart.__normalize_member(fname)
        names = self.tgz().getnames()
        return (('./' + fname in names)
                or (fname in names))  # XXX python << 2.5 TarFile compatibility

    def get_file(self, fname, encoding=None, errors=None):
        """Return a file object corresponding to a given file name.

        If encoding is given, then the file object will return Unicode data;
        otherwise, it will return binary data.

        Returns None when the member exists but is not a regular file or a
        link (e.g. a directory), whether or not an encoding is given.
        Raises KeyError if the archive has no such member.
        """
        fname = DebPart.__normalize_member(fname)
        try:
            fobj = self.tgz().extractfile('./' + fname)
        except KeyError:  # XXX python << 2.5 TarFile compatibility
            fobj = self.tgz().extractfile(fname)

        # extractfile() yields None for directories and other non-regular
        # members; there is nothing to wrap in a decoder in that case.
        if fobj is None or encoding is None:
            return fobj

        # Compare the numeric major version: comparing sys.version as a
        # string is lexicographic and breaks for two-digit major versions.
        if sys.version_info[0] >= 3:
            import io
            if not hasattr(fobj, 'flush'):
                # XXX http://bugs.python.org/issue13815
                fobj.flush = lambda: None
            return io.TextIOWrapper(fobj, encoding=encoding, errors=errors)

        import codecs
        if errors is None:
            errors = 'strict'
        return codecs.EncodedFile(fobj, encoding, errors=errors)

    def get_content(self, fname, encoding=None, errors=None):
        """Return the string content of a given file, or None (e.g. for
        directories).

        If encoding is given, then the content will be a Unicode object;
        otherwise, it will contain binary data.
        """
        f = self.get_file(fname, encoding=encoding, errors=errors)
        content = None
        if f:  # can be None for non regular or link files
            content = f.read()
            f.close()
        return content

    # container emulation

    def __iter__(self):
        """Iterate over the member names exactly as stored in the archive."""
        return iter(self.tgz().getnames())

    def __contains__(self, fname):
        """Support `fname in part`; same as has_file()."""
        return self.has_file(fname)

    if sys.version_info[0] < 3:
        def has_key(self, fname):
            """Python 2 dict-style alias of has_file()."""
            return self.has_file(fname)

    def __getitem__(self, fname):
        """Support `part[fname]`; same as get_content() without encoding."""
        return self.get_content(fname)

    def close(self):
        """Close the underlying archive member."""
        self.__member.close()
class DebData(DebPart):
    """The 'data' part of a .deb package.

    Adds nothing to DebPart; it exists so that the data part has a type of
    its own.
    """
class DebControl(DebPart):
    """The 'control' part of a .deb package, with helpers for its well-known
    members (control file, maintainer scripts, md5sums)."""

    def scripts(self):
        """ Return a dictionary of maintainer scripts (postinst, prerm, ...)
        mapping script names to script text.

        Only the scripts listed in MAINT_SCRIPTS that are actually present
        in the archive appear in the result. """
        scripts = {}
        for fname in MAINT_SCRIPTS:
            if self.has_file(fname):
                scripts[fname] = self.get_content(fname)
        return scripts

    def debcontrol(self):
        """ Return the debian/control as a Deb822 (a Debian-specific dict-like
        class) object.

        For a string representation of debian/control try
        .get_content('control') """
        return Deb822(self.get_content(CONTROL_FILE))

    def md5sums(self, encoding=None, errors=None):
        """ Return a dictionary mapping filenames (of the data part) to
        md5sums. Fails if the control part does not contain a 'md5sum' file.

        Keys of the returned dictionary are the left-hand side values of lines
        in the md5sums member of control.tar.gz, usually file names relative to
        the file system root (without heading '/' or './').

        The returned keys are Unicode objects if an encoding is specified,
        otherwise binary. The returned values are always Unicode.

        Blank lines are ignored. Raises DebError if the md5sums member is
        missing, and ValueError if a non-blank line has no file name."""
        if not self.has_file(MD5_FILE):
            raise DebError("'%s' file not found, can't list MD5 sums" %
                           MD5_FILE)

        md5_file = self.get_file(MD5_FILE, encoding=encoding, errors=errors)
        sums = {}
        # The line terminator must be of the same type (bytes or text) as the
        # lines read from the file.
        if encoding is None:
            newline = b'\r\n'
        else:
            newline = '\r\n'
        # Numeric check: comparing sys.version as a string is lexicographic
        # and breaks for two-digit major versions.
        is_py3 = sys.version_info[0] >= 3
        try:
            for line in md5_file.readlines():
                line = line.rstrip(newline)
                if not line.strip():
                    continue  # tolerate blank lines
                # we need to support spaces in filenames, .split() is not
                # enough
                md5, fname = line.split(None, 1)
                if is_py3 and isinstance(md5, bytes):
                    sums[fname] = md5.decode()
                else:
                    sums[fname] = md5
        finally:
            # Release the member even if a line fails to parse.
            md5_file.close()
        return sums
class DebFile(ArFile):
    """Representation of a .deb file (a Debian binary package)

    DebFile objects have the following (read-only) properties:
        - version       debian .deb file format version (not related with the
                        contained package version), 2.0 at the time of writing
                        for all .deb packages in the Debian archive
        - data          DebPart object corresponding to the data.tar.gz (or
                        other compressed tar) archive contained in the .deb
                        file
        - control       DebPart object corresponding to the control.tar.gz (or
                        other compressed tar) archive contained in the .deb
                        file
    """

    def __init__(self, filename=None, mode='r', fileobj=None):
        """Open a .deb and locate its three required members.

        The arguments are handed unchanged to ArFile.__init__. DebError is
        raised when the 'debian-binary' member is absent, or when the control
        or data archive is missing or present under more than one supported
        compression extension.
        """
        ArFile.__init__(self, filename, mode, fileobj)
        actual_names = set(self.getnames())

        def compressed_part_name(basename):
            # Resolve e.g. 'data.tar' to the single 'data.tar.<ext>' member
            # actually present in the archive.
            candidates = ['%s.%s' % (basename, ext) for ext in PART_EXTS]
            found = actual_names.intersection(set(candidates))
            if not found:
                raise DebError("missing required part in given .deb"
                               " (expected one of: %s)" % candidates)
            if len(found) > 1:
                raise DebError("too many parts in given .deb"
                               " (was looking for only one of: %s)"
                               % candidates)
            return next(iter(found))  # exactly one element left

        if INFO_PART not in actual_names:
            raise DebError("missing required part in given .deb"
                           " (expected: '%s')" % INFO_PART)

        parts = self.__parts = {}
        ctrl_member = self.getmember(compressed_part_name(CTRL_PART))
        parts[CTRL_PART] = DebControl(ctrl_member)
        data_member = self.getmember(compressed_part_name(DATA_PART))
        parts[DATA_PART] = DebData(data_member)
        self.__pkgname = None   # updated lazily by __updatePkgName

        info = self.getmember(INFO_PART)
        self.__version = info.read().strip()
        info.close()

    def __updatePkgName(self):
        # Cache the package name taken from the control file.
        self.__pkgname = self.debcontrol()['package']

    @property
    def version(self):
        """Stripped content of the 'debian-binary' member."""
        return self.__version

    @property
    def data(self):
        """The DebData object wrapping the data archive."""
        return self.__parts[DATA_PART]

    @property
    def control(self):
        """The DebControl object wrapping the control archive."""
        return self.__parts[CTRL_PART]

    # proxy methods for the appropriate parts

    def debcontrol(self):
        """ See .control.debcontrol() """
        return self.control.debcontrol()

    def scripts(self):
        """ See .control.scripts() """
        return self.control.scripts()

    def md5sums(self, encoding=None, errors=None):
        """ See .control.md5sums() """
        return self.control.md5sums(encoding=encoding, errors=errors)

    def changelog(self):
        """ Return a Changelog object for the changelog.Debian.gz of the
        present .deb package. Return None if no changelog can be found.

        changelog.Debian.gz is tried first, then changelog.gz. """
        if self.__pkgname is None:
            self.__updatePkgName()

        candidates = (CHANGELOG_DEBIAN % self.__pkgname,
                      CHANGELOG_NATIVE % self.__pkgname)
        for path in candidates:
            if not self.data.has_file(path):
                continue
            gz = gzip.GzipFile(fileobj=self.data.get_file(path))
            raw_changelog = gz.read()
            gz.close()
            return Changelog(raw_changelog)
        return None

    def close(self):
        """Close the control part, then the data part."""
        self.control.close()
        self.data.close()
if __name__ == '__main__':
    # Small demo: print the TarInfo of the 'control' member of the control
    # archive of the .deb given on the command line.
    import sys

    if len(sys.argv) != 2:
        sys.exit("Usage: %s PACKAGE.deb" % sys.argv[0])

    deb = DebFile(filename=sys.argv[1])
    tgz = deb.control.tgz()
    # TarFile.getmember() needs the exact stored name, which may or may not
    # carry a leading './'; try both spellings.
    try:
        member = tgz.getmember('./control')
    except KeyError:
        member = tgz.getmember('control')
    print(member)
|