# DebFile: a Python representation of Debian .deb binary packages. # Copyright (C) 2007-2008 Stefano Zacchiroli # Copyright (C) 2007 Filippo Giunchedi # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . from __future__ import absolute_import, print_function import gzip import tarfile import sys from debian.arfile import ArFile, ArError from debian.changelog import Changelog from debian.deb822 import Deb822 DATA_PART = 'data.tar' # w/o extension CTRL_PART = 'control.tar' PART_EXTS = ['gz', 'bz2'] # possible extensions INFO_PART = 'debian-binary' MAINT_SCRIPTS = ['preinst', 'postinst', 'prerm', 'postrm', 'config'] CONTROL_FILE = 'control' CHANGELOG_NATIVE = 'usr/share/doc/%s/changelog.gz' # with package stem CHANGELOG_DEBIAN = 'usr/share/doc/%s/changelog.Debian.gz' MD5_FILE = 'md5sums' class DebError(ArError): pass class DebPart(object): """'Part' of a .deb binary package. A .deb package is considered as made of 2 parts: a 'data' part (corresponding to the 'data.tar.gz' archive embedded in a .deb) and a 'control' part (the 'control.tar.gz' archive). Each of them is represented by an instance of this class. Each archive should be a compressed tar archive; supported compression formats are: .tar.gz, .tar.bz2 . When referring to file members of the underlying .tar.gz archive, file names can be specified in one of 3 formats "file", "./file", "/file". In all cases the file is considered relative to the root of the archive. For the control part the preferred mechanism is the first one (as in deb.control.get_content('control') ); for the data part the preferred mechanism is the third one (as in deb.data.get_file('/etc/vim/vimrc') ). """ def __init__(self, member): self.__member = member # arfile.ArMember file member self.__tgz = None def tgz(self): """Return a TarFile object corresponding to this part of a .deb package. Despite the name, this method gives access to various kind of compressed tar archives, not only gzipped ones. """ if self.__tgz is None: name = self.__member.name if name.endswith('.gz'): gz = gzip.GzipFile(fileobj=self.__member, mode='r') self.__tgz = tarfile.TarFile(fileobj=gz, mode='r') elif name.endswith('.bz2'): # Tarfile's __init__ doesn't allow for r:bz2 modes, but the # open() classmethod does ... self.__tgz = tarfile.open(fileobj=self.__member, mode='r:bz2') else: raise DebError("part '%s' has unexpected extension" % name) return self.__tgz @staticmethod def __normalize_member(fname): """ try (not so hard) to obtain a member file name in a form relative to the .tar.gz root and with no heading '.' """ if fname.startswith('./'): fname = fname[2:] elif fname.startswith('/'): fname = fname[1:] return fname # XXX in some of the following methods, compatibility among >= 2.5 and << # 2.5 python versions had to be taken into account. TarFile << 2.5 indeed # was buggied and returned member file names with an heading './' only for # the *first* file member. TarFile >= 2.5 fixed this and has the heading # './' for all file members. def has_file(self, fname): """Check if this part contains a given file name.""" fname = DebPart.__normalize_member(fname) names = self.tgz().getnames() return (('./' + fname in names) \ or (fname in names)) # XXX python << 2.5 TarFile compatibility def get_file(self, fname, encoding=None, errors=None): """Return a file object corresponding to a given file name. If encoding is given, then the file object will return Unicode data; otherwise, it will return binary data. """ fname = DebPart.__normalize_member(fname) try: fobj = self.tgz().extractfile('./' + fname) except KeyError: # XXX python << 2.5 TarFile compatibility fobj = self.tgz().extractfile(fname) if encoding is not None: if sys.version >= '3': import io if not hasattr(fobj, 'flush'): # XXX http://bugs.python.org/issue13815 fobj.flush = lambda: None return io.TextIOWrapper(fobj, encoding=encoding, errors=errors) else: import codecs if errors is None: errors = 'strict' return codecs.EncodedFile(fobj, encoding, errors=errors) else: return fobj def get_content(self, fname, encoding=None, errors=None): """Return the string content of a given file, or None (e.g. for directories). If encoding is given, then the content will be a Unicode object; otherwise, it will contain binary data. """ f = self.get_file(fname, encoding=encoding, errors=errors) content = None if f: # can be None for non regular or link files content = f.read() f.close() return content # container emulation def __iter__(self): return iter(self.tgz().getnames()) def __contains__(self, fname): return self.has_file(fname) if sys.version < '3': def has_key(self, fname): return self.has_file(fname) def __getitem__(self, fname): return self.get_content(fname) def close(self): self.__member.close() class DebData(DebPart): pass class DebControl(DebPart): def scripts(self): """ Return a dictionary of maintainer scripts (postinst, prerm, ...) mapping script names to script text. """ scripts = {} for fname in MAINT_SCRIPTS: if self.has_file(fname): scripts[fname] = self.get_content(fname) return scripts def debcontrol(self): """ Return the debian/control as a Deb822 (a Debian-specific dict-like class) object. For a string representation of debian/control try .get_content('control') """ return Deb822(self.get_content(CONTROL_FILE)) def md5sums(self, encoding=None, errors=None): """ Return a dictionary mapping filenames (of the data part) to md5sums. Fails if the control part does not contain a 'md5sum' file. Keys of the returned dictionary are the left-hand side values of lines in the md5sums member of control.tar.gz, usually file names relative to the file system root (without heading '/' or './'). The returned keys are Unicode objects if an encoding is specified, otherwise binary. The returned values are always Unicode.""" if not self.has_file(MD5_FILE): raise DebError("'%s' file not found, can't list MD5 sums" % MD5_FILE) md5_file = self.get_file(MD5_FILE, encoding=encoding, errors=errors) sums = {} if encoding is None: newline = b'\r\n' else: newline = '\r\n' for line in md5_file.readlines(): # we need to support spaces in filenames, .split() is not enough md5, fname = line.rstrip(newline).split(None, 1) if sys.version >= '3' and isinstance(md5, bytes): sums[fname] = md5.decode() else: sums[fname] = md5 md5_file.close() return sums class DebFile(ArFile): """Representation of a .deb file (a Debian binary package) DebFile objects have the following (read-only) properties: - version debian .deb file format version (not related with the contained package version), 2.0 at the time of writing for all .deb packages in the Debian archive - data DebPart object corresponding to the data.tar.gz (or other compressed tar) archive contained in the .deb file - control DebPart object corresponding to the control.tar.gz (or other compressed tar) archive contained in the .deb file """ def __init__(self, filename=None, mode='r', fileobj=None): ArFile.__init__(self, filename, mode, fileobj) actual_names = set(self.getnames()) def compressed_part_name(basename): global PART_EXTS candidates = [ '%s.%s' % (basename, ext) for ext in PART_EXTS ] parts = actual_names.intersection(set(candidates)) if not parts: raise DebError("missing required part in given .deb" \ " (expected one of: %s)" % candidates) elif len(parts) > 1: raise DebError("too many parts in given .deb" \ " (was looking for only one of: %s)" % candidates) else: # singleton list return list(parts)[0] if not INFO_PART in actual_names: raise DebError("missing required part in given .deb" \ " (expected: '%s')" % INFO_PART) self.__parts = {} self.__parts[CTRL_PART] = DebControl(self.getmember( compressed_part_name(CTRL_PART))) self.__parts[DATA_PART] = DebData(self.getmember( compressed_part_name(DATA_PART))) self.__pkgname = None # updated lazily by __updatePkgName f = self.getmember(INFO_PART) self.__version = f.read().strip() f.close() def __updatePkgName(self): self.__pkgname = self.debcontrol()['package'] version = property(lambda self: self.__version) data = property(lambda self: self.__parts[DATA_PART]) control = property(lambda self: self.__parts[CTRL_PART]) # proxy methods for the appropriate parts def debcontrol(self): """ See .control.debcontrol() """ return self.control.debcontrol() def scripts(self): """ See .control.scripts() """ return self.control.scripts() def md5sums(self, encoding=None, errors=None): """ See .control.md5sums() """ return self.control.md5sums(encoding=encoding, errors=errors) def changelog(self): """ Return a Changelog object for the changelog.Debian.gz of the present .deb package. Return None if no changelog can be found. """ if self.__pkgname is None: self.__updatePkgName() for fname in [ CHANGELOG_DEBIAN % self.__pkgname, CHANGELOG_NATIVE % self.__pkgname ]: if self.data.has_file(fname): gz = gzip.GzipFile(fileobj=self.data.get_file(fname)) raw_changelog = gz.read() gz.close() return Changelog(raw_changelog) return None def close(self): self.control.close() self.data.close() if __name__ == '__main__': import sys deb = DebFile(filename=sys.argv[1]) tgz = deb.control.tgz() print(tgz.getmember('control'))