diff options
Diffstat (limited to 'debian/foo/debfile.py')
-rw-r--r-- | debian/foo/debfile.py | 325 |
1 files changed, 325 insertions, 0 deletions
# DebFile: a Python representation of Debian .deb binary packages.
# Copyright (C) 2007-2008 Stefano Zacchiroli <zack@debian.org>
# Copyright (C) 2007 Filippo Giunchedi <filippo@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import, print_function

import gzip
import tarfile
import sys

from debian.arfile import ArFile, ArError
from debian.changelog import Changelog
from debian.deb822 import Deb822

DATA_PART = 'data.tar'      # w/o extension
CTRL_PART = 'control.tar'
PART_EXTS = ['gz', 'bz2']   # possible extensions
INFO_PART = 'debian-binary'
MAINT_SCRIPTS = ['preinst', 'postinst', 'prerm', 'postrm', 'config']

CONTROL_FILE = 'control'
CHANGELOG_NATIVE = 'usr/share/doc/%s/changelog.gz'  # with package stem
CHANGELOG_DEBIAN = 'usr/share/doc/%s/changelog.Debian.gz'
MD5_FILE = 'md5sums'

# Compare the numeric major version rather than the sys.version string:
# string comparison is lexicographic, so e.g. '10.0' < '3' would be true.
_PY3 = sys.version_info[0] >= 3


class DebError(ArError):
    """Error raised for malformed or unsupported .deb packages."""


class DebPart(object):
    """'Part' of a .deb binary package.

    A .deb package is considered as made of 2 parts: a 'data' part
    (corresponding to the 'data.tar.gz' archive embedded in a .deb) and a
    'control' part (the 'control.tar.gz' archive). Each of them is represented
    by an instance of this class. Each archive should be a compressed tar
    archive; supported compression formats are: .tar.gz, .tar.bz2 .

    When referring to file members of the underlying .tar.gz archive, file
    names can be specified in one of 3 formats "file", "./file", "/file". In
    all cases the file is considered relative to the root of the archive. For
    the control part the preferred mechanism is the first one (as in
    deb.control.get_content('control') ); for the data part the preferred
    mechanism is the third one (as in deb.data.get_file('/etc/vim/vimrc') ).
    """

    def __init__(self, member):
        self.__member = member  # arfile.ArMember file member
        self.__tgz = None       # TarFile, created lazily by tgz()

    def tgz(self):
        """Return a TarFile object corresponding to this part of a .deb
        package.

        Despite the name, this method gives access to various kind of
        compressed tar archives, not only gzipped ones.

        The TarFile is created on first use and cached. Raises DebError if
        the member name has neither a '.gz' nor a '.bz2' extension.
        """

        if self.__tgz is None:
            name = self.__member.name
            if name.endswith('.gz'):
                gz = gzip.GzipFile(fileobj=self.__member, mode='r')
                self.__tgz = tarfile.TarFile(fileobj=gz, mode='r')
            elif name.endswith('.bz2'):
                # Tarfile's __init__ doesn't allow for r:bz2 modes, but the
                # open() classmethod does ...
                self.__tgz = tarfile.open(fileobj=self.__member, mode='r:bz2')
            else:
                raise DebError("part '%s' has unexpected extension" % name)
        return self.__tgz

    @staticmethod
    def __normalize_member(fname):
        """ try (not so hard) to obtain a member file name in a form relative
        to the .tar.gz root and with no leading '.' """

        if fname.startswith('./'):
            fname = fname[2:]
        elif fname.startswith('/'):
            fname = fname[1:]
        return fname

    # XXX in some of the following methods, compatibility among >= 2.5 and <<
    # 2.5 python versions had to be taken into account. TarFile << 2.5 indeed
    # was buggy and returned member file names with a leading './' only for
    # the *first* file member. TarFile >= 2.5 fixed this and has the leading
    # './' for all file members.

    def has_file(self, fname):
        """Check if this part contains a given file name."""

        fname = DebPart.__normalize_member(fname)
        names = self.tgz().getnames()
        # the bare-name lookup is for python << 2.5 TarFile compatibility
        return ('./' + fname in names) or (fname in names)

    def get_file(self, fname, encoding=None, errors=None):
        """Return a file object corresponding to a given file name.

        If encoding is given, then the file object will return Unicode data;
        otherwise, it will return binary data.

        Returns None when the tar member has no extractable content (e.g. a
        directory). Raises KeyError if the archive has no such member.
        """

        fname = DebPart.__normalize_member(fname)
        try:
            fobj = self.tgz().extractfile('./' + fname)
        except KeyError:    # XXX python << 2.5 TarFile compatibility
            fobj = self.tgz().extractfile(fname)

        # extractfile() yields None for non-regular members; there is nothing
        # to wrap in that case, so hand the None back instead of crashing in
        # the text wrappers below.
        if fobj is None or encoding is None:
            return fobj

        if _PY3:
            import io
            if not hasattr(fobj, 'flush'):
                # XXX http://bugs.python.org/issue13815
                fobj.flush = lambda: None
            return io.TextIOWrapper(fobj, encoding=encoding, errors=errors)

        import codecs
        if errors is None:
            errors = 'strict'
        return codecs.EncodedFile(fobj, encoding, errors=errors)

    def get_content(self, fname, encoding=None, errors=None):
        """Return the string content of a given file, or None (e.g. for
        directories).

        If encoding is given, then the content will be a Unicode object;
        otherwise, it will contain binary data.
        """

        f = self.get_file(fname, encoding=encoding, errors=errors)
        content = None
        if f:   # can be None for non regular or link files
            try:
                content = f.read()
            finally:
                # release the member even if read() (or decoding) fails
                f.close()
        return content

    # container emulation

    def __iter__(self):
        """Iterate over the member names of the underlying tar archive."""
        return iter(self.tgz().getnames())

    def __contains__(self, fname):
        """Support 'fname in part'; see has_file()."""
        return self.has_file(fname)

    if not _PY3:
        def has_key(self, fname):
            """Python 2 dict-style alias of has_file()."""
            return self.has_file(fname)

    def __getitem__(self, fname):
        """Support 'part[fname]'; see get_content()."""
        return self.get_content(fname)

    def close(self):
        """Close the underlying ar member."""
        self.__member.close()


class DebData(DebPart):
    """The 'data' part (data.tar.*) of a .deb package."""


class DebControl(DebPart):
    """The 'control' part (control.tar.*) of a .deb package."""

    def scripts(self):
        """ Return a dictionary of maintainer scripts (postinst, prerm, ...)
        mapping script names to script text. """

        scripts = {}
        for fname in MAINT_SCRIPTS:
            if self.has_file(fname):
                scripts[fname] = self.get_content(fname)

        return scripts

    def debcontrol(self):
        """ Return the debian/control as a Deb822 (a Debian-specific dict-like
        class) object.

        For a string representation of debian/control try
        .get_content('control') """

        return Deb822(self.get_content(CONTROL_FILE))

    def md5sums(self, encoding=None, errors=None):
        """ Return a dictionary mapping filenames (of the data part) to
        md5sums. Fails if the control part does not contain a 'md5sums' file.

        Keys of the returned dictionary are the left-hand side values of lines
        in the md5sums member of control.tar.gz, usually file names relative to
        the file system root (without leading '/' or './').

        The returned keys are Unicode objects if an encoding is specified,
        otherwise binary. The returned values are always Unicode."""

        if not self.has_file(MD5_FILE):
            raise DebError("'%s' file not found, can't list MD5 sums" %
                           MD5_FILE)

        md5_file = self.get_file(MD5_FILE, encoding=encoding, errors=errors)
        sums = {}
        # the line type (bytes vs. text) follows from whether we are decoding
        newline = b'\r\n' if encoding is None else '\r\n'
        try:
            for line in md5_file.readlines():
                # we need to support spaces in filenames, .split() is not
                # enough
                md5, fname = line.rstrip(newline).split(None, 1)
                if _PY3 and isinstance(md5, bytes):
                    sums[fname] = md5.decode()
                else:
                    sums[fname] = md5
        finally:
            # do not leak the member if a malformed line aborts the parse
            md5_file.close()
        return sums


class DebFile(ArFile):
    """Representation of a .deb file (a Debian binary package)

    DebFile objects have the following (read-only) properties:
        - version       debian .deb file format version (not related with the
                        contained package version), 2.0 at the time of writing
                        for all .deb packages in the Debian archive
        - data          DebPart object corresponding to the data.tar.gz (or
                        other compressed tar) archive contained in the .deb
                        file
        - control       DebPart object corresponding to the control.tar.gz (or
                        other compressed tar) archive contained in the .deb
                        file
    """

    def __init__(self, filename=None, mode='r', fileobj=None):
        """Open a .deb and locate its three required ar members.

        Arguments are passed through to ArFile.__init__. Raises DebError if
        the 'debian-binary' member is missing, or if the control or data
        part is missing or present with more than one known extension.
        """
        ArFile.__init__(self, filename, mode, fileobj)
        actual_names = set(self.getnames())

        def compressed_part_name(basename):
            """Return the one member named basename + a known extension."""
            candidates = ['%s.%s' % (basename, ext) for ext in PART_EXTS]
            parts = actual_names.intersection(candidates)
            if not parts:
                raise DebError("missing required part in given .deb"
                               " (expected one of: %s)" % candidates)
            elif len(parts) > 1:
                raise DebError("too many parts in given .deb"
                               " (was looking for only one of: %s)"
                               % candidates)
            # exactly one match left
            return next(iter(parts))

        if INFO_PART not in actual_names:
            raise DebError("missing required part in given .deb"
                           " (expected: '%s')" % INFO_PART)

        self.__parts = {}
        self.__parts[CTRL_PART] = DebControl(self.getmember(
            compressed_part_name(CTRL_PART)))
        self.__parts[DATA_PART] = DebData(self.getmember(
            compressed_part_name(DATA_PART)))
        self.__pkgname = None   # updated lazily by __updatePkgName

        f = self.getmember(INFO_PART)
        try:
            self.__version = f.read().strip()
        finally:
            f.close()

    def __updatePkgName(self):
        """Cache the package name read from the control file."""
        self.__pkgname = self.debcontrol()['package']

    version = property(lambda self: self.__version)
    data = property(lambda self: self.__parts[DATA_PART])
    control = property(lambda self: self.__parts[CTRL_PART])

    # proxy methods for the appropriate parts

    def debcontrol(self):
        """ See .control.debcontrol() """
        return self.control.debcontrol()

    def scripts(self):
        """ See .control.scripts() """
        return self.control.scripts()

    def md5sums(self, encoding=None, errors=None):
        """ See .control.md5sums() """
        return self.control.md5sums(encoding=encoding, errors=errors)

    def changelog(self):
        """ Return a Changelog object for the changelog.Debian.gz of the
        present .deb package. Return None if no changelog can be found. """

        if self.__pkgname is None:
            self.__updatePkgName()

        for fname in [CHANGELOG_DEBIAN % self.__pkgname,
                      CHANGELOG_NATIVE % self.__pkgname]:
            if not self.data.has_file(fname):
                continue
            fobj = self.data.get_file(fname)
            if fobj is None:
                # member exists but has no extractable content; try the
                # next candidate
                continue
            try:
                gz = gzip.GzipFile(fileobj=fobj)
                try:
                    raw_changelog = gz.read()
                finally:
                    gz.close()
            finally:
                # GzipFile.close() leaves a caller-supplied fileobj open, so
                # the tar member has to be closed explicitly
                fobj.close()
            return Changelog(raw_changelog)
        return None

    def close(self):
        """Close both the control and the data part."""
        self.control.close()
        self.data.close()


if __name__ == '__main__':
    deb = DebFile(filename=sys.argv[1])
    tgz = deb.control.tgz()
    print(tgz.getmember('control'))