diff options
author | Georg Brandl <georg@python.org> | 2008-05-26 10:29:35 +0000 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2008-05-26 10:29:35 +0000 |
commit | 7775a1f3aa8166b22293d63b5a7715bbcfa35054 (patch) | |
tree | 14340ef392976deb444b666f90c6f7efed9b8b62 /Lib/dbm | |
parent | 6487a3aa1026c275985d1ae0f7f7bcdde43611d2 (diff) | |
download | cpython-7775a1f3aa8166b22293d63b5a7715bbcfa35054.tar.gz |
Create the dbm package from PEP 3108. #2881.
Diffstat (limited to 'Lib/dbm')
-rw-r--r-- | Lib/dbm/__init__.py | 198 | ||||
-rw-r--r-- | Lib/dbm/bsd.py | 10 | ||||
-rw-r--r-- | Lib/dbm/dumb.py | 257 | ||||
-rw-r--r-- | Lib/dbm/gnu.py | 3 | ||||
-rw-r--r-- | Lib/dbm/ndbm.py | 3 |
5 files changed, 471 insertions, 0 deletions
diff --git a/Lib/dbm/__init__.py b/Lib/dbm/__init__.py
new file mode 100644
index 0000000000..9fdd4145cc
--- /dev/null
+++ b/Lib/dbm/__init__.py
@@ -0,0 +1,198 @@
+"""Generic interface to all dbm clones.
+
+Use
+
+        import dbm
+        d = dbm.open(file, 'w', 0o666)
+
+The returned object is a dbm.bsd, dbm.gnu, dbm.ndbm or dbm.dumb
+object, dependent on the type of database being opened (determined by
+the whichdb function) in the case of an existing dbm. If the dbm does
+not exist and the create or new flag ('c' or 'n') was specified, the
+dbm type will be determined by the availability of the modules (tested
+in the above order).
+
+It has the following interface (key and data are strings):
+
+        d[key] = data   # store data at key (may override data at
+                        # existing key)
+        data = d[key]   # retrieve data at key (raise KeyError if no
+                        # such key)
+        del d[key]      # delete data stored at key (raises KeyError
+                        # if no such key)
+        flag = key in d # true if the key exists
+        list = d.keys() # return a list of all existing keys (slow!)
+
+Future versions may change the order in which implementations are
+tested for existence, and add interfaces to other dbm-like
+implementations.
+
+The open function has an optional second argument. This can be 'r',
+for read-only access, 'w', for read-write access of an existing
+database, 'c' for read-write access to a new or existing database, and
+'n' for read-write access to a new database. The default is 'r'.
+
+Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
+only if it doesn't exist; and 'n' always creates a new database.
+"""
+
+__all__ = ['open', 'whichdb', 'error']
+
+import io
+import os
+import struct
+import sys
+
+
+class error(Exception):
+    pass
+
+_names = ['dbm.bsd', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
+_errors = [error]
+_defaultmod = None
+_modules = {}
+
+for _name in _names:
+    try:
+        _mod = __import__(_name, fromlist=['open'])
+    except ImportError:
+        continue
+    if not _defaultmod:
+        _defaultmod = _mod
+    _modules[_name] = _mod
+    _errors.append(_mod.error)
+
+if not _defaultmod:
+    raise ImportError("no dbm clone found; tried %s" % _names)
+
+error = tuple(_errors)  # error[0] is this package's own exception class
+
+
+def open(file, flag = 'r', mode = 0o666):
+    # guess the type of an existing database
+    result = whichdb(file)
+    if result is None:
+        # db doesn't exist
+        if 'c' in flag or 'n' in flag:
+            # file doesn't exist and the new flag was used so use default type
+            mod = _defaultmod
+        else:
+            raise error[0]("need 'c' or 'n' flag to open new db")
+    elif result == "":
+        # db type cannot be determined
+        raise error[0]("db type could not be determined")
+    else:
+        mod = _modules[result]
+    return mod.open(file, flag, mode)
+
+
+try:
+    from dbm import ndbm
+    _dbmerror = ndbm.error
+except ImportError:
+    ndbm = None
+    # just some sort of valid exception which might be raised in the ndbm test
+    _dbmerror = IOError
+
+def whichdb(filename):
+    """Guess which db package to use to open a db file.
+
+    Return values:
+
+    - None if the database file can't be read;
+    - empty string if the file can be read but can't be recognized
+    - the dbm submodule name (e.g. "dbm.ndbm" or "dbm.gnu") if recognized.
+
+    Importing the given module may still fail, and opening the
+    database using that module may still fail.
+    """
+
+    # Check for ndbm first -- this has a .pag and a .dir file
+    try:
+        f = io.open(filename + ".pag", "rb")
+        f.close()
+        # dbm linked with gdbm on OS/2 doesn't have .dir file
+        if sys.platform != "os2emx" or not ndbm or ndbm.library != "GNU gdbm":
+            f = io.open(filename + ".dir", "rb")
+            f.close()
+        return "dbm.ndbm"
+    except IOError:
+        # some dbm emulations based on Berkeley DB generate a .db file
+        # some do not, but they should be caught by the bsd checks
+        try:
+            f = io.open(filename + ".db", "rb")
+            f.close()
+            # guarantee we can actually open the file using dbm
+            # kind of overkill, but since we are dealing with emulations
+            # it seems like a prudent step
+            if ndbm is not None:
+                d = ndbm.open(filename)
+                d.close()
+                return "dbm.ndbm"
+        except (IOError, _dbmerror):
+            pass
+
+    # Check for dumbdbm next -- this has a .dir and a .dat file
+    try:
+        # First check for presence of files
+        os.stat(filename + ".dat")
+        size = os.stat(filename + ".dir").st_size
+        # dumbdbm files with no keys are empty
+        if size == 0:
+            return "dbm.dumb"
+        f = io.open(filename + ".dir", "rb")
+        try:
+            if f.read(1) in (b"'", b'"'):
+                return "dbm.dumb"
+        finally:
+            f.close()
+    except (OSError, IOError):
+        pass
+
+    # See if the file exists, return None if not
+    try:
+        f = io.open(filename, "rb")
+    except IOError:
+        return None
+
+    # Read the start of the file -- the magic number
+    s16 = f.read(16)
+    f.close()
+    s = s16[0:4]
+
+    # Return "" if not at least 4 bytes
+    if len(s) != 4:
+        return ""
+
+    # Convert to 4-byte int in native byte order -- return "" if impossible
+    try:
+        (magic,) = struct.unpack("=l", s)
+    except struct.error:
+        return ""
+
+    # Check for GNU dbm (original magic plus the 32- and 64-bit variants)
+    if magic in (0x13579ace, 0x13579acd, 0x13579acf):
+        return "dbm.gnu"
+
+    ## Check for old Berkeley db hash file format v2
+    #if magic in (0x00061561, 0x61150600):
+    #    return "bsddb185" # not supported anymore
+
+    # Later versions of Berkeley db hash file have a 12-byte pad in
+    # front of the file type
+    try:
+        (magic,) = struct.unpack("=l", s16[-4:])
+    except struct.error:
+        return ""
+
+    # Check for BSD hash
+    if magic in (0x00061561, 0x61150600):
+        return "dbm.bsd"
+
+    # Unknown
+    return ""
+
+
+if __name__ == "__main__":
+    for filename in sys.argv[1:]:
+        print(whichdb(filename) or "UNKNOWN", filename)
diff --git a/Lib/dbm/bsd.py b/Lib/dbm/bsd.py
new file mode 100644
index 0000000000..8353f50376
--- /dev/null
+++ b/Lib/dbm/bsd.py
@@ -0,0 +1,10 @@
+"""Provide a (g)dbm-compatible interface to bsddb.hashopen."""
+
+import bsddb
+
+__all__ = ["error", "open"]
+
+error = bsddb.error
+
+def open(file, flag = 'r', mode=0o666):
+    return bsddb.hashopen(file, flag, mode)
diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py
new file mode 100644
index 0000000000..76f4a631bc
--- /dev/null
+++ b/Lib/dbm/dumb.py
@@ -0,0 +1,257 @@
+"""A dumb and slow but simple dbm clone.
+
+For database spam, spam.dir contains the index (a text file),
+spam.bak *may* contain a backup of the index (also a text file),
+while spam.dat contains the data (a binary file).
+
+XXX TO DO:
+
+- seems to contain a bug when updating...
+
+- reclaim free space (currently, space once occupied by deleted or expanded
+items is never reused)
+
+- support concurrent access (currently, if two processes take turns making
+updates, they can mess up the index)
+
+- support efficient access to large databases (currently, the whole index
+is read when the database is opened, and some updates rewrite the whole index)
+
+- support opening for read-only (flag = 'm')
+
+"""
+
+import io as _io
+import os as _os
+import collections
+
+__all__ = ["error", "open"]
+
+_BLOCKSIZE = 512
+
+error = IOError
+
+class _Database(collections.MutableMapping):
+
+    # The on-disk directory and data files can remain in mutually
+    # inconsistent states for an arbitrarily long time (see comments
+    # at the end of __setitem__). This is only repaired when _commit()
+    # gets called. One place _commit() gets called is from __del__(),
+    # and if that occurs at program shutdown time, module globals may
+    # already have gotten rebound to None. Since it's crucial that
+    # _commit() finish successfully, we can't ignore shutdown races
+    # here, and _commit() must not reference any globals.
+    _os = _os       # for _commit()
+    _io = _io       # for _commit()
+
+    def __init__(self, filebasename, mode):
+        self._mode = mode
+
+        # The directory file is a text file. Each line looks like
+        #    "%r, (%d, %d)\n" % (key, pos, siz)
+        # where key is the string key, pos is the offset into the dat
+        # file of the associated value's first byte, and siz is the number
+        # of bytes in the associated value.
+        self._dirfile = filebasename + '.dir'
+
+        # The data file is a binary file pointed into by the directory
+        # file, and holds the values associated with keys. Each value
+        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
+        # binary 8-bit string value.
+        self._datfile = filebasename + '.dat'
+        self._bakfile = filebasename + '.bak'
+
+        # The index is an in-memory dict, mirroring the directory file.
+        self._index = None  # maps keys to (pos, siz) pairs
+
+        # Mod by Jack: create data file if needed
+        try:
+            f = _io.open(self._datfile, 'r')
+        except IOError:
+            f = _io.open(self._datfile, 'w')
+            self._chmod(self._datfile)
+        f.close()
+        self._update()
+
+    # Read the directory file into the index.  CAUTION: lines are eval()'d!
+    def _update(self):
+        self._index = {}
+        try:
+            f = _io.open(self._dirfile, 'r')
+        except IOError:
+            pass
+        else:
+            with f:
+                for line in f:
+                    line = line.rstrip()
+                    key, pos_and_siz_pair = eval(line)
+                    self._index[key] = pos_and_siz_pair
+
+    # Write the index dict to the directory file. The original directory
+    # file (if any) is renamed with a .bak extension first. If a .bak
+    # file currently exists, it's deleted.
+    def _commit(self):
+        # CAUTION: It's vital that _commit() succeed, and _commit() can
+        # be called from __del__(). Therefore we must never reference a
+        # global in this routine.
+        if self._index is None:
+            return  # nothing to do
+
+        try:
+            self._os.unlink(self._bakfile)
+        except self._os.error:
+            pass
+
+        try:
+            self._os.rename(self._dirfile, self._bakfile)
+        except self._os.error:
+            pass
+
+        f = self._io.open(self._dirfile, 'w')
+        self._chmod(self._dirfile)
+        for key, pos_and_siz_pair in self._index.items():
+            f.write("%r, %r\n" % (key, pos_and_siz_pair))
+        f.close()
+
+    sync = _commit
+
+    def __getitem__(self, key):
+        key = key.decode("latin-1")
+        pos, siz = self._index[key]     # may raise KeyError
+        f = _io.open(self._datfile, 'rb')
+        f.seek(pos)
+        dat = f.read(siz)
+        f.close()
+        return dat
+
+    # Append val to the data file, starting at a _BLOCKSIZE-aligned
+    # offset. The data file is first padded with NUL bytes (if needed)
+    # to get to an aligned offset. Return pair
+    #     (starting offset of val, len(val))
+    def _addval(self, val):
+        f = _io.open(self._datfile, 'rb+')
+        f.seek(0, 2)
+        pos = int(f.tell())
+        npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
+        f.write(b'\0'*(npos-pos))
+        pos = npos
+        f.write(val)
+        f.close()
+        return (pos, len(val))
+
+    # Write val to the data file, starting at offset pos. The caller
+    # is responsible for ensuring that there's enough room starting at
+    # pos to hold val, without overwriting some other value. Return
+    # pair (pos, len(val)).
+    def _setval(self, pos, val):
+        f = _io.open(self._datfile, 'rb+')
+        f.seek(pos)
+        f.write(val)
+        f.close()
+        return (pos, len(val))
+
+    # key is a new key whose associated value starts in the data file
+    # at offset pos and with length siz. Add an index record to
+    # the in-memory index dict, and append one to the directory file.
+    def _addkey(self, key, pos_and_siz_pair):
+        self._index[key] = pos_and_siz_pair
+        f = _io.open(self._dirfile, 'a')
+        self._chmod(self._dirfile)
+        f.write("%r, %r\n" % (key, pos_and_siz_pair))
+        f.close()
+
+    def __setitem__(self, key, val):
+        if not isinstance(key, bytes):
+            raise TypeError("keys must be bytes")
+        key = key.decode("latin-1") # hashable bytes
+        if not isinstance(val, (bytes, bytearray)):
+            raise TypeError("values must be byte strings")
+        if key not in self._index:
+            self._addkey(key, self._addval(val))
+        else:
+            # See whether the new value is small enough to fit in the
+            # (padded) space currently occupied by the old value.
+            pos, siz = self._index[key]
+            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
+            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
+            if newblocks <= oldblocks:
+                self._index[key] = self._setval(pos, val)
+            else:
+                # The new value doesn't fit in the (padded) space used
+                # by the old value. The blocks used by the old value are
+                # forever lost.
+                self._index[key] = self._addval(val)
+
+            # Note that _index may be out of synch with the directory
+            # file now: _setval() and _addval() don't update the directory
+            # file. This also means that the on-disk directory and data
+            # files are in a mutually inconsistent state, and they'll
+            # remain that way until _commit() is called. Note that this
+            # is a disaster (for the database) if the program crashes
+            # (so that _commit() never gets called).
+
+    def __delitem__(self, key):
+        key = key.decode("latin-1")
+        # The blocks used by the associated value are lost.
+        del self._index[key]
+        # XXX It's unclear why we do a _commit() here (the code always
+        # XXX has, so I'm not changing it). __setitem__ doesn't try to
+        # XXX keep the directory file in synch. Why should we? Or
+        # XXX why shouldn't __setitem__?
+        self._commit()
+
+    def keys(self):
+        return [key.encode("latin-1") for key in self._index.keys()]
+
+    def items(self):
+        return [(key.encode("latin-1"), self[key.encode("latin-1")])
+                for key in self._index.keys()]
+
+    def __contains__(self, key):
+        key = key.decode("latin-1")
+        return key in self._index
+
+    def iterkeys(self):
+        return iter(self.keys())    # bytes keys, as every accessor expects
+    __iter__ = iterkeys
+
+    def __len__(self):
+        return len(self._index)
+
+    def close(self):
+        self._commit()
+        self._index = self._datfile = self._dirfile = self._bakfile = None
+
+    __del__ = close
+
+    def _chmod (self, file):
+        if hasattr(self._os, 'chmod'):
+            self._os.chmod(file, self._mode)
+
+
+def open(file, flag=None, mode=0o666):
+    """Open the database file, filename, and return corresponding object.
+
+    The flag argument, used to control how the database is opened in the
+    other DBM implementations, is ignored in the dbm.dumb module; the
+    database is always opened for update, and will be created if it does
+    not exist.
+
+    The optional mode argument is the UNIX mode of the file, used only when
+    the database has to be created. It defaults to octal code 0o666 (and
+    will be modified by the prevailing umask).
+
+    """
+    # flag argument is currently ignored
+
+    # Modify mode depending on the umask
+    try:
+        um = _os.umask(0)
+        _os.umask(um)
+    except AttributeError:
+        pass
+    else:
+        # Turn off any bits that are set in the umask
+        mode = mode & (~um)
+
+    return _Database(file, mode)
diff --git a/Lib/dbm/gnu.py b/Lib/dbm/gnu.py
new file mode 100644
index 0000000000..b07a1defff
--- /dev/null
+++ b/Lib/dbm/gnu.py
@@ -0,0 +1,3 @@
+"""Provide the _gdbm module as a dbm submodule."""
+
+from _gdbm import *
diff --git a/Lib/dbm/ndbm.py b/Lib/dbm/ndbm.py
new file mode 100644
index 0000000000..23056a29ef
--- /dev/null
+++ b/Lib/dbm/ndbm.py
@@ -0,0 +1,3 @@
+"""Provide the _dbm module as a dbm submodule."""
+
+from _dbm import *