diff options
Diffstat (limited to 'git/db/py/loose.py')
-rw-r--r-- | git/db/py/loose.py | 472 |
1 files changed, 236 insertions, 236 deletions
diff --git a/git/db/py/loose.py b/git/db/py/loose.py index 6e72aff0..8267be98 100644 --- a/git/db/py/loose.py +++ b/git/db/py/loose.py @@ -3,53 +3,53 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from base import ( - PureRootPathDB, - PureObjectDBR, - PureObjectDBW - ) + PureRootPathDB, + PureObjectDBR, + PureObjectDBW + ) from git.exc import ( - InvalidDBRoot, - BadObject, - AmbiguousObjectName - ) + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) from git.stream import ( - DecompressMemMapReader, - FDCompressedSha1Writer, - FDStream, - Sha1Writer - ) + DecompressMemMapReader, + FDCompressedSha1Writer, + FDStream, + Sha1Writer + ) from git.base import ( - OStream, - OInfo - ) + OStream, + OInfo + ) from git.util import ( - file_contents_ro_filepath, - ENOENT, - hex_to_bin, - bin_to_hex, - exists, - chmod, - isdir, - isfile, - remove, - mkdir, - rename, - dirname, - basename, - join - ) + file_contents_ro_filepath, + ENOENT, + hex_to_bin, + bin_to_hex, + exists, + chmod, + isdir, + isfile, + remove, + mkdir, + rename, + dirname, + basename, + join + ) from git.fun import ( - chunk_size, - loose_object_header_info, - write_object, - stream_copy - ) + chunk_size, + loose_object_header_info, + write_object, + stream_copy + ) import tempfile import mmap @@ -61,203 +61,203 @@ __all__ = ( 'PureLooseObjectODB', ) class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): - """A database which operates on loose object files""" - - # CONFIGURATION - # chunks in which data will be copied between streams - stream_chunk_size = chunk_size - - # On windows we need to keep it writable, otherwise it cannot be removed - # either - new_objects_mode = 0444 - if os.name == 'nt': - new_objects_mode = 0644 - - - def __init__(self, root_path): - super(PureLooseObjectODB, self).__init__(root_path) - self._hexsha_to_file = dict() - # Additional Flags - might be set to 0 after the first failure - # Depending on the root, this might work for some mounts, for others not, which - # is why it is per instance - self._fd_open_flags = getattr(os, 'O_NOATIME', 0) - - #{ Interface - def object_path(self, hexsha): - """ - :return: path at which the object with the given hexsha would be stored, - relative to the database root""" - return join(hexsha[:2], hexsha[2:]) - - def readable_db_object_path(self, hexsha): - """ - :return: readable object path to the object identified by hexsha - :raise BadObject: If the object file does not exist""" - try: - return self._hexsha_to_file[hexsha] - except KeyError: - pass - # END ignore cache misses - - # try filesystem - path = self.db_path(self.object_path(hexsha)) - if exists(path): - self._hexsha_to_file[hexsha] = path - return path - # END handle cache - raise BadObject(hexsha) - - - #} END interface - - def _map_loose_object(self, sha): - """ - :return: memory map of that file to allow random read access - :raise BadObject: if object could not be located""" - db_path = self.db_path(self.object_path(bin_to_hex(sha))) - try: - return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) - except OSError,e: - if e.errno != ENOENT: - # try again without noatime - try: - return file_contents_ro_filepath(db_path) - except OSError: - raise BadObject(sha) - # didn't work because of our flag, don't try it again - self._fd_open_flags = 0 - else: - raise BadObject(sha) - # END handle error - # END exception handling - try: - return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) - finally: - os.close(fd) - # END assure file is closed - - def set_ostream(self, stream): - """:raise TypeError: if the stream does not support the Sha1Writer interface""" - if stream is not None and not isinstance(stream, Sha1Writer): - raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) - return super(PureLooseObjectODB, self).set_ostream(stream) - - def info(self, sha): - m = self._map_loose_object(sha) - try: - type, size = loose_object_header_info(m) - return OInfo(sha, type, size) - finally: - m.close() - # END assure release of system resources - - def stream(self, sha): - m = self._map_loose_object(sha) - type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) - return OStream(sha, type, size, stream) - - def has_object(self, sha): - try: - self.readable_db_object_path(bin_to_hex(sha)) - return True - except BadObject: - return False - # END check existance - - def partial_to_complete_sha_hex(self, partial_hexsha): - """:return: 20 byte binary sha1 string which matches the given name uniquely - :param name: hexadecimal partial name - :raise AmbiguousObjectName: - :raise BadObject: """ - candidate = None - for binsha in self.sha_iter(): - if bin_to_hex(binsha).startswith(partial_hexsha): - # it can't ever find the same object twice - if candidate is not None: - raise AmbiguousObjectName(partial_hexsha) - candidate = binsha - # END for each object - if candidate is None: - raise BadObject(partial_hexsha) - return candidate - - def store(self, istream): - """note: The sha we produce will be hex by nature""" - tmp_path = None - writer = self.ostream() - if writer is None: - # open a tmp file to write the data to - fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) - - if istream.binsha is None: - writer = FDCompressedSha1Writer(fd) - else: - writer = FDStream(fd) - # END handle direct stream copies - # END handle custom writer - - try: - try: - if istream.binsha is not None: - # copy as much as possible, the actual uncompressed item size might - # be smaller than the compressed version - stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size) - else: - # write object with header, we have to make a new one - write_object(istream.type, istream.size, istream.read, writer.write, - chunk_size=self.stream_chunk_size) - # END handle direct stream copies - finally: - if tmp_path: - writer.close() - # END assure target stream is closed - except: - if tmp_path: - os.remove(tmp_path) - raise - # END assure tmpfile removal on error - - hexsha = None - if istream.binsha: - hexsha = istream.hexsha - else: - hexsha = writer.sha(as_hex=True) - # END handle sha - - if tmp_path: - obj_path = self.db_path(self.object_path(hexsha)) - obj_dir = dirname(obj_path) - if not isdir(obj_dir): - mkdir(obj_dir) - # END handle destination directory - # rename onto existing doesn't work on windows - if os.name == 'nt' and isfile(obj_path): - remove(obj_path) - # END handle win322 - rename(tmp_path, obj_path) - - # make sure its readable for all ! It started out as rw-- tmp file - # but needs to be rwrr - chmod(obj_path, self.new_objects_mode) - # END handle dry_run - - istream.binsha = hex_to_bin(hexsha) - return istream - - def sha_iter(self): - # find all files which look like an object, extract sha from there - for root, dirs, files in os.walk(self.root_path()): - root_base = basename(root) - if len(root_base) != 2: - continue - - for f in files: - if len(f) != 38: - continue - yield hex_to_bin(root_base + f) - # END for each file - # END for each walk iteration - - def size(self): - return len(tuple(self.sha_iter())) - + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + # On windows we need to keep it writable, otherwise it cannot be removed + # either + new_objects_mode = 0444 + if os.name == 'nt': + new_objects_mode = 0644 + + + def __init__(self, root_path): + super(PureLooseObjectODB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(bin_to_hex(sha))) + try: + return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + return file_contents_ro_filepath(db_path) + except OSError: + raise BadObject(sha) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise BadObject(sha) + # END handle error + # END exception handling + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + finally: + os.close(fd) + # END assure file is closed + + def set_ostream(self, stream): + """:raise TypeError: if the stream does not support the Sha1Writer interface""" + if stream is not None and not isinstance(stream, Sha1Writer): + raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) + return super(PureLooseObjectODB, self).set_ostream(stream) + + def info(self, sha): + m = self._map_loose_object(sha) + try: + type, size = loose_object_header_info(m) + return OInfo(sha, type, size) + finally: + m.close() + # END assure release of system resources + + def stream(self, sha): + m = self._map_loose_object(sha) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) + return OStream(sha, type, size, stream) + + def has_object(self, sha): + try: + self.readable_db_object_path(bin_to_hex(sha)) + return True + except BadObject: + return False + # END check existance + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: 20 byte binary sha1 string which matches the given name uniquely + :param name: hexadecimal partial name + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for binsha in self.sha_iter(): + if bin_to_hex(binsha).startswith(partial_hexsha): + # it can't ever find the same object twice + if candidate is not None: + raise AmbiguousObjectName(partial_hexsha) + candidate = binsha + # END for each object + if candidate is None: + raise BadObject(partial_hexsha) + return candidate + + def store(self, istream): + """note: The sha we produce will be hex by nature""" + tmp_path = None + writer = self.ostream() + if writer is None: + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + + if istream.binsha is None: + writer = FDCompressedSha1Writer(fd) + else: + writer = FDStream(fd) + # END handle direct stream copies + # END handle custom writer + + try: + try: + if istream.binsha is not None: + # copy as much as possible, the actual uncompressed item size might + # be smaller than the compressed version + stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size) + else: + # write object with header, we have to make a new one + write_object(istream.type, istream.size, istream.read, writer.write, + chunk_size=self.stream_chunk_size) + # END handle direct stream copies + finally: + if tmp_path: + writer.close() + # END assure target stream is closed + except: + if tmp_path: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + + hexsha = None + if istream.binsha: + hexsha = istream.hexsha + else: + hexsha = writer.sha(as_hex=True) + # END handle sha + + if tmp_path: + obj_path = self.db_path(self.object_path(hexsha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + # rename onto existing doesn't work on windows + if os.name == 'nt' and isfile(obj_path): + remove(obj_path) + # END handle win322 + rename(tmp_path, obj_path) + + # make sure its readable for all ! It started out as rw-- tmp file + # but needs to be rwrr + chmod(obj_path, self.new_objects_mode) + # END handle dry_run + + istream.binsha = hex_to_bin(hexsha) + return istream + + def sha_iter(self): + # find all files which look like an object, extract sha from there + for root, dirs, files in os.walk(self.root_path()): + root_base = basename(root) + if len(root_base) != 2: + continue + + for f in files: + if len(f) != 38: + continue + yield hex_to_bin(root_base + f) + # END for each file + # END for each walk iteration + + def size(self): + return len(tuple(self.sha_iter())) + |