54 files changed, 13750 insertions, 13750 deletions
diff --git a/git/__init__.py b/git/__init__.py index 0658c330..9ea81112 100644 --- a/git/__init__.py +++ b/git/__init__.py @@ -13,15 +13,15 @@ __version__ = 'git' #{ Initialization def _init_externals(): - """Initialize external projects by putting them into the path""" - sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'gitdb')) - - try: - import gitdb - except ImportError: - raise ImportError("'gitdb' could not be found in your PYTHONPATH") - #END verify import - + """Initialize external projects by putting them into the path""" + sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'gitdb')) + + try: + import gitdb + except ImportError: + raise ImportError("'gitdb' could not be found in your PYTHONPATH") + #END verify import + #} END initialization ################# @@ -41,14 +41,14 @@ from git.repo import Repo from git.remote import * from git.index import * from git.util import ( - LockFile, - BlockingLockFile, - Stats, - Actor - ) + LockFile, + BlockingLockFile, + Stats, + Actor + ) #} END imports __all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ] - + if not (name.startswith('_') or inspect.ismodule(obj)) ] + @@ -6,569 +6,569 @@ import os, sys from util import ( - LazyMixin, - stream_copy - ) + LazyMixin, + stream_copy + ) from exc import GitCommandError from subprocess import ( - call, - Popen, - PIPE - ) + call, + Popen, + PIPE + ) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', - 'output_stream' ) + 'with_exceptions', 'as_process', + 'output_stream' ) __all__ = ('Git', ) def dashify(string): - return string.replace('_', '-') + return string.replace('_', '-') class Git(LazyMixin): - """ - The Git class manages communication with the Git binary. - - It provides a convenient interface to calling the Git binary, such as in:: - - g = Git( git_dir ) - g.init() # calls 'git init' program - rval = g.ls_files() # calls 'git ls-files' program - - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info") - - # CONFIGURATION - # The size in bytes read from stdout when copying git's output to another stream - max_chunk_size = 1024*64 - - git_exec_name = "git" # default that should work on linux and windows - git_exec_name_win = "git.cmd" # alternate command name, windows only - - # Enables debugging of GitPython's git commands - GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) - - # Provide the full path to the git executable. Otherwise it assumes git is in the path - _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" - GIT_PYTHON_GIT_EXECUTABLE = os.environ.get(_git_exec_env_var, git_exec_name) - - - class AutoInterrupt(object): - """Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. - - The wait method was overridden to perform automatic status code checking - and possibly raise.""" - __slots__= ("proc", "args") - - def __init__(self, proc, args ): - self.proc = proc - self.args = args - - def __del__(self): - # did the process finish already so we have a return code ? 
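# Example (a minimal sketch, not part of the diff): the usage pattern from the
# Git docstring above. The repository path is hypothetical, and GIT_PYTHON_TRACE
# must be set before git.cmd is imported, since the class reads it at import time.
import os
os.environ['GIT_PYTHON_TRACE'] = 'full'

from git.cmd import Git

g = Git('/tmp/some-repo')
g.init()                 # invokes 'git init'
rval = g.ls_files()      # invokes 'git ls-files', returns stdout as a string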
- if self.proc.poll() is not None: - return - - # can be that nothing really exists anymore ... - if os is None: - return - - # try to kill it - try: - os.kill(self.proc.pid, 2) # interrupt signal - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) - # END exception handling - - def __getattr__(self, attr): - return getattr(self.proc, attr) - - def wait(self): - """Wait for the process and return its status code. - - :raise GitCommandError: if the return status is not 0""" - status = self.proc.wait() - if status != 0: - raise GitCommandError(self.args, status, self.proc.stderr.read()) - # END status handling - return status - # END auto interrupt - - class CatFileContentStream(object): - """Object representing a sized read-only stream returning the contents of - an object. - It behaves like a stream, but counts the data read and simulates an empty - stream once our sized content region is empty. - If not all data is read to the end of the objects's lifetime, we read the - rest to assure the underlying stream continues to work""" - - __slots__ = ('_stream', '_nbr', '_size') - - def __init__(self, size, stream): - self._stream = stream - self._size = size - self._nbr = 0 # num bytes read - - # special case: if the object is empty, has null bytes, get the - # final newline right away. - if size == 0: - stream.read(1) - # END handle empty streams - - def read(self, size=-1): - bytes_left = self._size - self._nbr - if bytes_left == 0: - return '' - if size > -1: - # assure we don't try to read past our limit - size = min(bytes_left, size) - else: - # they try to read all, make sure its not more than what remains - size = bytes_left - # END check early depletion - data = self._stream.read(size) - self._nbr += len(data) - - # check for depletion, read our final byte to make the stream usable by others - if self._size - self._nbr == 0: - self._stream.read(1) # final newline - # END finish reading - return data - - def readline(self, size=-1): - if self._nbr == self._size: - return '' - - # clamp size to lowest allowed value - bytes_left = self._size - self._nbr - if size > -1: - size = min(bytes_left, size) - else: - size = bytes_left - # END handle size - - data = self._stream.readline(size) - self._nbr += len(data) - - # handle final byte - if self._size - self._nbr == 0: - self._stream.read(1) - # END finish reading - - return data - - def readlines(self, size=-1): - if self._nbr == self._size: - return list() - - # leave all additional logic to our readline method, we just check the size - out = list() - nbr = 0 - while True: - line = self.readline() - if not line: - break - out.append(line) - if size > -1: - nbr += len(line) - if nbr > size: - break - # END handle size constraint - # END readline loop - return out - - def __iter__(self): - return self - - def next(self): - line = self.readline() - if not line: - raise StopIteration - return line - - def __del__(self): - bytes_left = self._size - self._nbr - if bytes_left: - # read and discard - seeking is impossible within a stream - # includes terminating newline - self._stream.read(bytes_left + 1) - # END handle incomplete read - - - def __init__(self, working_dir=None): - """Initialize this instance with: - - 
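# Example (a minimal sketch, not part of the diff): how the AutoInterrupt
# wrapper above is used via as_process; the repository path is hypothetical.
from git.cmd import Git
from git.exc import GitCommandError

g = Git('/tmp/some-repo')
proc = g.log(p=True, as_process=True)   # AutoInterrupt around subprocess.Popen
first = proc.stdout.readline()          # attribute access is forwarded to the process
try:
    proc.wait()                         # raises GitCommandError on non-zero exit
except GitCommandError, e:
    print(e)
# once 'proc' goes out of scope, __del__ interrupts a still-running git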
:param working_dir: - Git directory we should work in. If None, we always work in the current - directory as returned by os.getcwd(). - It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories.""" - super(Git, self).__init__() - self._working_dir = working_dir - - # cached command slots - self.cat_file_header = None - self.cat_file_all = None + """ + The Git class manages communication with the Git binary. + + It provides a convenient interface to calling the Git binary, such as in:: + + g = Git( git_dir ) + g.init() # calls 'git init' program + rval = g.ls_files() # calls 'git ls-files' program + + ``Debugging`` + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info") + + # CONFIGURATION + # The size in bytes read from stdout when copying git's output to another stream + max_chunk_size = 1024*64 + + git_exec_name = "git" # default that should work on linux and windows + git_exec_name_win = "git.cmd" # alternate command name, windows only + + # Enables debugging of GitPython's git commands + GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) + + # Provide the full path to the git executable. Otherwise it assumes git is in the path + _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" + GIT_PYTHON_GIT_EXECUTABLE = os.environ.get(_git_exec_env_var, git_exec_name) + + + class AutoInterrupt(object): + """Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object. + + The wait method was overridden to perform automatic status code checking + and possibly raise.""" + __slots__= ("proc", "args") + + def __init__(self, proc, args ): + self.proc = proc + self.args = args + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # can be that nothing really exists anymore ... + if os is None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + # for some reason, providing None for stdout/stderr still prints something. This is why + # we simply use the shell and redirect to nul. Its slower than CreateProcess, question + # is whether we really want to see all these messages. Its annoying no matter what. + call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def wait(self): + """Wait for the process and return its status code. + + :raise GitCommandError: if the return status is not 0""" + status = self.proc.wait() + if status != 0: + raise GitCommandError(self.args, status, self.proc.stderr.read()) + # END status handling + return status + # END auto interrupt + + class CatFileContentStream(object): + """Object representing a sized read-only stream returning the contents of + an object. + It behaves like a stream, but counts the data read and simulates an empty + stream once our sized content region is empty. 
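# Example (a minimal sketch, not part of the diff): the sized-stream semantics
# of CatFileContentStream above, demonstrated with an in-memory stream standing
# in for git's stdout (object content plus the terminating newline git appends).
from io import BytesIO
from git.cmd import Git

raw = BytesIO(b'hello world\n')          # 11 content bytes + final newline
cs = Git.CatFileContentStream(11, raw)
cs.read(5)     # 'hello' - reads are clamped to the sized region
cs.read()      # ' world' - depletion also consumes the trailing newline
cs.read()      # '' - simulates an empty stream from now on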
+ If not all data is read to the end of the objects's lifetime, we read the + rest to assure the underlying stream continues to work""" + + __slots__ = ('_stream', '_nbr', '_size') + + def __init__(self, size, stream): + self._stream = stream + self._size = size + self._nbr = 0 # num bytes read + + # special case: if the object is empty, has null bytes, get the + # final newline right away. + if size == 0: + stream.read(1) + # END handle empty streams + + def read(self, size=-1): + bytes_left = self._size - self._nbr + if bytes_left == 0: + return '' + if size > -1: + # assure we don't try to read past our limit + size = min(bytes_left, size) + else: + # they try to read all, make sure its not more than what remains + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # check for depletion, read our final byte to make the stream usable by others + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + return data + + def readline(self, size=-1): + if self._nbr == self._size: + return '' + + # clamp size to lowest allowed value + bytes_left = self._size - self._nbr + if size > -1: + size = min(bytes_left, size) + else: + size = bytes_left + # END handle size + + data = self._stream.readline(size) + self._nbr += len(data) + + # handle final byte + if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size=-1): + if self._nbr == self._size: + return list() + + # leave all additional logic to our readline method, we just check the size + out = list() + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def __del__(self): + bytes_left = self._size - self._nbr + if bytes_left: + # read and discard - seeking is impossible within a stream + # includes terminating newline + self._stream.read(bytes_left + 1) + # END handle incomplete read + + + def __init__(self, working_dir=None): + """Initialize this instance with: + + :param working_dir: + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd(). + It is meant to be the working tree directory if available, or the + .git directory in case of bare repositories.""" + super(Git, self).__init__() + self._working_dir = working_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None - def __getattr__(self, name): - """A convenience method as it allows to call the command as if it was - an object. - :return: Callable object that will execute call _call_process with your arguments.""" - if name[0] == '_': - return LazyMixin.__getattr__(self, name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + def __getattr__(self, name): + """A convenience method as it allows to call the command as if it was + an object. 
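# Example (a minimal sketch, not part of the diff): the dynamic dispatch that
# __getattr__ above provides - any attribute becomes a git subcommand, with
# underscores dashified; the path is hypothetical.
from git.cmd import Git

g = Git('/tmp/some-repo')
g.ls_files()                    # __getattr__('ls_files') -> _call_process -> 'git ls-files'
g.for_each_ref('refs/heads')    # -> 'git for-each-ref refs/heads'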
+ :return: Callable object that will execute call _call_process with your arguments.""" + if name[0] == '_': + return LazyMixin.__getattr__(self, name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - def _set_cache_(self, attr): - if attr == '_version_info': - # We only use the first 4 numbers, as everthing else could be strings in fact (on windows) - version_numbers = self._call_process('version').split(' ')[2] - self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4]) - else: - super(Git, self)._set_cache_(attr) - #END handle version info - + def _set_cache_(self, attr): + if attr == '_version_info': + # We only use the first 4 numbers, as everthing else could be strings in fact (on windows) + version_numbers = self._call_process('version').split(' ')[2] + self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4]) + else: + super(Git, self)._set_cache_(attr) + #END handle version info + - @property - def working_dir(self): - """:return: Git directory we are working on""" - return self._working_dir - - @property - def version_info(self): - """ - :return: tuple(int, int, int, int) tuple with integers representing the major, minor - and additional version numbers as parsed from git version. - This value is generated on demand and is cached""" - return self._version_info + @property + def working_dir(self): + """:return: Git directory we are working on""" + return self._working_dir + + @property + def version_info(self): + """ + :return: tuple(int, int, int, int) tuple with integers representing the major, minor + and additional version numbers as parsed from git version. + This value is generated on demand and is cached""" + return self._version_info - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - as_process=False, - output_stream=None, - **subprocess_kwargs - ): - """Handles executing the command on the shell and consumes and returns - the returned information (stdout) + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + as_process=False, + output_stream=None, + **subprocess_kwargs + ): + """Handles executing the command on the shell and consumes and returns + the returned information (stdout) - :param command: - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. + :param command: + The command argument list to execute. + It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. - :param istream: - Standard input filehandle passed to subprocess.Popen. + :param istream: + Standard input filehandle passed to subprocess.Popen. - :param with_keep_cwd: - Whether to use the current working directory from os.getcwd(). - The cmd otherwise uses its own working_dir that it has been initialized - with if possible. + :param with_keep_cwd: + Whether to use the current working directory from os.getcwd(). + The cmd otherwise uses its own working_dir that it has been initialized + with if possible. - :param with_extended_output: - Whether to return a (status, stdout, stderr) tuple. + :param with_extended_output: + Whether to return a (status, stdout, stderr) tuple. - :param with_exceptions: - Whether to raise an exception when git returns a non-zero status. 
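# Example (a minimal sketch, not part of the diff): how _set_cache_ above turns
# the output of 'git version' into version_info; the version string is illustrative.
line = 'git version 1.7.4.1'
version_numbers = line.split(' ')[2]                       # '1.7.4.1'
tuple(int(n) for n in version_numbers.split('.')[:4])      # (1, 7, 4, 1)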
+ :param with_exceptions: + Whether to raise an exception when git returns a non-zero status. - :param as_process: - Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. - - :param output_stream: - If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe due to issues with subprocess. - This merely is a workaround as data will be copied from the - output pipe to the given output stream directly. - - :param subprocess_kwargs: - Keyword arguments to be passed to subprocess.Popen. Please note that - some of the valid kwargs are already set by this method, the ones you - specify may not be the same ones. - - :return: - * str(output) if extended_output = False (Default) - * tuple(int(status), str(stdout), str(stderr)) if extended_output = True - - if ouput_stream is True, the stdout value will be your output stream: - * output_stream if extended_output = False - * tuple(int(status), output_stream, str(stderr)) if extended_output = True - - :raise GitCommandError: - - :note: - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module.""" - if self.GIT_PYTHON_TRACE and not self.GIT_PYTHON_TRACE == 'full': - print ' '.join(command) + :param as_process: + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output and + with_exceptions ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + + :param output_stream: + If set to a file-like object, data produced by the git command will be + output to the given stream directly. + This feature only has any effect if as_process is False. Processes will + always be created with a pipe due to issues with subprocess. + This merely is a workaround as data will be copied from the + output pipe to the given output stream directly. + + :param subprocess_kwargs: + Keyword arguments to be passed to subprocess.Popen. Please note that + some of the valid kwargs are already set by this method, the ones you + specify may not be the same ones. 
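# Example (a minimal sketch, not part of the diff): routing a command's stdout
# into a file via the output_stream keyword documented above; paths are hypothetical.
from git.cmd import Git

out = open('/tmp/head.tar', 'wb')
g = Git('/tmp/some-repo')
g.archive('HEAD', output_stream=out)    # stdout is copied chunk-wise into 'out'
out.close()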
+ + :return: + * str(output) if extended_output = False (Default) + * tuple(int(status), str(stdout), str(stderr)) if extended_output = True + + if ouput_stream is True, the stdout value will be your output stream: + * output_stream if extended_output = False + * tuple(int(status), output_stream, str(stderr)) if extended_output = True + + :raise GitCommandError: + + :note: + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module.""" + if self.GIT_PYTHON_TRACE and not self.GIT_PYTHON_TRACE == 'full': + print ' '.join(command) - # Allow the user to have the command executed in their working dir. - if with_keep_cwd or self._working_dir is None: - cwd = os.getcwd() - else: - cwd=self._working_dir - - # Start the process - proc = Popen(command, - cwd=cwd, - stdin=istream, - stderr=PIPE, - stdout=PIPE, - close_fds=(os.name=='posix'),# unsupported on linux - **subprocess_kwargs - ) - if as_process: - return self.AutoInterrupt(proc, command) - - # Wait for the process to return - status = 0 - stdout_value = '' - stderr_value = '' - try: - if output_stream is None: - stdout_value, stderr_value = proc.communicate() - # strip trailing "\n" - if stdout_value.endswith("\n"): - stdout_value = stdout_value[:-1] - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.returncode - else: - stream_copy(proc.stdout, output_stream, self.max_chunk_size) - stdout_value = output_stream - stderr_value = proc.stderr.read() - # strip trailing "\n" - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.wait() - # END stdout handling - finally: - proc.stdout.close() - proc.stderr.close() + # Allow the user to have the command executed in their working dir. 
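# Example (a minimal sketch, not part of the diff): calling execute() directly
# with extended output, per the docstring above; the path is hypothetical.
from git.cmd import Git

g = Git('/tmp/some-repo')
status, out, err = g.execute(['git', 'status'], with_extended_output=True)
# status == 0 here; a non-zero status would have raised GitCommandError,
# and both out and err arrive with their trailing newline stripped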
+ if with_keep_cwd or self._working_dir is None: + cwd = os.getcwd() + else: + cwd=self._working_dir + + # Start the process + proc = Popen(command, + cwd=cwd, + stdin=istream, + stderr=PIPE, + stdout=PIPE, + close_fds=(os.name=='posix'),# unsupported on linux + **subprocess_kwargs + ) + if as_process: + return self.AutoInterrupt(proc, command) + + # Wait for the process to return + status = 0 + stdout_value = '' + stderr_value = '' + try: + if output_stream is None: + stdout_value, stderr_value = proc.communicate() + # strip trailing "\n" + if stdout_value.endswith("\n"): + stdout_value = stdout_value[:-1] + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.returncode + else: + stream_copy(proc.stdout, output_stream, self.max_chunk_size) + stdout_value = output_stream + stderr_value = proc.stderr.read() + # strip trailing "\n" + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.wait() + # END stdout handling + finally: + proc.stdout.close() + proc.stderr.close() - if self.GIT_PYTHON_TRACE == 'full': - cmdstr = " ".join(command) - if stderr_value: - print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d; stdout: '%s'" % (cmdstr, status, stdout_value) - else: - print "%s -> %d" % (cmdstr, status) - # END handle debug printing + if self.GIT_PYTHON_TRACE == 'full': + cmdstr = " ".join(command) + if stderr_value: + print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d; stdout: '%s'" % (cmdstr, status, stdout_value) + else: + print "%s -> %d" % (cmdstr, status) + # END handle debug printing - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value - def transform_kwargs(self, **kwargs): - """Transforms Python style kwargs into git command line options.""" - args = list() - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args + def transform_kwargs(self, **kwargs): + """Transforms Python style kwargs into git command line options.""" + args = list() + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args - @classmethod - def __unpack_args(cls, arg_list): - if not isinstance(arg_list, (list,tuple)): - return [ str(arg_list) ] - - outlist = list() - for arg in arg_list: - if isinstance(arg_list, (list, tuple)): - outlist.extend(cls.__unpack_args( arg )) - # END recursion - else: - outlist.append(str(arg)) - # END for each arg - return outlist + @classmethod + def __unpack_args(cls, arg_list): + if not isinstance(arg_list, (list,tuple)): + return [ str(arg_list) ] + + outlist = list() + for arg in 
arg_list: + if isinstance(arg_list, (list, tuple)): + outlist.extend(cls.__unpack_args( arg )) + # END recursion + else: + outlist.append(str(arg)) + # END for each arg + return outlist - def _call_process(self, method, *args, **kwargs): - """Run the given git command with the specified arguments and return - the result as a String + def _call_process(self, method, *args, **kwargs): + """Run the given git command with the specified arguments and return + the result as a String - :param method: - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. + :param method: + is the command. Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. - :param args: - is the list of arguments. If None is included, it will be pruned. - This allows your commands to call git more conveniently as None - is realized as non-existent + :param args: + is the list of arguments. If None is included, it will be pruned. + This allows your commands to call git more conveniently as None + is realized as non-existent - :param kwargs: - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). + :param kwargs: + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). - ``Examples``:: - git.rev_list('master', max_count=10, header=True) + ``Examples``:: + git.rev_list('master', max_count=10, header=True) - :return: Same as ``execute``""" - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. - _kwargs = dict() - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass + :return: Same as ``execute``""" + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. + _kwargs = dict() + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - - ext_args = self.__unpack_args([a for a in args if a is not None]) - args = opt_args + ext_args - - def make_call(): - call = [self.GIT_PYTHON_GIT_EXECUTABLE, dashify(method)] - call.extend(args) - return call - #END utility to recreate call after changes - - if sys.platform == 'win32': - try: - try: - return self.execute(make_call(), **_kwargs) - except WindowsError: - # did we switch to git.cmd already, or was it changed from default ? permanently fail - if self.GIT_PYTHON_GIT_EXECUTABLE != self.git_exec_name: - raise - #END handle overridden variable - type(self).GIT_PYTHON_GIT_EXECUTABLE = self.git_exec_name_win - call = [self.GIT_PYTHON_GIT_EXECUTABLE] + list(args) - - try: - return self.execute(make_call(), **_kwargs) - finally: - import warnings - msg = "WARNING: Automatically switched to use git.cmd as git executable, which reduces performance by ~70%." 
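# Example (a minimal sketch, not part of the diff): the docstring example above,
# expanded to show what transform_kwargs and __unpack_args produce; the path is
# hypothetical.
from git.cmd import Git

g = Git('/tmp/some-repo')
g.rev_list('master', max_count=10, header=True)
# option kwargs come first, then positional args:
#   git rev-list --max-count=10 --header master
g.log(n=3)    # single-letter kwargs get one dash and no space: git log -n3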
- msg += "Its recommended to put git.exe into the PATH or to set the %s environment variable to the executable's location" % self._git_exec_env_var - warnings.warn(msg) - #END print of warning - #END catch first failure - except WindowsError: - raise WindowsError("The system cannot find or execute the file at %r" % self.GIT_PYTHON_GIT_EXECUTABLE) - #END provide better error message - else: - return self.execute(make_call(), **_kwargs) - #END handle windows default installation - - def _parse_object_header(self, header_line): - """ - :param header_line: - <hex_sha> type_string size_as_int - - :return: (hex_sha, type_string, size_as_int) - - :raise ValueError: if the header contains indication for an error due to - incorrect input sha""" - tokens = header_line.split() - if len(tokens) != 3: - if not tokens: - raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip())) - else: - raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip())) - # END handle actual return value - # END error handling - - if len(tokens[0]) != 40: - raise ValueError("Failed to parse header: %r" % header_line) - return (tokens[0], tokens[1], int(tokens[2])) - - def __prepare_ref(self, ref): - # required for command to separate refs on stdin - refstr = str(ref) # could be ref-object - if refstr.endswith("\n"): - return refstr - return refstr + "\n" - - def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): - cur_val = getattr(self, attr_name) - if cur_val is not None: - return cur_val - - options = { "istream" : PIPE, "as_process" : True } - options.update( kwargs ) - - cmd = self._call_process( cmd_name, *args, **options ) - setattr(self, attr_name, cmd ) - return cmd - - def __get_object_header(self, cmd, ref): - cmd.stdin.write(self.__prepare_ref(ref)) - cmd.stdin.flush() - return self._parse_object_header(cmd.stdout.readline()) - - def get_object_header(self, ref): - """ Use this method to quickly examine the type and size of the object behind - the given ref. - - :note: The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. - - :return: (hexsha, type_string, size_as_int)""" - cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) - return self.__get_object_header(cmd, ref) - - def get_object_data(self, ref): - """ As get_object_header, but returns object data as well - :return: (hexsha, type_string, size_as_int,data_string) - :note: not threadsafe""" - hexsha, typename, size, stream = self.stream_object_data(ref) - data = stream.read(size) - del(stream) - return (hexsha, typename, size, data) - - def stream_object_data(self, ref): - """As get_object_header, but returns the data as a stream - :return: (hexsha, type_string, size_as_int, stream) - :note: This method is not threadsafe, you need one independent Command instance - per thread to be safe !""" - cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - hexsha, typename, size = self.__get_object_header(cmd, ref) - return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) - - def clear_cache(self): - """Clear all kinds of internal caches to release resources. - - Currently persistent commands will be interrupted. 
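# Example (a minimal sketch, not part of the diff): one 'git cat-file
# --batch-check' header line as consumed by _parse_object_header above;
# the sha is made up.
from git.cmd import Git

line = 'aaf17c4ef3d1eb0b3e7e46b0e4b06e29e1cf0d0c blob 42\n'
Git()._parse_object_header(line)
# -> ('aaf17c4ef3d1eb0b3e7e46b0e4b06e29e1cf0d0c', 'blob', 42)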
- - :return: self""" - self.cat_file_all = None - self.cat_file_header = None - return self + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + + ext_args = self.__unpack_args([a for a in args if a is not None]) + args = opt_args + ext_args + + def make_call(): + call = [self.GIT_PYTHON_GIT_EXECUTABLE, dashify(method)] + call.extend(args) + return call + #END utility to recreate call after changes + + if sys.platform == 'win32': + try: + try: + return self.execute(make_call(), **_kwargs) + except WindowsError: + # did we switch to git.cmd already, or was it changed from default ? permanently fail + if self.GIT_PYTHON_GIT_EXECUTABLE != self.git_exec_name: + raise + #END handle overridden variable + type(self).GIT_PYTHON_GIT_EXECUTABLE = self.git_exec_name_win + call = [self.GIT_PYTHON_GIT_EXECUTABLE] + list(args) + + try: + return self.execute(make_call(), **_kwargs) + finally: + import warnings + msg = "WARNING: Automatically switched to use git.cmd as git executable, which reduces performance by ~70%." + msg += "Its recommended to put git.exe into the PATH or to set the %s environment variable to the executable's location" % self._git_exec_env_var + warnings.warn(msg) + #END print of warning + #END catch first failure + except WindowsError: + raise WindowsError("The system cannot find or execute the file at %r" % self.GIT_PYTHON_GIT_EXECUTABLE) + #END provide better error message + else: + return self.execute(make_call(), **_kwargs) + #END handle windows default installation + + def _parse_object_header(self, header_line): + """ + :param header_line: + <hex_sha> type_string size_as_int + + :return: (hex_sha, type_string, size_as_int) + + :raise ValueError: if the header contains indication for an error due to + incorrect input sha""" + tokens = header_line.split() + if len(tokens) != 3: + if not tokens: + raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip())) + else: + raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip())) + # END handle actual return value + # END error handling + + if len(tokens[0]) != 40: + raise ValueError("Failed to parse header: %r" % header_line) + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ Use this method to quickly examine the type and size of the object behind + the given ref. + + :note: The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. 
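# Example (a minimal sketch, not part of the diff): the object-inspection API
# built on the persistent cat-file commands above; 'HEAD' resolves against a
# hypothetical repository.
from git.cmd import Git

g = Git('/tmp/some-repo')
hexsha, typename, size = g.get_object_header('HEAD')   # spawns cat-file --batch-check once
hexsha, typename, size, data = g.get_object_data('HEAD')
hexsha, typename, size, stream = g.stream_object_data('HEAD')
data = stream.read(size)    # 'stream' is a CatFileContentStream
g.clear_cache()             # drop (and thereby interrupt) the persistent commands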
+ + :return: (hexsha, type_string, size_as_int)""" + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ As get_object_header, but returns object data as well + :return: (hexsha, type_string, size_as_int,data_string) + :note: not threadsafe""" + hexsha, typename, size, stream = self.stream_object_data(ref) + data = stream.read(size) + del(stream) + return (hexsha, typename, size, data) + + def stream_object_data(self, ref): + """As get_object_header, but returns the data as a stream + :return: (hexsha, type_string, size_as_int, stream) + :note: This method is not threadsafe, you need one independent Command instance + per thread to be safe !""" + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) + + def clear_cache(self): + """Clear all kinds of internal caches to release resources. + + Currently persistent commands will be interrupted. + + :return: self""" + self.cat_file_all = None + self.cat_file_header = None + return self diff --git a/git/config.py b/git/config.py index c71bb8ca..285ade6b 100644 --- a/git/config.py +++ b/git/config.py @@ -18,408 +18,408 @@ from git.util import LockFile __all__ = ('GitConfigParser', 'SectionConstraint') class MetaParserBuilder(type): - """Utlity class wrapping base-class methods into decorators that assure read-only properties""" - def __new__(metacls, name, bases, clsdict): - """ - Equip all base-class methods with a needs_values decorator, and all non-const methods - with a set_dirty_and_flush_changes decorator in addition to that.""" - kmm = '_mutating_methods_' - if kmm in clsdict: - mutating_methods = clsdict[kmm] - for base in bases: - methods = ( t for t in inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_") ) - for name, method in methods: - if name in clsdict: - continue - method_with_values = needs_values(method) - if name in mutating_methods: - method_with_values = set_dirty_and_flush_changes(method_with_values) - # END mutating methods handling - - clsdict[name] = method_with_values - # END for each name/method pair - # END for each base - # END if mutating methods configuration is set - - new_type = super(MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict) - return new_type - - + """Utlity class wrapping base-class methods into decorators that assure read-only properties""" + def __new__(metacls, name, bases, clsdict): + """ + Equip all base-class methods with a needs_values decorator, and all non-const methods + with a set_dirty_and_flush_changes decorator in addition to that.""" + kmm = '_mutating_methods_' + if kmm in clsdict: + mutating_methods = clsdict[kmm] + for base in bases: + methods = ( t for t in inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_") ) + for name, method in methods: + if name in clsdict: + continue + method_with_values = needs_values(method) + if name in mutating_methods: + method_with_values = set_dirty_and_flush_changes(method_with_values) + # END mutating methods handling + + clsdict[name] = method_with_values + # END for each name/method pair + # END for each base + # END if mutating methods configuration is set + + new_type = super(MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict) + return new_type + + def needs_values(func): - """Returns method assuring we read 
values (on demand) before we try to access them""" - def assure_data_present(self, *args, **kwargs): - self.read() - return func(self, *args, **kwargs) - # END wrapper method - assure_data_present.__name__ = func.__name__ - return assure_data_present - + """Returns method assuring we read values (on demand) before we try to access them""" + def assure_data_present(self, *args, **kwargs): + self.read() + return func(self, *args, **kwargs) + # END wrapper method + assure_data_present.__name__ = func.__name__ + return assure_data_present + def set_dirty_and_flush_changes(non_const_func): - """Return method that checks whether given non constant function may be called. - If so, the instance will be set dirty. - Additionally, we flush the changes right to disk""" - def flush_changes(self, *args, **kwargs): - rval = non_const_func(self, *args, **kwargs) - self.write() - return rval - # END wrapper method - flush_changes.__name__ = non_const_func.__name__ - return flush_changes - + """Return method that checks whether given non constant function may be called. + If so, the instance will be set dirty. + Additionally, we flush the changes right to disk""" + def flush_changes(self, *args, **kwargs): + rval = non_const_func(self, *args, **kwargs) + self.write() + return rval + # END wrapper method + flush_changes.__name__ = non_const_func.__name__ + return flush_changes + class SectionConstraint(object): - """Constrains a ConfigParser to only option commands which are constrained to - always use the section we have been initialized with. - - It supports all ConfigParser methods that operate on an option""" - __slots__ = ("_config", "_section_name") - _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat", "getboolean", "has_option", - "remove_section", "remove_option", "options") - - def __init__(self, config, section): - self._config = config - self._section_name = section - - def __getattr__(self, attr): - if attr in self._valid_attrs_: - return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) - return super(SectionConstraint,self).__getattribute__(attr) - - def _call_config(self, method, *args, **kwargs): - """Call the configuration at the given method which must take a section name - as first argument""" - return getattr(self._config, method)(self._section_name, *args, **kwargs) - - @property - def config(self): - """return: Configparser instance we constrain""" - return self._config - + """Constrains a ConfigParser to only option commands which are constrained to + always use the section we have been initialized with. 
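# Example (a minimal sketch, not part of the diff): the needs_values decorator
# above, reduced to a self-contained demonstration; Reader is hypothetical.
def needs_values(func):
    def assure_data_present(self, *args, **kwargs):
        self.read()                          # lazy-load before any access
        return func(self, *args, **kwargs)
    assure_data_present.__name__ = func.__name__
    return assure_data_present

class Reader(object):
    def read(self):
        print('reading values')
    @needs_values
    def get(self, name):
        return name

Reader().get('core.editor')                  # prints 'reading values' first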
+ + It supports all ConfigParser methods that operate on an option""" + __slots__ = ("_config", "_section_name") + _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat", "getboolean", "has_option", + "remove_section", "remove_option", "options") + + def __init__(self, config, section): + self._config = config + self._section_name = section + + def __getattr__(self, attr): + if attr in self._valid_attrs_: + return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) + return super(SectionConstraint,self).__getattribute__(attr) + + def _call_config(self, method, *args, **kwargs): + """Call the configuration at the given method which must take a section name + as first argument""" + return getattr(self._config, method)(self._section_name, *args, **kwargs) + + @property + def config(self): + """return: Configparser instance we constrain""" + return self._config + class GitConfigParser(cp.RawConfigParser, object): - """Implements specifics required to read git style configuration files. - - This variation behaves much like the git.config command such that the configuration - will be read on demand based on the filepath given during initialization. - - The changes will automatically be written once the instance goes out of scope, but - can be triggered manually as well. - - The configuration file will be locked if you intend to change values preventing other - instances to write concurrently. - - :note: - The config is case-sensitive even when queried, hence section and option names - must match perfectly.""" - __metaclass__ = MetaParserBuilder - - - #{ Configuration - # The lock type determines the type of lock to use in new configuration readers. - # They must be compatible to the LockFile interface. - # A suitable alternative would be the BlockingLockFile - t_lock = LockFile - re_comment = re.compile('^\s*[#;]') - - #} END configuration - - OPTCRE = re.compile( - r'\s*(?P<option>[^:=\s][^:=]*)' # very permissive, incuding leading whitespace - r'\s*(?P<vi>[:=])\s*' # any number of space/tab, - # followed by separator - # (either : or =), followed - # by any # space/tab - r'(?P<value>.*)$' # everything up to eol - ) - - # list of RawConfigParser methods able to change the instance - _mutating_methods_ = ("add_section", "remove_section", "remove_option", "set") - __slots__ = ("_sections", "_defaults", "_file_or_files", "_read_only","_is_initialized", '_lock') - - def __init__(self, file_or_files, read_only=True): - """Initialize a configuration reader to read the given file_or_files and to - possibly allow changes to it by setting read_only False - - :param file_or_files: - A single file path or file objects or multiple of these - - :param read_only: - If True, the ConfigParser may only read the data , but not change it. 
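# Example (a minimal sketch, not part of the diff): pinning a parser to one
# section with SectionConstraint above; the config path is hypothetical.
from git.config import GitConfigParser, SectionConstraint

cfg = GitConfigParser('/tmp/some-repo/.git/config')
core = SectionConstraint(cfg, 'core')
core.get_value('bare', default=False)   # same as cfg.get_value('core', 'bare', False)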
- If False, only a single file path or file object may be given.""" - super(GitConfigParser, self).__init__() - # initialize base with ordered dictionaries to be sure we write the same - # file back - self._sections = OrderedDict() - self._defaults = OrderedDict() - - self._file_or_files = file_or_files - self._read_only = read_only - self._is_initialized = False - self._lock = None - - if not read_only: - if isinstance(file_or_files, (tuple, list)): - raise ValueError("Write-ConfigParsers can operate on a single file only, multiple files have been passed") - # END single file check - - if not isinstance(file_or_files, basestring): - file_or_files = file_or_files.name - # END get filename from handle/stream - # initialize lock base - we want to write - self._lock = self.t_lock(file_or_files) - - self._lock._obtain_lock() - # END read-only check - - - def __del__(self): - """Write pending changes if required and release locks""" - # checking for the lock here makes sure we do not raise during write() - # in case an invalid parser was created who could not get a lock - if self.read_only or not self._lock._has_lock(): - return - - try: - try: - self.write() - except IOError,e: - print "Exception during destruction of GitConfigParser: %s" % str(e) - finally: - self._lock._release_lock() - - def optionxform(self, optionstr): - """Do not transform options in any way when writing""" - return optionstr - - def _read(self, fp, fpname): - """A direct copy of the py2.4 version of the super class's _read method - to assure it uses ordered dicts. Had to change one line to make it work. - - Future versions have this fixed, but in fact its quite embarassing for the - guys not to have done it right in the first place ! - - Removed big comments to make it more compact. - - Made sure it ignores initial whitespace as git uses tabs""" - cursect = None # None, or a dictionary - optname = None - lineno = 0 - e = None # None, or an exception - while True: - line = fp.readline() - if not line: - break - lineno = lineno + 1 - # comment or blank line? - if line.strip() == '' or self.re_comment.match(line): - continue - if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR": - # no leading whitespace - continue - else: - # is it a section header? - mo = self.SECTCRE.match(line.strip()) - if mo: - sectname = mo.group('header').strip() - if sectname in self._sections: - cursect = self._sections[sectname] - elif sectname == cp.DEFAULTSECT: - cursect = self._defaults - else: - # THE ONLY LINE WE CHANGED ! - cursect = OrderedDict((('__name__', sectname),)) - self._sections[sectname] = cursect - # So sections can't start with a continuation line - optname = None - # no section header in the file? - elif cursect is None: - raise cp.MissingSectionHeaderError(fpname, lineno, line) - # an option line? - else: - mo = self.OPTCRE.match(line) - if mo: - optname, vi, optval = mo.group('option', 'vi', 'value') - if vi in ('=', ':') and ';' in optval: - pos = optval.find(';') - if pos != -1 and optval[pos-1].isspace(): - optval = optval[:pos] - optval = optval.strip() - if optval == '""': - optval = '' - optname = self.optionxform(optname.rstrip()) - cursect[optname] = optval - else: - if not e: - e = cp.ParsingError(fpname) - e.append(lineno, repr(line)) - # END - # END ? - # END ? - # END while reading - # if any parsing errors occurred, raise an exception - if e: - raise e - - - def read(self): - """Reads the data stored in the files we have been initialized with. 
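# Example (a minimal sketch, not part of the diff): a writable parser, showing
# the locking and write-on-destruction behaviour above; the path is hypothetical.
from git.config import GitConfigParser

cfg = GitConfigParser('/tmp/some-repo/.git/config', read_only=False)  # obtains the lock
cfg.set_value('core', 'filemode', True)   # flushed to disk immediately
del cfg                                   # __del__ writes pending changes, releases the lock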
It will - ignore files that cannot be read, possibly leaving an empty configuration - - :return: Nothing - :raise IOError: if a file cannot be handled""" - if self._is_initialized: - return - - files_to_read = self._file_or_files - if not isinstance(files_to_read, (tuple, list)): - files_to_read = [ files_to_read ] - - for file_object in files_to_read: - fp = file_object - close_fp = False - # assume a path if it is not a file-object - if not hasattr(file_object, "seek"): - try: - fp = open(file_object) - close_fp = True - except IOError,e: - continue - # END fp handling - - try: - self._read(fp, fp.name) - finally: - if close_fp: - fp.close() - # END read-handling - # END for each file object to read - self._is_initialized = True - - def _write(self, fp): - """Write an .ini-format representation of the configuration state in - git compatible format""" - def write_section(name, section_dict): - fp.write("[%s]\n" % name) - for (key, value) in section_dict.items(): - if key != "__name__": - fp.write("\t%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) - # END if key is not __name__ - # END section writing - - if self._defaults: - write_section(cp.DEFAULTSECT, self._defaults) - map(lambda t: write_section(t[0],t[1]), self._sections.items()) + """Implements specifics required to read git style configuration files. + + This variation behaves much like the git.config command such that the configuration + will be read on demand based on the filepath given during initialization. + + The changes will automatically be written once the instance goes out of scope, but + can be triggered manually as well. + + The configuration file will be locked if you intend to change values preventing other + instances to write concurrently. + + :note: + The config is case-sensitive even when queried, hence section and option names + must match perfectly.""" + __metaclass__ = MetaParserBuilder + + + #{ Configuration + # The lock type determines the type of lock to use in new configuration readers. + # They must be compatible to the LockFile interface. + # A suitable alternative would be the BlockingLockFile + t_lock = LockFile + re_comment = re.compile('^\s*[#;]') + + #} END configuration + + OPTCRE = re.compile( + r'\s*(?P<option>[^:=\s][^:=]*)' # very permissive, incuding leading whitespace + r'\s*(?P<vi>[:=])\s*' # any number of space/tab, + # followed by separator + # (either : or =), followed + # by any # space/tab + r'(?P<value>.*)$' # everything up to eol + ) + + # list of RawConfigParser methods able to change the instance + _mutating_methods_ = ("add_section", "remove_section", "remove_option", "set") + __slots__ = ("_sections", "_defaults", "_file_or_files", "_read_only","_is_initialized", '_lock') + + def __init__(self, file_or_files, read_only=True): + """Initialize a configuration reader to read the given file_or_files and to + possibly allow changes to it by setting read_only False + + :param file_or_files: + A single file path or file objects or multiple of these + + :param read_only: + If True, the ConfigParser may only read the data , but not change it. 
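# Example (a minimal sketch, not part of the diff): the git-style format that
# _read above accepts - tab-indented options, '#'/';' comments - written out
# and read back; the path is illustrative.
from git.config import GitConfigParser

open('/tmp/cfg', 'w').write('[core]\n\tbare = false\t; inline comment\n')
GitConfigParser('/tmp/cfg').get_value('core', 'bare')
# -> False; the inline comment is stripped, the value coerced by get_value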
+ If False, only a single file path or file object may be given.""" + super(GitConfigParser, self).__init__() + # initialize base with ordered dictionaries to be sure we write the same + # file back + self._sections = OrderedDict() + self._defaults = OrderedDict() + + self._file_or_files = file_or_files + self._read_only = read_only + self._is_initialized = False + self._lock = None + + if not read_only: + if isinstance(file_or_files, (tuple, list)): + raise ValueError("Write-ConfigParsers can operate on a single file only, multiple files have been passed") + # END single file check + + if not isinstance(file_or_files, basestring): + file_or_files = file_or_files.name + # END get filename from handle/stream + # initialize lock base - we want to write + self._lock = self.t_lock(file_or_files) + + self._lock._obtain_lock() + # END read-only check + + + def __del__(self): + """Write pending changes if required and release locks""" + # checking for the lock here makes sure we do not raise during write() + # in case an invalid parser was created who could not get a lock + if self.read_only or not self._lock._has_lock(): + return + + try: + try: + self.write() + except IOError,e: + print "Exception during destruction of GitConfigParser: %s" % str(e) + finally: + self._lock._release_lock() + + def optionxform(self, optionstr): + """Do not transform options in any way when writing""" + return optionstr + + def _read(self, fp, fpname): + """A direct copy of the py2.4 version of the super class's _read method + to assure it uses ordered dicts. Had to change one line to make it work. + + Future versions have this fixed, but in fact its quite embarassing for the + guys not to have done it right in the first place ! + + Removed big comments to make it more compact. + + Made sure it ignores initial whitespace as git uses tabs""" + cursect = None # None, or a dictionary + optname = None + lineno = 0 + e = None # None, or an exception + while True: + line = fp.readline() + if not line: + break + lineno = lineno + 1 + # comment or blank line? + if line.strip() == '' or self.re_comment.match(line): + continue + if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR": + # no leading whitespace + continue + else: + # is it a section header? + mo = self.SECTCRE.match(line.strip()) + if mo: + sectname = mo.group('header').strip() + if sectname in self._sections: + cursect = self._sections[sectname] + elif sectname == cp.DEFAULTSECT: + cursect = self._defaults + else: + # THE ONLY LINE WE CHANGED ! + cursect = OrderedDict((('__name__', sectname),)) + self._sections[sectname] = cursect + # So sections can't start with a continuation line + optname = None + # no section header in the file? + elif cursect is None: + raise cp.MissingSectionHeaderError(fpname, lineno, line) + # an option line? + else: + mo = self.OPTCRE.match(line) + if mo: + optname, vi, optval = mo.group('option', 'vi', 'value') + if vi in ('=', ':') and ';' in optval: + pos = optval.find(';') + if pos != -1 and optval[pos-1].isspace(): + optval = optval[:pos] + optval = optval.strip() + if optval == '""': + optval = '' + optname = self.optionxform(optname.rstrip()) + cursect[optname] = optval + else: + if not e: + e = cp.ParsingError(fpname) + e.append(lineno, repr(line)) + # END + # END ? + # END ? + # END while reading + # if any parsing errors occurred, raise an exception + if e: + raise e + + + def read(self): + """Reads the data stored in the files we have been initialized with. 
It will + ignore files that cannot be read, possibly leaving an empty configuration + + :return: Nothing + :raise IOError: if a file cannot be handled""" + if self._is_initialized: + return + + files_to_read = self._file_or_files + if not isinstance(files_to_read, (tuple, list)): + files_to_read = [ files_to_read ] + + for file_object in files_to_read: + fp = file_object + close_fp = False + # assume a path if it is not a file-object + if not hasattr(file_object, "seek"): + try: + fp = open(file_object) + close_fp = True + except IOError,e: + continue + # END fp handling + + try: + self._read(fp, fp.name) + finally: + if close_fp: + fp.close() + # END read-handling + # END for each file object to read + self._is_initialized = True + + def _write(self, fp): + """Write an .ini-format representation of the configuration state in + git compatible format""" + def write_section(name, section_dict): + fp.write("[%s]\n" % name) + for (key, value) in section_dict.items(): + if key != "__name__": + fp.write("\t%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) + # END if key is not __name__ + # END section writing + + if self._defaults: + write_section(cp.DEFAULTSECT, self._defaults) + map(lambda t: write_section(t[0],t[1]), self._sections.items()) - - @needs_values - def write(self): - """Write changes to our file, if there are changes at all - - :raise IOError: if this is a read-only writer instance or if we could not obtain - a file lock""" - self._assure_writable("write") - - fp = self._file_or_files - close_fp = False - - # we have a physical file on disk, so get a lock - if isinstance(fp, (basestring, file)): - self._lock._obtain_lock() - # END get lock for physical files - - if not hasattr(fp, "seek"): - fp = open(self._file_or_files, "w") - close_fp = True - else: - fp.seek(0) - # make sure we do not overwrite into an existing file - if hasattr(fp, 'truncate'): - fp.truncate() - #END - # END handle stream or file - - # WRITE DATA - try: - self._write(fp) - finally: - if close_fp: - fp.close() - # END data writing - - # we do not release the lock - it will be done automatically once the - # instance vanishes - - def _assure_writable(self, method_name): - if self.read_only: - raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) - - @needs_values - @set_dirty_and_flush_changes - def add_section(self, section): - """Assures added options will stay in order""" - super(GitConfigParser, self).add_section(section) - self._sections[section] = OrderedDict() - - @property - def read_only(self): - """:return: True if this instance may change the configuration file""" - return self._read_only - - def get_value(self, section, option, default = None): - """ - :param default: - If not None, the given default value will be returned in case - the option did not exist - :return: a properly typed value, either int, float or string - - :raise TypeError: in case the value could not be understood - Otherwise the exceptions known to the ConfigParser will be raised.""" - try: - valuestr = self.get(section, option) - except Exception: - if default is not None: - return default - raise - - types = ( long, float ) - for numtype in types: - try: - val = numtype( valuestr ) + + @needs_values + def write(self): + """Write changes to our file, if there are changes at all + + :raise IOError: if this is a read-only writer instance or if we could not obtain + a file lock""" + self._assure_writable("write") + + fp = self._file_or_files + close_fp = False + + # we have a physical file on disk, so 
get a lock + if isinstance(fp, (basestring, file)): + self._lock._obtain_lock() + # END get lock for physical files + + if not hasattr(fp, "seek"): + fp = open(self._file_or_files, "w") + close_fp = True + else: + fp.seek(0) + # make sure we do not overwrite into an existing file + if hasattr(fp, 'truncate'): + fp.truncate() + #END + # END handle stream or file + + # WRITE DATA + try: + self._write(fp) + finally: + if close_fp: + fp.close() + # END data writing + + # we do not release the lock - it will be done automatically once the + # instance vanishes + + def _assure_writable(self, method_name): + if self.read_only: + raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) + + @needs_values + @set_dirty_and_flush_changes + def add_section(self, section): + """Assures added options will stay in order""" + super(GitConfigParser, self).add_section(section) + self._sections[section] = OrderedDict() + + @property + def read_only(self): + """:return: True if this instance may change the configuration file""" + return self._read_only + + def get_value(self, section, option, default = None): + """ + :param default: + If not None, the given default value will be returned in case + the option did not exist + :return: a properly typed value, either int, float or string + + :raise TypeError: in case the value could not be understood + Otherwise the exceptions known to the ConfigParser will be raised.""" + try: + valuestr = self.get(section, option) + except Exception: + if default is not None: + return default + raise + + types = ( long, float ) + for numtype in types: + try: + val = numtype( valuestr ) - # truncated value ? - if val != float( valuestr ): - continue + # truncated value ? + if val != float( valuestr ): + continue - return val - except (ValueError,TypeError): - continue - # END for each numeric type - - # try boolean values as git uses them - vl = valuestr.lower() - if vl == 'false': - return False - if vl == 'true': - return True - - if not isinstance( valuestr, basestring ): - raise TypeError( "Invalid value type: only int, long, float and str are allowed", valuestr ) - - return valuestr - - @needs_values - @set_dirty_and_flush_changes - def set_value(self, section, option, value): - """Sets the given option in section to the given value. - It will create the section if required, and will not throw as opposed to the default - ConfigParser 'set' method. - - :param section: Name of the section in which the option resides or should reside - :param option: Name of the options whose value to set - - :param value: Value to set the option to. It must be a string or convertible - to a string""" - if not self.has_section(section): - self.add_section(section) - self.set(section, option, str(value)) + return val + except (ValueError,TypeError): + continue + # END for each numeric type + + # try boolean values as git uses them + vl = valuestr.lower() + if vl == 'false': + return False + if vl == 'true': + return True + + if not isinstance( valuestr, basestring ): + raise TypeError( "Invalid value type: only int, long, float and str are allowed", valuestr ) + + return valuestr + + @needs_values + @set_dirty_and_flush_changes + def set_value(self, section, option, value): + """Sets the given option in section to the given value. + It will create the section if required, and will not throw as opposed to the default + ConfigParser 'set' method. 
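The coercion order implemented by get_value above can be restated as a standalone sketch; this is illustrative only and simply mirrors the method's logic outside the class:

    def coerce(valuestr):
        # try integer first, then float; skip a numeric type that would truncate
        for numtype in (long, float):
            try:
                val = numtype(valuestr)
                if val != float(valuestr):
                    continue
                return val
            except (ValueError, TypeError):
                continue
        # booleans the way git writes them
        if valuestr.lower() == 'true':
            return True
        if valuestr.lower() == 'false':
            return False
        return valuestr

    assert coerce("10") == 10
    assert coerce("0.5") == 0.5
    assert coerce("true") is True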
+ + :param section: Name of the section in which the option resides or should reside + :param option: Name of the options whose value to set + + :param value: Value to set the option to. It must be a string or convertible + to a string""" + if not self.has_section(section): + self.add_section(section) + self.set(section, option, str(value)) @@ -1,18 +1,18 @@ """Module with our own gitdb implementation - it uses the git command""" from exc import ( - GitCommandError, - BadObject - ) + GitCommandError, + BadObject + ) from gitdb.base import ( - OInfo, - OStream - ) + OInfo, + OStream + ) from gitdb.util import ( - bin_to_hex, - hex_to_bin - ) + bin_to_hex, + hex_to_bin + ) from gitdb.db import GitDB from gitdb.db import LooseObjectDB @@ -21,41 +21,41 @@ __all__ = ('GitCmdObjectDB', 'GitDB' ) #class GitCmdObjectDB(CompoundDB, ObjectDBW): class GitCmdObjectDB(LooseObjectDB): - """A database representing the default git object store, which includes loose - objects, pack files and an alternates file - - It will create objects only in the loose object database. - :note: for now, we use the git command to do all the lookup, just until he - have packs and the other implementations - """ - def __init__(self, root_path, git): - """Initialize this instance with the root and a git command""" - super(GitCmdObjectDB, self).__init__(root_path) - self._git = git - - def info(self, sha): - hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) - return OInfo(hex_to_bin(hexsha), typename, size) - - def stream(self, sha): - """For now, all lookup is done by git itself""" - hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) - return OStream(hex_to_bin(hexsha), typename, size, stream) - - - # { Interface - - def partial_to_complete_sha_hex(self, partial_hexsha): - """:return: Full binary 20 byte sha from the given partial hexsha - :raise AmbiguousObjectName: - :raise BadObject: - :note: currently we only raise BadObject as git does not communicate - AmbiguousObjects separately""" - try: - hexsha, typename, size = self._git.get_object_header(partial_hexsha) - return hex_to_bin(hexsha) - except (GitCommandError, ValueError): - raise BadObject(partial_hexsha) - # END handle exceptions - - #} END interface + """A database representing the default git object store, which includes loose + objects, pack files and an alternates file + + It will create objects only in the loose object database. 
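A brief sketch of querying such a database through the info and stream methods shown above; the repository path and sha are hypothetical, and repo.odb is the usual way to reach the object database:

    from git import Repo
    from gitdb.util import hex_to_bin

    repo = Repo("/path/to/repo")    # hypothetical path
    binsha = hex_to_bin("d670460b4b4aece5915caf5c68d12f560a9fe3e4")  # hypothetical sha
    info = repo.odb.info(binsha)    # header lookup through the git command
    print info.type, info.size
    stream = repo.odb.stream(binsha)
    data = stream.read()            # raw object data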
+ :note: for now, we use the git command to do all the lookup, just until he + have packs and the other implementations + """ + def __init__(self, root_path, git): + """Initialize this instance with the root and a git command""" + super(GitCmdObjectDB, self).__init__(root_path) + self._git = git + + def info(self, sha): + hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) + return OInfo(hex_to_bin(hexsha), typename, size) + + def stream(self, sha): + """For now, all lookup is done by git itself""" + hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) + return OStream(hex_to_bin(hexsha), typename, size, stream) + + + # { Interface + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: Full binary 20 byte sha from the given partial hexsha + :raise AmbiguousObjectName: + :raise BadObject: + :note: currently we only raise BadObject as git does not communicate + AmbiguousObjects separately""" + try: + hexsha, typename, size = self._git.get_object_header(partial_hexsha) + return hex_to_bin(hexsha) + except (GitCommandError, ValueError): + raise BadObject(partial_hexsha) + # END handle exceptions + + #} END interface diff --git a/git/diff.py b/git/diff.py index 7b3bf6b5..8a4819ab 100644 --- a/git/diff.py +++ b/git/diff.py @@ -11,337 +11,337 @@ from exc import GitCommandError from gitdb.util import hex_to_bin - + __all__ = ('Diffable', 'DiffIndex', 'Diff') - + class Diffable(object): - """Common interface for all object that can be diffed against another object of compatible type. - - :note: - Subclasses require a repo member as it is the case for Object instances, for practical - reasons we do not derive from Object.""" - __slots__ = tuple() - - # standin indicating you want to diff against the index - class Index(object): - pass - - def _process_diff_args(self, args): - """ - :return: - possibly altered version of the given args list. - Method is called right before git command execution. - Subclasses can use it to alter the behaviour of the superclass""" - return args - - def diff(self, other=Index, paths=None, create_patch=False, **kwargs): - """Creates diffs between two items being trees, trees and index or an - index and the working tree. + """Common interface for all object that can be diffed against another object of compatible type. + + :note: + Subclasses require a repo member as it is the case for Object instances, for practical + reasons we do not derive from Object.""" + __slots__ = tuple() + + # standin indicating you want to diff against the index + class Index(object): + pass + + def _process_diff_args(self, args): + """ + :return: + possibly altered version of the given args list. + Method is called right before git command execution. + Subclasses can use it to alter the behaviour of the superclass""" + return args + + def diff(self, other=Index, paths=None, create_patch=False, **kwargs): + """Creates diffs between two items being trees, trees and index or an + index and the working tree. - :param other: - Is the item to compare us with. - If None, we will be compared to the working tree. - If Treeish, it will be compared against the respective tree - If Index ( type ), it will be compared against the index. - It defaults to Index to assure the method will not by-default fail - on bare repositories. + :param other: + Is the item to compare us with. + If None, we will be compared to the working tree. 
+ If Treeish, it will be compared against the respective tree
+ If Index ( type ), it will be compared against the index.
+ It defaults to Index to assure the method will not by-default fail
+ on bare repositories.

- :param paths:
- is a list of paths or a single path to limit the diff to.
- It will only include at least one of the given path or paths.
+ :param paths:
+ is a list of paths or a single path to limit the diff to.
+ It will only include at least one of the given path or paths.

- :param create_patch:
- If True, the returned Diff contains a detailed patch that, if applied,
- turns self into other. Patches are somewhat costly as blobs have to be read
- and diffed.
+ :param create_patch:
+ If True, the returned Diff contains a detailed patch that, if applied,
+ turns self into other. Patches are somewhat costly as blobs have to be read
+ and diffed.

- :param kwargs:
- Additional arguments passed to git-diff, such as
- R=True to swap both sides of the diff.
+ :param kwargs:
+ Additional arguments passed to git-diff, such as
+ R=True to swap both sides of the diff.

- :return: git.DiffIndex
-
- :note:
- Rename detection will only work if create_patch is True.
-
- On a bare repository, 'other' needs to be provided as Index or
- as Tree/Commit, or a git command error will occur"""
- args = list()
- args.append( "--abbrev=40" ) # we need full shas
- args.append( "--full-index" ) # get full index paths, not only filenames
-
- if create_patch:
- args.append("-p")
- args.append("-M") # check for renames
- else:
- args.append("--raw")
-
- if paths is not None and not isinstance(paths, (tuple,list)):
- paths = [ paths ]
+ :return: git.DiffIndex
+
+ :note:
+ Rename detection will only work if create_patch is True.
+
+ On a bare repository, 'other' needs to be provided as Index or
+ as Tree/Commit, or a git command error will occur"""
+ args = list()
+ args.append( "--abbrev=40" ) # we need full shas
+ args.append( "--full-index" ) # get full index paths, not only filenames
+
+ if create_patch:
+ args.append("-p")
+ args.append("-M") # check for renames
+ else:
+ args.append("--raw")
+
+ if paths is not None and not isinstance(paths, (tuple,list)):
+ paths = [ paths ]

- if other is not None and other is not self.Index:
- args.insert(0, other)
- if other is self.Index:
- args.insert(0, "--cached")
-
- args.insert(0,self)
-
- # paths is list here or None
- if paths:
- args.append("--")
- args.extend(paths)
- # END paths handling
-
- kwargs['as_process'] = True
- proc = self.repo.git.diff(*self._process_diff_args(args), **kwargs)
-
- diff_method = Diff._index_from_raw_format
- if create_patch:
- diff_method = Diff._index_from_patch_format
- index = diff_method(self.repo, proc.stdout)
-
- status = proc.wait()
- return index
+ if other is not None and other is not self.Index:
+ args.insert(0, other)
+ if other is self.Index:
+ args.insert(0, "--cached")
+
+ args.insert(0,self)
+
+ # paths is list here or None
+ if paths:
+ args.append("--")
+ args.extend(paths)
+ # END paths handling
+
+ kwargs['as_process'] = True
+ proc = self.repo.git.diff(*self._process_diff_args(args), **kwargs)
+
+ diff_method = Diff._index_from_raw_format
+ if create_patch:
+ diff_method = Diff._index_from_patch_format
+ index = diff_method(self.repo, proc.stdout)
+
+ status = proc.wait()
+ return index

class DiffIndex(list):
- """Implements an Index for diffs, allowing a list of Diffs to be queried by
- the diff properties.
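Putting the diff call above to use might look like the following sketch; the repository path and revision are hypothetical, and iter_change_type is defined just below:

    from git import Repo

    repo = Repo("/path/to/repo")          # hypothetical path
    hcommit = repo.head.commit
    diff_index = hcommit.diff("HEAD~1")   # tree against tree, raw format
    for d in diff_index.iter_change_type("M"):
        print "modified:", d.a_blob.path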
- - The class improves the diff handling convenience""" - # change type invariant identifying possible ways a blob can have changed - # A = Added - # D = Deleted - # R = Renamed - # M = modified - change_type = ("A", "D", "R", "M") - - - def iter_change_type(self, change_type): - """ - :return: - iterator yieling Diff instances that match the given change_type - - :param change_type: - Member of DiffIndex.change_type, namely: - - * 'A' for added paths - * 'D' for deleted paths - * 'R' for renamed paths - * 'M' for paths with modified data""" - if change_type not in self.change_type: - raise ValueError( "Invalid change type: %s" % change_type ) - - for diff in self: - if change_type == "A" and diff.new_file: - yield diff - elif change_type == "D" and diff.deleted_file: - yield diff - elif change_type == "R" and diff.renamed: - yield diff - elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: - yield diff - # END for each diff - + """Implements an Index for diffs, allowing a list of Diffs to be queried by + the diff properties. + + The class improves the diff handling convenience""" + # change type invariant identifying possible ways a blob can have changed + # A = Added + # D = Deleted + # R = Renamed + # M = modified + change_type = ("A", "D", "R", "M") + + + def iter_change_type(self, change_type): + """ + :return: + iterator yieling Diff instances that match the given change_type + + :param change_type: + Member of DiffIndex.change_type, namely: + + * 'A' for added paths + * 'D' for deleted paths + * 'R' for renamed paths + * 'M' for paths with modified data""" + if change_type not in self.change_type: + raise ValueError( "Invalid change type: %s" % change_type ) + + for diff in self: + if change_type == "A" and diff.new_file: + yield diff + elif change_type == "D" and diff.deleted_file: + yield diff + elif change_type == "R" and diff.renamed: + yield diff + elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: + yield diff + # END for each diff + class Diff(object): - """A Diff contains diff information between two Trees. - - It contains two sides a and b of the diff, members are prefixed with - "a" and "b" respectively to inidcate that. - - Diffs keep information about the changed blob objects, the file mode, renames, - deletions and new files. - - There are a few cases where None has to be expected as member variable value: - - ``New File``:: - - a_mode is None - a_blob is None - - ``Deleted File``:: - - b_mode is None - b_blob is None - - ``Working Tree Blobs`` - - When comparing to working trees, the working tree blob will have a null hexsha - as a corresponding object does not yet exist. The mode will be null as well. - But the path will be available though. - If it is listed in a diff the working tree version of the file must - be different to the version in the index or tree, and hence has been modified.""" - - # precompiled regex - re_header = re.compile(r""" - #^diff[ ]--git - [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n - (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n - ^rename[ ]from[ ](?P<rename_from>\S+)\n - ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? - (?:^old[ ]mode[ ](?P<old_mode>\d+)\n - ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? - (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? - (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? - (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) - \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? 
- """, re.VERBOSE | re.MULTILINE) - # can be used for comparisons - NULL_HEX_SHA = "0"*40 - NULL_BIN_SHA = "\0"*20 - - __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", - "rename_from", "rename_to", "diff") + """A Diff contains diff information between two Trees. + + It contains two sides a and b of the diff, members are prefixed with + "a" and "b" respectively to inidcate that. + + Diffs keep information about the changed blob objects, the file mode, renames, + deletions and new files. + + There are a few cases where None has to be expected as member variable value: + + ``New File``:: + + a_mode is None + a_blob is None + + ``Deleted File``:: + + b_mode is None + b_blob is None + + ``Working Tree Blobs`` + + When comparing to working trees, the working tree blob will have a null hexsha + as a corresponding object does not yet exist. The mode will be null as well. + But the path will be available though. + If it is listed in a diff the working tree version of the file must + be different to the version in the index or tree, and hence has been modified.""" + + # precompiled regex + re_header = re.compile(r""" + #^diff[ ]--git + [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n + (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n + ^rename[ ]from[ ](?P<rename_from>\S+)\n + ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? + (?:^old[ ]mode[ ](?P<old_mode>\d+)\n + ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? + (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? + (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? + (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) + \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? + """, re.VERBOSE | re.MULTILINE) + # can be used for comparisons + NULL_HEX_SHA = "0"*40 + NULL_BIN_SHA = "\0"*20 + + __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", + "rename_from", "rename_to", "diff") - def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, - b_mode, new_file, deleted_file, rename_from, - rename_to, diff): - - self.a_mode = a_mode - self.b_mode = b_mode - - if self.a_mode: - self.a_mode = mode_str_to_int(self.a_mode) - if self.b_mode: - self.b_mode = mode_str_to_int(self.b_mode) - - if a_blob_id is None: - self.a_blob = None - else: - self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path) - if b_blob_id is None: - self.b_blob = None - else: - self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path) - - self.new_file = new_file - self.deleted_file = deleted_file - - # be clear and use None instead of empty strings - self.rename_from = rename_from or None - self.rename_to = rename_to or None - - self.diff = diff + def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, + b_mode, new_file, deleted_file, rename_from, + rename_to, diff): + + self.a_mode = a_mode + self.b_mode = b_mode + + if self.a_mode: + self.a_mode = mode_str_to_int(self.a_mode) + if self.b_mode: + self.b_mode = mode_str_to_int(self.b_mode) + + if a_blob_id is None: + self.a_blob = None + else: + self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path) + if b_blob_id is None: + self.b_blob = None + else: + self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path) + + self.new_file = new_file + self.deleted_file = deleted_file + + # be clear and use None instead of empty strings + self.rename_from = rename_from or None + self.rename_to = rename_to or None + + self.diff = diff - def __eq__(self, other): - for name 
in self.__slots__: - if getattr(self, name) != getattr(other, name): - return False - # END for each name - return True - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(tuple(getattr(self,n) for n in self.__slots__)) + def __eq__(self, other): + for name in self.__slots__: + if getattr(self, name) != getattr(other, name): + return False + # END for each name + return True + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(tuple(getattr(self,n) for n in self.__slots__)) - def __str__(self): - h = "%s" - if self.a_blob: - h %= self.a_blob.path - elif self.b_blob: - h %= self.b_blob.path - - msg = '' - l = None # temp line - ll = 0 # line length - for b,n in zip((self.a_blob, self.b_blob), ('lhs', 'rhs')): - if b: - l = "\n%s: %o | %s" % (n, b.mode, b.hexsha) - else: - l = "\n%s: None" % n - # END if blob is not None - ll = max(len(l), ll) - msg += l - # END for each blob - - # add headline - h += '\n' + '='*ll - - if self.deleted_file: - msg += '\nfile deleted in rhs' - if self.new_file: - msg += '\nfile added in rhs' - if self.rename_from: - msg += '\nfile renamed from %r' % self.rename_from - if self.rename_to: - msg += '\nfile renamed to %r' % self.rename_to - if self.diff: - msg += '\n---' - msg += self.diff - msg += '\n---' - # END diff info - - return h + msg + def __str__(self): + h = "%s" + if self.a_blob: + h %= self.a_blob.path + elif self.b_blob: + h %= self.b_blob.path + + msg = '' + l = None # temp line + ll = 0 # line length + for b,n in zip((self.a_blob, self.b_blob), ('lhs', 'rhs')): + if b: + l = "\n%s: %o | %s" % (n, b.mode, b.hexsha) + else: + l = "\n%s: None" % n + # END if blob is not None + ll = max(len(l), ll) + msg += l + # END for each blob + + # add headline + h += '\n' + '='*ll + + if self.deleted_file: + msg += '\nfile deleted in rhs' + if self.new_file: + msg += '\nfile added in rhs' + if self.rename_from: + msg += '\nfile renamed from %r' % self.rename_from + if self.rename_to: + msg += '\nfile renamed to %r' % self.rename_to + if self.diff: + msg += '\n---' + msg += self.diff + msg += '\n---' + # END diff info + + return h + msg - @property - def renamed(self): - """:returns: True if the blob of our diff has been renamed""" - return self.rename_from != self.rename_to + @property + def renamed(self): + """:returns: True if the blob of our diff has been renamed""" + return self.rename_from != self.rename_to - @classmethod - def _index_from_patch_format(cls, repo, stream): - """Create a new DiffIndex from the given text which must be in patch format - :param repo: is the repository we are operating on - it is required - :param stream: result of 'git diff' as a stream (supporting file protocol) - :return: git.DiffIndex """ - # for now, we have to bake the stream - text = stream.read() - index = DiffIndex() + @classmethod + def _index_from_patch_format(cls, repo, stream): + """Create a new DiffIndex from the given text which must be in patch format + :param repo: is the repository we are operating on - it is required + :param stream: result of 'git diff' as a stream (supporting file protocol) + :return: git.DiffIndex """ + # for now, we have to bake the stream + text = stream.read() + index = DiffIndex() - diff_header = cls.re_header.match - for diff in ('\n' + text).split('\ndiff --git')[1:]: - header = diff_header(diff) + diff_header = cls.re_header.match + for diff in ('\n' + text).split('\ndiff --git')[1:]: + header = diff_header(diff) - a_path, b_path, 
similarity_index, rename_from, rename_to, \ - old_mode, new_mode, new_file_mode, deleted_file_mode, \ - a_blob_id, b_blob_id, b_mode = header.groups() - new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) + a_path, b_path, similarity_index, rename_from, rename_to, \ + old_mode, new_mode, new_file_mode, deleted_file_mode, \ + a_blob_id, b_blob_id, b_mode = header.groups() + new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, - old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, - new_file, deleted_file, rename_from, rename_to, diff[header.end():])) + index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, + old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, + new_file, deleted_file, rename_from, rename_to, diff[header.end():])) - return index - - @classmethod - def _index_from_raw_format(cls, repo, stream): - """Create a new DiffIndex from the given stream which must be in raw format. - :note: - This format is inherently incapable of detecting renames, hence we only - modify, delete and add files - :return: git.DiffIndex""" - # handles - # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore - index = DiffIndex() - for line in stream: - if not line.startswith(":"): - continue - # END its not a valid diff line - old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split(None, 5) - path = path.strip() - a_path = path - b_path = path - deleted_file = False - new_file = False - - # NOTE: We cannot conclude from the existance of a blob to change type - # as diffs with the working do not have blobs yet - if change_type == 'D': - b_blob_id = None - deleted_file = True - elif change_type == 'A': - a_blob_id = None - new_file = True - # END add/remove handling - - diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, - new_file, deleted_file, None, None, '') - index.append(diff) - # END for each line - - return index + return index + + @classmethod + def _index_from_raw_format(cls, repo, stream): + """Create a new DiffIndex from the given stream which must be in raw format. + :note: + This format is inherently incapable of detecting renames, hence we only + modify, delete and add files + :return: git.DiffIndex""" + # handles + # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore + index = DiffIndex() + for line in stream: + if not line.startswith(":"): + continue + # END its not a valid diff line + old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split(None, 5) + path = path.strip() + a_path = path + b_path = path + deleted_file = False + new_file = False + + # NOTE: We cannot conclude from the existance of a blob to change type + # as diffs with the working do not have blobs yet + if change_type == 'D': + b_blob_id = None + deleted_file = True + elif change_type == 'A': + a_blob_id = None + new_file = True + # END add/remove handling + + diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, + new_file, deleted_file, None, None, '') + index.append(diff) + # END for each line + + return index @@ -8,51 +8,51 @@ from gitdb.exc import * class InvalidGitRepositoryError(Exception): - """ Thrown if the given repository appears to have an invalid format. """ + """ Thrown if the given repository appears to have an invalid format. 
""" class NoSuchPathError(OSError): - """ Thrown if a path could not be access by the system. """ + """ Thrown if a path could not be access by the system. """ class GitCommandError(Exception): - """ Thrown if execution of the git command fails with non-zero status code. """ - def __init__(self, command, status, stderr=None): - self.stderr = stderr - self.status = status - self.command = command - - def __str__(self): - return ("'%s' returned exit status %i: %s" % - (' '.join(str(i) for i in self.command), self.status, self.stderr)) + """ Thrown if execution of the git command fails with non-zero status code. """ + def __init__(self, command, status, stderr=None): + self.stderr = stderr + self.status = status + self.command = command + + def __str__(self): + return ("'%s' returned exit status %i: %s" % + (' '.join(str(i) for i in self.command), self.status, self.stderr)) class CheckoutError( Exception ): - """Thrown if a file could not be checked out from the index as it contained - changes. - - The .failed_files attribute contains a list of relative paths that failed - to be checked out as they contained changes that did not exist in the index. - - The .failed_reasons attribute contains a string informing about the actual - cause of the issue. - - The .valid_files attribute contains a list of relative paths to files that - were checked out successfully and hence match the version stored in the - index""" - def __init__(self, message, failed_files, valid_files, failed_reasons): - Exception.__init__(self, message) - self.failed_files = failed_files - self.failed_reasons = failed_reasons - self.valid_files = valid_files - - def __str__(self): - return Exception.__str__(self) + ":%s" % self.failed_files - - + """Thrown if a file could not be checked out from the index as it contained + changes. + + The .failed_files attribute contains a list of relative paths that failed + to be checked out as they contained changes that did not exist in the index. + + The .failed_reasons attribute contains a string informing about the actual + cause of the issue. 
+ + The .valid_files attribute contains a list of relative paths to files that + were checked out successfully and hence match the version stored in the + index""" + def __init__(self, message, failed_files, valid_files, failed_reasons): + Exception.__init__(self, message) + self.failed_files = failed_files + self.failed_reasons = failed_reasons + self.valid_files = valid_files + + def __str__(self): + return Exception.__str__(self) + ":%s" % self.failed_files + + class CacheError(Exception): - """Base for all errors related to the git index, which is called cache internally""" + """Base for all errors related to the git index, which is called cache internally""" class UnmergedEntriesError(CacheError): - """Thrown if an operation cannot proceed as there are still unmerged - entries in the cache""" + """Thrown if an operation cannot proceed as there are still unmerged + entries in the cache""" diff --git a/git/ext/gitdb b/git/ext/gitdb -Subproject ec6998f503e4619cd6bdecbbf372552ea126900 +Subproject 6576d5503a64d124fd7bcf639cc8955918b3ac4 diff --git a/git/index/base.py b/git/index/base.py index 354319bd..3bd8634c 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -13,54 +13,54 @@ from cStringIO import StringIO from stat import S_ISLNK from typ import ( - BaseIndexEntry, - IndexEntry, - ) + BaseIndexEntry, + IndexEntry, + ) from util import ( - TemporaryFileSwap, - post_clear_cache, - default_index, - git_working_dir - ) + TemporaryFileSwap, + post_clear_cache, + default_index, + git_working_dir + ) import git.objects import git.diff as diff from git.exc import ( - GitCommandError, - CheckoutError - ) + GitCommandError, + CheckoutError + ) from git.objects import ( - Blob, - Submodule, - Tree, - Object, - Commit, - ) + Blob, + Submodule, + Tree, + Object, + Commit, + ) from git.objects.util import Serializable from git.util import ( - IndexFileSHA1Writer, - LazyMixin, - LockedFD, - join_path_native, - file_contents_ro, - to_native_path_linux, - to_native_path - ) + IndexFileSHA1Writer, + LazyMixin, + LockedFD, + join_path_native, + file_contents_ro, + to_native_path_linux, + to_native_path + ) from fun import ( - entry_key, - write_cache, - read_cache, - aggressive_tree_merge, - write_tree_from_cache, - stat_mode_to_index_mode, - S_IFGITLINK - ) + entry_key, + write_cache, + read_cache, + aggressive_tree_merge, + write_tree_from_cache, + stat_mode_to_index_mode, + S_IFGITLINK + ) from gitdb.base import IStream from gitdb.db import MemoryDB @@ -71,1087 +71,1087 @@ __all__ = ( 'IndexFile', 'CheckoutError' ) class IndexFile(LazyMixin, diff.Diffable, Serializable): - """ - Implements an Index that can be manipulated using a native implementation in - order to save git command function calls wherever possible. - - It provides custom merging facilities allowing to merge without actually changing - your index or your working tree. This way you can perform own test-merges based - on the index only without having to deal with the working copy. This is useful - in case of partial working trees. - - ``Entries`` - - The index contains an entries dict whose keys are tuples of type IndexEntry - to facilitate access. 
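The (path, stage) keyed access described above looks like this in practice; a sketch with hypothetical paths:

    from git import Repo

    repo = Repo("/path/to/repo")    # hypothetical path
    index = repo.index
    # keys are (path, stage) tuples; stage 0 is the regular merged stage
    key = index.entry_key("lib/myfile.ext", 0)   # hypothetical file
    if key in index.entries:
        entry = index.entries[key]
        print entry.hexsha, entry.mode
    index.write()                   # persist direct entry manipulation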
- - You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: - - index.entries[index.entry_key(index_entry_instance)] = index_entry_instance - - Make sure you use index.write() once you are done manipulating the index directly - before operating on it using the git command""" - __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path") - _VERSION = 2 # latest version we support - S_IFGITLINK = S_IFGITLINK # a submodule - - def __init__(self, repo, file_path=None): - """Initialize this Index instance, optionally from the given ``file_path``. - If no file_path is given, we will be created from the current index file. - - If a stream is not given, the stream will be initialized from the current - repository's index on demand.""" - self.repo = repo - self.version = self._VERSION - self._extension_data = '' - self._file_path = file_path or self._index_path() - - def _set_cache_(self, attr): - if attr == "entries": - # read the current index - # try memory map for speed - lfd = LockedFD(self._file_path) - try: - fd = lfd.open(write=False, stream=False) - except OSError: - lfd.rollback() - # in new repositories, there may be no index, which means we are empty - self.entries = dict() - return - # END exception handling - - # Here it comes: on windows in python 2.5, memory maps aren't closed properly - # Hence we are in trouble if we try to delete a file that is memory mapped, - # which happens during read-tree. - # In this case, we will just read the memory in directly. - # Its insanely bad ... I am disappointed ! - allow_mmap = (os.name != 'nt' or sys.version_info[1] > 5) - stream = file_contents_ro(fd, stream=True, allow_mmap=allow_mmap) - - try: - self._deserialize(stream) - finally: - lfd.rollback() - # The handles will be closed on desctruction - # END read from default index on demand - else: - super(IndexFile, self)._set_cache_(attr) - - def _index_path(self): - return join_path_native(self.repo.git_dir, "index") - - @property - def path(self): - """ :return: Path to the index file we are representing """ - return self._file_path - - def _delete_entries_cache(self): - """Safely clear the entries cache so it can be recreated""" - try: - del(self.entries) - except AttributeError: - # fails in python 2.6.5 with this exception - pass - # END exception handling - - #{ Serializable Interface - - def _deserialize(self, stream): - """Initialize this instance with index values read from the given stream""" - self.version, self.entries, self._extension_data, conten_sha = read_cache(stream) - return self - - def _entries_sorted(self): - """:return: list of entries, in a sorted fashion, first by path, then by stage""" - entries_sorted = self.entries.values() - entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key - return entries_sorted - - def _serialize(self, stream, ignore_tree_extension_data=False): - entries = self._entries_sorted() - write_cache(entries, - stream, - (ignore_tree_extension_data and None) or self._extension_data) - return self - - - #} END serializable interface - - def write(self, file_path = None, ignore_tree_extension_data=False): - """Write the current state to our file path or to the given one - - :param file_path: - If None, we will write to our stored file path from which we have - been initialized. Otherwise we write to the given file path. - Please note that this will change the file_path of this index to - the one you gave. 
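Both call styles of write are sketched below; the target path is hypothetical, and ignore_tree_extension_data is explained right after this:

    index.write()                                 # write back to the file we were read from
    index.write("/tmp/merged_index")              # hypothetical path; becomes the new file_path
    index.write(ignore_tree_extension_data=True)  # drop cached TREE data before git-write-tree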
- - :param ignore_tree_extension_data: - If True, the TREE type extension data read in the index will not - be written to disk. Use this if you have altered the index and - would like to use git-write-tree afterwards to create a tree - representing your written changes. - If this data is present in the written index, git-write-tree - will instead write the stored/cached tree. - Alternatively, use IndexFile.write_tree() to handle this case - automatically - - :return: self""" - # make sure we have our entries read before getting a write lock - # else it would be done when streaming. This can happen - # if one doesn't change the index, but writes it right away - self.entries - lfd = LockedFD(file_path or self._file_path) - stream = lfd.open(write=True, stream=True) - - self._serialize(stream, ignore_tree_extension_data) - - lfd.commit() - - # make sure we represent what we have written - if file_path is not None: - self._file_path = file_path - - @post_clear_cache - @default_index - def merge_tree(self, rhs, base=None): - """Merge the given rhs treeish into the current index, possibly taking - a common base treeish into account. - - As opposed to the from_tree_ method, this allows you to use an already - existing tree as the left side of the merge - - :param rhs: - treeish reference pointing to the 'other' side of the merge. - - :param base: - optional treeish reference pointing to the common base of 'rhs' and - this index which equals lhs - - :return: - self ( containing the merge and possibly unmerged entries in case of - conflicts ) - - :raise GitCommandError: - If there is a merge conflict. The error will - be raised at the first conflicting path. If you want to have proper - merge resolution to be done by yourself, you have to commit the changed - index ( or make a valid tree from it ) and retry with a three-way - index.from_tree call. """ - # -i : ignore working tree status - # --aggressive : handle more merge cases - # -m : do an actual merge - args = ["--aggressive", "-i", "-m"] - if base is not None: - args.append(base) - args.append(rhs) - - self.repo.git.read_tree(args) - return self - - @classmethod - def new(cls, repo, *tree_sha): - """ Merge the given treeish revisions into a new index which is returned. - This method behaves like git-read-tree --aggressive when doing the merge. - - :param repo: The repository treeish are located in. - - :param tree_sha: - 20 byte or 40 byte tree sha or tree objects - - :return: - New IndexFile instance. Its path will be undefined. - If you intend to write such a merged Index, supply an alternate file_path - to its 'write' method.""" - base_entries = aggressive_tree_merge(repo.odb, [to_bin_sha(str(t)) for t in tree_sha]) - - inst = cls(repo) - # convert to entries dict - entries = dict(izip(((e.path, e.stage) for e in base_entries), - (IndexEntry.from_base(e) for e in base_entries))) - - inst.entries = entries - return inst - - - @classmethod - def from_tree(cls, repo, *treeish, **kwargs): - """Merge the given treeish revisions into a new index which is returned. - The original index will remain unaltered - - :param repo: - The repository treeish are located in. - - :param treeish: - One, two or three Tree Objects, Commits or 40 byte hexshas. The result - changes according to the amount of trees. - If 1 Tree is given, it will just be read into a new index - If 2 Trees are given, they will be merged into a new index using a - two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' - one. It behaves like a fast-forward. 
- If 3 Trees are given, a 3-way merge will be performed with the first tree - being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, - tree 3 is the 'other' one - - :param kwargs: - Additional arguments passed to git-read-tree - - :return: - New IndexFile instance. It will point to a temporary index location which - does not exist anymore. If you intend to write such a merged Index, supply - an alternate file_path to its 'write' method. - - :note: - In the three-way merge case, --aggressive will be specified to automatically - resolve more cases in a commonly correct manner. Specify trivial=True as kwarg - to override that. - - As the underlying git-read-tree command takes into account the current index, - it will be temporarily moved out of the way to assure there are no unsuspected - interferences.""" - if len(treeish) == 0 or len(treeish) > 3: - raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) - - arg_list = list() - # ignore that working tree and index possibly are out of date - if len(treeish)>1: - # drop unmerged entries when reading our index and merging - arg_list.append("--reset") - # handle non-trivial cases the way a real merge does - arg_list.append("--aggressive") - # END merge handling - - # tmp file created in git home directory to be sure renaming - # works - /tmp/ dirs could be on another device - tmp_index = tempfile.mktemp('','',repo.git_dir) - arg_list.append("--index-output=%s" % tmp_index) - arg_list.extend(treeish) - - # move current index out of the way - otherwise the merge may fail - # as it considers existing entries. moving it essentially clears the index. - # Unfortunately there is no 'soft' way to do it. - # The TemporaryFileSwap assure the original file get put back - index_handler = TemporaryFileSwap(join_path_native(repo.git_dir, 'index')) - try: - repo.git.read_tree(*arg_list, **kwargs) - index = cls(repo, tmp_index) - index.entries # force it to read the file as we will delete the temp-file - del(index_handler) # release as soon as possible - finally: - if os.path.exists(tmp_index): - os.remove(tmp_index) - # END index merge handling - - return index - - # UTILITIES - def _iter_expand_paths(self, paths): - """Expand the directories in list of paths to the corresponding paths accordingly, - - Note: git will add items multiple times even if a glob overlapped - with manually specified paths or if paths where specified multiple - times - we respect that and do not prune""" - def raise_exc(e): - raise e - r = self.repo.working_tree_dir - rs = r + os.sep - for path in paths: - abs_path = path - if not os.path.isabs(abs_path): - abs_path = os.path.join(r, path) - # END make absolute path - - # resolve globs if possible - if '?' in path or '*' in path or '[' in path: - for f in self._iter_expand_paths(glob.glob(abs_path)): - yield f.replace(rs, '') - continue - # END glob handling - try: - for root, dirs, files in os.walk(abs_path, onerror=raise_exc): - for rela_file in files: - # add relative paths only - yield os.path.join(root.replace(rs, ''), rela_file) - # END for each file in subdir - # END for each subdirectory - except OSError: - # was a file or something that could not be iterated - yield path.replace(rs, '') - # END path exception handling - # END for each path - - def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress, - read_from_stdout=True): - """Write path to proc.stdin and make sure it processes the item, including progress. 
- - :return: stdout string - :param read_from_stdout: if True, proc.stdout will be read after the item - was sent to stdin. In that case, it will return None - :note: There is a bug in git-update-index that prevents it from sending - reports just in time. This is why we have a version that tries to - read stdout and one which doesn't. In fact, the stdout is not - important as the piped-in files are processed anyway and just in time - :note: Newlines are essential here, gits behaviour is somewhat inconsistent - on this depending on the version, hence we try our best to deal with - newlines carefully. Usually the last newline will not be sent, instead - we will close stdin to break the pipe.""" - - fprogress(filepath, False, item) - rval = None - try: - proc.stdin.write("%s\n" % filepath) - except IOError: - # pipe broke, usually because some error happend - raise fmakeexc() - # END write exception handling - proc.stdin.flush() - if read_from_stdout: - rval = proc.stdout.readline().strip() - fprogress(filepath, True, item) - return rval - - def iter_blobs(self, predicate = lambda t: True): - """ - :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) - - :param predicate: - Function(t) returning True if tuple(stage, Blob) should be yielded by the - iterator. A default filter, the BlobFilter, allows you to yield blobs - only if they match a given list of paths. """ - for entry in self.entries.itervalues(): - # TODO: is it necessary to convert the mode ? We did that when adding - # it to the index, right ? - mode = stat_mode_to_index_mode(entry.mode) - blob = entry.to_blob(self.repo) - blob.size = entry.size - output = (entry.stage, blob) - if predicate(output): - yield output - # END for each entry - - def unmerged_blobs(self): - """ - :return: - Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being - a dictionary associating a path in the index with a list containing - sorted stage/blob pairs - - :note: - Blobs that have been removed in one side simply do not exist in the - given stage. I.e. a file removed on the 'other' branch whose entries - are at stage 3 will not have a stage 3 entry. - """ - is_unmerged_blob = lambda t: t[0] != 0 - path_map = dict() - for stage, blob in self.iter_blobs(is_unmerged_blob): - path_map.setdefault(blob.path, list()).append((stage, blob)) - # END for each unmerged blob - for l in path_map.itervalues(): - l.sort() - return path_map - - @classmethod - def entry_key(cls, *entry): - return entry_key(*entry) - - def resolve_blobs(self, iter_blobs): - """Resolve the blobs given in blob iterator. This will effectively remove the - index entries of the respective path at all non-null stages and add the given - blob as new stage null blob. - - For each path there may only be one blob, otherwise a ValueError will be raised - claiming the path is already at stage 0. - - :raise ValueError: if one of the blobs already existed at stage 0 - :return: self - - :note: - You will have to write the index manually once you are done, i.e. 
index.resolve_blobs(blobs).write()
- """
- for blob in iter_blobs:
- stage_null_key = (blob.path, 0)
- if stage_null_key in self.entries:
- raise ValueError( "Path %r already exists at stage 0" % blob.path )
- # END assert blob is not stage 0 already
-
- # delete all possible stages
- for stage in (1, 2, 3):
- try:
- del( self.entries[(blob.path, stage)])
- except KeyError:
- pass
- # END ignore key errors
- # END for each possible stage
-
- self.entries[stage_null_key] = IndexEntry.from_blob(blob)
- # END for each blob
-
- return self
-
- def update(self):
- """Reread the contents of our index file, discarding all cached information
- we might have.
-
- :note: This is a possibly dangerous operation as it will discard your changes
- to index.entries
- :return: self"""
- self._delete_entries_cache()
- # allows lazy rereading on demand
- return self
-
- def write_tree(self):
- """Writes this index to a corresponding Tree object into the repository's
- object database and returns it.
-
- :return: Tree object representing this index
- :note: The tree will be written even if one or more objects the tree refers to
- do not yet exist in the object database. This could happen if you added
- Entries to the index directly.
- :raise ValueError: if there are no entries in the cache
- :raise UnmergedEntriesError: """
- # we obtain no lock as we just flush our contents to disk as tree
- # If we are a new index, the entries access will load our data accordingly
- mdb = MemoryDB()
- entries = self._entries_sorted()
- binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
-
- # copy changed trees only
- mdb.stream_copy(mdb.sha_iter(), self.repo.odb)
-
-
- # note: additional deserialization could be saved if write_tree_from_cache
- # would return sorted tree entries
- root_tree = Tree(self.repo, binsha, path='')
- root_tree._cache = tree_items
- return root_tree
-
- def _process_diff_args(self, args):
- try:
- args.pop(args.index(self))
- except IndexError:
- pass
- # END remove self
- return args
-
- def _to_relative_path(self, path):
- """:return: Version of path relative to our git directory or raise ValueError
- if it is not within our git directory"""
- if not os.path.isabs(path):
- return path
- relative_path = path.replace(self.repo.working_tree_dir+os.sep, "")
- if relative_path == path:
- raise ValueError("Absolute path %r is not in git repository at %r" % (path,self.repo.working_tree_dir))
- return relative_path
-
- def _preprocess_add_items(self, items):
- """ Split the items into two lists of path strings and BaseEntries. """
- paths = list()
- entries = list()
-
- for item in items:
- if isinstance(item, basestring):
- paths.append(self._to_relative_path(item))
- elif isinstance(item, (Blob, Submodule)):
- entries.append(BaseIndexEntry.from_blob(item))
- elif isinstance(item, BaseIndexEntry):
- entries.append(item)
- else:
- raise TypeError("Invalid Type: %r" % item)
- # END for each item
- return (paths, entries)
-
- @git_working_dir
- def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None,
- write=True):
- """Add files from the working tree, specific blobs or BaseIndexEntries
- to the index.
-
- :param items:
- Multiple types of items are supported, types can be mixed within one call.
- Different types imply a different handling. File paths may generally be
- relative or absolute.
-
- - path string
- strings denote a relative or absolute path into the repository pointing to
- an existing file, i.e.
CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'. - - Paths provided like this must exist. When added, they will be written - into the object database. - - PathStrings may contain globs, such as 'lib/__init__*' or can be directories - like 'lib', the latter ones will add all the files within the dirctory and - subdirectories. - - This equals a straight git-add. - - They are added at stage 0 - - - Blob or Submodule object - Blobs are added as they are assuming a valid mode is set. - The file they refer to may or may not exist in the file system, but - must be a path relative to our repository. - - If their sha is null ( 40*0 ), their path must exist in the file system - relative to the git repository as an object will be created from - the data at the path. - The handling now very much equals the way string paths are processed, except that - the mode you have set will be kept. This allows you to create symlinks - by settings the mode respectively and writing the target of the symlink - directly into the file. This equals a default Linux-Symlink which - is not dereferenced automatically, except that it can be created on - filesystems not supporting it as well. - - Please note that globs or directories are not allowed in Blob objects. - - They are added at stage 0 - - - BaseIndexEntry or type - Handling equals the one of Blob objects, but the stage may be - explicitly set. Please note that Index Entries require binary sha's. - - :param force: - **CURRENTLY INEFFECTIVE** - If True, otherwise ignored or excluded files will be - added anyway. - As opposed to the git-add command, we enable this flag by default - as the API user usually wants the item to be added even though - they might be excluded. - - :param fprogress: - Function with signature f(path, done=False, item=item) called for each - path to be added, one time once it is about to be added where done==False - and once after it was added where done=True. - item is set to the actual item we handle, either a Path or a BaseIndexEntry - Please note that the processed path is not guaranteed to be present - in the index already as the index is currently being processed. - - :param path_rewriter: - Function with signature (string) func(BaseIndexEntry) function returning a path - for each passed entry which is the path to be actually recorded for the - object created from entry.path. This allows you to write an index which - is not identical to the layout of the actual files on your hard-disk. - If not None and ``items`` contain plain paths, these paths will be - converted to Entries beforehand and passed to the path_rewriter. - Please note that entry.path is relative to the git repository. - - :param write: - If True, the index will be written once it was altered. Otherwise - the changes only exist in memory and are not available to git commands. - - :return: - List(BaseIndexEntries) representing the entries just actually added. - - :raise OSError: - if a supplied Path did not exist. Please note that BaseIndexEntry - Objects that do not have a null sha will be added even if their paths - do not exist. 
- """ - # sort the entries into strings and Entries, Blobs are converted to entries - # automatically - # paths can be git-added, for everything else we use git-update-index - entries_added = list() - paths, entries = self._preprocess_add_items(items) - if paths and path_rewriter: - for path in paths: - abspath = os.path.abspath(path) - gitrelative_path = abspath[len(self.repo.working_tree_dir)+1:] - blob = Blob(self.repo, Blob.NULL_BIN_SHA, - stat_mode_to_index_mode(os.stat(abspath).st_mode), - to_native_path_linux(gitrelative_path)) - entries.append(BaseIndexEntry.from_blob(blob)) - # END for each path - del(paths[:]) - # END rewrite paths - - - def store_path(filepath): - """Store file at filepath in the database and return the base index entry""" - st = os.lstat(filepath) # handles non-symlinks as well - stream = None - if S_ISLNK(st.st_mode): - stream = StringIO(os.readlink(filepath)) - else: - stream = open(filepath, 'rb') - # END handle stream - fprogress(filepath, False, filepath) - istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) - fprogress(filepath, True, filepath) - return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode), - istream.binsha, 0, to_native_path_linux(filepath))) - # END utility method - - - # HANDLE PATHS - if paths: - assert len(entries_added) == 0 - added_files = list() - for filepath in self._iter_expand_paths(paths): - entries_added.append(store_path(filepath)) - # END for each filepath - # END path handling - - - # HANDLE ENTRIES - if entries: - null_mode_entries = [ e for e in entries if e.mode == 0 ] - if null_mode_entries: - raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity") - # END null mode should be remove - - # HANLDE ENTRY OBJECT CREATION - # create objects if required, otherwise go with the existing shas - null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ] - if null_entries_indices: - for ei in null_entries_indices: - null_entry = entries[ei] - new_entry = store_path(null_entry.path) - - # update null entry - entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path)) - # END for each entry index - # END null_entry handling - - # REWRITE PATHS - # If we have to rewrite the entries, do so now, after we have generated - # all object sha's - if path_rewriter: - for i,e in enumerate(entries): - entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e))) - # END for each entry - # END handle path rewriting - - # just go through the remaining entries and provide progress info - for i, entry in enumerate(entries): - progress_sent = i in null_entries_indices - if not progress_sent: - fprogress(entry.path, False, entry) - fprogress(entry.path, True, entry) - # END handle progress - # END for each enty - entries_added.extend(entries) - # END if there are base entries - - # FINALIZE - # add the new entries to this instance - for entry in entries_added: - self.entries[(entry.path, 0)] = IndexEntry.from_base(entry) - - if write: - self.write() - # END handle write - - return entries_added - - def _items_to_rela_paths(self, items): - """Returns a list of repo-relative paths from the given items which - may be absolute or relative paths, entries or blobs""" - paths = list() - for item in items: - if isinstance(item, (BaseIndexEntry,(Blob, Submodule))): - paths.append(self._to_relative_path(item.path)) - elif isinstance(item, basestring): - paths.append(self._to_relative_path(item)) - 
else:
- raise TypeError("Invalid item type: %r" % item)
- # END for each item
- return paths
-
- @post_clear_cache
- @default_index
- def remove(self, items, working_tree=False, **kwargs):
- """Remove the given items from the index and optionally from
- the working tree as well.
-
- :param items:
- Multiple types of items are supported which may be freely mixed.
-
- - path string
- Remove the given path at all stages. If it is a directory, you must
- specify the r=True keyword argument to remove all file entries
- below it. If absolute paths are given, they will be converted
- to a path relative to the git repository directory containing
- the working tree
-
- The path string may include globs, such as *.c.
-
- - Blob Object
- Only the path portion is used in this case.
-
- - BaseIndexEntry or compatible type
- The only relevant information here is the path. The stage is ignored.
-
- :param working_tree:
- If True, the entry will also be removed from the working tree, physically
- removing the respective file. This may fail if there are uncommitted changes
- in it.
-
- :param kwargs:
- Additional keyword arguments to be passed to git-rm, such
- as 'r' to allow recursive removal of
-
- :return:
- List(path_string, ...) list of repository relative paths that have
- been removed effectively.
- This is interesting to know in case you have provided a directory or
- globs. Paths are relative to the repository. """
- args = list()
- if not working_tree:
- args.append("--cached")
- args.append("--")
-
- # preprocess paths
- paths = self._items_to_rela_paths(items)
- removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines()
-
- # process output to gain proper paths
- # rm 'path'
- return [ p[4:-1] for p in removed_paths ]
-
- @post_clear_cache
- @default_index
- def move(self, items, skip_errors=False, **kwargs):
- """Rename/move the items, whereas the last item is considered the destination of
- the move operation. If the destination is a file, the first item ( of two )
- must be a file as well. If the destination is a directory, it may be preceded
- by one or more directories or files.
-
- The working tree will be affected in non-bare repositories.
-
- :param items:
- Multiple types of items are supported, please see the 'remove' method
- for reference.
- :param skip_errors:
- If True, errors such as ones resulting from missing source files will
- be skipped.
- :param kwargs:
- Additional arguments you would like to pass to git-mv, such as dry_run
- or force.
-
- :return: List(tuple(source_path_string, destination_path_string), ...)
- A list of pairs, containing the source file moved as well as its
- actual destination. Relative to the repository root.
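A usage sketch for the move operation just described, with hypothetical paths:

    # a dry run only reports what would be renamed
    planned = index.move(["lib/old.py", "lib/new.py"], dry_run=True)   # hypothetical paths
    # the real move returns (source, destination) pairs relative to the repo root
    renamed = index.move(["lib/old.py", "lib/new.py"])
    print renamed    # e.g. [('lib/old.py', 'lib/new.py')]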
- - :raise ValueErorr: If only one item was given - GitCommandError: If git could not handle your request""" - args = list() - if skip_errors: - args.append('-k') - - paths = self._items_to_rela_paths(items) - if len(paths) < 2: - raise ValueError("Please provide at least one source and one destination of the move operation") - - was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None)) - kwargs['dry_run'] = True - - # first execute rename in dryrun so the command tells us what it actually does - # ( for later output ) - out = list() - mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines() - - # parse result - first 0:n/2 lines are 'checking ', the remaining ones - # are the 'renaming' ones which we parse - for ln in xrange(len(mvlines)/2, len(mvlines)): - tokens = mvlines[ln].split(' to ') - assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln] - - # [0] = Renaming x - # [1] = y - out.append((tokens[0][9:], tokens[1])) - # END for each line to parse - - # either prepare for the real run, or output the dry-run result - if was_dry_run: - return out - # END handle dryrun - - - # now apply the actual operation - kwargs.pop('dry_run') - self.repo.git.mv(args, paths, **kwargs) - - return out - - def commit(self, message, parent_commits=None, head=True): - """Commit the current default index file, creating a commit object. - - For more information on the arguments, see tree.commit. - :note: - If you have manually altered the .entries member of this instance, - don't forget to write() your changes to disk beforehand. - - :return: - Commit object representing the new commit""" - tree = self.write_tree() - return Commit.create_from_tree(self.repo, tree, message, parent_commits, head) - - @classmethod - def _flush_stdin_and_wait(cls, proc, ignore_stdout = False): - proc.stdin.flush() - proc.stdin.close() - stdout = '' - if not ignore_stdout: - stdout = proc.stdout.read() - proc.stdout.close() - proc.wait() - return stdout - - @default_index - def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs): - """Checkout the given paths or all files from the version known to the index into - the working tree. - - :note: Be sure you have written pending changes using the ``write`` method - in case you have altered the enties dictionary directly - - :param paths: - If None, all paths in the index will be checked out. Otherwise an iterable - of relative or absolute paths or a single path pointing to files or directories - in the index is expected. - - :param force: - If True, existing files will be overwritten even if they contain local modifications. - If False, these will trigger a CheckoutError. - - :param fprogress: - see Index.add_ for signature and explanation. - The provided progress information will contain None as path and item if no - explicit paths are given. Otherwise progress information will be send - prior and after a file has been checked out - - :param kwargs: - Additional arguments to be pasesd to git-checkout-index - - :return: - iterable yielding paths to files which have been checked out and are - guaranteed to match the version stored in the index - - :raise CheckoutError: - If at least one file failed to be checked out. This is a summary, - hence it will checkout as many files as it can anyway. - If one of files or directories do not exist in the index - ( as opposed to the original git command who ignores them ). - Raise GitCommandError if error lines could not be parsed - this truly is - an exceptional state - - .. 
note:: The checkout is limited to checking out the files in the - index. Files which are not in the index anymore and exist in - the working tree will not be deleted. This behaviour is fundamentally - different to *head.checkout*, i.e. if you want git-checkout like behaviour, - use head.checkout instead of index.checkout. - """ - args = ["--index"] - if force: - args.append("--force") - - def handle_stderr(proc, iter_checked_out_files): - stderr = proc.stderr.read() - if not stderr: - return - # line contents: - # git-checkout-index: this already exists - failed_files = list() - failed_reasons = list() - unknown_lines = list() - endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged') - for line in stderr.splitlines(): - if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "): - is_a_dir = " is a directory" - unlink_issue = "unable to unlink old '" - already_exists_issue = ' already exists, no checkout' # created by entry.c:checkout_entry(...) - if line.endswith(is_a_dir): - failed_files.append(line[:-len(is_a_dir)]) - failed_reasons.append(is_a_dir) - elif line.startswith(unlink_issue): - failed_files.append(line[len(unlink_issue):line.rfind("'")]) - failed_reasons.append(unlink_issue) - elif line.endswith(already_exists_issue): - failed_files.append(line[:-len(already_exists_issue)]) - failed_reasons.append(already_exists_issue) - else: - unknown_lines.append(line) - continue - # END special lines parsing - - for e in endings: - if line.endswith(e): - failed_files.append(line[20:-len(e)]) - failed_reasons.append(e) - break - # END if ending matches - # END for each possible ending - # END for each line - if unknown_lines: - raise GitCommandError(("git-checkout-index", ), 128, stderr) - if failed_files: - valid_files = list(set(iter_checked_out_files) - set(failed_files)) - raise CheckoutError("Some files could not be checked out from the index due to local modifications", failed_files, valid_files, failed_reasons) - # END stderr handler - - - if paths is None: - args.append("--all") - kwargs['as_process'] = 1 - fprogress(None, False, None) - proc = self.repo.git.checkout_index(*args, **kwargs) - proc.wait() - fprogress(None, True, None) - rval_iter = ( e.path for e in self.entries.itervalues() ) - handle_stderr(proc, rval_iter) - return rval_iter - else: - if isinstance(paths, basestring): - paths = [paths] - - # make sure we have our entries loaded before we start checkout_index - # which will hold a lock on it. 
We try to get the lock as well during - # our entries initialization - self.entries - - args.append("--stdin") - kwargs['as_process'] = True - kwargs['istream'] = subprocess.PIPE - proc = self.repo.git.checkout_index(args, **kwargs) - make_exc = lambda : GitCommandError(("git-checkout-index",)+tuple(args), 128, proc.stderr.read()) - checked_out_files = list() - - for path in paths: - co_path = to_native_path_linux(self._to_relative_path(path)) - # if the item is not in the index, it could be a directory - path_is_directory = False - - try: - self.entries[(co_path, 0)] - except KeyError: - dir = co_path - if not dir.endswith('/'): - dir += '/' - for entry in self.entries.itervalues(): - if entry.path.startswith(dir): - p = entry.path - self._write_path_to_stdin(proc, p, p, make_exc, - fprogress, read_from_stdout=False) - checked_out_files.append(p) - path_is_directory = True - # END if entry is in directory - # END for each entry - # END path exception handlnig - - if not path_is_directory: - self._write_path_to_stdin(proc, co_path, path, make_exc, - fprogress, read_from_stdout=False) - checked_out_files.append(co_path) - # END path is a file - # END for each path - self._flush_stdin_and_wait(proc, ignore_stdout=True) - - handle_stderr(proc, checked_out_files) - return checked_out_files - # END paths handling - assert "Should not reach this point" - - @default_index - def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs): - """Reset the index to reflect the tree at the given commit. This will not - adjust our HEAD reference as opposed to HEAD.reset by default. - - :param commit: - Revision, Reference or Commit specifying the commit we should represent. - If you want to specify a tree only, use IndexFile.from_tree and overwrite - the default index. - - :param working_tree: - If True, the files in the working tree will reflect the changed index. - If False, the working tree will not be touched - Please note that changes to the working copy will be discarded without - warning ! - - :param head: - If True, the head will be set to the given commit. This is False by default, - but if True, this method behaves like HEAD.reset. - - :param paths: if given as an iterable of absolute or repository-relative paths, - only these will be reset to their state at the given commit'ish. - The paths need to exist at the commit, otherwise an exception will be - raised. - - :param kwargs: - Additional keyword arguments passed to git-reset - - .. note:: IndexFile.reset, as opposed to HEAD.reset, will not delete anyfiles - in order to maintain a consistent working tree. Instead, it will just - checkout the files according to their state in the index. - If you want git-reset like behaviour, use *HEAD.reset* instead. 
- - :return: self """ - # what we actually want to do is to merge the tree into our existing - # index, which is what git-read-tree does - new_inst = type(self).from_tree(self.repo, commit) - if not paths: - self.entries = new_inst.entries - else: - nie = new_inst.entries - for path in paths: - path = self._to_relative_path(path) - try: - key = entry_key(path, 0) - self.entries[key] = nie[key] - except KeyError: - # if key is not in theirs, it musn't be in ours - try: - del(self.entries[key]) - except KeyError: - pass - # END handle deletion keyerror - # END handle keyerror - # END for each path - # END handle paths - self.write() - - if working_tree: - self.checkout(paths=paths, force=True) - # END handle working tree - - if head: - self.repo.head.set_commit(self.repo.commit(commit), logmsg="%s: Updating HEAD" % commit) - # END handle head change - - return self - - @default_index - def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs): - """Diff this index against the working copy or a Tree or Commit object - - For a documentation of the parameters and return values, see - Diffable.diff - - :note: - Will only work with indices that represent the default git index as - they have not been initialized with a stream. - """ - # index against index is always empty - if other is self.Index: - return diff.DiffIndex() - - # index against anything but None is a reverse diff with the respective - # item. Handle existing -R flags properly. Transform strings to the object - # so that we can call diff on it - if isinstance(other, basestring): - other = self.repo.rev_parse(other) - # END object conversion - - if isinstance(other, Object): - # invert the existing R flag - cur_val = kwargs.get('R', False) - kwargs['R'] = not cur_val - return other.diff(self.Index, paths, create_patch, **kwargs) - # END diff against other item handlin - - # if other is not None here, something is wrong - if other is not None: - raise ValueError( "other must be None, Diffable.Index, a Tree or Commit, was %r" % other ) - - # diff against working copy - can be handled by superclass natively - return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) + """ + Implements an Index that can be manipulated using a native implementation in + order to save git command function calls wherever possible. + + It provides custom merging facilities allowing to merge without actually changing + your index or your working tree. This way you can perform own test-merges based + on the index only without having to deal with the working copy. This is useful + in case of partial working trees. + + ``Entries`` + + The index contains an entries dict whose keys are tuples of type IndexEntry + to facilitate access. + + You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: + + index.entries[index.entry_key(index_entry_instance)] = index_entry_instance + + Make sure you use index.write() once you are done manipulating the index directly + before operating on it using the git command""" + __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path") + _VERSION = 2 # latest version we support + S_IFGITLINK = S_IFGITLINK # a submodule + + def __init__(self, repo, file_path=None): + """Initialize this Index instance, optionally from the given ``file_path``. + If no file_path is given, we will be created from the current index file. 
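# --- usage sketch: the entries dict described in the IndexFile docstring above ---
# A minimal sketch, assuming an existing repository at a placeholder path; it
# only uses API defined in this file (repo.index, index.entries, index.write).
from git import Repo

repo = Repo("/path/to/repo")        # placeholder path, an assumption
index = repo.index                  # the repository's default IndexFile

# keys are (path, stage) tuples, values are IndexEntry instances
for (path, stage), entry in index.entries.items():
    print("%06o %s %i\t%s" % (entry.mode, entry.hexsha, stage, path))

index.write()   # persist direct entry manipulation before running git commands
# --- end sketch ---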
+ + If a stream is not given, the stream will be initialized from the current + repository's index on demand.""" + self.repo = repo + self.version = self._VERSION + self._extension_data = '' + self._file_path = file_path or self._index_path() + + def _set_cache_(self, attr): + if attr == "entries": + # read the current index + # try memory map for speed + lfd = LockedFD(self._file_path) + try: + fd = lfd.open(write=False, stream=False) + except OSError: + lfd.rollback() + # in new repositories, there may be no index, which means we are empty + self.entries = dict() + return + # END exception handling + + # Here it comes: on windows in python 2.5, memory maps aren't closed properly + # Hence we are in trouble if we try to delete a file that is memory mapped, + # which happens during read-tree. + # In this case, we will just read the memory in directly. + # Its insanely bad ... I am disappointed ! + allow_mmap = (os.name != 'nt' or sys.version_info[1] > 5) + stream = file_contents_ro(fd, stream=True, allow_mmap=allow_mmap) + + try: + self._deserialize(stream) + finally: + lfd.rollback() + # The handles will be closed on desctruction + # END read from default index on demand + else: + super(IndexFile, self)._set_cache_(attr) + + def _index_path(self): + return join_path_native(self.repo.git_dir, "index") + + @property + def path(self): + """ :return: Path to the index file we are representing """ + return self._file_path + + def _delete_entries_cache(self): + """Safely clear the entries cache so it can be recreated""" + try: + del(self.entries) + except AttributeError: + # fails in python 2.6.5 with this exception + pass + # END exception handling + + #{ Serializable Interface + + def _deserialize(self, stream): + """Initialize this instance with index values read from the given stream""" + self.version, self.entries, self._extension_data, conten_sha = read_cache(stream) + return self + + def _entries_sorted(self): + """:return: list of entries, in a sorted fashion, first by path, then by stage""" + entries_sorted = self.entries.values() + entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key + return entries_sorted + + def _serialize(self, stream, ignore_tree_extension_data=False): + entries = self._entries_sorted() + write_cache(entries, + stream, + (ignore_tree_extension_data and None) or self._extension_data) + return self + + + #} END serializable interface + + def write(self, file_path = None, ignore_tree_extension_data=False): + """Write the current state to our file path or to the given one + + :param file_path: + If None, we will write to our stored file path from which we have + been initialized. Otherwise we write to the given file path. + Please note that this will change the file_path of this index to + the one you gave. + + :param ignore_tree_extension_data: + If True, the TREE type extension data read in the index will not + be written to disk. Use this if you have altered the index and + would like to use git-write-tree afterwards to create a tree + representing your written changes. + If this data is present in the written index, git-write-tree + will instead write the stored/cached tree. + Alternatively, use IndexFile.write_tree() to handle this case + automatically + + :return: self""" + # make sure we have our entries read before getting a write lock + # else it would be done when streaming. 
This can happen + # if one doesn't change the index, but writes it right away + self.entries + lfd = LockedFD(file_path or self._file_path) + stream = lfd.open(write=True, stream=True) + + self._serialize(stream, ignore_tree_extension_data) + + lfd.commit() + + # make sure we represent what we have written + if file_path is not None: + self._file_path = file_path + + @post_clear_cache + @default_index + def merge_tree(self, rhs, base=None): + """Merge the given rhs treeish into the current index, possibly taking + a common base treeish into account. + + As opposed to the from_tree_ method, this allows you to use an already + existing tree as the left side of the merge + + :param rhs: + treeish reference pointing to the 'other' side of the merge. + + :param base: + optional treeish reference pointing to the common base of 'rhs' and + this index which equals lhs + + :return: + self ( containing the merge and possibly unmerged entries in case of + conflicts ) + + :raise GitCommandError: + If there is a merge conflict. The error will + be raised at the first conflicting path. If you want to have proper + merge resolution to be done by yourself, you have to commit the changed + index ( or make a valid tree from it ) and retry with a three-way + index.from_tree call. """ + # -i : ignore working tree status + # --aggressive : handle more merge cases + # -m : do an actual merge + args = ["--aggressive", "-i", "-m"] + if base is not None: + args.append(base) + args.append(rhs) + + self.repo.git.read_tree(args) + return self + + @classmethod + def new(cls, repo, *tree_sha): + """ Merge the given treeish revisions into a new index which is returned. + This method behaves like git-read-tree --aggressive when doing the merge. + + :param repo: The repository treeish are located in. + + :param tree_sha: + 20 byte or 40 byte tree sha or tree objects + + :return: + New IndexFile instance. Its path will be undefined. + If you intend to write such a merged Index, supply an alternate file_path + to its 'write' method.""" + base_entries = aggressive_tree_merge(repo.odb, [to_bin_sha(str(t)) for t in tree_sha]) + + inst = cls(repo) + # convert to entries dict + entries = dict(izip(((e.path, e.stage) for e in base_entries), + (IndexEntry.from_base(e) for e in base_entries))) + + inst.entries = entries + return inst + + + @classmethod + def from_tree(cls, repo, *treeish, **kwargs): + """Merge the given treeish revisions into a new index which is returned. + The original index will remain unaltered + + :param repo: + The repository treeish are located in. + + :param treeish: + One, two or three Tree Objects, Commits or 40 byte hexshas. The result + changes according to the amount of trees. + If 1 Tree is given, it will just be read into a new index + If 2 Trees are given, they will be merged into a new index using a + two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' + one. It behaves like a fast-forward. + If 3 Trees are given, a 3-way merge will be performed with the first tree + being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, + tree 3 is the 'other' one + + :param kwargs: + Additional arguments passed to git-read-tree + + :return: + New IndexFile instance. It will point to a temporary index location which + does not exist anymore. If you intend to write such a merged Index, supply + an alternate file_path to its 'write' method. 
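# --- usage sketch: merging trees with IndexFile.from_tree / write ---
# A hedged sketch of the tree-merging helpers documented here. The branch name
# 'feature' and all paths are assumptions; the merge base is obtained through
# the plain git command interface rather than a dedicated wrapper.
from git import Repo, IndexFile

repo = Repo("/path/to/repo")
base = repo.git.merge_base('HEAD', 'feature')       # common ancestor hexsha
# three-way merge of the trees only - neither the real index nor the
# working tree is touched
merged = IndexFile.from_tree(repo, base, 'HEAD', 'feature')
# the temporary index used internally is gone; write the result explicitly
merged.write("/tmp/merged-index")
# --- end sketch ---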
+ + :note: + In the three-way merge case, --aggressive will be specified to automatically + resolve more cases in a commonly correct manner. Specify trivial=True as kwarg + to override that. + + As the underlying git-read-tree command takes into account the current index, + it will be temporarily moved out of the way to assure there are no unsuspected + interferences.""" + if len(treeish) == 0 or len(treeish) > 3: + raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) + + arg_list = list() + # ignore that working tree and index possibly are out of date + if len(treeish)>1: + # drop unmerged entries when reading our index and merging + arg_list.append("--reset") + # handle non-trivial cases the way a real merge does + arg_list.append("--aggressive") + # END merge handling + + # tmp file created in git home directory to be sure renaming + # works - /tmp/ dirs could be on another device + tmp_index = tempfile.mktemp('','',repo.git_dir) + arg_list.append("--index-output=%s" % tmp_index) + arg_list.extend(treeish) + + # move current index out of the way - otherwise the merge may fail + # as it considers existing entries. moving it essentially clears the index. + # Unfortunately there is no 'soft' way to do it. + # The TemporaryFileSwap assure the original file get put back + index_handler = TemporaryFileSwap(join_path_native(repo.git_dir, 'index')) + try: + repo.git.read_tree(*arg_list, **kwargs) + index = cls(repo, tmp_index) + index.entries # force it to read the file as we will delete the temp-file + del(index_handler) # release as soon as possible + finally: + if os.path.exists(tmp_index): + os.remove(tmp_index) + # END index merge handling + + return index + + # UTILITIES + def _iter_expand_paths(self, paths): + """Expand the directories in list of paths to the corresponding paths accordingly, + + Note: git will add items multiple times even if a glob overlapped + with manually specified paths or if paths where specified multiple + times - we respect that and do not prune""" + def raise_exc(e): + raise e + r = self.repo.working_tree_dir + rs = r + os.sep + for path in paths: + abs_path = path + if not os.path.isabs(abs_path): + abs_path = os.path.join(r, path) + # END make absolute path + + # resolve globs if possible + if '?' in path or '*' in path or '[' in path: + for f in self._iter_expand_paths(glob.glob(abs_path)): + yield f.replace(rs, '') + continue + # END glob handling + try: + for root, dirs, files in os.walk(abs_path, onerror=raise_exc): + for rela_file in files: + # add relative paths only + yield os.path.join(root.replace(rs, ''), rela_file) + # END for each file in subdir + # END for each subdirectory + except OSError: + # was a file or something that could not be iterated + yield path.replace(rs, '') + # END path exception handling + # END for each path + + def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress, + read_from_stdout=True): + """Write path to proc.stdin and make sure it processes the item, including progress. + + :return: stdout string + :param read_from_stdout: if True, proc.stdout will be read after the item + was sent to stdin. In that case, it will return None + :note: There is a bug in git-update-index that prevents it from sending + reports just in time. This is why we have a version that tries to + read stdout and one which doesn't. 
In fact, the stdout is not + important as the piped-in files are processed anyway and just in time + :note: Newlines are essential here, gits behaviour is somewhat inconsistent + on this depending on the version, hence we try our best to deal with + newlines carefully. Usually the last newline will not be sent, instead + we will close stdin to break the pipe.""" + + fprogress(filepath, False, item) + rval = None + try: + proc.stdin.write("%s\n" % filepath) + except IOError: + # pipe broke, usually because some error happend + raise fmakeexc() + # END write exception handling + proc.stdin.flush() + if read_from_stdout: + rval = proc.stdout.readline().strip() + fprogress(filepath, True, item) + return rval + + def iter_blobs(self, predicate = lambda t: True): + """ + :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) + + :param predicate: + Function(t) returning True if tuple(stage, Blob) should be yielded by the + iterator. A default filter, the BlobFilter, allows you to yield blobs + only if they match a given list of paths. """ + for entry in self.entries.itervalues(): + # TODO: is it necessary to convert the mode ? We did that when adding + # it to the index, right ? + mode = stat_mode_to_index_mode(entry.mode) + blob = entry.to_blob(self.repo) + blob.size = entry.size + output = (entry.stage, blob) + if predicate(output): + yield output + # END for each entry + + def unmerged_blobs(self): + """ + :return: + Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being + a dictionary associating a path in the index with a list containing + sorted stage/blob pairs + + :note: + Blobs that have been removed in one side simply do not exist in the + given stage. I.e. a file removed on the 'other' branch whose entries + are at stage 3 will not have a stage 3 entry. + """ + is_unmerged_blob = lambda t: t[0] != 0 + path_map = dict() + for stage, blob in self.iter_blobs(is_unmerged_blob): + path_map.setdefault(blob.path, list()).append((stage, blob)) + # END for each unmerged blob + for l in path_map.itervalues(): + l.sort() + return path_map + + @classmethod + def entry_key(cls, *entry): + return entry_key(*entry) + + def resolve_blobs(self, iter_blobs): + """Resolve the blobs given in blob iterator. This will effectively remove the + index entries of the respective path at all non-null stages and add the given + blob as new stage null blob. + + For each path there may only be one blob, otherwise a ValueError will be raised + claiming the path is already at stage 0. + + :raise ValueError: if one of the blobs already existed at stage 0 + :return: self + + :note: + You will have to write the index manually once you are done, i.e. + index.resolve_blobs(blobs).write() + """ + for blob in iter_blobs: + stage_null_key = (blob.path, 0) + if stage_null_key in self.entries: + raise ValueError( "Path %r already exists at stage 0" % blob.path ) + # END assert blob is not stage 0 already + + # delete all possible stages + for stage in (1, 2, 3): + try: + del( self.entries[(blob.path, stage)]) + except KeyError: + pass + # END ignore key errors + # END for each possible stage + + self.entries[stage_null_key] = IndexEntry.from_blob(blob) + # END for each blob + + return self + + def update(self): + """Reread the contents of our index file, discarding all cached information + we might have. 
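# --- usage sketch: unmerged_blobs() and resolve_blobs() as defined above ---
# A minimal sketch, assuming an index that currently contains unmerged
# entries; keeping 'our' stage-2 blob is just one illustrative policy.
from git import Repo

repo = Repo("/path/to/repo")
index = repo.index

keep = []
for path, stage_blob_pairs in index.unmerged_blobs().items():
    for stage, blob in stage_blob_pairs:
        if stage == 2:          # stage 2 is 'our' version of the file
            keep.append(blob)
# resolve_blobs() drops stages 1-3 and re-adds the blobs at stage 0; it does
# not write, hence the chained write() suggested by its docstring
index.resolve_blobs(keep).write()
# --- end sketch ---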
+
+        :note: This is a possibly dangerous operation as it will discard your changes
+            to index.entries
+        :return: self"""
+        self._delete_entries_cache()
+        # allows us to lazily reread on demand
+        return self
+
+    def write_tree(self):
+        """Writes this index to a corresponding Tree object into the repository's
+        object database and return it.
+
+        :return: Tree object representing this index
+        :note: The tree will be written even if one or more objects the tree refers to
+            do not yet exist in the object database. This could happen if you added
+            Entries to the index directly.
+        :raise ValueError: if there are no entries in the cache
+        :raise UnmergedEntriesError: """
+        # we obtain no lock as we just flush our contents to disk as tree
+        # If we are a new index, the entries access will load our data accordingly
+        mdb = MemoryDB()
+        entries = self._entries_sorted()
+        binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
+
+        # copy changed trees only
+        mdb.stream_copy(mdb.sha_iter(), self.repo.odb)
+
+        # note: additional deserialization could be saved if write_tree_from_cache
+        # would return sorted tree entries
+        root_tree = Tree(self.repo, binsha, path='')
+        root_tree._cache = tree_items
+        return root_tree
+
+    def _process_diff_args(self, args):
+        try:
+            args.pop(args.index(self))
+        except IndexError:
+            pass
+        # END remove self
+        return args
+
+    def _to_relative_path(self, path):
+        """:return: Version of path relative to our git directory or raise ValueError
+        if it is not within our git directory"""
+        if not os.path.isabs(path):
+            return path
+        relative_path = path.replace(self.repo.working_tree_dir+os.sep, "")
+        if relative_path == path:
+            raise ValueError("Absolute path %r is not in git repository at %r" % (path,self.repo.working_tree_dir))
+        return relative_path
+
+    def _preprocess_add_items(self, items):
+        """ Split the items into two lists of path strings and BaseEntries. """
+        paths = list()
+        entries = list()
+
+        for item in items:
+            if isinstance(item, basestring):
+                paths.append(self._to_relative_path(item))
+            elif isinstance(item, (Blob, Submodule)):
+                entries.append(BaseIndexEntry.from_blob(item))
+            elif isinstance(item, BaseIndexEntry):
+                entries.append(item)
+            else:
+                raise TypeError("Invalid Type: %r" % item)
+        # END for each item
+        return (paths, entries)
+
+    @git_working_dir
+    def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None,
+            write=True):
+        """Add files from the working tree, specific blobs or BaseIndexEntries
+        to the index.
+
+        :param items:
+            Multiple types of items are supported, types can be mixed within one call.
+            Different types imply a different handling. File paths may generally be
+            relative or absolute.
+
+            - path string
+                strings denote a relative or absolute path into the repository pointing to
+                an existing file, e.g. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'.
+
+                Paths provided like this must exist. When added, they will be written
+                into the object database.
+
+                PathStrings may contain globs, such as 'lib/__init__*', or can be directories
+                like 'lib', the latter ones will add all the files within the directory and
+                subdirectories.
+
+                This equals a straight git-add.
+
+                They are added at stage 0
+
+            - Blob or Submodule object
+                Blobs are added as they are, assuming a valid mode is set.
+                The file they refer to may or may not exist in the file system, but
+                must be a path relative to our repository.
+
+                If their sha is null ( 40*0 ), their path must exist in the file system
+                relative to the git repository, as an object will be created from
+                the data at the path.
+                The handling then very much equals the way string paths are processed, except that
+                the mode you have set will be kept. This allows you to create symlinks
+                by setting the mode accordingly and writing the target of the symlink
+                directly into the file. This equals a default Linux symlink which
+                is not dereferenced automatically, except that it can be created on
+                filesystems that do not support symlinks as well.
+
+                Please note that globs or directories are not allowed in Blob objects.
+
+                They are added at stage 0
+
+            - BaseIndexEntry or compatible type
+                Handling equals the one of Blob objects, but the stage may be
+                explicitly set. Please note that Index Entries require binary shas.
+
+        :param force:
+            **CURRENTLY INEFFECTIVE**
+            If True, otherwise ignored or excluded files will be
+            added anyway.
+            As opposed to the git-add command, we enable this flag by default
+            as the API user usually wants the item to be added even though
+            they might be excluded.
+
+        :param fprogress:
+            Function with signature f(path, done=False, item=item) called for each
+            path to be added, once before it is added, with done=False,
+            and once after it was added, with done=True.
+            item is set to the actual item we handle, either a Path or a BaseIndexEntry.
+            Please note that the processed path is not guaranteed to be present
+            in the index already as the index is currently being processed.
+
+        :param path_rewriter:
+            Function with signature (string) func(BaseIndexEntry), returning the path
+            to be actually recorded for the object created from entry.path.
+            This allows you to write an index which
+            is not identical to the layout of the actual files on your hard-disk.
+            If not None and ``items`` contain plain paths, these paths will be
+            converted to Entries beforehand and passed to the path_rewriter.
+            Please note that entry.path is relative to the git repository.
+
+        :param write:
+            If True, the index will be written once it was altered. Otherwise
+            the changes only exist in memory and are not available to git commands.
+
+        :return:
+            List(BaseIndexEntries) representing the entries just actually added.
+
+        :raise OSError:
+            if a supplied Path did not exist. Please note that BaseIndexEntry
+            Objects that do not have a null sha will be added even if their paths
+            do not exist. A usage sketch follows below.
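# --- usage sketch: IndexFile.add() as documented above ---
# A minimal sketch, assuming a repository at a placeholder path; all file
# names are assumptions chosen for illustration.
from git import Repo

repo = Repo("/path/to/repo")
index = repo.index

# plain paths: globs and directories are expanded, blobs are written to the
# object database, and the resulting entries land at stage 0
index.add(['README', 'lib/*.py'])

# record an entry under a different path than the file on disk
index.add(['build/generated.txt'],
          path_rewriter=lambda entry: 'dist/generated.txt')
# --- end sketch ---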
+ """ + # sort the entries into strings and Entries, Blobs are converted to entries + # automatically + # paths can be git-added, for everything else we use git-update-index + entries_added = list() + paths, entries = self._preprocess_add_items(items) + if paths and path_rewriter: + for path in paths: + abspath = os.path.abspath(path) + gitrelative_path = abspath[len(self.repo.working_tree_dir)+1:] + blob = Blob(self.repo, Blob.NULL_BIN_SHA, + stat_mode_to_index_mode(os.stat(abspath).st_mode), + to_native_path_linux(gitrelative_path)) + entries.append(BaseIndexEntry.from_blob(blob)) + # END for each path + del(paths[:]) + # END rewrite paths + + + def store_path(filepath): + """Store file at filepath in the database and return the base index entry""" + st = os.lstat(filepath) # handles non-symlinks as well + stream = None + if S_ISLNK(st.st_mode): + stream = StringIO(os.readlink(filepath)) + else: + stream = open(filepath, 'rb') + # END handle stream + fprogress(filepath, False, filepath) + istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) + fprogress(filepath, True, filepath) + return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode), + istream.binsha, 0, to_native_path_linux(filepath))) + # END utility method + + + # HANDLE PATHS + if paths: + assert len(entries_added) == 0 + added_files = list() + for filepath in self._iter_expand_paths(paths): + entries_added.append(store_path(filepath)) + # END for each filepath + # END path handling + + + # HANDLE ENTRIES + if entries: + null_mode_entries = [ e for e in entries if e.mode == 0 ] + if null_mode_entries: + raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity") + # END null mode should be remove + + # HANLDE ENTRY OBJECT CREATION + # create objects if required, otherwise go with the existing shas + null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ] + if null_entries_indices: + for ei in null_entries_indices: + null_entry = entries[ei] + new_entry = store_path(null_entry.path) + + # update null entry + entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path)) + # END for each entry index + # END null_entry handling + + # REWRITE PATHS + # If we have to rewrite the entries, do so now, after we have generated + # all object sha's + if path_rewriter: + for i,e in enumerate(entries): + entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e))) + # END for each entry + # END handle path rewriting + + # just go through the remaining entries and provide progress info + for i, entry in enumerate(entries): + progress_sent = i in null_entries_indices + if not progress_sent: + fprogress(entry.path, False, entry) + fprogress(entry.path, True, entry) + # END handle progress + # END for each enty + entries_added.extend(entries) + # END if there are base entries + + # FINALIZE + # add the new entries to this instance + for entry in entries_added: + self.entries[(entry.path, 0)] = IndexEntry.from_base(entry) + + if write: + self.write() + # END handle write + + return entries_added + + def _items_to_rela_paths(self, items): + """Returns a list of repo-relative paths from the given items which + may be absolute or relative paths, entries or blobs""" + paths = list() + for item in items: + if isinstance(item, (BaseIndexEntry,(Blob, Submodule))): + paths.append(self._to_relative_path(item.path)) + elif isinstance(item, basestring): + paths.append(self._to_relative_path(item)) + 
+            else:
+                raise TypeError("Invalid item type: %r" % item)
+        # END for each item
+        return paths
+
+    @post_clear_cache
+    @default_index
+    def remove(self, items, working_tree=False, **kwargs):
+        """Remove the given items from the index and optionally from
+        the working tree as well.
+
+        :param items:
+            Multiple types of items are supported which may be freely mixed.
+
+            - path string
+                Remove the given path at all stages. If it is a directory, you must
+                specify the r=True keyword argument to remove all file entries
+                below it. If absolute paths are given, they will be converted
+                to a path relative to the git repository directory containing
+                the working tree
+
+                The path string may include globs, such as *.c.
+
+            - Blob Object
+                Only the path portion is used in this case.
+
+            - BaseIndexEntry or compatible type
+                The only relevant information here is the path. The stage is ignored.
+
+        :param working_tree:
+            If True, the entry will also be removed from the working tree, physically
+            removing the respective file. This may fail if there are uncommitted changes
+            in it.
+
+        :param kwargs:
+            Additional keyword arguments to be passed to git-rm, such
+            as 'r' to allow recursive removal
+
+        :return:
+            List(path_string, ...) list of repository relative paths that have
+            been removed effectively.
+            This is interesting to know in case you have provided a directory or
+            globs. Paths are relative to the repository. """
+        args = list()
+        if not working_tree:
+            args.append("--cached")
+        args.append("--")
+
+        # preprocess paths
+        paths = self._items_to_rela_paths(items)
+        removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines()
+
+        # process output to gain proper paths
+        # rm 'path'
+        return [ p[4:-1] for p in removed_paths ]
+
+    @post_clear_cache
+    @default_index
+    def move(self, items, skip_errors=False, **kwargs):
+        """Rename/move the items, where the last item is considered the destination of
+        the move operation. If the destination is a file, the first item ( of two )
+        must be a file as well. If the destination is a directory, it may be preceded
+        by one or more directories or files.
+
+        The working tree will be affected in non-bare repositories.
+
+        :param items:
+            Multiple types of items are supported, please see the 'remove' method
+            for reference.
+        :param skip_errors:
+            If True, errors such as ones resulting from missing source files will
+            be skipped.
+        :param kwargs:
+            Additional arguments you would like to pass to git-mv, such as dry_run
+            or force.
+
+        :return: List(tuple(source_path_string, destination_path_string), ...)
+            A list of pairs, containing the source file moved as well as its
+            actual destination, relative to the repository root (see the sketch below).
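# --- usage sketch: remove() above and move() documented here ---
# A minimal sketch, assuming a repository at a placeholder path and
# placeholder file names. remove() defaults to --cached behaviour, i.e.
# files stay in the working tree unless working_tree=True is passed.
from git import Repo

repo = Repo("/path/to/repo")
index = repo.index

removed = index.remove(['old_module.py'], working_tree=False)
moved = index.move(['lib/util.py', 'lib/helpers.py'], dry_run=True)
for source, destination in moved:
    print("%s -> %s" % (source, destination))
# --- end sketch ---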
+
+        :raise ValueError: If only one item was given
+        :raise GitCommandError: If git could not handle your request"""
+        args = list()
+        if skip_errors:
+            args.append('-k')
+
+        paths = self._items_to_rela_paths(items)
+        if len(paths) < 2:
+            raise ValueError("Please provide at least one source and one destination of the move operation")
+
+        was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))
+        kwargs['dry_run'] = True
+
+        # first execute rename in dry-run mode so the command tells us what it actually does
+        # ( for later output )
+        out = list()
+        mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()
+
+        # parse result - first 0:n/2 lines are 'checking ', the remaining ones
+        # are the 'renaming' ones which we parse
+        for ln in xrange(len(mvlines)/2, len(mvlines)):
+            tokens = mvlines[ln].split(' to ')
+            assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln]
+
+            # [0] = Renaming x
+            # [1] = y
+            out.append((tokens[0][9:], tokens[1]))
+        # END for each line to parse
+
+        # either prepare for the real run, or output the dry-run result
+        if was_dry_run:
+            return out
+        # END handle dry-run
+
+        # now apply the actual operation
+        kwargs.pop('dry_run')
+        self.repo.git.mv(args, paths, **kwargs)
+
+        return out
+
+    def commit(self, message, parent_commits=None, head=True):
+        """Commit the current default index file, creating a commit object.
+
+        For more information on the arguments, see tree.commit.
+
+        :note:
+            If you have manually altered the .entries member of this instance,
+            don't forget to write() your changes to disk beforehand.
+
+        :return:
+            Commit object representing the new commit"""
+        tree = self.write_tree()
+        return Commit.create_from_tree(self.repo, tree, message, parent_commits, head)
+
+    @classmethod
+    def _flush_stdin_and_wait(cls, proc, ignore_stdout = False):
+        proc.stdin.flush()
+        proc.stdin.close()
+        stdout = ''
+        if not ignore_stdout:
+            stdout = proc.stdout.read()
+        proc.stdout.close()
+        proc.wait()
+        return stdout
+
+    @default_index
+    def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs):
+        """Checkout the given paths or all files from the version known to the index into
+        the working tree.
+
+        :note: Be sure you have written pending changes using the ``write`` method
+            in case you have altered the entries dictionary directly
+
+        :param paths:
+            If None, all paths in the index will be checked out. Otherwise an iterable
+            of relative or absolute paths or a single path pointing to files or directories
+            in the index is expected.
+
+        :param force:
+            If True, existing files will be overwritten even if they contain local modifications.
+            If False, these will trigger a CheckoutError.
+
+        :param fprogress:
+            see Index.add_ for signature and explanation.
+            The provided progress information will contain None as path and item if no
+            explicit paths are given. Otherwise progress information will be sent
+            prior to and after a file has been checked out
+
+        :param kwargs:
+            Additional arguments to be passed to git-checkout-index
+
+        :return:
+            iterable yielding paths to files which have been checked out and are
+            guaranteed to match the version stored in the index
+
+        :raise CheckoutError:
+            If at least one file failed to be checked out. This is a summary,
+            hence it will checkout as many files as it can anyway.
+            Also raised if one of the files or directories does not exist in the index
+            ( as opposed to the original git command, which ignores them ).
+            A GitCommandError is raised if error lines could not be parsed - this truly is
+            an exceptional state
+
+        ..
note:: The checkout is limited to checking out the files in the + index. Files which are not in the index anymore and exist in + the working tree will not be deleted. This behaviour is fundamentally + different to *head.checkout*, i.e. if you want git-checkout like behaviour, + use head.checkout instead of index.checkout. + """ + args = ["--index"] + if force: + args.append("--force") + + def handle_stderr(proc, iter_checked_out_files): + stderr = proc.stderr.read() + if not stderr: + return + # line contents: + # git-checkout-index: this already exists + failed_files = list() + failed_reasons = list() + unknown_lines = list() + endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged') + for line in stderr.splitlines(): + if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "): + is_a_dir = " is a directory" + unlink_issue = "unable to unlink old '" + already_exists_issue = ' already exists, no checkout' # created by entry.c:checkout_entry(...) + if line.endswith(is_a_dir): + failed_files.append(line[:-len(is_a_dir)]) + failed_reasons.append(is_a_dir) + elif line.startswith(unlink_issue): + failed_files.append(line[len(unlink_issue):line.rfind("'")]) + failed_reasons.append(unlink_issue) + elif line.endswith(already_exists_issue): + failed_files.append(line[:-len(already_exists_issue)]) + failed_reasons.append(already_exists_issue) + else: + unknown_lines.append(line) + continue + # END special lines parsing + + for e in endings: + if line.endswith(e): + failed_files.append(line[20:-len(e)]) + failed_reasons.append(e) + break + # END if ending matches + # END for each possible ending + # END for each line + if unknown_lines: + raise GitCommandError(("git-checkout-index", ), 128, stderr) + if failed_files: + valid_files = list(set(iter_checked_out_files) - set(failed_files)) + raise CheckoutError("Some files could not be checked out from the index due to local modifications", failed_files, valid_files, failed_reasons) + # END stderr handler + + + if paths is None: + args.append("--all") + kwargs['as_process'] = 1 + fprogress(None, False, None) + proc = self.repo.git.checkout_index(*args, **kwargs) + proc.wait() + fprogress(None, True, None) + rval_iter = ( e.path for e in self.entries.itervalues() ) + handle_stderr(proc, rval_iter) + return rval_iter + else: + if isinstance(paths, basestring): + paths = [paths] + + # make sure we have our entries loaded before we start checkout_index + # which will hold a lock on it. 
We try to get the lock as well during
+            # our entries initialization
+            self.entries
+
+            args.append("--stdin")
+            kwargs['as_process'] = True
+            kwargs['istream'] = subprocess.PIPE
+            proc = self.repo.git.checkout_index(args, **kwargs)
+            make_exc = lambda : GitCommandError(("git-checkout-index",)+tuple(args), 128, proc.stderr.read())
+            checked_out_files = list()
+
+            for path in paths:
+                co_path = to_native_path_linux(self._to_relative_path(path))
+                # if the item is not in the index, it could be a directory
+                path_is_directory = False
+
+                try:
+                    self.entries[(co_path, 0)]
+                except KeyError:
+                    dir = co_path
+                    if not dir.endswith('/'):
+                        dir += '/'
+                    for entry in self.entries.itervalues():
+                        if entry.path.startswith(dir):
+                            p = entry.path
+                            self._write_path_to_stdin(proc, p, p, make_exc,
+                                                      fprogress, read_from_stdout=False)
+                            checked_out_files.append(p)
+                            path_is_directory = True
+                        # END if entry is in directory
+                    # END for each entry
+                # END path exception handling
+
+                if not path_is_directory:
+                    self._write_path_to_stdin(proc, co_path, path, make_exc,
+                                              fprogress, read_from_stdout=False)
+                    checked_out_files.append(co_path)
+                # END path is a file
+            # END for each path
+            self._flush_stdin_and_wait(proc, ignore_stdout=True)
+
+            handle_stderr(proc, checked_out_files)
+            return checked_out_files
+        # END paths handling
+        # a bare string assert would always pass, hence raise explicitly
+        raise AssertionError("Should not reach this point")
+
+    @default_index
+    def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs):
+        """Reset the index to reflect the tree at the given commit. By default this will
+        not adjust our HEAD reference, as opposed to HEAD.reset.
+
+        :param commit:
+            Revision, Reference or Commit specifying the commit we should represent.
+            If you want to specify a tree only, use IndexFile.from_tree and overwrite
+            the default index.
+
+        :param working_tree:
+            If True, the files in the working tree will reflect the changed index.
+            If False, the working tree will not be touched.
+            Please note that changes to the working copy will be discarded without
+            warning !
+
+        :param head:
+            If True, the head will be set to the given commit. This is False by default,
+            but if True, this method behaves like HEAD.reset.
+
+        :param paths: if given as an iterable of absolute or repository-relative paths,
+            only these will be reset to their state at the given commit-ish.
+            The paths need to exist at the commit, otherwise an exception will be
+            raised.
+
+        :param kwargs:
+            Additional keyword arguments passed to git-reset
+
+        .. note:: IndexFile.reset, as opposed to HEAD.reset, will not delete any files
+            in order to maintain a consistent working tree. Instead, it will just
+            checkout the files according to their state in the index.
+            If you want git-reset like behaviour, use *HEAD.reset* instead
+            (see the sketch below).
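# --- usage sketch: IndexFile.reset versus HEAD.reset, plus checkout/diff ---
# A hedged sketch, assuming a repository at a placeholder path; the revisions
# and paths are assumptions chosen for illustration.
from git import Repo

repo = Repo("/path/to/repo")
index = repo.index

index.reset('HEAD~1')                        # index only: HEAD and files untouched
index.checkout(paths=['lib'], force=True)    # materialize index state on disk
changed = index.diff(None)                   # diff index against the working copy
repo.head.reset('HEAD~1', index=True, working_tree=True)  # git-reset --hard analogue
# --- end sketch ---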
+ + :return: self """ + # what we actually want to do is to merge the tree into our existing + # index, which is what git-read-tree does + new_inst = type(self).from_tree(self.repo, commit) + if not paths: + self.entries = new_inst.entries + else: + nie = new_inst.entries + for path in paths: + path = self._to_relative_path(path) + try: + key = entry_key(path, 0) + self.entries[key] = nie[key] + except KeyError: + # if key is not in theirs, it musn't be in ours + try: + del(self.entries[key]) + except KeyError: + pass + # END handle deletion keyerror + # END handle keyerror + # END for each path + # END handle paths + self.write() + + if working_tree: + self.checkout(paths=paths, force=True) + # END handle working tree + + if head: + self.repo.head.set_commit(self.repo.commit(commit), logmsg="%s: Updating HEAD" % commit) + # END handle head change + + return self + + @default_index + def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs): + """Diff this index against the working copy or a Tree or Commit object + + For a documentation of the parameters and return values, see + Diffable.diff + + :note: + Will only work with indices that represent the default git index as + they have not been initialized with a stream. + """ + # index against index is always empty + if other is self.Index: + return diff.DiffIndex() + + # index against anything but None is a reverse diff with the respective + # item. Handle existing -R flags properly. Transform strings to the object + # so that we can call diff on it + if isinstance(other, basestring): + other = self.repo.rev_parse(other) + # END object conversion + + if isinstance(other, Object): + # invert the existing R flag + cur_val = kwargs.get('R', False) + kwargs['R'] = not cur_val + return other.diff(self.Index, paths, create_patch, **kwargs) + # END diff against other item handlin + + # if other is not None here, something is wrong + if other is not None: + raise ValueError( "other must be None, Diffable.Index, a Tree or Commit, was %r" % other ) + + # diff against working copy - can be handled by superclass natively + return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) diff --git a/git/index/fun.py b/git/index/fun.py index 9b35bf04..e39b09d6 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -2,321 +2,321 @@ # more versatile # NOTE: Autodoc hates it if this is a docstring from stat import ( - S_IFDIR, - S_IFLNK, - S_ISLNK, - S_IFDIR, - S_ISDIR, - S_IFMT, - S_IFREG, - ) + S_IFDIR, + S_IFLNK, + S_ISLNK, + S_IFDIR, + S_ISDIR, + S_IFMT, + S_IFREG, + ) -S_IFGITLINK = S_IFLNK | S_IFDIR # a submodule +S_IFGITLINK = S_IFLNK | S_IFDIR # a submodule from cStringIO import StringIO from git.util import IndexFileSHA1Writer from git.exc import UnmergedEntriesError from git.objects.fun import ( - tree_to_stream, - traverse_tree_recursive, - traverse_trees_recursive - ) + tree_to_stream, + traverse_tree_recursive, + traverse_trees_recursive + ) from typ import ( - BaseIndexEntry, - IndexEntry, - CE_NAMEMASK, - CE_STAGESHIFT - ) + BaseIndexEntry, + IndexEntry, + CE_NAMEMASK, + CE_STAGESHIFT + ) CE_NAMEMASK_INV = ~CE_NAMEMASK -from util import ( - pack, - unpack - ) +from util import ( + pack, + unpack + ) from gitdb.base import IStream from gitdb.typ import str_tree_type __all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key', - 'stat_mode_to_index_mode', 'S_IFGITLINK') + 'stat_mode_to_index_mode', 'S_IFGITLINK') def stat_mode_to_index_mode(mode): - """Convert the given mode from a stat call 
to the corresponding index mode - and return it""" - if S_ISLNK(mode): # symlinks - return S_IFLNK - if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK: # submodules - return S_IFGITLINK - return S_IFREG | 0644 | (mode & 0100) # blobs with or without executable bit + """Convert the given mode from a stat call to the corresponding index mode + and return it""" + if S_ISLNK(mode): # symlinks + return S_IFLNK + if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK: # submodules + return S_IFGITLINK + return S_IFREG | 0644 | (mode & 0100) # blobs with or without executable bit def write_cache(entries, stream, extension_data=None, ShaStreamCls=IndexFileSHA1Writer): - """Write the cache represented by entries to a stream - - :param entries: **sorted** list of entries - :param stream: stream to wrap into the AdapterStreamCls - it is used for - final output. - - :param ShaStreamCls: Type to use when writing to the stream. It produces a sha - while writing to it, before the data is passed on to the wrapped stream - - :param extension_data: any kind of data to write as a trailer, it must begin - a 4 byte identifier, followed by its size ( 4 bytes )""" - # wrap the stream into a compatible writer - stream = ShaStreamCls(stream) - - tell = stream.tell - write = stream.write + """Write the cache represented by entries to a stream + + :param entries: **sorted** list of entries + :param stream: stream to wrap into the AdapterStreamCls - it is used for + final output. + + :param ShaStreamCls: Type to use when writing to the stream. It produces a sha + while writing to it, before the data is passed on to the wrapped stream + + :param extension_data: any kind of data to write as a trailer, it must begin + a 4 byte identifier, followed by its size ( 4 bytes )""" + # wrap the stream into a compatible writer + stream = ShaStreamCls(stream) + + tell = stream.tell + write = stream.write - # header - version = 2 - write("DIRC") - write(pack(">LL", version, len(entries))) + # header + version = 2 + write("DIRC") + write(pack(">LL", version, len(entries))) - # body - for entry in entries: - beginoffset = tell() - write(entry[4]) # ctime - write(entry[5]) # mtime - path = entry[3] - plen = len(path) & CE_NAMEMASK # path length - assert plen == len(path), "Path %s too long to fit into index" % entry[3] - flags = plen | (entry[2] & CE_NAMEMASK_INV) # clear possible previous values - write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0], - entry[8], entry[9], entry[10], entry[1], flags)) - write(path) - real_size = ((tell() - beginoffset + 8) & ~7) - write("\0" * ((beginoffset + real_size) - tell())) - # END for each entry + # body + for entry in entries: + beginoffset = tell() + write(entry[4]) # ctime + write(entry[5]) # mtime + path = entry[3] + plen = len(path) & CE_NAMEMASK # path length + assert plen == len(path), "Path %s too long to fit into index" % entry[3] + flags = plen | (entry[2] & CE_NAMEMASK_INV) # clear possible previous values + write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0], + entry[8], entry[9], entry[10], entry[1], flags)) + write(path) + real_size = ((tell() - beginoffset + 8) & ~7) + write("\0" * ((beginoffset + real_size) - tell())) + # END for each entry - # write previously cached extensions data - if extension_data is not None: - stream.write(extension_data) + # write previously cached extensions data + if extension_data is not None: + stream.write(extension_data) - # write the sha over the content - stream.write_sha() - + # write the sha over the content + stream.write_sha() + def 
read_header(stream): - """Return tuple(version_long, num_entries) from the given stream""" - type_id = stream.read(4) - if type_id != "DIRC": - raise AssertionError("Invalid index file header: %r" % type_id) - version, num_entries = unpack(">LL", stream.read(4 * 2)) - - # TODO: handle version 3: extended data, see read-cache.c - assert version in (1, 2) - return version, num_entries + """Return tuple(version_long, num_entries) from the given stream""" + type_id = stream.read(4) + if type_id != "DIRC": + raise AssertionError("Invalid index file header: %r" % type_id) + version, num_entries = unpack(">LL", stream.read(4 * 2)) + + # TODO: handle version 3: extended data, see read-cache.c + assert version in (1, 2) + return version, num_entries def entry_key(*entry): - """:return: Key suitable to be used for the index.entries dictionary - :param entry: One instance of type BaseIndexEntry or the path and the stage""" - if len(entry) == 1: - return (entry[0].path, entry[0].stage) - else: - return tuple(entry) - # END handle entry + """:return: Key suitable to be used for the index.entries dictionary + :param entry: One instance of type BaseIndexEntry or the path and the stage""" + if len(entry) == 1: + return (entry[0].path, entry[0].stage) + else: + return tuple(entry) + # END handle entry def read_cache(stream): - """Read a cache file from the given stream - :return: tuple(version, entries_dict, extension_data, content_sha) - * version is the integer version number - * entries dict is a dictionary which maps IndexEntry instances to a path - at a stage - * extension_data is '' or 4 bytes of type + 4 bytes of size + size bytes - * content_sha is a 20 byte sha on all cache file contents""" - version, num_entries = read_header(stream) - count = 0 - entries = dict() - - read = stream.read - tell = stream.tell - while count < num_entries: - beginoffset = tell() - ctime = unpack(">8s", read(8))[0] - mtime = unpack(">8s", read(8))[0] - (dev, ino, mode, uid, gid, size, sha, flags) = \ - unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2)) - path_size = flags & CE_NAMEMASK - path = read(path_size) - - real_size = ((tell() - beginoffset + 8) & ~7) - data = read((beginoffset + real_size) - tell()) - entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) - # entry_key would be the method to use, but we safe the effort - entries[(path, entry.stage)] = entry - count += 1 - # END for each entry + """Read a cache file from the given stream + :return: tuple(version, entries_dict, extension_data, content_sha) + * version is the integer version number + * entries dict is a dictionary which maps IndexEntry instances to a path + at a stage + * extension_data is '' or 4 bytes of type + 4 bytes of size + size bytes + * content_sha is a 20 byte sha on all cache file contents""" + version, num_entries = read_header(stream) + count = 0 + entries = dict() + + read = stream.read + tell = stream.tell + while count < num_entries: + beginoffset = tell() + ctime = unpack(">8s", read(8))[0] + mtime = unpack(">8s", read(8))[0] + (dev, ino, mode, uid, gid, size, sha, flags) = \ + unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2)) + path_size = flags & CE_NAMEMASK + path = read(path_size) + + real_size = ((tell() - beginoffset + 8) & ~7) + data = read((beginoffset + real_size) - tell()) + entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) + # entry_key would be the method to use, but we safe the effort + entries[(path, entry.stage)] = entry + count += 1 + # END for each entry - # 
the footer contains extension data and a sha on the content so far - # Keep the extension footer,and verify we have a sha in the end - # Extension data format is: - # 4 bytes ID - # 4 bytes length of chunk - # repeated 0 - N times - extension_data = stream.read(~0) - assert len(extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data) + # the footer contains extension data and a sha on the content so far + # Keep the extension footer,and verify we have a sha in the end + # Extension data format is: + # 4 bytes ID + # 4 bytes length of chunk + # repeated 0 - N times + extension_data = stream.read(~0) + assert len(extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data) - content_sha = extension_data[-20:] + content_sha = extension_data[-20:] - # truncate the sha in the end as we will dynamically create it anyway - extension_data = extension_data[:-20] - - return (version, entries, extension_data, content_sha) - + # truncate the sha in the end as we will dynamically create it anyway + extension_data = extension_data[:-20] + + return (version, entries, extension_data, content_sha) + def write_tree_from_cache(entries, odb, sl, si=0): - """Create a tree from the given sorted list of entries and put the respective - trees into the given object database - - :param entries: **sorted** list of IndexEntries - :param odb: object database to store the trees in - :param si: start index at which we should start creating subtrees - :param sl: slice indicating the range we should process on the entries list - :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of - tree entries being a tuple of hexsha, mode, name""" - tree_items = list() - tree_items_append = tree_items.append - ci = sl.start - end = sl.stop - while ci < end: - entry = entries[ci] - if entry.stage != 0: - raise UnmergedEntriesError(entry) - # END abort on unmerged - ci += 1 - rbound = entry.path.find('/', si) - if rbound == -1: - # its not a tree - tree_items_append((entry.binsha, entry.mode, entry.path[si:])) - else: - # find common base range - base = entry.path[si:rbound] - xi = ci - while xi < end: - oentry = entries[xi] - orbound = oentry.path.find('/', si) - if orbound == -1 or oentry.path[si:orbound] != base: - break - # END abort on base mismatch - xi += 1 - # END find common base - - # enter recursion - # ci - 1 as we want to count our current item as well - sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1) - tree_items_append((sha, S_IFDIR, base)) - - # skip ahead - ci = xi - # END handle bounds - # END for each entry - - # finally create the tree - sio = StringIO() - tree_to_stream(tree_items, sio.write) - sio.seek(0) - - istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio)) - return (istream.binsha, tree_items) - + """Create a tree from the given sorted list of entries and put the respective + trees into the given object database + + :param entries: **sorted** list of IndexEntries + :param odb: object database to store the trees in + :param si: start index at which we should start creating subtrees + :param sl: slice indicating the range we should process on the entries list + :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of + tree entries being a tuple of hexsha, mode, name""" + tree_items = list() + tree_items_append = tree_items.append + ci = sl.start + end = sl.stop + while ci < end: + 
entry = entries[ci]
+ if entry.stage != 0:
+ raise UnmergedEntriesError(entry)
+ # END abort on unmerged
+ ci += 1
+ rbound = entry.path.find('/', si)
+ if rbound == -1:
+ # its not a tree
+ tree_items_append((entry.binsha, entry.mode, entry.path[si:]))
+ else:
+ # find common base range
+ base = entry.path[si:rbound]
+ xi = ci
+ while xi < end:
+ oentry = entries[xi]
+ orbound = oentry.path.find('/', si)
+ if orbound == -1 or oentry.path[si:orbound] != base:
+ break
+ # END abort on base mismatch
+ xi += 1
+ # END find common base
+
+ # enter recursion
+ # ci - 1 as we want to count our current item as well
+ sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1)
+ tree_items_append((sha, S_IFDIR, base))
+
+ # skip ahead
+ ci = xi
+ # END handle bounds
+ # END for each entry
+
+ # finally create the tree
+ sio = StringIO()
+ tree_to_stream(tree_items, sio.write)
+ sio.seek(0)
+
+ istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
+ return (istream.binsha, tree_items)
+

def _tree_entry_to_baseindexentry(tree_entry, stage):
- return BaseIndexEntry((tree_entry[1], tree_entry[0], stage << CE_STAGESHIFT, tree_entry[2]))
-
+ return BaseIndexEntry((tree_entry[1], tree_entry[0], stage << CE_STAGESHIFT, tree_entry[2]))
+
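
Note: _tree_entry_to_baseindexentry above packs the merge stage into the entry's flags word with a plain shift, next to the path length kept in the low bits (compare write_cache and git/index/typ.py). A minimal standalone sketch of that bit layout, assuming the module invariants CE_NAMEMASK = 0x0fff and CE_STAGESHIFT = 12 from typ.py:

    CE_NAMEMASK = 0x0fff    # lower 12 bits hold the path length
    CE_STAGESHIFT = 12      # the merge stage sits just above them

    def pack_flags(path, stage):
        """Combine path length and merge stage into one 16 bit flags value."""
        plen = len(path) & CE_NAMEMASK
        assert plen == len(path), "path too long to fit into the index"
        return plen | (stage << CE_STAGESHIFT)

    assert pack_flags('a/b.txt', 2) == (2 << 12) | 7
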
def aggressive_tree_merge(odb, tree_shas):
- """
- :return: list of BaseIndexEntries representing the aggressive merge of the given
- trees. All valid entries are on stage 0, whereas the conflicting ones are left
- on stage 1, 2 or 3, where stage 1 corresponds to the common ancestor tree,
- 2 to our tree and 3 to 'their' tree.
- :param tree_shas: 1, 2 or 3 trees as identified by their binary 20 byte shas
- If 1 or 2 are given, the entries will effectively correspond to the last given tree
- If 3 are given, a 3 way merge is performed"""
- out = list()
- out_append = out.append
-
- # one and two way is the same for us, as we don't have to handle an existing
- # index, instead
- if len(tree_shas) in (1,2):
- for entry in traverse_tree_recursive(odb, tree_shas[-1], ''):
- out_append(_tree_entry_to_baseindexentry(entry, 0))
- # END for each entry
- return out
- # END handle single tree
-
- if len(tree_shas) > 3:
- raise ValueError("Cannot handle %i trees at once" % len(tree_shas))
+ """
+ :return: list of BaseIndexEntries representing the aggressive merge of the given
+ trees. All valid entries are on stage 0, whereas the conflicting ones are left
+ on stage 1, 2 or 3, where stage 1 corresponds to the common ancestor tree,
+ 2 to our tree and 3 to 'their' tree.
+ :param tree_shas: 1, 2 or 3 trees as identified by their binary 20 byte shas
+ If 1 or 2 are given, the entries will effectively correspond to the last given tree
+ If 3 are given, a 3 way merge is performed"""
+ out = list()
+ out_append = out.append
+
+ # one and two way is the same for us, as we don't have to handle an existing
+ # index, instead
+ if len(tree_shas) in (1,2):
+ for entry in traverse_tree_recursive(odb, tree_shas[-1], ''):
+ out_append(_tree_entry_to_baseindexentry(entry, 0))
+ # END for each entry
+ return out
+ # END handle single tree
+
+ if len(tree_shas) > 3:
+ raise ValueError("Cannot handle %i trees at once" % len(tree_shas))

- # three trees
- for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ''):
- if base is not None:
- # base version exists
- if ours is not None:
- # ours exists
- if theirs is not None:
- # it exists in all branches, if it was changed in both
- # its a conflict, otherwise we take the changed version
- # This should be the most common branch, so it comes first
- if( base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0] ) or \
- ( base[1] != ours[1] and base[1] != theirs[1] and ours[1] != theirs[1] ):
- # changed by both
- out_append(_tree_entry_to_baseindexentry(base, 1))
- out_append(_tree_entry_to_baseindexentry(ours, 2))
- out_append(_tree_entry_to_baseindexentry(theirs, 3))
- elif base[0] != ours[0] or base[1] != ours[1]:
- # only we changed it
- out_append(_tree_entry_to_baseindexentry(ours, 0))
- else:
- # either nobody changed it, or they did. In either
- # case, use theirs
- out_append(_tree_entry_to_baseindexentry(theirs, 0))
- # END handle modification
- else:
-
- if ours[0] != base[0] or ours[1] != base[1]:
- # they deleted it, we changed it, conflict
- out_append(_tree_entry_to_baseindexentry(base, 1))
- out_append(_tree_entry_to_baseindexentry(ours, 2))
- # else:
- # we didn't change it, ignore
- # pass
- # END handle our change
- # END handle theirs
- else:
- if theirs is None:
- # deleted in both, its fine - its out
- pass
- else:
- if theirs[0] != base[0] or theirs[1] != base[1]:
- # deleted in ours, changed theirs, conflict
- out_append(_tree_entry_to_baseindexentry(base, 1))
- out_append(_tree_entry_to_baseindexentry(theirs, 3))
- # END theirs changed
- #else:
- # theirs didnt change
- # pass
- # END handle theirs
- # END handle ours
- else:
- # all three can't be None
- if ours is None:
- # added in their branch
- out_append(_tree_entry_to_baseindexentry(theirs, 0))
- elif theirs is None:
- # added in our branch
- out_append(_tree_entry_to_baseindexentry(ours, 0))
- else:
- # both have it, except for the base, see whether it changed
- if ours[0] != theirs[0] or ours[1] != theirs[1]:
- out_append(_tree_entry_to_baseindexentry(ours, 2))
- out_append(_tree_entry_to_baseindexentry(theirs, 3))
- else:
- # it was added the same in both
- out_append(_tree_entry_to_baseindexentry(ours, 0))
- # END handle two items
- # END handle heads
- # END handle base exists
- # END for each entries tuple

+ # three trees
+ for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ''):
+ if base is not None:
+ # base version exists
+ if ours is not None:
+ # ours exists
+ if theirs is not None:
+ # it exists in all branches, if it was changed in both
+ # its a conflict, otherwise we take the changed version
+ # This should be the most common branch, so it comes first
+ if( base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0] ) or \
+ ( base[1] != ours[1] and base[1] != theirs[1] and ours[1] != 
theirs[1] ): + # changed by both + out_append(_tree_entry_to_baseindexentry(base, 1)) + out_append(_tree_entry_to_baseindexentry(ours, 2)) + out_append(_tree_entry_to_baseindexentry(theirs, 3)) + elif base[0] != ours[0] or base[1] != ours[1]: + # only we changed it + out_append(_tree_entry_to_baseindexentry(ours, 0)) + else: + # either nobody changed it, or they did. In either + # case, use theirs + out_append(_tree_entry_to_baseindexentry(theirs, 0)) + # END handle modification + else: + + if ours[0] != base[0] or ours[1] != base[1]: + # they deleted it, we changed it, conflict + out_append(_tree_entry_to_baseindexentry(base, 1)) + out_append(_tree_entry_to_baseindexentry(ours, 2)) + # else: + # we didn't change it, ignore + # pass + # END handle our change + # END handle theirs + else: + if theirs is None: + # deleted in both, its fine - its out + pass + else: + if theirs[0] != base[0] or theirs[1] != base[1]: + # deleted in ours, changed theirs, conflict + out_append(_tree_entry_to_baseindexentry(base, 1)) + out_append(_tree_entry_to_baseindexentry(theirs, 3)) + # END theirs changed + #else: + # theirs didnt change + # pass + # END handle theirs + # END handle ours + else: + # all three can't be None + if ours is None: + # added in their branch + out_append(_tree_entry_to_baseindexentry(theirs, 0)) + elif theirs is None: + # added in our branch + out_append(_tree_entry_to_baseindexentry(ours, 0)) + else: + # both have it, except for the base, see whether it changed + if ours[0] != theirs[0] or ours[1] != theirs[1]: + out_append(_tree_entry_to_baseindexentry(ours, 2)) + out_append(_tree_entry_to_baseindexentry(theirs, 3)) + else: + # it was added the same in both + out_append(_tree_entry_to_baseindexentry(ours, 0)) + # END handle two items + # END handle heads + # END handle base exists + # END for each entries tuple - return out + return out diff --git a/git/index/typ.py b/git/index/typ.py index ad988285..7f27d869 100644 --- a/git/index/typ.py +++ b/git/index/typ.py @@ -1,13 +1,13 @@ """Module with additional types used by the index""" from util import ( - pack, - unpack - ) + pack, + unpack + ) from binascii import ( - b2a_hex, - ) + b2a_hex, + ) from git.objects import Blob __all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry') @@ -22,152 +22,152 @@ CE_STAGESHIFT = 12 #} END invariants class BlobFilter(object): - """ - Predicate to be used by iter_blobs allowing to filter only return blobs which - match the given list of directories or files. - - The given paths are given relative to the repository. - """ - __slots__ = 'paths' - - def __init__(self, paths): - """:param paths: - tuple or list of paths which are either pointing to directories or - to files relative to the current repository - """ - self.paths = paths - - def __call__(self, stage_blob): - path = stage_blob[1].path - for p in self.paths: - if path.startswith(p): - return True - # END for each path in filter paths - return False + """ + Predicate to be used by iter_blobs allowing to filter only return blobs which + match the given list of directories or files. + + The given paths are given relative to the repository. 
+ """ + __slots__ = 'paths' + + def __init__(self, paths): + """:param paths: + tuple or list of paths which are either pointing to directories or + to files relative to the current repository + """ + self.paths = paths + + def __call__(self, stage_blob): + path = stage_blob[1].path + for p in self.paths: + if path.startswith(p): + return True + # END for each path in filter paths + return False class BaseIndexEntry(tuple): - """Small Brother of an index entry which can be created to describe changes - done to the index in which case plenty of additional information is not requried. - - As the first 4 data members match exactly to the IndexEntry type, methods - expecting a BaseIndexEntry can also handle full IndexEntries even if they - use numeric indices for performance reasons. """ - - def __str__(self): - return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path) - - def __repr__(self): - return "(%o, %s, %i, %s)" % (self.mode, self.hexsha, self.stage, self.path) - - @property - def mode(self): - """ File Mode, compatible to stat module constants """ - return self[0] - - @property - def binsha(self): - """binary sha of the blob """ - return self[1] - - @property - def hexsha(self): - """hex version of our sha""" - return b2a_hex(self[1]) - - @property - def stage(self): - """Stage of the entry, either: - - * 0 = default stage - * 1 = stage before a merge or common ancestor entry in case of a 3 way merge - * 2 = stage of entries from the 'left' side of the merge - * 3 = stage of entries from the right side of the merge - - :note: For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html - """ - return (self[2] & CE_STAGEMASK) >> CE_STAGESHIFT - - @property - def path(self): - """:return: our path relative to the repository working tree root""" - return self[3] - - @property - def flags(self): - """:return: flags stored with this entry""" - return self[2] - - @classmethod - def from_blob(cls, blob, stage = 0): - """:return: Fully equipped BaseIndexEntry at the given stage""" - return cls((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path)) - - def to_blob(self, repo): - """:return: Blob using the information of this index entry""" - return Blob(repo, self.binsha, self.mode, self.path) + """Small Brother of an index entry which can be created to describe changes + done to the index in which case plenty of additional information is not requried. + + As the first 4 data members match exactly to the IndexEntry type, methods + expecting a BaseIndexEntry can also handle full IndexEntries even if they + use numeric indices for performance reasons. 
""" + + def __str__(self): + return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path) + + def __repr__(self): + return "(%o, %s, %i, %s)" % (self.mode, self.hexsha, self.stage, self.path) + + @property + def mode(self): + """ File Mode, compatible to stat module constants """ + return self[0] + + @property + def binsha(self): + """binary sha of the blob """ + return self[1] + + @property + def hexsha(self): + """hex version of our sha""" + return b2a_hex(self[1]) + + @property + def stage(self): + """Stage of the entry, either: + + * 0 = default stage + * 1 = stage before a merge or common ancestor entry in case of a 3 way merge + * 2 = stage of entries from the 'left' side of the merge + * 3 = stage of entries from the right side of the merge + + :note: For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html + """ + return (self[2] & CE_STAGEMASK) >> CE_STAGESHIFT + + @property + def path(self): + """:return: our path relative to the repository working tree root""" + return self[3] + + @property + def flags(self): + """:return: flags stored with this entry""" + return self[2] + + @classmethod + def from_blob(cls, blob, stage = 0): + """:return: Fully equipped BaseIndexEntry at the given stage""" + return cls((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path)) + + def to_blob(self, repo): + """:return: Blob using the information of this index entry""" + return Blob(repo, self.binsha, self.mode, self.path) class IndexEntry(BaseIndexEntry): - """Allows convenient access to IndexEntry data without completely unpacking it. - - Attributes usully accessed often are cached in the tuple whereas others are - unpacked on demand. - - See the properties for a mapping between names and tuple indices. """ - @property - def ctime(self): - """ - :return: - Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the - file's creation time""" - return unpack(">LL", self[4]) - - @property - def mtime(self): - """See ctime property, but returns modification time """ - return unpack(">LL", self[5]) - - @property - def dev(self): - """ Device ID """ - return self[6] - - @property - def inode(self): - """ Inode ID """ - return self[7] - - @property - def uid(self): - """ User ID """ - return self[8] - - @property - def gid(self): - """ Group ID """ - return self[9] - - @property - def size(self): - """:return: Uncompressed size of the blob """ - return self[10] - - @classmethod - def from_base(cls, base): - """ - :return: - Minimal entry as created from the given BaseIndexEntry instance. - Missing values will be set to null-like values - - :param base: Instance of type BaseIndexEntry""" - time = pack(">LL", 0, 0) - return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) - - @classmethod - def from_blob(cls, blob, stage = 0): - """:return: Minimal entry resembling the given blob object""" - time = pack(">LL", 0, 0) - return IndexEntry((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size)) + """Allows convenient access to IndexEntry data without completely unpacking it. + + Attributes usully accessed often are cached in the tuple whereas others are + unpacked on demand. + + See the properties for a mapping between names and tuple indices. 
""" + @property + def ctime(self): + """ + :return: + Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the + file's creation time""" + return unpack(">LL", self[4]) + + @property + def mtime(self): + """See ctime property, but returns modification time """ + return unpack(">LL", self[5]) + + @property + def dev(self): + """ Device ID """ + return self[6] + + @property + def inode(self): + """ Inode ID """ + return self[7] + + @property + def uid(self): + """ User ID """ + return self[8] + + @property + def gid(self): + """ Group ID """ + return self[9] + + @property + def size(self): + """:return: Uncompressed size of the blob """ + return self[10] + + @classmethod + def from_base(cls, base): + """ + :return: + Minimal entry as created from the given BaseIndexEntry instance. + Missing values will be set to null-like values + + :param base: Instance of type BaseIndexEntry""" + time = pack(">LL", 0, 0) + return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) + + @classmethod + def from_blob(cls, blob, stage = 0): + """:return: Minimal entry resembling the given blob object""" + time = pack(">LL", 0, 0) + return IndexEntry((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size)) diff --git a/git/index/util.py b/git/index/util.py index bd5fcc03..59f8d591 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -13,74 +13,74 @@ unpack = struct.unpack #} END aliases class TemporaryFileSwap(object): - """Utility class moving a file to a temporary location within the same directory - and moving it back on to where on object deletion.""" - __slots__ = ("file_path", "tmp_file_path") - - def __init__(self, file_path): - self.file_path = file_path - self.tmp_file_path = self.file_path + tempfile.mktemp('','','') - # it may be that the source does not exist - try: - os.rename(self.file_path, self.tmp_file_path) - except OSError: - pass - - def __del__(self): - if os.path.isfile(self.tmp_file_path): - if os.name == 'nt' and os.path.exists(self.file_path): - os.remove(self.file_path) - os.rename(self.tmp_file_path, self.file_path) - # END temp file exists + """Utility class moving a file to a temporary location within the same directory + and moving it back on to where on object deletion.""" + __slots__ = ("file_path", "tmp_file_path") + + def __init__(self, file_path): + self.file_path = file_path + self.tmp_file_path = self.file_path + tempfile.mktemp('','','') + # it may be that the source does not exist + try: + os.rename(self.file_path, self.tmp_file_path) + except OSError: + pass + + def __del__(self): + if os.path.isfile(self.tmp_file_path): + if os.name == 'nt' and os.path.exists(self.file_path): + os.remove(self.file_path) + os.rename(self.tmp_file_path, self.file_path) + # END temp file exists #{ Decorators def post_clear_cache(func): - """Decorator for functions that alter the index using the git command. This would - invalidate our possibly existing entries dictionary which is why it must be - deleted to allow it to be lazily reread later. - - :note: - This decorator will not be required once all functions are implemented - natively which in fact is possible, but probably not feasible performance wise. 
- """ - def post_clear_cache_if_not_raised(self, *args, **kwargs): - rval = func(self, *args, **kwargs) - self._delete_entries_cache() - return rval - - # END wrapper method - post_clear_cache_if_not_raised.__name__ = func.__name__ - return post_clear_cache_if_not_raised + """Decorator for functions that alter the index using the git command. This would + invalidate our possibly existing entries dictionary which is why it must be + deleted to allow it to be lazily reread later. + + :note: + This decorator will not be required once all functions are implemented + natively which in fact is possible, but probably not feasible performance wise. + """ + def post_clear_cache_if_not_raised(self, *args, **kwargs): + rval = func(self, *args, **kwargs) + self._delete_entries_cache() + return rval + + # END wrapper method + post_clear_cache_if_not_raised.__name__ = func.__name__ + return post_clear_cache_if_not_raised def default_index(func): - """Decorator assuring the wrapped method may only run if we are the default - repository index. This is as we rely on git commands that operate - on that index only. """ - def check_default_index(self, *args, **kwargs): - if self._file_path != self._index_path(): - raise AssertionError( "Cannot call %r on indices that do not represent the default git index" % func.__name__ ) - return func(self, *args, **kwargs) - # END wrpaper method - - check_default_index.__name__ = func.__name__ - return check_default_index + """Decorator assuring the wrapped method may only run if we are the default + repository index. This is as we rely on git commands that operate + on that index only. """ + def check_default_index(self, *args, **kwargs): + if self._file_path != self._index_path(): + raise AssertionError( "Cannot call %r on indices that do not represent the default git index" % func.__name__ ) + return func(self, *args, **kwargs) + # END wrpaper method + + check_default_index.__name__ = func.__name__ + return check_default_index def git_working_dir(func): - """Decorator which changes the current working dir to the one of the git - repository in order to assure relative paths are handled correctly""" - def set_git_working_dir(self, *args, **kwargs): - cur_wd = os.getcwd() - os.chdir(self.repo.working_tree_dir) - try: - return func(self, *args, **kwargs) - finally: - os.chdir(cur_wd) - # END handle working dir - # END wrapper - - set_git_working_dir.__name__ = func.__name__ - return set_git_working_dir + """Decorator which changes the current working dir to the one of the git + repository in order to assure relative paths are handled correctly""" + def set_git_working_dir(self, *args, **kwargs): + cur_wd = os.getcwd() + os.chdir(self.repo.working_tree_dir) + try: + return func(self, *args, **kwargs) + finally: + os.chdir(cur_wd) + # END handle working dir + # END wrapper + + set_git_working_dir.__name__ = func.__name__ + return set_git_working_dir #} END decorators diff --git a/git/objects/base.py b/git/objects/base.py index 9c1a0bb9..03b22863 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -6,171 +6,171 @@ from git.util import LazyMixin, join_path_native, stream_copy from util import get_object_type_by_name from gitdb.util import ( - hex_to_bin, - bin_to_hex, - basename - ) + hex_to_bin, + bin_to_hex, + basename + ) import gitdb.typ as dbtyp - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" __all__ = ("Object", "IndexObject") class Object(LazyMixin): - """Implements an Object which may be Blobs, 
Trees, Commits and Tags""" - NULL_HEX_SHA = '0'*40 - NULL_BIN_SHA = '\0'*20 - - TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) - __slots__ = ("repo", "binsha", "size" ) - type = None # to be set by subclass - - def __init__(self, repo, binsha): - """Initialize an object by identifying it by its binary sha. - All keyword arguments will be set on demand if None. - - :param repo: repository this object is located in - - :param binsha: 20 byte SHA1""" - super(Object,self).__init__() - self.repo = repo - self.binsha = binsha - assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) + """Implements an Object which may be Blobs, Trees, Commits and Tags""" + NULL_HEX_SHA = '0'*40 + NULL_BIN_SHA = '\0'*20 + + TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) + __slots__ = ("repo", "binsha", "size" ) + type = None # to be set by subclass + + def __init__(self, repo, binsha): + """Initialize an object by identifying it by its binary sha. + All keyword arguments will be set on demand if None. + + :param repo: repository this object is located in + + :param binsha: 20 byte SHA1""" + super(Object,self).__init__() + self.repo = repo + self.binsha = binsha + assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) - @classmethod - def new(cls, repo, id): - """ - :return: New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a binsha even though - the input id may have been a Reference or Rev-Spec - - :param id: reference, rev-spec, or hexsha - - :note: This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a binsha.""" - return repo.rev_parse(str(id)) - - @classmethod - def new_from_sha(cls, repo, sha1): - """ - :return: new object instance of a type appropriate to represent the given - binary sha1 - :param sha1: 20 byte binary sha1""" - if sha1 == cls.NULL_BIN_SHA: - # the NULL binsha is always the root commit - return get_object_type_by_name('commit')(repo, sha1) - #END handle special case - oinfo = repo.odb.info(sha1) - inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha) - inst.size = oinfo.size - return inst - - def _set_cache_(self, attr): - """Retrieve object information""" - if attr == "size": - oinfo = self.repo.odb.info(self.binsha) - self.size = oinfo.size - # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """:return: True if the objects have the same SHA1""" - if not hasattr(other, 'binsha'): - return False - return self.binsha == other.binsha - - def __ne__(self, other): - """:return: True if the objects do not have the same SHA1 """ - if not hasattr(other, 'binsha'): - return True - return self.binsha != other.binsha - - def __hash__(self): - """:return: Hash of our id allowing objects to be used in dicts and sets""" - return hash(self.binsha) - - def __str__(self): - """:return: string of our SHA1 as understood by all git commands""" - return bin_to_hex(self.binsha) - - def __repr__(self): - """:return: string with pythonic representation of our object""" - return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha) + @classmethod + def new(cls, repo, id): + """ + :return: New Object instance of a type appropriate to the object type behind + id. 
The id of the newly created object will be a binsha even though + the input id may have been a Reference or Rev-Spec + + :param id: reference, rev-spec, or hexsha + + :note: This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a binsha.""" + return repo.rev_parse(str(id)) + + @classmethod + def new_from_sha(cls, repo, sha1): + """ + :return: new object instance of a type appropriate to represent the given + binary sha1 + :param sha1: 20 byte binary sha1""" + if sha1 == cls.NULL_BIN_SHA: + # the NULL binsha is always the root commit + return get_object_type_by_name('commit')(repo, sha1) + #END handle special case + oinfo = repo.odb.info(sha1) + inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha) + inst.size = oinfo.size + return inst + + def _set_cache_(self, attr): + """Retrieve object information""" + if attr == "size": + oinfo = self.repo.odb.info(self.binsha) + self.size = oinfo.size + # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """:return: True if the objects have the same SHA1""" + if not hasattr(other, 'binsha'): + return False + return self.binsha == other.binsha + + def __ne__(self, other): + """:return: True if the objects do not have the same SHA1 """ + if not hasattr(other, 'binsha'): + return True + return self.binsha != other.binsha + + def __hash__(self): + """:return: Hash of our id allowing objects to be used in dicts and sets""" + return hash(self.binsha) + + def __str__(self): + """:return: string of our SHA1 as understood by all git commands""" + return bin_to_hex(self.binsha) + + def __repr__(self): + """:return: string with pythonic representation of our object""" + return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha) - @property - def hexsha(self): - """:return: 40 byte hex version of our 20 byte binary sha""" - return bin_to_hex(self.binsha) + @property + def hexsha(self): + """:return: 40 byte hex version of our 20 byte binary sha""" + return bin_to_hex(self.binsha) - @property - def data_stream(self): - """ :return: File Object compatible stream to the uncompressed raw data of the object - :note: returned streams must be read in order""" - return self.repo.odb.stream(self.binsha) + @property + def data_stream(self): + """ :return: File Object compatible stream to the uncompressed raw data of the object + :note: returned streams must be read in order""" + return self.repo.odb.stream(self.binsha) - def stream_data(self, ostream): - """Writes our data directly to the given output stream - :param ostream: File object compatible stream object. - :return: self""" - istream = self.repo.odb.stream(self.binsha) - stream_copy(istream, ostream) - return self - + def stream_data(self, ostream): + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. 
+ :return: self"""
+ istream = self.repo.odb.stream(self.binsha)
+ stream_copy(istream, ostream)
+ return self
+
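
Note: equality, inequality and hashing of Object above all reduce to the 20 byte binary sha, and hexsha is just its hex rendering. The gitdb helpers used here (hex_to_bin, bin_to_hex) behave like the binascii pair, so the round trip can be sketched without a repository:

    from binascii import a2b_hex, b2a_hex

    hexsha = '9ea81112' + '0' * 32        # any 40 character hex sha
    binsha = a2b_hex(hexsha)              # the 20 bytes Object.binsha stores
    assert len(binsha) == 20
    assert b2a_hex(binsha) == hexsha.encode('ascii')
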

class IndexObject(Object):
- """Base for all objects that can be part of the index file, namely Tree, Blob and
- SubModule objects"""
- __slots__ = ("path", "mode")
-
- # for compatibility with iterable lists
- _id_attribute_ = 'path'
-
- def __init__(self, repo, binsha, mode=None, path=None):
- """Initialize a newly instanced IndexObject
- :param repo: is the Repo we are located in
- :param binsha: 20 byte sha1
- :param mode: is the stat compatible file mode as int, use the stat module
- to evaluate the information
- :param path:
- is the path to the file in the file system, relative to the git repository root, i.e.
- file.ext or folder/other.ext
- :note:
- Path may not be set if the index object has been created directly, as it cannot
- be retrieved without knowing the parent tree."""
- super(IndexObject, self).__init__(repo, binsha)
- if mode is not None:
- self.mode = mode
- if path is not None:
- self.path = path
-
- def __hash__(self):
- """:return:
- Hash of our path as index items are uniquely identifiable by path, not
- by their data !"""
- return hash(self.path)
-
- def _set_cache_(self, attr):
- if attr in IndexObject.__slots__:
- # they cannot be retrieved later on ( not without searching for them )
- raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
- else:
- super(IndexObject, self)._set_cache_(attr)
- # END handle slot attribute
-
- @property
- def name(self):
- """:return: Name portion of the path, effectively being the basename"""
- return basename(self.path)
-
- @property
- def abspath(self):
- """
- :return:
- Absolute path to this index object in the file system ( as opposed to the
- .path field which is a path relative to the git repository ).
-
- The returned path will be native to the system and contains '\' on windows. """
- return join_path_native(self.repo.working_tree_dir, self.path)
-
+ """Base for all objects that can be part of the index file, namely Tree, Blob and
+ SubModule objects"""
+ __slots__ = ("path", "mode")
+
+ # for compatibility with iterable lists
+ _id_attribute_ = 'path'
+
+ def __init__(self, repo, binsha, mode=None, path=None):
+ """Initialize a newly instanced IndexObject
+ :param repo: is the Repo we are located in
+ :param binsha: 20 byte sha1
+ :param mode: is the stat compatible file mode as int, use the stat module
+ to evaluate the information
+ :param path:
+ is the path to the file in the file system, relative to the git repository root, i.e.
+ file.ext or folder/other.ext
+ :note:
+ Path may not be set if the index object has been created directly, as it cannot
+ be retrieved without knowing the parent tree."""
+ super(IndexObject, self).__init__(repo, binsha)
+ if mode is not None:
+ self.mode = mode
+ if path is not None:
+ self.path = path
+
+ def __hash__(self):
+ """:return:
+ Hash of our path as index items are uniquely identifiable by path, not
+ by their data !"""
+ return hash(self.path)
+
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # they cannot be retrieved later on ( not without searching for them )
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+ # END handle slot attribute
+
+ @property
+ def name(self):
+ """:return: Name portion of the path, effectively being the basename"""
+ return basename(self.path)
+
+ @property
+ def abspath(self):
+ """
+ :return:
+ Absolute path to this index object in the file system ( as opposed to the
+ .path field which is a path relative to the git repository ).
+
+ The returned path will be native to the system and contains '\' on windows. """
+ return join_path_native(self.repo.working_tree_dir, self.path)
+
diff --git a/git/objects/blob.py b/git/objects/blob.py
index f52d1a53..e96555c6 100644
--- a/git/objects/blob.py
+++ b/git/objects/blob.py
@@ -10,23 +10,23 @@ import base
__all__ = ('Blob', )

class Blob(base.IndexObject):
- """A Blob encapsulates a git blob object"""
- DEFAULT_MIME_TYPE = "text/plain"
- type = "blob"
-
- # valid blob modes
- executable_mode = 0100755
- file_mode = 0100644
- link_mode = 0120000
+ """A Blob encapsulates a git blob object"""
+ DEFAULT_MIME_TYPE = "text/plain"
+ type = "blob"
+
+ # valid blob modes
+ executable_mode = 0100755
+ file_mode = 0100644
+ link_mode = 0120000

- __slots__ = tuple()
+ __slots__ = tuple()

- @property
- def mime_type(self):
- """
- :return: String describing the mime type of this file (based on the filename)
- :note: Defaults to 'text/plain' in case the actual file type is unknown. """
- guesses = None
- if self.path:
- guesses = guess_type(self.path)
- return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
+ @property
+ def mime_type(self):
+ """
+ :return: String describing the mime type of this file (based on the filename)
+ :note: Defaults to 'text/plain' in case the actual file type is unknown. 
""" + guesses = None + if self.path: + guesses = guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/git/objects/commit.py b/git/objects/commit.py index fd4187b0..cbfd5097 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -4,11 +4,11 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import ( - Actor, - Iterable, - Stats, - ) +from git.util import ( + Actor, + Iterable, + Stats, + ) from git.diff import Diffable from tree import Tree from gitdb import IStream @@ -16,450 +16,450 @@ from cStringIO import StringIO import base from gitdb.util import ( - hex_to_bin - ) + hex_to_bin + ) from util import ( - Traversable, - Serializable, - parse_date, - altz_to_utctz_str, - parse_actor_and_date - ) + Traversable, + Serializable, + parse_date, + altz_to_utctz_str, + parse_actor_and_date + ) from time import ( - time, - altzone - ) + time, + altzone + ) import os import sys __all__ = ('Commit', ) class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): - """Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary.""" - - # ENVIRONMENT VARIABLES - # read when creating new commits - env_author_date = "GIT_AUTHOR_DATE" - env_committer_date = "GIT_COMMITTER_DATE" - - # CONFIGURATION KEYS - conf_encoding = 'i18n.commitencoding' - - # INVARIANTS - default_encoding = "UTF-8" - - - # object configuration - type = "commit" - __slots__ = ("tree", - "author", "authored_date", "author_tz_offset", - "committer", "committed_date", "committer_tz_offset", - "message", "parents", "encoding") - _id_attribute_ = "binsha" - - def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, - message=None, parents=None, encoding=None): - """Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set on first query. - - :param binsha: 20 byte sha1 - :param parents: tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - :param tree: Tree - Tree object - :param author: Actor - is the author string ( will be implicitly converted into an Actor object ) - :param authored_date: int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - :param author_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param committer: Actor - is the committer string - :param committed_date: int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - :param committer_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param message: string - is the commit message - :param encoding: string - encoding of the message, defaults to UTF-8 - :param parents: - List or tuple of Commit objects which are our parent(s) in the commit - dependency graph - :return: git.Commit - - :note: Timezone information is in the same format and in the same sign - as what time.altzone returns. 
The sign is inverted compared to git's - UTC timezone.""" - super(Commit,self).__init__(repo, binsha) - if tree is not None: - assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) - if tree is not None: - self.tree = tree - if author is not None: - self.author = author - if authored_date is not None: - self.authored_date = authored_date - if author_tz_offset is not None: - self.author_tz_offset = author_tz_offset - if committer is not None: - self.committer = committer - if committed_date is not None: - self.committed_date = committed_date - if committer_tz_offset is not None: - self.committer_tz_offset = committer_tz_offset - if message is not None: - self.message = message - if parents is not None: - self.parents = parents - if encoding is not None: - self.encoding = encoding - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents + """Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary.""" + + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_date = "GIT_AUTHOR_DATE" + env_committer_date = "GIT_COMMITTER_DATE" + + # CONFIGURATION KEYS + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = "commit" + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "binsha" + + def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. + + :param binsha: 20 byte sha1 + :param parents: tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + :param tree: Tree + Tree object + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. 
The sign is inverted compared to git's
+ UTC timezone."""
+ super(Commit,self).__init__(repo, binsha)
+ if tree is not None:
+ assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
+ if tree is not None:
+ self.tree = tree
+ if author is not None:
+ self.author = author
+ if authored_date is not None:
+ self.authored_date = authored_date
+ if author_tz_offset is not None:
+ self.author_tz_offset = author_tz_offset
+ if committer is not None:
+ self.committer = committer
+ if committed_date is not None:
+ self.committed_date = committed_date
+ if committer_tz_offset is not None:
+ self.committer_tz_offset = committer_tz_offset
+ if message is not None:
+ self.message = message
+ if parents is not None:
+ self.parents = parents
+ if encoding is not None:
+ self.encoding = encoding
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents

- def _set_cache_(self, attr):
- if attr in Commit.__slots__:
- # read the data in a chunk, its faster - then provide a file wrapper
- binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
- self._deserialize(StringIO(stream.read()))
- else:
- super(Commit, self)._set_cache_(attr)
- # END handle attrs
+ def _set_cache_(self, attr):
+ if attr in Commit.__slots__:
+ # read the data in a chunk, its faster - then provide a file wrapper
+ binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
+ self._deserialize(StringIO(stream.read()))
+ else:
+ super(Commit, self)._set_cache_(attr)
+ # END handle attrs

- @property
- def summary(self):
- """:return: First line of the commit message"""
- return self.message.split('\n', 1)[0]
-
- def count(self, paths='', **kwargs):
- """Count the number of commits reachable from this commit
-
- :param paths:
- is an optional path or a list of paths restricting the return value
- to commits actually containing the paths
-
- :param kwargs:
- Additional options to be passed to git-rev-list. They must not alter
- the output style of the command, or parsing will yield incorrect results
- :return: int defining the number of reachable commits"""
- # yes, it makes a difference whether empty paths are given or not in our case
- # as the empty paths version will ignore merge commits for some reason.
- if paths:
- return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines())
- else:
- return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines())
-
+ @property
+ def summary(self):
+ """:return: First line of the commit message"""
+ return self.message.split('\n', 1)[0]
+
+ def count(self, paths='', **kwargs):
+ """Count the number of commits reachable from this commit
+
+ :param paths:
+ is an optional path or a list of paths restricting the return value
+ to commits actually containing the paths
+
+ :param kwargs:
+ Additional options to be passed to git-rev-list. They must not alter
+ the output style of the command, or parsing will yield incorrect results
+ :return: int defining the number of reachable commits"""
+ # yes, it makes a difference whether empty paths are given or not in our case
+ # as the empty paths version will ignore merge commits for some reason.
+ if paths:
+ return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines())
+ else:
+ return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines())
+
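
Note: count above shells out to git rev-list and counts output lines, which is why kwargs that change rev-list's output format would break the parse. Typical usage against an existing clone might look like the following (the repository path is illustrative):

    from git import Repo

    repo = Repo('/path/to/repo')          # hypothetical working copy
    head = repo.head.commit
    n_all = head.count()                  # commits reachable from HEAD
    n_docs = head.count('docs')           # only commits touching docs/
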
- @property
- def name_rev(self):
- """
- :return:
- String describing the commits hex sha based on the closest Reference.
- Mostly useful for UI purposes"""
- return self.repo.git.name_rev(self)
+ @property
+ def name_rev(self):
+ """
+ :return:
+ String describing the commits hex sha based on the closest Reference.
+ Mostly useful for UI purposes"""
+ return self.repo.git.name_rev(self)

- @classmethod
- def iter_items(cls, repo, rev, paths='', **kwargs):
- """Find all commits matching the given criteria.
-
- :param repo: is the Repo
- :param rev: revision specifier, see git-rev-parse for viable options
- :param paths:
- is an optional path or list of paths, if set only Commits that include the path
- or paths will be considered
- :param kwargs:
- optional keyword arguments to git rev-list where
- ``max_count`` is the maximum number of commits to fetch
- ``skip`` is the number of commits to skip
- ``since`` all commits since i.e. '1970-01-01'
- :return: iterator yielding Commit items"""
- if 'pretty' in kwargs:
- raise ValueError("--pretty cannot be used as parsing expects single sha's only")
- # END handle pretty
- args = list()
- if paths:
- args.extend(('--', paths))
- # END if paths
-
- proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
- return cls._iter_from_process_or_stream(repo, proc)
-
- def iter_parents(self, paths='', **kwargs):
- """Iterate _all_ parents of this commit.
-
- :param paths:
- Optional path or list of paths limiting the Commits to those that
- contain at least one of the paths
- :param kwargs: All arguments allowed by git-rev-list
- :return: Iterator yielding Commit objects which are parents of self """
- # skip ourselves
- skip = kwargs.get("skip", 1)
- if skip == 0: # skip ourselves
- skip = 1
- kwargs['skip'] = skip
-
- return self.iter_items(self.repo, self, paths, **kwargs)
+ @classmethod
+ def iter_items(cls, repo, rev, paths='', **kwargs):
+ """Find all commits matching the given criteria.
+
+ :param repo: is the Repo
+ :param rev: revision specifier, see git-rev-parse for viable options
+ :param paths:
+ is an optional path or list of paths, if set only Commits that include the path
+ or paths will be considered
+ :param kwargs:
+ optional keyword arguments to git rev-list where
+ ``max_count`` is the maximum number of commits to fetch
+ ``skip`` is the number of commits to skip
+ ``since`` all commits since i.e. '1970-01-01'
+ :return: iterator yielding Commit items"""
+ if 'pretty' in kwargs:
+ raise ValueError("--pretty cannot be used as parsing expects single sha's only")
+ # END handle pretty
+ args = list()
+ if paths:
+ args.extend(('--', paths))
+ # END if paths
+
+ proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
+ return cls._iter_from_process_or_stream(repo, proc)
+
+ def iter_parents(self, paths='', **kwargs):
+ """Iterate _all_ parents of this commit.
+
+ :param paths:
+ Optional path or list of paths limiting the Commits to those that
+ contain at least one of the paths
+ :param kwargs: All arguments allowed by git-rev-list
+ :return: Iterator yielding Commit objects which are parents of self """
+ # skip ourselves
+ skip = kwargs.get("skip", 1)
+ if skip == 0: # skip ourselves
+ skip = 1
+ kwargs['skip'] = skip
+
+ return self.iter_items(self.repo, self, paths, **kwargs)

- @property
- def stats(self):
- """Create a git stat from changes between this commit and its first parent
- or from all changes done if this is the very first commit.
- - :return: git.Stats""" - if not self.parents: - text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) - return Stats._list_from_string(self.repo, text) + @property + def stats(self): + """Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. + + :return: git.Stats""" + if not self.parents: + text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) + return Stats._list_from_string(self.repo, text) - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): - """Parse out commit information into a list of Commit objects - We expect one-line per commit, and parse the actual commit information directly - from our lighting fast object database + @classmethod + def _iter_from_process_or_stream(cls, repo, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database - :param proc: git-rev-list process instance - one sha per line - :return: iterator returning Commit objects""" - stream = proc_or_stream - if not hasattr(stream,'readline'): - stream = proc_or_stream.stdout - - readline = stream.readline - while True: - line = readline() - if not line: - break - hexsha = line.strip() - if len(hexsha) > 40: - # split additional information, as returned by bisect for instance - hexsha, rest = line.split(None, 1) - # END handle extra info - - assert len(hexsha) == 40, "Invalid line: %s" % hexsha - yield Commit(repo, hex_to_bin(hexsha)) - # END for each line in stream - - - @classmethod - def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): - """Commit the given tree, creating a commit object. - - :param repo: Repo object the commit should be part of - :param tree: Tree object or hex or bin sha - the tree of the new commit - :param message: Commit message. It may be an empty string if no message is provided. - It will be converted to a string in any case. - :param parent_commits: - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object - :param head: - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could - lead to undesired results when diffing files. 
- - :return: Commit object representing the new commit - - :note: - Additional information about the committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information""" - parents = parent_commits - if parent_commits is None: - try: - parent_commits = [ repo.head.commit ] - except ValueError: - # empty repositories have no head commit - parent_commits = list() - # END handle parent commits - # END if parent commits are unset - - # retrieve all additional information, create a commit object, and - # serialize it - # Generally: - # * Environment variables override configuration values - # * Sensible defaults are set according to the git documentation - - # COMMITER AND AUTHOR INFO - cr = repo.config_reader() - env = os.environ - - committer = Actor.committer(cr) - author = Actor.author(cr) - - # PARSE THE DATES - unix_time = int(time()) - offset = altzone - - author_date_str = env.get(cls.env_author_date, '') - if author_date_str: - author_time, author_offset = parse_date(author_date_str) - else: - author_time, author_offset = unix_time, offset - # END set author time - - committer_date_str = env.get(cls.env_committer_date, '') - if committer_date_str: - committer_time, committer_offset = parse_date(committer_date_str) - else: - committer_time, committer_offset = unix_time, offset - # END set committer time - - # assume utf8 encoding - enc_section, enc_option = cls.conf_encoding.split('.') - conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) - - - # if the tree is no object, make sure we create one - otherwise - # the created commit object is invalid - if isinstance(tree, str): - tree = repo.tree(tree) - # END tree conversion - - # CREATE NEW COMMIT - new_commit = cls(repo, cls.NULL_BIN_SHA, tree, - author, author_time, author_offset, - committer, committer_time, committer_offset, - message, parent_commits, conf_encoding) - - stream = StringIO() - new_commit._serialize(stream) - streamlen = stream.tell() - stream.seek(0) - - istream = repo.odb.store(IStream(cls.type, streamlen, stream)) - new_commit.binsha = istream.binsha - - if head: - # need late import here, importing git at the very beginning throws - # as well ... 
- import git.refs - try: - repo.head.set_commit(new_commit, logmsg="commit: %s" % message) - except ValueError: - # head is not yet set to the ref our HEAD points to - # Happens on first commit - import git.refs - master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message) - repo.head.set_reference(master, logmsg='commit: Switching to %s' % master) - # END handle empty repositories - # END advance head handling - - return new_commit - - #{ Serializable Implementation - - def _serialize(self, stream): - write = stream.write - write("tree %s\n" % self.tree) - for p in self.parents: - write("parent %s\n" % p) - - a = self.author - aname = a.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - - c = self.committer - fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", aname, a.email, - self.authored_date, - altz_to_utctz_str(self.author_tz_offset))) - - # encode committer - aname = c.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - write(fmt % ("committer", aname, c.email, - self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))) - - if self.encoding != self.default_encoding: - write("encoding %s\n" % self.encoding) - - write("\n") - - # write plain bytes, be sure its encoded according to our encoding - if isinstance(self.message, unicode): - write(self.message.encode(self.encoding)) - else: - write(self.message) - # END handle encoding - return self - - def _deserialize(self, stream): - """:param from_rev_list: if true, the stream format is coming from the rev-list command - Otherwise it is assumed to be a plain data stream from our object""" - readline = stream.readline - self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + hexsha = line.strip() + if len(hexsha) > 40: + # split additional information, as returned by bisect for instance + hexsha, rest = line.split(None, 1) + # END handle extra info + + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield Commit(repo, hex_to_bin(hexsha)) + # END for each line in stream + + + @classmethod + def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): + """Commit the given tree, creating a commit object. + + :param repo: Repo object the commit should be part of + :param tree: Tree object or hex or bin sha + the tree of the new commit + :param message: Commit message. It may be an empty string if no message is provided. + It will be converted to a string in any case. + :param parent_commits: + Optional Commit objects to use as parents for the new commit. + If empty list, the commit will have no parents at all and become + a root commit. + If None , the current head commit will be the parent of the + new commit object + :param head: + If True, the HEAD will be advanced to the new commit automatically. + Else the HEAD will remain pointing on the previous commit. This could + lead to undesired results when diffing files. 
+ + :return: Commit object representing the new commit + + :note: + Additional information about the committer and Author are taken from the + environment or from the git configuration, see git-commit-tree for + more information""" + parents = parent_commits + if parent_commits is None: + try: + parent_commits = [ repo.head.commit ] + except ValueError: + # empty repositories have no head commit + parent_commits = list() + # END handle parent commits + # END if parent commits are unset + + # retrieve all additional information, create a commit object, and + # serialize it + # Generally: + # * Environment variables override configuration values + # * Sensible defaults are set according to the git documentation + + # COMMITER AND AUTHOR INFO + cr = repo.config_reader() + env = os.environ + + committer = Actor.committer(cr) + author = Actor.author(cr) + + # PARSE THE DATES + unix_time = int(time()) + offset = altzone + + author_date_str = env.get(cls.env_author_date, '') + if author_date_str: + author_time, author_offset = parse_date(author_date_str) + else: + author_time, author_offset = unix_time, offset + # END set author time + + committer_date_str = env.get(cls.env_committer_date, '') + if committer_date_str: + committer_time, committer_offset = parse_date(committer_date_str) + else: + committer_time, committer_offset = unix_time, offset + # END set committer time + + # assume utf8 encoding + enc_section, enc_option = cls.conf_encoding.split('.') + conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) + + + # if the tree is no object, make sure we create one - otherwise + # the created commit object is invalid + if isinstance(tree, str): + tree = repo.tree(tree) + # END tree conversion + + # CREATE NEW COMMIT + new_commit = cls(repo, cls.NULL_BIN_SHA, tree, + author, author_time, author_offset, + committer, committer_time, committer_offset, + message, parent_commits, conf_encoding) + + stream = StringIO() + new_commit._serialize(stream) + streamlen = stream.tell() + stream.seek(0) + + istream = repo.odb.store(IStream(cls.type, streamlen, stream)) + new_commit.binsha = istream.binsha + + if head: + # need late import here, importing git at the very beginning throws + # as well ... 
+ import git.refs + try: + repo.head.set_commit(new_commit, logmsg="commit: %s" % message) + except ValueError: + # head is not yet set to the ref our HEAD points to + # Happens on first commit + import git.refs + master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message) + repo.head.set_reference(master, logmsg='commit: Switching to %s' % master) + # END handle empty repositories + # END advance head handling + + return new_commit + + #{ Serializable Implementation + + def _serialize(self, stream): + write = stream.write + write("tree %s\n" % self.tree) + for p in self.parents: + write("parent %s\n" % p) + + a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + + c = self.committer + fmt = "%s %s <%s> %s %s\n" + write(fmt % ("author", aname, a.email, + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))) + + # encode committer + aname = c.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + write(fmt % ("committer", aname, c.email, + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))) + + if self.encoding != self.default_encoding: + write("encoding %s\n" % self.encoding) + + write("\n") + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') - self.parents = list() - next_line = None - while True: - parent_line = readline() - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) - # END for each parent line - self.parents = tuple(self.parents) - - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) - - - # now we can have the encoding line, or an empty line followed by the optional - # message. 
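Taken together, create_from_tree and _serialize above turn an index state into a stored commit object. A minimal usage sketch, assuming a writable repository at a hypothetical path:

```python
# Hedged sketch: persist the staged state as a tree, then commit it.
# The path and message are placeholders; head=True advances HEAD as
# described in the docstring above.
from git import Repo
from git.objects.commit import Commit

repo = Repo('/tmp/example')
tree = repo.index.write_tree()   # write the index as a tree object
new_commit = Commit.create_from_tree(repo, tree, "describe the change", head=True)
print new_commit.hexsha
```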
- self.encoding = self.default_encoding
- # read encoding or empty line to separate message
- enc = readline()
- enc = enc.strip()
- if enc:
- self.encoding = enc[enc.find(' ')+1:]
- # now comes the message separator
- readline()
- # END handle encoding
-
- # decode the author's name
- try:
- self.author.name = self.author.name.decode(self.encoding)
- except UnicodeDecodeError:
- print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
- # END handle author's encoding
-
- # decode the committer's name
- try:
- self.committer.name = self.committer.name.decode(self.encoding)
- except UnicodeDecodeError:
- print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
- # END handle committer's encoding
-
- # a stream from our data simply gives us the plain message
- # The end of our message stream is marked with a newline that we strip
- self.message = stream.read()
- try:
- self.message = self.message.decode(self.encoding)
- except UnicodeDecodeError:
- print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
- # END exception handling
- return self
-
- #} END serializable implementation
+ self.parents = list()
+ next_line = None
+ while True:
+ parent_line = readline()
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
+ # END for each parent line
+ self.parents = tuple(self.parents)
+
+ self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
+
+
+ # now we can have the encoding line, or an empty line followed by the optional
+ # message.
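For orientation while the deserializer reads the encoding line and message below: the payload follows git's plain commit layout, meaning header lines, an optional encoding line, a blank separator, then the message. An illustrative sketch; all shas and names are placeholders:

```python
# Hedged illustration of the byte layout _serialize emits and
# _deserialize consumes. An "encoding <name>" line may follow the
# committer line when the encoding is not the utf-8 default.
raw_commit = (
    "tree 0123456789abcdef0123456789abcdef01234567\n"
    "parent 76543210fedcba9876543210fedcba9876543210\n"
    "author A U Thor <author@example.com> 1296941999 +0100\n"
    "committer A U Thor <author@example.com> 1296941999 +0100\n"
    "\n"
    "describe the change\n"
)
```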
+ self.encoding = self.default_encoding
+ # read encoding or empty line to separate message
+ enc = readline()
+ enc = enc.strip()
+ if enc:
+ self.encoding = enc[enc.find(' ')+1:]
+ # now comes the message separator
+ readline()
+ # END handle encoding
+
+ # decode the author's name
+ try:
+ self.author.name = self.author.name.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
+ # END handle author's encoding
+
+ # decode the committer's name
+ try:
+ self.committer.name = self.committer.name.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
+ # END handle committer's encoding
+
+ # a stream from our data simply gives us the plain message
+ # The end of our message stream is marked with a newline that we strip
+ self.message = stream.read()
+ try:
+ self.message = self.message.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
+ # END exception handling
+ return self
+
+ #} END serializable implementation
diff --git a/git/objects/fun.py b/git/objects/fun.py
index 9b0a377c..f73be542 100644
--- a/git/objects/fun.py
+++ b/git/objects/fun.py
@@ -2,198 +2,198 @@ from stat import S_ISDIR
__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
- 'traverse_tree_recursive')
+ 'traverse_tree_recursive')
-
+
def tree_to_stream(entries, write):
- """Write the given list of entries into a stream using its write method
- :param entries: **sorted** list of tuples with (binsha, mode, name)
- :param write: write method which takes a data string"""
- ord_zero = ord('0')
- bit_mask = 7 # 3 bits set
-
- for binsha, mode, name in entries:
- mode_str = ''
- for i in xrange(6):
- mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
- # END for each octal digit
-
- # git slices away the first octal if it's zero
- if mode_str[0] == '0':
- mode_str = mode_str[1:]
- # END save a byte
+ """Write the given list of entries into a stream using its write method
+ :param entries: **sorted** list of tuples with (binsha, mode, name)
+ :param write: write method which takes a data string"""
+ ord_zero = ord('0')
+ bit_mask = 7 # 3 bits set
+
+ for binsha, mode, name in entries:
+ mode_str = ''
+ for i in xrange(6):
+ mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
+ # END for each octal digit
+
+ # git slices away the first octal if it's zero
+ if mode_str[0] == '0':
+ mode_str = mode_str[1:]
+ # END save a byte
- # here it comes: if the name is actually unicode, the replacement below
- # will not work as the binsha is not part of the ascii unicode encoding -
- # hence we must convert to an utf8 string for it to work properly.
- # According to my tests, this is exactly what git does, that is it just
- # takes the input literally, which appears to be utf8 on linux.
- if isinstance(name, unicode):
- name = name.encode("utf8")
- write("%s %s\0%s" % (mode_str, name, binsha))
- # END for each item
+ # here it comes: if the name is actually unicode, the replacement below
+ # will not work as the binsha is not part of the ascii unicode encoding -
+ # hence we must convert to an utf8 string for it to work properly.
+ # According to my tests, this is exactly what git does, that is it just + # takes the input literally, which appears to be utf8 on linux. + if isinstance(name, unicode): + name = name.encode("utf8") + write("%s %s\0%s" % (mode_str, name, binsha)) + # END for each item def tree_entries_from_data(data): - """Reads the binary representation of a tree and returns tuples of Tree items - :param data: data block with tree data - :return: list(tuple(binsha, mode, tree_relative_path), ...)""" - ord_zero = ord('0') - len_data = len(data) - i = 0 - out = list() - while i < len_data: - mode = 0 - - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later - while data[i] != ' ': - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) - i += 1 - # END while reading mode - - # byte is space now, skip it - i += 1 - - # parse name, it is NULL separated - - ns = i - while data[i] != '\0': - i += 1 - # END while not reached NULL - - # default encoding for strings in git is utf8 - # Only use the respective unicode object if the byte stream was encoded - name = data[ns:i] - name_enc = name.decode("utf-8") - if len(name) > len(name_enc): - name = name_enc - # END handle encoding - - # byte is NULL, get next 20 - i += 1 - sha = data[i:i+20] - i = i + 20 - out.append((sha, mode, name)) - # END for each byte in data stream - return out - - + """Reads the binary representation of a tree and returns tuples of Tree items + :param data: data block with tree data + :return: list(tuple(binsha, mode, tree_relative_path), ...)""" + ord_zero = ord('0') + len_data = len(data) + i = 0 + out = list() + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded + name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # END handle encoding + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + out.append((sha, mode, name)) + # END for each byte in data stream + return out + + def _find_by_name(tree_data, name, is_dir, start_at): - """return data entry matching the given name and tree mode - or None. - Before the item is returned, the respective data item is set - None in the tree_data list to mark it done""" - try: - item = tree_data[start_at] - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[start_at] = None - return item - except IndexError: - pass - # END exception handling - for index, item in enumerate(tree_data): - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[index] = None - return item - # END if item matches - # END for each item - return None + """return data entry matching the given name and tree mode + or None. 
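tree_entries_from_data above is the low-level reader for raw tree payloads. A hedged usage sketch against the object database; the path and revision are placeholders:

```python
# Hedged sketch: decode a tree object into (binsha, mode, name) tuples.
from git import Repo
from git.objects.fun import tree_entries_from_data

repo = Repo('/tmp/example')                 # placeholder path
tree = repo.tree('master')                  # placeholder revision
raw = repo.odb.stream(tree.binsha).read()   # raw tree payload, as in the code above
for binsha, mode, name in tree_entries_from_data(raw):
    print "%06o %s %s" % (mode, binsha.encode('hex'), name)
```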
+ Before the item is returned, the respective data item is set + None in the tree_data list to mark it done""" + try: + item = tree_data[start_at] + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[start_at] = None + return item + except IndexError: + pass + # END exception handling + for index, item in enumerate(tree_data): + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[index] = None + return item + # END if item matches + # END for each item + return None def _to_full_path(item, path_prefix): - """Rebuild entry with given path prefix""" - if not item: - return item - return (item[0], item[1], path_prefix+item[2]) - + """Rebuild entry with given path prefix""" + if not item: + return item + return (item[0], item[1], path_prefix+item[2]) + def traverse_trees_recursive(odb, tree_shas, path_prefix): - """ - :return: list with entries according to the given binary tree-shas. - The result is encoded in a list - of n tuple|None per blob/commit, (n == len(tree_shas)), where - * [0] == 20 byte sha - * [1] == mode as int - * [2] == path relative to working tree root - The entry tuple is None if the respective blob/commit did not - exist in the given tree. - :param tree_shas: iterable of shas pointing to trees. All trees must - be on the same level. A tree-sha may be None in which case None - :param path_prefix: a prefix to be added to the returned paths on this level, - set it '' for the first iteration - :note: The ordering of the returned items will be partially lost""" - trees_data = list() - nt = len(tree_shas) - for tree_sha in tree_shas: - if tree_sha is None: - data = list() - else: - data = tree_entries_from_data(odb.stream(tree_sha).read()) - # END handle muted trees - trees_data.append(data) - # END for each sha to get data for - - out = list() - out_append = out.append - - # find all matching entries and recursively process them together if the match - # is a tree. If the match is a non-tree item, put it into the result. - # Processed items will be set None - for ti, tree_data in enumerate(trees_data): - for ii, item in enumerate(tree_data): - if not item: - continue - # END skip already done items - entries = [ None for n in range(nt) ] - entries[ti] = item - sha, mode, name = item # its faster to unpack - is_dir = S_ISDIR(mode) # type mode bits - - # find this item in all other tree data items - # wrap around, but stop one before our current index, hence - # ti+nt, not ti+1+nt - for tio in range(ti+1, ti+nt): - tio = tio % nt - entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) - # END for each other item data - - # if we are a directory, enter recursion - if is_dir: - out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) - else: - out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) - # END handle recursion - - # finally mark it done - tree_data[ii] = None - # END for each item - - # we are done with one tree, set all its data empty - del(tree_data[:]) - # END for each tree_data chunk - return out - + """ + :return: list with entries according to the given binary tree-shas. + The result is encoded in a list + of n tuple|None per blob/commit, (n == len(tree_shas)), where + * [0] == 20 byte sha + * [1] == mode as int + * [2] == path relative to working tree root + The entry tuple is None if the respective blob/commit did not + exist in the given tree. + :param tree_shas: iterable of shas pointing to trees. All trees must + be on the same level. 
A tree-sha may be None in which case None + :param path_prefix: a prefix to be added to the returned paths on this level, + set it '' for the first iteration + :note: The ordering of the returned items will be partially lost""" + trees_data = list() + nt = len(tree_shas) + for tree_sha in tree_shas: + if tree_sha is None: + data = list() + else: + data = tree_entries_from_data(odb.stream(tree_sha).read()) + # END handle muted trees + trees_data.append(data) + # END for each sha to get data for + + out = list() + out_append = out.append + + # find all matching entries and recursively process them together if the match + # is a tree. If the match is a non-tree item, put it into the result. + # Processed items will be set None + for ti, tree_data in enumerate(trees_data): + for ii, item in enumerate(tree_data): + if not item: + continue + # END skip already done items + entries = [ None for n in range(nt) ] + entries[ti] = item + sha, mode, name = item # its faster to unpack + is_dir = S_ISDIR(mode) # type mode bits + + # find this item in all other tree data items + # wrap around, but stop one before our current index, hence + # ti+nt, not ti+1+nt + for tio in range(ti+1, ti+nt): + tio = tio % nt + entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) + # END for each other item data + + # if we are a directory, enter recursion + if is_dir: + out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) + else: + out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) + # END handle recursion + + # finally mark it done + tree_data[ii] = None + # END for each item + + # we are done with one tree, set all its data empty + del(tree_data[:]) + # END for each tree_data chunk + return out + def traverse_tree_recursive(odb, tree_sha, path_prefix): - """ - :return: list of entries of the tree pointed to by the binary tree_sha. An entry - has the following format: - * [0] 20 byte sha - * [1] mode as int - * [2] path relative to the repository - :param path_prefix: prefix to prepend to the front of all returned paths""" - entries = list() - data = tree_entries_from_data(odb.stream(tree_sha).read()) - - # unpacking/packing is faster than accessing individual items - for sha, mode, name in data: - if S_ISDIR(mode): - entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) - else: - entries.append((sha, mode, path_prefix+name)) - # END for each item - - return entries + """ + :return: list of entries of the tree pointed to by the binary tree_sha. 
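traverse_trees_recursive lines up equally named entries across several trees, which is the primitive tree-diffing builds on. A sketch comparing two commits; all revisions and paths are placeholders:

```python
# Hedged sketch: pair up the root trees of two commits. Each result row
# holds one (binsha, mode, path) tuple, or None, per input tree.
from git import Repo
from git.objects.fun import traverse_trees_recursive

repo = Repo('/tmp/example')
old_tree = repo.commit('HEAD~1').tree.binsha
new_tree = repo.commit('HEAD').tree.binsha
for old_entry, new_entry in traverse_trees_recursive(repo.odb, [old_tree, new_tree], ''):
    if old_entry != new_entry:   # added, deleted, or changed path
        print old_entry, '->', new_entry
```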
An entry + has the following format: + * [0] 20 byte sha + * [1] mode as int + * [2] path relative to the repository + :param path_prefix: prefix to prepend to the front of all returned paths""" + entries = list() + data = tree_entries_from_data(odb.stream(tree_sha).read()) + + # unpacking/packing is faster than accessing individual items + for sha, mode, name in data: + if S_ISDIR(mode): + entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) + else: + entries.append((sha, mode, path_prefix+name)) + # END for each item + + return entries diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 45b24a0d..f7dc1597 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,27 +1,27 @@ import util from util import ( - mkhead, - sm_name, - sm_section, - unbare_repo, - SubmoduleConfigParser, - find_first_remote_branch - ) + mkhead, + sm_name, + sm_section, + unbare_repo, + SubmoduleConfigParser, + find_first_remote_branch + ) from git.objects.util import Traversable -from StringIO import StringIO # need a dict to set bloody .name field +from StringIO import StringIO # need a dict to set bloody .name field from git.util import ( - Iterable, - join_path_native, - to_native_path_linux, - RemoteProgress, - rmtree - ) + Iterable, + join_path_native, + to_native_path_linux, + RemoteProgress, + rmtree + ) from git.config import SectionConstraint from git.exc import ( - InvalidGitRepositoryError, - NoSuchPathError - ) + InvalidGitRepositoryError, + NoSuchPathError + ) import stat import git @@ -34,14 +34,14 @@ __all__ = ["Submodule", "UpdateProgress"] class UpdateProgress(RemoteProgress): - """Class providing detailed progress information to the caller who should - derive from it and implement the ``update(...)`` message""" - CLONE, FETCH, UPDWKTREE = [1 << x for x in range(RemoteProgress._num_op_codes, RemoteProgress._num_op_codes+3)] - _num_op_codes = RemoteProgress._num_op_codes + 3 - - __slots__ = tuple() - - + """Class providing detailed progress information to the caller who should + derive from it and implement the ``update(...)`` message""" + CLONE, FETCH, UPDWKTREE = [1 << x for x in range(RemoteProgress._num_op_codes, RemoteProgress._num_op_codes+3)] + _num_op_codes = RemoteProgress._num_op_codes + 3 + + __slots__ = tuple() + + BEGIN = UpdateProgress.BEGIN END = UpdateProgress.END CLONE = UpdateProgress.CLONE @@ -53,873 +53,873 @@ UPDWKTREE = UpdateProgress.UPDWKTREE # mechanism which cause plenty of trouble of the only reason for packages and # modules is refactoring - subpackages shoudn't depend on parent packages class Submodule(util.IndexObject, Iterable, Traversable): - """Implements access to a git submodule. They are special in that their sha - represents a commit in the submodule's repository which is to be checked out - at the path of this instance. - The submodule type does not have a string type associated with it, as it exists - solely as a marker in the tree and index. 
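For completeness, a compact sketch of traverse_tree_recursive above, which flattens one tree into (binsha, mode, path) records; the path and revision are placeholders:

```python
# Hedged sketch: list every blob beneath a tree, recursively.
from git import Repo
from git.objects.fun import traverse_tree_recursive

repo = Repo('/tmp/example')                 # placeholder path
root = repo.tree('master').binsha
for binsha, mode, path in traverse_tree_recursive(repo.odb, root, ''):
    print "%06o %s" % (mode, path)
```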
- - All methods work in bare and non-bare repositories.""" - - _id_attribute_ = "name" - k_modules_file = '.gitmodules' - k_head_option = 'branch' - k_head_default = 'master' - k_default_mode = stat.S_IFDIR | stat.S_IFLNK # submodules are directories with link-status - - # this is a bogus type for base class compatability - type = 'submodule' - - __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') - _cache_attrs = ('path', '_url', '_branch_path') - - def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, branch_path=None): - """Initialize this instance with its attributes. We only document the ones - that differ from ``IndexObject`` - - :param repo: Our parent repository - :param binsha: binary sha referring to a commit in the remote repository, see url parameter - :param parent_commit: see set_parent_commit() - :param url: The url to the remote repository which is the submodule - :param branch_path: full (relative) path to ref to checkout when cloning the remote repository""" - super(Submodule, self).__init__(repo, binsha, mode, path) - self.size = 0 - if parent_commit is not None: - self._parent_commit = parent_commit - if url is not None: - self._url = url - if branch_path is not None: - assert isinstance(branch_path, basestring) - self._branch_path = branch_path - if name is not None: - self._name = name - - def _set_cache_(self, attr): - if attr == '_parent_commit': - # set a default value, which is the root tree of the current head - self._parent_commit = self.repo.commit() - elif attr in ('path', '_url', '_branch_path'): - reader = self.config_reader() - # default submodule values - self.path = reader.get_value('path') - self._url = reader.get_value('url') - # git-python extension values - optional - self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default)) - elif attr == '_name': - raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially") - else: - super(Submodule, self)._set_cache_(attr) - # END handle attribute name - - def _get_intermediate_items(self, item): - """:return: all the submodules of our module repository""" - try: - return type(self).list_items(item.module()) - except InvalidGitRepositoryError: - return list() - # END handle intermeditate items - - def __eq__(self, other): - """Compare with another submodule""" - # we may only compare by name as this should be the ID they are hashed with - # Otherwise this type wouldn't be hashable - # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other) - return self._name == other._name - - def __ne__(self, other): - """Compare with another submodule for inequality""" - return not (self == other) - - def __hash__(self): - """Hash this instance using its logical id, not the sha""" - return hash(self._name) - - def __str__(self): - return self._name - - def __repr__(self): - return "git.%s(name=%s, path=%s, url=%s, branch_path=%s)" % (type(self).__name__, self._name, self.path, self.url, self.branch_path) - - @classmethod - def _config_parser(cls, repo, parent_commit, read_only): - """:return: Config Parser constrained to our submodule in read or write mode - :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository - at the given parent commit. 
Otherwise the exception would be delayed until the first
- access of the config parser"""
- parent_matches_head = repo.head.commit == parent_commit
- if not repo.bare and parent_matches_head:
- fp_module = cls.k_modules_file
- fp_module_path = os.path.join(repo.working_tree_dir, fp_module)
- if not os.path.isfile(fp_module_path):
- raise IOError("%s file was not accessible" % fp_module_path)
- # END handle existence
- fp_module = fp_module_path
- else:
- try:
- fp_module = cls._sio_modules(parent_commit)
- except KeyError:
- raise IOError("Could not find %s file in the tree of parent commit %s" % (cls.k_modules_file, parent_commit))
- # END handle exceptions
- # END handle non-bare working tree
-
- if not read_only and (repo.bare or not parent_matches_head):
- raise ValueError("Cannot write blobs of 'historical' submodule configurations")
- # END handle writes of historical submodules
-
- return SubmoduleConfigParser(fp_module, read_only = read_only)
+ """Implements access to a git submodule. They are special in that their sha
+ represents a commit in the submodule's repository which is to be checked out
+ at the path of this instance.
+ The submodule type does not have a string type associated with it, as it exists
+ solely as a marker in the tree and index.
+
+ All methods work in bare and non-bare repositories."""
+
+ _id_attribute_ = "name"
+ k_modules_file = '.gitmodules'
+ k_head_option = 'branch'
+ k_head_default = 'master'
+ k_default_mode = stat.S_IFDIR | stat.S_IFLNK # submodules are directories with link-status
+
+ # this is a bogus type for base class compatibility
+ type = 'submodule'
+
+ __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__')
+ _cache_attrs = ('path', '_url', '_branch_path')
+
+ def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, branch_path=None):
+ """Initialize this instance with its attributes.
We only document the ones + that differ from ``IndexObject`` + + :param repo: Our parent repository + :param binsha: binary sha referring to a commit in the remote repository, see url parameter + :param parent_commit: see set_parent_commit() + :param url: The url to the remote repository which is the submodule + :param branch_path: full (relative) path to ref to checkout when cloning the remote repository""" + super(Submodule, self).__init__(repo, binsha, mode, path) + self.size = 0 + if parent_commit is not None: + self._parent_commit = parent_commit + if url is not None: + self._url = url + if branch_path is not None: + assert isinstance(branch_path, basestring) + self._branch_path = branch_path + if name is not None: + self._name = name + + def _set_cache_(self, attr): + if attr == '_parent_commit': + # set a default value, which is the root tree of the current head + self._parent_commit = self.repo.commit() + elif attr in ('path', '_url', '_branch_path'): + reader = self.config_reader() + # default submodule values + self.path = reader.get_value('path') + self._url = reader.get_value('url') + # git-python extension values - optional + self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default)) + elif attr == '_name': + raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially") + else: + super(Submodule, self)._set_cache_(attr) + # END handle attribute name + + def _get_intermediate_items(self, item): + """:return: all the submodules of our module repository""" + try: + return type(self).list_items(item.module()) + except InvalidGitRepositoryError: + return list() + # END handle intermeditate items + + def __eq__(self, other): + """Compare with another submodule""" + # we may only compare by name as this should be the ID they are hashed with + # Otherwise this type wouldn't be hashable + # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other) + return self._name == other._name + + def __ne__(self, other): + """Compare with another submodule for inequality""" + return not (self == other) + + def __hash__(self): + """Hash this instance using its logical id, not the sha""" + return hash(self._name) + + def __str__(self): + return self._name + + def __repr__(self): + return "git.%s(name=%s, path=%s, url=%s, branch_path=%s)" % (type(self).__name__, self._name, self.path, self.url, self.branch_path) + + @classmethod + def _config_parser(cls, repo, parent_commit, read_only): + """:return: Config Parser constrained to our submodule in read or write mode + :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository + at the given parent commit. 
Otherwise the exception would be delayed until the first + access of the config parser""" + parent_matches_head = repo.head.commit == parent_commit + if not repo.bare and parent_matches_head: + fp_module = cls.k_modules_file + fp_module_path = os.path.join(repo.working_tree_dir, fp_module) + if not os.path.isfile(fp_module_path): + raise IOError("%s file was not accessible" % fp_module_path) + # END handle existance + fp_module = fp_module_path + else: + try: + fp_module = cls._sio_modules(parent_commit) + except KeyError: + raise IOError("Could not find %s file in the tree of parent commit %s" % (cls.k_modules_file, parent_commit)) + # END handle exceptions + # END handle non-bare working tree + + if not read_only and (repo.bare or not parent_matches_head): + raise ValueError("Cannot write blobs of 'historical' submodule configurations") + # END handle writes of historical submodules + + return SubmoduleConfigParser(fp_module, read_only = read_only) - def _clear_cache(self): - # clear the possibly changed values - for name in self._cache_attrs: - try: - delattr(self, name) - except AttributeError: - pass - # END try attr deletion - # END for each name to delete - - @classmethod - def _sio_modules(cls, parent_commit): - """:return: Configuration file as StringIO - we only access it through the respective blob's data""" - sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) - sio.name = cls.k_modules_file - return sio - - def _config_parser_constrained(self, read_only): - """:return: Config Parser constrained to our submodule in read or write mode""" - parser = self._config_parser(self.repo, self._parent_commit, read_only) - parser.set_submodule(self) - return SectionConstraint(parser, sm_section(self.name)) - - #{ Edit Interface - - @classmethod - def add(cls, repo, name, path, url=None, branch=None, no_checkout=False): - """Add a new submodule to the given repository. This will alter the index - as well as the .gitmodules file, but will not create a new commit. - If the submodule already exists, no matter if the configuration differs - from the one provided, the existing submodule will be returned. - - :param repo: Repository instance which should receive the submodule - :param name: The name/identifier for the submodule - :param path: repository-relative or absolute path at which the submodule - should be located - It will be created as required during the repository initialization. - :param url: git-clone compatible URL, see git-clone reference for more information - If None, the repository is assumed to exist, and the url of the first - remote is taken instead. This is useful if you want to make an existing - repository a submodule of anotherone. - :param branch: name of branch at which the submodule should (later) be checked out. - The given branch must exist in the remote repository, and will be checked - out locally as a tracking branch. - It will only be written into the configuration if it not None, which is - when the checked out branch will be the one the remote HEAD pointed to. - The result you get in these situation is somewhat fuzzy, and it is recommended - to specify at least 'master' here. 
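The constrained parser built here backs the public config_reader()/config_writer() accessors, which hide the section name from callers. A read-only sketch, with 'gitdb' as a hypothetical submodule name and a placeholder repository path:

```python
# Hedged sketch: read a submodule's recorded url and path from .gitmodules.
from git import Repo

repo = Repo('/tmp/example')
sm = repo.submodule('gitdb')     # hypothetical submodule name
reader = sm.config_reader()      # SectionConstraint on [submodule "gitdb"]
print reader.get_value('url')
print reader.get_value('path')
```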
- Examples are 'master' or 'feature/new' - :param no_checkout: if True, and if the repository has to be cloned manually, - no checkout will be performed - :return: The newly created submodule instance - :note: works atomically, such that no change will be done if the repository - update fails for instance""" - if repo.bare: - raise InvalidGitRepositoryError("Cannot add submodules to bare repositories") - # END handle bare repos - - path = to_native_path_linux(path) - if path.endswith('/'): - path = path[:-1] - # END handle trailing slash - - # assure we never put backslashes into the url, as some operating systems - # like it ... - if url != None: - url = to_native_path_linux(url) - #END assure url correctness - - # INSTANTIATE INTERMEDIATE SM - sm = cls(repo, cls.NULL_BIN_SHA, cls.k_default_mode, path, name) - if sm.exists(): - # reretrieve submodule from tree - try: - return repo.head.commit.tree[path] - except KeyError: - # could only be in index - index = repo.index - entry = index.entries[index.entry_key(path, 0)] - sm.binsha = entry.binsha - return sm - # END handle exceptions - # END handle existing - - # fake-repo - we only need the functionality on the branch instance - br = git.Head(repo, git.Head.to_full_path(str(branch) or cls.k_head_default)) - has_module = sm.module_exists() - branch_is_default = branch is None - if has_module and url is not None: - if url not in [r.url for r in sm.module().remotes]: - raise ValueError("Specified URL '%s' does not match any remote url of the repository at '%s'" % (url, sm.abspath)) - # END check url - # END verify urls match - - mrepo = None - if url is None: - if not has_module: - raise ValueError("A URL was not given and existing repository did not exsit at %s" % path) - # END check url - mrepo = sm.module() - urls = [r.url for r in mrepo.remotes] - if not urls: - raise ValueError("Didn't find any remote url in repository at %s" % sm.abspath) - # END verify we have url - url = urls[0] - else: - # clone new repo - kwargs = {'n' : no_checkout} - if not branch_is_default: - kwargs['b'] = br.name - # END setup checkout-branch - mrepo = git.Repo.clone_from(url, path, **kwargs) - # END verify url - - # update configuration and index - index = sm.repo.index - writer = sm.config_writer(index=index, write=False) - writer.set_value('url', url) - writer.set_value('path', path) - - sm._url = url - if not branch_is_default: - # store full path - writer.set_value(cls.k_head_option, br.path) - sm._branch_path = br.path - # END handle path - del(writer) - - # we deliberatly assume that our head matches our index ! - pcommit = repo.head.commit - sm._parent_commit = pcommit - sm.binsha = mrepo.head.commit.binsha - index.add([sm], write=True) - - return sm - - def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, - dry_run=False): - """Update the repository of this submodule to point to the checkout - we point at with the binsha of this instance. - - :param recursive: if True, we will operate recursively and update child- - modules as well. - :param init: if True, the module repository will be cloned into place if necessary - :param to_latest_revision: if True, the submodule's sha will be ignored during checkout. - Instead, the remote will be fetched, and the local tracking branch updated. 
- This only works if we have a local tracking branch, which is the case - if the remote repository had a master branch, or of the 'branch' option - was specified for this submodule and the branch existed remotely - :param progress: UpdateProgress instance or None of no progress should be shown - :param dry_run: if True, the operation will only be simulated, but not performed. - All performed operations are read-only - :note: does nothing in bare repositories - :note: method is definitely not atomic if recurisve is True - :return: self""" - if self.repo.bare: - return self - #END pass in bare mode - - if progress is None: - progress = UpdateProgress() - #END handle progress - prefix = '' - if dry_run: - prefix = "DRY-RUN: " - #END handle prefix - - # to keep things plausible in dry-run mode - if dry_run: - mrepo = None - #END init mrepo - - # ASSURE REPO IS PRESENT AND UPTODATE - ##################################### - try: - mrepo = self.module() - rmts = mrepo.remotes - len_rmts = len(rmts) - for i, remote in enumerate(rmts): - op = FETCH - if i == 0: - op |= BEGIN - #END handle start - - progress.update(op, i, len_rmts, prefix+"Fetching remote %s of submodule %r" % (remote, self.name)) - #=============================== - if not dry_run: - remote.fetch(progress=progress) - #END handle dry-run - #=============================== - if i == len_rmts-1: - op |= END - #END handle end - progress.update(op, i, len_rmts, prefix+"Done fetching remote of submodule %r" % self.name) - #END fetch new data - except InvalidGitRepositoryError: - if not init: - return self - # END early abort if init is not allowed - import git - - # there is no git-repository yet - but delete empty paths - module_path = join_path_native(self.repo.working_tree_dir, self.path) - if not dry_run and os.path.isdir(module_path): - try: - os.rmdir(module_path) - except OSError: - raise OSError("Module directory at %r does already exist and is non-empty" % module_path) - # END handle OSError - # END handle directory removal - - # don't check it out at first - nonetheless it will create a local - # branch according to the remote-HEAD if possible - progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name)) - if not dry_run: - mrepo = git.Repo.clone_from(self.url, module_path, n=True) - #END handle dry-run - progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path) - - - if not dry_run: - # see whether we have a valid branch to checkout - try: - # find a remote which has our branch - we try to be flexible - remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name) - local_branch = mkhead(mrepo, self.branch_path) - - # have a valid branch, but no checkout - make sure we can figure - # that out by marking the commit with a null_sha - local_branch.set_object(util.Object(mrepo, self.NULL_BIN_SHA)) - # END initial checkout + branch creation - - # make sure HEAD is not detached - mrepo.head.set_reference(local_branch, logmsg="submodule: attaching head to %s" % local_branch) - mrepo.head.ref.set_tracking_branch(remote_branch) - except IndexError: - print >> sys.stderr, "Warning: Failed to checkout tracking branch %s" % self.branch_path - #END handle tracking branch - - # NOTE: Have to write the repo config file as well, otherwise - # the default implementation will be offended and not update the repository - # Maybe this is a good way to assure it doesn't get into our way, but - # we want to stay backwards compatible too ... . Its so redundant ! 
- self.repo.config_writer().set_value(sm_section(self.name), 'url', self.url) - #END handle dry_run - #END handle initalization - - - # DETERMINE SHAS TO CHECKOUT - ############################ - binsha = self.binsha - hexsha = self.hexsha - if mrepo is not None: - # mrepo is only set if we are not in dry-run mode or if the module existed - is_detached = mrepo.head.is_detached - #END handle dry_run - - if mrepo is not None and to_latest_revision: - msg_base = "Cannot update to latest revision in repository at %r as " % mrepo.working_dir - if not is_detached: - rref = mrepo.head.ref.tracking_branch() - if rref is not None: - rcommit = rref.commit - binsha = rcommit.binsha - hexsha = rcommit.hexsha - else: - print >> sys.stderr, "%s a tracking branch was not set for local branch '%s'" % (msg_base, mrepo.head.ref) - # END handle remote ref - else: - print >> sys.stderr, "%s there was no local tracking branch" % msg_base - # END handle detached head - # END handle to_latest_revision option - - # update the working tree - # handles dry_run - if mrepo is not None and mrepo.head.commit.binsha != binsha: - progress.update(BEGIN|UPDWKTREE, 0, 1, prefix+"Updating working tree at %s for submodule %r to revision %s" % (self.path, self.name, hexsha)) - if not dry_run: - if is_detached: - # NOTE: for now we force, the user is no supposed to change detached - # submodules anyway. Maybe at some point this becomes an option, to - # properly handle user modifications - see below for future options - # regarding rebase and merge. - mrepo.git.checkout(hexsha, force=True) - else: - # TODO: allow to specify a rebase, merge, or reset - # TODO: Warn if the hexsha forces the tracking branch off the remote - # branch - this should be prevented when setting the branch option - mrepo.head.reset(hexsha, index=True, working_tree=True) - # END handle checkout - #END handle dry_run - progress.update(END|UPDWKTREE, 0, 1, prefix+"Done updating working tree for submodule %r" % self.name) - # END update to new commit only if needed - - # HANDLE RECURSION - ################## - if recursive: - # in dry_run mode, the module might not exist - if mrepo is not None: - for submodule in self.iter_items(self.module()): - submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run) - # END handle recursive update - #END handle dry run - # END for each submodule - - return self - - @unbare_repo - def move(self, module_path, configuration=True, module=True): - """Move the submodule to a another module path. This involves physically moving - the repository at our current path, changing the configuration, as well as - adjusting our index entry accordingly. - - :param module_path: the path to which to move our module, given as - repository-relative path. Intermediate directories will be created - accordingly. If the path already exists, it must be empty. - Trailling (back)slashes are removed automatically - :param configuration: if True, the configuration will be adjusted to let - the submodule point to the given path. - :param module: if True, the repository managed by this submodule - will be moved, not the configuration. This will effectively - leave your repository in an inconsistent state unless the configuration - and index already point to the target location. 
- :return: self - :raise ValueError: if the module path existed and was not empty, or was a file - :note: Currently the method is not atomic, and it could leave the repository - in an inconsistent state if a sub-step fails for some reason - """ - if module + configuration < 1: - raise ValueError("You must specify to move at least the module or the configuration of the submodule") - #END handle input - - module_path = to_native_path_linux(module_path) - if module_path.endswith('/'): - module_path = module_path[:-1] - # END handle trailing slash - - # VERIFY DESTINATION - if module_path == self.path: - return self - #END handle no change - - dest_path = join_path_native(self.repo.working_tree_dir, module_path) - if os.path.isfile(dest_path): - raise ValueError("Cannot move repository onto a file: %s" % dest_path) - # END handle target files - - index = self.repo.index - tekey = index.entry_key(module_path, 0) - # if the target item already exists, fail - if configuration and tekey in index.entries: - raise ValueError("Index entry for target path did alredy exist") - #END handle index key already there - - # remove existing destination - if module: - if os.path.exists(dest_path): - if len(os.listdir(dest_path)): - raise ValueError("Destination module directory was not empty") - #END handle non-emptyness - - if os.path.islink(dest_path): - os.remove(dest_path) - else: - os.rmdir(dest_path) - #END handle link - else: - # recreate parent directories - # NOTE: renames() does that now - pass - #END handle existance - # END handle module - - # move the module into place if possible - cur_path = self.abspath - renamed_module = False - if module and os.path.exists(cur_path): - os.renames(cur_path, dest_path) - renamed_module = True - #END move physical module - - - # rename the index entry - have to manipulate the index directly as - # git-mv cannot be used on submodules ... yeah - try: - if configuration: - try: - ekey = index.entry_key(self.path, 0) - entry = index.entries[ekey] - del(index.entries[ekey]) - nentry = git.IndexEntry(entry[:3]+(module_path,)+entry[4:]) - index.entries[tekey] = nentry - except KeyError: - raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path)) - #END handle submodule doesn't exist - - # update configuration - writer = self.config_writer(index=index) # auto-write - writer.set_value('path', module_path) - self.path = module_path - del(writer) - # END handle configuration flag - except Exception: - if renamed_module: - os.renames(dest_path, cur_path) - # END undo module renaming - raise - #END handle undo rename - - return self - - @unbare_repo - def remove(self, module=True, force=False, configuration=True, dry_run=False): - """Remove this submodule from the repository. This will remove our entry - from the .gitmodules file and the entry in the .git/config file. - - :param module: If True, the module we point to will be deleted - as well. If the module is currently on a commit which is not part - of any branch in the remote, if the currently checked out branch - working tree, or untracked files, - is ahead of its tracking branch, if you have modifications in the - In case the removal of the repository fails for these reasons, the - submodule status will not have been altered. - If this submodule has child-modules on its own, these will be deleted - prior to touching the own module. - :param force: Enforces the deletion of the module even though it contains - modifications. This basically enforces a brute-force file system based - deletion. 
- :param configuration: if True, the submodule is deleted from the configuration, - otherwise it isn't. Although this should be enabled most of the times, - this flag enables you to safely delete the repository of your submodule. - :param dry_run: if True, we will not actually do anything, but throw the errors - we would usually throw - :return: self - :note: doesn't work in bare repositories - :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted - :raise OSError: if directories or files could not be removed""" - if not (module + configuration): - raise ValueError("Need to specify to delete at least the module, or the configuration") - # END handle params - - # DELETE MODULE REPOSITORY - ########################## - if module and self.module_exists(): - if force: - # take the fast lane and just delete everything in our module path - # TODO: If we run into permission problems, we have a highly inconsistent - # state. Delete the .git folders last, start with the submodules first - mp = self.abspath - method = None - if os.path.islink(mp): - method = os.remove - elif os.path.isdir(mp): - method = rmtree - elif os.path.exists(mp): - raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory") - #END handle brutal deletion - if not dry_run: - assert method - method(mp) - #END apply deletion method - else: - # verify we may delete our module - mod = self.module() - if mod.is_dirty(untracked_files=True): - raise InvalidGitRepositoryError("Cannot delete module at %s with any modifications, unless force is specified" % mod.working_tree_dir) - # END check for dirt - - # figure out whether we have new commits compared to the remotes - # NOTE: If the user pulled all the time, the remote heads might - # not have been updated, so commits coming from the remote look - # as if they come from us. But we stay strictly read-only and - # don't fetch beforhand. 
- for remote in mod.remotes: - num_branches_with_new_commits = 0 - rrefs = remote.refs - for rref in rrefs: - num_branches_with_new_commits = len(mod.git.cherry(rref)) != 0 - # END for each remote ref - # not a single remote branch contained all our commits - if num_branches_with_new_commits == len(rrefs): - raise InvalidGitRepositoryError("Cannot delete module at %s as there are new commits" % mod.working_tree_dir) - # END handle new commits - # have to manually delete references as python's scoping is - # not existing, they could keep handles open ( on windows this is a problem ) - if len(rrefs): - del(rref) - #END handle remotes - del(rrefs) - del(remote) - # END for each remote - - # gently remove all submodule repositories - for sm in self.children(): - sm.remove(module=True, force=False, configuration=False, dry_run=dry_run) - del(sm) - # END for each child-submodule - - # finally delete our own submodule - if not dry_run: - wtd = mod.working_tree_dir - del(mod) # release file-handles (windows) - rmtree(wtd) - # END delete tree if possible - # END handle force - # END handle module deletion - - # DELETE CONFIGURATION - ###################### - if configuration and not dry_run: - # first the index-entry - index = self.repo.index - try: - del(index.entries[index.entry_key(self.path, 0)]) - except KeyError: - pass - #END delete entry - index.write() - - # now git config - need the config intact, otherwise we can't query - # inforamtion anymore - self.repo.config_writer().remove_section(sm_section(self.name)) - self.config_writer().remove_section() - # END delete configuration + def _clear_cache(self): + # clear the possibly changed values + for name in self._cache_attrs: + try: + delattr(self, name) + except AttributeError: + pass + # END try attr deletion + # END for each name to delete + + @classmethod + def _sio_modules(cls, parent_commit): + """:return: Configuration file as StringIO - we only access it through the respective blob's data""" + sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) + sio.name = cls.k_modules_file + return sio + + def _config_parser_constrained(self, read_only): + """:return: Config Parser constrained to our submodule in read or write mode""" + parser = self._config_parser(self.repo, self._parent_commit, read_only) + parser.set_submodule(self) + return SectionConstraint(parser, sm_section(self.name)) + + #{ Edit Interface + + @classmethod + def add(cls, repo, name, path, url=None, branch=None, no_checkout=False): + """Add a new submodule to the given repository. This will alter the index + as well as the .gitmodules file, but will not create a new commit. + If the submodule already exists, no matter if the configuration differs + from the one provided, the existing submodule will be returned. + + :param repo: Repository instance which should receive the submodule + :param name: The name/identifier for the submodule + :param path: repository-relative or absolute path at which the submodule + should be located + It will be created as required during the repository initialization. + :param url: git-clone compatible URL, see git-clone reference for more information + If None, the repository is assumed to exist, and the url of the first + remote is taken instead. This is useful if you want to make an existing + repository a submodule of anotherone. + :param branch: name of branch at which the submodule should (later) be checked out. 
+ The given branch must exist in the remote repository, and will be checked + out locally as a tracking branch. + It will only be written into the configuration if it not None, which is + when the checked out branch will be the one the remote HEAD pointed to. + The result you get in these situation is somewhat fuzzy, and it is recommended + to specify at least 'master' here. + Examples are 'master' or 'feature/new' + :param no_checkout: if True, and if the repository has to be cloned manually, + no checkout will be performed + :return: The newly created submodule instance + :note: works atomically, such that no change will be done if the repository + update fails for instance""" + if repo.bare: + raise InvalidGitRepositoryError("Cannot add submodules to bare repositories") + # END handle bare repos + + path = to_native_path_linux(path) + if path.endswith('/'): + path = path[:-1] + # END handle trailing slash + + # assure we never put backslashes into the url, as some operating systems + # like it ... + if url != None: + url = to_native_path_linux(url) + #END assure url correctness + + # INSTANTIATE INTERMEDIATE SM + sm = cls(repo, cls.NULL_BIN_SHA, cls.k_default_mode, path, name) + if sm.exists(): + # reretrieve submodule from tree + try: + return repo.head.commit.tree[path] + except KeyError: + # could only be in index + index = repo.index + entry = index.entries[index.entry_key(path, 0)] + sm.binsha = entry.binsha + return sm + # END handle exceptions + # END handle existing + + # fake-repo - we only need the functionality on the branch instance + br = git.Head(repo, git.Head.to_full_path(str(branch) or cls.k_head_default)) + has_module = sm.module_exists() + branch_is_default = branch is None + if has_module and url is not None: + if url not in [r.url for r in sm.module().remotes]: + raise ValueError("Specified URL '%s' does not match any remote url of the repository at '%s'" % (url, sm.abspath)) + # END check url + # END verify urls match + + mrepo = None + if url is None: + if not has_module: + raise ValueError("A URL was not given and existing repository did not exsit at %s" % path) + # END check url + mrepo = sm.module() + urls = [r.url for r in mrepo.remotes] + if not urls: + raise ValueError("Didn't find any remote url in repository at %s" % sm.abspath) + # END verify we have url + url = urls[0] + else: + # clone new repo + kwargs = {'n' : no_checkout} + if not branch_is_default: + kwargs['b'] = br.name + # END setup checkout-branch + mrepo = git.Repo.clone_from(url, path, **kwargs) + # END verify url + + # update configuration and index + index = sm.repo.index + writer = sm.config_writer(index=index, write=False) + writer.set_value('url', url) + writer.set_value('path', path) + + sm._url = url + if not branch_is_default: + # store full path + writer.set_value(cls.k_head_option, br.path) + sm._branch_path = br.path + # END handle path + del(writer) + + # we deliberatly assume that our head matches our index ! + pcommit = repo.head.commit + sm._parent_commit = pcommit + sm.binsha = mrepo.head.commit.binsha + index.add([sm], write=True) + + return sm + + def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, + dry_run=False): + """Update the repository of this submodule to point to the checkout + we point at with the binsha of this instance. + + :param recursive: if True, we will operate recursively and update child- + modules as well. 
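The add() implementation shown above can be exercised as follows; the url and paths are placeholders, and the closing commit records the new .gitmodules entry together with the gitlink:

```python
# Hedged sketch: register a submodule, then commit index and .gitmodules.
from git import Repo
from git.objects.submodule.base import Submodule

repo = Repo('/tmp/example')                       # placeholder path
sm = Submodule.add(repo, 'gitdb', 'ext/gitdb',
                   url='git://example.com/gitdb.git', branch='master')
repo.index.commit("add gitdb submodule at ext/gitdb")
```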
+ :param init: if True, the module repository will be cloned into place if necessary + :param to_latest_revision: if True, the submodule's sha will be ignored during checkout. + Instead, the remote will be fetched, and the local tracking branch updated. + This only works if we have a local tracking branch, which is the case + if the remote repository had a master branch, or of the 'branch' option + was specified for this submodule and the branch existed remotely + :param progress: UpdateProgress instance or None of no progress should be shown + :param dry_run: if True, the operation will only be simulated, but not performed. + All performed operations are read-only + :note: does nothing in bare repositories + :note: method is definitely not atomic if recurisve is True + :return: self""" + if self.repo.bare: + return self + #END pass in bare mode + + if progress is None: + progress = UpdateProgress() + #END handle progress + prefix = '' + if dry_run: + prefix = "DRY-RUN: " + #END handle prefix + + # to keep things plausible in dry-run mode + if dry_run: + mrepo = None + #END init mrepo + + # ASSURE REPO IS PRESENT AND UPTODATE + ##################################### + try: + mrepo = self.module() + rmts = mrepo.remotes + len_rmts = len(rmts) + for i, remote in enumerate(rmts): + op = FETCH + if i == 0: + op |= BEGIN + #END handle start + + progress.update(op, i, len_rmts, prefix+"Fetching remote %s of submodule %r" % (remote, self.name)) + #=============================== + if not dry_run: + remote.fetch(progress=progress) + #END handle dry-run + #=============================== + if i == len_rmts-1: + op |= END + #END handle end + progress.update(op, i, len_rmts, prefix+"Done fetching remote of submodule %r" % self.name) + #END fetch new data + except InvalidGitRepositoryError: + if not init: + return self + # END early abort if init is not allowed + import git + + # there is no git-repository yet - but delete empty paths + module_path = join_path_native(self.repo.working_tree_dir, self.path) + if not dry_run and os.path.isdir(module_path): + try: + os.rmdir(module_path) + except OSError: + raise OSError("Module directory at %r does already exist and is non-empty" % module_path) + # END handle OSError + # END handle directory removal + + # don't check it out at first - nonetheless it will create a local + # branch according to the remote-HEAD if possible + progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name)) + if not dry_run: + mrepo = git.Repo.clone_from(self.url, module_path, n=True) + #END handle dry-run + progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path) + + + if not dry_run: + # see whether we have a valid branch to checkout + try: + # find a remote which has our branch - we try to be flexible + remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name) + local_branch = mkhead(mrepo, self.branch_path) + + # have a valid branch, but no checkout - make sure we can figure + # that out by marking the commit with a null_sha + local_branch.set_object(util.Object(mrepo, self.NULL_BIN_SHA)) + # END initial checkout + branch creation + + # make sure HEAD is not detached + mrepo.head.set_reference(local_branch, logmsg="submodule: attaching head to %s" % local_branch) + mrepo.head.ref.set_tracking_branch(remote_branch) + except IndexError: + print >> sys.stderr, "Warning: Failed to checkout tracking branch %s" % self.branch_path + #END handle tracking branch + + # NOTE: Have to write the repo 
config file as well, otherwise
+ # the default implementation will be offended and not update the repository
+ # Maybe this is a good way to assure it doesn't get in our way, but
+ # we want to stay backwards compatible too ... It's so redundant!
+ self.repo.config_writer().set_value(sm_section(self.name), 'url', self.url)
+ #END handle dry_run
+ #END handle initialization
+
+
+ # DETERMINE SHAS TO CHECKOUT
+ ############################
+ binsha = self.binsha
+ hexsha = self.hexsha
+ if mrepo is not None:
+ # mrepo is only set if we are not in dry-run mode or if the module existed
+ is_detached = mrepo.head.is_detached
+ #END handle dry_run
+
+ if mrepo is not None and to_latest_revision:
+ msg_base = "Cannot update to latest revision in repository at %r as " % mrepo.working_dir
+ if not is_detached:
+ rref = mrepo.head.ref.tracking_branch()
+ if rref is not None:
+ rcommit = rref.commit
+ binsha = rcommit.binsha
+ hexsha = rcommit.hexsha
+ else:
+ print >> sys.stderr, "%s a tracking branch was not set for local branch '%s'" % (msg_base, mrepo.head.ref)
+ # END handle remote ref
+ else:
+ print >> sys.stderr, "%s there was no local tracking branch" % msg_base
+ # END handle detached head
+ # END handle to_latest_revision option
+
+ # update the working tree
+ # handles dry_run
+ if mrepo is not None and mrepo.head.commit.binsha != binsha:
+ progress.update(BEGIN|UPDWKTREE, 0, 1, prefix+"Updating working tree at %s for submodule %r to revision %s" % (self.path, self.name, hexsha))
+ if not dry_run:
+ if is_detached:
+ # NOTE: for now we force, the user is not supposed to change detached
+ # submodules anyway. Maybe at some point this becomes an option, to
+ # properly handle user modifications - see below for future options
+ # regarding rebase and merge.
+ mrepo.git.checkout(hexsha, force=True)
+ else:
+ # TODO: allow specifying a rebase, merge, or reset
+ # TODO: Warn if the hexsha forces the tracking branch off the remote
+ # branch - this should be prevented when setting the branch option
+ mrepo.head.reset(hexsha, index=True, working_tree=True)
+ # END handle checkout
+ #END handle dry_run
+ progress.update(END|UPDWKTREE, 0, 1, prefix+"Done updating working tree for submodule %r" % self.name)
+ # END update to new commit only if needed
+
+ # HANDLE RECURSION
+ ##################
+ if recursive:
+ # in dry_run mode, the module might not exist
+ if mrepo is not None:
+ for submodule in self.iter_items(self.module()):
+ submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run)
+ # END handle recursive update
+ #END handle dry run
+ # END for each submodule
+
+ return self
+
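A brief, hedged sketch of driving the ``update`` method defined above, continuing with the ``sm`` instance from the earlier ``add`` example::

    sm.update(init=True)                     # clone the module if it is not yet present
    sm.update(recursive=True, dry_run=True)  # simulate only; messages carry a 'DRY-RUN: ' prefix
    sm.update(to_latest_revision=True)       # fetch and follow the tracking branch instead
                                             # of the recorded binsha
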
+ @unbare_repo
+ def move(self, module_path, configuration=True, module=True):
+ """Move the submodule to another module path. This involves physically moving
+ the repository at our current path, changing the configuration, as well as
+ adjusting our index entry accordingly.
+
+ :param module_path: the path to which to move our module, given as
+ repository-relative path. Intermediate directories will be created
+ accordingly. If the path already exists, it must be empty.
+ Trailing (back)slashes are removed automatically
+ :param configuration: if True, the configuration will be adjusted to let
+ the submodule point to the given path.
+ :param module: if True, the repository managed by this submodule
+ will be moved, not the configuration. This will effectively
+ leave your repository in an inconsistent state unless the configuration
+ and index already point to the target location.
+ :return: self
+ :raise ValueError: if the module path existed and was not empty, or was a file
+ :note: Currently the method is not atomic, and it could leave the repository
+ in an inconsistent state if a sub-step fails for some reason
+ """
+ if module + configuration < 1:
+ raise ValueError("You must specify to move at least the module or the configuration of the submodule")
+ #END handle input
+
+ module_path = to_native_path_linux(module_path)
+ if module_path.endswith('/'):
+ module_path = module_path[:-1]
+ # END handle trailing slash
+
+ # VERIFY DESTINATION
+ if module_path == self.path:
+ return self
+ #END handle no change
+
+ dest_path = join_path_native(self.repo.working_tree_dir, module_path)
+ if os.path.isfile(dest_path):
+ raise ValueError("Cannot move repository onto a file: %s" % dest_path)
+ # END handle target files
+
+ index = self.repo.index
+ tekey = index.entry_key(module_path, 0)
+ # if the target item already exists, fail
+ if configuration and tekey in index.entries:
+ raise ValueError("Index entry for target path already exists")
+ #END handle index key already there
+
+ # remove existing destination
+ if module:
+ if os.path.exists(dest_path):
+ if len(os.listdir(dest_path)):
+ raise ValueError("Destination module directory was not empty")
+ #END handle non-emptiness
+
+ if os.path.islink(dest_path):
+ os.remove(dest_path)
+ else:
+ os.rmdir(dest_path)
+ #END handle link
+ else:
+ # recreate parent directories
+ # NOTE: renames() does that now
+ pass
+ #END handle existence
+ # END handle module
+
+ # move the module into place if possible
+ cur_path = self.abspath
+ renamed_module = False
+ if module and os.path.exists(cur_path):
+ os.renames(cur_path, dest_path)
+ renamed_module = True
+ #END move physical module
+
+
+ # rename the index entry - have to manipulate the index directly as
+ # git-mv cannot be used on submodules
+ try:
+ if configuration:
+ try:
+ ekey = index.entry_key(self.path, 0)
+ entry = index.entries[ekey]
+ del(index.entries[ekey])
+ nentry = git.IndexEntry(entry[:3]+(module_path,)+entry[4:])
+ index.entries[tekey] = nentry
+ except KeyError:
+ raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path))
+ #END handle submodule doesn't exist
+
+ # update configuration
+ writer = self.config_writer(index=index) # auto-write
+ writer.set_value('path', module_path)
+ self.path = module_path
+ del(writer)
+ # END handle configuration flag
+ except Exception:
+ if renamed_module:
+ os.renames(dest_path, cur_path)
+ # END undo module renaming
+ raise
+ #END handle undo rename
+
+ return self
+
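As a hedged illustration of ``move``, with a made-up repository-relative target path::

    sm.move('ext/lib')           # relocates working tree, index entry and configured path
    assert sm.path == 'ext/lib'
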
+ @unbare_repo
+ def remove(self, module=True, force=False, configuration=True, dry_run=False):
+ """Remove this submodule from the repository. This will remove our entry
+ from the .gitmodules file and the entry in the .git/config file.
+
+ :param module: If True, the module we point to will be deleted
+ as well. The deletion will fail if the module is currently on a commit
+ which is not part of any branch in the remote, if the currently checked
+ out branch is ahead of its tracking branch, or if you have modifications
+ in the working tree or untracked files.
+ In case the removal of the repository fails for these reasons, the
+ submodule status will not have been altered.
+ If this submodule has child-modules of its own, these will be deleted
+ prior to touching the module itself.
+ :param force: Enforces the deletion of the module even though it contains
+ modifications. This basically amounts to a brute-force, file-system-based
+ deletion.
+ :param configuration: if True, the submodule is deleted from the configuration,
+ otherwise it isn't. Although this should be enabled most of the time,
+ this flag enables you to safely delete the repository of your submodule.
+ :param dry_run: if True, we will not actually do anything, but throw the errors
+ we would usually throw
+ :return: self
+ :note: doesn't work in bare repositories
+ :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted
+ :raise OSError: if directories or files could not be removed"""
+ if not (module + configuration):
+ raise ValueError("Need to specify to delete at least the module, or the configuration")
+ # END handle params
+
+ # DELETE MODULE REPOSITORY
+ ##########################
+ if module and self.module_exists():
+ if force:
+ # take the fast lane and just delete everything in our module path
+ # TODO: If we run into permission problems, we have a highly inconsistent
+ # state. Delete the .git folders last, start with the submodules first
+ mp = self.abspath
+ method = None
+ if os.path.islink(mp):
+ method = os.remove
+ elif os.path.isdir(mp):
+ method = rmtree
+ elif os.path.exists(mp):
+ raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory")
+ #END handle brutal deletion
+ if not dry_run:
+ assert method
+ method(mp)
+ #END apply deletion method
+ else:
+ # verify we may delete our module
+ mod = self.module()
+ if mod.is_dirty(untracked_files=True):
+ raise InvalidGitRepositoryError("Cannot delete module at %s with any modifications, unless force is specified" % mod.working_tree_dir)
+ # END check for dirt
+
+ # figure out whether we have new commits compared to the remotes
+ # NOTE: If the user pulled all the time, the remote heads might
+ # not have been updated, so commits coming from the remote look
+ # as if they come from us. But we stay strictly read-only and
+ # don't fetch beforehand.
+ for remote in mod.remotes:
+ num_branches_with_new_commits = 0
+ rrefs = remote.refs
+ for rref in rrefs:
+ num_branches_with_new_commits += len(mod.git.cherry(rref)) != 0
+ # END for each remote ref
+ # not a single remote branch contained all our commits
+ if num_branches_with_new_commits == len(rrefs):
+ raise InvalidGitRepositoryError("Cannot delete module at %s as there are new commits" % mod.working_tree_dir)
+ # END handle new commits
+ # have to manually delete references, as Python's scoping would
+ # otherwise keep handles open ( on windows this is a problem )
+ if len(rrefs):
+ del(rref)
+ #END handle remotes
+ del(rrefs)
+ del(remote)
+ # END for each remote
+
+ # gently remove all submodule repositories
+ for sm in self.children():
+ sm.remove(module=True, force=False, configuration=False, dry_run=dry_run)
+ del(sm)
+ # END for each child-submodule
+
+ # finally delete our own submodule
+ if not dry_run:
+ wtd = mod.working_tree_dir
+ del(mod) # release file-handles (windows)
+ rmtree(wtd)
+ # END delete tree if possible
+ # END handle force
+ # END handle module deletion
+
+ # DELETE CONFIGURATION
+ ######################
+ if configuration and not dry_run:
+ # first the index-entry
+ index = self.repo.index
+ try:
+ del(index.entries[index.entry_key(self.path, 0)])
+ except KeyError:
+ pass
+ #END delete entry
+ index.write()
+
+ # now git config - need the config intact, otherwise we can't query
+ # information anymore
+ self.repo.config_writer().remove_section(sm_section(self.name))
+ self.config_writer().remove_section()
+ # END delete configuration
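A short, hedged sketch of the ``remove`` call defined above; as before, ``sm`` and the commit message are illustrative only::

    sm.remove(dry_run=True)      # only raises if the module could not be removed safely
    sm.remove()                  # module and configuration are deleted for real
    sm.repo.index.commit("remove submodule 'lib'")
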
- # void our data not to delay invalid access
- self._clear_cache()
-
- return self
-
- def set_parent_commit(self, commit, check=True):
- """Set this instance to use the given commit whose tree is supposed to
- contain the .gitmodules blob.
-
- :param commit: Commit'ish reference pointing at the root_tree
- :param check: if True, relatively expensive checks will be performed to verify
- validity of the submodule.
- :raise ValueError: if the commit's tree didn't contain the .gitmodules blob.
- :raise ValueError: if the parent commit didn't store this submodule under the
- current path
- :return: self"""
- pcommit = self.repo.commit(commit)
- pctree = pcommit.tree
- if self.k_modules_file not in pctree:
- raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file))
- # END handle exceptions
-
- prev_pc = self._parent_commit
- self._parent_commit = pcommit
-
- if check:
- parser = self._config_parser(self.repo, self._parent_commit, read_only=True)
- if not parser.has_section(sm_section(self.name)):
- self._parent_commit = prev_pc
- raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit))
- # END handle submodule did not exist
- # END handle checking mode
-
- # update our sha, it could have changed
- self.binsha = pctree[self.path].binsha
-
- self._clear_cache()
-
- return self
-
- @unbare_repo
- def config_writer(self, index=None, write=True):
- """:return: a config writer instance allowing you to read and write the data
- belonging to this submodule into the .gitmodules file.
-
- :param index: if not None, an IndexFile instance which should be written.
- defaults to the index of the Submodule's parent repository.
- :param write: if True, the index will be written each time a configuration
- value changes.
- :note: the parameters allow for a more efficient writing of the index, - as you can pass in a modified index on your own, prevent automatic writing, - and write yourself once the whole operation is complete - :raise ValueError: if trying to get a writer on a parent_commit which does not - match the current head commit - :raise IOError: If the .gitmodules file/blob could not be read""" - writer = self._config_parser_constrained(read_only=False) - if index is not None: - writer.config._index = index - writer.config._auto_write = write - return writer - - #} END edit interface - - #{ Query Interface - - @unbare_repo - def module(self): - """:return: Repo instance initialized from the repository at our submodule path - :raise InvalidGitRepositoryError: if a repository was not available. This could - also mean that it was not yet initialized""" - # late import to workaround circular dependencies - module_path = self.abspath - try: - repo = git.Repo(module_path) - if repo != self.repo: - return repo - # END handle repo uninitialized - except (InvalidGitRepositoryError, NoSuchPathError): - raise InvalidGitRepositoryError("No valid repository at %s" % self.path) - else: - raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % module_path) - # END handle exceptions - - def module_exists(self): - """:return: True if our module exists and is a valid git repository. See module() method""" - try: - self.module() - return True - except Exception: - return False - # END handle exception - - def exists(self): - """ - :return: True if the submodule exists, False otherwise. Please note that - a submodule may exist (in the .gitmodules file) even though its module - doesn't exist""" - # keep attributes for later, and restore them if we have no valid data - # this way we do not actually alter the state of the object - loc = locals() - for attr in self._cache_attrs: - if hasattr(self, attr): - loc[attr] = getattr(self, attr) - # END if we have the attribute cache - #END for each attr - self._clear_cache() - - try: - try: - self.path - return True - except Exception: - return False - # END handle exceptions - finally: - for attr in self._cache_attrs: - if attr in loc: - setattr(self, attr, loc[attr]) - # END if we have a cache - # END reapply each attribute - # END handle object state consistency - - @property - def branch(self): - """:return: The branch instance that we are to checkout - :raise InvalidGitRepositoryError: if our module is not yet checked out""" - return mkhead(self.module(), self._branch_path) - - @property - def branch_path(self): - """ - :return: full (relative) path as string to the branch we would checkout - from the remote and track""" - return self._branch_path - - @property - def branch_name(self): - """:return: the name of the branch, which is the shortest possible branch name""" - # use an instance method, for this we create a temporary Head instance - # which uses a repository that is available at least ( it makes no difference ) - return git.Head(self.repo, self._branch_path).name - - @property - def url(self): - """:return: The url to the repository which our module-repository refers to""" - return self._url - - @property - def parent_commit(self): - """:return: Commit instance with the tree containing the .gitmodules file - :note: will always point to the current head's commit if it was not set explicitly""" - return self._parent_commit - - @property - def name(self): - """:return: The name of this submodule. 
It is used to identify it within the - .gitmodules file. - :note: by default, the name is the path at which to find the submodule, but - in git-python it should be a unique identifier similar to the identifiers - used for remotes, which allows to change the path of the submodule - easily - """ - return self._name - - def config_reader(self): - """ - :return: ConfigReader instance which allows you to qurey the configuration values - of this submodule, as provided by the .gitmodules file - :note: The config reader will actually read the data directly from the repository - and thus does not need nor care about your working tree. - :note: Should be cached by the caller and only kept as long as needed - :raise IOError: If the .gitmodules file/blob could not be read""" - return self._config_parser_constrained(read_only=True) - - def children(self): - """ - :return: IterableList(Submodule, ...) an iterable list of submodules instances - which are children of this submodule or 0 if the submodule is not checked out""" - return self._get_intermediate_items(self) - - #} END query interface - - #{ Iterable Interface - - @classmethod - def iter_items(cls, repo, parent_commit='HEAD'): - """:return: iterator yielding Submodule instances available in the given repository""" - pc = repo.commit(parent_commit) # parent commit instance - try: - parser = cls._config_parser(repo, pc, read_only=True) - except IOError: - raise StopIteration - # END handle empty iterator - - rt = pc.tree # root tree - - for sms in parser.sections(): - n = sm_name(sms) - p = parser.get_value(sms, 'path') - u = parser.get_value(sms, 'url') - b = cls.k_head_default - if parser.has_option(sms, cls.k_head_option): - b = parser.get_value(sms, cls.k_head_option) - # END handle optional information - - # get the binsha - index = repo.index - try: - sm = rt[p] - except KeyError: - # try the index, maybe it was just added - try: - entry = index.entries[index.entry_key(p, 0)] - sm = Submodule(repo, entry.binsha, entry.mode, entry.path) - except KeyError: - raise InvalidGitRepositoryError("Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit)) - # END handle keyerror - # END handle critical error - - # fill in remaining info - saves time as it doesn't have to be parsed again - sm._name = n - sm._parent_commit = pc - sm._branch_path = git.Head.to_full_path(b) - sm._url = u - - yield sm - # END for each section - - #} END iterable interface + # void our data not to delay invalid access + self._clear_cache() + + return self + + def set_parent_commit(self, commit, check=True): + """Set this instance to use the given commit whose tree is supposed to + contain the .gitmodules blob. + + :param commit: Commit'ish reference pointing at the root_tree + :param check: if True, relatively expensive checks will be performed to verify + validity of the submodule. + :raise ValueError: if the commit's tree didn't contain the .gitmodules blob. 
+ :raise ValueError: if the parent commit didn't store this submodule under the + current path + :return: self""" + pcommit = self.repo.commit(commit) + pctree = pcommit.tree + if self.k_modules_file not in pctree: + raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file)) + # END handle exceptions + + prev_pc = self._parent_commit + self._parent_commit = pcommit + + if check: + parser = self._config_parser(self.repo, self._parent_commit, read_only=True) + if not parser.has_section(sm_section(self.name)): + self._parent_commit = prev_pc + raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit)) + # END handle submodule did not exist + # END handle checking mode + + # update our sha, it could have changed + self.binsha = pctree[self.path].binsha + + self._clear_cache() + + return self + + @unbare_repo + def config_writer(self, index=None, write=True): + """:return: a config writer instance allowing you to read and write the data + belonging to this submodule into the .gitmodules file. + + :param index: if not None, an IndexFile instance which should be written. + defaults to the index of the Submodule's parent repository. + :param write: if True, the index will be written each time a configuration + value changes. + :note: the parameters allow for a more efficient writing of the index, + as you can pass in a modified index on your own, prevent automatic writing, + and write yourself once the whole operation is complete + :raise ValueError: if trying to get a writer on a parent_commit which does not + match the current head commit + :raise IOError: If the .gitmodules file/blob could not be read""" + writer = self._config_parser_constrained(read_only=False) + if index is not None: + writer.config._index = index + writer.config._auto_write = write + return writer + + #} END edit interface + + #{ Query Interface + + @unbare_repo + def module(self): + """:return: Repo instance initialized from the repository at our submodule path + :raise InvalidGitRepositoryError: if a repository was not available. This could + also mean that it was not yet initialized""" + # late import to workaround circular dependencies + module_path = self.abspath + try: + repo = git.Repo(module_path) + if repo != self.repo: + return repo + # END handle repo uninitialized + except (InvalidGitRepositoryError, NoSuchPathError): + raise InvalidGitRepositoryError("No valid repository at %s" % self.path) + else: + raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % module_path) + # END handle exceptions + + def module_exists(self): + """:return: True if our module exists and is a valid git repository. See module() method""" + try: + self.module() + return True + except Exception: + return False + # END handle exception + + def exists(self): + """ + :return: True if the submodule exists, False otherwise. 
Please note that + a submodule may exist (in the .gitmodules file) even though its module + doesn't exist""" + # keep attributes for later, and restore them if we have no valid data + # this way we do not actually alter the state of the object + loc = locals() + for attr in self._cache_attrs: + if hasattr(self, attr): + loc[attr] = getattr(self, attr) + # END if we have the attribute cache + #END for each attr + self._clear_cache() + + try: + try: + self.path + return True + except Exception: + return False + # END handle exceptions + finally: + for attr in self._cache_attrs: + if attr in loc: + setattr(self, attr, loc[attr]) + # END if we have a cache + # END reapply each attribute + # END handle object state consistency + + @property + def branch(self): + """:return: The branch instance that we are to checkout + :raise InvalidGitRepositoryError: if our module is not yet checked out""" + return mkhead(self.module(), self._branch_path) + + @property + def branch_path(self): + """ + :return: full (relative) path as string to the branch we would checkout + from the remote and track""" + return self._branch_path + + @property + def branch_name(self): + """:return: the name of the branch, which is the shortest possible branch name""" + # use an instance method, for this we create a temporary Head instance + # which uses a repository that is available at least ( it makes no difference ) + return git.Head(self.repo, self._branch_path).name + + @property + def url(self): + """:return: The url to the repository which our module-repository refers to""" + return self._url + + @property + def parent_commit(self): + """:return: Commit instance with the tree containing the .gitmodules file + :note: will always point to the current head's commit if it was not set explicitly""" + return self._parent_commit + + @property + def name(self): + """:return: The name of this submodule. It is used to identify it within the + .gitmodules file. + :note: by default, the name is the path at which to find the submodule, but + in git-python it should be a unique identifier similar to the identifiers + used for remotes, which allows to change the path of the submodule + easily + """ + return self._name + + def config_reader(self): + """ + :return: ConfigReader instance which allows you to qurey the configuration values + of this submodule, as provided by the .gitmodules file + :note: The config reader will actually read the data directly from the repository + and thus does not need nor care about your working tree. + :note: Should be cached by the caller and only kept as long as needed + :raise IOError: If the .gitmodules file/blob could not be read""" + return self._config_parser_constrained(read_only=True) + + def children(self): + """ + :return: IterableList(Submodule, ...) 
an iterable list of submodules instances + which are children of this submodule or 0 if the submodule is not checked out""" + return self._get_intermediate_items(self) + + #} END query interface + + #{ Iterable Interface + + @classmethod + def iter_items(cls, repo, parent_commit='HEAD'): + """:return: iterator yielding Submodule instances available in the given repository""" + pc = repo.commit(parent_commit) # parent commit instance + try: + parser = cls._config_parser(repo, pc, read_only=True) + except IOError: + raise StopIteration + # END handle empty iterator + + rt = pc.tree # root tree + + for sms in parser.sections(): + n = sm_name(sms) + p = parser.get_value(sms, 'path') + u = parser.get_value(sms, 'url') + b = cls.k_head_default + if parser.has_option(sms, cls.k_head_option): + b = parser.get_value(sms, cls.k_head_option) + # END handle optional information + + # get the binsha + index = repo.index + try: + sm = rt[p] + except KeyError: + # try the index, maybe it was just added + try: + entry = index.entries[index.entry_key(p, 0)] + sm = Submodule(repo, entry.binsha, entry.mode, entry.path) + except KeyError: + raise InvalidGitRepositoryError("Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit)) + # END handle keyerror + # END handle critical error + + # fill in remaining info - saves time as it doesn't have to be parsed again + sm._name = n + sm._parent_commit = pc + sm._branch_path = git.Head.to_full_path(b) + sm._url = u + + yield sm + # END for each section + + #} END iterable interface diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py index 132604f6..d9764b36 100644 --- a/git/objects/submodule/root.py +++ b/git/objects/submodule/root.py @@ -1,7 +1,7 @@ from base import Submodule, UpdateProgress from util import ( - find_first_remote_branch - ) + find_first_remote_branch + ) from git.exc import InvalidGitRepositoryError import git @@ -11,11 +11,11 @@ __all__ = ["RootModule", "RootUpdateProgress"] class RootUpdateProgress(UpdateProgress): - """Utility class which adds more opcodes to the UpdateProgress""" - REMOVE, PATHCHANGE, BRANCHCHANGE, URLCHANGE = [1 << x for x in range(UpdateProgress._num_op_codes, UpdateProgress._num_op_codes+4)] - _num_op_codes = UpdateProgress._num_op_codes+4 - - __slots__ = tuple() + """Utility class which adds more opcodes to the UpdateProgress""" + REMOVE, PATHCHANGE, BRANCHCHANGE, URLCHANGE = [1 << x for x in range(UpdateProgress._num_op_codes, UpdateProgress._num_op_codes+4)] + _num_op_codes = UpdateProgress._num_op_codes+4 + + __slots__ = tuple() BEGIN = RootUpdateProgress.BEGIN END = RootUpdateProgress.END @@ -25,291 +25,291 @@ URLCHANGE = RootUpdateProgress.URLCHANGE PATHCHANGE = RootUpdateProgress.PATHCHANGE class RootModule(Submodule): - """A (virtual) Root of all submodules in the given repository. 
It can be used - to more easily traverse all submodules of the master repository""" - - __slots__ = tuple() - - k_root_name = '__ROOT__' - - def __init__(self, repo): - # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None) - super(RootModule, self).__init__( - repo, - binsha = self.NULL_BIN_SHA, - mode = self.k_default_mode, - path = '', - name = self.k_root_name, - parent_commit = repo.head.commit, - url = '', - branch_path = git.Head.to_full_path(self.k_head_default) - ) - - - def _clear_cache(self): - """May not do anything""" - pass - - #{ Interface - - def update(self, previous_commit=None, recursive=True, force_remove=False, init=True, - to_latest_revision=False, progress=None, dry_run=False): - """Update the submodules of this repository to the current HEAD commit. - This method behaves smartly by determining changes of the path of a submodules - repository, next to changes to the to-be-checked-out commit or the branch to be - checked out. This works if the submodules ID does not change. - Additionally it will detect addition and removal of submodules, which will be handled - gracefully. - - :param previous_commit: If set to a commit'ish, the commit we should use - as the previous commit the HEAD pointed to before it was set to the commit it points to now. - If None, it defaults to HEAD@{1} otherwise - :param recursive: if True, the children of submodules will be updated as well - using the same technique - :param force_remove: If submodules have been deleted, they will be forcibly removed. - Otherwise the update may fail if a submodule's repository cannot be deleted as - changes have been made to it (see Submodule.update() for more information) - :param init: If we encounter a new module which would need to be initialized, then do it. - :param to_latest_revision: If True, instead of checking out the revision pointed to - by this submodule's sha, the checked out tracking branch will be merged with the - newest remote branch fetched from the repository's origin - :param progress: RootUpdateProgress instance or None if no progress should be sent - :param dry_run: if True, operations will not actually be performed. Progress messages - will change accordingly to indicate the WOULD DO state of the operation.""" - if self.repo.bare: - raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") - # END handle bare - - if progress is None: - progress = RootUpdateProgress() - #END assure progress is set - - prefix = '' - if dry_run: - prefix = 'DRY-RUN: ' - - repo = self.repo - - # SETUP BASE COMMIT - ################### - cur_commit = repo.head.commit - if previous_commit is None: - try: - previous_commit = repo.commit(repo.head.log_entry(-1).oldhexsha) - if previous_commit.binsha == previous_commit.NULL_BIN_SHA: - raise IndexError - #END handle initial commit - except IndexError: - # in new repositories, there is no previous commit - previous_commit = cur_commit - #END exception handling - else: - previous_commit = repo.commit(previous_commit) # obtain commit object - # END handle previous commit - - - psms = self.list_items(repo, parent_commit=previous_commit) - sms = self.list_items(repo) - spsms = set(psms) - ssms = set(sms) - - # HANDLE REMOVALS - ################### - rrsm = (spsms - ssms) - len_rrsm = len(rrsm) - for i, rsm in enumerate(rrsm): - op = REMOVE - if i == 0: - op |= BEGIN - #END handle begin - - # fake it into thinking its at the current commit to allow deletion - # of previous module. 
Trigger the cache to be updated before that - progress.update(op, i, len_rrsm, prefix+"Removing submodule %r at %s" % (rsm.name, rsm.abspath)) - rsm._parent_commit = repo.head.commit - if not dry_run: - rsm.remove(configuration=False, module=True, force=force_remove) - #END handle dry-run - - if i == len_rrsm-1: - op |= END - #END handle end - progress.update(op, i, len_rrsm, prefix+"Done removing submodule %r" % rsm.name) - # END for each removed submodule - - # HANDLE PATH RENAMES - ##################### - # url changes + branch changes - csms = (spsms & ssms) - len_csms = len(csms) - for i, csm in enumerate(csms): - psm = psms[csm.name] - sm = sms[csm.name] - - #PATH CHANGES - ############## - if sm.path != psm.path and psm.module_exists(): - progress.update(BEGIN|PATHCHANGE, i, len_csms, prefix+"Moving repository of submodule %r from %s to %s" % (sm.name, psm.abspath, sm.abspath)) - # move the module to the new path - if not dry_run: - psm.move(sm.path, module=True, configuration=False) - #END handle dry_run - progress.update(END|PATHCHANGE, i, len_csms, prefix+"Done moving repository of submodule %r" % sm.name) - # END handle path changes - - if sm.module_exists(): - # HANDLE URL CHANGE - ################### - if sm.url != psm.url: - # Add the new remote, remove the old one - # This way, if the url just changes, the commits will not - # have to be re-retrieved - nn = '__new_origin__' - smm = sm.module() - rmts = smm.remotes - - # don't do anything if we already have the url we search in place - if len([r for r in rmts if r.url == sm.url]) == 0: - progress.update(BEGIN|URLCHANGE, i, len_csms, prefix+"Changing url of submodule %r from %s to %s" % (sm.name, psm.url, sm.url)) - - if not dry_run: - assert nn not in [r.name for r in rmts] - smr = smm.create_remote(nn, sm.url) - smr.fetch(progress=progress) - - # If we have a tracking branch, it should be available - # in the new remote as well. - if len([r for r in smr.refs if r.remote_head == sm.branch_name]) == 0: - raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch_name, sm.url)) - # END head is not detached - - # now delete the changed one - rmt_for_deletion = None - for remote in rmts: - if remote.url == psm.url: - rmt_for_deletion = remote - break - # END if urls match - # END for each remote - - # if we didn't find a matching remote, but have exactly one, - # we can safely use this one - if rmt_for_deletion is None: - if len(rmts) == 1: - rmt_for_deletion = rmts[0] - else: - # if we have not found any remote with the original url - # we may not have a name. This is a special case, - # and its okay to fail here - # Alternatively we could just generate a unique name and leave all - # existing ones in place - raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url) - #END handle one single remote - # END handle check we found a remote - - orig_name = rmt_for_deletion.name - smm.delete_remote(rmt_for_deletion) - # NOTE: Currently we leave tags from the deleted remotes - # as well as separate tracking branches in the possibly totally - # changed repository ( someone could have changed the url to - # another project ). At some point, one might want to clean - # it up, but the danger is high to remove stuff the user - # has added explicitly - - # rename the new remote back to what it was - smr.rename(orig_name) - - # early on, we verified that the our current tracking branch - # exists in the remote. 
Now we have to assure that the - # sha we point to is still contained in the new remote - # tracking branch. - smsha = sm.binsha - found = False - rref = smr.refs[self.branch_name] - for c in rref.commit.traverse(): - if c.binsha == smsha: - found = True - break - # END traverse all commits in search for sha - # END for each commit - - if not found: - # adjust our internal binsha to use the one of the remote - # this way, it will be checked out in the next step - # This will change the submodule relative to us, so - # the user will be able to commit the change easily - print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it the the remote's tracking branch" % sm.hexsha - sm.binsha = rref.commit.binsha - #END reset binsha - - #NOTE: All checkout is performed by the base implementation of update - #END handle dry_run - progress.update(END|URLCHANGE, i, len_csms, prefix+"Done adjusting url of submodule %r" % (sm.name)) - # END skip remote handling if new url already exists in module - # END handle url - - # HANDLE PATH CHANGES - ##################### - if sm.branch_path != psm.branch_path: - # finally, create a new tracking branch which tracks the - # new remote branch - progress.update(BEGIN|BRANCHCHANGE, i, len_csms, prefix+"Changing branch of submodule %r from %s to %s" % (sm.name, psm.branch_path, sm.branch_path)) - if not dry_run: - smm = sm.module() - smmr = smm.remotes - try: - tbr = git.Head.create(smm, sm.branch_name, logmsg='branch: Created from HEAD') - except OSError: - # ... or reuse the existing one - tbr = git.Head(smm, sm.branch_path) - #END assure tracking branch exists - - tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name)) - # figure out whether the previous tracking branch contains - # new commits compared to the other one, if not we can - # delete it. - try: - tbr = find_first_remote_branch(smmr, psm.branch_name) - if len(smm.git.cherry(tbr, psm.branch)) == 0: - psm.branch.delete(smm, psm.branch) - #END delete original tracking branch if there are no changes - except InvalidGitRepositoryError: - # ignore it if the previous branch couldn't be found in the - # current remotes, this just means we can't handle it - pass - # END exception handling - - #NOTE: All checkout is done in the base implementation of update - #END handle dry_run - - progress.update(END|BRANCHCHANGE, i, len_csms, prefix+"Done changing branch of submodule %r" % sm.name) - #END handle branch - #END handle - # END for each common submodule - - # FINALLY UPDATE ALL ACTUAL SUBMODULES - ###################################### - for sm in sms: - # update the submodule using the default method - sm.update(recursive=False, init=init, to_latest_revision=to_latest_revision, - progress=progress, dry_run=dry_run) - - # update recursively depth first - question is which inconsitent - # state will be better in case it fails somewhere. Defective branch - # or defective depth. The RootSubmodule type will never process itself, - # which was done in the previous expression - if recursive: - # the module would exist by now if we are not in dry_run mode - if sm.module_exists(): - type(self)(sm.module()).update( recursive=True, force_remove=force_remove, - init=init, to_latest_revision=to_latest_revision, - progress=progress, dry_run=dry_run) - #END handle dry_run - #END handle recursive - # END for each submodule to update + """A (virtual) Root of all submodules in the given repository. 
It can be used + to more easily traverse all submodules of the master repository""" + + __slots__ = tuple() + + k_root_name = '__ROOT__' + + def __init__(self, repo): + # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None) + super(RootModule, self).__init__( + repo, + binsha = self.NULL_BIN_SHA, + mode = self.k_default_mode, + path = '', + name = self.k_root_name, + parent_commit = repo.head.commit, + url = '', + branch_path = git.Head.to_full_path(self.k_head_default) + ) + + + def _clear_cache(self): + """May not do anything""" + pass + + #{ Interface + + def update(self, previous_commit=None, recursive=True, force_remove=False, init=True, + to_latest_revision=False, progress=None, dry_run=False): + """Update the submodules of this repository to the current HEAD commit. + This method behaves smartly by determining changes of the path of a submodules + repository, next to changes to the to-be-checked-out commit or the branch to be + checked out. This works if the submodules ID does not change. + Additionally it will detect addition and removal of submodules, which will be handled + gracefully. + + :param previous_commit: If set to a commit'ish, the commit we should use + as the previous commit the HEAD pointed to before it was set to the commit it points to now. + If None, it defaults to HEAD@{1} otherwise + :param recursive: if True, the children of submodules will be updated as well + using the same technique + :param force_remove: If submodules have been deleted, they will be forcibly removed. + Otherwise the update may fail if a submodule's repository cannot be deleted as + changes have been made to it (see Submodule.update() for more information) + :param init: If we encounter a new module which would need to be initialized, then do it. + :param to_latest_revision: If True, instead of checking out the revision pointed to + by this submodule's sha, the checked out tracking branch will be merged with the + newest remote branch fetched from the repository's origin + :param progress: RootUpdateProgress instance or None if no progress should be sent + :param dry_run: if True, operations will not actually be performed. Progress messages + will change accordingly to indicate the WOULD DO state of the operation.""" + if self.repo.bare: + raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") + # END handle bare + + if progress is None: + progress = RootUpdateProgress() + #END assure progress is set + + prefix = '' + if dry_run: + prefix = 'DRY-RUN: ' + + repo = self.repo + + # SETUP BASE COMMIT + ################### + cur_commit = repo.head.commit + if previous_commit is None: + try: + previous_commit = repo.commit(repo.head.log_entry(-1).oldhexsha) + if previous_commit.binsha == previous_commit.NULL_BIN_SHA: + raise IndexError + #END handle initial commit + except IndexError: + # in new repositories, there is no previous commit + previous_commit = cur_commit + #END exception handling + else: + previous_commit = repo.commit(previous_commit) # obtain commit object + # END handle previous commit + + + psms = self.list_items(repo, parent_commit=previous_commit) + sms = self.list_items(repo) + spsms = set(psms) + ssms = set(sms) + + # HANDLE REMOVALS + ################### + rrsm = (spsms - ssms) + len_rrsm = len(rrsm) + for i, rsm in enumerate(rrsm): + op = REMOVE + if i == 0: + op |= BEGIN + #END handle begin + + # fake it into thinking its at the current commit to allow deletion + # of previous module. 
Trigger the cache to be updated before that + progress.update(op, i, len_rrsm, prefix+"Removing submodule %r at %s" % (rsm.name, rsm.abspath)) + rsm._parent_commit = repo.head.commit + if not dry_run: + rsm.remove(configuration=False, module=True, force=force_remove) + #END handle dry-run + + if i == len_rrsm-1: + op |= END + #END handle end + progress.update(op, i, len_rrsm, prefix+"Done removing submodule %r" % rsm.name) + # END for each removed submodule + + # HANDLE PATH RENAMES + ##################### + # url changes + branch changes + csms = (spsms & ssms) + len_csms = len(csms) + for i, csm in enumerate(csms): + psm = psms[csm.name] + sm = sms[csm.name] + + #PATH CHANGES + ############## + if sm.path != psm.path and psm.module_exists(): + progress.update(BEGIN|PATHCHANGE, i, len_csms, prefix+"Moving repository of submodule %r from %s to %s" % (sm.name, psm.abspath, sm.abspath)) + # move the module to the new path + if not dry_run: + psm.move(sm.path, module=True, configuration=False) + #END handle dry_run + progress.update(END|PATHCHANGE, i, len_csms, prefix+"Done moving repository of submodule %r" % sm.name) + # END handle path changes + + if sm.module_exists(): + # HANDLE URL CHANGE + ################### + if sm.url != psm.url: + # Add the new remote, remove the old one + # This way, if the url just changes, the commits will not + # have to be re-retrieved + nn = '__new_origin__' + smm = sm.module() + rmts = smm.remotes + + # don't do anything if we already have the url we search in place + if len([r for r in rmts if r.url == sm.url]) == 0: + progress.update(BEGIN|URLCHANGE, i, len_csms, prefix+"Changing url of submodule %r from %s to %s" % (sm.name, psm.url, sm.url)) + + if not dry_run: + assert nn not in [r.name for r in rmts] + smr = smm.create_remote(nn, sm.url) + smr.fetch(progress=progress) + + # If we have a tracking branch, it should be available + # in the new remote as well. + if len([r for r in smr.refs if r.remote_head == sm.branch_name]) == 0: + raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch_name, sm.url)) + # END head is not detached + + # now delete the changed one + rmt_for_deletion = None + for remote in rmts: + if remote.url == psm.url: + rmt_for_deletion = remote + break + # END if urls match + # END for each remote + + # if we didn't find a matching remote, but have exactly one, + # we can safely use this one + if rmt_for_deletion is None: + if len(rmts) == 1: + rmt_for_deletion = rmts[0] + else: + # if we have not found any remote with the original url + # we may not have a name. This is a special case, + # and its okay to fail here + # Alternatively we could just generate a unique name and leave all + # existing ones in place + raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url) + #END handle one single remote + # END handle check we found a remote + + orig_name = rmt_for_deletion.name + smm.delete_remote(rmt_for_deletion) + # NOTE: Currently we leave tags from the deleted remotes + # as well as separate tracking branches in the possibly totally + # changed repository ( someone could have changed the url to + # another project ). At some point, one might want to clean + # it up, but the danger is high to remove stuff the user + # has added explicitly + + # rename the new remote back to what it was + smr.rename(orig_name) + + # early on, we verified that the our current tracking branch + # exists in the remote. 
Now we have to assure that the + # sha we point to is still contained in the new remote + # tracking branch. + smsha = sm.binsha + found = False + rref = smr.refs[self.branch_name] + for c in rref.commit.traverse(): + if c.binsha == smsha: + found = True + break + # END traverse all commits in search for sha + # END for each commit + + if not found: + # adjust our internal binsha to use the one of the remote + # this way, it will be checked out in the next step + # This will change the submodule relative to us, so + # the user will be able to commit the change easily + print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it the the remote's tracking branch" % sm.hexsha + sm.binsha = rref.commit.binsha + #END reset binsha + + #NOTE: All checkout is performed by the base implementation of update + #END handle dry_run + progress.update(END|URLCHANGE, i, len_csms, prefix+"Done adjusting url of submodule %r" % (sm.name)) + # END skip remote handling if new url already exists in module + # END handle url + + # HANDLE PATH CHANGES + ##################### + if sm.branch_path != psm.branch_path: + # finally, create a new tracking branch which tracks the + # new remote branch + progress.update(BEGIN|BRANCHCHANGE, i, len_csms, prefix+"Changing branch of submodule %r from %s to %s" % (sm.name, psm.branch_path, sm.branch_path)) + if not dry_run: + smm = sm.module() + smmr = smm.remotes + try: + tbr = git.Head.create(smm, sm.branch_name, logmsg='branch: Created from HEAD') + except OSError: + # ... or reuse the existing one + tbr = git.Head(smm, sm.branch_path) + #END assure tracking branch exists + + tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name)) + # figure out whether the previous tracking branch contains + # new commits compared to the other one, if not we can + # delete it. + try: + tbr = find_first_remote_branch(smmr, psm.branch_name) + if len(smm.git.cherry(tbr, psm.branch)) == 0: + psm.branch.delete(smm, psm.branch) + #END delete original tracking branch if there are no changes + except InvalidGitRepositoryError: + # ignore it if the previous branch couldn't be found in the + # current remotes, this just means we can't handle it + pass + # END exception handling + + #NOTE: All checkout is done in the base implementation of update + #END handle dry_run + + progress.update(END|BRANCHCHANGE, i, len_csms, prefix+"Done changing branch of submodule %r" % sm.name) + #END handle branch + #END handle + # END for each common submodule + + # FINALLY UPDATE ALL ACTUAL SUBMODULES + ###################################### + for sm in sms: + # update the submodule using the default method + sm.update(recursive=False, init=init, to_latest_revision=to_latest_revision, + progress=progress, dry_run=dry_run) + + # update recursively depth first - question is which inconsitent + # state will be better in case it fails somewhere. Defective branch + # or defective depth. 
The RootSubmodule type will never process itself, + # which was done in the previous expression + if recursive: + # the module would exist by now if we are not in dry_run mode + if sm.module_exists(): + type(self)(sm.module()).update( recursive=True, force_remove=force_remove, + init=init, to_latest_revision=to_latest_revision, + progress=progress, dry_run=dry_run) + #END handle dry_run + #END handle recursive + # END for each submodule to update - def module(self): - """:return: the actual repository containing the submodules""" - return self.repo - #} END interface + def module(self): + """:return: the actual repository containing the submodules""" + return self.repo + #} END interface #} END classes diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py index 9b32807a..492d9dbe 100644 --- a/git/objects/submodule/util.py +++ b/git/objects/submodule/util.py @@ -4,98 +4,98 @@ from git.config import GitConfigParser from StringIO import StringIO import weakref -__all__ = ( 'sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch', - 'SubmoduleConfigParser') +__all__ = ( 'sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch', + 'SubmoduleConfigParser') #{ Utilities def sm_section(name): - """:return: section title used in .gitmodules configuration file""" - return 'submodule "%s"' % name + """:return: section title used in .gitmodules configuration file""" + return 'submodule "%s"' % name def sm_name(section): - """:return: name of the submodule as parsed from the section name""" - section = section.strip() - return section[11:-1] - + """:return: name of the submodule as parsed from the section name""" + section = section.strip() + return section[11:-1] + def mkhead(repo, path): - """:return: New branch/head instance""" - return git.Head(repo, git.Head.to_full_path(path)) - + """:return: New branch/head instance""" + return git.Head(repo, git.Head.to_full_path(path)) + def unbare_repo(func): - """Methods with this decorator raise InvalidGitRepositoryError if they - encounter a bare repository""" - def wrapper(self, *args, **kwargs): - if self.repo.bare: - raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__) - #END bare method - return func(self, *args, **kwargs) - # END wrapper - wrapper.__name__ = func.__name__ - return wrapper - + """Methods with this decorator raise InvalidGitRepositoryError if they + encounter a bare repository""" + def wrapper(self, *args, **kwargs): + if self.repo.bare: + raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__) + #END bare method + return func(self, *args, **kwargs) + # END wrapper + wrapper.__name__ = func.__name__ + return wrapper + def find_first_remote_branch(remotes, branch_name): - """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError""" - for remote in remotes: - try: - return remote.refs[branch_name] - except IndexError: - continue - # END exception handling - #END for remote - raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes", branch_name) - + """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError""" + for remote in remotes: + try: + return remote.refs[branch_name] + except IndexError: + continue + # END exception handling + #END for remote + raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes", branch_name) + #} END utilities #{ 
Classes class SubmoduleConfigParser(GitConfigParser): - """ - Catches calls to _write, and updates the .gitmodules blob in the index - with the new data, if we have written into a stream. Otherwise it will - add the local file to the index to make it correspond with the working tree. - Additionally, the cache must be cleared - - Please note that no mutating method will work in bare mode - """ - - def __init__(self, *args, **kwargs): - self._smref = None - self._index = None - self._auto_write = True - super(SubmoduleConfigParser, self).__init__(*args, **kwargs) - - #{ Interface - def set_submodule(self, submodule): - """Set this instance's submodule. It must be called before - the first write operation begins""" - self._smref = weakref.ref(submodule) + """ + Catches calls to _write, and updates the .gitmodules blob in the index + with the new data, if we have written into a stream. Otherwise it will + add the local file to the index to make it correspond with the working tree. + Additionally, the cache must be cleared + + Please note that no mutating method will work in bare mode + """ + + def __init__(self, *args, **kwargs): + self._smref = None + self._index = None + self._auto_write = True + super(SubmoduleConfigParser, self).__init__(*args, **kwargs) + + #{ Interface + def set_submodule(self, submodule): + """Set this instance's submodule. It must be called before + the first write operation begins""" + self._smref = weakref.ref(submodule) - def flush_to_index(self): - """Flush changes in our configuration file to the index""" - assert self._smref is not None - # should always have a file here - assert not isinstance(self._file_or_files, StringIO) - - sm = self._smref() - if sm is not None: - index = self._index - if index is None: - index = sm.repo.index - # END handle index - index.add([sm.k_modules_file], write=self._auto_write) - sm._clear_cache() - # END handle weakref + def flush_to_index(self): + """Flush changes in our configuration file to the index""" + assert self._smref is not None + # should always have a file here + assert not isinstance(self._file_or_files, StringIO) + + sm = self._smref() + if sm is not None: + index = self._index + if index is None: + index = sm.repo.index + # END handle index + index.add([sm.k_modules_file], write=self._auto_write) + sm._clear_cache() + # END handle weakref - #} END interface - - #{ Overridden Methods - def write(self): - rval = super(SubmoduleConfigParser, self).write() - self.flush_to_index() - return rval - # END overridden methods + #} END interface + + #{ Overridden Methods + def write(self): + rval = super(SubmoduleConfigParser, self).write() + self.flush_to_index() + return rval + # END overridden methods #} END classes diff --git a/git/objects/tag.py b/git/objects/tag.py index 25eec896..d0b5a11a 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -7,70 +7,70 @@ import base from gitdb.util import hex_to_bin from util import ( - get_object_type_by_name, - parse_actor_and_date - ) + get_object_type_by_name, + parse_actor_and_date + ) __all__ = ("TagObject", ) class TagObject(base.Object): - """Non-Lightweight tag carrying additional information about an object we are pointing to.""" - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) - - def __init__(self, repo, binsha, object=None, tag=None, - tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): - """Initialize a tag object with additional data - - :param repo: repository this object is located in - 
:param binsha: 20 byte SHA1 - :param object: Object instance of object we are pointing to - :param tag: name of this tag - :param tagger: Actor identifying the tagger - :param tagged_date: int_seconds_since_epoch - is the DateTime of the tag creation - use time.gmtime to convert - it into a different format - :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the - authored_date is in, in a format similar to time.altzone""" - super(TagObject, self).__init__(repo, binsha ) - if object is not None: - self.object = object - if tag is not None: - self.tag = tag - if tagger is not None: - self.tagger = tagger - if tagged_date is not None: - self.tagged_date = tagged_date - if tagger_tz_offset is not None: - self.tagger_tz_offset = tagger_tz_offset - if message is not None: - self.message = message - - def _set_cache_(self, attr): - """Cache all our attributes at once""" - if attr in TagObject.__slots__: - ostream = self.repo.odb.stream(self.binsha) - lines = ostream.read().splitlines() - - obj, hexsha = lines[0].split(" ") # object <hexsha> - type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha)) - - self.tag = lines[2][4:] # tag <tag name> - - tagger_info = lines[3]# tagger <actor> <date> - self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info) - - # line 4 empty - it could mark the beginning of the next header - # in case there really is no message, it would not exist. Otherwise - # a newline separates header from message - if len(lines) > 5: - self.message = "\n".join(lines[5:]) - else: - self.message = '' - # END check our attributes - else: - super(TagObject, self)._set_cache_(attr) - - + """Non-Lightweight tag carrying additional information about an object we are pointing to.""" + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) + + def __init__(self, repo, binsha, object=None, tag=None, + tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): + """Initialize a tag object with additional data + + :param repo: repository this object is located in + :param binsha: 20 byte SHA1 + :param object: Object instance of object we are pointing to + :param tag: name of this tag + :param tagger: Actor identifying the tagger + :param tagged_date: int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the + authored_date is in, in a format similar to time.altzone""" + super(TagObject, self).__init__(repo, binsha ) + if object is not None: + self.object = object + if tag is not None: + self.tag = tag + if tagger is not None: + self.tagger = tagger + if tagged_date is not None: + self.tagged_date = tagged_date + if tagger_tz_offset is not None: + self.tagger_tz_offset = tagger_tz_offset + if message is not None: + self.message = message + + def _set_cache_(self, attr): + """Cache all our attributes at once""" + if attr in TagObject.__slots__: + ostream = self.repo.odb.stream(self.binsha) + lines = ostream.read().splitlines() + + obj, hexsha = lines[0].split(" ") # object <hexsha> + type_token, type_name = lines[1].split(" ") # type <type_name> + self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha)) + + self.tag = lines[2][4:] # tag <tag name> + + tagger_info = lines[3]# tagger <actor> <date> + self.tagger, self.tagged_date, 
self.tagger_tz_offset = parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in case there really is no message, it would not exist. Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + diff --git a/git/objects/tree.py b/git/objects/tree.py index 67431686..d1e827f5 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -11,269 +11,269 @@ from submodule.base import Submodule import git.diff as diff from fun import ( - tree_entries_from_data, - tree_to_stream - ) + tree_entries_from_data, + tree_to_stream + ) from gitdb.util import ( - to_bin_sha, - ) + to_bin_sha, + ) __all__ = ("TreeModifier", "Tree") class TreeModifier(object): - """A utility class providing methods to alter the underlying cache in a list-like fashion. - - Once all adjustments are complete, the _cache, which really is a refernce to - the cache of a tree, will be sorted. Assuring it will be in a serializable state""" - __slots__ = '_cache' - - def __init__(self, cache): - self._cache = cache - - def _index_by_name(self, name): - """:return: index of an item with name, or -1 if not found""" - for i, t in enumerate(self._cache): - if t[2] == name: - return i - # END found item - # END for each item in cache - return -1 - - #{ Interface - def set_done(self): - """Call this method once you are done modifying the tree information. - It may be called several times, but be aware that each call will cause - a sort operation - :return self:""" - self._cache.sort(key=lambda t: t[2]) # sort by name - return self - #} END interface - - #{ Mutators - def add(self, sha, mode, name, force=False): - """Add the given item to the tree. If an item with the given name already - exists, nothing will be done, but a ValueError will be raised if the - sha and mode of the existing item do not match the one you add, unless - force is True - - :param sha: The 20 or 40 byte sha of the item to add - :param mode: int representing the stat compatible mode of the item - :param force: If True, an item with your name and information will overwrite - any existing item with the same name, no matter which information it has - :return: self""" - if '/' in name: - raise ValueError("Name must not contain '/' characters") - if (mode >> 12) not in Tree._map_id_to_type: - raise ValueError("Invalid object type according to mode %o" % mode) - - sha = to_bin_sha(sha) - index = self._index_by_name(name) - item = (sha, mode, name) - if index == -1: - self._cache.append(item) - else: - if force: - self._cache[index] = item - else: - ex_item = self._cache[index] - if ex_item[0] != sha or ex_item[1] != mode: - raise ValueError("Item %r existed with different properties" % name) - # END handle mismatch - # END handle force - # END handle name exists - return self - - def add_unchecked(self, binsha, mode, name): - """Add the given item to the tree, its correctness is assumed, which - puts the caller into responsibility to assure the input is correct. 
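A minimal usage sketch for the TagObject fields parsed above, assuming a repository at a hypothetical path with at least one tag; the path is illustrative::

    from git import Repo

    repo = Repo('/path/to/repo')    # hypothetical repository path
    tagref = repo.tags[0]           # assumes at least one tag exists
    tagobj = tagref.tag             # TagObject, or None for lightweight tags
    if tagobj is not None:
        print(tagobj.tag)           # tag name
        print(tagobj.tagger)        # Actor identifying the tagger
        print(tagobj.tagged_date)   # seconds since epoch, see time.gmtime
        print(tagobj.message)       # annotation message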
- For more information on the parameters, see ``add`` - :param binsha: 20 byte binary sha""" - self._cache.append((binsha, mode, name)) - - def __delitem__(self, name): - """Deletes an item with the given name if it exists""" - index = self._index_by_name(name) - if index > -1: - del(self._cache[index]) - - #} END mutators + """A utility class providing methods to alter the underlying cache in a list-like fashion. + + Once all adjustments are complete, the _cache, which really is a refernce to + the cache of a tree, will be sorted. Assuring it will be in a serializable state""" + __slots__ = '_cache' + + def __init__(self, cache): + self._cache = cache + + def _index_by_name(self, name): + """:return: index of an item with name, or -1 if not found""" + for i, t in enumerate(self._cache): + if t[2] == name: + return i + # END found item + # END for each item in cache + return -1 + + #{ Interface + def set_done(self): + """Call this method once you are done modifying the tree information. + It may be called several times, but be aware that each call will cause + a sort operation + :return self:""" + self._cache.sort(key=lambda t: t[2]) # sort by name + return self + #} END interface + + #{ Mutators + def add(self, sha, mode, name, force=False): + """Add the given item to the tree. If an item with the given name already + exists, nothing will be done, but a ValueError will be raised if the + sha and mode of the existing item do not match the one you add, unless + force is True + + :param sha: The 20 or 40 byte sha of the item to add + :param mode: int representing the stat compatible mode of the item + :param force: If True, an item with your name and information will overwrite + any existing item with the same name, no matter which information it has + :return: self""" + if '/' in name: + raise ValueError("Name must not contain '/' characters") + if (mode >> 12) not in Tree._map_id_to_type: + raise ValueError("Invalid object type according to mode %o" % mode) + + sha = to_bin_sha(sha) + index = self._index_by_name(name) + item = (sha, mode, name) + if index == -1: + self._cache.append(item) + else: + if force: + self._cache[index] = item + else: + ex_item = self._cache[index] + if ex_item[0] != sha or ex_item[1] != mode: + raise ValueError("Item %r existed with different properties" % name) + # END handle mismatch + # END handle force + # END handle name exists + return self + + def add_unchecked(self, binsha, mode, name): + """Add the given item to the tree, its correctness is assumed, which + puts the caller into responsibility to assure the input is correct. + For more information on the parameters, see ``add`` + :param binsha: 20 byte binary sha""" + self._cache.append((binsha, mode, name)) + + def __delitem__(self, name): + """Deletes an item with the given name if it exists""" + index = self._index_by_name(name) + if index > -1: + del(self._cache[index]) + + #} END mutators class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): - """Tree objects represent an ordered list of Blobs and other Trees. - - ``Tree as a list``:: - - Access a specific blob using the - tree['filename'] notation. 
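A short sketch of the TreeModifier workflow, assuming a hypothetical repository whose root tree contains at least one blob; the entry name 'copy_of_blob' is illustrative, and the change lives only in memory until the tree is serialized::

    from git import Repo

    repo = Repo('/path/to/repo')                     # hypothetical path
    tree = repo.head.commit.tree
    blob = tree.blobs[0]                             # reuse an existing blob's sha and mode
    mod = tree.cache                                 # TreeModifier over this tree's cache
    mod.add(blob.hexsha, blob.mode, 'copy_of_blob')  # second entry pointing at the same blob
    mod.set_done()                                   # sort the cache so serialization stays correct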
- - You may as well access by index - blob = tree[0] - """ - - type = "tree" - __slots__ = "_cache" - - # actual integer ids for comparison - commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link - blob_id = 010 - symlink_id = 012 - tree_id = 004 - - _map_id_to_type = { - commit_id : Submodule, - blob_id : Blob, - symlink_id : Blob - # tree id added once Tree is defined - } - - - def __init__(self, repo, binsha, mode=tree_id<<12, path=None): - super(Tree, self).__init__(repo, binsha, mode, path) + """Tree objects represent an ordered list of Blobs and other Trees. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + """ + + type = "tree" + __slots__ = "_cache" + + # actual integer ids for comparison + commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link + blob_id = 010 + symlink_id = 012 + tree_id = 004 + + _map_id_to_type = { + commit_id : Submodule, + blob_id : Blob, + symlink_id : Blob + # tree id added once Tree is defined + } + + + def __init__(self, repo, binsha, mode=tree_id<<12, path=None): + super(Tree, self).__init__(repo, binsha, mode, path) - @classmethod - def _get_intermediate_items(cls, index_object): - if index_object.type == "tree": - return tuple(index_object._iter_convert_to_object(index_object._cache)) - return tuple() + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return tuple(index_object._iter_convert_to_object(index_object._cache)) + return tuple() - def _set_cache_(self, attr): - if attr == "_cache": - # Set the data when we need it - ostream = self.repo.odb.stream(self.binsha) - self._cache = tree_entries_from_data(ostream.read()) - else: - super(Tree, self)._set_cache_(attr) - # END handle attribute + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + ostream = self.repo.odb.stream(self.binsha) + self._cache = tree_entries_from_data(ostream.read()) + else: + super(Tree, self)._set_cache_(attr) + # END handle attribute - def _iter_convert_to_object(self, iterable): - """Iterable yields tuples of (binsha, mode, name), which will be converted - to the respective object representation""" - for binsha, mode, name in iterable: - path = join_path(self.path, name) - try: - yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) - except KeyError: - raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) - # END for each item + def _iter_convert_to_object(self, iterable): + """Iterable yields tuples of (binsha, mode, name), which will be converted + to the respective object representation""" + for binsha, mode, name in iterable: + path = join_path(self.path, name) + try: + yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) + except KeyError: + raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) + # END for each item - def __div__(self, file): - """Find the named object in this tree's contents - :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` - - :raise KeyError: if given file or tree does not exist in tree""" - msg = "Blob or Tree named %r not found" - if '/' in file: - tree = self - item = self - tokens = file.split('/') - for i,token in enumerate(tokens): - item = tree[token] - if item.type == 'tree': - tree = item - else: - # safety assertion - blobs are at the end of the path - if i != len(tokens)-1: - raise KeyError(msg % file) - return item - # END 
handle item type - # END for each token of split path - if item == self: - raise KeyError(msg % file) - return item - else: - for info in self._cache: - if info[2] == file: # [2] == name - return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) - # END for each obj - raise KeyError( msg % file ) - # END handle long paths + def __div__(self, file): + """Find the named object in this tree's contents + :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` + + :raise KeyError: if given file or tree does not exist in tree""" + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for info in self._cache: + if info[2] == file: # [2] == name + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + # END for each obj + raise KeyError( msg % file ) + # END handle long paths - @property - def trees(self): - """:return: list(Tree, ...) list of trees directly below this tree""" - return [ i for i in self if i.type == "tree" ] - - @property - def blobs(self): - """:return: list(Blob, ...) list of blobs directly below this tree""" - return [ i for i in self if i.type == "blob" ] + @property + def trees(self): + """:return: list(Tree, ...) list of trees directly below this tree""" + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """:return: list(Blob, ...) list of blobs directly below this tree""" + return [ i for i in self if i.type == "blob" ] - @property - def cache(self): - """ - :return: An object allowing to modify the internal cache. This can be used - to change the tree's contents. When done, make sure you call ``set_done`` - on the tree modifier, or serialization behaviour will be incorrect. - See the ``TreeModifier`` for more information on how to alter the cache""" - return TreeModifier(self._cache) + @property + def cache(self): + """ + :return: An object allowing to modify the internal cache. This can be used + to change the tree's contents. When done, make sure you call ``set_done`` + on the tree modifier, or serialization behaviour will be incorrect. 
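A minimal sketch of the access paths handled by __div__ together with the trees and blobs properties, assuming a hypothetical repository that contains lib/setup.py::

    from git import Repo

    repo = Repo('/path/to/repo')          # hypothetical path
    tree = repo.head.commit.tree
    subtree = tree['lib']                 # single name, looked up in this tree's cache
    blob = tree['lib/setup.py']           # slash-separated path walks intermediate trees
    print([t.path for t in tree.trees])   # trees directly below this tree
    print([b.path for b in tree.blobs])   # blobs directly below this tree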
+ See the ``TreeModifier`` for more information on how to alter the cache""" + return TreeModifier(self._cache) - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = False, ignore_self=1 ): - """For documentation, see util.Traversable.traverse - Trees are set to visit_once = False to gain more performance in the traversal""" - return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see util.Traversable.traverse + Trees are set to visit_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) - # List protocol - def __getslice__(self, i, j): - return list(self._iter_convert_to_object(self._cache[i:j])) - - def __iter__(self): - return self._iter_convert_to_object(self._cache) - - def __len__(self): - return len(self._cache) - - def __getitem__(self, item): - if isinstance(item, int): - info = self._cache[item] - return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) - - if isinstance(item, basestring): - # compatability - return self.__div__(item) - # END index is basestring - - raise TypeError( "Invalid index type: %r" % item ) - - - def __contains__(self, item): - if isinstance(item, IndexObject): - for info in self._cache: - if item.binsha == info[0]: - return True - # END compare sha - # END for each entry - # END handle item is index object - # compatability - - # treat item as repo-relative path - path = self.path - for info in self._cache: - if item == join_path(path, info[2]): - return True - # END for each item - return False - - def __reversed__(self): - return reversed(self._iter_convert_to_object(self._cache)) - - def _serialize(self, stream): - """Serialize this tree into the stream. Please note that we will assume - our tree data to be in a sorted state. 
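Since Tree implements both the list protocol and Traversable, iteration can stay shallow or recurse; a sketch against a hypothetical repository path::

    from git import Repo

    repo = Repo('/path/to/repo')    # hypothetical path
    tree = repo.head.commit.tree

    print(len(tree))                # number of direct entries
    entry = tree[0]                 # list-style access to the first direct entry

    # recursive walk, keeping only blobs at any depth
    for item in tree.traverse(predicate=lambda i, d: i.type == 'blob'):
        print(item.path)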
If this is not the case, serialization - will not generate a correct tree representation as these are assumed to be sorted - by algorithms""" - tree_to_stream(self._cache, stream.write) - return self - - def _deserialize(self, stream): - self._cache = tree_entries_from_data(stream.read()) - return self - - + # List protocol + def __getslice__(self, i, j): + return list(self._iter_convert_to_object(self._cache[i:j])) + + def __iter__(self): + return self._iter_convert_to_object(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self, item): + if isinstance(item, int): + info = self._cache[item] + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self, item): + if isinstance(item, IndexObject): + for info in self._cache: + if item.binsha == info[0]: + return True + # END compare sha + # END for each entry + # END handle item is index object + # compatability + + # treat item as repo-relative path + path = self.path + for info in self._cache: + if item == join_path(path, info[2]): + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._iter_convert_to_object(self._cache)) + + def _serialize(self, stream): + """Serialize this tree into the stream. Please note that we will assume + our tree data to be in a sorted state. If this is not the case, serialization + will not generate a correct tree representation as these are assumed to be sorted + by algorithms""" + tree_to_stream(self._cache, stream.write) + return self + + def _deserialize(self, stream): + self._cache = tree_entries_from_data(stream.read()) + return self + + # END tree # finalize map definition diff --git a/git/objects/util.py b/git/objects/util.py index 4c9323b8..2e44c9c0 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -5,9 +5,9 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php """Module for general utility functions""" from git.util import ( - IterableList, - Actor - ) + IterableList, + Actor + ) import re from collections import deque as Deque @@ -17,299 +17,299 @@ import time import os __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', - 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', - 'verify_utctz', 'Actor') + 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', + 'verify_utctz', 'Actor') #{ Functions def mode_str_to_int(modestr): - """ - :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used - :return: - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example.""" - mode = 0 - for iteration, char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode + """ + :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used + :return: + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. 
whether it is a symlink + for example.""" + mode = 0 + for iteration, char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode def get_object_type_by_name(object_type_name): - """ - :return: type suitable to handle the given object type name. - Use the type to create new instances. - - :param object_type_name: Member of TYPES - - :raise ValueError: In case object_type_name is unknown""" - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - + """ + :return: type suitable to handle the given object type name. + Use the type to create new instances. + + :param object_type_name: Member of TYPES + + :raise ValueError: In case object_type_name is unknown""" + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + def utctz_to_altz(utctz): - """we convert utctz to the timezone in seconds, it is the format time.altzone - returns. Git stores it as UTC timezone which has the opposite sign as well, - which explains the -1 * ( that was made explicit here ) - :param utctz: git utc timezone string, i.e. +0200""" - return -1 * int(float(utctz)/100*3600) - + """we convert utctz to the timezone in seconds, it is the format time.altzone + returns. Git stores it as UTC timezone which has the opposite sign as well, + which explains the -1 * ( that was made explicit here ) + :param utctz: git utc timezone string, i.e. 
+0200""" + return -1 * int(float(utctz)/100*3600) + def altz_to_utctz_str(altz): - """As above, but inverses the operation, returning a string that can be used - in commit objects""" - utci = -1 * int((altz / 3600)*100) - utcs = str(abs(utci)) - utcs = "0"*(4-len(utcs)) + utcs - prefix = (utci < 0 and '-') or '+' - return prefix + utcs - + """As above, but inverses the operation, returning a string that can be used + in commit objects""" + utci = -1 * int((altz / 3600)*100) + utcs = str(abs(utci)) + utcs = "0"*(4-len(utcs)) + utcs + prefix = (utci < 0 and '-') or '+' + return prefix + utcs + def verify_utctz(offset): - """:raise ValueError: if offset is incorrect - :return: offset""" - fmt_exc = ValueError("Invalid timezone offset format: %s" % offset) - if len(offset) != 5: - raise fmt_exc - if offset[0] not in "+-": - raise fmt_exc - if offset[1] not in digits or \ - offset[2] not in digits or \ - offset[3] not in digits or \ - offset[4] not in digits: - raise fmt_exc - # END for each char - return offset + """:raise ValueError: if offset is incorrect + :return: offset""" + fmt_exc = ValueError("Invalid timezone offset format: %s" % offset) + if len(offset) != 5: + raise fmt_exc + if offset[0] not in "+-": + raise fmt_exc + if offset[1] not in digits or \ + offset[2] not in digits or \ + offset[3] not in digits or \ + offset[4] not in digits: + raise fmt_exc + # END for each char + return offset def parse_date(string_date): - """ - Parse the given date as one of the following - - * Git internal format: timestamp offset - * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. - * ISO 8601 2005-04-07T22:13:13 - The T can be a space as well - - :return: Tuple(int(timestamp), int(offset)), both in seconds since epoch - :raise ValueError: If the format could not be understood - :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY""" - # git time - try: - if string_date.count(' ') == 1 and string_date.rfind(':') == -1: - timestamp, offset = string_date.split() - timestamp = int(timestamp) - return timestamp, utctz_to_altz(verify_utctz(offset)) - else: - offset = "+0000" # local time by default - if string_date[-5] in '-+': - offset = verify_utctz(string_date[-5:]) - string_date = string_date[:-6] # skip space as well - # END split timezone info - - # now figure out the date and time portion - split time - date_formats = list() - splitter = -1 - if ',' in string_date: - date_formats.append("%a, %d %b %Y") - splitter = string_date.rfind(' ') - else: - # iso plus additional - date_formats.append("%Y-%m-%d") - date_formats.append("%Y.%m.%d") - date_formats.append("%m/%d/%Y") - date_formats.append("%d.%m.%Y") - - splitter = string_date.rfind('T') - if splitter == -1: - splitter = string_date.rfind(' ') - # END handle 'T' and ' ' - # END handle rfc or iso - - assert splitter > -1 - - # split date and time - time_part = string_date[splitter+1:] # skip space - date_part = string_date[:splitter] - - # parse time - tstruct = time.strptime(time_part, "%H:%M:%S") - - for fmt in date_formats: - try: - dtstruct = time.strptime(date_part, fmt) - fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, - tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec, - dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst)) - return int(time.mktime(fstruct)), utctz_to_altz(offset) - except ValueError: - continue - # END exception handling - # END for each fmt - - # still here ? 
fail - raise ValueError("no format matched") - # END handle format - except Exception: - raise ValueError("Unsupported date format: %s" % string_date) - # END handle exceptions + """ + Parse the given date as one of the following + + * Git internal format: timestamp offset + * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. + * ISO 8601 2005-04-07T22:13:13 + The T can be a space as well + + :return: Tuple(int(timestamp), int(offset)), both in seconds since epoch + :raise ValueError: If the format could not be understood + :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY""" + # git time + try: + if string_date.count(' ') == 1 and string_date.rfind(':') == -1: + timestamp, offset = string_date.split() + timestamp = int(timestamp) + return timestamp, utctz_to_altz(verify_utctz(offset)) + else: + offset = "+0000" # local time by default + if string_date[-5] in '-+': + offset = verify_utctz(string_date[-5:]) + string_date = string_date[:-6] # skip space as well + # END split timezone info + + # now figure out the date and time portion - split time + date_formats = list() + splitter = -1 + if ',' in string_date: + date_formats.append("%a, %d %b %Y") + splitter = string_date.rfind(' ') + else: + # iso plus additional + date_formats.append("%Y-%m-%d") + date_formats.append("%Y.%m.%d") + date_formats.append("%m/%d/%Y") + date_formats.append("%d.%m.%Y") + + splitter = string_date.rfind('T') + if splitter == -1: + splitter = string_date.rfind(' ') + # END handle 'T' and ' ' + # END handle rfc or iso + + assert splitter > -1 + + # split date and time + time_part = string_date[splitter+1:] # skip space + date_part = string_date[:splitter] + + # parse time + tstruct = time.strptime(time_part, "%H:%M:%S") + + for fmt in date_formats: + try: + dtstruct = time.strptime(date_part, fmt) + fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, + tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec, + dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst)) + return int(time.mktime(fstruct)), utctz_to_altz(offset) + except ValueError: + continue + # END exception handling + # END for each fmt + + # still here ? fail + raise ValueError("no format matched") + # END handle format + except Exception: + raise ValueError("Unsupported date format: %s" % string_date) + # END handle exceptions - + # precompiled regex _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$') def parse_actor_and_date(line): - """Parse out the actor (author or committer) info from a line like:: - - author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" - m = _re_actor_epoch.search(line) - actor, epoch, offset = m.groups() - return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset)) - + """Parse out the actor (author or committer) info from a line like:: + + author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 + + :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" + m = _re_actor_epoch.search(line) + actor, epoch, offset = m.groups() + return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset)) + #} END functions #{ Classes - + class ProcessStreamAdapter(object): - """Class wireing all calls to the contained Process instance. - - Use this type to hide the underlying process to provide access only to a specified - stream. 
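The date helpers above accept the three documented formats, and the timezone conversions invert each other; a small self-contained sketch::

    from git.objects.util import (parse_date, parse_actor_and_date,
                                  utctz_to_altz, altz_to_utctz_str)

    # each call returns (int_seconds_since_epoch, int_offset_seconds)
    print(parse_date('1191999972 -0700'))                 # git internal format
    print(parse_date('Thu, 07 Apr 2005 22:13:13 +0200'))  # RFC 2822
    print(parse_date('2005-04-07T22:13:13'))              # ISO 8601, offset defaults to +0000

    assert altz_to_utctz_str(utctz_to_altz('+0200')) == '+0200'

    actor, epoch, offset = parse_actor_and_date(
        'author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700')
    print('%s %d %d' % (actor.name, epoch, offset))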
The process is usually wrapped into an AutoInterrupt class to kill - it if the instance goes out of scope.""" - __slots__ = ("_proc", "_stream") - def __init__(self, process, stream_name): - self._proc = process - self._stream = getattr(process, stream_name) - - def __getattr__(self, attr): - return getattr(self._stream, attr) - - + """Class wireing all calls to the contained Process instance. + + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope.""" + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) + + class Traversable(object): - """Simple interface to perforam depth-first or breadth-first traversals - into one direction. - Subclasses only need to implement one function. - Instances of the Subclass must be hashable""" - __slots__ = tuple() - - @classmethod - def _get_intermediate_items(cls, item): - """ - Returns: - List of items connected to the given item. - Must be implemented in subclass - """ - raise NotImplementedError("To be implemented in subclass") - - def list_traverse(self, *args, **kwargs): - """ - :return: IterableList with the results of the traversal as produced by - traverse()""" - out = IterableList(self._id_attribute_) - out.extend(self.traverse(*args, **kwargs)) - return out - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = True, ignore_self=1, as_edge = False ): - """:return: iterator yieling of items found when traversing self - - :param predicate: f(i,d) returns False if item i at depth d should not be included in the result - - :param prune: - f(i,d) return True if the search should stop at item i at depth d. - Item i will not be returned. - - :param depth: - define at which level the iteration should not go deeper - if -1, there is no limit - if 0, you would effectively only get self, the root of the iteration - i.e. if 1, you would only get the first level of predessessors/successors - - :param branch_first: - if True, items will be returned branch first, otherwise depth first - - :param visit_once: - if True, items will only be returned once, although they might be encountered - several times. Loops are prevented that way. - - :param ignore_self: - if True, self will be ignored and automatically pruned from - the result. Otherwise it will be the first item to be returned. - If as_edge is True, the source of the first edge is None - - :param as_edge: - if True, return a pair of items, first being the source, second the - destinatination, i.e. 
tuple(src, dest) with the edge spanning from - source to destination""" - visited = set() - stack = Deque() - stack.append( ( 0 ,self, None ) ) # self is always depth level 0 - - def addToStack( stack, item, branch_first, depth ): - lst = self._get_intermediate_items( item ) - if not lst: - return - if branch_first: - stack.extendleft( ( depth , i, item ) for i in lst ) - else: - reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) - stack.extend( reviter ) - # END addToStack local method - - while stack: - d, item, src = stack.pop() # depth of item, item, item_source - - if visit_once and item in visited: - continue - - if visit_once: - visited.add(item) - - rval = ( as_edge and (src, item) ) or item - if prune( rval, d ): - continue - - skipStartItem = ignore_self and ( item is self ) - if not skipStartItem and predicate( rval, d ): - yield rval - - # only continue to next level if this is appropriate ! - nd = d + 1 - if depth > -1 and nd > depth: - continue - - addToStack( stack, item, branch_first, nd ) - # END for each item on work stack - + """Simple interface to perforam depth-first or breadth-first traversals + into one direction. + Subclasses only need to implement one function. + Instances of the Subclass must be hashable""" + __slots__ = tuple() + + @classmethod + def _get_intermediate_items(cls, item): + """ + Returns: + List of items connected to the given item. + Must be implemented in subclass + """ + raise NotImplementedError("To be implemented in subclass") + + def list_traverse(self, *args, **kwargs): + """ + :return: IterableList with the results of the traversal as produced by + traverse()""" + out = IterableList(self._id_attribute_) + out.extend(self.traverse(*args, **kwargs)) + return out + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = True, ignore_self=1, as_edge = False ): + """:return: iterator yieling of items found when traversing self + + :param predicate: f(i,d) returns False if item i at depth d should not be included in the result + + :param prune: + f(i,d) return True if the search should stop at item i at depth d. + Item i will not be returned. + + :param depth: + define at which level the iteration should not go deeper + if -1, there is no limit + if 0, you would effectively only get self, the root of the iteration + i.e. if 1, you would only get the first level of predessessors/successors + + :param branch_first: + if True, items will be returned branch first, otherwise depth first + + :param visit_once: + if True, items will only be returned once, although they might be encountered + several times. Loops are prevented that way. + + :param ignore_self: + if True, self will be ignored and automatically pruned from + the result. Otherwise it will be the first item to be returned. + If as_edge is True, the source of the first edge is None + + :param as_edge: + if True, return a pair of items, first being the source, second the + destinatination, i.e. 
tuple(src, dest) with the edge spanning from + source to destination""" + visited = set() + stack = Deque() + stack.append( ( 0 ,self, None ) ) # self is always depth level 0 + + def addToStack( stack, item, branch_first, depth ): + lst = self._get_intermediate_items( item ) + if not lst: + return + if branch_first: + stack.extendleft( ( depth , i, item ) for i in lst ) + else: + reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) + stack.extend( reviter ) + # END addToStack local method + + while stack: + d, item, src = stack.pop() # depth of item, item, item_source + + if visit_once and item in visited: + continue + + if visit_once: + visited.add(item) + + rval = ( as_edge and (src, item) ) or item + if prune( rval, d ): + continue + + skipStartItem = ignore_self and ( item is self ) + if not skipStartItem and predicate( rval, d ): + yield rval + + # only continue to next level if this is appropriate ! + nd = d + 1 + if depth > -1 and nd > depth: + continue + + addToStack( stack, item, branch_first, nd ) + # END for each item on work stack + class Serializable(object): - """Defines methods to serialize and deserialize objects from and into a data stream""" - __slots__ = tuple() - - def _serialize(self, stream): - """Serialize the data of this object into the given data stream - :note: a serialized object would ``_deserialize`` into the same objet - :param stream: a file-like object - :return: self""" - raise NotImplementedError("To be implemented in subclass") - - def _deserialize(self, stream): - """Deserialize all information regarding this object from the stream - :param stream: a file-like object - :return: self""" - raise NotImplementedError("To be implemented in subclass") + """Defines methods to serialize and deserialize objects from and into a data stream""" + __slots__ = tuple() + + def _serialize(self, stream): + """Serialize the data of this object into the given data stream + :note: a serialized object would ``_deserialize`` into the same objet + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") + + def _deserialize(self, stream): + """Deserialize all information regarding this object from the stream + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") diff --git a/git/refs/__init__.py b/git/refs/__init__.py index fc8ce644..3123b991 100644 --- a/git/refs/__init__.py +++ b/git/refs/__init__.py @@ -14,7 +14,7 @@ del(head) import symbolic for item in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): - setattr(symbolic, item.__name__, item) + setattr(symbolic, item.__name__, item) del(symbolic) diff --git a/git/refs/head.py b/git/refs/head.py index d8729434..6e4879fe 100644 --- a/git/refs/head.py +++ b/git/refs/head.py @@ -10,237 +10,237 @@ from git.exc import GitCommandError __all__ = ["HEAD", "Head"] - + class HEAD(SymbolicReference): - """Special case of a Symbolic Reference as it represents the repository's - HEAD reference.""" - _HEAD_NAME = 'HEAD' - _ORIG_HEAD_NAME = 'ORIG_HEAD' - __slots__ = tuple() - - def __init__(self, repo, path=_HEAD_NAME): - if path != self._HEAD_NAME: - raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) - super(HEAD, self).__init__(repo, path) - - def orig_head(self): - """ - :return: SymbolicReference pointing at the ORIG_HEAD, which is maintained - to contain the previous value of HEAD""" - return SymbolicReference(self.repo, 
self._ORIG_HEAD_NAME) - - def reset(self, commit='HEAD', index=True, working_tree = False, - paths=None, **kwargs): - """Reset our HEAD to the given commit optionally synchronizing - the index and working tree. The reference we refer to will be set to - commit as well. - - :param commit: - Commit object, Reference Object or string identifying a revision we - should reset HEAD to. - - :param index: - If True, the index will be set to match the given commit. Otherwise - it will not be touched. - - :param working_tree: - If True, the working tree will be forcefully adjusted to match the given - commit, possibly overwriting uncommitted changes without warning. - If working_tree is True, index must be true as well - - :param paths: - Single path or list of paths relative to the git root directory - that are to be reset. This allows to partially reset individual files. - - :param kwargs: - Additional arguments passed to git-reset. - - :return: self""" - mode = "--soft" - add_arg = None - if index: - mode = "--mixed" - - # it appears, some git-versions declare mixed and paths deprecated - # see http://github.com/Byron/GitPython/issues#issue/2 - if paths: - mode = None - # END special case - # END handle index - - if working_tree: - mode = "--hard" - if not index: - raise ValueError( "Cannot reset the working tree if the index is not reset as well") - - # END working tree handling - - if paths: - add_arg = "--" - # END nicely separate paths from rest - - try: - self.repo.git.reset(mode, commit, add_arg, paths, **kwargs) - except GitCommandError, e: - # git nowadays may use 1 as status to indicate there are still unstaged - # modifications after the reset - if e.status != 1: - raise - # END handle exception - - return self - + """Special case of a Symbolic Reference as it represents the repository's + HEAD reference.""" + _HEAD_NAME = 'HEAD' + _ORIG_HEAD_NAME = 'ORIG_HEAD' + __slots__ = tuple() + + def __init__(self, repo, path=_HEAD_NAME): + if path != self._HEAD_NAME: + raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) + super(HEAD, self).__init__(repo, path) + + def orig_head(self): + """ + :return: SymbolicReference pointing at the ORIG_HEAD, which is maintained + to contain the previous value of HEAD""" + return SymbolicReference(self.repo, self._ORIG_HEAD_NAME) + + def reset(self, commit='HEAD', index=True, working_tree = False, + paths=None, **kwargs): + """Reset our HEAD to the given commit optionally synchronizing + the index and working tree. The reference we refer to will be set to + commit as well. + + :param commit: + Commit object, Reference Object or string identifying a revision we + should reset HEAD to. + + :param index: + If True, the index will be set to match the given commit. Otherwise + it will not be touched. + + :param working_tree: + If True, the working tree will be forcefully adjusted to match the given + commit, possibly overwriting uncommitted changes without warning. + If working_tree is True, index must be true as well + + :param paths: + Single path or list of paths relative to the git root directory + that are to be reset. This allows to partially reset individual files. + + :param kwargs: + Additional arguments passed to git-reset. 
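A sketch of the reset modes described above, assuming a hypothetical repository with more than one commit and a tracked file README::

    from git import Repo

    repo = Repo('/path/to/repo')    # hypothetical path

    # --soft: move HEAD and its branch one commit back, keep index and working tree
    repo.head.reset('HEAD~1', index=False, working_tree=False)

    # path-limited reset: the mode flag is dropped, only README is reset in the index
    repo.head.reset(paths='README')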
+ + :return: self""" + mode = "--soft" + add_arg = None + if index: + mode = "--mixed" + + # it appears, some git-versions declare mixed and paths deprecated + # see http://github.com/Byron/GitPython/issues#issue/2 + if paths: + mode = None + # END special case + # END handle index + + if working_tree: + mode = "--hard" + if not index: + raise ValueError( "Cannot reset the working tree if the index is not reset as well") + + # END working tree handling + + if paths: + add_arg = "--" + # END nicely separate paths from rest + + try: + self.repo.git.reset(mode, commit, add_arg, paths, **kwargs) + except GitCommandError, e: + # git nowadays may use 1 as status to indicate there are still unstaged + # modifications after the reset + if e.status != 1: + raise + # END handle exception + + return self + class Head(Reference): - """A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. + """A Head is a named reference to a Commit. Every Head instance contains a name + and a Commit object. - Examples:: + Examples:: - >>> repo = Repo("/path/to/repo") - >>> head = repo.heads[0] + >>> repo = Repo("/path/to/repo") + >>> head = repo.heads[0] - >>> head.name - 'master' + >>> head.name + 'master' - >>> head.commit - <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> + >>> head.commit + <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> - >>> head.commit.hexsha - '1c09f116cbc2cb4100fb6935bb162daa4723f455'""" - _common_path_default = "refs/heads" - k_config_remote = "remote" - k_config_remote_ref = "merge" # branch to merge from remote - - @classmethod - def delete(cls, repo, *heads, **kwargs): - """Delete the given heads - :param force: - If True, the heads will be deleted even if they are not yet merged into - the main development stream. - Default False""" - force = kwargs.get("force", False) - flag = "-d" - if force: - flag = "-D" - repo.git.branch(flag, *heads) - - def set_tracking_branch(self, remote_reference): - """ - Configure this branch to track the given remote reference. This will alter - this branch's configuration accordingly. 
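Head.delete wraps git-branch -d (or -D with force=True); a sketch creating and removing a throwaway branch whose name is illustrative::

    from git import Repo, Head

    repo = Repo('/path/to/repo')      # hypothetical path
    repo.create_head('experiment')    # new head at the current commit
    Head.delete(repo, 'experiment')   # -d; pass force=True for -D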
- - :param remote_reference: The remote reference to track or None to untrack - any references - :return: self""" - if remote_reference is not None and not isinstance(remote_reference, RemoteReference): - raise ValueError("Incorrect parameter type: %r" % remote_reference) - # END handle type - - writer = self.config_writer() - if remote_reference is None: - writer.remove_option(self.k_config_remote) - writer.remove_option(self.k_config_remote_ref) - if len(writer.options()) == 0: - writer.remove_section() - # END handle remove section - else: - writer.set_value(self.k_config_remote, remote_reference.remote_name) - writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head)) - # END handle ref value - - return self - - - def tracking_branch(self): - """ - :return: The remote_reference we are tracking, or None if we are - not a tracking branch""" - reader = self.config_reader() - if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref): - ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref))) - remote_refpath = RemoteReference.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name)) - return RemoteReference(self.repo, remote_refpath) - # END handle have tracking branch - - # we are not a tracking branch - return None - - def rename(self, new_path, force=False): - """Rename self to a new path - - :param new_path: - Either a simple name or a path, i.e. new_name or features/new_name. - The prefix refs/heads is implied - - :param force: - If True, the rename will succeed even if a head with the target name - already exists. - - :return: self - :note: respects the ref log as git commands are used""" - flag = "-m" - if force: - flag = "-M" - - self.repo.git.branch(flag, self, new_path) - self.path = "%s/%s" % (self._common_path_default, new_path) - return self - - def checkout(self, force=False, **kwargs): - """Checkout this head by setting the HEAD to this reference, by updating the index - to reflect the tree we point to and by updating the working tree to reflect - the latest index. - - The command will fail if changed working tree files would be overwritten. - - :param force: - If True, changes to the index and the working tree will be discarded. - If False, GitCommandError will be raised in that situation. - - :param kwargs: - Additional keyword arguments to be passed to git checkout, i.e. - b='new_branch' to create a new branch at the given spot. - - :return: - The active branch after the checkout operation, usually self unless - a new branch has been created. 
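set_tracking_branch and tracking_branch round-trip through the branch configuration section; a sketch assuming a local 'master' branch and a remote 'origin' that has one too::

    from git import Repo

    repo = Repo('/path/to/repo')                    # hypothetical path
    head = repo.heads.master                        # assumes branch 'master' exists
    remote_ref = repo.remotes.origin.refs.master    # assumes remote 'origin' with 'master'

    head.set_tracking_branch(remote_ref)            # writes branch.master.remote / .merge
    assert head.tracking_branch() == remote_ref
    head.set_tracking_branch(None)                  # untrack again; an empty section is removed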
- - :note: - By default it is only allowed to checkout heads - everything else - will leave the HEAD detached which is allowed and possible, but remains - a special state that some tools might not be able to handle.""" - args = list() - kwargs['f'] = force - if kwargs['f'] == False: - kwargs.pop('f') - - self.repo.git.checkout(self, **kwargs) - return self.repo.active_branch - - #{ Configruation - - def _config_parser(self, read_only): - if read_only: - parser = self.repo.config_reader() - else: - parser = self.repo.config_writer() - # END handle parser instance - - return SectionConstraint(parser, 'branch "%s"' % self.name) - - def config_reader(self): - """ - :return: A configuration parser instance constrained to only read - this instance's values""" - return self._config_parser(read_only=True) - - def config_writer(self): - """ - :return: A configuration writer instance with read-and write acccess - to options of this head""" - return self._config_parser(read_only=False) - - #} END configuration - + >>> head.commit.hexsha + '1c09f116cbc2cb4100fb6935bb162daa4723f455'""" + _common_path_default = "refs/heads" + k_config_remote = "remote" + k_config_remote_ref = "merge" # branch to merge from remote + + @classmethod + def delete(cls, repo, *heads, **kwargs): + """Delete the given heads + :param force: + If True, the heads will be deleted even if they are not yet merged into + the main development stream. + Default False""" + force = kwargs.get("force", False) + flag = "-d" + if force: + flag = "-D" + repo.git.branch(flag, *heads) + + def set_tracking_branch(self, remote_reference): + """ + Configure this branch to track the given remote reference. This will alter + this branch's configuration accordingly. + + :param remote_reference: The remote reference to track or None to untrack + any references + :return: self""" + if remote_reference is not None and not isinstance(remote_reference, RemoteReference): + raise ValueError("Incorrect parameter type: %r" % remote_reference) + # END handle type + + writer = self.config_writer() + if remote_reference is None: + writer.remove_option(self.k_config_remote) + writer.remove_option(self.k_config_remote_ref) + if len(writer.options()) == 0: + writer.remove_section() + # END handle remove section + else: + writer.set_value(self.k_config_remote, remote_reference.remote_name) + writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head)) + # END handle ref value + + return self + + + def tracking_branch(self): + """ + :return: The remote_reference we are tracking, or None if we are + not a tracking branch""" + reader = self.config_reader() + if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref): + ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref))) + remote_refpath = RemoteReference.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name)) + return RemoteReference(self.repo, remote_refpath) + # END handle have tracking branch + + # we are not a tracking branch + return None + + def rename(self, new_path, force=False): + """Rename self to a new path + + :param new_path: + Either a simple name or a path, i.e. new_name or features/new_name. + The prefix refs/heads is implied + + :param force: + If True, the rename will succeed even if a head with the target name + already exists. 
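rename and checkout, sketched against a hypothetical repository; the refs/heads prefix is implied in the new name::

    from git import Repo

    repo = Repo('/path/to/repo')          # hypothetical path
    head = repo.heads.master              # assumes branch 'master' exists

    head.rename('features/wip')           # becomes refs/heads/features/wip
    head.rename('master', force=True)     # -M, succeeds even if the target name exists

    active = head.checkout(force=True)    # discards index and working tree changes
    assert active == repo.active_branch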
+ + :return: self + :note: respects the ref log as git commands are used""" + flag = "-m" + if force: + flag = "-M" + + self.repo.git.branch(flag, self, new_path) + self.path = "%s/%s" % (self._common_path_default, new_path) + return self + + def checkout(self, force=False, **kwargs): + """Checkout this head by setting the HEAD to this reference, by updating the index + to reflect the tree we point to and by updating the working tree to reflect + the latest index. + + The command will fail if changed working tree files would be overwritten. + + :param force: + If True, changes to the index and the working tree will be discarded. + If False, GitCommandError will be raised in that situation. + + :param kwargs: + Additional keyword arguments to be passed to git checkout, i.e. + b='new_branch' to create a new branch at the given spot. + + :return: + The active branch after the checkout operation, usually self unless + a new branch has been created. + + :note: + By default it is only allowed to checkout heads - everything else + will leave the HEAD detached which is allowed and possible, but remains + a special state that some tools might not be able to handle.""" + args = list() + kwargs['f'] = force + if kwargs['f'] == False: + kwargs.pop('f') + + self.repo.git.checkout(self, **kwargs) + return self.repo.active_branch + + #{ Configruation + + def _config_parser(self, read_only): + if read_only: + parser = self.repo.config_reader() + else: + parser = self.repo.config_writer() + # END handle parser instance + + return SectionConstraint(parser, 'branch "%s"' % self.name) + + def config_reader(self): + """ + :return: A configuration parser instance constrained to only read + this instance's values""" + return self._config_parser(read_only=True) + + def config_writer(self): + """ + :return: A configuration writer instance with read-and write acccess + to options of this head""" + return self._config_parser(read_only=False) + + #} END configuration + diff --git a/git/refs/log.py b/git/refs/log.py index 0e977723..9a719ec0 100644 --- a/git/refs/log.py +++ b/git/refs/log.py @@ -1,24 +1,24 @@ from git.util import ( - join_path, - Actor, - LockedFD, - LockFile, - assure_directory_exists, - to_native_path, - ) + join_path, + Actor, + LockedFD, + LockFile, + assure_directory_exists, + to_native_path, + ) from gitdb.util import ( - bin_to_hex, - join, - file_contents_ro_filepath, - ) + bin_to_hex, + join, + file_contents_ro_filepath, + ) from git.objects.util import ( - parse_date, - Serializable, - utctz_to_altz, - altz_to_utctz_str, - ) + parse_date, + Serializable, + utctz_to_altz, + altz_to_utctz_str, + ) import time import os @@ -28,261 +28,261 @@ __all__ = ["RefLog", "RefLogEntry"] class RefLogEntry(tuple): - """Named tuple allowing easy access to the revlog data fields""" - _fmt = "%s %s %s <%s> %i %s\t%s\n" - _re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') - __slots__ = tuple() - - def __repr__(self): - """Representation of ourselves in git reflog format""" - act = self.actor - time = self.time - return self._fmt % (self.oldhexsha, self.newhexsha, act.name, act.email, - time[0], altz_to_utctz_str(time[1]), self.message) - - @property - def oldhexsha(self): - """The hexsha to the commit the ref pointed to before the change""" - return self[0] - - @property - def newhexsha(self): - """The hexsha to the commit the ref now points to, after the change""" - return self[1] - - @property - def actor(self): - """Actor instance, providing access""" - return self[2] - - @property - def time(self): - """time 
as tuple: - - * [0] = int(time) - * [1] = int(timezone_offset) in time.altzone format """ - return self[3] - - @property - def message(self): - """Message describing the operation that acted on the reference""" - return self[4] - - @classmethod - def new(self, oldhexsha, newhexsha, actor, time, tz_offset, message): - """:return: New instance of a RefLogEntry""" - if not isinstance(actor, Actor): - raise ValueError("Need actor instance, got %s" % actor) - # END check types - return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), message)) - - @classmethod - def from_line(cls, line): - """:return: New RefLogEntry instance from the given revlog line. - :param line: line without trailing newline - :raise ValueError: If line could not be parsed""" - try: - info, msg = line.split('\t', 2) - except ValueError: - raise ValueError("line is missing tab separator") - #END handle first plit - oldhexsha = info[:40] - newhexsha = info[41:81] - for hexsha in (oldhexsha, newhexsha): - if not cls._re_hexsha_only.match(hexsha): - raise ValueError("Invalid hexsha: %s" % hexsha) - # END if hexsha re doesn't match - #END for each hexsha - - email_end = info.find('>', 82) - if email_end == -1: - raise ValueError("Missing token: >") - #END handle missing end brace - - actor = Actor._from_string(info[82:email_end+1]) - time, tz_offset = parse_date(info[email_end+2:]) - - return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) - + """Named tuple allowing easy access to the revlog data fields""" + _fmt = "%s %s %s <%s> %i %s\t%s\n" + _re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + __slots__ = tuple() + + def __repr__(self): + """Representation of ourselves in git reflog format""" + act = self.actor + time = self.time + return self._fmt % (self.oldhexsha, self.newhexsha, act.name, act.email, + time[0], altz_to_utctz_str(time[1]), self.message) + + @property + def oldhexsha(self): + """The hexsha to the commit the ref pointed to before the change""" + return self[0] + + @property + def newhexsha(self): + """The hexsha to the commit the ref now points to, after the change""" + return self[1] + + @property + def actor(self): + """Actor instance, providing access""" + return self[2] + + @property + def time(self): + """time as tuple: + + * [0] = int(time) + * [1] = int(timezone_offset) in time.altzone format """ + return self[3] + + @property + def message(self): + """Message describing the operation that acted on the reference""" + return self[4] + + @classmethod + def new(self, oldhexsha, newhexsha, actor, time, tz_offset, message): + """:return: New instance of a RefLogEntry""" + if not isinstance(actor, Actor): + raise ValueError("Need actor instance, got %s" % actor) + # END check types + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), message)) + + @classmethod + def from_line(cls, line): + """:return: New RefLogEntry instance from the given revlog line. 
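from_line expects the native reflog layout, two 40 character hexshas, an actor, a date and a tab-separated message; a sketch with an illustrative line::

    from git.refs.log import RefLogEntry

    line = ('0000000000000000000000000000000000000000 '
            '1c09f116cbc2cb4100fb6935bb162daa4723f455 '
            'Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700'
            '\tcommit (initial): first commit')
    entry = RefLogEntry.from_line(line)
    print(entry.newhexsha)    # '1c09f116...'
    print(entry.time)         # (seconds_since_epoch, offset) in time.altzone format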
+ :param line: line without trailing newline + :raise ValueError: If line could not be parsed""" + try: + info, msg = line.split('\t', 2) + except ValueError: + raise ValueError("line is missing tab separator") + #END handle first plit + oldhexsha = info[:40] + newhexsha = info[41:81] + for hexsha in (oldhexsha, newhexsha): + if not cls._re_hexsha_only.match(hexsha): + raise ValueError("Invalid hexsha: %s" % hexsha) + # END if hexsha re doesn't match + #END for each hexsha + + email_end = info.find('>', 82) + if email_end == -1: + raise ValueError("Missing token: >") + #END handle missing end brace + + actor = Actor._from_string(info[82:email_end+1]) + time, tz_offset = parse_date(info[email_end+2:]) + + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) + class RefLog(list, Serializable): - """A reflog contains reflog entries, each of which defines a certain state - of the head in question. Custom query methods allow to retrieve log entries - by date or by other criteria. - - Reflog entries are orded, the first added entry is first in the list, the last - entry, i.e. the last change of the head or reference, is last in the list.""" - - __slots__ = ('_path', ) - - def __new__(cls, filepath=None): - inst = super(RefLog, cls).__new__(cls) - return inst - - def __init__(self, filepath=None): - """Initialize this instance with an optional filepath, from which we will - initialize our data. The path is also used to write changes back using - the write() method""" - self._path = filepath - if filepath is not None: - self._read_from_file() - # END handle filepath - - def _read_from_file(self): - try: - fmap = file_contents_ro_filepath(self._path, stream=True, allow_mmap=True) - except OSError: - # it is possible and allowed that the file doesn't exist ! - return - #END handle invalid log - - try: - self._deserialize(fmap) - finally: - fmap.close() - #END handle closing of handle - - #{ Interface - - @classmethod - def from_file(cls, filepath): - """ - :return: a new RefLog instance containing all entries from the reflog - at the given filepath - :param filepath: path to reflog - :raise ValueError: If the file could not be read or was corrupted in some way""" - return cls(filepath) - - @classmethod - def path(cls, ref): - """ - :return: string to absolute path at which the reflog of the given ref - instance would be found. The path is not guaranteed to point to a valid - file though. - :param ref: SymbolicReference instance""" - return join(ref.repo.git_dir, "logs", to_native_path(ref.path)) - - @classmethod - def iter_entries(cls, stream): - """ - :return: Iterator yielding RefLogEntry instances, one for each line read - sfrom the given stream. - :param stream: file-like object containing the revlog in its native format - or basestring instance pointing to a file to read""" - new_entry = RefLogEntry.from_line - if isinstance(stream, basestring): - stream = file_contents_ro_filepath(stream) - #END handle stream type - while True: - line = stream.readline() - if not line: - return - yield new_entry(line.strip()) - #END endless loop - - @classmethod - def entry_at(cls, filepath, index): - """:return: RefLogEntry at the given index - :param filepath: full path to the index file from which to read the entry - :param index: python list compatible index, i.e. it may be negative to - specifiy an entry counted from the end of the list - - :raise IndexError: If the entry didn't exist - - .. note:: This method is faster as it only parses the entry at index, skipping - all other lines. 
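Reading a reflog through the classmethods above, assuming a hypothetical repository whose HEAD has moved at least once::

    from git import Repo
    from git.refs.log import RefLog

    repo = Repo('/path/to/repo')                     # hypothetical path
    logpath = RefLog.path(repo.head)                 # .git/logs/HEAD
    log = RefLog.from_file(logpath)                  # silently empty if the file is missing
    if log:
        last = log[-1]                               # RefLog is a list of RefLogEntry
        print('%s -> %s' % (last.oldhexsha, last.newhexsha))
        print(RefLog.entry_at(logpath, -1).message)  # same entry, parsed on its own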
Nonetheless, the whole file has to be read if - the index is negative - """ - fp = open(filepath, 'rb') - if index < 0: - return RefLogEntry.from_line(fp.readlines()[index].strip()) - else: - # read until index is reached - for i in xrange(index+1): - line = fp.readline() - if not line: - break - #END abort on eof - #END handle runup - - if i != index or not line: - raise IndexError - #END handle exception - - return RefLogEntry.from_line(line.strip()) - #END handle index - - def to_file(self, filepath): - """Write the contents of the reflog instance to a file at the given filepath. - :param filepath: path to file, parent directories are assumed to exist""" - lfd = LockedFD(filepath) - assure_directory_exists(filepath, is_file=True) - - fp = lfd.open(write=True, stream=True) - try: - self._serialize(fp) - lfd.commit() - except: - # on failure it rolls back automatically, but we make it clear - lfd.rollback() - raise - #END handle change - - @classmethod - def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message): - """Append a new log entry to the revlog at filepath. - - :param config_reader: configuration reader of the repository - used to obtain - user information. May be None - :param filepath: full path to the log file - :param oldbinsha: binary sha of the previous commit - :param newbinsha: binary sha of the current commit - :param message: message describing the change to the reference - :param write: If True, the changes will be written right away. Otherwise - the change will not be written - :return: RefLogEntry objects which was appended to the log - :note: As we are append-only, concurrent access is not a problem as we - do not interfere with readers.""" - if len(oldbinsha) != 20 or len(newbinsha) != 20: - raise ValueError("Shas need to be given in binary format") - #END handle sha type - assure_directory_exists(filepath, is_file=True) - entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(config_reader), (int(time.time()), time.altzone), message)) - - lf = LockFile(filepath) - lf._obtain_lock_or_raise() - - fd = open(filepath, 'a') - try: - fd.write(repr(entry)) - finally: - fd.close() - lf._release_lock() - #END handle write operation - - return entry - - def write(self): - """Write this instance's data to the file we are originating from - :return: self""" - if self._path is None: - raise ValueError("Instance was not initialized with a path, use to_file(...) instead") - #END assert path - self.to_file(self._path) - return self - - #} END interface - - #{ Serializable Interface - def _serialize(self, stream): - lm1 = len(self) - 1 - write = stream.write - - # write all entries - for e in self: - write(repr(e)) - #END for each entry - - def _deserialize(self, stream): - self.extend(self.iter_entries(stream)) - #} END serializable interface + """A reflog contains reflog entries, each of which defines a certain state + of the head in question. Custom query methods allow to retrieve log entries + by date or by other criteria. + + Reflog entries are orded, the first added entry is first in the list, the last + entry, i.e. the last change of the head or reference, is last in the list.""" + + __slots__ = ('_path', ) + + def __new__(cls, filepath=None): + inst = super(RefLog, cls).__new__(cls) + return inst + + def __init__(self, filepath=None): + """Initialize this instance with an optional filepath, from which we will + initialize our data. 
The path is also used to write changes back using
+        the write() method"""
+        self._path = filepath
+        if filepath is not None:
+            self._read_from_file()
+        # END handle filepath
+
+    def _read_from_file(self):
+        try:
+            fmap = file_contents_ro_filepath(self._path, stream=True, allow_mmap=True)
+        except OSError:
+            # it is possible and allowed that the file doesn't exist !
+            return
+        #END handle invalid log
+
+        try:
+            self._deserialize(fmap)
+        finally:
+            fmap.close()
+        #END handle closing of handle
+
+    #{ Interface
+
+    @classmethod
+    def from_file(cls, filepath):
+        """
+        :return: a new RefLog instance containing all entries from the reflog
+            at the given filepath
+        :param filepath: path to reflog
+        :raise ValueError: If the file could not be read or was corrupted in some way"""
+        return cls(filepath)
+
+    @classmethod
+    def path(cls, ref):
+        """
+        :return: string to absolute path at which the reflog of the given ref
+            instance would be found. The path is not guaranteed to point to a valid
+            file though.
+        :param ref: SymbolicReference instance"""
+        return join(ref.repo.git_dir, "logs", to_native_path(ref.path))
+
+    @classmethod
+    def iter_entries(cls, stream):
+        """
+        :return: Iterator yielding RefLogEntry instances, one for each line read
+            from the given stream.
+        :param stream: file-like object containing the reflog in its native format
+            or basestring instance pointing to a file to read"""
+        new_entry = RefLogEntry.from_line
+        if isinstance(stream, basestring):
+            stream = file_contents_ro_filepath(stream)
+        #END handle stream type
+        while True:
+            line = stream.readline()
+            if not line:
+                return
+            yield new_entry(line.strip())
+        #END endless loop
+
+    @classmethod
+    def entry_at(cls, filepath, index):
+        """:return: RefLogEntry at the given index
+        :param filepath: full path to the reflog file from which to read the entry
+        :param index: python list compatible index, i.e. it may be negative to
+            specify an entry counted from the end of the list
+
+        :raise IndexError: If the entry didn't exist
+
+        .. note:: This method is faster as it only parses the entry at index, skipping
+            all other lines. Nonetheless, the whole file has to be read if
+            the index is negative
+        """
+        fp = open(filepath, 'rb')
+        if index < 0:
+            return RefLogEntry.from_line(fp.readlines()[index].strip())
+        else:
+            # read until index is reached
+            for i in xrange(index+1):
+                line = fp.readline()
+                if not line:
+                    break
+                #END abort on eof
+            #END handle runup
+
+            if i != index or not line:
+                raise IndexError
+            #END handle exception
+
+            return RefLogEntry.from_line(line.strip())
+        #END handle index
+
+    def to_file(self, filepath):
+        """Write the contents of the reflog instance to a file at the given filepath.
+        :param filepath: path to file, parent directories are assumed to exist"""
+        lfd = LockedFD(filepath)
+        assure_directory_exists(filepath, is_file=True)
+
+        fp = lfd.open(write=True, stream=True)
+        try:
+            self._serialize(fp)
+            lfd.commit()
+        except:
+            # on failure it rolls back automatically, but we make it clear
+            lfd.rollback()
+            raise
+        #END handle change
+
+    @classmethod
+    def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message):
+        """Append a new log entry to the reflog at filepath.
+
+        :param config_reader: configuration reader of the repository - used to obtain
+            user information. May be None
+        :param filepath: full path to the log file
+        :param oldbinsha: binary sha of the previous commit
+        :param newbinsha: binary sha of the current commit
+        :param message: message describing the change to the reference
+        :return: RefLogEntry object that was appended to the log
+        :note: As we are append-only, concurrent access is not a problem as we
+            do not interfere with readers."""
+        if len(oldbinsha) != 20 or len(newbinsha) != 20:
+            raise ValueError("Shas need to be given in binary format")
+        #END handle sha type
+        assure_directory_exists(filepath, is_file=True)
+        entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(config_reader), (int(time.time()), time.altzone), message))
+
+        lf = LockFile(filepath)
+        lf._obtain_lock_or_raise()
+
+        fd = open(filepath, 'a')
+        try:
+            fd.write(repr(entry))
+        finally:
+            fd.close()
+            lf._release_lock()
+        #END handle write operation
+
+        return entry
+
+    def write(self):
+        """Write this instance's data to the file we are originating from
+        :return: self"""
+        if self._path is None:
+            raise ValueError("Instance was not initialized with a path, use to_file(...) instead")
+        #END assert path
+        self.to_file(self._path)
+        return self
+
+    #} END interface
+
+    #{ Serializable Interface
+    def _serialize(self, stream):
+        lm1 = len(self) - 1
+        write = stream.write
+
+        # write all entries
+        for e in self:
+            write(repr(e))
+        #END for each entry
+
+    def _deserialize(self, stream):
+        self.extend(self.iter_entries(stream))
+    #} END serializable interface
diff --git a/git/refs/reference.py b/git/refs/reference.py
index 29d051a6..8cc577a8 100644
--- a/git/refs/reference.py
+++ b/git/refs/reference.py
@@ -2,126 +2,126 @@ from symbolic import SymbolicReference
 import os
 from git.objects import Object
 from git.util import (
-    LazyMixin,
-    Iterable,
-    )
+    LazyMixin,
+    Iterable,
+    )
 from gitdb.util import (
-    isfile,
-    hex_to_bin
-    )
+    isfile,
+    hex_to_bin
+    )

 __all__ = ["Reference"]

 #{ Utilities
 def require_remote_ref_path(func):
-    """A decorator raising a TypeError if we are not a valid remote, based on the path"""
-    def wrapper(self, *args):
-        if not self.path.startswith(self._remote_common_path_default + "/"):
-            raise ValueError("ref path does not point to a remote reference: %s" % path)
-        return func(self, *args)
-    #END wrapper
-    wrapper.__name__ = func.__name__
-    return wrapper
+    """A decorator raising a ValueError if we are not a valid remote, based on the path"""
+    def wrapper(self, *args):
+        if not self.path.startswith(self._remote_common_path_default + "/"):
+            raise ValueError("ref path does not point to a remote reference: %s" % self.path)
+        return func(self, *args)
+    #END wrapper
+    wrapper.__name__ = func.__name__
+    return wrapper
#}END utilites

class Reference(SymbolicReference, LazyMixin, Iterable):
-    """Represents a named reference to any object. Subclasses may apply restrictions though,
-    i.e. Heads can only point to commits."""
-    __slots__ = tuple()
-    _points_to_commits_only = False
-    _resolve_ref_on_create = True
-    _common_path_default = "refs"
-
-    def __init__(self, repo, path, check_path = True):
-        """Initialize this instance
-        :param repo: Our parent repository
-
-        :param path:
-            Path relative to the .git/ directory pointing to the ref in question, i.e.
-            refs/heads/master
-        :param check_path: if False, you can provide any path.
Otherwise the path must start with the - default path prefix of this type.""" - if check_path and not path.startswith(self._common_path_default+'/'): - raise ValueError("Cannot instantiate %r from path %s" % (self.__class__.__name__, path)) - super(Reference, self).__init__(repo, path) - + """Represents a named reference to any object. Subclasses may apply restrictions though, + i.e. Heads can only point to commits.""" + __slots__ = tuple() + _points_to_commits_only = False + _resolve_ref_on_create = True + _common_path_default = "refs" + + def __init__(self, repo, path, check_path = True): + """Initialize this instance + :param repo: Our parent repository + + :param path: + Path relative to the .git/ directory pointing to the ref in question, i.e. + refs/heads/master + :param check_path: if False, you can provide any path. Otherwise the path must start with the + default path prefix of this type.""" + if check_path and not path.startswith(self._common_path_default+'/'): + raise ValueError("Cannot instantiate %r from path %s" % (self.__class__.__name__, path)) + super(Reference, self).__init__(repo, path) + - def __str__(self): - return self.name - - #{ Interface + def __str__(self): + return self.name + + #{ Interface - def set_object(self, object, logmsg = None): - """Special version which checks if the head-log needs an update as well""" - oldbinsha = None - if logmsg is not None: - head = self.repo.head - if not head.is_detached and head.ref == self: - oldbinsha = self.commit.binsha - #END handle commit retrieval - #END handle message is set - - super(Reference, self).set_object(object, logmsg) - - if oldbinsha is not None: - # /* from refs.c in git-source - # * Special hack: If a branch is updated directly and HEAD - # * points to it (may happen on the remote side of a push - # * for example) then logically the HEAD reflog should be - # * updated too. - # * A generic solution implies reverse symref information, - # * but finding all symrefs pointing to the given branch - # * would be rather costly for this rare event (the direct - # * update of a branch) to be worth it. So let's cheat and - # * check with HEAD only which should cover 99% of all usage - # * scenarios (even 100% of the default ones). - # */ - self.repo.head.log_append(oldbinsha, logmsg) - #END check if the head + def set_object(self, object, logmsg = None): + """Special version which checks if the head-log needs an update as well""" + oldbinsha = None + if logmsg is not None: + head = self.repo.head + if not head.is_detached and head.ref == self: + oldbinsha = self.commit.binsha + #END handle commit retrieval + #END handle message is set + + super(Reference, self).set_object(object, logmsg) + + if oldbinsha is not None: + # /* from refs.c in git-source + # * Special hack: If a branch is updated directly and HEAD + # * points to it (may happen on the remote side of a push + # * for example) then logically the HEAD reflog should be + # * updated too. + # * A generic solution implies reverse symref information, + # * but finding all symrefs pointing to the given branch + # * would be rather costly for this rare event (the direct + # * update of a branch) to be worth it. So let's cheat and + # * check with HEAD only which should cover 99% of all usage + # * scenarios (even 100% of the default ones). 
+ # */ + self.repo.head.log_append(oldbinsha, logmsg) + #END check if the head - # NOTE: Don't have to overwrite properties as the will only work without a the log + # NOTE: Don't have to overwrite properties as the will only work without a the log - @property - def name(self): - """:return: (shortest) Name of this reference - it may contain path components""" - # first two path tokens are can be removed as they are - # refs/heads or refs/tags or refs/remotes - tokens = self.path.split('/') - if len(tokens) < 3: - return self.path # could be refs/HEAD - return '/'.join(tokens[2:]) - - @classmethod - def iter_items(cls, repo, common_path = None): - """Equivalent to SymbolicReference.iter_items, but will return non-detached - references as well.""" - return cls._iter_items(repo, common_path) - - #}END interface - - - #{ Remote Interface - - @property - @require_remote_ref_path - def remote_name(self): - """ - :return: - Name of the remote we are a reference of, such as 'origin' for a reference - named 'origin/master'""" - tokens = self.path.split('/') - # /refs/remotes/<remote name>/<branch_name> - return tokens[2] - - @property - @require_remote_ref_path - def remote_head(self): - """:return: Name of the remote head itself, i.e. master. - :note: The returned name is usually not qualified enough to uniquely identify - a branch""" - tokens = self.path.split('/') - return '/'.join(tokens[3:]) - - #} END remote interface + @property + def name(self): + """:return: (shortest) Name of this reference - it may contain path components""" + # first two path tokens are can be removed as they are + # refs/heads or refs/tags or refs/remotes + tokens = self.path.split('/') + if len(tokens) < 3: + return self.path # could be refs/HEAD + return '/'.join(tokens[2:]) + + @classmethod + def iter_items(cls, repo, common_path = None): + """Equivalent to SymbolicReference.iter_items, but will return non-detached + references as well.""" + return cls._iter_items(repo, common_path) + + #}END interface + + + #{ Remote Interface + + @property + @require_remote_ref_path + def remote_name(self): + """ + :return: + Name of the remote we are a reference of, such as 'origin' for a reference + named 'origin/master'""" + tokens = self.path.split('/') + # /refs/remotes/<remote name>/<branch_name> + return tokens[2] + + @property + @require_remote_ref_path + def remote_head(self): + """:return: Name of the remote head itself, i.e. master. + :note: The returned name is usually not qualified enough to uniquely identify + a branch""" + tokens = self.path.split('/') + return '/'.join(tokens[3:]) + + #} END remote interface diff --git a/git/refs/remote.py b/git/refs/remote.py index 1d45d23f..6f075619 100644 --- a/git/refs/remote.py +++ b/git/refs/remote.py @@ -7,39 +7,39 @@ import os __all__ = ["RemoteReference"] - + class RemoteReference(Head): - """Represents a reference pointing to a remote head.""" - _common_path_default = Head._remote_common_path_default - - - @classmethod - def iter_items(cls, repo, common_path = None, remote=None): - """Iterate remote references, and if given, constrain them to the given remote""" - common_path = common_path or cls._common_path_default - if remote is not None: - common_path = join_path(common_path, str(remote)) - # END handle remote constraint - return super(RemoteReference, cls).iter_items(repo, common_path) - - @classmethod - def delete(cls, repo, *refs, **kwargs): - """Delete the given remote references. 
- :note: - kwargs are given for compatability with the base class method as we - should not narrow the signature.""" - repo.git.branch("-d", "-r", *refs) - # the official deletion method will ignore remote symbolic refs - these - # are generally ignored in the refs/ folder. We don't though - # and delete remainders manually - for ref in refs: - try: - os.remove(join(repo.git_dir, ref.path)) - except OSError: - pass - # END for each ref - - @classmethod - def create(cls, *args, **kwargs): - """Used to disable this method""" - raise TypeError("Cannot explicitly create remote references") + """Represents a reference pointing to a remote head.""" + _common_path_default = Head._remote_common_path_default + + + @classmethod + def iter_items(cls, repo, common_path = None, remote=None): + """Iterate remote references, and if given, constrain them to the given remote""" + common_path = common_path or cls._common_path_default + if remote is not None: + common_path = join_path(common_path, str(remote)) + # END handle remote constraint + return super(RemoteReference, cls).iter_items(repo, common_path) + + @classmethod + def delete(cls, repo, *refs, **kwargs): + """Delete the given remote references. + :note: + kwargs are given for compatability with the base class method as we + should not narrow the signature.""" + repo.git.branch("-d", "-r", *refs) + # the official deletion method will ignore remote symbolic refs - these + # are generally ignored in the refs/ folder. We don't though + # and delete remainders manually + for ref in refs: + try: + os.remove(join(repo.git_dir, ref.path)) + except OSError: + pass + # END for each ref + + @classmethod + def create(cls, *args, **kwargs): + """Used to disable this method""" + raise TypeError("Cannot explicitly create remote references") diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py index 8556a65e..ef21950f 100644 --- a/git/refs/symbolic.py +++ b/git/refs/symbolic.py @@ -1,625 +1,625 @@ import os from git.objects import Object, Commit from git.util import ( - join_path, - join_path_native, - to_native_path_linux, - assure_directory_exists - ) + join_path, + join_path_native, + to_native_path_linux, + assure_directory_exists + ) from gitdb.exc import BadObject from gitdb.util import ( - join, - dirname, - isdir, - exists, - isfile, - rename, - hex_to_bin, - LockedFD - ) + join, + dirname, + isdir, + exists, + isfile, + rename, + hex_to_bin, + LockedFD + ) from log import RefLog __all__ = ["SymbolicReference"] class SymbolicReference(object): - """Represents a special case of a reference such that this reference is symbolic. - It does not point to a specific commit, but to another Head, which itself - specifies a commit. 
- - A typical example for a symbolic reference is HEAD.""" - __slots__ = ("repo", "path") - _resolve_ref_on_create = False - _points_to_commits_only = True - _common_path_default = "" - _remote_common_path_default = "refs/remotes" - _id_attribute_ = "name" - - def __init__(self, repo, path): - self.repo = repo - self.path = path - - def __str__(self): - return self.path - - def __repr__(self): - return '<git.%s "%s">' % (self.__class__.__name__, self.path) - - def __eq__(self, other): - if hasattr(other, 'path'): - return self.path == other.path - return False - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.path) - - @property - def name(self): - """ - :return: - In case of symbolic references, the shortest assumable name - is the path itself.""" - return self.path - - @property - def abspath(self): - return join_path_native(self.repo.git_dir, self.path) - - @classmethod - def _get_packed_refs_path(cls, repo): - return join(repo.git_dir, 'packed-refs') - - @classmethod - def _iter_packed_refs(cls, repo): - """Returns an iterator yielding pairs of sha1/path pairs for the corresponding refs. - :note: The packed refs file will be kept open as long as we iterate""" - try: - fp = open(cls._get_packed_refs_path(repo), 'rb') - for line in fp: - line = line.strip() - if not line: - continue - if line.startswith('#'): - if line.startswith('# pack-refs with:') and not line.endswith('peeled'): - raise TypeError("PackingType of packed-Refs not understood: %r" % line) - # END abort if we do not understand the packing scheme - continue - # END parse comment - - # skip dereferenced tag object entries - previous line was actual - # tag reference for it - if line[0] == '^': - continue - - yield tuple(line.split(' ', 1)) - # END for each line - except (OSError,IOError): - raise StopIteration - # END no packed-refs file handling - # NOTE: Had try-finally block around here to close the fp, - # but some python version woudn't allow yields within that. - # I believe files are closing themselves on destruction, so it is - # alright. - - @classmethod - def dereference_recursive(cls, repo, ref_path): - """ - :return: hexsha stored in the reference at the given ref_path, recursively dereferencing all - intermediate references as required - :param repo: the repository containing the reference at ref_path""" - while True: - hexsha, ref_path = cls._get_ref_info(repo, ref_path) - if hexsha is not None: - return hexsha - # END recursive dereferencing - - @classmethod - def _get_ref_info(cls, repo, ref_path): - """Return: (sha, target_ref_path) if available, the sha the file at - rela_path points to, or None. target_ref_path is the reference we - point to, or None""" - tokens = None - try: - fp = open(join(repo.git_dir, ref_path), 'r') - value = fp.read().rstrip() - fp.close() - tokens = value.split(" ") - except (OSError,IOError): - # Probably we are just packed, find our entry in the packed refs file - # NOTE: We are not a symbolic ref if we are in a packed file, as these - # are excluded explictly - for sha, path in cls._iter_packed_refs(repo): - if path != ref_path: continue - tokens = (sha, path) - break - # END for each packed ref - # END handle packed refs - if tokens is None: - raise ValueError("Reference at %r does not exist" % ref_path) - - # is it a reference ? 
- if tokens[0] == 'ref:': - return (None, tokens[1]) - - # its a commit - if repo.re_hexsha_only.match(tokens[0]): - return (tokens[0], None) - - raise ValueError("Failed to parse reference information from %r" % ref_path) - - def _get_object(self): - """ - :return: - The object our ref currently refers to. Refs can be cached, they will - always point to the actual object as it gets re-created on each query""" - # have to be dynamic here as we may be a tag which can point to anything - # Our path will be resolved to the hexsha which will be used accordingly - return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path))) - - def _get_commit(self): - """ - :return: - Commit object we point to, works for detached and non-detached - SymbolicReferences. The symbolic reference will be dereferenced recursively.""" - obj = self._get_object() - if obj.type == 'tag': - obj = obj.object - #END dereference tag - - if obj.type != Commit.type: - raise TypeError("Symbolic Reference pointed to object %r, commit was required" % obj) - #END handle type - return obj - - def set_commit(self, commit, logmsg = None): - """As set_object, but restricts the type of object to be a Commit - - :raise ValueError: If commit is not a Commit object or doesn't point to - a commit - :return: self""" - # check the type - assume the best if it is a base-string - invalid_type = False - if isinstance(commit, Object): - invalid_type = commit.type != Commit.type - elif isinstance(commit, SymbolicReference): - invalid_type = commit.object.type != Commit.type - else: - try: - invalid_type = self.repo.rev_parse(commit).type != Commit.type - except BadObject: - raise ValueError("Invalid object: %s" % commit) - #END handle exception - # END verify type - - if invalid_type: - raise ValueError("Need commit, got %r" % commit) - #END handle raise - - # we leave strings to the rev-parse method below - self.set_object(commit, logmsg) - - return self - - - def set_object(self, object, logmsg = None): - """Set the object we point to, possibly dereference our symbolic reference first. - If the reference does not exist, it will be created - - :param object: a refspec, a SymbolicReference or an Object instance. SymbolicReferences - will be dereferenced beforehand to obtain the object they point to - :param logmsg: If not None, the message will be used in the reflog entry to be - written. 
Otherwise the reflog is not altered - :note: plain SymbolicReferences may not actually point to objects by convention - :return: self""" - if isinstance(object, SymbolicReference): - object = object.object - #END resolve references - - is_detached = True - try: - is_detached = self.is_detached - except ValueError: - pass - # END handle non-existing ones - - if is_detached: - return self.set_reference(object, logmsg) - - # set the commit on our reference - return self._get_reference().set_object(object, logmsg) - - commit = property(_get_commit, set_commit, doc="Query or set commits directly") - object = property(_get_object, set_object, doc="Return the object our ref currently refers to") - - def _get_reference(self): - """:return: Reference Object we point to - :raise TypeError: If this symbolic reference is detached, hence it doesn't point - to a reference, but to a commit""" - sha, target_ref_path = self._get_ref_info(self.repo, self.path) - if target_ref_path is None: - raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha)) - return self.from_path(self.repo, target_ref_path) - - def set_reference(self, ref, logmsg = None): - """Set ourselves to the given ref. It will stay a symbol if the ref is a Reference. - Otherwise an Object, given as Object instance or refspec, is assumed and if valid, - will be set which effectively detaches the refererence if it was a purely - symbolic one. - - :param ref: SymbolicReference instance, Object instance or refspec string - Only if the ref is a SymbolicRef instance, we will point to it. Everthiny - else is dereferenced to obtain the actual object. - :param logmsg: If set to a string, the message will be used in the reflog. - Otherwise, a reflog entry is not written for the changed reference. - The previous commit of the entry will be the commit we point to now. - - See also: log_append() - - :return: self - :note: This symbolic reference will not be dereferenced. For that, see - ``set_object(...)``""" - write_value = None - obj = None - if isinstance(ref, SymbolicReference): - write_value = "ref: %s" % ref.path - elif isinstance(ref, Object): - obj = ref - write_value = ref.hexsha - elif isinstance(ref, basestring): - try: - obj = self.repo.rev_parse(ref+"^{}") # optionally deref tags - write_value = obj.hexsha - except BadObject: - raise ValueError("Could not extract object from %s" % ref) - # END end try string - else: - raise ValueError("Unrecognized Value: %r" % ref) - # END try commit attribute - - # typecheck - if obj is not None and self._points_to_commits_only and obj.type != Commit.type: - raise TypeError("Require commit, got %r" % obj) - #END verify type - - oldbinsha = None - if logmsg is not None: - try: - oldbinsha = self.commit.binsha - except ValueError: - oldbinsha = Commit.NULL_BIN_SHA - #END handle non-existing - #END retrieve old hexsha - - fpath = self.abspath - assure_directory_exists(fpath, is_file=True) - - lfd = LockedFD(fpath) - fd = lfd.open(write=True, stream=True) - fd.write(write_value) - lfd.commit() - - # Adjust the reflog - if logmsg is not None: - self.log_append(oldbinsha, logmsg) - #END handle reflog - - return self - + """Represents a special case of a reference such that this reference is symbolic. + It does not point to a specific commit, but to another Head, which itself + specifies a commit. 
+
+    A typical example for a symbolic reference is HEAD."""
+    __slots__ = ("repo", "path")
+    _resolve_ref_on_create = False
+    _points_to_commits_only = True
+    _common_path_default = ""
+    _remote_common_path_default = "refs/remotes"
+    _id_attribute_ = "name"
+
+    def __init__(self, repo, path):
+        self.repo = repo
+        self.path = path
+
+    def __str__(self):
+        return self.path
+
+    def __repr__(self):
+        return '<git.%s "%s">' % (self.__class__.__name__, self.path)
+
+    def __eq__(self, other):
+        if hasattr(other, 'path'):
+            return self.path == other.path
+        return False
+
+    def __ne__(self, other):
+        return not ( self == other )
+
+    def __hash__(self):
+        return hash(self.path)
+
+    @property
+    def name(self):
+        """
+        :return:
+            In case of symbolic references, the shortest assumable name
+            is the path itself."""
+        return self.path
+
+    @property
+    def abspath(self):
+        return join_path_native(self.repo.git_dir, self.path)
+
+    @classmethod
+    def _get_packed_refs_path(cls, repo):
+        return join(repo.git_dir, 'packed-refs')
+
+    @classmethod
+    def _iter_packed_refs(cls, repo):
+        """Returns an iterator yielding pairs of sha1/path pairs for the corresponding refs.
+        :note: The packed refs file will be kept open as long as we iterate"""
+        try:
+            fp = open(cls._get_packed_refs_path(repo), 'rb')
+            for line in fp:
+                line = line.strip()
+                if not line:
+                    continue
+                if line.startswith('#'):
+                    if line.startswith('# pack-refs with:') and not line.endswith('peeled'):
+                        raise TypeError("PackingType of packed-Refs not understood: %r" % line)
+                    # END abort if we do not understand the packing scheme
+                    continue
+                # END parse comment
+
+                # skip dereferenced tag object entries - previous line was actual
+                # tag reference for it
+                if line[0] == '^':
+                    continue
+
+                yield tuple(line.split(' ', 1))
+            # END for each line
+        except (OSError,IOError):
+            raise StopIteration
+        # END no packed-refs file handling
+        # NOTE: Had try-finally block around here to close the fp,
+        # but some python version wouldn't allow yields within that.
+        # I believe files are closing themselves on destruction, so it is
+        # alright.
+
+    @classmethod
+    def dereference_recursive(cls, repo, ref_path):
+        """
+        :return: hexsha stored in the reference at the given ref_path, recursively dereferencing all
+            intermediate references as required
+        :param repo: the repository containing the reference at ref_path"""
+        while True:
+            hexsha, ref_path = cls._get_ref_info(repo, ref_path)
+            if hexsha is not None:
+                return hexsha
+        # END recursive dereferencing
+
+    @classmethod
+    def _get_ref_info(cls, repo, ref_path):
+        """Return: (sha, target_ref_path) if available, the sha the file at
+        rela_path points to, or None. target_ref_path is the reference we
+        point to, or None"""
+        tokens = None
+        try:
+            fp = open(join(repo.git_dir, ref_path), 'r')
+            value = fp.read().rstrip()
+            fp.close()
+            tokens = value.split(" ")
+        except (OSError,IOError):
+            # Probably we are just packed, find our entry in the packed refs file
+            # NOTE: We are not a symbolic ref if we are in a packed file, as these
+            # are excluded explicitly
+            for sha, path in cls._iter_packed_refs(repo):
+                if path != ref_path: continue
+                tokens = (sha, path)
+                break
+            # END for each packed ref
+        # END handle packed refs
+        if tokens is None:
+            raise ValueError("Reference at %r does not exist" % ref_path)
+
+        # is it a reference ?
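+        # (a loose ref file holds a single line: either 'ref: <target path>'
+        # for a symbolic ref, or a plain 40-byte hexsha for a direct one)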
+ if tokens[0] == 'ref:': + return (None, tokens[1]) + + # its a commit + if repo.re_hexsha_only.match(tokens[0]): + return (tokens[0], None) + + raise ValueError("Failed to parse reference information from %r" % ref_path) + + def _get_object(self): + """ + :return: + The object our ref currently refers to. Refs can be cached, they will + always point to the actual object as it gets re-created on each query""" + # have to be dynamic here as we may be a tag which can point to anything + # Our path will be resolved to the hexsha which will be used accordingly + return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path))) + + def _get_commit(self): + """ + :return: + Commit object we point to, works for detached and non-detached + SymbolicReferences. The symbolic reference will be dereferenced recursively.""" + obj = self._get_object() + if obj.type == 'tag': + obj = obj.object + #END dereference tag + + if obj.type != Commit.type: + raise TypeError("Symbolic Reference pointed to object %r, commit was required" % obj) + #END handle type + return obj + + def set_commit(self, commit, logmsg = None): + """As set_object, but restricts the type of object to be a Commit + + :raise ValueError: If commit is not a Commit object or doesn't point to + a commit + :return: self""" + # check the type - assume the best if it is a base-string + invalid_type = False + if isinstance(commit, Object): + invalid_type = commit.type != Commit.type + elif isinstance(commit, SymbolicReference): + invalid_type = commit.object.type != Commit.type + else: + try: + invalid_type = self.repo.rev_parse(commit).type != Commit.type + except BadObject: + raise ValueError("Invalid object: %s" % commit) + #END handle exception + # END verify type + + if invalid_type: + raise ValueError("Need commit, got %r" % commit) + #END handle raise + + # we leave strings to the rev-parse method below + self.set_object(commit, logmsg) + + return self + + + def set_object(self, object, logmsg = None): + """Set the object we point to, possibly dereference our symbolic reference first. + If the reference does not exist, it will be created + + :param object: a refspec, a SymbolicReference or an Object instance. SymbolicReferences + will be dereferenced beforehand to obtain the object they point to + :param logmsg: If not None, the message will be used in the reflog entry to be + written. 
Otherwise the reflog is not altered
+        :note: plain SymbolicReferences may not actually point to objects by convention
+        :return: self"""
+        if isinstance(object, SymbolicReference):
+            object = object.object
+        #END resolve references
+
+        is_detached = True
+        try:
+            is_detached = self.is_detached
+        except ValueError:
+            pass
+        # END handle non-existing ones
+
+        if is_detached:
+            return self.set_reference(object, logmsg)
+
+        # set the commit on our reference
+        return self._get_reference().set_object(object, logmsg)
+
+    commit = property(_get_commit, set_commit, doc="Query or set commits directly")
+    object = property(_get_object, set_object, doc="Return the object our ref currently refers to")
+
+    def _get_reference(self):
+        """:return: Reference Object we point to
+        :raise TypeError: If this symbolic reference is detached, hence it doesn't point
+            to a reference, but to a commit"""
+        sha, target_ref_path = self._get_ref_info(self.repo, self.path)
+        if target_ref_path is None:
+            raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha))
+        return self.from_path(self.repo, target_ref_path)
+
+    def set_reference(self, ref, logmsg = None):
+        """Set ourselves to the given ref. It will stay a symbol if the ref is a Reference.
+        Otherwise an Object, given as Object instance or refspec, is assumed and if valid,
+        will be set which effectively detaches the reference if it was a purely
+        symbolic one.
+
+        :param ref: SymbolicReference instance, Object instance or refspec string
+            Only if the ref is a SymbolicRef instance, we will point to it. Everything
+            else is dereferenced to obtain the actual object.
+        :param logmsg: If set to a string, the message will be used in the reflog.
+            Otherwise, a reflog entry is not written for the changed reference.
+            The previous commit of the entry will be the commit we point to now.
+
+            See also: log_append()
+
+        :return: self
+        :note: This symbolic reference will not be dereferenced.
For that, see + ``set_object(...)``""" + write_value = None + obj = None + if isinstance(ref, SymbolicReference): + write_value = "ref: %s" % ref.path + elif isinstance(ref, Object): + obj = ref + write_value = ref.hexsha + elif isinstance(ref, basestring): + try: + obj = self.repo.rev_parse(ref+"^{}") # optionally deref tags + write_value = obj.hexsha + except BadObject: + raise ValueError("Could not extract object from %s" % ref) + # END end try string + else: + raise ValueError("Unrecognized Value: %r" % ref) + # END try commit attribute + + # typecheck + if obj is not None and self._points_to_commits_only and obj.type != Commit.type: + raise TypeError("Require commit, got %r" % obj) + #END verify type + + oldbinsha = None + if logmsg is not None: + try: + oldbinsha = self.commit.binsha + except ValueError: + oldbinsha = Commit.NULL_BIN_SHA + #END handle non-existing + #END retrieve old hexsha + + fpath = self.abspath + assure_directory_exists(fpath, is_file=True) + + lfd = LockedFD(fpath) + fd = lfd.open(write=True, stream=True) + fd.write(write_value) + lfd.commit() + + # Adjust the reflog + if logmsg is not None: + self.log_append(oldbinsha, logmsg) + #END handle reflog + + return self + - # aliased reference - reference = property(_get_reference, set_reference, doc="Returns the Reference we point to") - ref = reference - - def is_valid(self): - """ - :return: - True if the reference is valid, hence it can be read and points to - a valid object or reference.""" - try: - self.object - except (OSError, ValueError): - return False - else: - return True - - @property - def is_detached(self): - """ - :return: - True if we are a detached reference, hence we point to a specific commit - instead to another reference""" - try: - self.ref - return False - except TypeError: - return True - - def log(self): - """ - :return: RefLog for this reference. Its last entry reflects the latest change - applied to this reference - - .. note:: As the log is parsed every time, its recommended to cache it for use - instead of calling this method repeatedly. It should be considered read-only.""" - return RefLog.from_file(RefLog.path(self)) - - def log_append(self, oldbinsha, message, newbinsha=None): - """Append a logentry to the logfile of this ref - - :param oldbinsha: binary sha this ref used to point to - :param message: A message describing the change - :param newbinsha: The sha the ref points to now. If None, our current commit sha - will be used - :return: added RefLogEntry instance""" - return RefLog.append_entry(self.repo.config_reader(), RefLog.path(self), oldbinsha, - (newbinsha is None and self.commit.binsha) or newbinsha, - message) + # aliased reference + reference = property(_get_reference, set_reference, doc="Returns the Reference we point to") + ref = reference + + def is_valid(self): + """ + :return: + True if the reference is valid, hence it can be read and points to + a valid object or reference.""" + try: + self.object + except (OSError, ValueError): + return False + else: + return True + + @property + def is_detached(self): + """ + :return: + True if we are a detached reference, hence we point to a specific commit + instead to another reference""" + try: + self.ref + return False + except TypeError: + return True + + def log(self): + """ + :return: RefLog for this reference. Its last entry reflects the latest change + applied to this reference + + .. note:: As the log is parsed every time, its recommended to cache it for use + instead of calling this method repeatedly. 
It should be considered read-only.""" + return RefLog.from_file(RefLog.path(self)) + + def log_append(self, oldbinsha, message, newbinsha=None): + """Append a logentry to the logfile of this ref + + :param oldbinsha: binary sha this ref used to point to + :param message: A message describing the change + :param newbinsha: The sha the ref points to now. If None, our current commit sha + will be used + :return: added RefLogEntry instance""" + return RefLog.append_entry(self.repo.config_reader(), RefLog.path(self), oldbinsha, + (newbinsha is None and self.commit.binsha) or newbinsha, + message) - def log_entry(self, index): - """:return: RefLogEntry at the given index - :param index: python list compatible positive or negative index - - .. note:: This method must read part of the reflog during execution, hence - it should be used sparringly, or only if you need just one index. - In that case, it will be faster than the ``log()`` method""" - return RefLog.entry_at(RefLog.path(self), index) + def log_entry(self, index): + """:return: RefLogEntry at the given index + :param index: python list compatible positive or negative index + + .. note:: This method must read part of the reflog during execution, hence + it should be used sparringly, or only if you need just one index. + In that case, it will be faster than the ``log()`` method""" + return RefLog.entry_at(RefLog.path(self), index) - @classmethod - def to_full_path(cls, path): - """ - :return: string with a full repository-relative path which can be used to initialize - a Reference instance, for instance by using ``Reference.from_path``""" - if isinstance(path, SymbolicReference): - path = path.path - full_ref_path = path - if not cls._common_path_default: - return full_ref_path - if not path.startswith(cls._common_path_default+"/"): - full_ref_path = '%s/%s' % (cls._common_path_default, path) - return full_ref_path - - @classmethod - def delete(cls, repo, path): - """Delete the reference at the given path - - :param repo: - Repository to delete the reference from - - :param path: - Short or full path pointing to the reference, i.e. refs/myreference - or just "myreference", hence 'refs/' is implied. - Alternatively the symbolic reference to be deleted""" - full_ref_path = cls.to_full_path(path) - abs_path = join(repo.git_dir, full_ref_path) - if exists(abs_path): - os.remove(abs_path) - else: - # check packed refs - pack_file_path = cls._get_packed_refs_path(repo) - try: - reader = open(pack_file_path, 'rb') - except (OSError,IOError): - pass # it didnt exist at all - else: - new_lines = list() - made_change = False - dropped_last_line = False - for line in reader: - # keep line if it is a comment or if the ref to delete is not - # in the line - # If we deleted the last line and this one is a tag-reference object, - # we drop it as well - if ( line.startswith('#') or full_ref_path not in line ) and \ - ( not dropped_last_line or dropped_last_line and not line.startswith('^') ): - new_lines.append(line) - dropped_last_line = False - continue - # END skip comments and lines without our path - - # drop this line - made_change = True - dropped_last_line = True - # END for each line in packed refs - reader.close() - - # write the new lines - if made_change: - # write-binary is required, otherwise windows will - # open the file in text mode and change LF to CRLF ! 
- open(pack_file_path, 'wb').writelines(new_lines) - # END write out file - # END open exception handling - # END handle deletion - - # delete the reflog - reflog_path = RefLog.path(cls(repo, full_ref_path)) - if os.path.isfile(reflog_path): - os.remove(reflog_path) - #END remove reflog - - - @classmethod - def _create(cls, repo, path, resolve, reference, force, logmsg=None): - """internal method used to create a new symbolic reference. - If resolve is False, the reference will be taken as is, creating - a proper symbolic reference. Otherwise it will be resolved to the - corresponding object and a detached symbolic reference will be created - instead""" - full_ref_path = cls.to_full_path(path) - abs_ref_path = join(repo.git_dir, full_ref_path) - - # figure out target data - target = reference - if resolve: - target = repo.rev_parse(str(reference)) - - if not force and isfile(abs_ref_path): - target_data = str(target) - if isinstance(target, SymbolicReference): - target_data = target.path - if not resolve: - target_data = "ref: " + target_data - existing_data = open(abs_ref_path, 'rb').read().strip() - if existing_data != target_data: - raise OSError("Reference at %r does already exist, pointing to %r, requested was %r" % (full_ref_path, existing_data, target_data)) - # END no force handling - - ref = cls(repo, full_ref_path) - ref.set_reference(target, logmsg) - return ref - - @classmethod - def create(cls, repo, path, reference='HEAD', force=False, logmsg=None): - """Create a new symbolic reference, hence a reference pointing to another reference. - - :param repo: - Repository to create the reference in - - :param path: - full path at which the new symbolic reference is supposed to be - created at, i.e. "NEW_HEAD" or "symrefs/my_new_symref" - - :param reference: - The reference to which the new symbolic reference should point to. - If it is a commit'ish, the symbolic ref will be detached. - - :param force: - if True, force creation even if a symbolic reference with that name already exists. - Raise OSError otherwise - - :param logmsg: - If not None, the message to append to the reflog. Otherwise no reflog - entry is written. - - :return: Newly created symbolic Reference - - :raise OSError: - If a (Symbolic)Reference with the same name but different contents - already exists. - - :note: This does not alter the current HEAD, index or Working Tree""" - return cls._create(repo, path, cls._resolve_ref_on_create, reference, force, logmsg) - - def rename(self, new_path, force=False): - """Rename self to a new path - - :param new_path: - Either a simple name or a full path, i.e. new_name or features/new_name. - The prefix refs/ is implied for references and will be set as needed. - In case this is a symbolic ref, there is no implied prefix - - :param force: - If True, the rename will succeed even if a head with the target name - already exists. 
It will be overwritten in that case - - :return: self - :raise OSError: In case a file at path but a different contents already exists """ - new_path = self.to_full_path(new_path) - if self.path == new_path: - return self - - new_abs_path = join(self.repo.git_dir, new_path) - cur_abs_path = join(self.repo.git_dir, self.path) - if isfile(new_abs_path): - if not force: - # if they point to the same file, its not an error - if open(new_abs_path,'rb').read().strip() != open(cur_abs_path,'rb').read().strip(): - raise OSError("File at path %r already exists" % new_abs_path) - # else: we could remove ourselves and use the otherone, but - # but clarity we just continue as usual - # END not force handling - os.remove(new_abs_path) - # END handle existing target file - - dname = dirname(new_abs_path) - if not isdir(dname): - os.makedirs(dname) - # END create directory - - rename(cur_abs_path, new_abs_path) - self.path = new_path - - return self - - @classmethod - def _iter_items(cls, repo, common_path = None): - if common_path is None: - common_path = cls._common_path_default - rela_paths = set() - - # walk loose refs - # Currently we do not follow links - for root, dirs, files in os.walk(join_path_native(repo.git_dir, common_path)): - if 'refs/' not in root: # skip non-refs subfolders - refs_id = [ d for d in dirs if d == 'refs' ] - if refs_id: - dirs[0:] = ['refs'] - # END prune non-refs folders - - for f in files: - abs_path = to_native_path_linux(join_path(root, f)) - rela_paths.add(abs_path.replace(to_native_path_linux(repo.git_dir) + '/', "")) - # END for each file in root directory - # END for each directory to walk - - # read packed refs - for sha, rela_path in cls._iter_packed_refs(repo): - if rela_path.startswith(common_path): - rela_paths.add(rela_path) - # END relative path matches common path - # END packed refs reading - - # return paths in sorted order - for path in sorted(rela_paths): - try: - yield cls.from_path(repo, path) - except ValueError: - continue - # END for each sorted relative refpath - - @classmethod - def iter_items(cls, repo, common_path = None): - """Find all refs in the repository + @classmethod + def to_full_path(cls, path): + """ + :return: string with a full repository-relative path which can be used to initialize + a Reference instance, for instance by using ``Reference.from_path``""" + if isinstance(path, SymbolicReference): + path = path.path + full_ref_path = path + if not cls._common_path_default: + return full_ref_path + if not path.startswith(cls._common_path_default+"/"): + full_ref_path = '%s/%s' % (cls._common_path_default, path) + return full_ref_path + + @classmethod + def delete(cls, repo, path): + """Delete the reference at the given path + + :param repo: + Repository to delete the reference from + + :param path: + Short or full path pointing to the reference, i.e. refs/myreference + or just "myreference", hence 'refs/' is implied. 
+ Alternatively the symbolic reference to be deleted""" + full_ref_path = cls.to_full_path(path) + abs_path = join(repo.git_dir, full_ref_path) + if exists(abs_path): + os.remove(abs_path) + else: + # check packed refs + pack_file_path = cls._get_packed_refs_path(repo) + try: + reader = open(pack_file_path, 'rb') + except (OSError,IOError): + pass # it didnt exist at all + else: + new_lines = list() + made_change = False + dropped_last_line = False + for line in reader: + # keep line if it is a comment or if the ref to delete is not + # in the line + # If we deleted the last line and this one is a tag-reference object, + # we drop it as well + if ( line.startswith('#') or full_ref_path not in line ) and \ + ( not dropped_last_line or dropped_last_line and not line.startswith('^') ): + new_lines.append(line) + dropped_last_line = False + continue + # END skip comments and lines without our path + + # drop this line + made_change = True + dropped_last_line = True + # END for each line in packed refs + reader.close() + + # write the new lines + if made_change: + # write-binary is required, otherwise windows will + # open the file in text mode and change LF to CRLF ! + open(pack_file_path, 'wb').writelines(new_lines) + # END write out file + # END open exception handling + # END handle deletion + + # delete the reflog + reflog_path = RefLog.path(cls(repo, full_ref_path)) + if os.path.isfile(reflog_path): + os.remove(reflog_path) + #END remove reflog + + + @classmethod + def _create(cls, repo, path, resolve, reference, force, logmsg=None): + """internal method used to create a new symbolic reference. + If resolve is False, the reference will be taken as is, creating + a proper symbolic reference. Otherwise it will be resolved to the + corresponding object and a detached symbolic reference will be created + instead""" + full_ref_path = cls.to_full_path(path) + abs_ref_path = join(repo.git_dir, full_ref_path) + + # figure out target data + target = reference + if resolve: + target = repo.rev_parse(str(reference)) + + if not force and isfile(abs_ref_path): + target_data = str(target) + if isinstance(target, SymbolicReference): + target_data = target.path + if not resolve: + target_data = "ref: " + target_data + existing_data = open(abs_ref_path, 'rb').read().strip() + if existing_data != target_data: + raise OSError("Reference at %r does already exist, pointing to %r, requested was %r" % (full_ref_path, existing_data, target_data)) + # END no force handling + + ref = cls(repo, full_ref_path) + ref.set_reference(target, logmsg) + return ref + + @classmethod + def create(cls, repo, path, reference='HEAD', force=False, logmsg=None): + """Create a new symbolic reference, hence a reference pointing to another reference. + + :param repo: + Repository to create the reference in + + :param path: + full path at which the new symbolic reference is supposed to be + created at, i.e. "NEW_HEAD" or "symrefs/my_new_symref" + + :param reference: + The reference to which the new symbolic reference should point to. + If it is a commit'ish, the symbolic ref will be detached. + + :param force: + if True, force creation even if a symbolic reference with that name already exists. + Raise OSError otherwise + + :param logmsg: + If not None, the message to append to the reflog. Otherwise no reflog + entry is written. + + :return: Newly created symbolic Reference + + :raise OSError: + If a (Symbolic)Reference with the same name but different contents + already exists. 
+ + :note: This does not alter the current HEAD, index or Working Tree""" + return cls._create(repo, path, cls._resolve_ref_on_create, reference, force, logmsg) + + def rename(self, new_path, force=False): + """Rename self to a new path + + :param new_path: + Either a simple name or a full path, i.e. new_name or features/new_name. + The prefix refs/ is implied for references and will be set as needed. + In case this is a symbolic ref, there is no implied prefix + + :param force: + If True, the rename will succeed even if a head with the target name + already exists. It will be overwritten in that case + + :return: self + :raise OSError: In case a file at path but a different contents already exists """ + new_path = self.to_full_path(new_path) + if self.path == new_path: + return self + + new_abs_path = join(self.repo.git_dir, new_path) + cur_abs_path = join(self.repo.git_dir, self.path) + if isfile(new_abs_path): + if not force: + # if they point to the same file, its not an error + if open(new_abs_path,'rb').read().strip() != open(cur_abs_path,'rb').read().strip(): + raise OSError("File at path %r already exists" % new_abs_path) + # else: we could remove ourselves and use the otherone, but + # but clarity we just continue as usual + # END not force handling + os.remove(new_abs_path) + # END handle existing target file + + dname = dirname(new_abs_path) + if not isdir(dname): + os.makedirs(dname) + # END create directory + + rename(cur_abs_path, new_abs_path) + self.path = new_path + + return self + + @classmethod + def _iter_items(cls, repo, common_path = None): + if common_path is None: + common_path = cls._common_path_default + rela_paths = set() + + # walk loose refs + # Currently we do not follow links + for root, dirs, files in os.walk(join_path_native(repo.git_dir, common_path)): + if 'refs/' not in root: # skip non-refs subfolders + refs_id = [ d for d in dirs if d == 'refs' ] + if refs_id: + dirs[0:] = ['refs'] + # END prune non-refs folders + + for f in files: + abs_path = to_native_path_linux(join_path(root, f)) + rela_paths.add(abs_path.replace(to_native_path_linux(repo.git_dir) + '/', "")) + # END for each file in root directory + # END for each directory to walk + + # read packed refs + for sha, rela_path in cls._iter_packed_refs(repo): + if rela_path.startswith(common_path): + rela_paths.add(rela_path) + # END relative path matches common path + # END packed refs reading + + # return paths in sorted order + for path in sorted(rela_paths): + try: + yield cls.from_path(repo, path) + except ValueError: + continue + # END for each sorted relative refpath + + @classmethod + def iter_items(cls, repo, common_path = None): + """Find all refs in the repository - :param repo: is the Repo + :param repo: is the Repo - :param common_path: - Optional keyword argument to the path which is to be shared by all - returned Ref objects. - Defaults to class specific portion if None assuring that only - refs suitable for the actual class are returned. + :param common_path: + Optional keyword argument to the path which is to be shared by all + returned Ref objects. + Defaults to class specific portion if None assuring that only + refs suitable for the actual class are returned. 
- :return: - git.SymbolicReference[], each of them is guaranteed to be a symbolic - ref which is not detached and pointing to a valid ref - - List is lexigraphically sorted - The returned objects represent actual subclasses, such as Head or TagReference""" - return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached ) - - @classmethod - def from_path(cls, repo, path): - """ - :param path: full .git-directory-relative path name to the Reference to instantiate - :note: use to_full_path() if you only have a partial path of a known Reference Type - :return: - Instance of type Reference, Head, or Tag - depending on the given path""" - if not path: - raise ValueError("Cannot create Reference from %r" % path) - - for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): - try: - instance = ref_type(repo, path) - if instance.__class__ == SymbolicReference and instance.is_detached: - raise ValueError("SymbolRef was detached, we drop it") - return instance - except ValueError: - pass - # END exception handling - # END for each type to try - raise ValueError("Could not find reference type suitable to handle path %r" % path) + :return: + git.SymbolicReference[], each of them is guaranteed to be a symbolic + ref which is not detached and pointing to a valid ref + + List is lexigraphically sorted + The returned objects represent actual subclasses, such as Head or TagReference""" + return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached ) + + @classmethod + def from_path(cls, repo, path): + """ + :param path: full .git-directory-relative path name to the Reference to instantiate + :note: use to_full_path() if you only have a partial path of a known Reference Type + :return: + Instance of type Reference, Head, or Tag + depending on the given path""" + if not path: + raise ValueError("Cannot create Reference from %r" % path) + + for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): + try: + instance = ref_type(repo, path) + if instance.__class__ == SymbolicReference and instance.is_detached: + raise ValueError("SymbolRef was detached, we drop it") + return instance + except ValueError: + pass + # END exception handling + # END for each type to try + raise ValueError("Could not find reference type suitable to handle path %r" % path) diff --git a/git/refs/tag.py b/git/refs/tag.py index c09d814d..d78d7750 100644 --- a/git/refs/tag.py +++ b/git/refs/tag.py @@ -3,89 +3,89 @@ from reference import Reference __all__ = ["TagReference", "Tag"] - + class TagReference(Reference): - """Class representing a lightweight tag reference which either points to a commit - ,a tag object or any other object. In the latter case additional information, - like the signature or the tag-creator, is available. 
- - This tag object will always point to a commit object, but may carray additional - information in a tag object:: - - tagref = TagReference.list_items(repo)[0] - print tagref.commit.message - if tagref.tag is not None: - print tagref.tag.message""" - - __slots__ = tuple() - _common_path_default = "refs/tags" - - @property - def commit(self): - """:return: Commit object the tag ref points to""" - obj = self.object - if obj.type == "commit": - return obj - elif obj.type == "tag": - # it is a tag object which carries the commit as an object - we can point to anything - return obj.object - else: - raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) + """Class representing a lightweight tag reference which either points to a commit + ,a tag object or any other object. In the latter case additional information, + like the signature or the tag-creator, is available. + + This tag object will always point to a commit object, but may carray additional + information in a tag object:: + + tagref = TagReference.list_items(repo)[0] + print tagref.commit.message + if tagref.tag is not None: + print tagref.tag.message""" + + __slots__ = tuple() + _common_path_default = "refs/tags" + + @property + def commit(self): + """:return: Commit object the tag ref points to""" + obj = self.object + if obj.type == "commit": + return obj + elif obj.type == "tag": + # it is a tag object which carries the commit as an object - we can point to anything + return obj.object + else: + raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) - @property - def tag(self): - """ - :return: Tag object this tag ref points to or None in case - we are a light weight tag""" - obj = self.object - if obj.type == "tag": - return obj - return None - - # make object read-only - # It should be reasonably hard to adjust an existing tag - object = property(Reference._get_object) - - @classmethod - def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs): - """Create a new tag reference. - - :param path: - The name of the tag, i.e. 1.0 or releases/1.0. - The prefix refs/tags is implied - - :param ref: - A reference to the object you want to tag. It can be a commit, tree or - blob. - - :param message: - If not None, the message will be used in your tag object. This will also - create an additional tag object that allows to obtain that information, i.e.:: - - tagref.tag.message - - :param force: - If True, to force creation of a tag even though that tag already exists. - - :param kwargs: - Additional keyword arguments to be passed to git-tag - - :return: A new TagReference""" - args = ( path, ref ) - if message: - kwargs['m'] = message - if force: - kwargs['f'] = True - - repo.git.tag(*args, **kwargs) - return TagReference(repo, "%s/%s" % (cls._common_path_default, path)) - - @classmethod - def delete(cls, repo, *tags): - """Delete the given existing tag or tags""" - repo.git.tag("-d", *tags) - - - + @property + def tag(self): + """ + :return: Tag object this tag ref points to or None in case + we are a light weight tag""" + obj = self.object + if obj.type == "tag": + return obj + return None + + # make object read-only + # It should be reasonably hard to adjust an existing tag + object = property(Reference._get_object) + + @classmethod + def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs): + """Create a new tag reference. + + :param path: + The name of the tag, i.e. 1.0 or releases/1.0. 
+ The prefix refs/tags is implied + + :param ref: + A reference to the object you want to tag. It can be a commit, tree or + blob. + + :param message: + If not None, the message will be used in your tag object. This will also + create an additional tag object that allows to obtain that information, i.e.:: + + tagref.tag.message + + :param force: + If True, to force creation of a tag even though that tag already exists. + + :param kwargs: + Additional keyword arguments to be passed to git-tag + + :return: A new TagReference""" + args = ( path, ref ) + if message: + kwargs['m'] = message + if force: + kwargs['f'] = True + + repo.git.tag(*args, **kwargs) + return TagReference(repo, "%s/%s" % (cls._common_path_default, path)) + + @classmethod + def delete(cls, repo, *tags): + """Delete the given existing tag or tags""" + repo.git.tag("-d", *tags) + + + # provide an alias Tag = TagReference diff --git a/git/remote.py b/git/remote.py index ed01783c..f89e9d83 100644 --- a/git/remote.py +++ b/git/remote.py @@ -11,18 +11,18 @@ from ConfigParser import NoOptionError from config import SectionConstraint from git.util import ( - LazyMixin, - Iterable, - IterableList, - RemoteProgress - ) + LazyMixin, + Iterable, + IterableList, + RemoteProgress + ) from refs import ( - Reference, - RemoteReference, - SymbolicReference, - TagReference - ) + Reference, + RemoteReference, + SymbolicReference, + TagReference + ) from git.util import join_path from gitdb.util import join @@ -36,621 +36,621 @@ __all__ = ('RemoteProgress', 'PushInfo', 'FetchInfo', 'Remote') #{ Utilities def digest_process_messages(fh, progress): - """Read progress messages from file-like object fh, supplying the respective - progress messages to the progress instance. - - :param fh: File handle to read from - :return: list(line, ...) list of lines without linebreaks that did - not contain progress information""" - line_so_far = '' - dropped_lines = list() - while True: - char = fh.read(1) - if not char: - break + """Read progress messages from file-like object fh, supplying the respective + progress messages to the progress instance. + + :param fh: File handle to read from + :return: list(line, ...) 
list of lines without linebreaks that did + not contain progress information""" + line_so_far = '' + dropped_lines = list() + while True: + char = fh.read(1) + if not char: + break - if char in ('\r', '\n') and line_so_far: - dropped_lines.extend(progress._parse_progress_line(line_so_far)) - line_so_far = '' - else: - line_so_far += char - # END process parsed line - # END while file is not done reading - return dropped_lines + if char in ('\r', '\n') and line_so_far: + dropped_lines.extend(progress._parse_progress_line(line_so_far)) + line_so_far = '' + else: + line_so_far += char + # END process parsed line + # END while file is not done reading + return dropped_lines def finalize_process(proc): - """Wait for the process (clone, fetch, pull or push) and handle its errors accordingly""" - try: - proc.wait() - except GitCommandError,e: - # if a push has rejected items, the command has non-zero return status - # a return status of 128 indicates a connection error - reraise the previous one - if proc.poll() == 128: - raise - pass - # END exception handling + """Wait for the process (clone, fetch, pull or push) and handle its errors accordingly""" + try: + proc.wait() + except GitCommandError,e: + # if a push has rejected items, the command has non-zero return status + # a return status of 128 indicates a connection error - reraise the previous one + if proc.poll() == 128: + raise + pass + # END exception handling def add_progress(kwargs, git, progress): - """Add the --progress flag to the given kwargs dict if supported by the - git command. If the actual progress in the given progress instance is not - given, we do not request any progress - :return: possibly altered kwargs""" - if progress is not None: - v = git.version_info - if v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3: - kwargs['progress'] = True - #END handle --progress - #END handle progress - return kwargs + """Add the --progress flag to the given kwargs dict if supported by the + git command. If the actual progress in the given progress instance is not + given, we do not request any progress + :return: possibly altered kwargs""" + if progress is not None: + v = git.version_info + if v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3: + kwargs['progress'] = True + #END handle --progress + #END handle progress + return kwargs #} END utilities - + class PushInfo(object): - """ - Carries information about the result of a push operation of a single head:: - - info = remote.push()[0] - info.flags # bitflags providing more information about the result - info.local_ref # Reference pointing to the local reference that was pushed - # It is None if the ref was deleted. - info.remote_ref_string # path to the remote reference located on the remote side - info.remote_ref # Remote Reference on the local side corresponding to - # the remote_ref_string. It can be a TagReference as well. - info.old_commit # commit at which the remote_ref was standing before we pushed - # it to local_ref.commit. 
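# Hedged sketch of reading PushInfo results, assuming a configured remote
# named 'origin' and the hypothetical repository path used throughout.
from git import Repo

repo = Repo('/tmp/repo')
for info in repo.remotes.origin.push():
    if info.flags & info.ERROR:
        print 'push failed: %s' % info.summary
    elif info.flags & (info.FAST_FORWARD | info.FORCED_UPDATE):
        # old_commit is only set for updates of existing heads
        print 'updated %s from %s' % (info.remote_ref_string, info.old_commit)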
Will be None if an error was indicated - info.summary # summary line providing human readable english text about the push - """ - __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary') - - NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ - FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] + """ + Carries information about the result of a push operation of a single head:: + + info = remote.push()[0] + info.flags # bitflags providing more information about the result + info.local_ref # Reference pointing to the local reference that was pushed + # It is None if the ref was deleted. + info.remote_ref_string # path to the remote reference located on the remote side + info.remote_ref # Remote Reference on the local side corresponding to + # the remote_ref_string. It can be a TagReference as well. + info.old_commit # commit at which the remote_ref was standing before we pushed + # it to local_ref.commit. Will be None if an error was indicated + info.summary # summary line providing human readable english text about the push + """ + __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary') + + NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ + FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] - _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0, - '+' : FORCED_UPDATE, ' ' : FAST_FORWARD, - '=' : UP_TO_DATE, '!' : ERROR } - - def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None, - summary=''): - """ Initialize a new instance """ - self.flags = flags - self.local_ref = local_ref - self.remote_ref_string = remote_ref_string - self._remote = remote - self.old_commit = old_commit - self.summary = summary - - @property - def remote_ref(self): - """ - :return: - Remote Reference or TagReference in the local repository corresponding - to the remote_ref_string kept in this instance.""" - # translate heads to a local remote, tags stay as they are - if self.remote_ref_string.startswith("refs/tags"): - return TagReference(self._remote.repo, self.remote_ref_string) - elif self.remote_ref_string.startswith("refs/heads"): - remote_ref = Reference(self._remote.repo, self.remote_ref_string) - return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) - else: - raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) - # END - - @classmethod - def _from_line(cls, remote, line): - """Create a new PushInfo instance as parsed from line which is expected to be like - refs/heads/master:refs/heads/master 05d2687..1d0568e""" - control_character, from_to, summary = line.split('\t', 3) - flags = 0 - - # control character handling - try: - flags |= cls._flag_map[ control_character ] - except KeyError: - raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) - # END handle control character - - # from_to handling - from_ref_string, to_ref_string = from_to.split(':') - if flags & cls.DELETED: - from_ref = None - else: - from_ref = Reference.from_path(remote.repo, from_ref_string) - - # commit handling, could be message or commit info - old_commit = None - if summary.startswith('['): - if "[rejected]" in summary: - flags |= cls.REJECTED - elif "[remote rejected]" in summary: - flags |= cls.REMOTE_REJECTED - elif "[remote failure]" in summary: - flags |= cls.REMOTE_FAILURE - elif "[no match]" in 
summary: - flags |= cls.ERROR - elif "[new tag]" in summary: - flags |= cls.NEW_TAG - elif "[new branch]" in summary: - flags |= cls.NEW_HEAD - # uptodate encoded in control character - else: - # fast-forward or forced update - was encoded in control character, - # but we parse the old and new commit - split_token = "..." - if control_character == " ": - split_token = ".." - old_sha, new_sha = summary.split(' ')[0].split(split_token) - # have to use constructor here as the sha usually is abbreviated - old_commit = remote.repo.commit(old_sha) - # END message handling - - return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary) - + _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0, + '+' : FORCED_UPDATE, ' ' : FAST_FORWARD, + '=' : UP_TO_DATE, '!' : ERROR } + + def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None, + summary=''): + """ Initialize a new instance """ + self.flags = flags + self.local_ref = local_ref + self.remote_ref_string = remote_ref_string + self._remote = remote + self.old_commit = old_commit + self.summary = summary + + @property + def remote_ref(self): + """ + :return: + Remote Reference or TagReference in the local repository corresponding + to the remote_ref_string kept in this instance.""" + # translate heads to a local remote, tags stay as they are + if self.remote_ref_string.startswith("refs/tags"): + return TagReference(self._remote.repo, self.remote_ref_string) + elif self.remote_ref_string.startswith("refs/heads"): + remote_ref = Reference(self._remote.repo, self.remote_ref_string) + return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) + else: + raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) + # END + + @classmethod + def _from_line(cls, remote, line): + """Create a new PushInfo instance as parsed from line which is expected to be like + refs/heads/master:refs/heads/master 05d2687..1d0568e""" + control_character, from_to, summary = line.split('\t', 3) + flags = 0 + + # control character handling + try: + flags |= cls._flag_map[ control_character ] + except KeyError: + raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) + # END handle control character + + # from_to handling + from_ref_string, to_ref_string = from_to.split(':') + if flags & cls.DELETED: + from_ref = None + else: + from_ref = Reference.from_path(remote.repo, from_ref_string) + + # commit handling, could be message or commit info + old_commit = None + if summary.startswith('['): + if "[rejected]" in summary: + flags |= cls.REJECTED + elif "[remote rejected]" in summary: + flags |= cls.REMOTE_REJECTED + elif "[remote failure]" in summary: + flags |= cls.REMOTE_FAILURE + elif "[no match]" in summary: + flags |= cls.ERROR + elif "[new tag]" in summary: + flags |= cls.NEW_TAG + elif "[new branch]" in summary: + flags |= cls.NEW_HEAD + # uptodate encoded in control character + else: + # fast-forward or forced update - was encoded in control character, + # but we parse the old and new commit + split_token = "..." + if control_character == " ": + split_token = ".." 
+ old_sha, new_sha = summary.split(' ')[0].split(split_token) + # have to use constructor here as the sha usually is abbreviated + old_commit = remote.repo.commit(old_sha) + # END message handling + + return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary) + class FetchInfo(object): - """ - Carries information about the results of a fetch operation of a single head:: - - info = remote.fetch()[0] - info.ref # Symbolic Reference or RemoteReference to the changed - # remote head or FETCH_HEAD - info.flags # additional flags to be & with enumeration members, - # i.e. info.flags & info.REJECTED - # is 0 if ref is SymbolicReference - info.note # additional notes given by git-fetch intended for the user - info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref, otherwise None - """ - __slots__ = ('ref','old_commit', 'flags', 'note') - - NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ - FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] - - # %c %-*s %-*s -> %s (%s) - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") - - _flag_map = { '!' : ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0, - '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD } - - def __init__(self, ref, flags, note = '', old_commit = None): - """ - Initialize a new instance - """ - self.ref = ref - self.flags = flags - self.note = note - self.old_commit = old_commit - - def __str__(self): - return self.name - - @property - def name(self): - """:return: Name of our remote ref""" - return self.ref.name - - @property - def commit(self): - """:return: Commit of our remote ref""" - return self.ref.commit - - @classmethod - def _from_line(cls, repo, line, fetch_line): - """Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. - - We can handle a line as follows - "%c %-*s %-*s -> %s%s" - - Where c is either ' ', !, +, -, *, or = - ! means error - + means success forcing update - - means a tag was updated - * means birth of new branch or tag - = means the head was up to date ( and not moved ) - ' ' means a fast-forward - - fetch line is the corresponding line from FETCH_HEAD, like - acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" - match = cls.re_fetch_result.match(line) - if match is None: - raise ValueError("Failed to parse line: %r" % line) - - # parse lines - control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() - try: - new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") - ref_type_name, fetch_note = fetch_note.split(' ', 1) - except ValueError: # unpack error - raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) - - # handle FETCH_HEAD and figure out ref type - # If we do not specify a target branch like master:refs/remotes/origin/master, - # the fetch result is stored in FETCH_HEAD which destroys the rule we usually - # have. In that case we use a symbolic reference which is detached - ref_type = None - if remote_local_ref == "FETCH_HEAD": - ref_type = SymbolicReference - elif ref_type_name in ("remote-tracking", "branch"): - # note: remote-tracking is just the first part of the 'remote-tracking branch' token. 
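# Hedged sketch of consuming FetchInfo instances as documented above; the
# remote name 'origin' and the repository path are assumptions.
from git import Repo

repo = Repo('/tmp/repo')
for info in repo.remotes.origin.fetch():
    print '%s -> %s' % (info.name, info.commit.hexsha)
    if info.flags & info.NEW_HEAD:
        print 'fetched new branch %s' % info.ref
    if info.old_commit is not None:
        print 'previous position was %s' % info.old_commit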
- # We don't parse it correctly, but its enough to know what to do, and its new in git 1.7something - ref_type = RemoteReference - elif ref_type_name == "tag": - ref_type = TagReference - else: - raise TypeError("Cannot handle reference type: %r" % ref_type_name) - #END handle ref type - - # create ref instance - if ref_type is SymbolicReference: - remote_local_ref = ref_type(repo, "FETCH_HEAD") - else: - # determine prefix. Tags are usually pulled into refs/tags, they may have subdirectories. - # It is not clear sometimes where exactly the item is, unless we have an absolute path as indicated - # by the 'ref/' prefix. Otherwise even a tag could be in refs/remotes, which is when it will have the - # 'tags/' subdirectory in its path. - # We don't want to test for actual existence, but try to figure everything out analytically. - ref_path = None - remote_local_ref = remote_local_ref.strip() - if remote_local_ref.startswith(Reference._common_path_default + "/"): - # always use actual type if we get absolute paths - # Will always be the case if something is fetched outside of refs/remotes (if its not a tag) - ref_path = remote_local_ref - if ref_type is not TagReference and not remote_local_ref.startswith(RemoteReference._common_path_default + "/"): - ref_type = Reference - #END downgrade remote reference - elif ref_type is TagReference and 'tags/' in remote_local_ref: - # even though its a tag, it is located in refs/remotes - ref_path = join_path(RemoteReference._common_path_default, remote_local_ref) - else: - ref_path = join_path(ref_type._common_path_default, remote_local_ref) - #END obtain refpath - - # even though the path could be within the git conventions, we make - # sure we respect whatever the user wanted, and disabled path checking - remote_local_ref = ref_type(repo, ref_path, check_path=False) - # END create ref instance - - note = ( note and note.strip() ) or '' - - # parse flags from control_character - flags = 0 - try: - flags |= cls._flag_map[control_character] - except KeyError: - raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) - # END control char exception hanlding - - # parse operation string for more info - makes no sense for symbolic refs - old_commit = None - if isinstance(remote_local_ref, Reference): - if 'rejected' in operation: - flags |= cls.REJECTED - if 'new tag' in operation: - flags |= cls.NEW_TAG - if 'new branch' in operation: - flags |= cls.NEW_HEAD - if '...' in operation or '..' in operation: - split_token = '...' - if control_character == ' ': - split_token = split_token[:-1] - old_commit = repo.rev_parse(operation.split(split_token)[0]) - # END handle refspec - # END reference flag handling - - return cls(remote_local_ref, flags, note, old_commit) - + """ + Carries information about the results of a fetch operation of a single head:: + + info = remote.fetch()[0] + info.ref # Symbolic Reference or RemoteReference to the changed + # remote head or FETCH_HEAD + info.flags # additional flags to be & with enumeration members, + # i.e. 
info.flags & info.REJECTED + # is 0 if ref is SymbolicReference + info.note # additional notes given by git-fetch intended for the user + info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref, otherwise None + """ + __slots__ = ('ref','old_commit', 'flags', 'note') + + NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ + FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] + + # %c %-*s %-*s -> %s (%s) + re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") + + _flag_map = { '!' : ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0, + '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD } + + def __init__(self, ref, flags, note = '', old_commit = None): + """ + Initialize a new instance + """ + self.ref = ref + self.flags = flags + self.note = note + self.old_commit = old_commit + + def __str__(self): + return self.name + + @property + def name(self): + """:return: Name of our remote ref""" + return self.ref.name + + @property + def commit(self): + """:return: Commit of our remote ref""" + return self.ref.commit + + @classmethod + def _from_line(cls, repo, line, fetch_line): + """Parse information from the given line as returned by git-fetch -v + and return a new FetchInfo object representing this information. + + We can handle a line as follows + "%c %-*s %-*s -> %s%s" + + Where c is either ' ', !, +, -, *, or = + ! means error + + means success forcing update + - means a tag was updated + * means birth of new branch or tag + = means the head was up to date ( and not moved ) + ' ' means a fast-forward + + fetch line is the corresponding line from FETCH_HEAD, like + acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" + match = cls.re_fetch_result.match(line) + if match is None: + raise ValueError("Failed to parse line: %r" % line) + + # parse lines + control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() + try: + new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") + ref_type_name, fetch_note = fetch_note.split(' ', 1) + except ValueError: # unpack error + raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) + + # handle FETCH_HEAD and figure out ref type + # If we do not specify a target branch like master:refs/remotes/origin/master, + # the fetch result is stored in FETCH_HEAD which destroys the rule we usually + # have. In that case we use a symbolic reference which is detached + ref_type = None + if remote_local_ref == "FETCH_HEAD": + ref_type = SymbolicReference + elif ref_type_name in ("remote-tracking", "branch"): + # note: remote-tracking is just the first part of the 'remote-tracking branch' token. + # We don't parse it correctly, but its enough to know what to do, and its new in git 1.7something + ref_type = RemoteReference + elif ref_type_name == "tag": + ref_type = TagReference + else: + raise TypeError("Cannot handle reference type: %r" % ref_type_name) + #END handle ref type + + # create ref instance + if ref_type is SymbolicReference: + remote_local_ref = ref_type(repo, "FETCH_HEAD") + else: + # determine prefix. Tags are usually pulled into refs/tags, they may have subdirectories. + # It is not clear sometimes where exactly the item is, unless we have an absolute path as indicated + # by the 'ref/' prefix. Otherwise even a tag could be in refs/remotes, which is when it will have the + # 'tags/' subdirectory in its path. 
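# Illustrative paraphrase of the prefixing rule explained in the comments
# above; this helper is not part of GitPython, it only mirrors the logic.
def resolve_ref_path(name, default_prefix='refs/remotes'):
    if name.startswith('refs/'):
        return name                       # absolute path keeps its type
    if 'tags/' in name:
        return 'refs/remotes/' + name     # tag fetched into a remote namespace
    return default_prefix + '/' + name

print resolve_ref_path('origin/master')       # refs/remotes/origin/master
print resolve_ref_path('refs/heads/master')   # unchanged
print resolve_ref_path('origin/tags/1.0')     # refs/remotes/origin/tags/1.0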
+ # We don't want to test for actual existence, but try to figure everything out analytically. + ref_path = None + remote_local_ref = remote_local_ref.strip() + if remote_local_ref.startswith(Reference._common_path_default + "/"): + # always use actual type if we get absolute paths + # Will always be the case if something is fetched outside of refs/remotes (if its not a tag) + ref_path = remote_local_ref + if ref_type is not TagReference and not remote_local_ref.startswith(RemoteReference._common_path_default + "/"): + ref_type = Reference + #END downgrade remote reference + elif ref_type is TagReference and 'tags/' in remote_local_ref: + # even though its a tag, it is located in refs/remotes + ref_path = join_path(RemoteReference._common_path_default, remote_local_ref) + else: + ref_path = join_path(ref_type._common_path_default, remote_local_ref) + #END obtain refpath + + # even though the path could be within the git conventions, we make + # sure we respect whatever the user wanted, and disabled path checking + remote_local_ref = ref_type(repo, ref_path, check_path=False) + # END create ref instance + + note = ( note and note.strip() ) or '' + + # parse flags from control_character + flags = 0 + try: + flags |= cls._flag_map[control_character] + except KeyError: + raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) + # END control char exception hanlding + + # parse operation string for more info - makes no sense for symbolic refs + old_commit = None + if isinstance(remote_local_ref, Reference): + if 'rejected' in operation: + flags |= cls.REJECTED + if 'new tag' in operation: + flags |= cls.NEW_TAG + if 'new branch' in operation: + flags |= cls.NEW_HEAD + if '...' in operation or '..' in operation: + split_token = '...' + if control_character == ' ': + split_token = split_token[:-1] + old_commit = repo.rev_parse(operation.split(split_token)[0]) + # END handle refspec + # END reference flag handling + + return cls(remote_local_ref, flags, note, old_commit) + class Remote(LazyMixin, Iterable): - """Provides easy read and write access to a git remote. - - Everything not part of this interface is considered an option for the current - remote, allowing constructs like remote.pushurl to query the pushurl. - - NOTE: When querying configuration, the configuration accessor will be cached - to speed up subsequent accesses.""" - - __slots__ = ( "repo", "name", "_config_reader" ) - _id_attribute_ = "name" - - def __init__(self, repo, name): - """Initialize a remote instance - - :param repo: The repository we are a remote of - :param name: the name of the remote, i.e. 'origin'""" - self.repo = repo - self.name = name - - if os.name == 'nt': - # some oddity: on windows, python 2.5, it for some reason does not realize - # that it has the config_writer property, but instead calls __getattr__ - # which will not yield the expected results. 'pinging' the members - # with a dir call creates the config_writer property that we require - # ... bugs like these make me wonder wheter python really wants to be used - # for production. It doesn't happen on linux though. 
- dir(self) - # END windows special handling - - def __getattr__(self, attr): - """Allows to call this instance like - remote.special( *args, **kwargs) to call git-remote special self.name""" - if attr == "_config_reader": - return super(Remote, self).__getattr__(attr) - - # sometimes, probably due to a bug in python itself, we are being called - # even though a slot of the same name exists - try: - return self._config_reader.get(attr) - except NoOptionError: - return super(Remote, self).__getattr__(attr) - # END handle exception - - def _config_section_name(self): - return 'remote "%s"' % self.name - - def _set_cache_(self, attr): - if attr == "_config_reader": - self._config_reader = SectionConstraint(self.repo.config_reader(), self._config_section_name()) - else: - super(Remote, self)._set_cache_(attr) - - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.%s "%s">' % (self.__class__.__name__, self.name) - - def __eq__(self, other): - return self.name == other.name - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.name) - - @classmethod - def iter_items(cls, repo): - """:return: Iterator yielding Remote objects of the given repository""" - for section in repo.config_reader("repository").sections(): - if not section.startswith('remote'): - continue - lbound = section.find('"') - rbound = section.rfind('"') - if lbound == -1 or rbound == -1: - raise ValueError("Remote-Section has invalid format: %r" % section) - yield Remote(repo, section[lbound+1:rbound]) - # END for each configuration section - - @property - def refs(self): - """ - :return: - IterableList of RemoteReference objects. It is prefixed, allowing - you to omit the remote path portion, i.e.:: - remote.refs.master # yields RemoteReference('/refs/remotes/origin/master')""" - out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) - out_refs.extend(RemoteReference.list_items(self.repo, remote=self.name)) - assert out_refs, "Remote %s did not have any references" % self.name - return out_refs - - @property - def stale_refs(self): - """ - :return: - IterableList RemoteReference objects that do not have a corresponding - head in the remote reference anymore as they have been deleted on the - remote side, but are still available locally. - - The IterableList is prefixed, hence the 'origin' must be omitted. 
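# Hedged sketch of acting on stale_refs, assuming this GitPython version
# provides RemoteReference.delete as a classmethod (names are hypothetical).
from git import Repo, RemoteReference

repo = Repo('/tmp/repo')
for ref in repo.remotes.origin.stale_refs:
    print 'stale remote-tracking ref:', ref.name
    RemoteReference.delete(repo, ref)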
See - 'refs' property for an example.""" - out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) - for line in self.repo.git.remote("prune", "--dry-run", self).splitlines()[2:]: - # expecting - # * [would prune] origin/new_branch - token = " * [would prune] " - if not line.startswith(token): - raise ValueError("Could not parse git-remote prune result: %r" % line) - fqhn = "%s/%s" % (RemoteReference._common_path_default,line.replace(token, "")) - out_refs.append(RemoteReference(self.repo, fqhn)) - # END for each line - return out_refs - - @classmethod - def create(cls, repo, name, url, **kwargs): - """Create a new remote to the given repository - :param repo: Repository instance that is to receive the new remote - :param name: Desired name of the remote - :param url: URL which corresponds to the remote's name - :param kwargs: - Additional arguments to be passed to the git-remote add command - - :return: New Remote instance - - :raise GitCommandError: in case an origin with that name already exists""" - repo.git.remote( "add", name, url, **kwargs ) - return cls(repo, name) - - # add is an alias - add = create - - @classmethod - def remove(cls, repo, name ): - """Remove the remote with the given name""" - repo.git.remote("rm", name) - - # alias - rm = remove - - def rename(self, new_name): - """Rename self to the given new_name - :return: self """ - if self.name == new_name: - return self - - self.repo.git.remote("rename", self.name, new_name) - self.name = new_name - try: - del(self._config_reader) # it contains cached values, section names are different now - except AttributeError: - pass - #END handle exception - return self - - def update(self, **kwargs): - """Fetch all changes for this remote, including new branches which will - be forced in ( in case your local remote branch is not part the new remote branches - ancestry anymore ). - - :param kwargs: - Additional arguments passed to git-remote update - - :return: self """ - self.repo.git.remote("update", self.name) - return self - - def _get_fetch_info_from_stderr(self, proc, progress): - # skip first line as it is some remote info we are not interested in - output = IterableList('name') - - - # lines which are no progress are fetch info lines - # this also waits for the command to finish - # Skip some progress lines that don't provide relevant information - fetch_info_lines = list() - for line in digest_process_messages(proc.stderr, progress): - if line.startswith('From') or line.startswith('remote: Total') or line.startswith('POST'): - continue - elif line.startswith('warning:'): - print >> sys.stderr, line - continue - elif line.startswith('fatal:'): - raise GitCommandError(("Error when fetching: %s" % line,), 2) - # END handle special messages - fetch_info_lines.append(line) - # END for each line - - # read head information - fp = open(join(self.repo.git_dir, 'FETCH_HEAD'),'r') - fetch_head_info = fp.readlines() - fp.close() - - assert len(fetch_info_lines) == len(fetch_head_info), "len(%s) != len(%s)" % (fetch_head_info, fetch_info_lines) - - output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) - for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) - - finalize_process(proc) - return output - - def _get_push_info(self, proc, progress): - # read progress information from stderr - # we hope stdout can hold all the data, it should ... - # read the lines manually as it will use carriage returns between the messages - # to override the previous one. 
This is why we read the bytes manually - digest_process_messages(proc.stderr, progress) - - output = IterableList('name') - for line in proc.stdout.readlines(): - try: - output.append(PushInfo._from_line(self, line)) - except ValueError: - # if an error happens, additional info is given which we cannot parse - pass - # END exception handling - # END for each line - - finalize_process(proc) - return output - - - def fetch(self, refspec=None, progress=None, **kwargs): - """Fetch the latest changes for this remote - - :param refspec: - A "refspec" is used by fetch and push to describe the mapping - between remote ref and local ref. They are combined with a colon in - the format <src>:<dst>, preceded by an optional plus sign, +. - For example: git fetch $URL refs/heads/master:refs/heads/origin means - "grab the master branch head from the $URL and store it as my origin - branch head". And git push $URL refs/heads/master:refs/heads/to-upstream - means "publish my master branch head as to-upstream branch at $URL". - See also git-push(1). - - Taken from the git manual - :param progress: See 'push' method - :param kwargs: Additional arguments to be passed to git-fetch - :return: - IterableList(FetchInfo, ...) list of FetchInfo instances providing detailed - information about the fetch results - - :note: - As fetch does not provide progress information to non-ttys, we cannot make - it available here unfortunately as in the 'push' method.""" - kwargs = add_progress(kwargs, self.repo.git, progress) - proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) - - def pull(self, refspec=None, progress=None, **kwargs): - """Pull changes from the given branch, being the same as a fetch followed - by a merge of branch with your local branch. - - :param refspec: see 'fetch' method - :param progress: see 'push' method - :param kwargs: Additional arguments to be passed to git-pull - :return: Please see 'fetch' method """ - kwargs = add_progress(kwargs, self.repo.git, progress) - proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) - - def push(self, refspec=None, progress=None, **kwargs): - """Push changes from source branch in refspec to target branch in refspec. - - :param refspec: see 'fetch' method - :param progress: - Instance of type RemoteProgress allowing the caller to receive - progress information until the method returns. - If None, progress information will be discarded - - :param kwargs: Additional arguments to be passed to git-push - :return: - IterableList(PushInfo, ...) iterable list of PushInfo instances, each - one informing about an individual head which had been updated on the remote - side. - If the push contains rejected heads, these will have the PushInfo.ERROR bit set - in their flags. - If the operation fails completely, the length of the returned IterableList will - be null.""" - kwargs = add_progress(kwargs, self.repo.git, progress) - proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs) - return self._get_push_info(proc, progress or RemoteProgress()) - - @property - def config_reader(self): - """ - :return: - GitConfigParser compatible object able to read options for only our remote. 
-        Hence you may simple type config.get("pushurl") to obtain the information"""
-        return self._config_reader
-    
-    @property
-    def config_writer(self):
-        """
-        :return: GitConfigParser compatible object able to write options for this remote.
-        :note:
-            You can only own one writer at a time - delete it to release the
-            configuration file and make it useable by others.
-            
-            To assure consistent results, you should only query options through the
-            writer. Once you are done writing, you are free to use the config reader
-            once again."""
-        writer = self.repo.config_writer()
-        
-        # clear our cache to assure we re-read the possibly changed configuration
-        try:
-            del(self._config_reader)
-        except AttributeError:
-            pass
-        #END handle exception
-        return SectionConstraint(writer, self._config_section_name())
+    """Provides easy read and write access to a git remote.
+    
+    Everything not part of this interface is considered an option for the current
+    remote, allowing constructs like remote.pushurl to query the pushurl.
+    
+    NOTE: When querying configuration, the configuration accessor will be cached
+    to speed up subsequent accesses."""
+    
+    __slots__ = ( "repo", "name", "_config_reader" )
+    _id_attribute_ = "name"
+    
+    def __init__(self, repo, name):
+        """Initialize a remote instance
+        
+        :param repo: The repository we are a remote of
+        :param name: the name of the remote, i.e. 'origin'"""
+        self.repo = repo
+        self.name = name
+        
+        if os.name == 'nt':
+            # some oddity: on windows, python 2.5, it for some reason does not realize
+            # that it has the config_writer property, but instead calls __getattr__
+            # which will not yield the expected results. 'pinging' the members
+            # with a dir call creates the config_writer property that we require
+            # ... bugs like these make me wonder wheter python really wants to be used
+            # for production. It doesn't happen on linux though.
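# Hedged sketch of the option passthrough and the reader/writer pair,
# following tutorial-style usage; the pushurl value is hypothetical.
from git import Repo

repo = Repo('/tmp/repo')
origin = repo.remote('origin')
print origin.url                  # unknown attributes resolve via the config reader
writer = origin.config_writer     # a property, not a method, in this codebase
writer.set('pushurl', 'git@example.com:user/repo.git')
del(writer)                       # release the lock on the configuration file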
+ dir(self) + # END windows special handling + + def __getattr__(self, attr): + """Allows to call this instance like + remote.special( *args, **kwargs) to call git-remote special self.name""" + if attr == "_config_reader": + return super(Remote, self).__getattr__(attr) + + # sometimes, probably due to a bug in python itself, we are being called + # even though a slot of the same name exists + try: + return self._config_reader.get(attr) + except NoOptionError: + return super(Remote, self).__getattr__(attr) + # END handle exception + + def _config_section_name(self): + return 'remote "%s"' % self.name + + def _set_cache_(self, attr): + if attr == "_config_reader": + self._config_reader = SectionConstraint(self.repo.config_reader(), self._config_section_name()) + else: + super(Remote, self)._set_cache_(attr) + + + def __str__(self): + return self.name + + def __repr__(self): + return '<git.%s "%s">' % (self.__class__.__name__, self.name) + + def __eq__(self, other): + return self.name == other.name + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.name) + + @classmethod + def iter_items(cls, repo): + """:return: Iterator yielding Remote objects of the given repository""" + for section in repo.config_reader("repository").sections(): + if not section.startswith('remote'): + continue + lbound = section.find('"') + rbound = section.rfind('"') + if lbound == -1 or rbound == -1: + raise ValueError("Remote-Section has invalid format: %r" % section) + yield Remote(repo, section[lbound+1:rbound]) + # END for each configuration section + + @property + def refs(self): + """ + :return: + IterableList of RemoteReference objects. It is prefixed, allowing + you to omit the remote path portion, i.e.:: + remote.refs.master # yields RemoteReference('/refs/remotes/origin/master')""" + out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) + out_refs.extend(RemoteReference.list_items(self.repo, remote=self.name)) + assert out_refs, "Remote %s did not have any references" % self.name + return out_refs + + @property + def stale_refs(self): + """ + :return: + IterableList RemoteReference objects that do not have a corresponding + head in the remote reference anymore as they have been deleted on the + remote side, but are still available locally. + + The IterableList is prefixed, hence the 'origin' must be omitted. 
See + 'refs' property for an example.""" + out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) + for line in self.repo.git.remote("prune", "--dry-run", self).splitlines()[2:]: + # expecting + # * [would prune] origin/new_branch + token = " * [would prune] " + if not line.startswith(token): + raise ValueError("Could not parse git-remote prune result: %r" % line) + fqhn = "%s/%s" % (RemoteReference._common_path_default,line.replace(token, "")) + out_refs.append(RemoteReference(self.repo, fqhn)) + # END for each line + return out_refs + + @classmethod + def create(cls, repo, name, url, **kwargs): + """Create a new remote to the given repository + :param repo: Repository instance that is to receive the new remote + :param name: Desired name of the remote + :param url: URL which corresponds to the remote's name + :param kwargs: + Additional arguments to be passed to the git-remote add command + + :return: New Remote instance + + :raise GitCommandError: in case an origin with that name already exists""" + repo.git.remote( "add", name, url, **kwargs ) + return cls(repo, name) + + # add is an alias + add = create + + @classmethod + def remove(cls, repo, name ): + """Remove the remote with the given name""" + repo.git.remote("rm", name) + + # alias + rm = remove + + def rename(self, new_name): + """Rename self to the given new_name + :return: self """ + if self.name == new_name: + return self + + self.repo.git.remote("rename", self.name, new_name) + self.name = new_name + try: + del(self._config_reader) # it contains cached values, section names are different now + except AttributeError: + pass + #END handle exception + return self + + def update(self, **kwargs): + """Fetch all changes for this remote, including new branches which will + be forced in ( in case your local remote branch is not part the new remote branches + ancestry anymore ). + + :param kwargs: + Additional arguments passed to git-remote update + + :return: self """ + self.repo.git.remote("update", self.name) + return self + + def _get_fetch_info_from_stderr(self, proc, progress): + # skip first line as it is some remote info we are not interested in + output = IterableList('name') + + + # lines which are no progress are fetch info lines + # this also waits for the command to finish + # Skip some progress lines that don't provide relevant information + fetch_info_lines = list() + for line in digest_process_messages(proc.stderr, progress): + if line.startswith('From') or line.startswith('remote: Total') or line.startswith('POST'): + continue + elif line.startswith('warning:'): + print >> sys.stderr, line + continue + elif line.startswith('fatal:'): + raise GitCommandError(("Error when fetching: %s" % line,), 2) + # END handle special messages + fetch_info_lines.append(line) + # END for each line + + # read head information + fp = open(join(self.repo.git_dir, 'FETCH_HEAD'),'r') + fetch_head_info = fp.readlines() + fp.close() + + assert len(fetch_info_lines) == len(fetch_head_info), "len(%s) != len(%s)" % (fetch_head_info, fetch_info_lines) + + output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) + for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) + + finalize_process(proc) + return output + + def _get_push_info(self, proc, progress): + # read progress information from stderr + # we hope stdout can hold all the data, it should ... + # read the lines manually as it will use carriage returns between the messages + # to override the previous one. 
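# Hedged sketch of receiving progress while fetching; the update() hook and
# its signature are assumed from git.util.RemoteProgress in this codebase.
from git import Repo, RemoteProgress

class PrintProgress(RemoteProgress):
    def update(self, op_code, cur_count, max_count=None, message=''):
        print op_code, cur_count, max_count, message

repo = Repo('/tmp/repo')
repo.remotes.origin.fetch(progress=PrintProgress())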
This is why we read the bytes manually + digest_process_messages(proc.stderr, progress) + + output = IterableList('name') + for line in proc.stdout.readlines(): + try: + output.append(PushInfo._from_line(self, line)) + except ValueError: + # if an error happens, additional info is given which we cannot parse + pass + # END exception handling + # END for each line + + finalize_process(proc) + return output + + + def fetch(self, refspec=None, progress=None, **kwargs): + """Fetch the latest changes for this remote + + :param refspec: + A "refspec" is used by fetch and push to describe the mapping + between remote ref and local ref. They are combined with a colon in + the format <src>:<dst>, preceded by an optional plus sign, +. + For example: git fetch $URL refs/heads/master:refs/heads/origin means + "grab the master branch head from the $URL and store it as my origin + branch head". And git push $URL refs/heads/master:refs/heads/to-upstream + means "publish my master branch head as to-upstream branch at $URL". + See also git-push(1). + + Taken from the git manual + :param progress: See 'push' method + :param kwargs: Additional arguments to be passed to git-fetch + :return: + IterableList(FetchInfo, ...) list of FetchInfo instances providing detailed + information about the fetch results + + :note: + As fetch does not provide progress information to non-ttys, we cannot make + it available here unfortunately as in the 'push' method.""" + kwargs = add_progress(kwargs, self.repo.git, progress) + proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + + def pull(self, refspec=None, progress=None, **kwargs): + """Pull changes from the given branch, being the same as a fetch followed + by a merge of branch with your local branch. + + :param refspec: see 'fetch' method + :param progress: see 'push' method + :param kwargs: Additional arguments to be passed to git-pull + :return: Please see 'fetch' method """ + kwargs = add_progress(kwargs, self.repo.git, progress) + proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + + def push(self, refspec=None, progress=None, **kwargs): + """Push changes from source branch in refspec to target branch in refspec. + + :param refspec: see 'fetch' method + :param progress: + Instance of type RemoteProgress allowing the caller to receive + progress information until the method returns. + If None, progress information will be discarded + + :param kwargs: Additional arguments to be passed to git-push + :return: + IterableList(PushInfo, ...) iterable list of PushInfo instances, each + one informing about an individual head which had been updated on the remote + side. + If the push contains rejected heads, these will have the PushInfo.ERROR bit set + in their flags. + If the operation fails completely, the length of the returned IterableList will + be null.""" + kwargs = add_progress(kwargs, self.repo.git, progress) + proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs) + return self._get_push_info(proc, progress or RemoteProgress()) + + @property + def config_reader(self): + """ + :return: + GitConfigParser compatible object able to read options for only our remote. 
+ Hence you may simple type config.get("pushurl") to obtain the information""" + return self._config_reader + + @property + def config_writer(self): + """ + :return: GitConfigParser compatible object able to write options for this remote. + :note: + You can only own one writer at a time - delete it to release the + configuration file and make it useable by others. + + To assure consistent results, you should only query options through the + writer. Once you are done writing, you are free to use the config reader + once again.""" + writer = self.repo.config_writer() + + # clear our cache to assure we re-read the possibly changed configuration + try: + del(self._config_reader) + except AttributeError: + pass + #END handle exception + return SectionConstraint(writer, self._config_section_name()) diff --git a/git/repo/base.py b/git/repo/base.py index 20c96b22..3bbcdb59 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -12,36 +12,36 @@ from git.index import IndexFile from git.objects import * from git.config import GitConfigParser from git.remote import ( - Remote, - digest_process_messages, - finalize_process, - add_progress - ) + Remote, + digest_process_messages, + finalize_process, + add_progress + ) from git.db import ( - GitCmdObjectDB, - GitDB - ) + GitCmdObjectDB, + GitDB + ) from gitdb.util import ( - join, - isfile, - hex_to_bin - ) + join, + isfile, + hex_to_bin + ) from fun import ( - rev_parse, - is_git_dir, - touch - ) + rev_parse, + is_git_dir, + touch + ) import os import sys import re DefaultDBType = GitDB -if sys.version_info[1] < 5: # python 2.4 compatiblity - DefaultDBType = GitCmdObjectDB +if sys.version_info[1] < 5: # python 2.4 compatiblity + DefaultDBType = GitCmdObjectDB # END handle python 2.4 @@ -49,720 +49,720 @@ __all__ = ('Repo', ) class Repo(object): - """Represents a git repository and allows you to query references, - gather commit information, generate diffs, create and clone repositories query - the log. - - The following attributes are worth using: - - 'working_dir' is the working directory of the git command, wich is the working tree - directory if available or the .git directory in case of bare repositories - - 'working_tree_dir' is the working tree directory, but will raise AssertionError - if we are a bare repository. - - 'git_dir' is the .git repository directoy, which is always set.""" - DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" ) - - # precompiled regex - re_whitespace = re.compile(r'\s+') - re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') - re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') - re_author_committer_start = re.compile(r'^(author|committer)') - re_tab_full_line = re.compile(r'^\t(.*)$') - - # invariants - # represents the configuration level of a configuration file - config_level = ("system", "global", "repository") - - def __init__(self, path=None, odbt = DefaultDBType): - """Create a new Repo instance - - :param path: is the path to either the root git directory or the bare git repo:: - - repo = Repo("/Users/mtrier/Development/git-python") - repo = Repo("/Users/mtrier/Development/git-python.git") - repo = Repo("~/Development/git-python.git") - repo = Repo("$REPOSITORIES/Development/git-python.git") - - :param odbt: Object DataBase type - a type which is constructed by providing - the directory containing the database objects, i.e. .git/objects. 
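# Hedged sketch of Repo construction as described above; all paths are
# hypothetical, and '~' or environment variables are expanded as documented.
from git import Repo
from git.db import GitCmdObjectDB

repo = Repo('~/Development/git-python')         # working tree or .git directory
bare = Repo('/srv/repos/project.git')           # bare repository, no working tree
cmddb = Repo('/tmp/repo', odbt=GitCmdObjectDB)  # alternate object database backend
print repo.bare, repo.git_dir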
It will - be used to access all object data - :raise InvalidGitRepositoryError: - :raise NoSuchPathError: - :return: git.Repo """ - epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd()))) - - if not os.path.exists(epath): - raise NoSuchPathError(epath) - - self.working_dir = None - self._working_tree_dir = None - self.git_dir = None - curpath = epath - - # walk up the path to find the .git dir - while curpath: - if is_git_dir(curpath): - self.git_dir = curpath - self._working_tree_dir = os.path.dirname(curpath) - break - gitpath = join(curpath, '.git') - if is_git_dir(gitpath): - self.git_dir = gitpath - self._working_tree_dir = curpath - break - curpath, dummy = os.path.split(curpath) - if not dummy: - break - # END while curpath - - if self.git_dir is None: - raise InvalidGitRepositoryError(epath) - - self._bare = False - try: - self._bare = self.config_reader("repository").getboolean('core','bare') - except Exception: - # lets not assume the option exists, although it should - pass - - # adjust the wd in case we are actually bare - we didn't know that - # in the first place - if self._bare: - self._working_tree_dir = None - # END working dir handling - - self.working_dir = self._working_tree_dir or self.git_dir - self.git = Git(self.working_dir) - - # special handling, in special times - args = [join(self.git_dir, 'objects')] - if issubclass(odbt, GitCmdObjectDB): - args.append(self.git) - self.odb = odbt(*args) - - def __eq__(self, rhs): - if isinstance(rhs, Repo): - return self.git_dir == rhs.git_dir - return False - - def __ne__(self, rhs): - return not self.__eq__(rhs) - - def __hash__(self): - return hash(self.git_dir) - - def __repr__(self): - return "%s(%r)" % (type(self).__name__, self.git_dir) - - # Description property - def _get_description(self): - filename = join(self.git_dir, 'description') - return file(filename).read().rstrip() - - def _set_description(self, descr): - filename = join(self.git_dir, 'description') - file(filename, 'w').write(descr+'\n') - - description = property(_get_description, _set_description, - doc="the project's description") - del _get_description - del _set_description - - - - @property - def working_tree_dir(self): - """:return: The working tree directory of our git repository - :raise AssertionError: If we are a bare repository""" - if self._working_tree_dir is None: - raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir ) - return self._working_tree_dir - - @property - def bare(self): - """:return: True if the repository is bare""" - return self._bare - - @property - def heads(self): - """A list of ``Head`` objects representing the branch heads in - this repo - - :return: ``git.IterableList(Head, ...)``""" - return Head.list_items(self) - - @property - def references(self): - """A list of Reference objects representing tags, heads and remote references. 
- - :return: IterableList(Reference, ...)""" - return Reference.list_items(self) - - # alias for references - refs = references - - # alias for heads - branches = heads - - @property - def index(self): - """:return: IndexFile representing this repository's index.""" - return IndexFile(self) - - @property - def head(self): - """:return: HEAD Object pointing to the current head reference""" - return HEAD(self,'HEAD') - - @property - def remotes(self): - """A list of Remote objects allowing to access and manipulate remotes - :return: ``git.IterableList(Remote, ...)``""" - return Remote.list_items(self) - - def remote(self, name='origin'): - """:return: Remote with the specified name - :raise ValueError: if no remote with such a name exists""" - return Remote(self, name) - - #{ Submodules - - @property - def submodules(self): - """ - :return: git.IterableList(Submodule, ...) of direct submodules - available from the current head""" - return Submodule.list_items(self) - - def submodule(self, name): - """ :return: Submodule with the given name - :raise ValueError: If no such submodule exists""" - try: - return self.submodules[name] - except IndexError: - raise ValueError("Didn't find submodule named %r" % name) - # END exception handling - - def create_submodule(self, *args, **kwargs): - """Create a new submodule - - :note: See the documentation of Submodule.add for a description of the - applicable parameters - :return: created submodules""" - return Submodule.add(self, *args, **kwargs) - - def iter_submodules(self, *args, **kwargs): - """An iterator yielding Submodule instances, see Traversable interface - for a description of args and kwargs - :return: Iterator""" - return RootModule(self).traverse(*args, **kwargs) - - def submodule_update(self, *args, **kwargs): - """Update the submodules, keeping the repository consistent as it will - take the previous state into consideration. For more information, please - see the documentation of RootModule.update""" - return RootModule(self).update(*args, **kwargs) - - #}END submodules - - @property - def tags(self): - """A list of ``Tag`` objects that are available in this repo - :return: ``git.IterableList(TagReference, ...)`` """ - return TagReference.list_items(self) - - def tag(self,path): - """:return: TagReference Object, reference pointing to a Commit or Tag - :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5 """ - return TagReference(self, path) - - def create_head(self, path, commit='HEAD', force=False, logmsg=None ): - """Create a new head within the repository. - For more documentation, please see the Head.create method. - - :return: newly created Head Reference""" - return Head.create(self, path, commit, force, logmsg) - - def delete_head(self, *heads, **kwargs): - """Delete the given heads - - :param kwargs: Additional keyword arguments to be passed to git-branch""" - return Head.delete(self, *heads, **kwargs) - - def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): - """Create a new tag reference. - For more documentation, please see the TagReference.create method. - - :return: TagReference object """ - return TagReference.create(self, path, ref, message, force, **kwargs) - - def delete_tag(self, *tags): - """Delete the given tag references""" - return TagReference.delete(self, *tags) - - def create_remote(self, name, url, **kwargs): - """Create a new remote. 
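# Hedged sketch of the create/delete convenience wrappers on Repo; names
# and the remote URL are hypothetical.
from git import Repo

repo = Repo('/tmp/repo')
head = repo.create_head('feature/example')            # wraps Head.create
tag = repo.create_tag('0.1', message='first cut')     # wraps TagReference.create
backup = repo.create_remote('backup', 'git@example.com:user/repo.git')
repo.delete_remote(backup)                            # wraps Remote.remove
repo.delete_tag(tag)
repo.delete_head(head)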
- - For more information, please see the documentation of the Remote.create - methods - - :return: Remote reference""" - return Remote.create(self, name, url, **kwargs) - - def delete_remote(self, remote): - """Delete the given remote.""" - return Remote.remove(self, remote) - - def _get_config_path(self, config_level ): - # we do not support an absolute path of the gitconfig on windows , - # use the global config instead - if sys.platform == "win32" and config_level == "system": - config_level = "global" - - if config_level == "system": - return "/etc/gitconfig" - elif config_level == "global": - return os.path.normpath(os.path.expanduser("~/.gitconfig")) - elif config_level == "repository": - return join(self.git_dir, "config") - - raise ValueError( "Invalid configuration level: %r" % config_level ) - - def config_reader(self, config_level=None): - """ - :return: - GitConfigParser allowing to read the full git configuration, but not to write it - - The configuration will include values from the system, user and repository - configuration files. - - :param config_level: - For possible values, see config_writer method - If None, all applicable levels will be used. Specify a level in case - you know which exact file you whish to read to prevent reading multiple files for - instance - :note: On windows, system configuration cannot currently be read as the path is - unknown, instead the global path will be used.""" - files = None - if config_level is None: - files = [ self._get_config_path(f) for f in self.config_level ] - else: - files = [ self._get_config_path(config_level) ] - return GitConfigParser(files, read_only=True) - - def config_writer(self, config_level="repository"): - """ - :return: - GitConfigParser allowing to write values of the specified configuration file level. - Config writers should be retrieved, used to change the configuration ,and written - right away as they will lock the configuration file in question and prevent other's - to write it. - - :param config_level: - One of the following values - system = sytem wide configuration file - global = user level configuration file - repository = configuration file for this repostory only""" - return GitConfigParser(self._get_config_path(config_level), read_only = False) - - def commit(self, rev=None): - """The Commit object for the specified revision - :param rev: revision specifier, see git-rev-parse for viable options. - :return: ``git.Commit``""" - if rev is None: - return self.head.commit - else: - return self.rev_parse(str(rev)+"^0") - - def iter_trees(self, *args, **kwargs): - """:return: Iterator yielding Tree objects - :note: Takes all arguments known to iter_commits method""" - return ( c.tree for c in self.iter_commits(*args, **kwargs) ) - - def tree(self, rev=None): - """The Tree object for the given treeish revision - Examples:: - - repo.tree(repo.heads[0]) - - :param rev: is a revision pointing to a Treeish ( being a commit or tree ) - :return: ``git.Tree`` - - :note: - If you need a non-root level tree, find it by iterating the root tree. Otherwise - it cannot know about its path relative to the repository root and subsequent - operations might have unexpected results.""" - if rev is None: - return self.head.commit.tree - else: - return self.rev_parse(str(rev)+"^{tree}") - - def iter_commits(self, rev=None, paths='', **kwargs): - """A list of Commit objects representing the history of a given ref/commit - - :parm rev: - revision specifier, see git-rev-parse for viable options. 
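# Usage sketch: reading configuration at a given level and writing one value.
# The writer locks the underlying file, so it should be released (via del) as
# soon as the change is made; section and option names below are only examples.
from git import Repo
repo = Repo("/path/to/project")
is_bare = repo.config_reader("repository").getboolean("core", "bare")
cw = repo.config_writer("repository")
cw.set("core", "ignorecase", "false")  # plain ConfigParser-style set
del(cw)                                # release the lock so others may write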
- If None, the active branch will be used. - - :parm paths: - is an optional path or a list of paths to limit the returned commits to - Commits that do not contain that path or the paths will not be returned. - - :parm kwargs: - Arguments to be passed to git-rev-list - common ones are - max_count and skip - - :note: to receive only commits between two named revisions, use the - "revA..revB" revision specifier - - :return ``git.Commit[]``""" - if rev is None: - rev = self.head.commit - - return Commit.iter_items(self, rev, paths, **kwargs) - - def _get_daemon_export(self): - filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) - return os.path.exists(filename) - - def _set_daemon_export(self, value): - filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) - fileexists = os.path.exists(filename) - if value and not fileexists: - touch(filename) - elif not value and fileexists: - os.unlink(filename) - - daemon_export = property(_get_daemon_export, _set_daemon_export, - doc="If True, git-daemon may export this repository") - del _get_daemon_export - del _set_daemon_export - - def _get_alternates(self): - """The list of alternates for this repo from which objects can be retrieved - - :return: list of strings being pathnames of alternates""" - alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') - - if os.path.exists(alternates_path): - try: - f = open(alternates_path) - alts = f.read() - finally: - f.close() - return alts.strip().splitlines() - else: - return list() - - def _set_alternates(self, alts): - """Sets the alternates - - :parm alts: - is the array of string paths representing the alternates at which - git should look for objects, i.e. /home/user/repo/.git/objects - - :raise NoSuchPathError: - :note: - The method does not check for the existance of the paths in alts - as the caller is responsible.""" - alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') - if not alts: - if isfile(alternates_path): - os.remove(alternates_path) - else: - try: - f = open(alternates_path, 'w') - f.write("\n".join(alts)) - finally: - f.close() - # END file handling - # END alts handling - - alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") - - def is_dirty(self, index=True, working_tree=True, untracked_files=False): - """ - :return: - ``True``, the repository is considered dirty. By default it will react - like a git-status without untracked files, hence it is dirty if the - index or the working copy have changes.""" - if self._bare: - # Bare repositories with no associated working directory are - # always consired to be clean. - return False - - # start from the one which is fastest to evaluate - default_args = ('--abbrev=40', '--full-index', '--raw') - if index: - # diff index against HEAD - if isfile(self.index.path) and self.head.is_valid() and \ - len(self.git.diff('HEAD', '--cached', *default_args)): - return True - # END index handling - if working_tree: - # diff index against working tree - if len(self.git.diff(*default_args)): - return True - # END working tree handling - if untracked_files: - if len(self.untracked_files): - return True - # END untracked files - return False - - @property - def untracked_files(self): - """ - :return: - list(str,...) - - Files currently untracked as they have not been staged yet. Paths - are relative to the current working directory of the git command. - - :note: - ignored files will not appear here, i.e. 
files mentioned in .gitignore""" - # make sure we get all files, no only untracked directores - proc = self.git.status(untracked_files=True, as_process=True) - stream = iter(proc.stdout) - untracked_files = list() - for line in stream: - if not line.startswith("# Untracked files:"): - continue - # skip two lines - stream.next() - stream.next() - - for untracked_info in stream: - if not untracked_info.startswith("#\t"): - break - untracked_files.append(untracked_info.replace("#\t", "").rstrip()) - # END for each utracked info line - # END for each line - return untracked_files - - @property - def active_branch(self): - """The name of the currently active branch. - - :return: Head to the active branch""" - return self.head.reference - - def blame(self, rev, file): - """The blame information for the given file at the given revision. - - :parm rev: revision specifier, see git-rev-parse for viable options. - :return: - list: [git.Commit, list: [<line>]] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. The Commit objects will be given in order - of appearance.""" - data = self.git.blame(rev, '--', file, p=True) - commits = dict() - blames = list() - info = None - - for line in data.splitlines(False): - parts = self.re_whitespace.split(line, 1) - firstpart = parts[0] - if self.re_hexsha_only.search(firstpart): - # handles - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - indicates another line of blame with the same data - digits = parts[-1].split(" ") - if len(digits) == 3: - info = {'id': firstpart} - blames.append([None, []]) - elif info['id'] != firstpart: - info = {'id': firstpart} - blames.append([commits.get(firstpart), []]) - # END blame data initialization - else: - m = self.re_author_committer_start.search(firstpart) - if m: - # handles: - # author Tom Preston-Werner - # author-mail <tom@mojombo.com> - # author-time 1192271832 - # author-tz -0700 - # committer Tom Preston-Werner - # committer-mail <tom@mojombo.com> - # committer-time 1192271832 - # committer-tz -0700 - IGNORED BY US - role = m.group(0) - if firstpart.endswith('-mail'): - info["%s_email" % role] = parts[-1] - elif firstpart.endswith('-time'): - info["%s_date" % role] = int(parts[-1]) - elif role == firstpart: - info[role] = parts[-1] - # END distinguish mail,time,name - else: - # handle - # filename lib/grit.rb - # summary add Blob - # <and rest> - if firstpart.startswith('filename'): - info['filename'] = parts[-1] - elif firstpart.startswith('summary'): - info['summary'] = parts[-1] - elif firstpart == '': - if info: - sha = info['id'] - c = commits.get(sha) - if c is None: - c = Commit( self, hex_to_bin(sha), - author=Actor._from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[sha] = c - # END if commit objects needs initial creation - m = self.re_tab_full_line.search(line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = {'id': sha} - # END if we collected commit info - # END distinguish filename,summary,rest - # END distinguish author|committer vs filename,summary,rest - # END distinguish hexsha vs other information - return blames - - @classmethod - def init(cls, path=None, mkdir=True, **kwargs): - """Initialize a git repository at the given path 
if specified - - :param path: - is the full path to the repo (traditionally ends with /<name>.git) - or None in which case the repository will be created in the current - working directory - - :parm mkdir: - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. - Only effective if a path is explicitly given - - :parm kwargs: - keyword arguments serving as additional options to the git-init command - - :return: ``git.Repo`` (the newly created repo)""" - - if mkdir and path and not os.path.exists(path): - os.makedirs(path, 0755) - - # git command automatically chdir into the directory - git = Git(path) - output = git.init(**kwargs) - return Repo(path) - - @classmethod - def _clone(cls, git, url, path, odb_default_type, progress, **kwargs): - # special handling for windows for path at which the clone should be - # created. - # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence - # we at least give a proper error instead of letting git fail - prev_cwd = None - prev_path = None - odbt = kwargs.pop('odbt', odb_default_type) - if os.name == 'nt': - if '~' in path: - raise OSError("Git cannot handle the ~ character in path %r correctly" % path) - - # on windows, git will think paths like c: are relative and prepend the - # current working dir ( before it fails ). We temporarily adjust the working - # dir to make this actually work - match = re.match("(\w:[/\\\])(.*)", path) - if match: - prev_cwd = os.getcwd() - prev_path = path - drive, rest_of_path = match.groups() - os.chdir(drive) - path = rest_of_path - kwargs['with_keep_cwd'] = True - # END cwd preparation - # END windows handling - - try: - proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress)) - if progress: - digest_process_messages(proc.stderr, progress) - #END handle progress - finalize_process(proc) - finally: - if prev_cwd is not None: - os.chdir(prev_cwd) - path = prev_path - # END reset previous working dir - # END bad windows handling - - # our git command could have a different working dir than our actual - # environment, hence we prepend its working dir if required - if not os.path.isabs(path) and git.working_dir: - path = join(git._working_dir, path) - - # adjust remotes - there may be operating systems which use backslashes, - # These might be given as initial paths, but when handling the config file - # that contains the remote from which we were clones, git stops liking it - # as it will escape the backslashes. Hence we undo the escaping just to be - # sure - repo = cls(os.path.abspath(path), odbt = odbt) - if repo.remotes: - repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) - # END handle remote repo - return repo - - def clone(self, path, progress=None, **kwargs): - """Create a clone from this repository. - :param path: - is the full path of the new repo (traditionally ends with ./<name>.git). - - :param progress: See 'git.remote.Remote.push'. 
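# Usage sketch for the init classmethod above (the path is a placeholder); with
# mkdir=True (the default) the leaf directory is created if it is missing.
from git import Repo
new_repo = Repo.init("/tmp/example_repo", mkdir=True)
assert isinstance(new_repo, Repo)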
- - :param kwargs: - odbt = ObjectDatabase Type, allowing to determine the object database - implementation used by the returned Repo instance - - All remaining keyword arguments are given to the git-clone command - - :return: ``git.Repo`` (the newly cloned repo)""" - return self._clone(self.git, self.git_dir, path, type(self.odb), progress, **kwargs) - - @classmethod - def clone_from(cls, url, to_path, progress=None, **kwargs): - """Create a clone from the given URL - :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS - :param to_path: Path to which the repository should be cloned to - :param progress: See 'git.remote.Remote.push'. - :param kwargs: see the ``clone`` method - :return: Repo instance pointing to the cloned directory""" - return cls._clone(Git(os.getcwd()), url, to_path, GitCmdObjectDB, progress, **kwargs) - - def archive(self, ostream, treeish=None, prefix=None, **kwargs): - """Archive the tree at the given revision. - :parm ostream: file compatible stream object to which the archive will be written - :parm treeish: is the treeish name/id, defaults to active branch - :parm prefix: is the optional prefix to prepend to each filename in the archive - :parm kwargs: - Additional arguments passed to git-archive - NOTE: Use the 'format' argument to define the kind of format. Use - specialized ostreams to write any format supported by python - - :raise GitCommandError: in case something went wrong - :return: self""" - if treeish is None: - treeish = self.head.commit - if prefix and 'prefix' not in kwargs: - kwargs['prefix'] = prefix - kwargs['output_stream'] = ostream - - self.git.archive(treeish, **kwargs) - return self - - rev_parse = rev_parse - - def __repr__(self): - return '<git.Repo "%s">' % self.git_dir + """Represents a git repository and allows you to query references, + gather commit information, generate diffs, create and clone repositories query + the log. + + The following attributes are worth using: + + 'working_dir' is the working directory of the git command, wich is the working tree + directory if available or the .git directory in case of bare repositories + + 'working_tree_dir' is the working tree directory, but will raise AssertionError + if we are a bare repository. + + 'git_dir' is the .git repository directoy, which is always set.""" + DAEMON_EXPORT_FILE = 'git-daemon-export-ok' + __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" ) + + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') + + # invariants + # represents the configuration level of a configuration file + config_level = ("system", "global", "repository") + + def __init__(self, path=None, odbt = DefaultDBType): + """Create a new Repo instance + + :param path: is the path to either the root git directory or the bare git repo:: + + repo = Repo("/Users/mtrier/Development/git-python") + repo = Repo("/Users/mtrier/Development/git-python.git") + repo = Repo("~/Development/git-python.git") + repo = Repo("$REPOSITORIES/Development/git-python.git") + + :param odbt: Object DataBase type - a type which is constructed by providing + the directory containing the database objects, i.e. .git/objects. 
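# Usage sketch for the odbt parameter (the path is a placeholder and the import
# location is an assumption that may differ by version): swapping in the
# git-command based object database instead of the default implementation.
from git import Repo
from git.db import GitCmdObjectDB
repo = Repo("/path/to/project", odbt=GitCmdObjectDB)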
It will + be used to access all object data + :raise InvalidGitRepositoryError: + :raise NoSuchPathError: + :return: git.Repo """ + epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd()))) + + if not os.path.exists(epath): + raise NoSuchPathError(epath) + + self.working_dir = None + self._working_tree_dir = None + self.git_dir = None + curpath = epath + + # walk up the path to find the .git dir + while curpath: + if is_git_dir(curpath): + self.git_dir = curpath + self._working_tree_dir = os.path.dirname(curpath) + break + gitpath = join(curpath, '.git') + if is_git_dir(gitpath): + self.git_dir = gitpath + self._working_tree_dir = curpath + break + curpath, dummy = os.path.split(curpath) + if not dummy: + break + # END while curpath + + if self.git_dir is None: + raise InvalidGitRepositoryError(epath) + + self._bare = False + try: + self._bare = self.config_reader("repository").getboolean('core','bare') + except Exception: + # lets not assume the option exists, although it should + pass + + # adjust the wd in case we are actually bare - we didn't know that + # in the first place + if self._bare: + self._working_tree_dir = None + # END working dir handling + + self.working_dir = self._working_tree_dir or self.git_dir + self.git = Git(self.working_dir) + + # special handling, in special times + args = [join(self.git_dir, 'objects')] + if issubclass(odbt, GitCmdObjectDB): + args.append(self.git) + self.odb = odbt(*args) + + def __eq__(self, rhs): + if isinstance(rhs, Repo): + return self.git_dir == rhs.git_dir + return False + + def __ne__(self, rhs): + return not self.__eq__(rhs) + + def __hash__(self): + return hash(self.git_dir) + + def __repr__(self): + return "%s(%r)" % (type(self).__name__, self.git_dir) + + # Description property + def _get_description(self): + filename = join(self.git_dir, 'description') + return file(filename).read().rstrip() + + def _set_description(self, descr): + filename = join(self.git_dir, 'description') + file(filename, 'w').write(descr+'\n') + + description = property(_get_description, _set_description, + doc="the project's description") + del _get_description + del _set_description + + + + @property + def working_tree_dir(self): + """:return: The working tree directory of our git repository + :raise AssertionError: If we are a bare repository""" + if self._working_tree_dir is None: + raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir ) + return self._working_tree_dir + + @property + def bare(self): + """:return: True if the repository is bare""" + return self._bare + + @property + def heads(self): + """A list of ``Head`` objects representing the branch heads in + this repo + + :return: ``git.IterableList(Head, ...)``""" + return Head.list_items(self) + + @property + def references(self): + """A list of Reference objects representing tags, heads and remote references. 
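# Usage sketch: the ref-related properties return IterableList instances, so
# entries can be addressed by index or, where present, by name.
from git import Repo
repo = Repo("/path/to/project")  # placeholder path
print(repo.heads[0])             # index access
print(repo.heads.master)         # attribute access, if a 'master' head exists
print(repo.head.commit.hexsha)   # commit the current HEAD resolves to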
+ + :return: IterableList(Reference, ...)""" + return Reference.list_items(self) + + # alias for references + refs = references + + # alias for heads + branches = heads + + @property + def index(self): + """:return: IndexFile representing this repository's index.""" + return IndexFile(self) + + @property + def head(self): + """:return: HEAD Object pointing to the current head reference""" + return HEAD(self,'HEAD') + + @property + def remotes(self): + """A list of Remote objects allowing to access and manipulate remotes + :return: ``git.IterableList(Remote, ...)``""" + return Remote.list_items(self) + + def remote(self, name='origin'): + """:return: Remote with the specified name + :raise ValueError: if no remote with such a name exists""" + return Remote(self, name) + + #{ Submodules + + @property + def submodules(self): + """ + :return: git.IterableList(Submodule, ...) of direct submodules + available from the current head""" + return Submodule.list_items(self) + + def submodule(self, name): + """ :return: Submodule with the given name + :raise ValueError: If no such submodule exists""" + try: + return self.submodules[name] + except IndexError: + raise ValueError("Didn't find submodule named %r" % name) + # END exception handling + + def create_submodule(self, *args, **kwargs): + """Create a new submodule + + :note: See the documentation of Submodule.add for a description of the + applicable parameters + :return: created submodules""" + return Submodule.add(self, *args, **kwargs) + + def iter_submodules(self, *args, **kwargs): + """An iterator yielding Submodule instances, see Traversable interface + for a description of args and kwargs + :return: Iterator""" + return RootModule(self).traverse(*args, **kwargs) + + def submodule_update(self, *args, **kwargs): + """Update the submodules, keeping the repository consistent as it will + take the previous state into consideration. For more information, please + see the documentation of RootModule.update""" + return RootModule(self).update(*args, **kwargs) + + #}END submodules + + @property + def tags(self): + """A list of ``Tag`` objects that are available in this repo + :return: ``git.IterableList(TagReference, ...)`` """ + return TagReference.list_items(self) + + def tag(self,path): + """:return: TagReference Object, reference pointing to a Commit or Tag + :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5 """ + return TagReference(self, path) + + def create_head(self, path, commit='HEAD', force=False, logmsg=None ): + """Create a new head within the repository. + For more documentation, please see the Head.create method. + + :return: newly created Head Reference""" + return Head.create(self, path, commit, force, logmsg) + + def delete_head(self, *heads, **kwargs): + """Delete the given heads + + :param kwargs: Additional keyword arguments to be passed to git-branch""" + return Head.delete(self, *heads, **kwargs) + + def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): + """Create a new tag reference. + For more documentation, please see the TagReference.create method. + + :return: TagReference object """ + return TagReference.create(self, path, ref, message, force, **kwargs) + + def delete_tag(self, *tags): + """Delete the given tag references""" + return TagReference.delete(self, *tags) + + def create_remote(self, name, url, **kwargs): + """Create a new remote. 
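# Usage sketch: accessing remotes and traversing submodules via the wrappers
# above; 'origin' is only the conventional default name and may not exist.
from git import Repo
repo = Repo("/path/to/project")  # placeholder path
origin = repo.remote("origin")   # raises ValueError if no such remote exists
for sm in repo.iter_submodules():
    print(sm.name)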
+ + For more information, please see the documentation of the Remote.create + methods + + :return: Remote reference""" + return Remote.create(self, name, url, **kwargs) + + def delete_remote(self, remote): + """Delete the given remote.""" + return Remote.remove(self, remote) + + def _get_config_path(self, config_level ): + # we do not support an absolute path of the gitconfig on windows , + # use the global config instead + if sys.platform == "win32" and config_level == "system": + config_level = "global" + + if config_level == "system": + return "/etc/gitconfig" + elif config_level == "global": + return os.path.normpath(os.path.expanduser("~/.gitconfig")) + elif config_level == "repository": + return join(self.git_dir, "config") + + raise ValueError( "Invalid configuration level: %r" % config_level ) + + def config_reader(self, config_level=None): + """ + :return: + GitConfigParser allowing to read the full git configuration, but not to write it + + The configuration will include values from the system, user and repository + configuration files. + + :param config_level: + For possible values, see config_writer method + If None, all applicable levels will be used. Specify a level in case + you know which exact file you whish to read to prevent reading multiple files for + instance + :note: On windows, system configuration cannot currently be read as the path is + unknown, instead the global path will be used.""" + files = None + if config_level is None: + files = [ self._get_config_path(f) for f in self.config_level ] + else: + files = [ self._get_config_path(config_level) ] + return GitConfigParser(files, read_only=True) + + def config_writer(self, config_level="repository"): + """ + :return: + GitConfigParser allowing to write values of the specified configuration file level. + Config writers should be retrieved, used to change the configuration ,and written + right away as they will lock the configuration file in question and prevent other's + to write it. + + :param config_level: + One of the following values + system = sytem wide configuration file + global = user level configuration file + repository = configuration file for this repostory only""" + return GitConfigParser(self._get_config_path(config_level), read_only = False) + + def commit(self, rev=None): + """The Commit object for the specified revision + :param rev: revision specifier, see git-rev-parse for viable options. + :return: ``git.Commit``""" + if rev is None: + return self.head.commit + else: + return self.rev_parse(str(rev)+"^0") + + def iter_trees(self, *args, **kwargs): + """:return: Iterator yielding Tree objects + :note: Takes all arguments known to iter_commits method""" + return ( c.tree for c in self.iter_commits(*args, **kwargs) ) + + def tree(self, rev=None): + """The Tree object for the given treeish revision + Examples:: + + repo.tree(repo.heads[0]) + + :param rev: is a revision pointing to a Treeish ( being a commit or tree ) + :return: ``git.Tree`` + + :note: + If you need a non-root level tree, find it by iterating the root tree. Otherwise + it cannot know about its path relative to the repository root and subsequent + operations might have unexpected results.""" + if rev is None: + return self.head.commit.tree + else: + return self.rev_parse(str(rev)+"^{tree}") + + def iter_commits(self, rev=None, paths='', **kwargs): + """A list of Commit objects representing the history of a given ref/commit + + :parm rev: + revision specifier, see git-rev-parse for viable options. 
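# Usage sketch: resolving commits and trees through rev-parse style specifiers
# and iterating history with git-rev-list options such as max_count (see the
# parameter notes below); ref names are placeholders.
from git import Repo
repo = Repo("/path/to/project")
tip = repo.commit("HEAD")        # equivalent to repo.head.commit
root_tree = repo.tree("HEAD")    # root tree of that commit
for commit in repo.iter_commits("HEAD", max_count=5):
    print(commit.hexsha)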
+ If None, the active branch will be used. + + :parm paths: + is an optional path or a list of paths to limit the returned commits to + Commits that do not contain that path or the paths will not be returned. + + :parm kwargs: + Arguments to be passed to git-rev-list - common ones are + max_count and skip + + :note: to receive only commits between two named revisions, use the + "revA..revB" revision specifier + + :return ``git.Commit[]``""" + if rev is None: + rev = self.head.commit + + return Commit.iter_items(self, rev, paths, **kwargs) + + def _get_daemon_export(self): + filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) + return os.path.exists(filename) + + def _set_daemon_export(self, value): + filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) + fileexists = os.path.exists(filename) + if value and not fileexists: + touch(filename) + elif not value and fileexists: + os.unlink(filename) + + daemon_export = property(_get_daemon_export, _set_daemon_export, + doc="If True, git-daemon may export this repository") + del _get_daemon_export + del _set_daemon_export + + def _get_alternates(self): + """The list of alternates for this repo from which objects can be retrieved + + :return: list of strings being pathnames of alternates""" + alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') + + if os.path.exists(alternates_path): + try: + f = open(alternates_path) + alts = f.read() + finally: + f.close() + return alts.strip().splitlines() + else: + return list() + + def _set_alternates(self, alts): + """Sets the alternates + + :parm alts: + is the array of string paths representing the alternates at which + git should look for objects, i.e. /home/user/repo/.git/objects + + :raise NoSuchPathError: + :note: + The method does not check for the existance of the paths in alts + as the caller is responsible.""" + alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') + if not alts: + if isfile(alternates_path): + os.remove(alternates_path) + else: + try: + f = open(alternates_path, 'w') + f.write("\n".join(alts)) + finally: + f.close() + # END file handling + # END alts handling + + alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") + + def is_dirty(self, index=True, working_tree=True, untracked_files=False): + """ + :return: + ``True``, the repository is considered dirty. By default it will react + like a git-status without untracked files, hence it is dirty if the + index or the working copy have changes.""" + if self._bare: + # Bare repositories with no associated working directory are + # always consired to be clean. + return False + + # start from the one which is fastest to evaluate + default_args = ('--abbrev=40', '--full-index', '--raw') + if index: + # diff index against HEAD + if isfile(self.index.path) and self.head.is_valid() and \ + len(self.git.diff('HEAD', '--cached', *default_args)): + return True + # END index handling + if working_tree: + # diff index against working tree + if len(self.git.diff(*default_args)): + return True + # END working tree handling + if untracked_files: + if len(self.untracked_files): + return True + # END untracked files + return False + + @property + def untracked_files(self): + """ + :return: + list(str,...) + + Files currently untracked as they have not been staged yet. Paths + are relative to the current working directory of the git command. + + :note: + ignored files will not appear here, i.e. 
files mentioned in .gitignore""" + # make sure we get all files, no only untracked directores + proc = self.git.status(untracked_files=True, as_process=True) + stream = iter(proc.stdout) + untracked_files = list() + for line in stream: + if not line.startswith("# Untracked files:"): + continue + # skip two lines + stream.next() + stream.next() + + for untracked_info in stream: + if not untracked_info.startswith("#\t"): + break + untracked_files.append(untracked_info.replace("#\t", "").rstrip()) + # END for each utracked info line + # END for each line + return untracked_files + + @property + def active_branch(self): + """The name of the currently active branch. + + :return: Head to the active branch""" + return self.head.reference + + def blame(self, rev, file): + """The blame information for the given file at the given revision. + + :parm rev: revision specifier, see git-rev-parse for viable options. + :return: + list: [git.Commit, list: [<line>]] + A list of tuples associating a Commit object with a list of lines that + changed within the given commit. The Commit objects will be given in order + of appearance.""" + data = self.git.blame(rev, '--', file, p=True) + commits = dict() + blames = list() + info = None + + for line in data.splitlines(False): + parts = self.re_whitespace.split(line, 1) + firstpart = parts[0] + if self.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - indicates another line of blame with the same data + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + elif info['id'] != firstpart: + info = {'id': firstpart} + blames.append([commits.get(firstpart), []]) + # END blame data initialization + else: + m = self.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail <tom@mojombo.com> + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail <tom@mojombo.com> + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = int(parts[-1]) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # <and rest> + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( self, hex_to_bin(sha), + author=Actor._from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = self.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = {'id': sha} + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information + return blames + + @classmethod + def init(cls, path=None, mkdir=True, **kwargs): + """Initialize a git repository at the given path 
if specified + + :param path: + is the full path to the repo (traditionally ends with /<name>.git) + or None in which case the repository will be created in the current + working directory + + :parm mkdir: + if specified will create the repository directory if it doesn't + already exists. Creates the directory with a mode=0755. + Only effective if a path is explicitly given + + :parm kwargs: + keyword arguments serving as additional options to the git-init command + + :return: ``git.Repo`` (the newly created repo)""" + + if mkdir and path and not os.path.exists(path): + os.makedirs(path, 0755) + + # git command automatically chdir into the directory + git = Git(path) + output = git.init(**kwargs) + return Repo(path) + + @classmethod + def _clone(cls, git, url, path, odb_default_type, progress, **kwargs): + # special handling for windows for path at which the clone should be + # created. + # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence + # we at least give a proper error instead of letting git fail + prev_cwd = None + prev_path = None + odbt = kwargs.pop('odbt', odb_default_type) + if os.name == 'nt': + if '~' in path: + raise OSError("Git cannot handle the ~ character in path %r correctly" % path) + + # on windows, git will think paths like c: are relative and prepend the + # current working dir ( before it fails ). We temporarily adjust the working + # dir to make this actually work + match = re.match("(\w:[/\\\])(.*)", path) + if match: + prev_cwd = os.getcwd() + prev_path = path + drive, rest_of_path = match.groups() + os.chdir(drive) + path = rest_of_path + kwargs['with_keep_cwd'] = True + # END cwd preparation + # END windows handling + + try: + proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress)) + if progress: + digest_process_messages(proc.stderr, progress) + #END handle progress + finalize_process(proc) + finally: + if prev_cwd is not None: + os.chdir(prev_cwd) + path = prev_path + # END reset previous working dir + # END bad windows handling + + # our git command could have a different working dir than our actual + # environment, hence we prepend its working dir if required + if not os.path.isabs(path) and git.working_dir: + path = join(git._working_dir, path) + + # adjust remotes - there may be operating systems which use backslashes, + # These might be given as initial paths, but when handling the config file + # that contains the remote from which we were clones, git stops liking it + # as it will escape the backslashes. Hence we undo the escaping just to be + # sure + repo = cls(os.path.abspath(path), odbt = odbt) + if repo.remotes: + repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) + # END handle remote repo + return repo + + def clone(self, path, progress=None, **kwargs): + """Create a clone from this repository. + :param path: + is the full path of the new repo (traditionally ends with ./<name>.git). + + :param progress: See 'git.remote.Remote.push'. 
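# Usage sketch (paths and URL are placeholders): cloning this repository to a
# new location, plus the classmethod variant defined below for arbitrary URLs.
from git import Repo
repo = Repo("/path/to/project")
local_copy = repo.clone("/tmp/example_clone")
fresh = Repo.clone_from("git://example.com/project.git", "/tmp/project")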
+ + :param kwargs: + odbt = ObjectDatabase Type, allowing to determine the object database + implementation used by the returned Repo instance + + All remaining keyword arguments are given to the git-clone command + + :return: ``git.Repo`` (the newly cloned repo)""" + return self._clone(self.git, self.git_dir, path, type(self.odb), progress, **kwargs) + + @classmethod + def clone_from(cls, url, to_path, progress=None, **kwargs): + """Create a clone from the given URL + :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS + :param to_path: Path to which the repository should be cloned to + :param progress: See 'git.remote.Remote.push'. + :param kwargs: see the ``clone`` method + :return: Repo instance pointing to the cloned directory""" + return cls._clone(Git(os.getcwd()), url, to_path, GitCmdObjectDB, progress, **kwargs) + + def archive(self, ostream, treeish=None, prefix=None, **kwargs): + """Archive the tree at the given revision. + :parm ostream: file compatible stream object to which the archive will be written + :parm treeish: is the treeish name/id, defaults to active branch + :parm prefix: is the optional prefix to prepend to each filename in the archive + :parm kwargs: + Additional arguments passed to git-archive + NOTE: Use the 'format' argument to define the kind of format. Use + specialized ostreams to write any format supported by python + + :raise GitCommandError: in case something went wrong + :return: self""" + if treeish is None: + treeish = self.head.commit + if prefix and 'prefix' not in kwargs: + kwargs['prefix'] = prefix + kwargs['output_stream'] = ostream + + self.git.archive(treeish, **kwargs) + return self + + rev_parse = rev_parse + + def __repr__(self): + return '<git.Repo "%s">' % self.git_dir diff --git a/git/repo/fun.py b/git/repo/fun.py index 03d55716..7a8657ab 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -4,281 +4,281 @@ from gitdb.exc import BadObject from git.refs import SymbolicReference from git.objects import Object from gitdb.util import ( - join, - isdir, - isfile, - hex_to_bin, - bin_to_hex - ) + join, + isdir, + isfile, + hex_to_bin, + bin_to_hex + ) from string import digits __all__ = ('rev_parse', 'is_git_dir', 'touch') def touch(filename): - fp = open(filename, "a") - fp.close() + fp = open(filename, "a") + fp.close() def is_git_dir(d): - """ This is taken from the git setup.c:is_git_directory - function.""" - if isdir(d) and \ - isdir(join(d, 'objects')) and \ - isdir(join(d, 'refs')): - headref = join(d, 'HEAD') - return isfile(headref) or \ - (os.path.islink(headref) and - os.readlink(headref).startswith('refs')) - return False + """ This is taken from the git setup.c:is_git_directory + function.""" + if isdir(d) and \ + isdir(join(d, 'objects')) and \ + isdir(join(d, 'refs')): + headref = join(d, 'HEAD') + return isfile(headref) or \ + (os.path.islink(headref) and + os.readlink(headref).startswith('refs')) + return False def short_to_long(odb, hexsha): - """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha - or None if no candidate could be found. - :param hexsha: hexsha with less than 40 byte""" - try: - return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha)) - except BadObject: - return None - # END exception handling - - + """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha + or None if no candidate could be found. 
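# Usage sketch: is_git_dir() applies the same heuristic as git's own setup.c
# (an objects and refs directory plus a HEAD file or link), so candidate paths
# can be probed directly; the path below is a placeholder.
from git.repo.fun import is_git_dir
print(is_git_dir("/path/to/project/.git"))  # True for a valid git directory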
+ :param hexsha: hexsha with less than 40 byte""" + try: + return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha)) + except BadObject: + return None + # END exception handling + + def name_to_object(repo, name, return_ref=False): - """ - :return: object specified by the given name, hexshas ( short and long ) - as well as references are supported - :param return_ref: if name specifies a reference, we will return the reference - instead of the object. Otherwise it will raise BadObject - """ - hexsha = None - - # is it a hexsha ? Try the most common ones, which is 7 to 40 - if repo.re_hexsha_shortened.match(name): - if len(name) != 40: - # find long sha for short sha - hexsha = short_to_long(repo.odb, name) - else: - hexsha = name - # END handle short shas - #END find sha if it matches - - # if we couldn't find an object for what seemed to be a short hexsha - # try to find it as reference anyway, it could be named 'aaa' for instance - if hexsha is None: - for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'): - try: - hexsha = SymbolicReference.dereference_recursive(repo, base % name) - if return_ref: - return SymbolicReference(repo, base % name) - #END handle symbolic ref - break - except ValueError: - pass - # END for each base - # END handle hexsha + """ + :return: object specified by the given name, hexshas ( short and long ) + as well as references are supported + :param return_ref: if name specifies a reference, we will return the reference + instead of the object. Otherwise it will raise BadObject + """ + hexsha = None + + # is it a hexsha ? Try the most common ones, which is 7 to 40 + if repo.re_hexsha_shortened.match(name): + if len(name) != 40: + # find long sha for short sha + hexsha = short_to_long(repo.odb, name) + else: + hexsha = name + # END handle short shas + #END find sha if it matches + + # if we couldn't find an object for what seemed to be a short hexsha + # try to find it as reference anyway, it could be named 'aaa' for instance + if hexsha is None: + for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'): + try: + hexsha = SymbolicReference.dereference_recursive(repo, base % name) + if return_ref: + return SymbolicReference(repo, base % name) + #END handle symbolic ref + break + except ValueError: + pass + # END for each base + # END handle hexsha - # didn't find any ref, this is an error - if return_ref: - raise BadObject("Couldn't find reference named %r" % name) - #END handle return ref + # didn't find any ref, this is an error + if return_ref: + raise BadObject("Couldn't find reference named %r" % name) + #END handle return ref - # tried everything ? fail - if hexsha is None: - raise BadObject(name) - # END assert hexsha was found - - return Object.new_from_sha(repo, hex_to_bin(hexsha)) + # tried everything ? 
fail + if hexsha is None: + raise BadObject(name) + # END assert hexsha was found + + return Object.new_from_sha(repo, hex_to_bin(hexsha)) def deref_tag(tag): - """Recursively dereerence a tag and return the resulting object""" - while True: - try: - tag = tag.object - except AttributeError: - break - # END dereference tag - return tag + """Recursively dereerence a tag and return the resulting object""" + while True: + try: + tag = tag.object + except AttributeError: + break + # END dereference tag + return tag def to_commit(obj): - """Convert the given object to a commit if possible and return it""" - if obj.type == 'tag': - obj = deref_tag(obj) - - if obj.type != "commit": - raise ValueError("Cannot convert object %r to type commit" % obj) - # END verify type - return obj + """Convert the given object to a commit if possible and return it""" + if obj.type == 'tag': + obj = deref_tag(obj) + + if obj.type != "commit": + raise ValueError("Cannot convert object %r to type commit" % obj) + # END verify type + return obj def rev_parse(repo, rev): - """ - :return: Object at the given revision, either Commit, Tag, Tree or Blob - :param rev: git-rev-parse compatible revision specification, please see - http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html - for details - :note: Currently there is no access to the rev-log, rev-specs may only contain - topological tokens such ~ and ^. - :raise BadObject: if the given revision could not be found - :raise ValueError: If rev couldn't be parsed - :raise IndexError: If invalid reflog index is specified""" - - # colon search mode ? - if rev.startswith(':/'): - # colon search mode - raise NotImplementedError("commit by message search ( regex )") - # END handle search - - obj = None - ref = None - output_type = "commit" - start = 0 - parsed_to = 0 - lr = len(rev) - while start < lr: - if rev[start] not in "^~:@": - start += 1 - continue - # END handle start - - token = rev[start] - - if obj is None: - # token is a rev name - if start == 0: - ref = repo.head.ref - else: - if token == '@': - ref = name_to_object(repo, rev[:start], return_ref=True) - else: - obj = name_to_object(repo, rev[:start]) - #END handle token - #END handle refname - - if ref is not None: - obj = ref.commit - #END handle ref - # END initialize obj on first token - - - start += 1 - - # try to parse {type} - if start < lr and rev[start] == '{': - end = rev.find('}', start) - if end == -1: - raise ValueError("Missing closing brace to define type in %s" % rev) - output_type = rev[start+1:end] # exclude brace - - # handle type - if output_type == 'commit': - pass # default - elif output_type == 'tree': - try: - obj = to_commit(obj).tree - except (AttributeError, ValueError): - pass # error raised later - # END exception handling - elif output_type in ('', 'blob'): - if obj.type == 'tag': - obj = deref_tag(obj) - else: - # cannot do anything for non-tags - pass - # END handle tag - elif token == '@': - # try single int - assert ref is not None, "Requre Reference to access reflog" - revlog_index = None - try: - # transform reversed index into the format of our revlog - revlog_index = -(int(output_type)+1) - except ValueError: - # TODO: Try to parse the other date options, using parse_date - # maybe - raise NotImplementedError("Support for additional @{...} modes not implemented") - #END handle revlog index - - try: - entry = ref.log_entry(revlog_index) - except IndexError: - raise IndexError("Invalid revlog index: %i" % revlog_index) - #END handle index out of bound - - obj = 
Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) - - # make it pass the following checks - output_type = None - else: - raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) - # END handle output type - - # empty output types don't require any specific type, its just about dereferencing tags - if output_type and obj.type != output_type: - raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type)) - # END verify ouput type - - start = end+1 # skip brace - parsed_to = start - continue - # END parse type - - # try to parse a number - num = 0 - if token != ":": - found_digit = False - while start < lr: - if rev[start] in digits: - num = num * 10 + int(rev[start]) - start += 1 - found_digit = True - else: - break - # END handle number - # END number parse loop - - # no explicit number given, 1 is the default - # It could be 0 though - if not found_digit: - num = 1 - # END set default num - # END number parsing only if non-blob mode - - - parsed_to = start - # handle hiererarchy walk - try: - if token == "~": - obj = to_commit(obj) - for item in xrange(num): - obj = obj.parents[0] - # END for each history item to walk - elif token == "^": - obj = to_commit(obj) - # must be n'th parent - if num: - obj = obj.parents[num-1] - elif token == ":": - if obj.type != "tree": - obj = obj.tree - # END get tree type - obj = obj[rev[start:]] - parsed_to = lr - else: - raise ValueError("Invalid token: %r" % token) - # END end handle tag - except (IndexError, AttributeError): - raise BadObject("Invalid Revision in %s" % rev) - # END exception handling - # END parse loop - - # still no obj ? Its probably a simple name - if obj is None: - obj = name_to_object(repo, rev) - parsed_to = lr - # END handle simple name - - if obj is None: - raise ValueError("Revision specifier could not be parsed: %s" % rev) + """ + :return: Object at the given revision, either Commit, Tag, Tree or Blob + :param rev: git-rev-parse compatible revision specification, please see + http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html + for details + :note: Currently there is no access to the rev-log, rev-specs may only contain + topological tokens such ~ and ^. + :raise BadObject: if the given revision could not be found + :raise ValueError: If rev couldn't be parsed + :raise IndexError: If invalid reflog index is specified""" + + # colon search mode ? 
+ if rev.startswith(':/'): + # colon search mode + raise NotImplementedError("commit by message search ( regex )") + # END handle search + + obj = None + ref = None + output_type = "commit" + start = 0 + parsed_to = 0 + lr = len(rev) + while start < lr: + if rev[start] not in "^~:@": + start += 1 + continue + # END handle start + + token = rev[start] + + if obj is None: + # token is a rev name + if start == 0: + ref = repo.head.ref + else: + if token == '@': + ref = name_to_object(repo, rev[:start], return_ref=True) + else: + obj = name_to_object(repo, rev[:start]) + #END handle token + #END handle refname + + if ref is not None: + obj = ref.commit + #END handle ref + # END initialize obj on first token + + + start += 1 + + # try to parse {type} + if start < lr and rev[start] == '{': + end = rev.find('}', start) + if end == -1: + raise ValueError("Missing closing brace to define type in %s" % rev) + output_type = rev[start+1:end] # exclude brace + + # handle type + if output_type == 'commit': + pass # default + elif output_type == 'tree': + try: + obj = to_commit(obj).tree + except (AttributeError, ValueError): + pass # error raised later + # END exception handling + elif output_type in ('', 'blob'): + if obj.type == 'tag': + obj = deref_tag(obj) + else: + # cannot do anything for non-tags + pass + # END handle tag + elif token == '@': + # try single int + assert ref is not None, "Requre Reference to access reflog" + revlog_index = None + try: + # transform reversed index into the format of our revlog + revlog_index = -(int(output_type)+1) + except ValueError: + # TODO: Try to parse the other date options, using parse_date + # maybe + raise NotImplementedError("Support for additional @{...} modes not implemented") + #END handle revlog index + + try: + entry = ref.log_entry(revlog_index) + except IndexError: + raise IndexError("Invalid revlog index: %i" % revlog_index) + #END handle index out of bound + + obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) + + # make it pass the following checks + output_type = None + else: + raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) + # END handle output type + + # empty output types don't require any specific type, its just about dereferencing tags + if output_type and obj.type != output_type: + raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type)) + # END verify ouput type + + start = end+1 # skip brace + parsed_to = start + continue + # END parse type + + # try to parse a number + num = 0 + if token != ":": + found_digit = False + while start < lr: + if rev[start] in digits: + num = num * 10 + int(rev[start]) + start += 1 + found_digit = True + else: + break + # END handle number + # END number parse loop + + # no explicit number given, 1 is the default + # It could be 0 though + if not found_digit: + num = 1 + # END set default num + # END number parsing only if non-blob mode + + + parsed_to = start + # handle hiererarchy walk + try: + if token == "~": + obj = to_commit(obj) + for item in xrange(num): + obj = obj.parents[0] + # END for each history item to walk + elif token == "^": + obj = to_commit(obj) + # must be n'th parent + if num: + obj = obj.parents[num-1] + elif token == ":": + if obj.type != "tree": + obj = obj.tree + # END get tree type + obj = obj[rev[start:]] + parsed_to = lr + else: + raise ValueError("Invalid token: %r" % token) + # END end handle tag + except (IndexError, AttributeError): + raise BadObject("Invalid Revision in %s" % rev) + # END 
exception handling + # END parse loop + + # still no obj ? Its probably a simple name + if obj is None: + obj = name_to_object(repo, rev) + parsed_to = lr + # END handle simple name + + if obj is None: + raise ValueError("Revision specifier could not be parsed: %s" % rev) - if parsed_to != lr: - raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) - - return obj + if parsed_to != lr: + raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) + + return obj diff --git a/git/test/lib/helper.py b/git/test/lib/helper.py index 3a60d116..adada6d4 100644 --- a/git/test/lib/helper.py +++ b/git/test/lib/helper.py @@ -15,233 +15,233 @@ import cStringIO GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) __all__ = ( - 'fixture_path', 'fixture', 'absolute_project_path', 'StringProcessAdapter', - 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase', 'GIT_REPO' - ) + 'fixture_path', 'fixture', 'absolute_project_path', 'StringProcessAdapter', + 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase', 'GIT_REPO' + ) #{ Routines def fixture_path(name): - test_dir = os.path.dirname(os.path.dirname(__file__)) - return os.path.join(test_dir, "fixtures", name) + test_dir = os.path.dirname(os.path.dirname(__file__)) + return os.path.join(test_dir, "fixtures", name) def fixture(name): - return open(fixture_path(name), 'rb').read() + return open(fixture_path(name), 'rb').read() def absolute_project_path(): - return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) + return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) #} END routines - + #{ Adapters - + class StringProcessAdapter(object): - """Allows to use strings as Process object as returned by SubProcess.Popen. - Its tailored to work with the test system only""" - - def __init__(self, input_string): - self.stdout = cStringIO.StringIO(input_string) - self.stderr = cStringIO.StringIO() - - def wait(self): - return 0 - - poll = wait - + """Allows to use strings as Process object as returned by SubProcess.Popen. + Its tailored to work with the test system only""" + + def __init__(self, input_string): + self.stdout = cStringIO.StringIO(input_string) + self.stderr = cStringIO.StringIO() + + def wait(self): + return 0 + + poll = wait + #} END adapters #{ Decorators def _mktemp(*args): - """Wrapper around default tempfile.mktemp to fix an osx issue""" - tdir = tempfile.mktemp(*args) - if sys.platform == 'darwin': - tdir = '/private' + tdir - return tdir + """Wrapper around default tempfile.mktemp to fix an osx issue""" + tdir = tempfile.mktemp(*args) + if sys.platform == 'darwin': + tdir = '/private' + tdir + return tdir def _rmtree_onerror(osremove, fullpath, exec_info): - """ - Handle the case on windows that read-only files cannot be deleted by - os.remove by setting it to mode 777, then retry deletion. - """ - if os.name != 'nt' or osremove is not os.remove: - raise - - os.chmod(fullpath, 0777) - os.remove(fullpath) + """ + Handle the case on windows that read-only files cannot be deleted by + os.remove by setting it to mode 777, then retry deletion. + """ + if os.name != 'nt' or osremove is not os.remove: + raise + + os.chmod(fullpath, 0777) + os.remove(fullpath) def with_rw_repo(working_tree_ref, bare=False): - """ - Same as with_bare_repo, but clones the rorepo as non-bare repository, checking - out the working tree at the given working_tree_ref. 
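# Usage sketch for the rev_parse implementation above: topological tokens and
# {type} conversions compose left to right; ref and path names are placeholders.
from git import Repo
from git.repo.fun import rev_parse
repo = Repo("/path/to/project")
commit = rev_parse(repo, "HEAD~2")       # second ancestor along first parents
tree = rev_parse(repo, "master^{tree}")  # dereference a ref down to its tree
blob = rev_parse(repo, "HEAD:setup.py")  # look up a path within HEAD's tree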
- - This repository type is more costly due to the working copy checkout. - - To make working with relative paths easier, the cwd will be set to the working - dir of the repository. - """ - assert isinstance(working_tree_ref, basestring), "Decorator requires ref name for working tree checkout" - def argument_passer(func): - def repo_creator(self): - prefix = 'non_' - if bare: - prefix = '' - #END handle prefix - repo_dir = _mktemp("%sbare_%s" % (prefix, func.__name__)) - rw_repo = self.rorepo.clone(repo_dir, shared=True, bare=bare, n=True) - - rw_repo.head.commit = rw_repo.commit(working_tree_ref) - if not bare: - rw_repo.head.reference.checkout() - # END handle checkout - - prev_cwd = os.getcwd() - os.chdir(rw_repo.working_dir) - try: - try: - return func(self, rw_repo) - except: - print >> sys.stderr, "Keeping repo after failure: %s" % repo_dir - repo_dir = None - raise - finally: - os.chdir(prev_cwd) - rw_repo.git.clear_cache() - if repo_dir is not None: - shutil.rmtree(repo_dir, onerror=_rmtree_onerror) - # END rm test repo if possible - # END cleanup - # END rw repo creator - repo_creator.__name__ = func.__name__ - return repo_creator - # END argument passer - return argument_passer - + """ + Same as with_bare_repo, but clones the rorepo as non-bare repository, checking + out the working tree at the given working_tree_ref. + + This repository type is more costly due to the working copy checkout. + + To make working with relative paths easier, the cwd will be set to the working + dir of the repository. + """ + assert isinstance(working_tree_ref, basestring), "Decorator requires ref name for working tree checkout" + def argument_passer(func): + def repo_creator(self): + prefix = 'non_' + if bare: + prefix = '' + #END handle prefix + repo_dir = _mktemp("%sbare_%s" % (prefix, func.__name__)) + rw_repo = self.rorepo.clone(repo_dir, shared=True, bare=bare, n=True) + + rw_repo.head.commit = rw_repo.commit(working_tree_ref) + if not bare: + rw_repo.head.reference.checkout() + # END handle checkout + + prev_cwd = os.getcwd() + os.chdir(rw_repo.working_dir) + try: + try: + return func(self, rw_repo) + except: + print >> sys.stderr, "Keeping repo after failure: %s" % repo_dir + repo_dir = None + raise + finally: + os.chdir(prev_cwd) + rw_repo.git.clear_cache() + if repo_dir is not None: + shutil.rmtree(repo_dir, onerror=_rmtree_onerror) + # END rm test repo if possible + # END cleanup + # END rw repo creator + repo_creator.__name__ = func.__name__ + return repo_creator + # END argument passer + return argument_passer + def with_rw_and_rw_remote_repo(working_tree_ref): - """ - Same as with_rw_repo, but also provides a writable remote repository from which the - rw_repo has been forked as well as a handle for a git-daemon that may be started to - run the remote_repo. - The remote repository was cloned as bare repository from the rorepo, wheras - the rw repo has a working tree and was cloned from the remote repository. - - remote_repo has two remotes: origin and daemon_origin. One uses a local url, - the other uses a server url. The daemon setup must be done on system level - and should be an inetd service that serves tempdir.gettempdir() and all - directories in it. - - The following scetch demonstrates this:: - rorepo ---<bare clone>---> rw_remote_repo ---<clone>---> rw_repo - - The test case needs to support the following signature:: - def case(self, rw_repo, rw_remote_repo) - - This setup allows you to test push and pull scenarios and hooks nicely. 
def with_rw_and_rw_remote_repo(working_tree_ref):
-    """
-    Same as with_rw_repo, but also provides a writable remote repository from which the
-    rw_repo has been forked as well as a handle for a git-daemon that may be started to
-    run the remote_repo.
-    The remote repository was cloned as bare repository from the rorepo, whereas
-    the rw repo has a working tree and was cloned from the remote repository.
-
-    remote_repo has two remotes: origin and daemon_origin. One uses a local url,
-    the other uses a server url. The daemon setup must be done on system level
-    and should be an inetd service that serves tempfile.gettempdir() and all
-    directories in it.
-
-    The following sketch demonstrates this::
-     rorepo ---<bare clone>---> rw_remote_repo ---<clone>---> rw_repo
-
-    The test case needs to support the following signature::
-     def case(self, rw_repo, rw_remote_repo)
-
-    This setup allows you to test push and pull scenarios and hooks nicely.
-
-    See working dir info in with_rw_repo
-    """
-    assert isinstance(working_tree_ref, basestring), "Decorator requires ref name for working tree checkout"
-    def argument_passer(func):
-        def remote_repo_creator(self):
-            remote_repo_dir = _mktemp("remote_repo_%s" % func.__name__)
-            repo_dir = _mktemp("remote_clone_non_bare_repo")
-
-            rw_remote_repo = self.rorepo.clone(remote_repo_dir, shared=True, bare=True)
-            rw_repo = rw_remote_repo.clone(repo_dir, shared=True, bare=False, n=True) # recursive alternates info ?
-            rw_repo.head.commit = working_tree_ref
-            rw_repo.head.reference.checkout()
-
-            # prepare for git-daemon
-            rw_remote_repo.daemon_export = True
-
-            # this thing is just annoying !
-            crw = rw_remote_repo.config_writer()
-            section = "daemon"
-            try:
-                crw.add_section(section)
-            except Exception:
-                pass
-            crw.set(section, "receivepack", True)
-            # release lock
-            del(crw)
-
-            # initialize the remote - first do it as local remote and pull, then
-            # we change the url to point to the daemon. The daemon should be started
-            # by the user, not by us
-            d_remote = Remote.create(rw_repo, "daemon_origin", remote_repo_dir)
-            d_remote.fetch()
-            remote_repo_url = "git://localhost%s" % remote_repo_dir
-
-            d_remote.config_writer.set('url', remote_repo_url)
-
-            # try to list remotes to diagnose whether the server is up
-            try:
-                rw_repo.git.ls_remote(d_remote)
-            except GitCommandError,e:
-                print str(e)
-                if os.name == 'nt':
-                    raise AssertionError('git-daemon needs to run this test, but windows does not have one. Otherwise, run: git-daemon "%s"' % os.path.dirname(_mktemp()))
-                else:
-                    raise AssertionError('Please start a git-daemon to run this test, execute: git-daemon "%s"' % os.path.dirname(_mktemp()))
-                # END make assertion
-            #END catch ls remote error
-
-            # adjust working dir
-            prev_cwd = os.getcwd()
-            os.chdir(rw_repo.working_dir)
-            try:
-                return func(self, rw_repo, rw_remote_repo)
-            finally:
-                os.chdir(prev_cwd)
-                rw_repo.git.clear_cache()
-                rw_remote_repo.git.clear_cache()
-                shutil.rmtree(repo_dir, onerror=_rmtree_onerror)
-                shutil.rmtree(remote_repo_dir, onerror=_rmtree_onerror)
-            # END cleanup
-        # END bare repo creator
-        remote_repo_creator.__name__ = func.__name__
-        return remote_repo_creator
-    # END remote repo creator
-    # END argument parser
-
-    return argument_passer
-
+    """
+    Same as with_rw_repo, but also provides a writable remote repository from which the
+    rw_repo has been forked as well as a handle for a git-daemon that may be started to
+    run the remote_repo.
+    The remote repository was cloned as bare repository from the rorepo, whereas
+    the rw repo has a working tree and was cloned from the remote repository.
+
+    remote_repo has two remotes: origin and daemon_origin. One uses a local url,
+    the other uses a server url. The daemon setup must be done on system level
+    and should be an inetd service that serves tempfile.gettempdir() and all
+    directories in it.
+
+    The following sketch demonstrates this::
+     rorepo ---<bare clone>---> rw_remote_repo ---<clone>---> rw_repo
+
+    The test case needs to support the following signature::
+     def case(self, rw_repo, rw_remote_repo)
+
+    This setup allows you to test push and pull scenarios and hooks nicely.
+
+    See working dir info in with_rw_repo
+    """
+    assert isinstance(working_tree_ref, basestring), "Decorator requires ref name for working tree checkout"
+    def argument_passer(func):
+        def remote_repo_creator(self):
+            remote_repo_dir = _mktemp("remote_repo_%s" % func.__name__)
+            repo_dir = _mktemp("remote_clone_non_bare_repo")
+
+            rw_remote_repo = self.rorepo.clone(remote_repo_dir, shared=True, bare=True)
+            rw_repo = rw_remote_repo.clone(repo_dir, shared=True, bare=False, n=True) # recursive alternates info ?
+            rw_repo.head.commit = working_tree_ref
+            rw_repo.head.reference.checkout()
+
+            # prepare for git-daemon
+            rw_remote_repo.daemon_export = True
+
+            # this thing is just annoying !
+            crw = rw_remote_repo.config_writer()
+            section = "daemon"
+            try:
+                crw.add_section(section)
+            except Exception:
+                pass
+            crw.set(section, "receivepack", True)
+            # release lock
+            del(crw)
+
+            # initialize the remote - first do it as local remote and pull, then
+            # we change the url to point to the daemon. The daemon should be started
+            # by the user, not by us
+            d_remote = Remote.create(rw_repo, "daemon_origin", remote_repo_dir)
+            d_remote.fetch()
+            remote_repo_url = "git://localhost%s" % remote_repo_dir
+
+            d_remote.config_writer.set('url', remote_repo_url)
+
+            # try to list remotes to diagnose whether the server is up
+            try:
+                rw_repo.git.ls_remote(d_remote)
+            except GitCommandError,e:
+                print str(e)
+                if os.name == 'nt':
+                    raise AssertionError('git-daemon needs to run this test, but windows does not have one. Otherwise, run: git-daemon "%s"' % os.path.dirname(_mktemp()))
+                else:
+                    raise AssertionError('Please start a git-daemon to run this test, execute: git-daemon "%s"' % os.path.dirname(_mktemp()))
+                # END make assertion
+            #END catch ls remote error
+
+            # adjust working dir
+            prev_cwd = os.getcwd()
+            os.chdir(rw_repo.working_dir)
+            try:
+                return func(self, rw_repo, rw_remote_repo)
+            finally:
+                os.chdir(prev_cwd)
+                rw_repo.git.clear_cache()
+                rw_remote_repo.git.clear_cache()
+                shutil.rmtree(repo_dir, onerror=_rmtree_onerror)
+                shutil.rmtree(remote_repo_dir, onerror=_rmtree_onerror)
+            # END cleanup
+        # END bare repo creator
+        remote_repo_creator.__name__ = func.__name__
+        return remote_repo_creator
+    # END remote repo creator
+    # END argument parser
+
+    return argument_passer
+
#} END decorators
-
+
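# [Editor's sketch, not part of the patch] One way to provide the git-daemon
# the decorator above expects: serve the system temp directory with pushes
# enabled. The flags are standard git-daemon options.
import subprocess
import tempfile

daemon = subprocess.Popen(['git', 'daemon', '--enable=receive-pack',
                           '--export-all', tempfile.gettempdir()])
# ... run the push/pull tests, then shut the daemon down
daemon.terminate()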
class TestBase(TestCase):
-    """
-    Base Class providing default functionality to all tests such as:
-
-    - Utility functions provided by the TestCase base of the unittest module such as::
-        self.fail("todo")
-        self.failUnlessRaises(...)
-
-    - Class level repository which is considered read-only as it is shared among
-      all test cases in your type.
-      Access it using::
-       self.rorepo # 'ro' stands for read-only
-
-      The rorepo is in fact your current project's git repo. If you refer to specific
-      shas for your objects, be sure you choose some that are part of the immutable portion
-      of the project history (to assure tests don't fail for others).
-    """
-
-    @classmethod
-    def setUpAll(cls):
-        """
-        Dynamically add a read-only repository to our actual type. This way
-        each test type has its own repository
-        """
-        cls.rorepo = Repo(GIT_REPO)
-
-    def _make_file(self, rela_path, data, repo=None):
-        """
-        Create a file at the given path relative to our repository, filled
-        with the given data. Returns absolute path to created file.
-        """
-        repo = repo or self.rorepo
-        abs_path = os.path.join(repo.working_tree_dir, rela_path)
-        fp = open(abs_path, "w")
-        fp.write(data)
-        fp.close()
-        return abs_path
+    """
+    Base Class providing default functionality to all tests such as:
+
+    - Utility functions provided by the TestCase base of the unittest module such as::
+        self.fail("todo")
+        self.failUnlessRaises(...)
+
+    - Class level repository which is considered read-only as it is shared among
+      all test cases in your type.
+      Access it using::
+       self.rorepo # 'ro' stands for read-only
+
+      The rorepo is in fact your current project's git repo. If you refer to specific
+      shas for your objects, be sure you choose some that are part of the immutable portion
+      of the project history (to assure tests don't fail for others).
+    """
+
+    @classmethod
+    def setUpAll(cls):
+        """
+        Dynamically add a read-only repository to our actual type. This way
+        each test type has its own repository
+        """
+        cls.rorepo = Repo(GIT_REPO)
+
+    def _make_file(self, rela_path, data, repo=None):
+        """
+        Create a file at the given path relative to our repository, filled
+        with the given data. Returns absolute path to created file.
+        """
+        repo = repo or self.rorepo
+        abs_path = os.path.join(repo.working_tree_dir, rela_path)
+        fp = open(abs_path, "w")
+        fp.write(data)
+        fp.close()
+        return abs_path
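# [Editor's note] setUpAll/tearDownAll are nose-style class fixtures; under
# plain unittest (Python 2.7+) the equivalent hook is setUpClass, e.g.:
import unittest

class ExampleCase(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.rorepo = "expensive shared read-only resource"  # built once per class

    def test_uses_it(self):
        assert self.rorepo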
- """ - repo = repo or self.rorepo - abs_path = os.path.join(repo.working_tree_dir, rela_path) - fp = open(abs_path, "w") - fp.write(data) - fp.close() - return abs_path + """ + Base Class providing default functionality to all tests such as: + + - Utility functions provided by the TestCase base of the unittest method such as:: + self.fail("todo") + self.failUnlessRaises(...) + + - Class level repository which is considered read-only as it is shared among + all test cases in your type. + Access it using:: + self.rorepo # 'ro' stands for read-only + + The rorepo is in fact your current project's git repo. If you refer to specific + shas for your objects, be sure you choose some that are part of the immutable portion + of the project history ( to assure tests don't fail for others ). + """ + + @classmethod + def setUpAll(cls): + """ + Dynamically add a read-only repository to our actual type. This way + each test type has its own repository + """ + cls.rorepo = Repo(GIT_REPO) + + def _make_file(self, rela_path, data, repo=None): + """ + Create a file at the given path relative to our repository, filled + with the given data. Returns absolute path to created file. + """ + repo = repo or self.rorepo + abs_path = os.path.join(repo.working_tree_dir, rela_path) + fp = open(abs_path, "w") + fp.write(data) + fp.close() + return abs_path diff --git a/git/test/performance/lib.py b/git/test/performance/lib.py index d0727b60..778a6c7d 100644 --- a/git/test/performance/lib.py +++ b/git/test/performance/lib.py @@ -5,13 +5,13 @@ import shutil import tempfile from git.db import ( - GitCmdObjectDB, - GitDB - ) + GitCmdObjectDB, + GitDB + ) from git import ( - Repo - ) + Repo + ) #{ Invvariants k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE" @@ -20,12 +20,12 @@ k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE" #{ Utilities def resolve_or_fail(env_var): - """:return: resolved environment variable or raise EnvironmentError""" - try: - return os.environ[env_var] - except KeyError: - raise EnvironmentError("Please set the %r envrionment variable and retry" % env_var) - # END exception handling + """:return: resolved environment variable or raise EnvironmentError""" + try: + return os.environ[env_var] + except KeyError: + raise EnvironmentError("Please set the %r envrionment variable and retry" % env_var) + # END exception handling #} END utilities @@ -33,46 +33,46 @@ def resolve_or_fail(env_var): #{ Base Classes class TestBigRepoR(TestBase): - """TestCase providing access to readonly 'big' repositories using the following - member variables: - - * gitrorepo - - * Read-Only git repository - actually the repo of git itself - + """TestCase providing access to readonly 'big' repositories using the following + member variables: + + * gitrorepo + + * Read-Only git repository - actually the repo of git itself + * puregitrorepo * As gitrepo, but uses pure python implementation """ - - #{ Invariants - head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca' - head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5' - #} END invariants - - @classmethod - def setUpAll(cls): - super(TestBigRepoR, cls).setUpAll() - repo_path = resolve_or_fail(k_env_git_repo) - cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB) - cls.puregitrorepo = Repo(repo_path, odbt=GitDB) + + #{ Invariants + head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca' + head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5' + #} END invariants + + @classmethod + def setUpAll(cls): + super(TestBigRepoR, cls).setUpAll() + repo_path = 
resolve_or_fail(k_env_git_repo) + cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB) + cls.puregitrorepo = Repo(repo_path, odbt=GitDB) class TestBigRepoRW(TestBigRepoR): - """As above, but provides a big repository that we can write to. - - Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``""" - - @classmethod - def setUpAll(cls): - super(TestBigRepoRW, cls).setUpAll() - dirname = tempfile.mktemp() - os.mkdir(dirname) - cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB) - cls.puregitrwrepo = Repo(dirname, odbt=GitDB) - - @classmethod - def tearDownAll(cls): - shutil.rmtree(cls.gitrwrepo.working_dir) - + """As above, but provides a big repository that we can write to. + + Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``""" + + @classmethod + def setUpAll(cls): + super(TestBigRepoRW, cls).setUpAll() + dirname = tempfile.mktemp() + os.mkdir(dirname) + cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB) + cls.puregitrwrepo = Repo(dirname, odbt=GitDB) + + @classmethod + def tearDownAll(cls): + shutil.rmtree(cls.gitrwrepo.working_dir) + #} END base classes diff --git a/git/test/performance/test_commit.py b/git/test/performance/test_commit.py index 80421aa2..1bdfcfa2 100644 --- a/git/test/performance/test_commit.py +++ b/git/test/performance/test_commit.py @@ -14,86 +14,86 @@ import sys class TestPerformance(TestBigRepoRW): - # ref with about 100 commits in its history - ref_100 = '0.1.6' + # ref with about 100 commits in its history + ref_100 = '0.1.6' - def _query_commit_info(self, c): - c.author - c.authored_date - c.author_tz_offset - c.committer - c.committed_date - c.committer_tz_offset - c.message - c.parents - - def test_iteration(self): - no = 0 - nc = 0 - - # find the first commit containing the given path - always do a full - # iteration ( restricted to the path in question ), but in fact it should - # return quite a lot of commits, we just take one and hence abort the operation - - st = time() - for c in self.rorepo.iter_commits(self.ref_100): - nc += 1 - self._query_commit_info(c) - for obj in c.tree.traverse(): - obj.size - no += 1 - # END for each object - # END for each commit - elapsed_time = time() - st - print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) - - def test_commit_traversal(self): - # bound to cat-file parsing performance - nc = 0 - st = time() - for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False): - nc += 1 - self._query_commit_info(c) - # END for each traversed commit - elapsed_time = time() - st - print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) - - def test_commit_iteration(self): - # bound to stream parsing performance - nc = 0 - st = time() - for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k): - nc += 1 - self._query_commit_info(c) - # END for each traversed commit - elapsed_time = time() - st - print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) - - def test_commit_serialization(self): - assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) - - rwrepo = self.gitrwrepo - make_object = rwrepo.odb.store - # direct serialization - deserialization can be tested afterwards - # serialization is probably limited on IO - hc = rwrepo.commit(self.head_sha_2k) - - commits = list() - nc = 5000 - st = time() - for i in xrange(nc): - 
cm = Commit( rwrepo, Commit.NULL_BIN_SHA, hc.tree, - hc.author, hc.authored_date, hc.author_tz_offset, - hc.committer, hc.committed_date, hc.committer_tz_offset, - str(i), parents=hc.parents, encoding=hc.encoding) - - stream = StringIO() - cm._serialize(stream) - slen = stream.tell() - stream.seek(0) - - cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha - # END commit creation - elapsed = time() - st - - print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed) + def _query_commit_info(self, c): + c.author + c.authored_date + c.author_tz_offset + c.committer + c.committed_date + c.committer_tz_offset + c.message + c.parents + + def test_iteration(self): + no = 0 + nc = 0 + + # find the first commit containing the given path - always do a full + # iteration ( restricted to the path in question ), but in fact it should + # return quite a lot of commits, we just take one and hence abort the operation + + st = time() + for c in self.rorepo.iter_commits(self.ref_100): + nc += 1 + self._query_commit_info(c) + for obj in c.tree.traverse(): + obj.size + no += 1 + # END for each object + # END for each commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) + + def test_commit_traversal(self): + # bound to cat-file parsing performance + nc = 0 + st = time() + for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + + def test_commit_iteration(self): + # bound to stream parsing performance + nc = 0 + st = time() + for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + + def test_commit_serialization(self): + assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) + + rwrepo = self.gitrwrepo + make_object = rwrepo.odb.store + # direct serialization - deserialization can be tested afterwards + # serialization is probably limited on IO + hc = rwrepo.commit(self.head_sha_2k) + + commits = list() + nc = 5000 + st = time() + for i in xrange(nc): + cm = Commit( rwrepo, Commit.NULL_BIN_SHA, hc.tree, + hc.author, hc.authored_date, hc.author_tz_offset, + hc.committer, hc.committed_date, hc.committer_tz_offset, + str(i), parents=hc.parents, encoding=hc.encoding) + + stream = StringIO() + cm._serialize(stream) + slen = stream.tell() + stream.seek(0) + + cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha + # END commit creation + elapsed = time() - st + + print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed) diff --git a/git/test/performance/test_odb.py b/git/test/performance/test_odb.py index 32b70f69..ccc13eb4 100644 --- a/git/test/performance/test_odb.py +++ b/git/test/performance/test_odb.py @@ -5,66 +5,66 @@ import sys import stat from lib import ( - TestBigRepoR - ) + TestBigRepoR + ) class TestObjDBPerformance(TestBigRepoR): - - def test_random_access(self): - results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ] - for repo in 
(self.gitrorepo, self.puregitrorepo): - # GET COMMITS - st = time() - root_commit = repo.commit(self.head_sha_2k) - commits = list(root_commit.traverse()) - nc = len(commits) - elapsed = time() - st - - print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed) - results[0].append(elapsed) - - # GET TREES - # walk all trees of all commits - st = time() - blobs_per_commit = list() - nt = 0 - for commit in commits: - tree = commit.tree - blobs = list() - for item in tree.traverse(): - nt += 1 - if item.type == 'blob': - blobs.append(item) - # direct access for speed - # END while trees are there for walking - blobs_per_commit.append(blobs) - # END for each commit - elapsed = time() - st - - print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed) - results[1].append(elapsed) - - # GET BLOBS - st = time() - nb = 0 - too_many = 15000 - data_bytes = 0 - for blob_list in blobs_per_commit: - for blob in blob_list: - data_bytes += len(blob.data_stream.read()) - # END for each blobsha - nb += len(blob_list) - if nb > too_many: - break - # END for each bloblist - elapsed = time() - st - - print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed) - results[2].append(elapsed) - # END for each repo type - - # final results - for test_name, a, b in results: - print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a) - # END for each result + + def test_random_access(self): + results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ] + for repo in (self.gitrorepo, self.puregitrorepo): + # GET COMMITS + st = time() + root_commit = repo.commit(self.head_sha_2k) + commits = list(root_commit.traverse()) + nc = len(commits) + elapsed = time() - st + + print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed) + results[0].append(elapsed) + + # GET TREES + # walk all trees of all commits + st = time() + blobs_per_commit = list() + nt = 0 + for commit in commits: + tree = commit.tree + blobs = list() + for item in tree.traverse(): + nt += 1 + if item.type == 'blob': + blobs.append(item) + # direct access for speed + # END while trees are there for walking + blobs_per_commit.append(blobs) + # END for each commit + elapsed = time() - st + + print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed) + results[1].append(elapsed) + + # GET BLOBS + st = time() + nb = 0 + too_many = 15000 + data_bytes = 0 + for blob_list in blobs_per_commit: + for blob in blob_list: + data_bytes += len(blob.data_stream.read()) + # END for each blobsha + nb += len(blob_list) + if nb > too_many: + break + # END for each bloblist + elapsed = time() - st + + print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed) + results[2].append(elapsed) + # END for each repo type + + # final results + for test_name, a, b in results: + print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a) + # END for each result diff --git 
a/git/test/performance/test_streams.py b/git/test/performance/test_streams.py index 7f17d722..93e88841 100644 --- a/git/test/performance/test_streams.py +++ b/git/test/performance/test_streams.py @@ -13,119 +13,119 @@ import subprocess from gitdb.test.lib import make_memory_file from lib import ( - TestBigRepoR - ) + TestBigRepoR + ) class TestObjDBPerformance(TestBigRepoR): - - large_data_size_bytes = 1000*1000*10 # some MiB should do it - moderate_data_size_bytes = 1000*1000*1 # just 1 MiB - - @with_rw_repo('HEAD', bare=True) - def test_large_data_streaming(self, rwrepo): - # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream - # It should be shared if possible - ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects')) - - for randomize in range(2): - desc = (randomize and 'random ') or '' - print >> sys.stderr, "Creating %s data ..." % desc - st = time() - size, stream = make_memory_file(self.large_data_size_bytes, randomize) - elapsed = time() - st - print >> sys.stderr, "Done (in %f s)" % elapsed - - # writing - due to the compression it will seem faster than it is - st = time() - binsha = ldb.store(IStream('blob', size, stream)).binsha - elapsed_add = time() - st - assert ldb.has_object(binsha) - db_file = ldb.readable_db_object_path(bin_to_hex(binsha)) - fsize_kib = os.path.getsize(db_file) / 1000 - - - size_kib = size / 1000 - print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) - - # reading all at once - st = time() - ostream = ldb.stream(binsha) - shadata = ostream.read() - elapsed_readall = time() - st - - stream.seek(0) - assert shadata == stream.getvalue() - print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall) - - - # reading in chunks of 1 MiB - cs = 512*1000 - chunks = list() - st = time() - ostream = ldb.stream(binsha) - while True: - data = ostream.read(cs) - chunks.append(data) - if len(data) < cs: - break - # END read in chunks - elapsed_readchunks = time() - st - - stream.seek(0) - assert ''.join(chunks) == stream.getvalue() - - cs_kib = cs / 1000 - print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks) - - # del db file so git has something to do - os.remove(db_file) - - # VS. CGIT - ########## - # CGIT ! Can using the cgit programs be faster ? - proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE) - - # write file - pump everything in at once to be a fast as possible - data = stream.getvalue() # cache it - st = time() - proc.stdin.write(data) - proc.stdin.close() - gitsha = proc.stdout.read().strip() - proc.wait() - gelapsed_add = time() - st - del(data) - assert gitsha == bin_to_hex(binsha) # we do it the same way, right ? - - # as its the same sha, we reuse our path - fsize_kib = os.path.getsize(db_file) / 1000 - print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add) - - # compare ... 
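# [Editor's sketch, not part of the patch] The cgit comparison above drives
# `git hash-object -w --stdin`; reduced to its essentials the plumbing looks
# like this (it must run inside a git repository, the payload is a stand-in):
import subprocess

proc = subprocess.Popen(['git', 'hash-object', '-w', '--stdin'],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
out, _ = proc.communicate('example blob payload')
print out.strip()   # 40-char hex sha1 of the stored blob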
- print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc) - - - # read all - st = time() - s, t, size, data = rwrepo.git.get_object_data(gitsha) - gelapsed_readall = time() - st - print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall) + + large_data_size_bytes = 1000*1000*10 # some MiB should do it + moderate_data_size_bytes = 1000*1000*1 # just 1 MiB + + @with_rw_repo('HEAD', bare=True) + def test_large_data_streaming(self, rwrepo): + # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream + # It should be shared if possible + ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects')) + + for randomize in range(2): + desc = (randomize and 'random ') or '' + print >> sys.stderr, "Creating %s data ..." % desc + st = time() + size, stream = make_memory_file(self.large_data_size_bytes, randomize) + elapsed = time() - st + print >> sys.stderr, "Done (in %f s)" % elapsed + + # writing - due to the compression it will seem faster than it is + st = time() + binsha = ldb.store(IStream('blob', size, stream)).binsha + elapsed_add = time() - st + assert ldb.has_object(binsha) + db_file = ldb.readable_db_object_path(bin_to_hex(binsha)) + fsize_kib = os.path.getsize(db_file) / 1000 + + + size_kib = size / 1000 + print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) + + # reading all at once + st = time() + ostream = ldb.stream(binsha) + shadata = ostream.read() + elapsed_readall = time() - st + + stream.seek(0) + assert shadata == stream.getvalue() + print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall) + + + # reading in chunks of 1 MiB + cs = 512*1000 + chunks = list() + st = time() + ostream = ldb.stream(binsha) + while True: + data = ostream.read(cs) + chunks.append(data) + if len(data) < cs: + break + # END read in chunks + elapsed_readchunks = time() - st + + stream.seek(0) + assert ''.join(chunks) == stream.getvalue() + + cs_kib = cs / 1000 + print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks) + + # del db file so git has something to do + os.remove(db_file) + + # VS. CGIT + ########## + # CGIT ! Can using the cgit programs be faster ? + proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE) + + # write file - pump everything in at once to be a fast as possible + data = stream.getvalue() # cache it + st = time() + proc.stdin.write(data) + proc.stdin.close() + gitsha = proc.stdout.read().strip() + proc.wait() + gelapsed_add = time() - st + del(data) + assert gitsha == bin_to_hex(binsha) # we do it the same way, right ? + + # as its the same sha, we reuse our path + fsize_kib = os.path.getsize(db_file) / 1000 + print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add) + + # compare ... 
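# [Editor's note] The "%f %% faster" figures printed by this test are
# computed as 100 - (gitpython_time / cgit_time) * 100: equal times give 0,
# and a negative result means the git binary won. For example:
gitpython_time, cgit_time = 0.5, 2.0
print 100.0 - (gitpython_time / cgit_time) * 100    # 75.0, i.e. 75% faster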
+ print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc) + + + # read all + st = time() + s, t, size, data = rwrepo.git.get_object_data(gitsha) + gelapsed_readall = time() - st + print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall) - # compare - print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc) - - - # read chunks - st = time() - s, t, size, stream = rwrepo.git.stream_object_data(gitsha) - while True: - data = stream.read(cs) - if len(data) < cs: - break - # END read stream - gelapsed_readchunks = time() - st - print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks) - - # compare - print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc) - # END for each randomization factor + # compare + print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc) + + + # read chunks + st = time() + s, t, size, stream = rwrepo.git.stream_object_data(gitsha) + while True: + data = stream.read(cs) + if len(data) < cs: + break + # END read stream + gelapsed_readchunks = time() - st + print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks) + + # compare + print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc) + # END for each randomization factor diff --git a/git/test/performance/test_utils.py b/git/test/performance/test_utils.py index 19c1e84a..8637af48 100644 --- a/git/test/performance/test_utils.py +++ b/git/test/performance/test_utils.py @@ -4,171 +4,171 @@ import sys import stat from lib import ( - TestBigRepoR - ) + TestBigRepoR + ) class TestUtilPerformance(TestBigRepoR): - - def test_access(self): - # compare dict vs. 
slot access - class Slotty(object): - __slots__ = "attr" - def __init__(self): - self.attr = 1 - - class Dicty(object): - def __init__(self): - self.attr = 1 - - class BigSlotty(object): - __slots__ = ('attr', ) + tuple('abcdefghijk') - def __init__(self): - for attr in self.__slots__: - setattr(self, attr, 1) - - class BigDicty(object): - def __init__(self): - for attr in BigSlotty.__slots__: - setattr(self, attr, 1) - - ni = 1000000 - for cls in (Slotty, Dicty, BigSlotty, BigDicty): - cli = cls() - st = time() - for i in xrange(ni): - cli.attr - # END for each access - elapsed = time() - st - print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed) - # END for each class type - - # check num of sequence-acceses - for cls in (list, tuple): - x = 10 - st = time() - s = cls(range(x)) - for i in xrange(ni): - s[0] - s[1] - s[2] - # END for - elapsed = time() - st - na = ni * 3 - print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed) - # END for each sequence - - def test_instantiation(self): - ni = 100000 - max_num_items = 4 - for mni in range(max_num_items+1): - for cls in (tuple, list): - st = time() - for i in xrange(ni): - if mni == 0: - cls() - elif mni == 1: - cls((1,)) - elif mni == 2: - cls((1,2)) - elif mni == 3: - cls((1,2,3)) - elif mni == 4: - cls((1,2,3,4)) - else: - cls(x for x in xrange(mni)) - # END handle empty cls - # END for each item - elapsed = time() - st - print >> sys.stderr, "Created %i %ss of size %i in %f s ( %f inst / s)" % (ni, cls.__name__, mni, elapsed, ni / elapsed) - # END for each type - # END for each item count - - # tuple and tuple direct - st = time() - for i in xrange(ni): - t = (1,2,3,4) - # END for each item - elapsed = time() - st - print >> sys.stderr, "Created %i tuples (1,2,3,4) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed) - - st = time() - for i in xrange(ni): - t = tuple((1,2,3,4)) - # END for each item - elapsed = time() - st - print >> sys.stderr, "Created %i tuples tuple((1,2,3,4)) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed) - - def test_unpacking_vs_indexing(self): - ni = 1000000 - list_items = [1,2,3,4] - tuple_items = (1,2,3,4) - - for sequence in (list_items, tuple_items): - st = time() - for i in xrange(ni): - one, two, three, four = sequence - # END for eac iteration - elapsed = time() - st - print >> sys.stderr, "Unpacked %i %ss of size %i in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed) - - st = time() - for i in xrange(ni): - one, two, three, four = sequence[0], sequence[1], sequence[2], sequence[3] - # END for eac iteration - elapsed = time() - st - print >> sys.stderr, "Unpacked %i %ss of size %i individually in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed) - - st = time() - for i in xrange(ni): - one, two = sequence[0], sequence[1] - # END for eac iteration - elapsed = time() - st - print >> sys.stderr, "Unpacked %i %ss of size %i individually (2 of 4) in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed) - # END for each sequence - - def test_large_list_vs_iteration(self): - # what costs more: alloc/realloc of lists, or the cpu strain of iterators ? 
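# [Editor's sketch, not part of the patch] The stdlib timeit module asks the
# same list-versus-iterator question with less boilerplate than the
# hand-rolled timing loops in this file:
import timeit

list_t = timeit.timeit('for i in list(xrange(10000)): pass', number=100)
gen_t = timeit.timeit('for i in (x for x in xrange(10000)): pass', number=100)
print 'list: %fs  generator: %fs' % (list_t, gen_t)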
- def slow_iter(ni): - for i in xrange(ni): - yield i - # END slow iter - be closer to the real world - - # alloc doesn't play a role here it seems - for ni in (500, 1000, 10000, 20000, 40000): - st = time() - for i in list(xrange(ni)): - i - # END for each item - elapsed = time() - st - print >> sys.stderr, "Iterated %i items from list in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed) - - st = time() - for i in slow_iter(ni): - i - # END for each item - elapsed = time() - st - print >> sys.stderr, "Iterated %i items from iterator in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed) - # END for each number of iterations - - def test_type_vs_inst_class(self): - class NewType(object): - pass - - # lets see which way is faster - inst = NewType() - - ni = 1000000 - st = time() - for i in xrange(ni): - inst.__class__() - # END for each item - elapsed = time() - st - print >> sys.stderr, "Created %i items using inst.__class__ in %f s ( %f items / s)" % (ni, elapsed, ni / elapsed) - - st = time() - for i in xrange(ni): - type(inst)() - # END for each item - elapsed = time() - st - print >> sys.stderr, "Created %i items using type(inst)() in %f s ( %f items / s)" % (ni, elapsed, ni / elapsed) + + def test_access(self): + # compare dict vs. slot access + class Slotty(object): + __slots__ = "attr" + def __init__(self): + self.attr = 1 + + class Dicty(object): + def __init__(self): + self.attr = 1 + + class BigSlotty(object): + __slots__ = ('attr', ) + tuple('abcdefghijk') + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, 1) + + class BigDicty(object): + def __init__(self): + for attr in BigSlotty.__slots__: + setattr(self, attr, 1) + + ni = 1000000 + for cls in (Slotty, Dicty, BigSlotty, BigDicty): + cli = cls() + st = time() + for i in xrange(ni): + cli.attr + # END for each access + elapsed = time() - st + print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed) + # END for each class type + + # check num of sequence-acceses + for cls in (list, tuple): + x = 10 + st = time() + s = cls(range(x)) + for i in xrange(ni): + s[0] + s[1] + s[2] + # END for + elapsed = time() - st + na = ni * 3 + print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed) + # END for each sequence + + def test_instantiation(self): + ni = 100000 + max_num_items = 4 + for mni in range(max_num_items+1): + for cls in (tuple, list): + st = time() + for i in xrange(ni): + if mni == 0: + cls() + elif mni == 1: + cls((1,)) + elif mni == 2: + cls((1,2)) + elif mni == 3: + cls((1,2,3)) + elif mni == 4: + cls((1,2,3,4)) + else: + cls(x for x in xrange(mni)) + # END handle empty cls + # END for each item + elapsed = time() - st + print >> sys.stderr, "Created %i %ss of size %i in %f s ( %f inst / s)" % (ni, cls.__name__, mni, elapsed, ni / elapsed) + # END for each type + # END for each item count + + # tuple and tuple direct + st = time() + for i in xrange(ni): + t = (1,2,3,4) + # END for each item + elapsed = time() - st + print >> sys.stderr, "Created %i tuples (1,2,3,4) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed) + + st = time() + for i in xrange(ni): + t = tuple((1,2,3,4)) + # END for each item + elapsed = time() - st + print >> sys.stderr, "Created %i tuples tuple((1,2,3,4)) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed) + + def test_unpacking_vs_indexing(self): + ni = 1000000 + list_items = [1,2,3,4] + tuple_items = (1,2,3,4) + + for sequence in (list_items, 
tuple_items): + st = time() + for i in xrange(ni): + one, two, three, four = sequence + # END for eac iteration + elapsed = time() - st + print >> sys.stderr, "Unpacked %i %ss of size %i in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed) + + st = time() + for i in xrange(ni): + one, two, three, four = sequence[0], sequence[1], sequence[2], sequence[3] + # END for eac iteration + elapsed = time() - st + print >> sys.stderr, "Unpacked %i %ss of size %i individually in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed) + + st = time() + for i in xrange(ni): + one, two = sequence[0], sequence[1] + # END for eac iteration + elapsed = time() - st + print >> sys.stderr, "Unpacked %i %ss of size %i individually (2 of 4) in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed) + # END for each sequence + + def test_large_list_vs_iteration(self): + # what costs more: alloc/realloc of lists, or the cpu strain of iterators ? + def slow_iter(ni): + for i in xrange(ni): + yield i + # END slow iter - be closer to the real world + + # alloc doesn't play a role here it seems + for ni in (500, 1000, 10000, 20000, 40000): + st = time() + for i in list(xrange(ni)): + i + # END for each item + elapsed = time() - st + print >> sys.stderr, "Iterated %i items from list in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed) + + st = time() + for i in slow_iter(ni): + i + # END for each item + elapsed = time() - st + print >> sys.stderr, "Iterated %i items from iterator in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed) + # END for each number of iterations + + def test_type_vs_inst_class(self): + class NewType(object): + pass + + # lets see which way is faster + inst = NewType() + + ni = 1000000 + st = time() + for i in xrange(ni): + inst.__class__() + # END for each item + elapsed = time() - st + print >> sys.stderr, "Created %i items using inst.__class__ in %f s ( %f items / s)" % (ni, elapsed, ni / elapsed) + + st = time() + for i in xrange(ni): + type(inst)() + # END for each item + elapsed = time() - st + print >> sys.stderr, "Created %i items using type(inst)() in %f s ( %f items / s)" % (ni, elapsed, ni / elapsed) diff --git a/git/test/test_base.py b/git/test/test_base.py index e630d151..5edc9c52 100644 --- a/git/test/test_base.py +++ b/git/test/test_base.py @@ -16,85 +16,85 @@ from gitdb.util import hex_to_bin import tempfile class TestBase(TestBase): - - type_tuples = ( ("blob", "8741fc1d09d61f02ffd8cded15ff603eff1ec070", "blob.py"), - ("tree", "3a6a5e3eeed3723c09f1ef0399f81ed6b8d82e79", "directory"), - ("commit", "4251bd59fb8e11e40c40548cba38180a9536118c", None), - ("tag", "e56a60e8e9cd333cfba0140a77cd12b0d9398f10", None) ) - - def test_base_object(self): - # test interface of base object classes - types = (Blob, Tree, Commit, TagObject) - assert len(types) == len(self.type_tuples) - - s = set() - num_objs = 0 - num_index_objs = 0 - for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples): - binsha = hex_to_bin(hexsha) - item = None - if path is None: - item = obj_type(self.rorepo,binsha) - else: - item = obj_type(self.rorepo,binsha, 0, path) - # END handle index objects - num_objs += 1 - assert item.hexsha == hexsha - assert item.type == typename - assert item.size - assert item == item - assert not item != item - assert str(item) == item.hexsha - assert repr(item) - s.add(item) - - if isinstance(item, base.IndexObject): - num_index_objs += 1 - if hasattr(item,'path'): # 
never runs here - assert not item.path.startswith("/") # must be relative - assert isinstance(item.mode, int) - # END index object check - - # read from stream - data_stream = item.data_stream - data = data_stream.read() - assert data - - tmpfile = os.tmpfile() - assert item == item.stream_data(tmpfile) - tmpfile.seek(0) - assert tmpfile.read() == data - # END stream to file directly - # END for each object type to create - - # each has a unique sha - assert len(s) == num_objs - assert len(s|s) == num_objs - assert num_index_objs == 2 - - def test_get_object_type_by_name(self): - for tname in base.Object.TYPES: - assert base.Object in get_object_type_by_name(tname).mro() - # END for each known type - - assert_raises( ValueError, get_object_type_by_name, "doesntexist" ) + + type_tuples = ( ("blob", "8741fc1d09d61f02ffd8cded15ff603eff1ec070", "blob.py"), + ("tree", "3a6a5e3eeed3723c09f1ef0399f81ed6b8d82e79", "directory"), + ("commit", "4251bd59fb8e11e40c40548cba38180a9536118c", None), + ("tag", "e56a60e8e9cd333cfba0140a77cd12b0d9398f10", None) ) + + def test_base_object(self): + # test interface of base object classes + types = (Blob, Tree, Commit, TagObject) + assert len(types) == len(self.type_tuples) + + s = set() + num_objs = 0 + num_index_objs = 0 + for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples): + binsha = hex_to_bin(hexsha) + item = None + if path is None: + item = obj_type(self.rorepo,binsha) + else: + item = obj_type(self.rorepo,binsha, 0, path) + # END handle index objects + num_objs += 1 + assert item.hexsha == hexsha + assert item.type == typename + assert item.size + assert item == item + assert not item != item + assert str(item) == item.hexsha + assert repr(item) + s.add(item) + + if isinstance(item, base.IndexObject): + num_index_objs += 1 + if hasattr(item,'path'): # never runs here + assert not item.path.startswith("/") # must be relative + assert isinstance(item.mode, int) + # END index object check + + # read from stream + data_stream = item.data_stream + data = data_stream.read() + assert data + + tmpfile = os.tmpfile() + assert item == item.stream_data(tmpfile) + tmpfile.seek(0) + assert tmpfile.read() == data + # END stream to file directly + # END for each object type to create + + # each has a unique sha + assert len(s) == num_objs + assert len(s|s) == num_objs + assert num_index_objs == 2 + + def test_get_object_type_by_name(self): + for tname in base.Object.TYPES: + assert base.Object in get_object_type_by_name(tname).mro() + # END for each known type + + assert_raises( ValueError, get_object_type_by_name, "doesntexist" ) - def test_object_resolution(self): - # objects must be resolved to shas so they compare equal - assert self.rorepo.head.reference.object == self.rorepo.active_branch.object - - @with_rw_repo('HEAD', bare=True) - def test_with_bare_rw_repo(self, bare_rw_repo): - assert bare_rw_repo.config_reader("repository").getboolean("core", "bare") - assert os.path.isfile(os.path.join(bare_rw_repo.git_dir,'HEAD')) - - @with_rw_repo('0.1.6') - def test_with_rw_repo(self, rw_repo): - assert not rw_repo.config_reader("repository").getboolean("core", "bare") - assert os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib')) - - @with_rw_and_rw_remote_repo('0.1.6') - def test_with_rw_remote_and_rw_repo(self, rw_repo, rw_remote_repo): - assert not rw_repo.config_reader("repository").getboolean("core", "bare") - assert rw_remote_repo.config_reader("repository").getboolean("core", "bare") - assert 
os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib')) + def test_object_resolution(self): + # objects must be resolved to shas so they compare equal + assert self.rorepo.head.reference.object == self.rorepo.active_branch.object + + @with_rw_repo('HEAD', bare=True) + def test_with_bare_rw_repo(self, bare_rw_repo): + assert bare_rw_repo.config_reader("repository").getboolean("core", "bare") + assert os.path.isfile(os.path.join(bare_rw_repo.git_dir,'HEAD')) + + @with_rw_repo('0.1.6') + def test_with_rw_repo(self, rw_repo): + assert not rw_repo.config_reader("repository").getboolean("core", "bare") + assert os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib')) + + @with_rw_and_rw_remote_repo('0.1.6') + def test_with_rw_remote_and_rw_repo(self, rw_repo, rw_remote_repo): + assert not rw_repo.config_reader("repository").getboolean("core", "bare") + assert rw_remote_repo.config_reader("repository").getboolean("core", "bare") + assert os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib')) diff --git a/git/test/test_blob.py b/git/test/test_blob.py index 661c0501..6fc0287f 100644 --- a/git/test/test_blob.py +++ b/git/test/test_blob.py @@ -9,15 +9,15 @@ from git import * from gitdb.util import hex_to_bin class TestBlob(TestBase): - - def test_mime_type_should_return_mime_type_for_known_types(self): - blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA, 'path': 'foo.png'}) - assert_equal("image/png", blob.mime_type) + + def test_mime_type_should_return_mime_type_for_known_types(self): + blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA, 'path': 'foo.png'}) + assert_equal("image/png", blob.mime_type) - def test_mime_type_should_return_text_plain_for_unknown_types(self): - blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA,'path': 'something'}) - assert_equal("text/plain", blob.mime_type) + def test_mime_type_should_return_text_plain_for_unknown_types(self): + blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA,'path': 'something'}) + assert_equal("text/plain", blob.mime_type) - def test_nodict(self): - self.failUnlessRaises(AttributeError, setattr, self.rorepo.tree()['AUTHORS'], 'someattr', 2) - + def test_nodict(self): + self.failUnlessRaises(AttributeError, setattr, self.rorepo.tree()['AUTHORS'], 'someattr', 2) + diff --git a/git/test/test_commit.py b/git/test/test_commit.py index 4a8d8b87..58e51151 100644 --- a/git/test/test_commit.py +++ b/git/test/test_commit.py @@ -16,260 +16,260 @@ import sys def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False): - """traverse all commits in the history of commit identified by commit_id and check - if the serialization works. 
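# [Editor's sketch, not part of the patch] The mime-type expectations in the
# Blob tests above match the stdlib guess, with text/plain as the fallback:
import mimetypes

print mimetypes.guess_type('foo.png')[0] or 'text/plain'     # image/png
print mimetypes.guess_type('something')[0] or 'text/plain'   # text/plain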
- :param print_performance_info: if True, we will show how fast we are""" - ns = 0 # num serializations - nds = 0 # num deserializations - - st = time.time() - for cm in rwrepo.commit(commit_id).traverse(): - nds += 1 - - # assert that we deserialize commits correctly, hence we get the same - # sha on serialization - stream = StringIO() - cm._serialize(stream) - ns += 1 - streamlen = stream.tell() - stream.seek(0) - - istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream)) - assert istream.hexsha == cm.hexsha - - nc = Commit(rwrepo, Commit.NULL_BIN_SHA, cm.tree, - cm.author, cm.authored_date, cm.author_tz_offset, - cm.committer, cm.committed_date, cm.committer_tz_offset, - cm.message, cm.parents, cm.encoding) - - assert nc.parents == cm.parents - stream = StringIO() - nc._serialize(stream) - ns += 1 - streamlen = stream.tell() - stream.seek(0) - - # reuse istream - istream.size = streamlen - istream.stream = stream - istream.binsha = None - nc.binsha = rwrepo.odb.store(istream).binsha - - # if it worked, we have exactly the same contents ! - assert nc.hexsha == cm.hexsha - # END check commits - elapsed = time.time() - st - - if print_performance_info: - print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s" % (ns, nds, elapsed, ns/elapsed, nds/elapsed) - # END handle performance info - + """traverse all commits in the history of commit identified by commit_id and check + if the serialization works. + :param print_performance_info: if True, we will show how fast we are""" + ns = 0 # num serializations + nds = 0 # num deserializations + + st = time.time() + for cm in rwrepo.commit(commit_id).traverse(): + nds += 1 + + # assert that we deserialize commits correctly, hence we get the same + # sha on serialization + stream = StringIO() + cm._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream)) + assert istream.hexsha == cm.hexsha + + nc = Commit(rwrepo, Commit.NULL_BIN_SHA, cm.tree, + cm.author, cm.authored_date, cm.author_tz_offset, + cm.committer, cm.committed_date, cm.committer_tz_offset, + cm.message, cm.parents, cm.encoding) + + assert nc.parents == cm.parents + stream = StringIO() + nc._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + # reuse istream + istream.size = streamlen + istream.stream = stream + istream.binsha = None + nc.binsha = rwrepo.odb.store(istream).binsha + + # if it worked, we have exactly the same contents ! 
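# [Editor's note] The invariant checked on the next line: a git object's sha1
# is the hash of its serialized bytes, so storing the re-serialized commit
# under the same hexsha proves the round-trip was lossless.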
+ assert nc.hexsha == cm.hexsha + # END check commits + elapsed = time.time() - st + + if print_performance_info: + print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s" % (ns, nds, elapsed, ns/elapsed, nds/elapsed) + # END handle performance info + class TestCommit(TestBase): - def test_bake(self): + def test_bake(self): - commit = self.rorepo.commit('2454ae89983a4496a445ce347d7a41c0bb0ea7ae') - # commits have no dict - self.failUnlessRaises(AttributeError, setattr, commit, 'someattr', 1) - commit.author # bake + commit = self.rorepo.commit('2454ae89983a4496a445ce347d7a41c0bb0ea7ae') + # commits have no dict + self.failUnlessRaises(AttributeError, setattr, commit, 'someattr', 1) + commit.author # bake - assert_equal("Sebastian Thiel", commit.author.name) - assert_equal("byronimo@gmail.com", commit.author.email) - assert commit.author == commit.committer - assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) - assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) - assert commit.message == "Added missing information to docstrings of commit and stats module\n" + assert_equal("Sebastian Thiel", commit.author.name) + assert_equal("byronimo@gmail.com", commit.author.email) + assert commit.author == commit.committer + assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) + assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) + assert commit.message == "Added missing information to docstrings of commit and stats module\n" - def test_stats(self): - commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781') - stats = commit.stats - - def check_entries(d): - assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): - assert key in d - # END assertion helper - assert stats.files - assert stats.total - - check_entries(stats.total) - assert "files" in stats.total - - for filepath, d in stats.files.items(): - check_entries(d) - # END for each stated file - - # assure data is parsed properly - michael = Actor._from_string("Michael Trier <mtrier@gmail.com>") - assert commit.author == michael - assert commit.committer == michael - assert commit.authored_date == 1210193388 - assert commit.committed_date == 1210193388 - assert commit.author_tz_offset == 14400, commit.author_tz_offset - assert commit.committer_tz_offset == 14400, commit.committer_tz_offset - assert commit.message == "initial project\n" - - def test_unicode_actor(self): - # assure we can parse unicode actors correctly - name = "Üäöß ÄußÉ".decode("utf-8") - assert len(name) == 9 - special = Actor._from_string(u"%s <something@this.com>" % name) - assert special.name == name - assert isinstance(special.name, unicode) - - def test_traversal(self): - start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") - first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") - p0 = start.parents[0] - p1 = start.parents[1] - p00 = p0.parents[0] - p10 = p1.parents[0] - - # basic branch first, depth first - dfirst = start.traverse(branch_first=False) - bfirst = start.traverse(branch_first=True) - assert dfirst.next() == p0 - assert dfirst.next() == p00 - - assert bfirst.next() == p0 - assert bfirst.next() == p1 - assert bfirst.next() == p00 - assert bfirst.next() == p10 - - # at some point, both iterations should stop - assert list(bfirst)[-1] == first - stoptraverse = 
self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) - l = list(stoptraverse) - assert len(l[0]) == 2 - - # ignore self - assert start.traverse(ignore_self=False).next() == start - - # depth - assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 - - # prune - assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 - - # predicate - assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 - - # traversal should stop when the beginning is reached - self.failUnlessRaises(StopIteration, first.traverse().next) - - # parents of the first commit should be empty ( as the only parent has a null - # sha ) - assert len(first.parents) == 0 - - def test_iteration(self): - # we can iterate commits - all_commits = Commit.list_items(self.rorepo, self.rorepo.head) - assert all_commits - assert all_commits == list(self.rorepo.iter_commits()) - - # this includes merge commits - mcomit = self.rorepo.commit('d884adc80c80300b4cc05321494713904ef1df2d') - assert mcomit in all_commits - - # we can limit the result to paths - ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) - assert ltd_commits and len(ltd_commits) < len(all_commits) - - # show commits of multiple paths, resulting in a union of commits - less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) - assert len(ltd_commits) < len(less_ltd_commits) - - def test_iter_items(self): - # pretty not allowed - self.failUnlessRaises(ValueError, Commit.iter_items, self.rorepo, 'master', pretty="raw") - - def test_rev_list_bisect_all(self): - """ - 'git rev-list --bisect-all' returns additional information - in the commit header. This test ensures that we properly parse it. - """ - revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474', - first_parent=True, - bisect_all=True) + def test_stats(self): + commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781') + stats = commit.stats + + def check_entries(d): + assert isinstance(d, dict) + for key in ("insertions", "deletions", "lines"): + assert key in d + # END assertion helper + assert stats.files + assert stats.total + + check_entries(stats.total) + assert "files" in stats.total + + for filepath, d in stats.files.items(): + check_entries(d) + # END for each stated file + + # assure data is parsed properly + michael = Actor._from_string("Michael Trier <mtrier@gmail.com>") + assert commit.author == michael + assert commit.committer == michael + assert commit.authored_date == 1210193388 + assert commit.committed_date == 1210193388 + assert commit.author_tz_offset == 14400, commit.author_tz_offset + assert commit.committer_tz_offset == 14400, commit.committer_tz_offset + assert commit.message == "initial project\n" + + def test_unicode_actor(self): + # assure we can parse unicode actors correctly + name = "Üäöß ÄußÉ".decode("utf-8") + assert len(name) == 9 + special = Actor._from_string(u"%s <something@this.com>" % name) + assert special.name == name + assert isinstance(special.name, unicode) + + def test_traversal(self): + start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") + first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") + p0 = start.parents[0] + p1 = start.parents[1] + p00 = p0.parents[0] + p10 = p1.parents[0] + + # basic branch first, depth first + dfirst = start.traverse(branch_first=False) + bfirst = start.traverse(branch_first=True) + assert dfirst.next() == p0 + assert dfirst.next() == p00 + + 
assert bfirst.next() == p0 + assert bfirst.next() == p1 + assert bfirst.next() == p00 + assert bfirst.next() == p10 + + # at some point, both iterations should stop + assert list(bfirst)[-1] == first + stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) + l = list(stoptraverse) + assert len(l[0]) == 2 + + # ignore self + assert start.traverse(ignore_self=False).next() == start + + # depth + assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 + + # prune + assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 + + # predicate + assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 + + # traversal should stop when the beginning is reached + self.failUnlessRaises(StopIteration, first.traverse().next) + + # parents of the first commit should be empty ( as the only parent has a null + # sha ) + assert len(first.parents) == 0 + + def test_iteration(self): + # we can iterate commits + all_commits = Commit.list_items(self.rorepo, self.rorepo.head) + assert all_commits + assert all_commits == list(self.rorepo.iter_commits()) + + # this includes merge commits + mcomit = self.rorepo.commit('d884adc80c80300b4cc05321494713904ef1df2d') + assert mcomit in all_commits + + # we can limit the result to paths + ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) + assert ltd_commits and len(ltd_commits) < len(all_commits) + + # show commits of multiple paths, resulting in a union of commits + less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) + assert len(ltd_commits) < len(less_ltd_commits) + + def test_iter_items(self): + # pretty not allowed + self.failUnlessRaises(ValueError, Commit.iter_items, self.rorepo, 'master', pretty="raw") + + def test_rev_list_bisect_all(self): + """ + 'git rev-list --bisect-all' returns additional information + in the commit header. This test ensures that we properly parse it. 
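# [Editor's note] With --bisect-all, each rev-list output line carries an
# annotation, roughly '<40-hex sha1> (dist=<n>)'; a parser that only needs
# the sha can take the first whitespace-separated token:
line = '933d23bf95a5bd1624fbcdf328d904e1fa173474 (dist=2)'
assert line.split()[0] == '933d23bf95a5bd1624fbcdf328d904e1fa173474'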
+ """ + revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474', + first_parent=True, + bisect_all=True) - commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs)) - expected_ids = ( - '7156cece3c49544abb6bf7a0c218eb36646fad6d', - '1f66cfbbce58b4b552b041707a12d437cc5f400a', - '33ebe7acec14b25c5f84f35a664803fcab2f7781', - '933d23bf95a5bd1624fbcdf328d904e1fa173474' - ) - for sha1, commit in zip(expected_ids, commits): - assert_equal(sha1, commit.hexsha) + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs)) + expected_ids = ( + '7156cece3c49544abb6bf7a0c218eb36646fad6d', + '1f66cfbbce58b4b552b041707a12d437cc5f400a', + '33ebe7acec14b25c5f84f35a664803fcab2f7781', + '933d23bf95a5bd1624fbcdf328d904e1fa173474' + ) + for sha1, commit in zip(expected_ids, commits): + assert_equal(sha1, commit.hexsha) - def test_count(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 - - def test_list(self): - assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)[hex_to_bin('5117c9c8a4d3af19a9958677e45cda9269de1541')], Commit) + def test_count(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 + + def test_list(self): + assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)[hex_to_bin('5117c9c8a4d3af19a9958677e45cda9269de1541')], Commit) - def test_str(self): - commit = Commit(self.rorepo, Commit.NULL_BIN_SHA) - assert_equal(Commit.NULL_HEX_SHA, str(commit)) + def test_str(self): + commit = Commit(self.rorepo, Commit.NULL_BIN_SHA) + assert_equal(Commit.NULL_HEX_SHA, str(commit)) - def test_repr(self): - commit = Commit(self.rorepo, Commit.NULL_BIN_SHA) - assert_equal('<git.Commit "%s">' % Commit.NULL_HEX_SHA, repr(commit)) + def test_repr(self): + commit = Commit(self.rorepo, Commit.NULL_BIN_SHA) + assert_equal('<git.Commit "%s">' % Commit.NULL_HEX_SHA, repr(commit)) - def test_equality(self): - commit1 = Commit(self.rorepo, Commit.NULL_BIN_SHA) - commit2 = Commit(self.rorepo, Commit.NULL_BIN_SHA) - commit3 = Commit(self.rorepo, "\1"*20) - assert_equal(commit1, commit2) - assert_not_equal(commit2, commit3) - - def test_iter_parents(self): - # should return all but ourselves, even if skip is defined - c = self.rorepo.commit('0.1.5') - for skip in (0, 1): - piter = c.iter_parents(skip=skip) - first_parent = piter.next() - assert first_parent != c - assert first_parent == c.parents[0] - # END for each - - def test_base(self): - name_rev = self.rorepo.head.commit.name_rev - assert isinstance(name_rev, basestring) - - @with_rw_repo('HEAD', bare=True) - def test_serialization(self, rwrepo): - # create all commits of our repo - assert_commit_serialization(rwrepo, '0.1.6') - - def test_serialization_unicode_support(self): - assert Commit.default_encoding.lower() == 'utf-8' - - # create a commit with unicode in the message, and the author's name - # Verify its serialization and deserialization - cmt = self.rorepo.commit('0.1.6') - assert isinstance(cmt.message, unicode) # it automatically decodes it as such - assert isinstance(cmt.author.name, unicode) # same here - - cmt.message = "üäêèß".decode("utf-8") - assert len(cmt.message) == 5 - - cmt.author.name = "äüß".decode("utf-8") - assert len(cmt.author.name) == 3 - - cstream = StringIO() - cmt._serialize(cstream) - cstream.seek(0) - assert len(cstream.getvalue()) - - ncmt = Commit(self.rorepo, cmt.binsha) - ncmt._deserialize(cstream) - - assert cmt.author.name == ncmt.author.name - assert cmt.message == 
ncmt.message - # actually, it can't be printed in a shell as repr wants to have ascii only - # it appears - cmt.author.__repr__() - + def test_equality(self): + commit1 = Commit(self.rorepo, Commit.NULL_BIN_SHA) + commit2 = Commit(self.rorepo, Commit.NULL_BIN_SHA) + commit3 = Commit(self.rorepo, "\1"*20) + assert_equal(commit1, commit2) + assert_not_equal(commit2, commit3) + + def test_iter_parents(self): + # should return all but ourselves, even if skip is defined + c = self.rorepo.commit('0.1.5') + for skip in (0, 1): + piter = c.iter_parents(skip=skip) + first_parent = piter.next() + assert first_parent != c + assert first_parent == c.parents[0] + # END for each + + def test_base(self): + name_rev = self.rorepo.head.commit.name_rev + assert isinstance(name_rev, basestring) + + @with_rw_repo('HEAD', bare=True) + def test_serialization(self, rwrepo): + # create all commits of our repo + assert_commit_serialization(rwrepo, '0.1.6') + + def test_serialization_unicode_support(self): + assert Commit.default_encoding.lower() == 'utf-8' + + # create a commit with unicode in the message, and the author's name + # Verify its serialization and deserialization + cmt = self.rorepo.commit('0.1.6') + assert isinstance(cmt.message, unicode) # it automatically decodes it as such + assert isinstance(cmt.author.name, unicode) # same here + + cmt.message = "üäêèß".decode("utf-8") + assert len(cmt.message) == 5 + + cmt.author.name = "äüß".decode("utf-8") + assert len(cmt.author.name) == 3 + + cstream = StringIO() + cmt._serialize(cstream) + cstream.seek(0) + assert len(cstream.getvalue()) + + ncmt = Commit(self.rorepo, cmt.binsha) + ncmt._deserialize(cstream) + + assert cmt.author.name == ncmt.author.name + assert cmt.message == ncmt.message + # actually, it can't be printed in a shell as repr wants to have ascii only + # it appears + cmt.author.__repr__() + diff --git a/git/test/test_db.py b/git/test/test_db.py index db2d7983..dc8190a7 100644 --- a/git/test/test_db.py +++ b/git/test/test_db.py @@ -10,16 +10,16 @@ from git.exc import BadObject import os class TestDB(TestBase): - - def test_base(self): - gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, 'objects'), self.rorepo.git) - - # partial to complete - works with everything - hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6")) - assert len(hexsha) == 40 - - assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha - - # fails with BadObject - for invalid_rev in ("0000", "bad/ref", "super bad"): - self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev) + + def test_base(self): + gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, 'objects'), self.rorepo.git) + + # partial to complete - works with everything + hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6")) + assert len(hexsha) == 40 + + assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha + + # fails with BadObject + for invalid_rev in ("0000", "bad/ref", "super bad"): + self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev) diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 83db2df6..80652c8e 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -8,101 +8,101 @@ from git.test.lib import * from git import * class TestDiff(TestBase): - - def _assert_diff_format(self, diffs): - # verify that the format of the diff is sane - for diff in diffs: - if diff.a_mode: - assert isinstance(diff.a_mode, int) - if diff.b_mode: - assert isinstance(diff.b_mode, int) - - if 
diff.a_blob: - assert not diff.a_blob.path.endswith('\n') - if diff.b_blob: - assert not diff.b_blob.path.endswith('\n') - # END for each diff - return diffs - - def test_list_from_string_new_mode(self): - output = StringProcessAdapter(fixture('diff_new_mode')) - diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) - self._assert_diff_format(diffs) - - assert_equal(1, len(diffs)) - assert_equal(10, len(diffs[0].diff.splitlines())) + + def _assert_diff_format(self, diffs): + # verify that the format of the diff is sane + for diff in diffs: + if diff.a_mode: + assert isinstance(diff.a_mode, int) + if diff.b_mode: + assert isinstance(diff.b_mode, int) + + if diff.a_blob: + assert not diff.a_blob.path.endswith('\n') + if diff.b_blob: + assert not diff.b_blob.path.endswith('\n') + # END for each diff + return diffs + + def test_list_from_string_new_mode(self): + output = StringProcessAdapter(fixture('diff_new_mode')) + diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) + self._assert_diff_format(diffs) + + assert_equal(1, len(diffs)) + assert_equal(10, len(diffs[0].diff.splitlines())) - def test_diff_with_rename(self): - output = StringProcessAdapter(fixture('diff_rename')) - diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) - self._assert_diff_format(diffs) - - assert_equal(1, len(diffs)) + def test_diff_with_rename(self): + output = StringProcessAdapter(fixture('diff_rename')) + diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) + self._assert_diff_format(diffs) + + assert_equal(1, len(diffs)) - diff = diffs[0] - assert_true(diff.renamed) - assert_equal(diff.rename_from, 'AUTHORS') - assert_equal(diff.rename_to, 'CONTRIBUTORS') + diff = diffs[0] + assert_true(diff.renamed) + assert_equal(diff.rename_from, 'AUTHORS') + assert_equal(diff.rename_to, 'CONTRIBUTORS') - def test_diff_patch_format(self): - # test all of the 'old' format diffs for completeness - it should at least - # be able to deal with it - fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only", - "diff_new_mode", "diff_numstat", "diff_p", "diff_rename", - "diff_tree_numstat_root" ) - - for fixture_name in fixtures: - diff_proc = StringProcessAdapter(fixture(fixture_name)) - diffs = Diff._index_from_patch_format(self.rorepo, diff_proc.stdout) - # END for each fixture + def test_diff_patch_format(self): + # test all of the 'old' format diffs for completeness - it should at least + # be able to deal with it + fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only", + "diff_new_mode", "diff_numstat", "diff_p", "diff_rename", + "diff_tree_numstat_root" ) + + for fixture_name in fixtures: + diff_proc = StringProcessAdapter(fixture(fixture_name)) + diffs = Diff._index_from_patch_format(self.rorepo, diff_proc.stdout) + # END for each fixture - def test_diff_interface(self): - # test a few variations of the main diff routine - assertion_map = dict() - for i, commit in enumerate(self.rorepo.iter_commits('0.1.6', max_count=2)): - diff_item = commit - if i%2 == 0: - diff_item = commit.tree - # END use tree every second item - - for other in (None, commit.Index, commit.parents[0]): - for paths in (None, "CHANGES", ("CHANGES", "lib")): - for create_patch in range(2): - diff_index = diff_item.diff(other, paths, create_patch) - assert isinstance(diff_index, DiffIndex) - - if diff_index: - self._assert_diff_format(diff_index) - for ct in DiffIndex.change_type: - key = 'ct_%s'%ct - assertion_map.setdefault(key, 0) - assertion_map[key] = 
assertion_map[key]+len(list(diff_index.iter_change_type(ct))) - # END for each changetype - - # check entries - diff_set = set() - diff_set.add(diff_index[0]) - diff_set.add(diff_index[0]) - assert len(diff_set) == 1 - assert diff_index[0] == diff_index[0] - assert not (diff_index[0] != diff_index[0]) - # END diff index checking - # END for each patch option - # END for each path option - # END for each other side - # END for each commit - - # assert we could always find at least one instance of the members we - # can iterate in the diff index - if not this indicates its not working correctly - # or our test does not span the whole range of possibilities - for key,value in assertion_map.items(): - assert value, "Did not find diff for %s" % key - # END for each iteration type - - # test path not existing in the index - should be ignored - c = self.rorepo.head.commit - cp = c.parents[0] - diff_index = c.diff(cp, ["does/not/exist"]) - assert len(diff_index) == 0 - - + def test_diff_interface(self): + # test a few variations of the main diff routine + assertion_map = dict() + for i, commit in enumerate(self.rorepo.iter_commits('0.1.6', max_count=2)): + diff_item = commit + if i%2 == 0: + diff_item = commit.tree + # END use tree every second item + + for other in (None, commit.Index, commit.parents[0]): + for paths in (None, "CHANGES", ("CHANGES", "lib")): + for create_patch in range(2): + diff_index = diff_item.diff(other, paths, create_patch) + assert isinstance(diff_index, DiffIndex) + + if diff_index: + self._assert_diff_format(diff_index) + for ct in DiffIndex.change_type: + key = 'ct_%s'%ct + assertion_map.setdefault(key, 0) + assertion_map[key] = assertion_map[key]+len(list(diff_index.iter_change_type(ct))) + # END for each changetype + + # check entries + diff_set = set() + diff_set.add(diff_index[0]) + diff_set.add(diff_index[0]) + assert len(diff_set) == 1 + assert diff_index[0] == diff_index[0] + assert not (diff_index[0] != diff_index[0]) + # END diff index checking + # END for each patch option + # END for each path option + # END for each other side + # END for each commit + + # assert we could always find at least one instance of the members we + # can iterate in the diff index - if not this indicates its not working correctly + # or our test does not span the whole range of possibilities + for key,value in assertion_map.items(): + assert value, "Did not find diff for %s" % key + # END for each iteration type + + # test path not existing in the index - should be ignored + c = self.rorepo.head.commit + cp = c.parents[0] + diff_index = c.diff(cp, ["does/not/exist"]) + assert len(diff_index) == 0 + + diff --git a/git/test/test_fun.py b/git/test/test_fun.py index b7991cdb..530988ef 100644 --- a/git/test/test_fun.py +++ b/git/test/test_fun.py @@ -1,251 +1,251 @@ from git.test.lib import * from git.objects.fun import ( - traverse_tree_recursive, - traverse_trees_recursive, - tree_to_stream - ) + traverse_tree_recursive, + traverse_trees_recursive, + tree_to_stream + ) from git.index.fun import ( - aggressive_tree_merge - ) + aggressive_tree_merge + ) from gitdb.util import bin_to_hex from gitdb.base import IStream from gitdb.typ import str_tree_type from stat import ( - S_IFDIR, - S_IFREG, - S_IFLNK - ) + S_IFDIR, + S_IFREG, + S_IFLNK + ) from git.index import IndexFile from cStringIO import StringIO class TestFun(TestBase): - - def _assert_index_entries(self, entries, trees): - index = IndexFile.from_tree(self.rorepo, *[self.rorepo.tree(bin_to_hex(t)) for t in trees]) - assert entries 
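# --- Editorial aside (not part of the surrounding diff): a minimal sketch of
# the three-way merge machinery these test_fun.py tests exercise, assuming a
# local clone at the hypothetical path REPO_PATH. IndexFile.from_tree() merges
# up to three trees (base, ours, theirs); cleanly merged paths land at stage 0,
# conflicting paths at stages 1 (base), 2 (ours) and 3 (theirs).
from git import Repo, IndexFile

REPO_PATH = "/path/to/some/clone"  # hypothetical, point this at a real repository
repo = Repo(REPO_PATH)
base, ours, theirs = (repo.commit(rev).tree for rev in ("HEAD~2", "HEAD~1", "HEAD"))
merged_index = IndexFile.from_tree(repo, base, ours, theirs)
conflicts = [e for e in merged_index.entries.values() if e.stage != 0]
# an empty 'conflicts' list means the trees merged without manual intervention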
- assert len(index.entries) == len(entries) - for entry in entries: - assert (entry.path, entry.stage) in index.entries - # END assert entry matches fully - - def test_aggressive_tree_merge(self): - # head tree with additions, removals and modification compared to its predecessor - odb = self.rorepo.odb - HC = self.rorepo.commit("6c1faef799095f3990e9970bc2cb10aa0221cf9c") - H = HC.tree - B = HC.parents[0].tree - - # entries from single tree - trees = [H.binsha] - self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - - # from multiple trees - trees = [B.binsha, H.binsha] - self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - - # three way, no conflict - tree = self.rorepo.tree - B = tree("35a09c0534e89b2d43ec4101a5fb54576b577905") - H = tree("4fe5cfa0e063a8d51a1eb6f014e2aaa994e5e7d4") - M = tree("1f2b19de3301e76ab3a6187a49c9c93ff78bafbd") - trees = [B.binsha, H.binsha, M.binsha] - self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - - # three-way, conflict in at least one file, both modified - B = tree("a7a4388eeaa4b6b94192dce67257a34c4a6cbd26") - H = tree("f9cec00938d9059882bb8eabdaf2f775943e00e5") - M = tree("44a601a068f4f543f73fd9c49e264c931b1e1652") - trees = [B.binsha, H.binsha, M.binsha] - self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - - # too many trees - self.failUnlessRaises(ValueError, aggressive_tree_merge, odb, trees*2) + + def _assert_index_entries(self, entries, trees): + index = IndexFile.from_tree(self.rorepo, *[self.rorepo.tree(bin_to_hex(t)) for t in trees]) + assert entries + assert len(index.entries) == len(entries) + for entry in entries: + assert (entry.path, entry.stage) in index.entries + # END assert entry matches fully + + def test_aggressive_tree_merge(self): + # head tree with additions, removals and modification compared to its predecessor + odb = self.rorepo.odb + HC = self.rorepo.commit("6c1faef799095f3990e9970bc2cb10aa0221cf9c") + H = HC.tree + B = HC.parents[0].tree + + # entries from single tree + trees = [H.binsha] + self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) + + # from multiple trees + trees = [B.binsha, H.binsha] + self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) + + # three way, no conflict + tree = self.rorepo.tree + B = tree("35a09c0534e89b2d43ec4101a5fb54576b577905") + H = tree("4fe5cfa0e063a8d51a1eb6f014e2aaa994e5e7d4") + M = tree("1f2b19de3301e76ab3a6187a49c9c93ff78bafbd") + trees = [B.binsha, H.binsha, M.binsha] + self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) + + # three-way, conflict in at least one file, both modified + B = tree("a7a4388eeaa4b6b94192dce67257a34c4a6cbd26") + H = tree("f9cec00938d9059882bb8eabdaf2f775943e00e5") + M = tree("44a601a068f4f543f73fd9c49e264c931b1e1652") + trees = [B.binsha, H.binsha, M.binsha] + self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) + + # too many trees + self.failUnlessRaises(ValueError, aggressive_tree_merge, odb, trees*2) - def mktree(self, odb, entries): - """create a tree from the given tree entries and safe it to the database""" - sio = StringIO() - tree_to_stream(entries, sio.write) - sio.seek(0) - istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio)) - return istream.binsha - - @with_rw_repo('0.1.6') - def test_three_way_merge(self, rwrepo): - def mkfile(name, sha, executable=0): - return (sha, S_IFREG | 0644 | executable*0111, name) - def mkcommit(name, sha): - return (sha, S_IFDIR | S_IFLNK, name) - def 
assert_entries(entries, num_entries, has_conflict=False): - assert len(entries) == num_entries - assert has_conflict == (len([e for e in entries if e.stage != 0]) > 0) - mktree = self.mktree - - shaa = "\1"*20 - shab = "\2"*20 - shac = "\3"*20 - - odb = rwrepo.odb - - # base tree - bfn = 'basefile' - fbase = mkfile(bfn, shaa) - tb = mktree(odb, [fbase]) - - # non-conflicting new files, same data - fa = mkfile('1', shab) - th = mktree(odb, [fbase, fa]) - fb = mkfile('2', shac) - tm = mktree(odb, [fbase, fb]) - - # two new files, same base file - trees = [tb, th, tm] - assert_entries(aggressive_tree_merge(odb, trees), 3) - - # both delete same file, add own one - fa = mkfile('1', shab) - th = mktree(odb, [fa]) - fb = mkfile('2', shac) - tm = mktree(odb, [fb]) - - # two new files - trees = [tb, th, tm] - assert_entries(aggressive_tree_merge(odb, trees), 2) - - # same file added in both, differently - fa = mkfile('1', shab) - th = mktree(odb, [fa]) - fb = mkfile('1', shac) - tm = mktree(odb, [fb]) - - # expect conflict - trees = [tb, th, tm] - assert_entries(aggressive_tree_merge(odb, trees), 2, True) - - # same file added, different mode - fa = mkfile('1', shab) - th = mktree(odb, [fa]) - fb = mkcommit('1', shab) - tm = mktree(odb, [fb]) - - # expect conflict - trees = [tb, th, tm] - assert_entries(aggressive_tree_merge(odb, trees), 2, True) - - # same file added in both - fa = mkfile('1', shab) - th = mktree(odb, [fa]) - fb = mkfile('1', shab) - tm = mktree(odb, [fb]) - - # expect conflict - trees = [tb, th, tm] - assert_entries(aggressive_tree_merge(odb, trees), 1) - - # modify same base file, differently - fa = mkfile(bfn, shab) - th = mktree(odb, [fa]) - fb = mkfile(bfn, shac) - tm = mktree(odb, [fb]) - - # conflict, 3 versions on 3 stages - trees = [tb, th, tm] - assert_entries(aggressive_tree_merge(odb, trees), 3, True) - - - # change mode on same base file, by making one a commit, the other executable - # no content change ( this is totally unlikely to happen in the real world ) - fa = mkcommit(bfn, shaa) - th = mktree(odb, [fa]) - fb = mkfile(bfn, shaa, executable=1) - tm = mktree(odb, [fb]) - - # conflict, 3 versions on 3 stages, because of different mode - trees = [tb, th, tm] - assert_entries(aggressive_tree_merge(odb, trees), 3, True) - - for is_them in range(2): - # only we/they change contents - fa = mkfile(bfn, shab) - th = mktree(odb, [fa]) - - trees = [tb, th, tb] - if is_them: - trees = [tb, tb, th] - entries = aggressive_tree_merge(odb, trees) - assert len(entries) == 1 and entries[0].binsha == shab - - # only we/they change the mode - fa = mkcommit(bfn, shaa) - th = mktree(odb, [fa]) - - trees = [tb, th, tb] - if is_them: - trees = [tb, tb, th] - entries = aggressive_tree_merge(odb, trees) - assert len(entries) == 1 and entries[0].binsha == shaa and entries[0].mode == fa[1] - - # one side deletes, the other changes = conflict - fa = mkfile(bfn, shab) - th = mktree(odb, [fa]) - tm = mktree(odb, []) - trees = [tb, th, tm] - if is_them: - trees = [tb, tm, th] - # as one is deleted, there are only 2 entries - assert_entries(aggressive_tree_merge(odb, trees), 2, True) - # END handle ours, theirs - - def _assert_tree_entries(self, entries, num_trees): - for entry in entries: - assert len(entry) == num_trees - paths = set(e[2] for e in entry if e) - - # only one path per set of entries - assert len(paths) == 1 - # END verify entry - - def test_tree_traversal(self): - # low level tree tarversal - odb = self.rorepo.odb - H = 
self.rorepo.tree('29eb123beb1c55e5db4aa652d843adccbd09ae18') # head tree - M = self.rorepo.tree('e14e3f143e7260de9581aee27e5a9b2645db72de') # merge tree - B = self.rorepo.tree('f606937a7a21237c866efafcad33675e6539c103') # base tree - B_old = self.rorepo.tree('1f66cfbbce58b4b552b041707a12d437cc5f400a') # old base tree - - # two very different trees - entries = traverse_trees_recursive(odb, [B_old.binsha, H.binsha], '') - self._assert_tree_entries(entries, 2) - - oentries = traverse_trees_recursive(odb, [H.binsha, B_old.binsha], '') - assert len(oentries) == len(entries) - self._assert_tree_entries(oentries, 2) - - # single tree - is_no_tree = lambda i, d: i.type != 'tree' - entries = traverse_trees_recursive(odb, [B.binsha], '') - assert len(entries) == len(list(B.traverse(predicate=is_no_tree))) - self._assert_tree_entries(entries, 1) - - # two trees - entries = traverse_trees_recursive(odb, [B.binsha, H.binsha], '') - self._assert_tree_entries(entries, 2) - - # three trees - entries = traverse_trees_recursive(odb, [B.binsha, H.binsha, M.binsha], '') - self._assert_tree_entries(entries, 3) - - def test_tree_traversal_single(self): - max_count = 50 - count = 0 - odb = self.rorepo.odb - for commit in self.rorepo.commit("29eb123beb1c55e5db4aa652d843adccbd09ae18").traverse(): - if count >= max_count: - break - count += 1 - entries = traverse_tree_recursive(odb, commit.tree.binsha, '') - assert entries - # END for each commit + def mktree(self, odb, entries): + """create a tree from the given tree entries and save it to the database""" + sio = StringIO() + tree_to_stream(entries, sio.write) + sio.seek(0) + istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio)) + return istream.binsha + + @with_rw_repo('0.1.6') + def test_three_way_merge(self, rwrepo): + def mkfile(name, sha, executable=0): + return (sha, S_IFREG | 0644 | executable*0111, name) + def mkcommit(name, sha): + return (sha, S_IFDIR | S_IFLNK, name) + def assert_entries(entries, num_entries, has_conflict=False): + assert len(entries) == num_entries + assert has_conflict == (len([e for e in entries if e.stage != 0]) > 0) + mktree = self.mktree + + shaa = "\1"*20 + shab = "\2"*20 + shac = "\3"*20 + + odb = rwrepo.odb + + # base tree + bfn = 'basefile' + fbase = mkfile(bfn, shaa) + tb = mktree(odb, [fbase]) + + # non-conflicting new files, same data + fa = mkfile('1', shab) + th = mktree(odb, [fbase, fa]) + fb = mkfile('2', shac) + tm = mktree(odb, [fbase, fb]) + + # two new files, same base file + trees = [tb, th, tm] + assert_entries(aggressive_tree_merge(odb, trees), 3) + + # both delete same file, add own one + fa = mkfile('1', shab) + th = mktree(odb, [fa]) + fb = mkfile('2', shac) + tm = mktree(odb, [fb]) + + # two new files + trees = [tb, th, tm] + assert_entries(aggressive_tree_merge(odb, trees), 2) + + # same file added in both, differently + fa = mkfile('1', shab) + th = mktree(odb, [fa]) + fb = mkfile('1', shac) + tm = mktree(odb, [fb]) + + # expect conflict + trees = [tb, th, tm] + assert_entries(aggressive_tree_merge(odb, trees), 2, True) + + # same file added, different mode + fa = mkfile('1', shab) + th = mktree(odb, [fa]) + fb = mkcommit('1', shab) + tm = mktree(odb, [fb]) + + # expect conflict + trees = [tb, th, tm] + assert_entries(aggressive_tree_merge(odb, trees), 2, True) + + # same file added in both + fa = mkfile('1', shab) + th = mktree(odb, [fa]) + fb = mkfile('1', shab) + tm = mktree(odb, [fb]) + + # expect conflict + trees = [tb, th, tm] + assert_entries(aggressive_tree_merge(odb, trees), 1) + + # modify same base file, differently + fa = mkfile(bfn, shab) + th = mktree(odb, [fa]) + fb = mkfile(bfn, shac) + tm = mktree(odb, [fb]) + + # conflict, 3 versions on 3 stages + trees = [tb, th, tm] + assert_entries(aggressive_tree_merge(odb, trees), 3, True) + + + # change mode on same base file, by making one a commit, the other executable + # no content change ( this is totally unlikely to happen in the real world ) + fa = mkcommit(bfn, shaa) + th = mktree(odb, [fa]) + fb = mkfile(bfn, shaa, executable=1) + tm = mktree(odb, [fb]) + + # conflict, 3 versions on 3 stages, because of different mode + trees = [tb, th, tm] + assert_entries(aggressive_tree_merge(odb, trees), 3, True) + + for is_them in range(2): + # only we/they change contents + fa = mkfile(bfn, shab) + th = mktree(odb, [fa]) + + trees = [tb, th, tb] + if is_them: + trees = [tb, tb, th] + entries = aggressive_tree_merge(odb, trees) + assert len(entries) == 1 and entries[0].binsha == shab + + # only we/they change the mode + fa = mkcommit(bfn, shaa) + th = mktree(odb, [fa]) + + trees = [tb, th, tb] + if is_them: + trees = [tb, tb, th] + entries = aggressive_tree_merge(odb, trees) + assert len(entries) == 1 and entries[0].binsha == shaa and entries[0].mode == fa[1] + + # one side deletes, the other changes = conflict + fa = mkfile(bfn, shab) + th = mktree(odb, [fa]) + tm = mktree(odb, []) + trees = [tb, th, tm] + if is_them: + trees = [tb, tm, th] + # as one is deleted, there are only 2 entries + assert_entries(aggressive_tree_merge(odb, trees), 2, True) + # END handle ours, theirs + + def _assert_tree_entries(self, entries, num_trees): + for entry in entries: + assert len(entry) == num_trees + paths = set(e[2] for e in entry if e) + + # only one path per set of entries + assert len(paths) == 1 + # END verify entry + + def test_tree_traversal(self): + # low level tree traversal + odb = self.rorepo.odb + H = self.rorepo.tree('29eb123beb1c55e5db4aa652d843adccbd09ae18') # head tree + M = self.rorepo.tree('e14e3f143e7260de9581aee27e5a9b2645db72de') # merge tree + B = self.rorepo.tree('f606937a7a21237c866efafcad33675e6539c103') # base tree + B_old = self.rorepo.tree('1f66cfbbce58b4b552b041707a12d437cc5f400a') # old base tree + + # two very different trees + entries = traverse_trees_recursive(odb, [B_old.binsha, H.binsha], '') + self._assert_tree_entries(entries, 2) + + oentries = traverse_trees_recursive(odb, [H.binsha, B_old.binsha], '') + assert len(oentries) == len(entries) + self._assert_tree_entries(oentries, 2) + + # single tree + is_no_tree = lambda i, d: i.type != 'tree' + entries = traverse_trees_recursive(odb, [B.binsha], '') + assert len(entries) == len(list(B.traverse(predicate=is_no_tree))) + self._assert_tree_entries(entries, 1) + + # two trees + entries = traverse_trees_recursive(odb, [B.binsha, H.binsha], '') + self._assert_tree_entries(entries, 2) + + # three trees + entries = traverse_trees_recursive(odb, [B.binsha, H.binsha, M.binsha], '') + self._assert_tree_entries(entries, 3) + + def test_tree_traversal_single(self): + max_count = 50 + count = 0 + odb = self.rorepo.odb + for commit in self.rorepo.commit("29eb123beb1c55e5db4aa652d843adccbd09ae18").traverse(): + if count >= max_count: + break + count += 1 + entries = traverse_tree_recursive(odb, commit.tree.binsha, '') + assert entries + # END for each commit diff --git a/git/test/test_git.py b/git/test/test_git.py index 2d38e0a8..42ad784f 100644 --- a/git/test/test_git.py +++ b/git/test/test_git.py @@ -6,105 +6,105 @@ import os, sys from git.test.lib import ( - 
TestBase, - patch_object, - raises, - assert_equal, - assert_true, - assert_match, - fixture_path - ) + TestBase, + patch_object, + raises, + assert_equal, + assert_true, + assert_match, + fixture_path + ) from git import Git, GitCommandError class TestGit(TestBase): - - @classmethod - def setUpAll(cls): - super(TestGit, cls).setUpAll() - cls.git = Git(cls.rorepo.working_dir) + + @classmethod + def setUpAll(cls): + super(TestGit, cls).setUpAll() + cls.git = Git(cls.rorepo.working_dir) - @patch_object(Git, 'execute') - def test_call_process_calls_execute(self, git): - git.return_value = '' - self.git.version() - assert_true(git.called) - assert_equal(git.call_args, ((['git', 'version'],), {})) + @patch_object(Git, 'execute') + def test_call_process_calls_execute(self, git): + git.return_value = '' + self.git.version() + assert_true(git.called) + assert_equal(git.call_args, ((['git', 'version'],), {})) - @raises(GitCommandError) - def test_it_raises_errors(self): - self.git.this_does_not_exist() + @raises(GitCommandError) + def test_it_raises_errors(self): + self.git.this_does_not_exist() - def test_it_transforms_kwargs_into_git_command_arguments(self): - assert_equal(["-s"], self.git.transform_kwargs(**{'s': True})) - assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5})) + def test_it_transforms_kwargs_into_git_command_arguments(self): + assert_equal(["-s"], self.git.transform_kwargs(**{'s': True})) + assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5})) - assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True})) - assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5})) + assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True})) + assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5})) - assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True})) + assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True})) - def test_it_executes_git_to_shell_and_returns_result(self): - assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"])) + def test_it_executes_git_to_shell_and_returns_result(self): + assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"])) - def test_it_accepts_stdin(self): - filename = fixture_path("cat_file_blob") - fh = open(filename, 'r') - assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8", - self.git.hash_object(istream=fh, stdin=True)) - fh.close() + def test_it_accepts_stdin(self): + filename = fixture_path("cat_file_blob") + fh = open(filename, 'r') + assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8", + self.git.hash_object(istream=fh, stdin=True)) + fh.close() - @patch_object(Git, 'execute') - def test_it_ignores_false_kwargs(self, git): - # this_should_not_be_ignored=False implies it *should* be ignored - output = self.git.version(pass_this_kwarg=False) - assert_true("pass_this_kwarg" not in git.call_args[1]) - - def test_persistent_cat_file_command(self): - # read header only - import subprocess as sp - hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167" - g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True) - g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") - g.stdin.flush() - obj_info = g.stdout.readline() - - # read header + data - g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True) - g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") - g.stdin.flush() - obj_info_two = g.stdout.readline() - assert obj_info == obj_info_two 
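# --- Editorial aside (not part of the surrounding diff): the streaming
# protocol the persistent cat-file test drives, sketched against a
# hypothetical clone at /path/to/some/clone. 'git cat-file --batch' answers
# each object name written to stdin with a '<hexsha> <type> <size>' header
# line, followed by <size> content bytes and a terminating newline.
import subprocess as sp
from git import Repo

git = Repo("/path/to/some/clone").git       # hypothetical path, adjust as needed
proc = git.cat_file(batch=True, istream=sp.PIPE, as_process=True)
proc.stdin.write("HEAD\n")                  # any rev-parseable name works here
proc.stdin.flush()
hexsha, typename, size = proc.stdout.readline().split()
data = proc.stdout.read(int(size))          # content must be consumed in one full read
proc.stdout.read(1)                         # eat the trailing newline before the next request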
- - # read data - have to read it in one large chunk - size = int(obj_info.split()[2]) - data = g.stdout.read(size) - terminating_newline = g.stdout.read(1) - - # now we should be able to read a new object - g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") - g.stdin.flush() - assert g.stdout.readline() == obj_info - - - # same can be achieved using the respective command functions - hexsha, typename, size = self.git.get_object_header(hexsha) - hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha) - assert typename == typename_two and size == size_two - - def test_version(self): - v = self.git.version_info - assert isinstance(v, tuple) - for n in v: - assert isinstance(n, int) - #END verify number types - - def test_cmd_override(self): - prev_cmd = self.git.GIT_PYTHON_GIT_EXECUTABLE - try: - # set it to something that doesn't exist, assure it raises - type(self.git).GIT_PYTHON_GIT_EXECUTABLE = os.path.join("some", "path", "which", "doesn't", "exist", "gitbinary") - self.failUnlessRaises(OSError, self.git.version) - finally: - type(self.git).GIT_PYTHON_GIT_EXECUTABLE = prev_cmd - #END undo adjustment + @patch_object(Git, 'execute') + def test_it_ignores_false_kwargs(self, git): + # this_should_not_be_ignored=False implies it *should* be ignored + output = self.git.version(pass_this_kwarg=False) + assert_true("pass_this_kwarg" not in git.call_args[1]) + + def test_persistent_cat_file_command(self): + # read header only + import subprocess as sp + hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167" + g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True) + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + obj_info = g.stdout.readline() + + # read header + data + g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True) + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + obj_info_two = g.stdout.readline() + assert obj_info == obj_info_two + + # read data - have to read it in one large chunk + size = int(obj_info.split()[2]) + data = g.stdout.read(size) + terminating_newline = g.stdout.read(1) + + # now we should be able to read a new object + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + assert g.stdout.readline() == obj_info + + + # same can be achieved using the respective command functions + hexsha, typename, size = self.git.get_object_header(hexsha) + hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha) + assert typename == typename_two and size == size_two + + def test_version(self): + v = self.git.version_info + assert isinstance(v, tuple) + for n in v: + assert isinstance(n, int) + #END verify number types + + def test_cmd_override(self): + prev_cmd = self.git.GIT_PYTHON_GIT_EXECUTABLE + try: + # set it to something that doesn't exist, assure it raises + type(self.git).GIT_PYTHON_GIT_EXECUTABLE = os.path.join("some", "path", "which", "doesn't", "exist", "gitbinary") + self.failUnlessRaises(OSError, self.git.version) + finally: + type(self.git).GIT_PYTHON_GIT_EXECUTABLE = prev_cmd + #END undo adjustment diff --git a/git/test/test_index.py b/git/test/test_index.py index 5d227897..d532a3b4 100644 --- a/git/test/test_index.py +++ b/git/test/test_index.py @@ -15,655 +15,655 @@ import shutil from stat import * class TestIndex(TestBase): - - def __init__(self, *args): - super(TestIndex, self).__init__(*args) - self._reset_progress() - - def _assert_fprogress(self, entries): - assert len(entries) == len(self._fprogress_map) - for 
path, call_count in self._fprogress_map.iteritems(): - assert call_count == 2 - # END for each item in progress map - self._reset_progress() + + def __init__(self, *args): + super(TestIndex, self).__init__(*args) + self._reset_progress() + + def _assert_fprogress(self, entries): + assert len(entries) == len(self._fprogress_map) + for path, call_count in self._fprogress_map.iteritems(): + assert call_count == 2 + # END for each item in progress map + self._reset_progress() - def _fprogress(self, path, done, item): - self._fprogress_map.setdefault(path, 0) - curval = self._fprogress_map[path] - if curval == 0: - assert not done - if curval == 1: - assert done - self._fprogress_map[path] = curval + 1 - - def _fprogress_add(self, path, done, item): - """Called as progress func - we keep track of the proper - call order""" - assert item is not None - self._fprogress(path, done, item) - - def _reset_progress(self): - # maps paths to the count of calls - self._fprogress_map = dict() - - def _assert_entries(self, entries): - for entry in entries: - assert isinstance(entry, BaseIndexEntry) - assert not os.path.isabs(entry.path) - assert not "\\" in entry.path - # END for each entry - - def test_index_file_base(self): - # read from file - index = IndexFile(self.rorepo, fixture_path("index")) - assert index.entries - assert index.version > 0 - - # test entry - last_val = None - entry = index.entries.itervalues().next() - for attr in ("path","ctime","mtime","dev","inode","mode","uid", - "gid","size","binsha", "hexsha", "stage"): - val = getattr(entry, attr) - # END for each method - - # test update - entries = index.entries - assert isinstance(index.update(), IndexFile) - assert entries is not index.entries - - # test stage - index_merge = IndexFile(self.rorepo, fixture_path("index_merge")) - assert len(index_merge.entries) == 106 - assert len(list(e for e in index_merge.entries.itervalues() if e.stage != 0 )) - - # write the data - it must match the original - tmpfile = tempfile.mktemp() - index_merge.write(tmpfile) - fp = open(tmpfile, 'rb') - assert fp.read() == fixture("index_merge") - fp.close() - os.remove(tmpfile) - - def _cmp_tree_index(self, tree, index): - # fail unless both objects contain the same paths and blobs - if isinstance(tree, str): - tree = self.rorepo.commit(tree).tree - - num_blobs = 0 - blist = list() - for blob in tree.traverse(predicate = lambda e,d: e.type == "blob", branch_first=False): - assert (blob.path,0) in index.entries - blist.append(blob) - # END for each blob in tree - if len(blist) != len(index.entries): - iset = set(k[0] for k in index.entries.keys()) - bset = set(b.path for b in blist) - raise AssertionError( "CMP Failed: Missing entries in index: %s, missing in tree: %s" % (bset-iset, iset-bset) ) - # END assertion message - - @with_rw_repo('0.1.6') - def test_index_file_from_tree(self, rw_repo): - common_ancestor_sha = "5117c9c8a4d3af19a9958677e45cda9269de1541" - cur_sha = "4b43ca7ff72d5f535134241e7c797ddc9c7a3573" - other_sha = "39f85c4358b7346fee22169da9cad93901ea9eb9" - - # simple index from tree - base_index = IndexFile.from_tree(rw_repo, common_ancestor_sha) - assert base_index.entries - self._cmp_tree_index(common_ancestor_sha, base_index) - - # merge two trees - its like a fast-forward - two_way_index = IndexFile.from_tree(rw_repo, common_ancestor_sha, cur_sha) - assert two_way_index.entries - self._cmp_tree_index(cur_sha, two_way_index) - - # merge three trees - here we have a merge conflict - three_way_index = IndexFile.from_tree(rw_repo, 
common_ancestor_sha, cur_sha, other_sha) - assert len(list(e for e in three_way_index.entries.values() if e.stage != 0)) - - - # ITERATE BLOBS - merge_required = lambda t: t[0] != 0 - merge_blobs = list(three_way_index.iter_blobs(merge_required)) - assert merge_blobs - assert merge_blobs[0][0] in (1,2,3) - assert isinstance(merge_blobs[0][1], Blob) - - # test BlobFilter - prefix = 'lib/git' - for stage, blob in base_index.iter_blobs(BlobFilter([prefix])): - assert blob.path.startswith(prefix) - - - # writing a tree should fail with an unmerged index - self.failUnlessRaises(UnmergedEntriesError, three_way_index.write_tree) - - # removed unmerged entries - unmerged_blob_map = three_way_index.unmerged_blobs() - assert unmerged_blob_map - - # pick the first blob at the first stage we find and use it as resolved version - three_way_index.resolve_blobs( l[0][1] for l in unmerged_blob_map.itervalues() ) - tree = three_way_index.write_tree() - assert isinstance(tree, Tree) - num_blobs = 0 - for blob in tree.traverse(predicate=lambda item,d: item.type == "blob"): - assert (blob.path,0) in three_way_index.entries - num_blobs += 1 - # END for each blob - assert num_blobs == len(three_way_index.entries) - - @with_rw_repo('0.1.6') - def test_index_merge_tree(self, rw_repo): - # A bit out of place, but we need a different repo for this: - assert self.rorepo != rw_repo and not (self.rorepo == rw_repo) - assert len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))) == 2 - - # SINGLE TREE MERGE - # current index is at the (virtual) cur_commit - next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c" - parent_commit = rw_repo.head.commit.parents[0] - manifest_key = IndexFile.entry_key('MANIFEST.in', 0) - manifest_entry = rw_repo.index.entries[manifest_key] - rw_repo.index.merge_tree(next_commit) - # only one change should be recorded - assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha - - rw_repo.index.reset(rw_repo.head) - assert rw_repo.index.entries[manifest_key].binsha == manifest_entry.binsha - - # FAKE MERGE - ############# - # Add a change with a NULL sha that should conflict with next_commit. We - # pretend there was a change, but we do not even bother adding a proper - # sha for it ( which makes things faster of course ) - manifest_fake_entry = BaseIndexEntry((manifest_entry[0], "\0"*20, 0, manifest_entry[3])) - # try write flag - self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False)) - # add actually resolves the null-hex-sha for us as a feature, but we can - # edit the index manually - assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA - # must operate on the same index for this ! Its a bit problematic as - # it might confuse people - index = rw_repo.index - index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry) - index.write() - assert rw_repo.index.entries[manifest_key].hexsha == Diff.NULL_HEX_SHA - - # write an unchanged index ( just for the fun of it ) - rw_repo.index.write() - - # a three way merge would result in a conflict and fails as the command will - # not overwrite any entries in our index and hence leave them unmerged. This is - # mainly a protection feature as the current index is not yet in a tree - self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit) - - # the only way to get the merged entries is to safe the current index away into a tree, - # which is like a temporary commit for us. 
This fails as well as the NULL sha deos not - # have a corresponding object - # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true - # self.failUnlessRaises(GitCommandError, index.write_tree) - - # if missing objects are okay, this would work though ( they are always okay now ) - tree = index.write_tree() - - # now make a proper three way merge with unmerged entries - unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit) - unmerged_blobs = unmerged_tree.unmerged_blobs() - assert len(unmerged_blobs) == 1 and unmerged_blobs.keys()[0] == manifest_key[0] - - - @with_rw_repo('0.1.6') - def test_index_file_diffing(self, rw_repo): - # default Index instance points to our index - index = IndexFile(rw_repo) - assert index.path is not None - assert len(index.entries) - - # write the file back - index.write() - - # could sha it, or check stats - - # test diff - # resetting the head will leave the index in a different state, and the - # diff will yield a few changes - cur_head_commit = rw_repo.head.reference.commit - ref = rw_repo.head.reset('HEAD~6', index=True, working_tree=False) - - # diff against same index is 0 - diff = index.diff() - assert len(diff) == 0 - - # against HEAD as string, must be the same as it matches index - diff = index.diff('HEAD') - assert len(diff) == 0 - - # against previous head, there must be a difference - diff = index.diff(cur_head_commit) - assert len(diff) - - # we reverse the result - adiff = index.diff(str(cur_head_commit), R=True) - odiff = index.diff(cur_head_commit, R=False) # now its not reversed anymore - assert adiff != odiff - assert odiff == diff # both unreversed diffs against HEAD - - # against working copy - its still at cur_commit - wdiff = index.diff(None) - assert wdiff != adiff - assert wdiff != odiff - - # against something unusual - self.failUnlessRaises(ValueError, index.diff, int) - - # adjust the index to match an old revision - cur_branch = rw_repo.active_branch - cur_commit = cur_branch.commit - rev_head_parent = 'HEAD~1' - assert index.reset(rev_head_parent) is index - - assert cur_branch == rw_repo.active_branch - assert cur_commit == rw_repo.head.commit - - # there must be differences towards the working tree which is in the 'future' - assert index.diff(None) - - # reset the working copy as well to current head,to pull 'back' as well - new_data = "will be reverted" - file_path = os.path.join(rw_repo.working_tree_dir, "CHANGES") - fp = open(file_path, "wb") - fp.write(new_data) - fp.close() - index.reset(rev_head_parent, working_tree=True) - assert not index.diff(None) - assert cur_branch == rw_repo.active_branch - assert cur_commit == rw_repo.head.commit - fp = open(file_path,'rb') - try: - assert fp.read() != new_data - finally: - fp.close() - - # test full checkout - test_file = os.path.join(rw_repo.working_tree_dir, "CHANGES") - open(test_file, 'ab').write("some data") - rval = index.checkout(None, force=True, fprogress=self._fprogress) - assert 'CHANGES' in list(rval) - self._assert_fprogress([None]) - assert os.path.isfile(test_file) - - os.remove(test_file) - rval = index.checkout(None, force=False, fprogress=self._fprogress) - assert 'CHANGES' in list(rval) - self._assert_fprogress([None]) - assert os.path.isfile(test_file) - - # individual file - os.remove(test_file) - rval = index.checkout(test_file, fprogress=self._fprogress) - assert list(rval)[0] == 'CHANGES' - self._assert_fprogress([test_file]) - assert os.path.exists(test_file) - - # checking out non-existing file throws - 
self.failUnlessRaises(CheckoutError, index.checkout, "doesnt_exist_ever.txt.that") - self.failUnlessRaises(CheckoutError, index.checkout, paths=["doesnt/exist"]) - - # checkout file with modifications - append_data = "hello" - fp = open(test_file, "ab") - fp.write(append_data) - fp.close() - try: - index.checkout(test_file) - except CheckoutError, e: - assert len(e.failed_files) == 1 and e.failed_files[0] == os.path.basename(test_file) - assert (len(e.failed_files) == len(e.failed_reasons)) and isinstance(e.failed_reasons[0], basestring) - assert len(e.valid_files) == 0 - assert open(test_file).read().endswith(append_data) - else: - raise AssertionError("Exception CheckoutError not thrown") - - # if we force it it should work - index.checkout(test_file, force=True) - assert not open(test_file).read().endswith(append_data) - - # checkout directory - shutil.rmtree(os.path.join(rw_repo.working_tree_dir, "lib")) - rval = index.checkout('lib') - assert len(list(rval)) > 1 - - def _count_existing(self, repo, files): - """ - Returns count of files that actually exist in the repository directory. - """ - existing = 0 - basedir = repo.working_tree_dir - for f in files: - existing += os.path.isfile(os.path.join(basedir, f)) - # END for each deleted file - return existing - # END num existing helper - - @with_rw_repo('0.1.6') - def test_index_mutation(self, rw_repo): - index = rw_repo.index - num_entries = len(index.entries) - cur_head = rw_repo.head - - uname = "Some Developer" - umail = "sd@company.com" - rw_repo.config_writer().set_value("user", "name", uname) - rw_repo.config_writer().set_value("user", "email", umail) - - # remove all of the files, provide a wild mix of paths, BaseIndexEntries, - # IndexEntries - def mixed_iterator(): - count = 0 - for entry in index.entries.itervalues(): - type_id = count % 4 - if type_id == 0: # path - yield entry.path - elif type_id == 1: # blob - yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) - elif type_id == 2: # BaseIndexEntry - yield BaseIndexEntry(entry[:4]) - elif type_id == 3: # IndexEntry - yield entry - else: - raise AssertionError("Invalid Type") - count += 1 - # END for each entry - # END mixed iterator - deleted_files = index.remove(mixed_iterator(), working_tree=False) - assert deleted_files - assert self._count_existing(rw_repo, deleted_files) == len(deleted_files) - assert len(index.entries) == 0 - - # reset the index to undo our changes - index.reset() - assert len(index.entries) == num_entries - - # remove with working copy - deleted_files = index.remove(mixed_iterator(), working_tree=True) - assert deleted_files - assert self._count_existing(rw_repo, deleted_files) == 0 - - # reset everything - index.reset(working_tree=True) - assert self._count_existing(rw_repo, deleted_files) == len(deleted_files) - - # invalid type - self.failUnlessRaises(TypeError, index.remove, [1]) - - # absolute path - deleted_files = index.remove([os.path.join(rw_repo.working_tree_dir,"lib")], r=True) - assert len(deleted_files) > 1 - self.failUnlessRaises(ValueError, index.remove, ["/doesnt/exists"]) - - # TEST COMMITTING - # commit changed index - cur_commit = cur_head.commit - commit_message = "commit default head" - - new_commit = index.commit(commit_message, head=False) - assert cur_commit != new_commit - assert new_commit.author.name == uname - assert new_commit.author.email == umail - assert new_commit.committer.name == uname - assert new_commit.committer.email == umail - assert new_commit.message == commit_message - assert new_commit.parents[0] == 
cur_commit - assert len(new_commit.parents) == 1 - assert cur_head.commit == cur_commit - - # same index, no parents - commit_message = "index without parents" - commit_no_parents = index.commit(commit_message, parent_commits=list(), head=True) - assert commit_no_parents.message == commit_message - assert len(commit_no_parents.parents) == 0 - assert cur_head.commit == commit_no_parents - - # same index, multiple parents - commit_message = "Index with multiple parents\n commit with another line" - commit_multi_parent = index.commit(commit_message,parent_commits=(commit_no_parents, new_commit)) - assert commit_multi_parent.message == commit_message - assert len(commit_multi_parent.parents) == 2 - assert commit_multi_parent.parents[0] == commit_no_parents - assert commit_multi_parent.parents[1] == new_commit - assert cur_head.commit == commit_multi_parent - - # re-add all files in lib - # get the lib folder back on disk, but get an index without it - index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) - lib_file_path = os.path.join("lib", "git", "__init__.py") - assert (lib_file_path, 0) not in index.entries - assert os.path.isfile(os.path.join(rw_repo.working_tree_dir, lib_file_path)) - - # directory - entries = index.add(['lib'], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert len(entries)>1 - - # glob - entries = index.reset(new_commit).add([os.path.join('lib', 'git', '*.py')], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert len(entries) == 14 - - # same file - entries = index.reset(new_commit).add([os.path.abspath(os.path.join('lib', 'git', 'head.py'))]*2, fprogress=self._fprogress_add) - self._assert_entries(entries) - assert entries[0].mode & 0644 == 0644 - # would fail, test is too primitive to handle this case - # self._assert_fprogress(entries) - self._reset_progress() - assert len(entries) == 2 - - # missing path - self.failUnlessRaises(OSError, index.reset(new_commit).add, ['doesnt/exist/must/raise']) - - # blob from older revision overrides current index revision - old_blob = new_commit.parents[0].tree.blobs[0] - entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert index.entries[(old_blob.path,0)].hexsha == old_blob.hexsha and len(entries) == 1 - - # mode 0 not allowed - null_hex_sha = Diff.NULL_HEX_SHA - null_bin_sha = "\0" * 20 - self.failUnlessRaises(ValueError, index.reset(new_commit).add, [BaseIndexEntry((0, null_bin_sha,0,"doesntmatter"))]) - - # add new file - new_file_relapath = "my_new_file" - new_file_path = self._make_file(new_file_relapath, "hello world", rw_repo) - entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_bin_sha, 0, new_file_relapath))], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert len(entries) == 1 and entries[0].hexsha != null_hex_sha - - # add symlink - if sys.platform != "win32": - basename = "my_real_symlink" - target = "/etc/that" - link_file = os.path.join(rw_repo.working_tree_dir, basename) - os.symlink(target, link_file) - entries = index.reset(new_commit).add([link_file], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert len(entries) == 1 and S_ISLNK(entries[0].mode) - assert S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode) - - # we expect only 
the target to be written - assert index.repo.odb.stream(entries[0].binsha).read() == target - # END real symlink test - - # add fake symlink and assure it checks out as symlink - fake_symlink_relapath = "my_fake_symlink" - link_target = "/etc/that" - fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) - fake_entry = BaseIndexEntry((0120000, null_bin_sha, 0, fake_symlink_relapath)) - entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert entries[0].hexsha != null_hex_sha - assert len(entries) == 1 and S_ISLNK(entries[0].mode) - - # assure this also works with an alternate method - full_index_entry = IndexEntry.from_base(BaseIndexEntry((0120000, entries[0].binsha, 0, entries[0].path))) - entry_key = index.entry_key(full_index_entry) - index.reset(new_commit) - - assert entry_key not in index.entries - index.entries[entry_key] = full_index_entry - index.write() - index.update() # force reread of entries - new_entry = index.entries[entry_key] - assert S_ISLNK(new_entry.mode) - - # a tree created from this should contain the symlink - tree = index.write_tree() - assert fake_symlink_relapath in tree - index.write() # flush our changes for the checkout - - # checkout the fakelink, should be a link then - assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE]) - os.remove(fake_symlink_path) - index.checkout(fake_symlink_path) - - # on windows we will never get symlinks - if os.name == 'nt': - # symlinks should contain the link as text ( which is what a - # symlink actually is ) - open(fake_symlink_path,'rb').read() == link_target - else: - assert S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE]) - - # TEST RENAMING - def assert_mv_rval(rval): - for source, dest in rval: - assert not os.path.exists(source) and os.path.exists(dest) - # END for each renamed item - # END move assertion utility - - self.failUnlessRaises(ValueError, index.move, ['just_one_path']) - # file onto existing file - files = ['AUTHORS', 'LICENSE'] - self.failUnlessRaises(GitCommandError, index.move, files) - - # again, with force - assert_mv_rval(index.move(files, f=True)) - - # files into directory - dry run - paths = ['LICENSE', 'VERSION', 'doc'] - rval = index.move(paths, dry_run=True) - assert len(rval) == 2 - assert os.path.exists(paths[0]) - - # again, no dry run - rval = index.move(paths) - assert_mv_rval(rval) - - # dir into dir - rval = index.move(['doc', 'test']) - assert_mv_rval(rval) - - - # TEST PATH REWRITING - ###################### - count = [0] - def rewriter(entry): - rval = str(count[0]) - count[0] += 1 - return rval - # END rewriter - - def make_paths(): - # two existing ones, one new one - yield 'CHANGES' - yield 'ez_setup.py' - yield index.entries[index.entry_key('README', 0)] - yield index.entries[index.entry_key('.gitignore', 0)] - - for fid in range(3): - fname = 'newfile%i' % fid - open(fname, 'wb').write("abcd") - yield Blob(rw_repo, Blob.NULL_BIN_SHA, 0100644, fname) - # END for each new file - # END path producer - paths = list(make_paths()) - self._assert_entries(index.add(paths, path_rewriter=rewriter)) - - for filenum in range(len(paths)): - assert index.entry_key(str(filenum), 0) in index.entries - - - # TEST RESET ON PATHS - ###################### - arela = "aa" - brela = "bb" - afile = self._make_file(arela, "adata", rw_repo) - bfile = self._make_file(brela, "bdata", rw_repo) - akey = index.entry_key(arela, 0) - bkey = index.entry_key(brela, 0) - keys = (akey, bkey) - 
absfiles = (afile, bfile) - files = (arela, brela) - - for fkey in keys: - assert not fkey in index.entries - - index.add(files, write=True) - nc = index.commit("2 files committed", head=False) - - for fkey in keys: - assert fkey in index.entries - - # just the index - index.reset(paths=(arela, afile)) - assert not akey in index.entries - assert bkey in index.entries - - # now with working tree - files on disk as well as entries must be recreated - rw_repo.head.commit = nc - for absfile in absfiles: - os.remove(absfile) - - index.reset(working_tree=True, paths=files) - - for fkey in keys: - assert fkey in index.entries - for absfile in absfiles: - assert os.path.isfile(absfile) - - - @with_rw_repo('HEAD') - def test_compare_write_tree(self, rw_repo): - # write all trees and compare them - # its important to have a few submodules in there too - max_count = 25 - count = 0 - for commit in rw_repo.head.commit.traverse(): - if count >= max_count: - break - count += 1 - index = rw_repo.index.reset(commit) - orig_tree = commit.tree - assert index.write_tree() == orig_tree - # END for each commit - - def test_index_new(self): - B = self.rorepo.tree("6d9b1f4f9fa8c9f030e3207e7deacc5d5f8bba4e") - H = self.rorepo.tree("25dca42bac17d511b7e2ebdd9d1d679e7626db5f") - M = self.rorepo.tree("e746f96bcc29238b79118123028ca170adc4ff0f") - - for args in ((B,), (B,H), (B,H,M)): - index = IndexFile.new(self.rorepo, *args) - assert isinstance(index, IndexFile) - # END for each arg tuple - - + def _fprogress(self, path, done, item): + self._fprogress_map.setdefault(path, 0) + curval = self._fprogress_map[path] + if curval == 0: + assert not done + if curval == 1: + assert done + self._fprogress_map[path] = curval + 1 + + def _fprogress_add(self, path, done, item): + """Called as progress func - we keep track of the proper + call order""" + assert item is not None + self._fprogress(path, done, item) + + def _reset_progress(self): + # maps paths to the count of calls + self._fprogress_map = dict() + + def _assert_entries(self, entries): + for entry in entries: + assert isinstance(entry, BaseIndexEntry) + assert not os.path.isabs(entry.path) + assert not "\\" in entry.path + # END for each entry + + def test_index_file_base(self): + # read from file + index = IndexFile(self.rorepo, fixture_path("index")) + assert index.entries + assert index.version > 0 + + # test entry + last_val = None + entry = index.entries.itervalues().next() + for attr in ("path","ctime","mtime","dev","inode","mode","uid", + "gid","size","binsha", "hexsha", "stage"): + val = getattr(entry, attr) + # END for each method + + # test update + entries = index.entries + assert isinstance(index.update(), IndexFile) + assert entries is not index.entries + + # test stage + index_merge = IndexFile(self.rorepo, fixture_path("index_merge")) + assert len(index_merge.entries) == 106 + assert len(list(e for e in index_merge.entries.itervalues() if e.stage != 0 )) + + # write the data - it must match the original + tmpfile = tempfile.mktemp() + index_merge.write(tmpfile) + fp = open(tmpfile, 'rb') + assert fp.read() == fixture("index_merge") + fp.close() + os.remove(tmpfile) + + def _cmp_tree_index(self, tree, index): + # fail unless both objects contain the same paths and blobs + if isinstance(tree, str): + tree = self.rorepo.commit(tree).tree + + num_blobs = 0 + blist = list() + for blob in tree.traverse(predicate = lambda e,d: e.type == "blob", branch_first=False): + assert (blob.path,0) in index.entries + blist.append(blob) + # END for each blob in tree + if 
len(blist) != len(index.entries): + iset = set(k[0] for k in index.entries.keys()) + bset = set(b.path for b in blist) + raise AssertionError( "CMP Failed: Missing entries in index: %s, missing in tree: %s" % (bset-iset, iset-bset) ) + # END assertion message + + @with_rw_repo('0.1.6') + def test_index_file_from_tree(self, rw_repo): + common_ancestor_sha = "5117c9c8a4d3af19a9958677e45cda9269de1541" + cur_sha = "4b43ca7ff72d5f535134241e7c797ddc9c7a3573" + other_sha = "39f85c4358b7346fee22169da9cad93901ea9eb9" + + # simple index from tree + base_index = IndexFile.from_tree(rw_repo, common_ancestor_sha) + assert base_index.entries + self._cmp_tree_index(common_ancestor_sha, base_index) + + # merge two trees - its like a fast-forward + two_way_index = IndexFile.from_tree(rw_repo, common_ancestor_sha, cur_sha) + assert two_way_index.entries + self._cmp_tree_index(cur_sha, two_way_index) + + # merge three trees - here we have a merge conflict + three_way_index = IndexFile.from_tree(rw_repo, common_ancestor_sha, cur_sha, other_sha) + assert len(list(e for e in three_way_index.entries.values() if e.stage != 0)) + + + # ITERATE BLOBS + merge_required = lambda t: t[0] != 0 + merge_blobs = list(three_way_index.iter_blobs(merge_required)) + assert merge_blobs + assert merge_blobs[0][0] in (1,2,3) + assert isinstance(merge_blobs[0][1], Blob) + + # test BlobFilter + prefix = 'lib/git' + for stage, blob in base_index.iter_blobs(BlobFilter([prefix])): + assert blob.path.startswith(prefix) + + + # writing a tree should fail with an unmerged index + self.failUnlessRaises(UnmergedEntriesError, three_way_index.write_tree) + + # removed unmerged entries + unmerged_blob_map = three_way_index.unmerged_blobs() + assert unmerged_blob_map + + # pick the first blob at the first stage we find and use it as resolved version + three_way_index.resolve_blobs( l[0][1] for l in unmerged_blob_map.itervalues() ) + tree = three_way_index.write_tree() + assert isinstance(tree, Tree) + num_blobs = 0 + for blob in tree.traverse(predicate=lambda item,d: item.type == "blob"): + assert (blob.path,0) in three_way_index.entries + num_blobs += 1 + # END for each blob + assert num_blobs == len(three_way_index.entries) + + @with_rw_repo('0.1.6') + def test_index_merge_tree(self, rw_repo): + # A bit out of place, but we need a different repo for this: + assert self.rorepo != rw_repo and not (self.rorepo == rw_repo) + assert len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))) == 2 + + # SINGLE TREE MERGE + # current index is at the (virtual) cur_commit + next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c" + parent_commit = rw_repo.head.commit.parents[0] + manifest_key = IndexFile.entry_key('MANIFEST.in', 0) + manifest_entry = rw_repo.index.entries[manifest_key] + rw_repo.index.merge_tree(next_commit) + # only one change should be recorded + assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha + + rw_repo.index.reset(rw_repo.head) + assert rw_repo.index.entries[manifest_key].binsha == manifest_entry.binsha + + # FAKE MERGE + ############# + # Add a change with a NULL sha that should conflict with next_commit. 
We
+ # pretend there was a change, but we do not even bother adding a proper
+ # sha for it ( which makes things faster of course )
+ manifest_fake_entry = BaseIndexEntry((manifest_entry[0], "\0"*20, 0, manifest_entry[3]))
+ # try write flag
+ self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False))
+ # add actually resolves the null-hex-sha for us as a feature, but we can
+ # edit the index manually
+ assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
+ # must operate on the same index for this ! It's a bit problematic as
+ # it might confuse people
+ index = rw_repo.index
+ index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
+ index.write()
+ assert rw_repo.index.entries[manifest_key].hexsha == Diff.NULL_HEX_SHA
+
+ # write an unchanged index ( just for the fun of it )
+ rw_repo.index.write()
+
+ # a three way merge would result in a conflict and fail, as the command will
+ # not overwrite any entries in our index and hence leave them unmerged. This is
+ # mainly a protection feature as the current index is not yet in a tree
+ self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit)
+
+ # the only way to get the merged entries is to save the current index away into a tree,
+ # which is like a temporary commit for us. This fails as well, as the NULL sha does not
+ # have a corresponding object
+ # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
+ # self.failUnlessRaises(GitCommandError, index.write_tree)
+
+ # if missing objects are okay, this would work though ( they are always okay now )
+ tree = index.write_tree()
+
+ # now make a proper three way merge with unmerged entries
+ unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
+ unmerged_blobs = unmerged_tree.unmerged_blobs()
+ assert len(unmerged_blobs) == 1 and unmerged_blobs.keys()[0] == manifest_key[0]
+
+
+ @with_rw_repo('0.1.6')
+ def test_index_file_diffing(self, rw_repo):
+ # default Index instance points to our index
+ index = IndexFile(rw_repo)
+ assert index.path is not None
+ assert len(index.entries)
+
+ # write the file back
+ index.write()
+
+ # could sha it, or check stats
+
+ # test diff
+ # resetting the head will leave the index in a different state, and the
+ # diff will yield a few changes
+ cur_head_commit = rw_repo.head.reference.commit
+ ref = rw_repo.head.reset('HEAD~6', index=True, working_tree=False)
+
+ # diff against same index is 0
+ diff = index.diff()
+ assert len(diff) == 0
+
+ # against HEAD as string, must be the same as it matches index
+ diff = index.diff('HEAD')
+ assert len(diff) == 0
+
+ # against previous head, there must be a difference
+ diff = index.diff(cur_head_commit)
+ assert len(diff)
+
+ # we reverse the result
+ adiff = index.diff(str(cur_head_commit), R=True)
+ odiff = index.diff(cur_head_commit, R=False) # now it's not reversed anymore
+ assert adiff != odiff
+ assert odiff == diff # both unreversed diffs against HEAD
+
+ # against working copy - it's still at cur_commit
+ wdiff = index.diff(None)
+ assert wdiff != adiff
+ assert wdiff != odiff
+
+ # against something unusual
+ self.failUnlessRaises(ValueError, index.diff, int)
+
+ # adjust the index to match an old revision
+ cur_branch = rw_repo.active_branch
+ cur_commit = cur_branch.commit
+ rev_head_parent = 'HEAD~1'
+ assert index.reset(rev_head_parent) is index
+
+ assert cur_branch == rw_repo.active_branch
+ assert cur_commit == rw_repo.head.commit
+
+ # there must be differences towards the
working tree which is in the 'future' + assert index.diff(None) + + # reset the working copy as well to current head,to pull 'back' as well + new_data = "will be reverted" + file_path = os.path.join(rw_repo.working_tree_dir, "CHANGES") + fp = open(file_path, "wb") + fp.write(new_data) + fp.close() + index.reset(rev_head_parent, working_tree=True) + assert not index.diff(None) + assert cur_branch == rw_repo.active_branch + assert cur_commit == rw_repo.head.commit + fp = open(file_path,'rb') + try: + assert fp.read() != new_data + finally: + fp.close() + + # test full checkout + test_file = os.path.join(rw_repo.working_tree_dir, "CHANGES") + open(test_file, 'ab').write("some data") + rval = index.checkout(None, force=True, fprogress=self._fprogress) + assert 'CHANGES' in list(rval) + self._assert_fprogress([None]) + assert os.path.isfile(test_file) + + os.remove(test_file) + rval = index.checkout(None, force=False, fprogress=self._fprogress) + assert 'CHANGES' in list(rval) + self._assert_fprogress([None]) + assert os.path.isfile(test_file) + + # individual file + os.remove(test_file) + rval = index.checkout(test_file, fprogress=self._fprogress) + assert list(rval)[0] == 'CHANGES' + self._assert_fprogress([test_file]) + assert os.path.exists(test_file) + + # checking out non-existing file throws + self.failUnlessRaises(CheckoutError, index.checkout, "doesnt_exist_ever.txt.that") + self.failUnlessRaises(CheckoutError, index.checkout, paths=["doesnt/exist"]) + + # checkout file with modifications + append_data = "hello" + fp = open(test_file, "ab") + fp.write(append_data) + fp.close() + try: + index.checkout(test_file) + except CheckoutError, e: + assert len(e.failed_files) == 1 and e.failed_files[0] == os.path.basename(test_file) + assert (len(e.failed_files) == len(e.failed_reasons)) and isinstance(e.failed_reasons[0], basestring) + assert len(e.valid_files) == 0 + assert open(test_file).read().endswith(append_data) + else: + raise AssertionError("Exception CheckoutError not thrown") + + # if we force it it should work + index.checkout(test_file, force=True) + assert not open(test_file).read().endswith(append_data) + + # checkout directory + shutil.rmtree(os.path.join(rw_repo.working_tree_dir, "lib")) + rval = index.checkout('lib') + assert len(list(rval)) > 1 + + def _count_existing(self, repo, files): + """ + Returns count of files that actually exist in the repository directory. 
+ """ + existing = 0 + basedir = repo.working_tree_dir + for f in files: + existing += os.path.isfile(os.path.join(basedir, f)) + # END for each deleted file + return existing + # END num existing helper + + @with_rw_repo('0.1.6') + def test_index_mutation(self, rw_repo): + index = rw_repo.index + num_entries = len(index.entries) + cur_head = rw_repo.head + + uname = "Some Developer" + umail = "sd@company.com" + rw_repo.config_writer().set_value("user", "name", uname) + rw_repo.config_writer().set_value("user", "email", umail) + + # remove all of the files, provide a wild mix of paths, BaseIndexEntries, + # IndexEntries + def mixed_iterator(): + count = 0 + for entry in index.entries.itervalues(): + type_id = count % 4 + if type_id == 0: # path + yield entry.path + elif type_id == 1: # blob + yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) + elif type_id == 2: # BaseIndexEntry + yield BaseIndexEntry(entry[:4]) + elif type_id == 3: # IndexEntry + yield entry + else: + raise AssertionError("Invalid Type") + count += 1 + # END for each entry + # END mixed iterator + deleted_files = index.remove(mixed_iterator(), working_tree=False) + assert deleted_files + assert self._count_existing(rw_repo, deleted_files) == len(deleted_files) + assert len(index.entries) == 0 + + # reset the index to undo our changes + index.reset() + assert len(index.entries) == num_entries + + # remove with working copy + deleted_files = index.remove(mixed_iterator(), working_tree=True) + assert deleted_files + assert self._count_existing(rw_repo, deleted_files) == 0 + + # reset everything + index.reset(working_tree=True) + assert self._count_existing(rw_repo, deleted_files) == len(deleted_files) + + # invalid type + self.failUnlessRaises(TypeError, index.remove, [1]) + + # absolute path + deleted_files = index.remove([os.path.join(rw_repo.working_tree_dir,"lib")], r=True) + assert len(deleted_files) > 1 + self.failUnlessRaises(ValueError, index.remove, ["/doesnt/exists"]) + + # TEST COMMITTING + # commit changed index + cur_commit = cur_head.commit + commit_message = "commit default head" + + new_commit = index.commit(commit_message, head=False) + assert cur_commit != new_commit + assert new_commit.author.name == uname + assert new_commit.author.email == umail + assert new_commit.committer.name == uname + assert new_commit.committer.email == umail + assert new_commit.message == commit_message + assert new_commit.parents[0] == cur_commit + assert len(new_commit.parents) == 1 + assert cur_head.commit == cur_commit + + # same index, no parents + commit_message = "index without parents" + commit_no_parents = index.commit(commit_message, parent_commits=list(), head=True) + assert commit_no_parents.message == commit_message + assert len(commit_no_parents.parents) == 0 + assert cur_head.commit == commit_no_parents + + # same index, multiple parents + commit_message = "Index with multiple parents\n commit with another line" + commit_multi_parent = index.commit(commit_message,parent_commits=(commit_no_parents, new_commit)) + assert commit_multi_parent.message == commit_message + assert len(commit_multi_parent.parents) == 2 + assert commit_multi_parent.parents[0] == commit_no_parents + assert commit_multi_parent.parents[1] == new_commit + assert cur_head.commit == commit_multi_parent + + # re-add all files in lib + # get the lib folder back on disk, but get an index without it + index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) + lib_file_path = os.path.join("lib", "git", 
"__init__.py") + assert (lib_file_path, 0) not in index.entries + assert os.path.isfile(os.path.join(rw_repo.working_tree_dir, lib_file_path)) + + # directory + entries = index.add(['lib'], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert len(entries)>1 + + # glob + entries = index.reset(new_commit).add([os.path.join('lib', 'git', '*.py')], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert len(entries) == 14 + + # same file + entries = index.reset(new_commit).add([os.path.abspath(os.path.join('lib', 'git', 'head.py'))]*2, fprogress=self._fprogress_add) + self._assert_entries(entries) + assert entries[0].mode & 0644 == 0644 + # would fail, test is too primitive to handle this case + # self._assert_fprogress(entries) + self._reset_progress() + assert len(entries) == 2 + + # missing path + self.failUnlessRaises(OSError, index.reset(new_commit).add, ['doesnt/exist/must/raise']) + + # blob from older revision overrides current index revision + old_blob = new_commit.parents[0].tree.blobs[0] + entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert index.entries[(old_blob.path,0)].hexsha == old_blob.hexsha and len(entries) == 1 + + # mode 0 not allowed + null_hex_sha = Diff.NULL_HEX_SHA + null_bin_sha = "\0" * 20 + self.failUnlessRaises(ValueError, index.reset(new_commit).add, [BaseIndexEntry((0, null_bin_sha,0,"doesntmatter"))]) + + # add new file + new_file_relapath = "my_new_file" + new_file_path = self._make_file(new_file_relapath, "hello world", rw_repo) + entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_bin_sha, 0, new_file_relapath))], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert len(entries) == 1 and entries[0].hexsha != null_hex_sha + + # add symlink + if sys.platform != "win32": + basename = "my_real_symlink" + target = "/etc/that" + link_file = os.path.join(rw_repo.working_tree_dir, basename) + os.symlink(target, link_file) + entries = index.reset(new_commit).add([link_file], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert len(entries) == 1 and S_ISLNK(entries[0].mode) + assert S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode) + + # we expect only the target to be written + assert index.repo.odb.stream(entries[0].binsha).read() == target + # END real symlink test + + # add fake symlink and assure it checks-our as symlink + fake_symlink_relapath = "my_fake_symlink" + link_target = "/etc/that" + fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) + fake_entry = BaseIndexEntry((0120000, null_bin_sha, 0, fake_symlink_relapath)) + entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert entries[0].hexsha != null_hex_sha + assert len(entries) == 1 and S_ISLNK(entries[0].mode) + + # assure this also works with an alternate method + full_index_entry = IndexEntry.from_base(BaseIndexEntry((0120000, entries[0].binsha, 0, entries[0].path))) + entry_key = index.entry_key(full_index_entry) + index.reset(new_commit) + + assert entry_key not in index.entries + index.entries[entry_key] = full_index_entry + index.write() + index.update() # force reread of entries + new_entry = index.entries[entry_key] + assert 
S_ISLNK(new_entry.mode)
+
+ # a tree created from this should contain the symlink
+ tree = index.write_tree()
+ assert fake_symlink_relapath in tree
+ index.write() # flush our changes for the checkout
+
+ # checkout the fake link, it should be a link then
+ assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE])
+ os.remove(fake_symlink_path)
+ index.checkout(fake_symlink_path)
+
+ # on windows we will never get symlinks
+ if os.name == 'nt':
+ # symlinks should contain the link as text ( which is what a
+ # symlink actually is )
+ assert open(fake_symlink_path,'rb').read() == link_target
+ else:
+ assert S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE])
+
+ # TEST RENAMING
+ def assert_mv_rval(rval):
+ for source, dest in rval:
+ assert not os.path.exists(source) and os.path.exists(dest)
+ # END for each renamed item
+ # END move assertion utility
+
+ self.failUnlessRaises(ValueError, index.move, ['just_one_path'])
+ # file onto existing file
+ files = ['AUTHORS', 'LICENSE']
+ self.failUnlessRaises(GitCommandError, index.move, files)
+
+ # again, with force
+ assert_mv_rval(index.move(files, f=True))
+
+ # files into directory - dry run
+ paths = ['LICENSE', 'VERSION', 'doc']
+ rval = index.move(paths, dry_run=True)
+ assert len(rval) == 2
+ assert os.path.exists(paths[0])
+
+ # again, no dry run
+ rval = index.move(paths)
+ assert_mv_rval(rval)
+
+ # dir into dir
+ rval = index.move(['doc', 'test'])
+ assert_mv_rval(rval)
+
+
+ # TEST PATH REWRITING
+ ######################
+ count = [0]
+ def rewriter(entry):
+ rval = str(count[0])
+ count[0] += 1
+ return rval
+ # END rewriter
+
+ def make_paths():
+ # two existing ones, one new one
+ yield 'CHANGES'
+ yield 'ez_setup.py'
+ yield index.entries[index.entry_key('README', 0)]
+ yield index.entries[index.entry_key('.gitignore', 0)]
+
+ for fid in range(3):
+ fname = 'newfile%i' % fid
+ open(fname, 'wb').write("abcd")
+ yield Blob(rw_repo, Blob.NULL_BIN_SHA, 0100644, fname)
+ # END for each new file
+ # END path producer
+ paths = list(make_paths())
+ self._assert_entries(index.add(paths, path_rewriter=rewriter))
+
+ for filenum in range(len(paths)):
+ assert index.entry_key(str(filenum), 0) in index.entries
+
+
+ # TEST RESET ON PATHS
+ ######################
+ arela = "aa"
+ brela = "bb"
+ afile = self._make_file(arela, "adata", rw_repo)
+ bfile = self._make_file(brela, "bdata", rw_repo)
+ akey = index.entry_key(arela, 0)
+ bkey = index.entry_key(brela, 0)
+ keys = (akey, bkey)
+ absfiles = (afile, bfile)
+ files = (arela, brela)
+
+ for fkey in keys:
+ assert not fkey in index.entries
+
+ index.add(files, write=True)
+ nc = index.commit("2 files committed", head=False)
+
+ for fkey in keys:
+ assert fkey in index.entries
+
+ # just the index
+ index.reset(paths=(arela, afile))
+ assert not akey in index.entries
+ assert bkey in index.entries
+
+ # now with working tree - files on disk as well as entries must be recreated
+ rw_repo.head.commit = nc
+ for absfile in absfiles:
+ os.remove(absfile)
+
+ index.reset(working_tree=True, paths=files)
+
+ for fkey in keys:
+ assert fkey in index.entries
+ for absfile in absfiles:
+ assert os.path.isfile(absfile)
+
+
+ @with_rw_repo('HEAD')
+ def test_compare_write_tree(self, rw_repo):
+ # write all trees and compare them
+ # its important to have a few submodules in there too
+ max_count = 25
+ count = 0
+ for commit in rw_repo.head.commit.traverse():
+ if count >= max_count:
+ break
+ count += 1
+ index = rw_repo.index.reset(commit)
+ orig_tree = commit.tree
+ assert index.write_tree() == orig_tree
+ # END for each commit
+
+ def test_index_new(self):
+ B = self.rorepo.tree("6d9b1f4f9fa8c9f030e3207e7deacc5d5f8bba4e")
+ H = self.rorepo.tree("25dca42bac17d511b7e2ebdd9d1d679e7626db5f")
+ M = self.rorepo.tree("e746f96bcc29238b79118123028ca170adc4ff0f")
+
+ for args in ((B,), (B,H), (B,H,M)):
+ index = IndexFile.new(self.rorepo, *args)
+ assert isinstance(index, IndexFile)
+ # END for each arg tuple
+
diff --git a/git/test/test_reflog.py b/git/test/test_reflog.py
index 0fc1f1ae..fca9e1cd 100644
--- a/git/test/test_reflog.py
+++ b/git/test/test_reflog.py
@@ -9,92 +9,92 @@ import os
 class TestRefLog(TestBase):
- def test_reflogentry(self):
- nullhexsha = IndexObject.NULL_HEX_SHA
- hexsha = 'F' * 40
- actor = Actor('name', 'email')
- msg = "message"
-
- self.failUnlessRaises(ValueError, RefLogEntry.new, nullhexsha, hexsha, 'noactor', 0, 0, "")
- e = RefLogEntry.new(nullhexsha, hexsha, actor, 0, 1, msg)
-
- assert e.oldhexsha == nullhexsha
- assert e.newhexsha == hexsha
- assert e.actor == actor
- assert e.time[0] == 0
- assert e.time[1] == 1
- assert e.message == msg
-
- # check representation (roughly)
- assert repr(e).startswith(nullhexsha)
-
- def test_base(self):
- rlp_head = fixture_path('reflog_HEAD')
- rlp_master = fixture_path('reflog_master')
- tdir = tempfile.mktemp(suffix="test_reflogs")
- os.mkdir(tdir)
-
- rlp_master_ro = RefLog.path(self.rorepo.head)
- assert os.path.isfile(rlp_master_ro)
-
- # simple read
- reflog = RefLog.from_file(rlp_master_ro)
- assert reflog._path is not None
- assert isinstance(reflog, RefLog)
- assert len(reflog)
-
- # iter_entries works with path and with stream
- assert len(list(RefLog.iter_entries(open(rlp_master))))
- assert len(list(RefLog.iter_entries(rlp_master)))
-
- # raise on invalid reflog
- # TODO: Try multiple corrupted ones !
- pp = 'reflog_invalid_'
- for suffix in ('oldsha', 'newsha', 'email', 'date', 'sep'):
- self.failUnlessRaises(ValueError, RefLog.from_file, fixture_path(pp+suffix))
- #END for each invalid file
-
- # cannot write an uninitialized reflog
- self.failUnlessRaises(ValueError, RefLog().write)
-
- # test serialize and deserialize - results must match exactly
- binsha = chr(255)*20
- msg = "my reflog message"
- cr = self.rorepo.config_reader()
- for rlp in (rlp_head, rlp_master):
- reflog = RefLog.from_file(rlp)
- tfile = os.path.join(tdir, os.path.basename(rlp))
- reflog.to_file(tfile)
- assert reflog.write() is reflog
-
- # parsed result must match ...
- treflog = RefLog.from_file(tfile)
- assert treflog == reflog
-
- # ... as well as each byte of the written stream
- assert open(tfile).read() == open(rlp).read()
-
- # append an entry
- entry = RefLog.append_entry(cr, tfile, IndexObject.NULL_BIN_SHA, binsha, msg)
- assert entry.oldhexsha == IndexObject.NULL_HEX_SHA
- assert entry.newhexsha == 'f'*40
- assert entry.message == msg
- assert RefLog.from_file(tfile)[-1] == entry
-
- # index entry
- # raises on invalid index
- self.failUnlessRaises(IndexError, RefLog.entry_at, rlp, 10000)
-
- # indices can be positive ...
- assert isinstance(RefLog.entry_at(rlp, 0), RefLogEntry)
- RefLog.entry_at(rlp, 23)
-
- # ... and negative
- for idx in (-1, -24):
- RefLog.entry_at(rlp, idx)
- #END for each index to read
- # END for each reflog
-
-
- # finally remove our temporary data
- shutil.rmtree(tdir)
+ def test_reflogentry(self):
+ nullhexsha = IndexObject.NULL_HEX_SHA
+ hexsha = 'F' * 40
+ actor = Actor('name', 'email')
+ msg = "message"
+
+ self.failUnlessRaises(ValueError, RefLogEntry.new, nullhexsha, hexsha, 'noactor', 0, 0, "")
+ e = RefLogEntry.new(nullhexsha, hexsha, actor, 0, 1, msg)
+
+ assert e.oldhexsha == nullhexsha
+ assert e.newhexsha == hexsha
+ assert e.actor == actor
+ assert e.time[0] == 0
+ assert e.time[1] == 1
+ assert e.message == msg
+
+ # check representation (roughly)
+ assert repr(e).startswith(nullhexsha)
+
+ def test_base(self):
+ rlp_head = fixture_path('reflog_HEAD')
+ rlp_master = fixture_path('reflog_master')
+ tdir = tempfile.mktemp(suffix="test_reflogs")
+ os.mkdir(tdir)
+
+ rlp_master_ro = RefLog.path(self.rorepo.head)
+ assert os.path.isfile(rlp_master_ro)
+
+ # simple read
+ reflog = RefLog.from_file(rlp_master_ro)
+ assert reflog._path is not None
+ assert isinstance(reflog, RefLog)
+ assert len(reflog)
+
+ # iter_entries works with path and with stream
+ assert len(list(RefLog.iter_entries(open(rlp_master))))
+ assert len(list(RefLog.iter_entries(rlp_master)))
+
+ # raise on invalid reflog
+ # TODO: Try multiple corrupted ones !
+ pp = 'reflog_invalid_'
+ for suffix in ('oldsha', 'newsha', 'email', 'date', 'sep'):
+ self.failUnlessRaises(ValueError, RefLog.from_file, fixture_path(pp+suffix))
+ #END for each invalid file
+
+ # cannot write an uninitialized reflog
+ self.failUnlessRaises(ValueError, RefLog().write)
+
+ # test serialize and deserialize - results must match exactly
+ binsha = chr(255)*20
+ msg = "my reflog message"
+ cr = self.rorepo.config_reader()
+ for rlp in (rlp_head, rlp_master):
+ reflog = RefLog.from_file(rlp)
+ tfile = os.path.join(tdir, os.path.basename(rlp))
+ reflog.to_file(tfile)
+ assert reflog.write() is reflog
+
+ # parsed result must match ...
+ treflog = RefLog.from_file(tfile)
+ assert treflog == reflog
+
+ # ... as well as each byte of the written stream
+ assert open(tfile).read() == open(rlp).read()
+
+ # append an entry
+ entry = RefLog.append_entry(cr, tfile, IndexObject.NULL_BIN_SHA, binsha, msg)
+ assert entry.oldhexsha == IndexObject.NULL_HEX_SHA
+ assert entry.newhexsha == 'f'*40
+ assert entry.message == msg
+ assert RefLog.from_file(tfile)[-1] == entry
+
+ # index entry
+ # raises on invalid index
+ self.failUnlessRaises(IndexError, RefLog.entry_at, rlp, 10000)
+
+ # indices can be positive ...
+ assert isinstance(RefLog.entry_at(rlp, 0), RefLogEntry)
+ RefLog.entry_at(rlp, 23)
+
+ # ...
and negative + for idx in (-1, -24): + RefLog.entry_at(rlp, idx) + #END for each index to read + # END for each reflog + + + # finally remove our temporary data + shutil.rmtree(tdir) diff --git a/git/test/test_refs.py b/git/test/test_refs.py index d3b1f681..cf08d7ec 100644 --- a/git/test/test_refs.py +++ b/git/test/test_refs.py @@ -15,531 +15,531 @@ import os class TestRefs(TestBase): - def test_from_path(self): - # should be able to create any reference directly - for ref_type in ( Reference, Head, TagReference, RemoteReference ): - for name in ('rela_name', 'path/rela_name'): - full_path = ref_type.to_full_path(name) - instance = ref_type.from_path(self.rorepo, full_path) - assert isinstance(instance, ref_type) - # END for each name - # END for each type - - # invalid path - self.failUnlessRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag") - # works without path check - TagReference(self.rorepo, "refs/invalid/tag", check_path=False) - - def test_tag_base(self): - tag_object_refs = list() - for tag in self.rorepo.tags: - assert "refs/tags" in tag.path - assert tag.name - assert isinstance( tag.commit, Commit ) - if tag.tag is not None: - tag_object_refs.append( tag ) - tagobj = tag.tag - # have no dict - self.failUnlessRaises(AttributeError, setattr, tagobj, 'someattr', 1) - assert isinstance( tagobj, TagObject ) - assert tagobj.tag == tag.name - assert isinstance( tagobj.tagger, Actor ) - assert isinstance( tagobj.tagged_date, int ) - assert isinstance( tagobj.tagger_tz_offset, int ) - assert tagobj.message - assert tag.object == tagobj - # can't assign the object - self.failUnlessRaises(AttributeError, setattr, tag, 'object', tagobj) - # END if we have a tag object - # END for tag in repo-tags - assert tag_object_refs - assert isinstance(self.rorepo.tags['0.1.5'], TagReference) + def test_from_path(self): + # should be able to create any reference directly + for ref_type in ( Reference, Head, TagReference, RemoteReference ): + for name in ('rela_name', 'path/rela_name'): + full_path = ref_type.to_full_path(name) + instance = ref_type.from_path(self.rorepo, full_path) + assert isinstance(instance, ref_type) + # END for each name + # END for each type + + # invalid path + self.failUnlessRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag") + # works without path check + TagReference(self.rorepo, "refs/invalid/tag", check_path=False) + + def test_tag_base(self): + tag_object_refs = list() + for tag in self.rorepo.tags: + assert "refs/tags" in tag.path + assert tag.name + assert isinstance( tag.commit, Commit ) + if tag.tag is not None: + tag_object_refs.append( tag ) + tagobj = tag.tag + # have no dict + self.failUnlessRaises(AttributeError, setattr, tagobj, 'someattr', 1) + assert isinstance( tagobj, TagObject ) + assert tagobj.tag == tag.name + assert isinstance( tagobj.tagger, Actor ) + assert isinstance( tagobj.tagged_date, int ) + assert isinstance( tagobj.tagger_tz_offset, int ) + assert tagobj.message + assert tag.object == tagobj + # can't assign the object + self.failUnlessRaises(AttributeError, setattr, tag, 'object', tagobj) + # END if we have a tag object + # END for tag in repo-tags + assert tag_object_refs + assert isinstance(self.rorepo.tags['0.1.5'], TagReference) - def test_tags_author(self): - tag = self.rorepo.tags[0] - tagobj = tag.tag - assert isinstance( tagobj.tagger, Actor ) - tagger_name = tagobj.tagger.name - assert tagger_name == 'Michael Trier' + def test_tags_author(self): + tag = self.rorepo.tags[0] + tagobj = tag.tag + assert 
isinstance( tagobj.tagger, Actor ) + tagger_name = tagobj.tagger.name + assert tagger_name == 'Michael Trier' - - def test_tags(self): - # tag refs can point to tag objects or to commits - s = set() - ref_count = 0 - for ref in chain(self.rorepo.tags, self.rorepo.heads): - ref_count += 1 - assert isinstance(ref, refs.Reference) - assert str(ref) == ref.name - assert repr(ref) - assert ref == ref - assert not ref != ref - s.add(ref) - # END for each ref - assert len(s) == ref_count - assert len(s|s) == ref_count - - @with_rw_repo('HEAD', bare=False) - def test_heads(self, rwrepo): - for head in rwrepo.heads: - assert head.name - assert head.path - assert "refs/heads" in head.path - prev_object = head.object - cur_object = head.object - assert prev_object == cur_object # represent the same git object - assert prev_object is not cur_object # but are different instances - - writer = head.config_writer() - tv = "testopt" - writer.set_value(tv, 1) - assert writer.get_value(tv) == 1 - del(writer) - assert head.config_reader().get_value(tv) == 1 - head.config_writer().remove_option(tv) - - # after the clone, we might still have a tracking branch setup - head.set_tracking_branch(None) - assert head.tracking_branch() is None - remote_ref = rwrepo.remotes[0].refs[0] - assert head.set_tracking_branch(remote_ref) is head - assert head.tracking_branch() == remote_ref - head.set_tracking_branch(None) - assert head.tracking_branch() is None - # END for each head - - # verify REFLOG gets altered - head = rwrepo.head - cur_head = head.ref - cur_commit = cur_head.commit - pcommit = cur_head.commit.parents[0].parents[0] - hlog_len = len(head.log()) - blog_len = len(cur_head.log()) - assert head.set_reference(pcommit, 'detached head') is head - # one new log-entry - thlog = head.log() - assert len(thlog) == hlog_len + 1 - assert thlog[-1].oldhexsha == cur_commit.hexsha - assert thlog[-1].newhexsha == pcommit.hexsha - - # the ref didn't change though - assert len(cur_head.log()) == blog_len - - # head changes once again, cur_head doesn't change - head.set_reference(cur_head, 'reattach head') - assert len(head.log()) == hlog_len+2 - assert len(cur_head.log()) == blog_len - - # adjusting the head-ref also adjust the head, so both reflogs are - # altered - cur_head.set_commit(pcommit, 'changing commit') - assert len(cur_head.log()) == blog_len+1 - assert len(head.log()) == hlog_len+3 - - - # with automatic dereferencing - assert head.set_commit(cur_commit, 'change commit once again') is head - assert len(head.log()) == hlog_len+4 - assert len(cur_head.log()) == blog_len+2 - - # a new branch has just a single entry - other_head = Head.create(rwrepo, 'mynewhead', pcommit, logmsg='new head created') - log = other_head.log() - assert len(log) == 1 - assert log[0].oldhexsha == pcommit.NULL_HEX_SHA - assert log[0].newhexsha == pcommit.hexsha - - - def test_refs(self): - types_found = set() - for ref in self.rorepo.refs: - types_found.add(type(ref)) - assert len(types_found) >= 3 - - def test_is_valid(self): - assert Reference(self.rorepo, 'refs/doesnt/exist').is_valid() == False - assert self.rorepo.head.is_valid() - assert self.rorepo.head.reference.is_valid() - assert SymbolicReference(self.rorepo, 'hellothere').is_valid() == False - - def test_orig_head(self): - assert type(self.rorepo.head.orig_head()) == SymbolicReference - - @with_rw_repo('0.1.6') - def test_head_reset(self, rw_repo): - cur_head = rw_repo.head - old_head_commit = cur_head.commit - new_head_commit = cur_head.ref.commit.parents[0] - 
cur_head.reset(new_head_commit, index=True) # index only - assert cur_head.reference.commit == new_head_commit - - self.failUnlessRaises(ValueError, cur_head.reset, new_head_commit, index=False, working_tree=True) - new_head_commit = new_head_commit.parents[0] - cur_head.reset(new_head_commit, index=True, working_tree=True) # index + wt - assert cur_head.reference.commit == new_head_commit - - # paths - make sure we have something to do - rw_repo.index.reset(old_head_commit.parents[0]) - cur_head.reset(cur_head, paths = "test") - cur_head.reset(new_head_commit, paths = "lib") - # hard resets with paths don't work, its all or nothing - self.failUnlessRaises(GitCommandError, cur_head.reset, new_head_commit, working_tree=True, paths = "lib") - - # we can do a mixed reset, and then checkout from the index though - cur_head.reset(new_head_commit) - rw_repo.index.checkout(["lib"], force=True)# - - - # now that we have a write write repo, change the HEAD reference - its - # like git-reset --soft - heads = rw_repo.heads - assert heads - for head in heads: - cur_head.reference = head - assert cur_head.reference == head - assert isinstance(cur_head.reference, Head) - assert cur_head.commit == head.commit - assert not cur_head.is_detached - # END for each head - - # detach - active_head = heads[0] - curhead_commit = active_head.commit - cur_head.reference = curhead_commit - assert cur_head.commit == curhead_commit - assert cur_head.is_detached - self.failUnlessRaises(TypeError, getattr, cur_head, "reference") - - # tags are references, hence we can point to them - some_tag = rw_repo.tags[0] - cur_head.reference = some_tag - assert not cur_head.is_detached - assert cur_head.commit == some_tag.commit - assert isinstance(cur_head.reference, TagReference) - - # put HEAD back to a real head, otherwise everything else fails - cur_head.reference = active_head - - # type check - self.failUnlessRaises(ValueError, setattr, cur_head, "reference", "that") - - # head handling - commit = 'HEAD' - prev_head_commit = cur_head.commit - for count, new_name in enumerate(("my_new_head", "feature/feature1")): - actual_commit = commit+"^"*count - new_head = Head.create(rw_repo, new_name, actual_commit) - assert new_head.is_detached - assert cur_head.commit == prev_head_commit - assert isinstance(new_head, Head) - # already exists, but has the same value, so its fine - Head.create(rw_repo, new_name, new_head.commit) - - # its not fine with a different value - self.failUnlessRaises(OSError, Head.create, rw_repo, new_name, new_head.commit.parents[0]) - - # force it - new_head = Head.create(rw_repo, new_name, actual_commit, force=True) - old_path = new_head.path - old_name = new_head.name - - assert new_head.rename("hello").name == "hello" - assert new_head.rename("hello/world").name == "hello/world" - assert new_head.rename(old_name).name == old_name and new_head.path == old_path - - # rename with force - tmp_head = Head.create(rw_repo, "tmphead") - self.failUnlessRaises(GitCommandError, tmp_head.rename, new_head) - tmp_head.rename(new_head, force=True) - assert tmp_head == new_head and tmp_head.object == new_head.object - - logfile = RefLog.path(tmp_head) - assert os.path.isfile(logfile) - Head.delete(rw_repo, tmp_head) - # deletion removes the log as well - assert not os.path.isfile(logfile) - heads = rw_repo.heads - assert tmp_head not in heads and new_head not in heads - # force on deletion testing would be missing here, code looks okay though ;) - # END for each new head name - self.failUnlessRaises(TypeError, 
RemoteReference.create, rw_repo, "some_name") - - # tag ref - tag_name = "1.0.2" - light_tag = TagReference.create(rw_repo, tag_name) - self.failUnlessRaises(GitCommandError, TagReference.create, rw_repo, tag_name) - light_tag = TagReference.create(rw_repo, tag_name, "HEAD~1", force = True) - assert isinstance(light_tag, TagReference) - assert light_tag.name == tag_name - assert light_tag.commit == cur_head.commit.parents[0] - assert light_tag.tag is None - - # tag with tag object - other_tag_name = "releases/1.0.2RC" - msg = "my mighty tag\nsecond line" - obj_tag = TagReference.create(rw_repo, other_tag_name, message=msg) - assert isinstance(obj_tag, TagReference) - assert obj_tag.name == other_tag_name - assert obj_tag.commit == cur_head.commit - assert obj_tag.tag is not None - - TagReference.delete(rw_repo, light_tag, obj_tag) - tags = rw_repo.tags - assert light_tag not in tags and obj_tag not in tags - - # remote deletion - remote_refs_so_far = 0 - remotes = rw_repo.remotes - assert remotes - for remote in remotes: - refs = remote.refs - - # If a HEAD exists, it must be deleted first. Otherwise it might - # end up pointing to an invalid ref it the ref was deleted before. - remote_head_name = "HEAD" - if remote_head_name in refs: - RemoteReference.delete(rw_repo, refs[remote_head_name]) - del(refs[remote_head_name]) - #END handle HEAD deletion - - RemoteReference.delete(rw_repo, *refs) - remote_refs_so_far += len(refs) - for ref in refs: - assert ref.remote_name == remote.name - # END for each ref to delete - assert remote_refs_so_far - - for remote in remotes: - # remotes without references throw - self.failUnlessRaises(AssertionError, getattr, remote, 'refs') - # END for each remote - - # change where the active head points to - if cur_head.is_detached: - cur_head.reference = rw_repo.heads[0] - - head = cur_head.reference - old_commit = head.commit - head.commit = old_commit.parents[0] - assert head.commit == old_commit.parents[0] - assert head.commit == cur_head.commit - head.commit = old_commit - - # setting a non-commit as commit fails, but succeeds as object - head_tree = head.commit.tree - self.failUnlessRaises(ValueError, setattr, head, 'commit', head_tree) - assert head.commit == old_commit # and the ref did not change - # we allow heds to point to any object - head.object = head_tree - assert head.object == head_tree - # cannot query tree as commit - self.failUnlessRaises(TypeError, getattr, head, 'commit') - - # set the commit directly using the head. 
This would never detach the head - assert not cur_head.is_detached - head.object = old_commit - cur_head.reference = head.commit - assert cur_head.is_detached - parent_commit = head.commit.parents[0] - assert cur_head.is_detached - cur_head.commit = parent_commit - assert cur_head.is_detached and cur_head.commit == parent_commit - - cur_head.reference = head - assert not cur_head.is_detached - cur_head.commit = parent_commit - assert not cur_head.is_detached - assert head.commit == parent_commit - - # test checkout - active_branch = rw_repo.active_branch - for head in rw_repo.heads: - checked_out_head = head.checkout() - assert checked_out_head == head - # END for each head to checkout - - # checkout with branch creation - new_head = active_branch.checkout(b="new_head") - assert active_branch != rw_repo.active_branch - assert new_head == rw_repo.active_branch - - # checkout with force as we have a changed a file - # clear file - open(new_head.commit.tree.blobs[-1].abspath,'w').close() - assert len(new_head.commit.diff(None)) - - # create a new branch that is likely to touch the file we changed - far_away_head = rw_repo.create_head("far_head",'HEAD~100') - self.failUnlessRaises(GitCommandError, far_away_head.checkout) - assert active_branch == active_branch.checkout(force=True) - assert rw_repo.head.reference != far_away_head - - # test reference creation - partial_ref = 'sub/ref' - full_ref = 'refs/%s' % partial_ref - ref = Reference.create(rw_repo, partial_ref) - assert ref.path == full_ref - assert ref.object == rw_repo.head.commit - - self.failUnlessRaises(OSError, Reference.create, rw_repo, full_ref, 'HEAD~20') - # it works if it is at the same spot though and points to the same reference - assert Reference.create(rw_repo, full_ref, 'HEAD').path == full_ref - Reference.delete(rw_repo, full_ref) - - # recreate the reference using a full_ref - ref = Reference.create(rw_repo, full_ref) - assert ref.path == full_ref - assert ref.object == rw_repo.head.commit - - # recreate using force - ref = Reference.create(rw_repo, partial_ref, 'HEAD~1', force=True) - assert ref.path == full_ref - assert ref.object == rw_repo.head.commit.parents[0] - - # rename it - orig_obj = ref.object - for name in ('refs/absname', 'rela_name', 'feature/rela_name'): - ref_new_name = ref.rename(name) - assert isinstance(ref_new_name, Reference) - assert name in ref_new_name.path - assert ref_new_name.object == orig_obj - assert ref_new_name == ref - # END for each name type - - # References that don't exist trigger an error if we want to access them - self.failUnlessRaises(ValueError, getattr, Reference(rw_repo, "refs/doesntexist"), 'commit') - - # exists, fail unless we force - ex_ref_path = far_away_head.path - self.failUnlessRaises(OSError, ref.rename, ex_ref_path) - # if it points to the same commit it works - far_away_head.commit = ref.commit - ref.rename(ex_ref_path) - assert ref.path == ex_ref_path and ref.object == orig_obj - assert ref.rename(ref.path).path == ex_ref_path # rename to same name - - # create symbolic refs - symref_path = "symrefs/sym" - symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference) - assert symref.path == symref_path - assert symref.reference == cur_head.reference - - self.failUnlessRaises(OSError, SymbolicReference.create, rw_repo, symref_path, cur_head.reference.commit) - # it works if the new ref points to the same reference - SymbolicReference.create(rw_repo, symref.path, symref.reference).path == symref.path - SymbolicReference.delete(rw_repo, symref) - # 
would raise if the symref wouldn't have been deleted
- symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference)
-
- # test symbolic references which are not at default locations like HEAD
- # or FETCH_HEAD - they may also be at spots in refs of course
- symbol_ref_path = "refs/symbol_ref"
- symref = SymbolicReference(rw_repo, symbol_ref_path)
- assert symref.path == symbol_ref_path
- symbol_ref_abspath = os.path.join(rw_repo.git_dir, symref.path)
-
- # set it
- symref.reference = new_head
- assert symref.reference == new_head
- assert os.path.isfile(symbol_ref_abspath)
- assert symref.commit == new_head.commit
-
- for name in ('absname','folder/rela_name'):
- symref_new_name = symref.rename(name)
- assert isinstance(symref_new_name, SymbolicReference)
- assert name in symref_new_name.path
- assert symref_new_name.reference == new_head
- assert symref_new_name == symref
- assert not symref.is_detached
- # END for each ref
-
- # create a new non-head ref just to be sure we handle it even if packed
- Reference.create(rw_repo, full_ref)
-
- # test ref listing - assure we have packed refs
- rw_repo.git.pack_refs(all=True, prune=True)
- heads = rw_repo.heads
- assert heads
- assert new_head in heads
- assert active_branch in heads
- assert rw_repo.tags
-
- # we should be able to iterate all symbolic refs as well - in that case
- # we should expect only symbolic references to be returned
- for symref in SymbolicReference.iter_items(rw_repo):
- assert not symref.is_detached
-
- # when iterating references, we can get references and symrefs
- # when deleting all refs, I'd expect them to be gone ! Even from
- # the packed ones
- # For this to work, we must not be on any branch
- rw_repo.head.reference = rw_repo.head.commit
- deleted_refs = set()
- for ref in Reference.iter_items(rw_repo):
- if ref.is_detached:
- ref.delete(rw_repo, ref)
- deleted_refs.add(ref)
- # END delete ref
- # END for each ref to iterate and to delete
- assert deleted_refs
-
- for ref in Reference.iter_items(rw_repo):
- if ref.is_detached:
- assert ref not in deleted_refs
- # END for each ref
-
- # reattach head - head will not be returned if it is not a symbolic
- # ref
- rw_repo.head.reference = Head.create(rw_repo, "master")
-
- # At least the head should still exist
- assert os.path.isfile(os.path.join(rw_repo.git_dir, 'HEAD'))
- refs = list(SymbolicReference.iter_items(rw_repo))
- assert len(refs) == 1
-
-
- # test creation of new refs from scratch
- for path in ("basename", "dir/somename", "dir2/subdir/basename"):
- # REFERENCES
- ############
- fpath = Reference.to_full_path(path)
- ref_fp = Reference.from_path(rw_repo, fpath)
- assert not ref_fp.is_valid()
- ref = Reference(rw_repo, fpath)
- assert ref == ref_fp
-
- # can be created by assigning a commit
- ref.commit = rw_repo.head.commit
- assert ref.is_valid()
-
- # if the assignment raises, the ref doesn't exist
- Reference.delete(ref.repo, ref.path)
- assert not ref.is_valid()
- self.failUnlessRaises(ValueError, setattr, ref, 'commit', "nonsense")
- assert not ref.is_valid()
-
- # I am sure I had my reason to make it a class method at first, but
- # now it doesn't make so much sense anymore, want an instance method as well
- # See http://byronimo.lighthouseapp.com/projects/51787-gitpython/tickets/27
- Reference.delete(ref.repo, ref.path)
- assert not ref.is_valid()
-
- ref.object = rw_repo.head.commit
- assert ref.is_valid()
-
- Reference.delete(ref.repo, ref.path)
- assert not ref.is_valid()
- self.failUnlessRaises(ValueError, setattr, ref,
'object', "nonsense") - assert not ref.is_valid() - - # END for each path - - def test_dereference_recursive(self): - # for now, just test the HEAD - assert SymbolicReference.dereference_recursive(self.rorepo, 'HEAD') - - def test_reflog(self): - assert isinstance(self.rorepo.heads.master.log(), RefLog) - + + def test_tags(self): + # tag refs can point to tag objects or to commits + s = set() + ref_count = 0 + for ref in chain(self.rorepo.tags, self.rorepo.heads): + ref_count += 1 + assert isinstance(ref, refs.Reference) + assert str(ref) == ref.name + assert repr(ref) + assert ref == ref + assert not ref != ref + s.add(ref) + # END for each ref + assert len(s) == ref_count + assert len(s|s) == ref_count + + @with_rw_repo('HEAD', bare=False) + def test_heads(self, rwrepo): + for head in rwrepo.heads: + assert head.name + assert head.path + assert "refs/heads" in head.path + prev_object = head.object + cur_object = head.object + assert prev_object == cur_object # represent the same git object + assert prev_object is not cur_object # but are different instances + + writer = head.config_writer() + tv = "testopt" + writer.set_value(tv, 1) + assert writer.get_value(tv) == 1 + del(writer) + assert head.config_reader().get_value(tv) == 1 + head.config_writer().remove_option(tv) + + # after the clone, we might still have a tracking branch setup + head.set_tracking_branch(None) + assert head.tracking_branch() is None + remote_ref = rwrepo.remotes[0].refs[0] + assert head.set_tracking_branch(remote_ref) is head + assert head.tracking_branch() == remote_ref + head.set_tracking_branch(None) + assert head.tracking_branch() is None + # END for each head + + # verify REFLOG gets altered + head = rwrepo.head + cur_head = head.ref + cur_commit = cur_head.commit + pcommit = cur_head.commit.parents[0].parents[0] + hlog_len = len(head.log()) + blog_len = len(cur_head.log()) + assert head.set_reference(pcommit, 'detached head') is head + # one new log-entry + thlog = head.log() + assert len(thlog) == hlog_len + 1 + assert thlog[-1].oldhexsha == cur_commit.hexsha + assert thlog[-1].newhexsha == pcommit.hexsha + + # the ref didn't change though + assert len(cur_head.log()) == blog_len + + # head changes once again, cur_head doesn't change + head.set_reference(cur_head, 'reattach head') + assert len(head.log()) == hlog_len+2 + assert len(cur_head.log()) == blog_len + + # adjusting the head-ref also adjust the head, so both reflogs are + # altered + cur_head.set_commit(pcommit, 'changing commit') + assert len(cur_head.log()) == blog_len+1 + assert len(head.log()) == hlog_len+3 + + + # with automatic dereferencing + assert head.set_commit(cur_commit, 'change commit once again') is head + assert len(head.log()) == hlog_len+4 + assert len(cur_head.log()) == blog_len+2 + + # a new branch has just a single entry + other_head = Head.create(rwrepo, 'mynewhead', pcommit, logmsg='new head created') + log = other_head.log() + assert len(log) == 1 + assert log[0].oldhexsha == pcommit.NULL_HEX_SHA + assert log[0].newhexsha == pcommit.hexsha + + + def test_refs(self): + types_found = set() + for ref in self.rorepo.refs: + types_found.add(type(ref)) + assert len(types_found) >= 3 + + def test_is_valid(self): + assert Reference(self.rorepo, 'refs/doesnt/exist').is_valid() == False + assert self.rorepo.head.is_valid() + assert self.rorepo.head.reference.is_valid() + assert SymbolicReference(self.rorepo, 'hellothere').is_valid() == False + + def test_orig_head(self): + assert type(self.rorepo.head.orig_head()) == SymbolicReference 
+ + @with_rw_repo('0.1.6') + def test_head_reset(self, rw_repo): + cur_head = rw_repo.head + old_head_commit = cur_head.commit + new_head_commit = cur_head.ref.commit.parents[0] + cur_head.reset(new_head_commit, index=True) # index only + assert cur_head.reference.commit == new_head_commit + + self.failUnlessRaises(ValueError, cur_head.reset, new_head_commit, index=False, working_tree=True) + new_head_commit = new_head_commit.parents[0] + cur_head.reset(new_head_commit, index=True, working_tree=True) # index + wt + assert cur_head.reference.commit == new_head_commit + + # paths - make sure we have something to do + rw_repo.index.reset(old_head_commit.parents[0]) + cur_head.reset(cur_head, paths = "test") + cur_head.reset(new_head_commit, paths = "lib") + # hard resets with paths don't work, its all or nothing + self.failUnlessRaises(GitCommandError, cur_head.reset, new_head_commit, working_tree=True, paths = "lib") + + # we can do a mixed reset, and then checkout from the index though + cur_head.reset(new_head_commit) + rw_repo.index.checkout(["lib"], force=True)# + + + # now that we have a write write repo, change the HEAD reference - its + # like git-reset --soft + heads = rw_repo.heads + assert heads + for head in heads: + cur_head.reference = head + assert cur_head.reference == head + assert isinstance(cur_head.reference, Head) + assert cur_head.commit == head.commit + assert not cur_head.is_detached + # END for each head + + # detach + active_head = heads[0] + curhead_commit = active_head.commit + cur_head.reference = curhead_commit + assert cur_head.commit == curhead_commit + assert cur_head.is_detached + self.failUnlessRaises(TypeError, getattr, cur_head, "reference") + + # tags are references, hence we can point to them + some_tag = rw_repo.tags[0] + cur_head.reference = some_tag + assert not cur_head.is_detached + assert cur_head.commit == some_tag.commit + assert isinstance(cur_head.reference, TagReference) + + # put HEAD back to a real head, otherwise everything else fails + cur_head.reference = active_head + + # type check + self.failUnlessRaises(ValueError, setattr, cur_head, "reference", "that") + + # head handling + commit = 'HEAD' + prev_head_commit = cur_head.commit + for count, new_name in enumerate(("my_new_head", "feature/feature1")): + actual_commit = commit+"^"*count + new_head = Head.create(rw_repo, new_name, actual_commit) + assert new_head.is_detached + assert cur_head.commit == prev_head_commit + assert isinstance(new_head, Head) + # already exists, but has the same value, so its fine + Head.create(rw_repo, new_name, new_head.commit) + + # its not fine with a different value + self.failUnlessRaises(OSError, Head.create, rw_repo, new_name, new_head.commit.parents[0]) + + # force it + new_head = Head.create(rw_repo, new_name, actual_commit, force=True) + old_path = new_head.path + old_name = new_head.name + + assert new_head.rename("hello").name == "hello" + assert new_head.rename("hello/world").name == "hello/world" + assert new_head.rename(old_name).name == old_name and new_head.path == old_path + + # rename with force + tmp_head = Head.create(rw_repo, "tmphead") + self.failUnlessRaises(GitCommandError, tmp_head.rename, new_head) + tmp_head.rename(new_head, force=True) + assert tmp_head == new_head and tmp_head.object == new_head.object + + logfile = RefLog.path(tmp_head) + assert os.path.isfile(logfile) + Head.delete(rw_repo, tmp_head) + # deletion removes the log as well + assert not os.path.isfile(logfile) + heads = rw_repo.heads + assert tmp_head not in heads 
and new_head not in heads
+ # force on deletion testing would be missing here, code looks okay though ;)
+ # END for each new head name
+ self.failUnlessRaises(TypeError, RemoteReference.create, rw_repo, "some_name")
+
+ # tag ref
+ tag_name = "1.0.2"
+ light_tag = TagReference.create(rw_repo, tag_name)
+ self.failUnlessRaises(GitCommandError, TagReference.create, rw_repo, tag_name)
+ light_tag = TagReference.create(rw_repo, tag_name, "HEAD~1", force = True)
+ assert isinstance(light_tag, TagReference)
+ assert light_tag.name == tag_name
+ assert light_tag.commit == cur_head.commit.parents[0]
+ assert light_tag.tag is None
+
+ # tag with tag object
+ other_tag_name = "releases/1.0.2RC"
+ msg = "my mighty tag\nsecond line"
+ obj_tag = TagReference.create(rw_repo, other_tag_name, message=msg)
+ assert isinstance(obj_tag, TagReference)
+ assert obj_tag.name == other_tag_name
+ assert obj_tag.commit == cur_head.commit
+ assert obj_tag.tag is not None
+
+ TagReference.delete(rw_repo, light_tag, obj_tag)
+ tags = rw_repo.tags
+ assert light_tag not in tags and obj_tag not in tags
+
+ # remote deletion
+ remote_refs_so_far = 0
+ remotes = rw_repo.remotes
+ assert remotes
+ for remote in remotes:
+ refs = remote.refs
+
+ # If a HEAD exists, it must be deleted first. Otherwise it might
+ # end up pointing to an invalid ref if the ref was deleted before.
+ remote_head_name = "HEAD"
+ if remote_head_name in refs:
+ RemoteReference.delete(rw_repo, refs[remote_head_name])
+ del(refs[remote_head_name])
+ #END handle HEAD deletion
+
+ RemoteReference.delete(rw_repo, *refs)
+ remote_refs_so_far += len(refs)
+ for ref in refs:
+ assert ref.remote_name == remote.name
+ # END for each ref to delete
+ assert remote_refs_so_far
+
+ for remote in remotes:
+ # remotes without references throw
+ self.failUnlessRaises(AssertionError, getattr, remote, 'refs')
+ # END for each remote
+
+ # change where the active head points to
+ if cur_head.is_detached:
+ cur_head.reference = rw_repo.heads[0]
+
+ head = cur_head.reference
+ old_commit = head.commit
+ head.commit = old_commit.parents[0]
+ assert head.commit == old_commit.parents[0]
+ assert head.commit == cur_head.commit
+ head.commit = old_commit
+
+ # setting a non-commit as commit fails, but succeeds as object
+ head_tree = head.commit.tree
+ self.failUnlessRaises(ValueError, setattr, head, 'commit', head_tree)
+ assert head.commit == old_commit # and the ref did not change
+ # we allow heads to point to any object
+ head.object = head_tree
+ assert head.object == head_tree
+ # cannot query tree as commit
+ self.failUnlessRaises(TypeError, getattr, head, 'commit')
+
+ # set the commit directly using the head. This would never detach the head
+ assert not cur_head.is_detached
+ head.object = old_commit
+ cur_head.reference = head.commit
+ assert cur_head.is_detached
+ parent_commit = head.commit.parents[0]
+ assert cur_head.is_detached
+ cur_head.commit = parent_commit
+ assert cur_head.is_detached and cur_head.commit == parent_commit
+
+ cur_head.reference = head
+ assert not cur_head.is_detached
+ cur_head.commit = parent_commit
+ assert not cur_head.is_detached
+ assert head.commit == parent_commit
+
+ # test checkout
+ active_branch = rw_repo.active_branch
+ for head in rw_repo.heads:
+ checked_out_head = head.checkout()
+ assert checked_out_head == head
+ # END for each head to checkout
+
+ # checkout with branch creation
+ new_head = active_branch.checkout(b="new_head")
+ assert active_branch != rw_repo.active_branch
+ assert new_head == rw_repo.active_branch
+
+ # checkout with force as we have changed a file
+ # clear file
+ open(new_head.commit.tree.blobs[-1].abspath,'w').close()
+ assert len(new_head.commit.diff(None))
+
+ # create a new branch that is likely to touch the file we changed
+ far_away_head = rw_repo.create_head("far_head",'HEAD~100')
+ self.failUnlessRaises(GitCommandError, far_away_head.checkout)
+ assert active_branch == active_branch.checkout(force=True)
+ assert rw_repo.head.reference != far_away_head
+
+ # test reference creation
+ partial_ref = 'sub/ref'
+ full_ref = 'refs/%s' % partial_ref
+ ref = Reference.create(rw_repo, partial_ref)
+ assert ref.path == full_ref
+ assert ref.object == rw_repo.head.commit
+
+ self.failUnlessRaises(OSError, Reference.create, rw_repo, full_ref, 'HEAD~20')
+ # it works if it is at the same spot though and points to the same reference
+ assert Reference.create(rw_repo, full_ref, 'HEAD').path == full_ref
+ Reference.delete(rw_repo, full_ref)
+
+ # recreate the reference using a full_ref
+ ref = Reference.create(rw_repo, full_ref)
+ assert ref.path == full_ref
+ assert ref.object == rw_repo.head.commit
+
+ # recreate using force
+ ref = Reference.create(rw_repo, partial_ref, 'HEAD~1', force=True)
+ assert ref.path == full_ref
+ assert ref.object == rw_repo.head.commit.parents[0]
+
+ # rename it
+ orig_obj = ref.object
+ for name in ('refs/absname', 'rela_name', 'feature/rela_name'):
+ ref_new_name = ref.rename(name)
+ assert isinstance(ref_new_name, Reference)
+ assert name in ref_new_name.path
+ assert ref_new_name.object == orig_obj
+ assert ref_new_name == ref
+ # END for each name type
+
+ # References that don't exist trigger an error if we want to access them
+ self.failUnlessRaises(ValueError, getattr, Reference(rw_repo, "refs/doesntexist"), 'commit')
+
+ # exists, fail unless we force
+ ex_ref_path = far_away_head.path
+ self.failUnlessRaises(OSError, ref.rename, ex_ref_path)
+ # if it points to the same commit it works
+ far_away_head.commit = ref.commit
+ ref.rename(ex_ref_path)
+ assert ref.path == ex_ref_path and ref.object == orig_obj
+ assert ref.rename(ref.path).path == ex_ref_path # rename to same name
+
+ # create symbolic refs
+ symref_path = "symrefs/sym"
+ symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference)
+ assert symref.path == symref_path
+ assert symref.reference == cur_head.reference
+
+ self.failUnlessRaises(OSError, SymbolicReference.create, rw_repo, symref_path, cur_head.reference.commit)
+ # it works if the new ref points to the same reference
+ SymbolicReference.create(rw_repo, symref.path, symref.reference).path == symref.path
+ SymbolicReference.delete(rw_repo, symref)
+ # would raise if the symref wouldn't have been deleted
+ symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference)
+
+ # test symbolic references which are not at default locations like HEAD
+ # or FETCH_HEAD - they may also be at spots in refs of course
+ symbol_ref_path = "refs/symbol_ref"
+ symref = SymbolicReference(rw_repo, symbol_ref_path)
+ assert symref.path == symbol_ref_path
+ symbol_ref_abspath = os.path.join(rw_repo.git_dir, symref.path)
+
+ # set it
+ symref.reference = new_head
+ assert symref.reference == new_head
+ assert os.path.isfile(symbol_ref_abspath)
+ assert symref.commit == new_head.commit
+
+ for name in ('absname','folder/rela_name'):
+ symref_new_name = symref.rename(name)
+ assert isinstance(symref_new_name, SymbolicReference)
+ assert name in symref_new_name.path
+ assert symref_new_name.reference == new_head
+ assert symref_new_name == symref
+ assert not symref.is_detached
+ # END for each ref
+
+ # create a new non-head ref just to be sure we handle it even if packed
+ Reference.create(rw_repo, full_ref)
+
+ # test ref listing - assure we have packed refs
+ rw_repo.git.pack_refs(all=True, prune=True)
+ heads = rw_repo.heads
+ assert heads
+ assert new_head in heads
+ assert active_branch in heads
+ assert rw_repo.tags
+
+ # we should be able to iterate all symbolic refs as well - in that case
+ # we should expect only symbolic references to be returned
+ for symref in SymbolicReference.iter_items(rw_repo):
+ assert not symref.is_detached
+
+ # when iterating references, we can get references and symrefs
+ # when deleting all refs, I'd expect them to be gone ! Even from
+ # the packed ones
+ # For this to work, we must not be on any branch
+ rw_repo.head.reference = rw_repo.head.commit
+ deleted_refs = set()
+ for ref in Reference.iter_items(rw_repo):
+ if ref.is_detached:
+ ref.delete(rw_repo, ref)
+ deleted_refs.add(ref)
+ # END delete ref
+ # END for each ref to iterate and to delete
+ assert deleted_refs
+
+ for ref in Reference.iter_items(rw_repo):
+ if ref.is_detached:
+ assert ref not in deleted_refs
+ # END for each ref
+
+ # reattach head - head will not be returned if it is not a symbolic
+ # ref
+ rw_repo.head.reference = Head.create(rw_repo, "master")
+
+ # At least the head should still exist
+ assert os.path.isfile(os.path.join(rw_repo.git_dir, 'HEAD'))
+ refs = list(SymbolicReference.iter_items(rw_repo))
+ assert len(refs) == 1
+
+
+ # test creation of new refs from scratch
+ for path in ("basename", "dir/somename", "dir2/subdir/basename"):
+ # REFERENCES
+ ############
+ fpath = Reference.to_full_path(path)
+ ref_fp = Reference.from_path(rw_repo, fpath)
+ assert not ref_fp.is_valid()
+ ref = Reference(rw_repo, fpath)
+ assert ref == ref_fp
+
+ # can be created by assigning a commit
+ ref.commit = rw_repo.head.commit
+ assert ref.is_valid()
+
+ # if the assignment raises, the ref doesn't exist
+ Reference.delete(ref.repo, ref.path)
+ assert not ref.is_valid()
+ self.failUnlessRaises(ValueError, setattr, ref, 'commit', "nonsense")
+ assert not ref.is_valid()
+
+ # I am sure I had my reason to make it a class method at first, but
+ # now it doesn't make so much sense anymore, want an instance method as well
+ # See http://byronimo.lighthouseapp.com/projects/51787-gitpython/tickets/27
+ Reference.delete(ref.repo, ref.path)
+ assert not ref.is_valid()
+
+ ref.object = rw_repo.head.commit
+ assert ref.is_valid()
+
+ Reference.delete(ref.repo, ref.path)
+ assert not ref.is_valid()
+ self.failUnlessRaises(ValueError, setattr, ref,
'object', "nonsense") + assert not ref.is_valid() + + # END for each path + + def test_dereference_recursive(self): + # for now, just test the HEAD + assert SymbolicReference.dereference_recursive(self.rorepo, 'HEAD') + + def test_reflog(self): + assert isinstance(self.rorepo.heads.master.log(), RefLog) + diff --git a/git/test/test_remote.py b/git/test/test_remote.py index 3e9ba8b8..a7f1be22 100644 --- a/git/test/test_remote.py +++ b/git/test/test_remote.py @@ -16,482 +16,482 @@ import random random.seed(0) class TestRemoteProgress(RemoteProgress): - __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages' ) - def __init__(self): - super(TestRemoteProgress, self).__init__() - self._seen_lines = list() - self._stages_per_op = dict() - self._num_progress_messages = 0 - - def _parse_progress_line(self, line): - # we may remove the line later if it is dropped - # Keep it for debugging - self._seen_lines.append(line) - rval = super(TestRemoteProgress, self)._parse_progress_line(line) - assert len(line) > 1, "line %r too short" % line - return rval - - def line_dropped(self, line): - try: - self._seen_lines.remove(line) - except ValueError: - pass - - def update(self, op_code, cur_count, max_count=None, message=''): - # check each stage only comes once - op_id = op_code & self.OP_MASK - assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING) - - self._stages_per_op.setdefault(op_id, 0) - self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK) - - if op_code & (self.WRITING|self.END) == (self.WRITING|self.END): - assert message - # END check we get message - - self._num_progress_messages += 1 - - - def make_assertion(self): - # we don't always receive messages - if not self._seen_lines: - return - - # sometimes objects are not compressed which is okay - assert len(self._seen_ops) in (2,3) - assert self._stages_per_op - - # must have seen all stages - for op, stages in self._stages_per_op.items(): - assert stages & self.STAGE_MASK == self.STAGE_MASK - # END for each op/stage + __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages' ) + def __init__(self): + super(TestRemoteProgress, self).__init__() + self._seen_lines = list() + self._stages_per_op = dict() + self._num_progress_messages = 0 + + def _parse_progress_line(self, line): + # we may remove the line later if it is dropped + # Keep it for debugging + self._seen_lines.append(line) + rval = super(TestRemoteProgress, self)._parse_progress_line(line) + assert len(line) > 1, "line %r too short" % line + return rval + + def line_dropped(self, line): + try: + self._seen_lines.remove(line) + except ValueError: + pass + + def update(self, op_code, cur_count, max_count=None, message=''): + # check each stage only comes once + op_id = op_code & self.OP_MASK + assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING) + + self._stages_per_op.setdefault(op_id, 0) + self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK) + + if op_code & (self.WRITING|self.END) == (self.WRITING|self.END): + assert message + # END check we get message + + self._num_progress_messages += 1 + + + def make_assertion(self): + # we don't always receive messages + if not self._seen_lines: + return + + # sometimes objects are not compressed which is okay + assert len(self._seen_ops) in (2,3) + assert self._stages_per_op + + # must have seen all stages + for op, stages in self._stages_per_op.items(): + assert stages & self.STAGE_MASK == self.STAGE_MASK + # END for each 
op/stage - def assert_received_message(self): - assert self._num_progress_messages - + def assert_received_message(self): + assert self._num_progress_messages + class TestRemote(TestBase): - - def _print_fetchhead(self, repo): - fp = open(os.path.join(repo.git_dir, "FETCH_HEAD")) - fp.close() - - - def _do_test_fetch_result(self, results, remote): - # self._print_fetchhead(remote.repo) - assert len(results) > 0 and isinstance(results[0], FetchInfo) - for info in results: - assert isinstance(info.note, basestring) - if isinstance(info.ref, Reference): - assert info.flags != 0 - # END reference type flags handling - assert isinstance(info.ref, (SymbolicReference, Reference)) - if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD): - assert isinstance(info.old_commit, Commit) - else: - assert info.old_commit is None - # END forced update checking - # END for each info - - def _do_test_push_result(self, results, remote): - assert len(results) > 0 and isinstance(results[0], PushInfo) - for info in results: - assert info.flags - assert isinstance(info.summary, basestring) - if info.old_commit is not None: - assert isinstance(info.old_commit, Commit) - if info.flags & info.ERROR: - has_one = False - for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE): - has_one |= bool(info.flags & bitflag) - # END for each bitflag - assert has_one - else: - # there must be a remote commit - if info.flags & info.DELETED == 0: - assert isinstance(info.local_ref, Reference) - else: - assert info.local_ref is None - assert type(info.remote_ref) in (TagReference, RemoteReference) - # END error checking - # END for each info - - - def _do_test_fetch_info(self, repo): - self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "nonsense", '') - self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '') - - def _commit_random_file(self, repo): - #Create a file with a random name and random data and commit it to repo. 
- # Return the commited absolute file path - index = repo.index - new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo) - index.add([new_file]) - index.commit("Committing %s" % new_file) - return new_file - - def _do_test_fetch(self,remote, rw_repo, remote_repo): - # specialized fetch testing to de-clutter the main test - self._do_test_fetch_info(rw_repo) - - def fetch_and_test(remote, **kwargs): - progress = TestRemoteProgress() - kwargs['progress'] = progress - res = remote.fetch(**kwargs) - progress.make_assertion() - self._do_test_fetch_result(res, remote) - return res - # END fetch and check - - def get_info(res, remote, name): - return res["%s/%s"%(remote,name)] - - # put remote head to master as it is garantueed to exist - remote_repo.head.reference = remote_repo.heads.master - - res = fetch_and_test(remote) - # all uptodate - for info in res: - assert info.flags & info.HEAD_UPTODATE - - # rewind remote head to trigger rejection - # index must be false as remote is a bare repo - rhead = remote_repo.head - remote_commit = rhead.commit - rhead.reset("HEAD~2", index=False) - res = fetch_and_test(remote) - mkey = "%s/%s"%(remote,'master') - master_info = res[mkey] - assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None - - # normal fast forward - set head back to previous one - rhead.commit = remote_commit - res = fetch_and_test(remote) - assert res[mkey].flags & FetchInfo.FAST_FORWARD - - # new remote branch - new_remote_branch = Head.create(remote_repo, "new_branch") - res = fetch_and_test(remote) - new_branch_info = get_info(res, remote, new_remote_branch) - assert new_branch_info.flags & FetchInfo.NEW_HEAD - - # remote branch rename ( causes creation of a new one locally ) - new_remote_branch.rename("other_branch_name") - res = fetch_and_test(remote) - other_branch_info = get_info(res, remote, new_remote_branch) - assert other_branch_info.ref.commit == new_branch_info.ref.commit - - # remove new branch - Head.delete(new_remote_branch.repo, new_remote_branch) - res = fetch_and_test(remote) - # deleted remote will not be fetched - self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch) - - # prune stale tracking branches - stale_refs = remote.stale_refs - assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference) - RemoteReference.delete(rw_repo, *stale_refs) - - # test single branch fetch with refspec including target remote - res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote) - assert len(res) == 1 and get_info(res, remote, 'master') - - # ... with respec and no target - res = fetch_and_test(remote, refspec='master') - assert len(res) == 1 - - # add new tag reference - rtag = TagReference.create(remote_repo, "1.0-RV_hello.there") - res = fetch_and_test(remote, tags=True) - tinfo = res[str(rtag)] - assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit - assert tinfo.flags & tinfo.NEW_TAG - - # adjust tag commit - Reference.set_object(rtag, rhead.commit.parents[0].parents[0]) - res = fetch_and_test(remote, tags=True) - tinfo = res[str(rtag)] - assert tinfo.commit == rtag.commit - assert tinfo.flags & tinfo.TAG_UPDATE - - # delete remote tag - local one will stay - TagReference.delete(remote_repo, rtag) - res = fetch_and_test(remote, tags=True) - self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag)) - - # provoke to receive actual objects to see what kind of output we have to - # expect. 
For that we need a remote transport protocol - # Create a new UN-shared repo and fetch into it after we pushed a change - # to the shared repo - other_repo_dir = tempfile.mktemp("other_repo") - # must clone with a local path for the repo implementation not to freak out - # as it wants local paths only ( which I can understand ) - other_repo = remote_repo.clone(other_repo_dir, shared=False) - remote_repo_url = "git://localhost%s"%remote_repo.git_dir - - # put origin to git-url - other_origin = other_repo.remotes.origin - other_origin.config_writer.set("url", remote_repo_url) - # it automatically creates alternates as remote_repo is shared as well. - # It will use the transport though and ignore alternates when fetching - # assert not other_repo.alternates # this would fail - - # assure we are in the right state - rw_repo.head.reset(remote.refs.master, working_tree=True) - try: - self._commit_random_file(rw_repo) - remote.push(rw_repo.head.reference) - - # here I would expect to see remote-information about packing - # objects and so on. Unfortunately, this does not happen - # if we are redirecting the output - git explicitly checks for this - # and only provides progress information to ttys - res = fetch_and_test(other_origin) - finally: - shutil.rmtree(other_repo_dir) - # END test and cleanup - - def _assert_push_and_pull(self,remote, rw_repo, remote_repo): - # push our changes - lhead = rw_repo.head - lindex = rw_repo.index - # assure we are on master and it is checked out where the remote is - try: - lhead.reference = rw_repo.heads.master - except AttributeError: - # if the author is on a non-master branch, the clones might not have - # a local master yet. We simply create it - lhead.reference = rw_repo.create_head('master') - # END master handling - lhead.reset(remote.refs.master, working_tree=True) - - # push without spec should fail ( without further configuration ) - # well, works nicely - # self.failUnlessRaises(GitCommandError, remote.push) - - # simple file push - self._commit_random_file(rw_repo) - progress = TestRemoteProgress() - res = remote.push(lhead.reference, progress) - assert isinstance(res, IterableList) - self._do_test_push_result(res, remote) - progress.make_assertion() - - # rejected - undo last commit - lhead.reset("HEAD~1") - res = remote.push(lhead.reference) - assert res[0].flags & PushInfo.ERROR - assert res[0].flags & PushInfo.REJECTED - self._do_test_push_result(res, remote) - - # force rejected pull - res = remote.push('+%s' % lhead.reference) - assert res[0].flags & PushInfo.ERROR == 0 - assert res[0].flags & PushInfo.FORCED_UPDATE - self._do_test_push_result(res, remote) - - # invalid refspec - res = remote.push("hellothere") - assert len(res) == 0 - - # push new tags - progress = TestRemoteProgress() - to_be_updated = "my_tag.1.0RV" - new_tag = TagReference.create(rw_repo, to_be_updated) - other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message") - res = remote.push(progress=progress, tags=True) - assert res[-1].flags & PushInfo.NEW_TAG - progress.make_assertion() - self._do_test_push_result(res, remote) - - # update push new tags - # Rejection is default - new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True) - res = remote.push(tags=True) - self._do_test_push_result(res, remote) - assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR - - # push force this tag - res = remote.push("+%s" % new_tag.path) - assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & 
PushInfo.FORCED_UPDATE - - # delete tag - have to do it using refspec - res = remote.push(":%s" % new_tag.path) - self._do_test_push_result(res, remote) - assert res[0].flags & PushInfo.DELETED - # Currently progress is not properly transferred, especially not using - # the git daemon - # progress.assert_received_message() - - # push new branch - new_head = Head.create(rw_repo, "my_new_branch") - progress = TestRemoteProgress() - res = remote.push(new_head, progress) - assert res[0].flags & PushInfo.NEW_HEAD - progress.make_assertion() - self._do_test_push_result(res, remote) - - # delete new branch on the remote end and locally - res = remote.push(":%s" % new_head.path) - self._do_test_push_result(res, remote) - Head.delete(rw_repo, new_head) - assert res[-1].flags & PushInfo.DELETED - - # --all - res = remote.push(all=True) - self._do_test_push_result(res, remote) - - remote.pull('master') - - # cleanup - delete created tags and branches as we are in an innerloop on - # the same repository - TagReference.delete(rw_repo, new_tag, other_tag) - remote.push(":%s" % other_tag.path) - - @with_rw_and_rw_remote_repo('0.1.6') - def test_base(self, rw_repo, remote_repo): - num_remotes = 0 - remote_set = set() - ran_fetch_test = False - - for remote in rw_repo.remotes: - num_remotes += 1 - assert remote == remote - assert str(remote) != repr(remote) - remote_set.add(remote) - remote_set.add(remote) # should already exist - - # REFS - refs = remote.refs - assert refs - for ref in refs: - assert ref.remote_name == remote.name - assert ref.remote_head - # END for each ref - - # OPTIONS - # cannot use 'fetch' key anymore as it is now a method - for opt in ("url", ): - val = getattr(remote, opt) - reader = remote.config_reader - assert reader.get(opt) == val - assert reader.get_value(opt, None) == val - - # unable to write with a reader - self.failUnlessRaises(IOError, reader.set, opt, "test") - - # change value - writer = remote.config_writer - new_val = "myval" - writer.set(opt, new_val) - assert writer.get(opt) == new_val - writer.set(opt, val) - assert writer.get(opt) == val - del(writer) - assert getattr(remote, opt) == val - # END for each default option key - - # RENAME - other_name = "totally_other_name" - prev_name = remote.name - assert remote.rename(other_name) == remote - assert prev_name != remote.name - # multiple times - for time in range(2): - assert remote.rename(prev_name).name == prev_name - # END for each rename ( back to prev_name ) - - # PUSH/PULL TESTING - self._assert_push_and_pull(remote, rw_repo, remote_repo) - - # FETCH TESTING - # Only for remotes - local cases are the same or less complicated - # as additional progress information will never be emitted - if remote.name == "daemon_origin": - self._do_test_fetch(remote, rw_repo, remote_repo) - ran_fetch_test = True - # END fetch test - - remote.update() - # END for each remote - - assert ran_fetch_test - assert num_remotes - assert num_remotes == len(remote_set) - - origin = rw_repo.remote('origin') - assert origin == rw_repo.remotes.origin - - @with_rw_repo('HEAD', bare=True) - def test_creation_and_removal(self, bare_rw_repo): - new_name = "test_new_one" - arg_list = (new_name, "git@server:hello.git") - remote = Remote.create(bare_rw_repo, *arg_list ) - assert remote.name == "test_new_one" - assert remote in bare_rw_repo.remotes - - # create same one again - self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list) - - Remote.remove(bare_rw_repo, new_name) - - for remote in bare_rw_repo.remotes: - if 
remote.name == new_name: - raise AssertionError("Remote removal failed") - # END if deleted remote matches existing remote's name - # END for each remote - - def test_fetch_info(self): - # assure we can handle remote-tracking branches - fetch_info_line_fmt = "c437ee5deb8d00cf02f03720693e4c802e99f390 not-for-merge %s '0.3' of git://github.com/gitpython-developers/GitPython" - remote_info_line_fmt = "* [new branch] nomatter -> %s" - fi = FetchInfo._from_line(self.rorepo, - remote_info_line_fmt % "local/master", - fetch_info_line_fmt % 'remote-tracking branch') - assert fi.ref.is_valid() - assert fi.ref.commit - - # handles non-default refspecs: One can specify a different path in refs/remotes - # or a special path just in refs/something for instance - - fi = FetchInfo._from_line(self.rorepo, - remote_info_line_fmt % "subdir/tagname", - fetch_info_line_fmt % 'tag') - - assert isinstance(fi.ref, TagReference) - assert fi.ref.path.startswith('refs/tags') - - # it could be in a remote direcftory though - fi = FetchInfo._from_line(self.rorepo, - remote_info_line_fmt % "remotename/tags/tagname", - fetch_info_line_fmt % 'tag') - - assert isinstance(fi.ref, TagReference) - assert fi.ref.path.startswith('refs/remotes/') - - # it can also be anywhere ! - tag_path = "refs/something/remotename/tags/tagname" - fi = FetchInfo._from_line(self.rorepo, - remote_info_line_fmt % tag_path, - fetch_info_line_fmt % 'tag') - - assert isinstance(fi.ref, TagReference) - assert fi.ref.path == tag_path - - # branches default to refs/remotes - fi = FetchInfo._from_line(self.rorepo, - remote_info_line_fmt % "remotename/branch", - fetch_info_line_fmt % 'branch') - - assert isinstance(fi.ref, RemoteReference) - assert fi.ref.remote_name == 'remotename' - - # but you can force it anywhere, in which case we only have a references - fi = FetchInfo._from_line(self.rorepo, - remote_info_line_fmt % "refs/something/branch", - fetch_info_line_fmt % 'branch') - - assert type(fi.ref) is Reference - assert fi.ref.path == "refs/something/branch" - - + + def _print_fetchhead(self, repo): + fp = open(os.path.join(repo.git_dir, "FETCH_HEAD")) + fp.close() + + + def _do_test_fetch_result(self, results, remote): + # self._print_fetchhead(remote.repo) + assert len(results) > 0 and isinstance(results[0], FetchInfo) + for info in results: + assert isinstance(info.note, basestring) + if isinstance(info.ref, Reference): + assert info.flags != 0 + # END reference type flags handling + assert isinstance(info.ref, (SymbolicReference, Reference)) + if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD): + assert isinstance(info.old_commit, Commit) + else: + assert info.old_commit is None + # END forced update checking + # END for each info + + def _do_test_push_result(self, results, remote): + assert len(results) > 0 and isinstance(results[0], PushInfo) + for info in results: + assert info.flags + assert isinstance(info.summary, basestring) + if info.old_commit is not None: + assert isinstance(info.old_commit, Commit) + if info.flags & info.ERROR: + has_one = False + for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE): + has_one |= bool(info.flags & bitflag) + # END for each bitflag + assert has_one + else: + # there must be a remote commit + if info.flags & info.DELETED == 0: + assert isinstance(info.local_ref, Reference) + else: + assert info.local_ref is None + assert type(info.remote_ref) in (TagReference, RemoteReference) + # END error checking + # END for each info + + + def _do_test_fetch_info(self, repo): + 
self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "nonsense", '')
+ self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '')
+
+ def _commit_random_file(self, repo):
+ # Create a file with a random name and random data and commit it to repo.
+ # Return the committed absolute file path
+ index = repo.index
+ new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo)
+ index.add([new_file])
+ index.commit("Committing %s" % new_file)
+ return new_file
+
+ def _do_test_fetch(self,remote, rw_repo, remote_repo):
+ # specialized fetch testing to de-clutter the main test
+ self._do_test_fetch_info(rw_repo)
+
+ def fetch_and_test(remote, **kwargs):
+ progress = TestRemoteProgress()
+ kwargs['progress'] = progress
+ res = remote.fetch(**kwargs)
+ progress.make_assertion()
+ self._do_test_fetch_result(res, remote)
+ return res
+ # END fetch and check
+
+ def get_info(res, remote, name):
+ return res["%s/%s"%(remote,name)]
+
+ # put remote head to master as it is guaranteed to exist
+ remote_repo.head.reference = remote_repo.heads.master
+
+ res = fetch_and_test(remote)
+ # all uptodate
+ for info in res:
+ assert info.flags & info.HEAD_UPTODATE
+
+ # rewind remote head to trigger rejection
+ # index must be false as remote is a bare repo
+ rhead = remote_repo.head
+ remote_commit = rhead.commit
+ rhead.reset("HEAD~2", index=False)
+ res = fetch_and_test(remote)
+ mkey = "%s/%s"%(remote,'master')
+ master_info = res[mkey]
+ assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None
+
+ # normal fast forward - set head back to previous one
+ rhead.commit = remote_commit
+ res = fetch_and_test(remote)
+ assert res[mkey].flags & FetchInfo.FAST_FORWARD
+
+ # new remote branch
+ new_remote_branch = Head.create(remote_repo, "new_branch")
+ res = fetch_and_test(remote)
+ new_branch_info = get_info(res, remote, new_remote_branch)
+ assert new_branch_info.flags & FetchInfo.NEW_HEAD
+
+ # remote branch rename ( causes creation of a new one locally )
+ new_remote_branch.rename("other_branch_name")
+ res = fetch_and_test(remote)
+ other_branch_info = get_info(res, remote, new_remote_branch)
+ assert other_branch_info.ref.commit == new_branch_info.ref.commit
+
+ # remove new branch
+ Head.delete(new_remote_branch.repo, new_remote_branch)
+ res = fetch_and_test(remote)
+ # deleted remote will not be fetched
+ self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch)
+
+ # prune stale tracking branches
+ stale_refs = remote.stale_refs
+ assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference)
+ RemoteReference.delete(rw_repo, *stale_refs)
+
+ # test single branch fetch with refspec including target remote
+ res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote)
+ assert len(res) == 1 and get_info(res, remote, 'master')
+
+ # ... with refspec and no target
+ res = fetch_and_test(remote, refspec='master')
+ assert len(res) == 1
+
+ # add new tag reference
+ rtag = TagReference.create(remote_repo, "1.0-RV_hello.there")
+ res = fetch_and_test(remote, tags=True)
+ tinfo = res[str(rtag)]
+ assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit
+ assert tinfo.flags & tinfo.NEW_TAG
+
+ # adjust tag commit
+ Reference.set_object(rtag, rhead.commit.parents[0].parents[0])
+ res = fetch_and_test(remote, tags=True)
+ tinfo = res[str(rtag)]
+ assert tinfo.commit == rtag.commit
+ assert tinfo.flags & tinfo.TAG_UPDATE
+
+ # delete remote tag - local one will stay
+ TagReference.delete(remote_repo, rtag)
+ res = fetch_and_test(remote, tags=True)
+ self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag))
+
+ # provoke to receive actual objects to see what kind of output we have to
+ # expect. For that we need a remote transport protocol
+ # Create a new UN-shared repo and fetch into it after we pushed a change
+ # to the shared repo
+ other_repo_dir = tempfile.mktemp("other_repo")
+ # must clone with a local path for the repo implementation not to freak out
+ # as it wants local paths only ( which I can understand )
+ other_repo = remote_repo.clone(other_repo_dir, shared=False)
+ remote_repo_url = "git://localhost%s"%remote_repo.git_dir
+
+ # put origin to git-url
+ other_origin = other_repo.remotes.origin
+ other_origin.config_writer.set("url", remote_repo_url)
+ # it automatically creates alternates as remote_repo is shared as well.
+ # It will use the transport though and ignore alternates when fetching
+ # assert not other_repo.alternates # this would fail
+
+ # assure we are in the right state
+ rw_repo.head.reset(remote.refs.master, working_tree=True)
+ try:
+ self._commit_random_file(rw_repo)
+ remote.push(rw_repo.head.reference)
+
+ # here I would expect to see remote-information about packing
+ # objects and so on. Unfortunately, this does not happen
+ # if we are redirecting the output - git explicitly checks for this
+ # and only provides progress information to ttys
+ res = fetch_and_test(other_origin)
+ finally:
+ shutil.rmtree(other_repo_dir)
+ # END test and cleanup
+
+ def _assert_push_and_pull(self,remote, rw_repo, remote_repo):
+ # push our changes
+ lhead = rw_repo.head
+ lindex = rw_repo.index
+ # assure we are on master and it is checked out where the remote is
+ try:
+ lhead.reference = rw_repo.heads.master
+ except AttributeError:
+ # if the author is on a non-master branch, the clones might not have
+ # a local master yet.
We simply create it + lhead.reference = rw_repo.create_head('master') + # END master handling + lhead.reset(remote.refs.master, working_tree=True) + + # push without spec should fail ( without further configuration ) + # well, works nicely + # self.failUnlessRaises(GitCommandError, remote.push) + + # simple file push + self._commit_random_file(rw_repo) + progress = TestRemoteProgress() + res = remote.push(lhead.reference, progress) + assert isinstance(res, IterableList) + self._do_test_push_result(res, remote) + progress.make_assertion() + + # rejected - undo last commit + lhead.reset("HEAD~1") + res = remote.push(lhead.reference) + assert res[0].flags & PushInfo.ERROR + assert res[0].flags & PushInfo.REJECTED + self._do_test_push_result(res, remote) + + # force rejected pull + res = remote.push('+%s' % lhead.reference) + assert res[0].flags & PushInfo.ERROR == 0 + assert res[0].flags & PushInfo.FORCED_UPDATE + self._do_test_push_result(res, remote) + + # invalid refspec + res = remote.push("hellothere") + assert len(res) == 0 + + # push new tags + progress = TestRemoteProgress() + to_be_updated = "my_tag.1.0RV" + new_tag = TagReference.create(rw_repo, to_be_updated) + other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message") + res = remote.push(progress=progress, tags=True) + assert res[-1].flags & PushInfo.NEW_TAG + progress.make_assertion() + self._do_test_push_result(res, remote) + + # update push new tags + # Rejection is default + new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True) + res = remote.push(tags=True) + self._do_test_push_result(res, remote) + assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR + + # push force this tag + res = remote.push("+%s" % new_tag.path) + assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & PushInfo.FORCED_UPDATE + + # delete tag - have to do it using refspec + res = remote.push(":%s" % new_tag.path) + self._do_test_push_result(res, remote) + assert res[0].flags & PushInfo.DELETED + # Currently progress is not properly transferred, especially not using + # the git daemon + # progress.assert_received_message() + + # push new branch + new_head = Head.create(rw_repo, "my_new_branch") + progress = TestRemoteProgress() + res = remote.push(new_head, progress) + assert res[0].flags & PushInfo.NEW_HEAD + progress.make_assertion() + self._do_test_push_result(res, remote) + + # delete new branch on the remote end and locally + res = remote.push(":%s" % new_head.path) + self._do_test_push_result(res, remote) + Head.delete(rw_repo, new_head) + assert res[-1].flags & PushInfo.DELETED + + # --all + res = remote.push(all=True) + self._do_test_push_result(res, remote) + + remote.pull('master') + + # cleanup - delete created tags and branches as we are in an innerloop on + # the same repository + TagReference.delete(rw_repo, new_tag, other_tag) + remote.push(":%s" % other_tag.path) + + @with_rw_and_rw_remote_repo('0.1.6') + def test_base(self, rw_repo, remote_repo): + num_remotes = 0 + remote_set = set() + ran_fetch_test = False + + for remote in rw_repo.remotes: + num_remotes += 1 + assert remote == remote + assert str(remote) != repr(remote) + remote_set.add(remote) + remote_set.add(remote) # should already exist + + # REFS + refs = remote.refs + assert refs + for ref in refs: + assert ref.remote_name == remote.name + assert ref.remote_head + # END for each ref + + # OPTIONS + # cannot use 'fetch' key anymore as it is now a method + for opt in ("url", ): + val = 
getattr(remote, opt)
+ reader = remote.config_reader
+ assert reader.get(opt) == val
+ assert reader.get_value(opt, None) == val
+
+ # unable to write with a reader
+ self.failUnlessRaises(IOError, reader.set, opt, "test")
+
+ # change value
+ writer = remote.config_writer
+ new_val = "myval"
+ writer.set(opt, new_val)
+ assert writer.get(opt) == new_val
+ writer.set(opt, val)
+ assert writer.get(opt) == val
+ del(writer)
+ assert getattr(remote, opt) == val
+ # END for each default option key
+
+ # RENAME
+ other_name = "totally_other_name"
+ prev_name = remote.name
+ assert remote.rename(other_name) == remote
+ assert prev_name != remote.name
+ # multiple times
+ for time in range(2):
+ assert remote.rename(prev_name).name == prev_name
+ # END for each rename ( back to prev_name )
+
+ # PUSH/PULL TESTING
+ self._assert_push_and_pull(remote, rw_repo, remote_repo)
+
+ # FETCH TESTING
+ # Only for remotes - local cases are the same or less complicated
+ # as additional progress information will never be emitted
+ if remote.name == "daemon_origin":
+ self._do_test_fetch(remote, rw_repo, remote_repo)
+ ran_fetch_test = True
+ # END fetch test
+
+ remote.update()
+ # END for each remote
+
+ assert ran_fetch_test
+ assert num_remotes
+ assert num_remotes == len(remote_set)
+
+ origin = rw_repo.remote('origin')
+ assert origin == rw_repo.remotes.origin
+
+ @with_rw_repo('HEAD', bare=True)
+ def test_creation_and_removal(self, bare_rw_repo):
+ new_name = "test_new_one"
+ arg_list = (new_name, "git@server:hello.git")
+ remote = Remote.create(bare_rw_repo, *arg_list )
+ assert remote.name == "test_new_one"
+ assert remote in bare_rw_repo.remotes
+
+ # create same one again
+ self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list)
+
+ Remote.remove(bare_rw_repo, new_name)
+
+ for remote in bare_rw_repo.remotes:
+ if remote.name == new_name:
+ raise AssertionError("Remote removal failed")
+ # END if deleted remote matches existing remote's name
+ # END for each remote
+
+ def test_fetch_info(self):
+ # assure we can handle remote-tracking branches
+ fetch_info_line_fmt = "c437ee5deb8d00cf02f03720693e4c802e99f390 not-for-merge %s '0.3' of git://github.com/gitpython-developers/GitPython"
+ remote_info_line_fmt = "* [new branch] nomatter -> %s"
+ fi = FetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "local/master",
+ fetch_info_line_fmt % 'remote-tracking branch')
+ assert fi.ref.is_valid()
+ assert fi.ref.commit
+
+ # handles non-default refspecs: One can specify a different path in refs/remotes
+ # or a special path just in refs/something for instance
+
+ fi = FetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "subdir/tagname",
+ fetch_info_line_fmt % 'tag')
+
+ assert isinstance(fi.ref, TagReference)
+ assert fi.ref.path.startswith('refs/tags')
+
+ # it could be in a remote directory though
+ fi = FetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "remotename/tags/tagname",
+ fetch_info_line_fmt % 'tag')
+
+ assert isinstance(fi.ref, TagReference)
+ assert fi.ref.path.startswith('refs/remotes/')
+
+ # it can also be anywhere !
+ tag_path = "refs/something/remotename/tags/tagname" + fi = FetchInfo._from_line(self.rorepo, + remote_info_line_fmt % tag_path, + fetch_info_line_fmt % 'tag') + + assert isinstance(fi.ref, TagReference) + assert fi.ref.path == tag_path + + # branches default to refs/remotes + fi = FetchInfo._from_line(self.rorepo, + remote_info_line_fmt % "remotename/branch", + fetch_info_line_fmt % 'branch') + + assert isinstance(fi.ref, RemoteReference) + assert fi.ref.remote_name == 'remotename' + + # but you can force it anywhere, in which case we only have a references + fi = FetchInfo._from_line(self.rorepo, + remote_info_line_fmt % "refs/something/branch", + fetch_info_line_fmt % 'branch') + + assert type(fi.ref) is Reference + assert fi.ref.path == "refs/something/branch" + + diff --git a/git/test/test_repo.py b/git/test/test_repo.py index 18d5c1b8..6b6fdb4a 100644 --- a/git/test/test_repo.py +++ b/git/test/test_repo.py @@ -16,603 +16,603 @@ from cStringIO import StringIO class TestRepo(TestBase): - - @raises(InvalidGitRepositoryError) - def test_new_should_raise_on_invalid_repo_location(self): - Repo(tempfile.gettempdir()) + + @raises(InvalidGitRepositoryError) + def test_new_should_raise_on_invalid_repo_location(self): + Repo(tempfile.gettempdir()) - @raises(NoSuchPathError) - def test_new_should_raise_on_non_existant_path(self): - Repo("repos/foobar") + @raises(NoSuchPathError) + def test_new_should_raise_on_non_existant_path(self): + Repo("repos/foobar") - def test_repo_creation_from_different_paths(self): - r_from_gitdir = Repo(self.rorepo.git_dir) - assert r_from_gitdir.git_dir == self.rorepo.git_dir - assert r_from_gitdir.git_dir.endswith('.git') - assert not self.rorepo.git.working_dir.endswith('.git') - assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir + def test_repo_creation_from_different_paths(self): + r_from_gitdir = Repo(self.rorepo.git_dir) + assert r_from_gitdir.git_dir == self.rorepo.git_dir + assert r_from_gitdir.git_dir.endswith('.git') + assert not self.rorepo.git.working_dir.endswith('.git') + assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir - def test_description(self): - txt = "Test repository" - self.rorepo.description = txt - assert_equal(self.rorepo.description, txt) + def test_description(self): + txt = "Test repository" + self.rorepo.description = txt + assert_equal(self.rorepo.description, txt) - def test_heads_should_return_array_of_head_objects(self): - for head in self.rorepo.heads: - assert_equal(Head, head.__class__) + def test_heads_should_return_array_of_head_objects(self): + for head in self.rorepo.heads: + assert_equal(Head, head.__class__) - def test_heads_should_populate_head_data(self): - for head in self.rorepo.heads: - assert head.name - assert isinstance(head.commit,Commit) - # END for each head - - assert isinstance(self.rorepo.heads.master, Head) - assert isinstance(self.rorepo.heads['master'], Head) - - def test_tree_from_revision(self): - tree = self.rorepo.tree('0.1.6') - assert len(tree.hexsha) == 40 - assert tree.type == "tree" - assert self.rorepo.tree(tree) == tree - - # try from invalid revision that does not exist - self.failUnlessRaises(BadObject, self.rorepo.tree, 'hello world') - - def test_commit_from_revision(self): - commit = self.rorepo.commit('0.1.4') - assert commit.type == 'commit' - assert self.rorepo.commit(commit) == commit + def test_heads_should_populate_head_data(self): + for head in self.rorepo.heads: + assert head.name + assert isinstance(head.commit,Commit) + # END for each head + + 
assert isinstance(self.rorepo.heads.master, Head) + assert isinstance(self.rorepo.heads['master'], Head) + + def test_tree_from_revision(self): + tree = self.rorepo.tree('0.1.6') + assert len(tree.hexsha) == 40 + assert tree.type == "tree" + assert self.rorepo.tree(tree) == tree + + # try from invalid revision that does not exist + self.failUnlessRaises(BadObject, self.rorepo.tree, 'hello world') + + def test_commit_from_revision(self): + commit = self.rorepo.commit('0.1.4') + assert commit.type == 'commit' + assert self.rorepo.commit(commit) == commit - def test_commits(self): - mc = 10 - commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc)) - assert len(commits) == mc - - c = commits[0] - assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.hexsha) - assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.hexsha for p in c.parents]) - assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.hexsha) - assert_equal("Michael Trier", c.author.name) - assert_equal("mtrier@gmail.com", c.author.email) - assert_equal(1232829715, c.authored_date) - assert_equal(5*3600, c.author_tz_offset) - assert_equal("Michael Trier", c.committer.name) - assert_equal("mtrier@gmail.com", c.committer.email) - assert_equal(1232829715, c.committed_date) - assert_equal(5*3600, c.committer_tz_offset) - assert_equal("Bumped version 0.1.6\n", c.message) + def test_commits(self): + mc = 10 + commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc)) + assert len(commits) == mc + + c = commits[0] + assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.hexsha) + assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.hexsha for p in c.parents]) + assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.hexsha) + assert_equal("Michael Trier", c.author.name) + assert_equal("mtrier@gmail.com", c.author.email) + assert_equal(1232829715, c.authored_date) + assert_equal(5*3600, c.author_tz_offset) + assert_equal("Michael Trier", c.committer.name) + assert_equal("mtrier@gmail.com", c.committer.email) + assert_equal(1232829715, c.committed_date) + assert_equal(5*3600, c.committer_tz_offset) + assert_equal("Bumped version 0.1.6\n", c.message) - c = commits[1] - assert isinstance(c.parents, tuple) + c = commits[1] + assert isinstance(c.parents, tuple) - def test_trees(self): - mc = 30 - num_trees = 0 - for tree in self.rorepo.iter_trees('0.1.5', max_count=mc): - num_trees += 1 - assert isinstance(tree, Tree) - # END for each tree - assert num_trees == mc + def test_trees(self): + mc = 30 + num_trees = 0 + for tree in self.rorepo.iter_trees('0.1.5', max_count=mc): + num_trees += 1 + assert isinstance(tree, Tree) + # END for each tree + assert num_trees == mc - def _assert_empty_repo(self, repo): - # test all kinds of things with an empty, freshly initialized repo. 
- # It should throw good errors - - # entries should be empty - assert len(repo.index.entries) == 0 - - # head is accessible - assert repo.head - assert repo.head.ref - assert not repo.head.is_valid() - - # we can change the head to some other ref - head_ref = Head.from_path(repo, Head.to_full_path('some_head')) - assert not head_ref.is_valid() - repo.head.ref = head_ref - - # is_dirty can handle all kwargs - for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)): - assert not repo.is_dirty(*args) - # END for each arg - - # we can add a file to the index ( if we are not bare ) - if not repo.bare: - pass - # END test repos with working tree - + def _assert_empty_repo(self, repo): + # test all kinds of things with an empty, freshly initialized repo. + # It should throw good errors + + # entries should be empty + assert len(repo.index.entries) == 0 + + # head is accessible + assert repo.head + assert repo.head.ref + assert not repo.head.is_valid() + + # we can change the head to some other ref + head_ref = Head.from_path(repo, Head.to_full_path('some_head')) + assert not head_ref.is_valid() + repo.head.ref = head_ref + + # is_dirty can handle all kwargs + for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)): + assert not repo.is_dirty(*args) + # END for each arg + + # we can add a file to the index ( if we are not bare ) + if not repo.bare: + pass + # END test repos with working tree + - def test_init(self): - prev_cwd = os.getcwd() - os.chdir(tempfile.gettempdir()) - git_dir_rela = "repos/foo/bar.git" - del_dir_abs = os.path.abspath("repos") - git_dir_abs = os.path.abspath(git_dir_rela) - try: - # with specific path - for path in (git_dir_rela, git_dir_abs): - r = Repo.init(path=path, bare=True) - assert isinstance(r, Repo) - assert r.bare == True - assert os.path.isdir(r.git_dir) - - self._assert_empty_repo(r) - - # test clone - clone_path = path + "_clone" - rc = r.clone(clone_path) - self._assert_empty_repo(rc) - - - try: - shutil.rmtree(clone_path) - except OSError: - # when relative paths are used, the clone may actually be inside - # of the parent directory - pass - # END exception handling - - # try again, this time with the absolute version - rc = Repo.clone_from(r.git_dir, clone_path) - self._assert_empty_repo(rc) - - shutil.rmtree(git_dir_abs) - try: - shutil.rmtree(clone_path) - except OSError: - # when relative paths are used, the clone may actually be inside - # of the parent directory - pass - # END exception handling - - # END for each path - - os.makedirs(git_dir_rela) - os.chdir(git_dir_rela) - r = Repo.init(bare=False) - r.bare == False - - self._assert_empty_repo(r) - finally: - try: - shutil.rmtree(del_dir_abs) - except OSError: - pass - os.chdir(prev_cwd) - # END restore previous state - - def test_bare_property(self): - self.rorepo.bare + def test_init(self): + prev_cwd = os.getcwd() + os.chdir(tempfile.gettempdir()) + git_dir_rela = "repos/foo/bar.git" + del_dir_abs = os.path.abspath("repos") + git_dir_abs = os.path.abspath(git_dir_rela) + try: + # with specific path + for path in (git_dir_rela, git_dir_abs): + r = Repo.init(path=path, bare=True) + assert isinstance(r, Repo) + assert r.bare == True + assert os.path.isdir(r.git_dir) + + self._assert_empty_repo(r) + + # test clone + clone_path = path + "_clone" + rc = r.clone(clone_path) + self._assert_empty_repo(rc) + + + try: + shutil.rmtree(clone_path) + except OSError: + # when relative paths are used, the clone may actually be inside + # of the parent directory + pass + # END exception handling + + # try again, this time with the 
absolute version + rc = Repo.clone_from(r.git_dir, clone_path) + self._assert_empty_repo(rc) + + shutil.rmtree(git_dir_abs) + try: + shutil.rmtree(clone_path) + except OSError: + # when relative paths are used, the clone may actually be inside + # of the parent directory + pass + # END exception handling + + # END for each path + + os.makedirs(git_dir_rela) + os.chdir(git_dir_rela) + r = Repo.init(bare=False) + r.bare == False + + self._assert_empty_repo(r) + finally: + try: + shutil.rmtree(del_dir_abs) + except OSError: + pass + os.chdir(prev_cwd) + # END restore previous state + + def test_bare_property(self): + self.rorepo.bare - def test_daemon_export(self): - orig_val = self.rorepo.daemon_export - self.rorepo.daemon_export = not orig_val - assert self.rorepo.daemon_export == ( not orig_val ) - self.rorepo.daemon_export = orig_val - assert self.rorepo.daemon_export == orig_val + def test_daemon_export(self): + orig_val = self.rorepo.daemon_export + self.rorepo.daemon_export = not orig_val + assert self.rorepo.daemon_export == ( not orig_val ) + self.rorepo.daemon_export = orig_val + assert self.rorepo.daemon_export == orig_val - def test_alternates(self): - cur_alternates = self.rorepo.alternates - # empty alternates - self.rorepo.alternates = [] - assert self.rorepo.alternates == [] - alts = [ "other/location", "this/location" ] - self.rorepo.alternates = alts - assert alts == self.rorepo.alternates - self.rorepo.alternates = cur_alternates + def test_alternates(self): + cur_alternates = self.rorepo.alternates + # empty alternates + self.rorepo.alternates = [] + assert self.rorepo.alternates == [] + alts = [ "other/location", "this/location" ] + self.rorepo.alternates = alts + assert alts == self.rorepo.alternates + self.rorepo.alternates = cur_alternates - def test_repr(self): - path = os.path.join(os.path.abspath(GIT_REPO), '.git') - assert_equal('<git.Repo "%s">' % path, repr(self.rorepo)) + def test_repr(self): + path = os.path.join(os.path.abspath(GIT_REPO), '.git') + assert_equal('<git.Repo "%s">' % path, repr(self.rorepo)) - def test_is_dirty_with_bare_repository(self): - orig_value = self.rorepo._bare - self.rorepo._bare = True - assert_false(self.rorepo.is_dirty()) - self.rorepo._bare = orig_value + def test_is_dirty_with_bare_repository(self): + orig_value = self.rorepo._bare + self.rorepo._bare = True + assert_false(self.rorepo.is_dirty()) + self.rorepo._bare = orig_value - def test_is_dirty(self): - self.rorepo._bare = False - for index in (0,1): - for working_tree in (0,1): - for untracked_files in (0,1): - assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False) - # END untracked files - # END working tree - # END index - orig_val = self.rorepo._bare - self.rorepo._bare = True - assert self.rorepo.is_dirty() == False - self.rorepo._bare = orig_val + def test_is_dirty(self): + self.rorepo._bare = False + for index in (0,1): + for working_tree in (0,1): + for untracked_files in (0,1): + assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False) + # END untracked files + # END working tree + # END index + orig_val = self.rorepo._bare + self.rorepo._bare = True + assert self.rorepo.is_dirty() == False + self.rorepo._bare = orig_val - def test_head(self): - assert self.rorepo.head.reference.object == self.rorepo.active_branch.object + def test_head(self): + assert self.rorepo.head.reference.object == self.rorepo.active_branch.object - def test_index(self): - index = self.rorepo.index - assert isinstance(index, IndexFile) - - 
def test_tag(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit - - def test_archive(self): - tmpfile = os.tmpfile() - self.rorepo.archive(tmpfile, '0.1.5') - assert tmpfile.tell() - - @patch_object(Git, '_call_process') - def test_should_display_blame_information(self, git): - git.return_value = fixture('blame') - b = self.rorepo.blame( 'master', 'lib/git.py') - assert_equal(13, len(b)) - assert_equal( 2, len(b[0]) ) - # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b)) - assert_equal(hash(b[0][0]), hash(b[9][0])) - c = b[0][0] - assert_true(git.called) - assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True})) - - assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.hexsha) - assert_equal('Tom Preston-Werner', c.author.name) - assert_equal('tom@mojombo.com', c.author.email) - assert_equal(1191997100, c.authored_date) - assert_equal('Tom Preston-Werner', c.committer.name) - assert_equal('tom@mojombo.com', c.committer.email) - assert_equal(1191997100, c.committed_date) - assert_equal('initial grit setup', c.message) - - # test the 'lines per commit' entries - tlist = b[0][1] - assert_true( tlist ) - assert_true( isinstance( tlist[0], basestring ) ) - assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug - - def test_blame_real(self): - c = 0 - for item in self.rorepo.head.commit.tree.traverse( - predicate=lambda i, d: i.type == 'blob' and i.path.endswith('.py')): - c += 1 - b = self.rorepo.blame(self.rorepo.head, item.path) - #END for each item to traverse - assert c - - def test_untracked_files(self): - base = self.rorepo.working_tree_dir - files = ( join_path_native(base, "__test_myfile"), - join_path_native(base, "__test_other_file") ) - num_recently_untracked = 0 - try: - for fpath in files: - fd = open(fpath,"wb") - fd.close() - # END for each filename - untracked_files = self.rorepo.untracked_files - num_recently_untracked = len(untracked_files) - - # assure we have all names - they are relative to the git-dir - num_test_untracked = 0 - for utfile in untracked_files: - num_test_untracked += join_path_native(base, utfile) in files - assert len(files) == num_test_untracked - finally: - for fpath in files: - if os.path.isfile(fpath): - os.remove(fpath) - # END handle files - - assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files)) - - def test_config_reader(self): - reader = self.rorepo.config_reader() # all config files - assert reader.read_only - reader = self.rorepo.config_reader("repository") # single config file - assert reader.read_only - - def test_config_writer(self): - for config_level in self.rorepo.config_level: - try: - writer = self.rorepo.config_writer(config_level) - assert not writer.read_only - except IOError: - # its okay not to get a writer for some configuration files if we - # have no permissions - pass - # END for each config level - - def test_creation_deletion(self): - # just a very quick test to assure it generally works. 
There are - # specialized cases in the test_refs module - head = self.rorepo.create_head("new_head", "HEAD~1") - self.rorepo.delete_head(head) - - tag = self.rorepo.create_tag("new_tag", "HEAD~2") - self.rorepo.delete_tag(tag) - self.rorepo.config_writer() - remote = self.rorepo.create_remote("new_remote", "git@server:repo.git") - self.rorepo.delete_remote(remote) - - def test_comparison_and_hash(self): - # this is only a preliminary test, more testing done in test_index - assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo) - assert len(set((self.rorepo, self.rorepo))) == 1 - - def test_git_cmd(self): - # test CatFileContentStream, just to be very sure we have no fencepost errors - # last \n is the terminating newline that it expects - l1 = "0123456789\n" - l2 = "abcdefghijklmnopqrstxy\n" - l3 = "z\n" - d = "%s%s%s\n" % (l1, l2, l3) - - l1p = l1[:5] - - # full size - # size is without terminating newline - def mkfull(): - return Git.CatFileContentStream(len(d)-1, StringIO(d)) - - ts = 5 - def mktiny(): - return Git.CatFileContentStream(ts, StringIO(d)) - - # readlines no limit - s = mkfull() - lines = s.readlines() - assert len(lines) == 3 and lines[-1].endswith('\n') - assert s._stream.tell() == len(d) # must have scrubbed to the end - - # realines line limit - s = mkfull() - lines = s.readlines(5) - assert len(lines) == 1 - - # readlines on tiny sections - s = mktiny() - lines = s.readlines() - assert len(lines) == 1 and lines[0] == l1p - assert s._stream.tell() == ts+1 - - # readline no limit - s = mkfull() - assert s.readline() == l1 - assert s.readline() == l2 - assert s.readline() == l3 - assert s.readline() == '' - assert s._stream.tell() == len(d) - - # readline limit - s = mkfull() - assert s.readline(5) == l1p - assert s.readline() == l1[5:] - - # readline on tiny section - s = mktiny() - assert s.readline() == l1p - assert s.readline() == '' - assert s._stream.tell() == ts+1 - - # read no limit - s = mkfull() - assert s.read() == d[:-1] - assert s.read() == '' - assert s._stream.tell() == len(d) - - # read limit - s = mkfull() - assert s.read(5) == l1p - assert s.read(6) == l1[5:] - assert s._stream.tell() == 5 + 6 # its not yet done - - # read tiny - s = mktiny() - assert s.read(2) == l1[:2] - assert s._stream.tell() == 2 - assert s.read() == l1[2:ts] - assert s._stream.tell() == ts+1 - - def _assert_rev_parse_types(self, name, rev_obj): - rev_parse = self.rorepo.rev_parse - - if rev_obj.type == 'tag': - rev_obj = rev_obj.object - - # tree and blob type - obj = rev_parse(name + '^{tree}') - assert obj == rev_obj.tree - - obj = rev_parse(name + ':CHANGES') - assert obj.type == 'blob' and obj.path == 'CHANGES' - assert rev_obj.tree['CHANGES'] == obj - - - def _assert_rev_parse(self, name): - """tries multiple different rev-parse syntaxes with the given name - :return: parsed object""" - rev_parse = self.rorepo.rev_parse - orig_obj = rev_parse(name) - if orig_obj.type == 'tag': - obj = orig_obj.object - else: - obj = orig_obj - # END deref tags by default - - # try history - rev = name + "~" - obj2 = rev_parse(rev) - assert obj2 == obj.parents[0] - self._assert_rev_parse_types(rev, obj2) - - # history with number - ni = 11 - history = [obj.parents[0]] - for pn in range(ni): - history.append(history[-1].parents[0]) - # END get given amount of commits - - for pn in range(11): - rev = name + "~%i" % (pn+1) - obj2 = rev_parse(rev) - assert obj2 == history[pn] - self._assert_rev_parse_types(rev, obj2) - # END history check - - # parent ( default ) - rev = name + "^" 
- obj2 = rev_parse(rev) - assert obj2 == obj.parents[0] - self._assert_rev_parse_types(rev, obj2) - - # parent with number - for pn, parent in enumerate(obj.parents): - rev = name + "^%i" % (pn+1) - assert rev_parse(rev) == parent - self._assert_rev_parse_types(rev, parent) - # END for each parent - - return orig_obj - - @with_rw_repo('HEAD', bare=False) - def test_rw_rev_parse(self, rwrepo): - # verify it does not confuse branches with hexsha ids - ahead = rwrepo.create_head('aaaaaaaa') - assert(rwrepo.rev_parse(str(ahead)) == ahead.commit) - - def test_rev_parse(self): - rev_parse = self.rorepo.rev_parse - - # try special case: This one failed at some point, make sure its fixed - assert rev_parse("33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781" - - # start from reference - num_resolved = 0 - - for ref in Reference.iter_items(self.rorepo): - path_tokens = ref.path.split("/") - for pt in range(len(path_tokens)): - path_section = '/'.join(path_tokens[-(pt+1):]) - try: - obj = self._assert_rev_parse(path_section) - assert obj.type == ref.object.type - num_resolved += 1 - except BadObject: - print "failed on %s" % path_section - # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112 - pass - # END exception handling - # END for each token - # END for each reference - assert num_resolved - - # it works with tags ! - tag = self._assert_rev_parse('0.1.4') - assert tag.type == 'tag' - - # try full sha directly ( including type conversion ) - assert tag.object == rev_parse(tag.object.hexsha) - self._assert_rev_parse_types(tag.object.hexsha, tag.object) - - - # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES - rev = '0.1.4^{tree}^{tree}' - assert rev_parse(rev) == tag.object.tree - assert rev_parse(rev+':CHANGES') == tag.object.tree['CHANGES'] - - - # try to get parents from first revision - it should fail as no such revision - # exists - first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781" - commit = rev_parse(first_rev) - assert len(commit.parents) == 0 - assert commit.hexsha == first_rev - self.failUnlessRaises(BadObject, rev_parse, first_rev+"~") - self.failUnlessRaises(BadObject, rev_parse, first_rev+"^") - - # short SHA1 - commit2 = rev_parse(first_rev[:20]) - assert commit2 == commit - commit2 = rev_parse(first_rev[:5]) - assert commit2 == commit - - - # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one - # needs a tag which points to a blob - - - # ref^0 returns commit being pointed to, same with ref~0, and ^{} - tag = rev_parse('0.1.4') - for token in (('~0', '^0', '^{}')): - assert tag.object == rev_parse('0.1.4%s' % token) - # END handle multiple tokens - - # try partial parsing - max_items = 40 - for i, binsha in enumerate(self.rorepo.odb.sha_iter()): - assert rev_parse(bin_to_hex(binsha)[:8-(i%2)]).binsha == binsha - if i > max_items: - # this is rather slow currently, as rev_parse returns an object - # which requires accessing packs, it has some additional overhead - break - # END for each binsha in repo - - # missing closing brace commit^{tree - self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree') - - # missing starting brace - self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}') - - # REVLOG - ####### - head = self.rorepo.head - - # need to specify a ref when using the @ syntax - self.failUnlessRaises(BadObject, rev_parse, "%s@{0}" % head.commit.hexsha) - - # uses HEAD.ref by default - assert rev_parse('@{0}') == head.commit - if not head.is_detached: - refspec = 
'%s@{0}' % head.ref.name - assert rev_parse(refspec) == head.ref.commit - # all additional specs work as well - assert rev_parse(refspec+"^{tree}") == head.commit.tree - assert rev_parse(refspec+":CHANGES").type == 'blob' - #END operate on non-detached head - - # the last position - assert rev_parse('@{1}') != head.commit - - # position doesn't exist - self.failUnlessRaises(IndexError, rev_parse, '@{10000}') - - # currently, nothing more is supported - self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}") - - def test_repo_odbtype(self): - target_type = GitDB - if sys.version_info[1] < 5: - target_type = GitCmdObjectDB - assert isinstance(self.rorepo.odb, target_type) - - def test_submodules(self): - assert len(self.rorepo.submodules) == 1 # non-recursive - assert len(list(self.rorepo.iter_submodules())) >= 2 - - assert isinstance(self.rorepo.submodule("gitdb"), Submodule) - self.failUnlessRaises(ValueError, self.rorepo.submodule, "doesn't exist") - - @with_rw_repo('HEAD', bare=False) - def test_submodule_update(self, rwrepo): - # fails in bare mode - rwrepo._bare = True - self.failUnlessRaises(InvalidGitRepositoryError, rwrepo.submodule_update) - rwrepo._bare = False - - # test create submodule - sm = rwrepo.submodules[0] - sm = rwrepo.create_submodule("my_new_sub", "some_path", join_path_native(self.rorepo.working_tree_dir, sm.path)) - assert isinstance(sm, Submodule) - - # note: the rest of this functionality is tested in test_submodule - - + def test_index(self): + index = self.rorepo.index + assert isinstance(index, IndexFile) + + def test_tag(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit + + def test_archive(self): + tmpfile = os.tmpfile() + self.rorepo.archive(tmpfile, '0.1.5') + assert tmpfile.tell() + + @patch_object(Git, '_call_process') + def test_should_display_blame_information(self, git): + git.return_value = fixture('blame') + b = self.rorepo.blame( 'master', 'lib/git.py') + assert_equal(13, len(b)) + assert_equal( 2, len(b[0]) ) + # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b)) + assert_equal(hash(b[0][0]), hash(b[9][0])) + c = b[0][0] + assert_true(git.called) + assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True})) + + assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.hexsha) + assert_equal('Tom Preston-Werner', c.author.name) + assert_equal('tom@mojombo.com', c.author.email) + assert_equal(1191997100, c.authored_date) + assert_equal('Tom Preston-Werner', c.committer.name) + assert_equal('tom@mojombo.com', c.committer.email) + assert_equal(1191997100, c.committed_date) + assert_equal('initial grit setup', c.message) + + # test the 'lines per commit' entries + tlist = b[0][1] + assert_true( tlist ) + assert_true( isinstance( tlist[0], basestring ) ) + assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug + + def test_blame_real(self): + c = 0 + for item in self.rorepo.head.commit.tree.traverse( + predicate=lambda i, d: i.type == 'blob' and i.path.endswith('.py')): + c += 1 + b = self.rorepo.blame(self.rorepo.head, item.path) + #END for each item to traverse + assert c + + def test_untracked_files(self): + base = self.rorepo.working_tree_dir + files = ( join_path_native(base, "__test_myfile"), + join_path_native(base, "__test_other_file") ) + num_recently_untracked = 0 + try: + for fpath in files: + fd = open(fpath,"wb") + fd.close() + # END for each filename + untracked_files = self.rorepo.untracked_files + num_recently_untracked = len(untracked_files) + + # 
assure we have all names - they are relative to the git-dir + num_test_untracked = 0 + for utfile in untracked_files: + num_test_untracked += join_path_native(base, utfile) in files + assert len(files) == num_test_untracked + finally: + for fpath in files: + if os.path.isfile(fpath): + os.remove(fpath) + # END handle files + + assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files)) + + def test_config_reader(self): + reader = self.rorepo.config_reader() # all config files + assert reader.read_only + reader = self.rorepo.config_reader("repository") # single config file + assert reader.read_only + + def test_config_writer(self): + for config_level in self.rorepo.config_level: + try: + writer = self.rorepo.config_writer(config_level) + assert not writer.read_only + except IOError: + # its okay not to get a writer for some configuration files if we + # have no permissions + pass + # END for each config level + + def test_creation_deletion(self): + # just a very quick test to assure it generally works. There are + # specialized cases in the test_refs module + head = self.rorepo.create_head("new_head", "HEAD~1") + self.rorepo.delete_head(head) + + tag = self.rorepo.create_tag("new_tag", "HEAD~2") + self.rorepo.delete_tag(tag) + self.rorepo.config_writer() + remote = self.rorepo.create_remote("new_remote", "git@server:repo.git") + self.rorepo.delete_remote(remote) + + def test_comparison_and_hash(self): + # this is only a preliminary test, more testing done in test_index + assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo) + assert len(set((self.rorepo, self.rorepo))) == 1 + + def test_git_cmd(self): + # test CatFileContentStream, just to be very sure we have no fencepost errors + # last \n is the terminating newline that it expects + l1 = "0123456789\n" + l2 = "abcdefghijklmnopqrstxy\n" + l3 = "z\n" + d = "%s%s%s\n" % (l1, l2, l3) + + l1p = l1[:5] + + # full size + # size is without terminating newline + def mkfull(): + return Git.CatFileContentStream(len(d)-1, StringIO(d)) + + ts = 5 + def mktiny(): + return Git.CatFileContentStream(ts, StringIO(d)) + + # readlines no limit + s = mkfull() + lines = s.readlines() + assert len(lines) == 3 and lines[-1].endswith('\n') + assert s._stream.tell() == len(d) # must have scrubbed to the end + + # realines line limit + s = mkfull() + lines = s.readlines(5) + assert len(lines) == 1 + + # readlines on tiny sections + s = mktiny() + lines = s.readlines() + assert len(lines) == 1 and lines[0] == l1p + assert s._stream.tell() == ts+1 + + # readline no limit + s = mkfull() + assert s.readline() == l1 + assert s.readline() == l2 + assert s.readline() == l3 + assert s.readline() == '' + assert s._stream.tell() == len(d) + + # readline limit + s = mkfull() + assert s.readline(5) == l1p + assert s.readline() == l1[5:] + + # readline on tiny section + s = mktiny() + assert s.readline() == l1p + assert s.readline() == '' + assert s._stream.tell() == ts+1 + + # read no limit + s = mkfull() + assert s.read() == d[:-1] + assert s.read() == '' + assert s._stream.tell() == len(d) + + # read limit + s = mkfull() + assert s.read(5) == l1p + assert s.read(6) == l1[5:] + assert s._stream.tell() == 5 + 6 # its not yet done + + # read tiny + s = mktiny() + assert s.read(2) == l1[:2] + assert s._stream.tell() == 2 + assert s.read() == l1[2:ts] + assert s._stream.tell() == ts+1 + + def _assert_rev_parse_types(self, name, rev_obj): + rev_parse = self.rorepo.rev_parse + + if rev_obj.type == 'tag': + rev_obj = rev_obj.object + + # tree 
and blob type + obj = rev_parse(name + '^{tree}') + assert obj == rev_obj.tree + + obj = rev_parse(name + ':CHANGES') + assert obj.type == 'blob' and obj.path == 'CHANGES' + assert rev_obj.tree['CHANGES'] == obj + + + def _assert_rev_parse(self, name): + """tries multiple different rev-parse syntaxes with the given name + :return: parsed object""" + rev_parse = self.rorepo.rev_parse + orig_obj = rev_parse(name) + if orig_obj.type == 'tag': + obj = orig_obj.object + else: + obj = orig_obj + # END deref tags by default + + # try history + rev = name + "~" + obj2 = rev_parse(rev) + assert obj2 == obj.parents[0] + self._assert_rev_parse_types(rev, obj2) + + # history with number + ni = 11 + history = [obj.parents[0]] + for pn in range(ni): + history.append(history[-1].parents[0]) + # END get given amount of commits + + for pn in range(11): + rev = name + "~%i" % (pn+1) + obj2 = rev_parse(rev) + assert obj2 == history[pn] + self._assert_rev_parse_types(rev, obj2) + # END history check + + # parent ( default ) + rev = name + "^" + obj2 = rev_parse(rev) + assert obj2 == obj.parents[0] + self._assert_rev_parse_types(rev, obj2) + + # parent with number + for pn, parent in enumerate(obj.parents): + rev = name + "^%i" % (pn+1) + assert rev_parse(rev) == parent + self._assert_rev_parse_types(rev, parent) + # END for each parent + + return orig_obj + + @with_rw_repo('HEAD', bare=False) + def test_rw_rev_parse(self, rwrepo): + # verify it does not confuse branches with hexsha ids + ahead = rwrepo.create_head('aaaaaaaa') + assert(rwrepo.rev_parse(str(ahead)) == ahead.commit) + + def test_rev_parse(self): + rev_parse = self.rorepo.rev_parse + + # try special case: This one failed at some point, make sure its fixed + assert rev_parse("33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781" + + # start from reference + num_resolved = 0 + + for ref in Reference.iter_items(self.rorepo): + path_tokens = ref.path.split("/") + for pt in range(len(path_tokens)): + path_section = '/'.join(path_tokens[-(pt+1):]) + try: + obj = self._assert_rev_parse(path_section) + assert obj.type == ref.object.type + num_resolved += 1 + except BadObject: + print "failed on %s" % path_section + # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112 + pass + # END exception handling + # END for each token + # END for each reference + assert num_resolved + + # it works with tags ! 
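# Aside: what the tag assertions just below rely on, sketched as user code.
# rev_parse on an annotated tag yields a TagObject whose .object is the tagged
# commit. '/path/to/repo' and the tag name 'v1.0' are placeholders, not values
# from the fixtures.
from git import Repo, TagObject

repo = Repo('/path/to/repo')
obj = repo.rev_parse('v1.0')
if isinstance(obj, TagObject):
    # annotated tag: one more dereference reaches the commit
    print obj.tag, '->', obj.object.hexsha
else:
    # lightweight tag: rev_parse already returned the commit itself
    print obj.hexsha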
+ tag = self._assert_rev_parse('0.1.4') + assert tag.type == 'tag' + + # try full sha directly ( including type conversion ) + assert tag.object == rev_parse(tag.object.hexsha) + self._assert_rev_parse_types(tag.object.hexsha, tag.object) + + + # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES + rev = '0.1.4^{tree}^{tree}' + assert rev_parse(rev) == tag.object.tree + assert rev_parse(rev+':CHANGES') == tag.object.tree['CHANGES'] + + + # try to get parents from first revision - it should fail as no such revision + # exists + first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781" + commit = rev_parse(first_rev) + assert len(commit.parents) == 0 + assert commit.hexsha == first_rev + self.failUnlessRaises(BadObject, rev_parse, first_rev+"~") + self.failUnlessRaises(BadObject, rev_parse, first_rev+"^") + + # short SHA1 + commit2 = rev_parse(first_rev[:20]) + assert commit2 == commit + commit2 = rev_parse(first_rev[:5]) + assert commit2 == commit + + + # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one + # needs a tag which points to a blob + + + # ref^0 returns commit being pointed to, same with ref~0, and ^{} + tag = rev_parse('0.1.4') + for token in (('~0', '^0', '^{}')): + assert tag.object == rev_parse('0.1.4%s' % token) + # END handle multiple tokens + + # try partial parsing + max_items = 40 + for i, binsha in enumerate(self.rorepo.odb.sha_iter()): + assert rev_parse(bin_to_hex(binsha)[:8-(i%2)]).binsha == binsha + if i > max_items: + # this is rather slow currently, as rev_parse returns an object + # which requires accessing packs, it has some additional overhead + break + # END for each binsha in repo + + # missing closing brace commit^{tree + self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree') + + # missing starting brace + self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}') + + # REVLOG + ####### + head = self.rorepo.head + + # need to specify a ref when using the @ syntax + self.failUnlessRaises(BadObject, rev_parse, "%s@{0}" % head.commit.hexsha) + + # uses HEAD.ref by default + assert rev_parse('@{0}') == head.commit + if not head.is_detached: + refspec = '%s@{0}' % head.ref.name + assert rev_parse(refspec) == head.ref.commit + # all additional specs work as well + assert rev_parse(refspec+"^{tree}") == head.commit.tree + assert rev_parse(refspec+":CHANGES").type == 'blob' + #END operate on non-detached head + + # the last position + assert rev_parse('@{1}') != head.commit + + # position doesn't exist + self.failUnlessRaises(IndexError, rev_parse, '@{10000}') + + # currently, nothing more is supported + self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}") + + def test_repo_odbtype(self): + target_type = GitDB + if sys.version_info[1] < 5: + target_type = GitCmdObjectDB + assert isinstance(self.rorepo.odb, target_type) + + def test_submodules(self): + assert len(self.rorepo.submodules) == 1 # non-recursive + assert len(list(self.rorepo.iter_submodules())) >= 2 + + assert isinstance(self.rorepo.submodule("gitdb"), Submodule) + self.failUnlessRaises(ValueError, self.rorepo.submodule, "doesn't exist") + + @with_rw_repo('HEAD', bare=False) + def test_submodule_update(self, rwrepo): + # fails in bare mode + rwrepo._bare = True + self.failUnlessRaises(InvalidGitRepositoryError, rwrepo.submodule_update) + rwrepo._bare = False + + # test create submodule + sm = rwrepo.submodules[0] + sm = rwrepo.create_submodule("my_new_sub", "some_path", join_path_native(self.rorepo.working_tree_dir, sm.path)) + assert 
isinstance(sm, Submodule) + + # note: the rest of this functionality is tested in test_submodule + + diff --git a/git/test/test_submodule.py b/git/test/test_submodule.py index a5d460fd..37bf9f13 100644 --- a/git/test/test_submodule.py +++ b/git/test/test_submodule.py @@ -15,553 +15,553 @@ import os # to keep file handles open. On windows we get problems as they are not properly # closed due to mmap bugs on windows (as it appears) if sys.platform == 'win32': - try: - import smmap.util - smmap.util.MapRegion._test_read_into_memory = True - except ImportError: - sys.stderr.write("The submodule tests will fail as some files cannot be removed due to open file handles.\n") - sys.stderr.write("The latest version of gitdb uses a memory map manager which can be configured to work around this problem") + try: + import smmap.util + smmap.util.MapRegion._test_read_into_memory = True + except ImportError: + sys.stderr.write("The submodule tests will fail as some files cannot be removed due to open file handles.\n") + sys.stderr.write("The latest version of gitdb uses a memory map manager which can be configured to work around this problem") #END handle windows platform class TestRootProgress(RootUpdateProgress): - """Just prints messages, for now without checking the correctness of the states""" - - def update(self, op, index, max_count, message=''): - print message - + """Just prints messages, for now without checking the correctness of the states""" + + def update(self, op, index, max_count, message=''): + print message + prog = TestRootProgress() class TestSubmodule(TestBase): - k_subm_current = "468cad66ff1f80ddaeee4123c24e4d53a032c00d" - k_subm_changed = "394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3" - k_no_subm_tag = "0.1.6" - + k_subm_current = "468cad66ff1f80ddaeee4123c24e4d53a032c00d" + k_subm_changed = "394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3" + k_no_subm_tag = "0.1.6" + - def _do_base_tests(self, rwrepo): - """Perform all tests in the given repository, it may be bare or nonbare""" - # manual instantiation - smm = Submodule(rwrepo, "\0"*20) - # name needs to be set in advance - self.failUnlessRaises(AttributeError, getattr, smm, 'name') - - # iterate - 1 submodule - sms = Submodule.list_items(rwrepo, self.k_subm_current) - assert len(sms) == 1 - sm = sms[0] - - # at a different time, there is None - assert len(Submodule.list_items(rwrepo, self.k_no_subm_tag)) == 0 - - assert sm.path == 'git/ext/gitdb' - assert sm.path != sm.name # in our case, we have ids there, which don't equal the path - assert sm.url == 'git://github.com/gitpython-developers/gitdb.git' - assert sm.branch_path == 'refs/heads/master' # the default ... 
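# Aside: the attributes asserted here are read from .gitmodules plus the owning
# commit, so they can be listed without checking any submodule out. A sketch;
# '/path/to/repo' is a placeholder and the output is repository-specific.
from git import Repo

repo = Repo('/path/to/repo')
for sm in repo.submodules:
    # name and path may differ (this suite stores ids as names); url and
    # branch_path come from the .gitmodules configuration
    print sm.name, sm.path, sm.url, sm.branch_path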
- assert sm.branch_name == 'master' - assert sm.parent_commit == rwrepo.head.commit - # size is always 0 - assert sm.size == 0 - # the module is not checked-out yet - self.failUnlessRaises(InvalidGitRepositoryError, sm.module) - - # which is why we can't get the branch either - it points into the module() repository - self.failUnlessRaises(InvalidGitRepositoryError, getattr, sm, 'branch') - - # branch_path works, as its just a string - assert isinstance(sm.branch_path, basestring) - - # some commits earlier we still have a submodule, but its at a different commit - smold = Submodule.iter_items(rwrepo, self.k_subm_changed).next() - assert smold.binsha != sm.binsha - assert smold != sm # the name changed - - # force it to reread its information - del(smold._url) - smold.url == sm.url - - # test config_reader/writer methods - sm.config_reader() - new_smclone_path = None # keep custom paths for later - new_csmclone_path = None # - if rwrepo.bare: - self.failUnlessRaises(InvalidGitRepositoryError, sm.config_writer) - else: - writer = sm.config_writer() - # for faster checkout, set the url to the local path - new_smclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path)) - writer.set_value('url', new_smclone_path) - del(writer) - assert sm.config_reader().get_value('url') == new_smclone_path - assert sm.url == new_smclone_path - # END handle bare repo - smold.config_reader() - - # cannot get a writer on historical submodules - if not rwrepo.bare: - self.failUnlessRaises(ValueError, smold.config_writer) - # END handle bare repo - - # make the old into a new - this doesn't work as the name changed - prev_parent_commit = smold.parent_commit - self.failUnlessRaises(ValueError, smold.set_parent_commit, self.k_subm_current) - # the sha is properly updated - smold.set_parent_commit(self.k_subm_changed+"~1") - assert smold.binsha != sm.binsha - - # raises if the sm didn't exist in new parent - it keeps its - # parent_commit unchanged - self.failUnlessRaises(ValueError, smold.set_parent_commit, self.k_no_subm_tag) - - # TEST TODO: if a path in the gitmodules file, but not in the index, it raises - - # TEST UPDATE - ############## - # module retrieval is not always possible - if rwrepo.bare: - self.failUnlessRaises(InvalidGitRepositoryError, sm.module) - self.failUnlessRaises(InvalidGitRepositoryError, sm.remove) - self.failUnlessRaises(InvalidGitRepositoryError, sm.add, rwrepo, 'here', 'there') - else: - # its not checked out in our case - self.failUnlessRaises(InvalidGitRepositoryError, sm.module) - assert not sm.module_exists() - - # currently there is only one submodule - assert len(list(rwrepo.iter_submodules())) == 1 - assert sm.binsha != "\0"*20 - - # TEST ADD - ########### - # preliminary tests - # adding existing returns exactly the existing - sma = Submodule.add(rwrepo, sm.name, sm.path) - assert sma.path == sm.path - - # no url and no module at path fails - self.failUnlessRaises(ValueError, Submodule.add, rwrepo, "newsubm", "pathtorepo", url=None) - - # CONTINUE UPDATE - ################# - - # lets update it - its a recursive one too - newdir = os.path.join(sm.abspath, 'dir') - os.makedirs(newdir) - - # update fails if the path already exists non-empty - self.failUnlessRaises(OSError, sm.update) - os.rmdir(newdir) - - # dry-run does nothing - sm.update(dry_run=True, progress=prog) - assert not sm.module_exists() - - assert sm.update() is sm - sm_repopath = sm.path # cache for later - assert sm.module_exists() - assert isinstance(sm.module(), git.Repo) - assert 
sm.module().working_tree_dir == sm.abspath - - # INTERLEAVE ADD TEST - ##################### - # url must match the one in the existing repository ( if submodule name suggests a new one ) - # or we raise - self.failUnlessRaises(ValueError, Submodule.add, rwrepo, "newsubm", sm.path, "git://someurl/repo.git") - - - # CONTINUE UPDATE - ################# - # we should have setup a tracking branch, which is also active - assert sm.module().head.ref.tracking_branch() is not None - - # delete the whole directory and re-initialize - shutil.rmtree(sm.abspath) - assert len(sm.children()) == 0 - # dry-run does nothing - sm.update(dry_run=True, recursive=False, progress=prog) - assert len(sm.children()) == 0 - - sm.update(recursive=False) - assert len(list(rwrepo.iter_submodules())) == 2 - assert len(sm.children()) == 1 # its not checked out yet - csm = sm.children()[0] - assert not csm.module_exists() - csm_repopath = csm.path - - # adjust the path of the submodules module to point to the local destination - new_csmclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path, csm.path)) - csm.config_writer().set_value('url', new_csmclone_path) - assert csm.url == new_csmclone_path - - # dry-run does nothing - assert not csm.module_exists() - sm.update(recursive=True, dry_run=True, progress=prog) - assert not csm.module_exists() - - # update recursively again - sm.update(recursive=True) - assert csm.module_exists() - - # tracking branch once again - csm.module().head.ref.tracking_branch() is not None - - # this flushed in a sub-submodule - assert len(list(rwrepo.iter_submodules())) == 2 - - - # reset both heads to the previous version, verify that to_latest_revision works - smods = (sm.module(), csm.module()) - for repo in smods: - repo.head.reset('HEAD~2', working_tree=1) - # END for each repo to reset - - # dry run does nothing - sm.update(recursive=True, dry_run=True, progress=prog) - for repo in smods: - assert repo.head.commit != repo.head.ref.tracking_branch().commit - # END for each repo to check - - sm.update(recursive=True, to_latest_revision=True) - for repo in smods: - assert repo.head.commit == repo.head.ref.tracking_branch().commit - # END for each repo to check - del(smods) - - # if the head is detached, it still works ( but warns ) - smref = sm.module().head.ref - sm.module().head.ref = 'HEAD~1' - # if there is no tracking branch, we get a warning as well - csm_tracking_branch = csm.module().head.ref.tracking_branch() - csm.module().head.ref.set_tracking_branch(None) - sm.update(recursive=True, to_latest_revision=True) - - # to_latest_revision changes the child submodule's commit, it needs an - # update now - csm.set_parent_commit(csm.repo.head.commit) - - # undo the changes - sm.module().head.ref = smref - csm.module().head.ref.set_tracking_branch(csm_tracking_branch) - - # REMOVAL OF REPOSITOTRY - ######################## - # must delete something - self.failUnlessRaises(ValueError, csm.remove, module=False, configuration=False) - # We have modified the configuration, hence the index is dirty, and the - # deletion will fail - # NOTE: As we did a few updates in the meanwhile, the indices were reset - # Hence we create some changes - csm.set_parent_commit(csm.repo.head.commit) - sm.config_writer().set_value("somekey", "somevalue") - csm.config_writer().set_value("okey", "ovalue") - self.failUnlessRaises(InvalidGitRepositoryError, sm.remove) - # if we remove the dirty index, it would work - sm.module().index.reset() - # still, we have the file modified - 
self.failUnlessRaises(InvalidGitRepositoryError, sm.remove, dry_run=True) - sm.module().index.reset(working_tree=True) - - # enforce the submodule to be checked out at the right spot as well. - csm.update() - - # this would work - assert sm.remove(dry_run=True) is sm - assert sm.module_exists() - sm.remove(force=True, dry_run=True) - assert sm.module_exists() - - # but ... we have untracked files in the child submodule - fn = join_path_native(csm.module().working_tree_dir, "newfile") - open(fn, 'w').write("hi") - self.failUnlessRaises(InvalidGitRepositoryError, sm.remove) - - # forcibly delete the child repository - prev_count = len(sm.children()) - assert csm.remove(force=True) is csm - assert not csm.exists() - assert not csm.module_exists() - assert len(sm.children()) == prev_count - 1 - # now we have a changed index, as configuration was altered. - # fix this - sm.module().index.reset(working_tree=True) - - # now delete only the module of the main submodule - assert sm.module_exists() - sm.remove(configuration=False) - assert sm.exists() - assert not sm.module_exists() - assert sm.config_reader().get_value('url') - - # delete the rest - sm.remove() - assert not sm.exists() - assert not sm.module_exists() - - assert len(rwrepo.submodules) == 0 - - # ADD NEW SUBMODULE - ################### - # add a simple remote repo - trailing slashes are no problem - smid = "newsub" - osmid = "othersub" - nsm = Submodule.add(rwrepo, smid, sm_repopath, new_smclone_path+"/", None, no_checkout=True) - assert nsm.name == smid - assert nsm.module_exists() - assert nsm.exists() - # its not checked out - assert not os.path.isfile(join_path_native(nsm.module().working_tree_dir, Submodule.k_modules_file)) - assert len(rwrepo.submodules) == 1 - - # add another submodule, but into the root, not as submodule - osm = Submodule.add(rwrepo, osmid, csm_repopath, new_csmclone_path, Submodule.k_head_default) - assert osm != nsm - assert osm.module_exists() - assert osm.exists() - assert os.path.isfile(join_path_native(osm.module().working_tree_dir, 'setup.py')) - - assert len(rwrepo.submodules) == 2 - - # commit the changes, just to finalize the operation - rwrepo.index.commit("my submod commit") - assert len(rwrepo.submodules) == 2 - - # needs update as the head changed, it thinks its in the history - # of the repo otherwise - nsm.set_parent_commit(rwrepo.head.commit) - osm.set_parent_commit(rwrepo.head.commit) - - # MOVE MODULE - ############# - # invalid inptu - self.failUnlessRaises(ValueError, nsm.move, 'doesntmatter', module=False, configuration=False) - - # renaming to the same path does nothing - assert nsm.move(sm.path) is nsm - - # rename a module - nmp = join_path_native("new", "module", "dir") + "/" # new module path - pmp = nsm.path - abspmp = nsm.abspath - assert nsm.move(nmp) is nsm - nmp = nmp[:-1] # cut last / - nmpl = to_native_path_linux(nmp) - assert nsm.path == nmpl - assert rwrepo.submodules[0].path == nmpl - - mpath = 'newsubmodule' - absmpath = join_path_native(rwrepo.working_tree_dir, mpath) - open(absmpath, 'w').write('') - self.failUnlessRaises(ValueError, nsm.move, mpath) - os.remove(absmpath) - - # now it works, as we just move it back - nsm.move(pmp) - assert nsm.path == pmp - assert rwrepo.submodules[0].path == pmp - - # TODO lowprio: test remaining exceptions ... 
for now its okay, the code looks right - - # REMOVE 'EM ALL - ################ - # if a submodule's repo has no remotes, it can't be added without an explicit url - osmod = osm.module() - - osm.remove(module=False) - for remote in osmod.remotes: - remote.remove(osmod, remote.name) - assert not osm.exists() - self.failUnlessRaises(ValueError, Submodule.add, rwrepo, osmid, csm_repopath, url=None) - # END handle bare mode - - # Error if there is no submodule file here - self.failUnlessRaises(IOError, Submodule._config_parser, rwrepo, rwrepo.commit(self.k_no_subm_tag), True) - - @with_rw_repo(k_subm_current) - def test_base_rw(self, rwrepo): - self._do_base_tests(rwrepo) - - @with_rw_repo(k_subm_current, bare=True) - def test_base_bare(self, rwrepo): - self._do_base_tests(rwrepo) - - @with_rw_repo(k_subm_current, bare=False) - def test_root_module(self, rwrepo): - # Can query everything without problems - rm = RootModule(self.rorepo) - assert rm.module() is self.rorepo - - # try attributes - rm.binsha - rm.mode - rm.path - assert rm.name == rm.k_root_name - assert rm.parent_commit == self.rorepo.head.commit - rm.url - rm.branch - - assert len(rm.list_items(rm.module())) == 1 - rm.config_reader() - rm.config_writer() - - # deep traversal gitdb / async - rsmsp = [sm.path for sm in rm.traverse()] - assert len(rsmsp) >= 2 # gitdb and async [and smmap], async being a child of gitdb - - # cannot set the parent commit as root module's path didn't exist - self.failUnlessRaises(ValueError, rm.set_parent_commit, 'HEAD') - - # TEST UPDATE - ############# - # setup commit which remove existing, add new and modify existing submodules - rm = RootModule(rwrepo) - assert len(rm.children()) == 1 - - # modify path without modifying the index entry - # ( which is what the move method would do properly ) - #================================================== - sm = rm.children()[0] - pp = "path/prefix" - fp = join_path_native(pp, sm.path) - prep = sm.path - assert not sm.module_exists() # was never updated after rwrepo's clone - - # assure we clone from a local source - sm.config_writer().set_value('url', to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path))) - - # dry-run does nothing - sm.update(recursive=False, dry_run=True, progress=prog) - assert not sm.module_exists() - - sm.update(recursive=False) - assert sm.module_exists() - sm.config_writer().set_value('path', fp) # change path to something with prefix AFTER url change - - # update fails as list_items in such a situations cannot work, as it cannot - # find the entry at the changed path - self.failUnlessRaises(InvalidGitRepositoryError, rm.update, recursive=False) - - # move it properly - doesn't work as it its path currently points to an indexentry - # which doesn't exist ( move it to some path, it doesn't matter here ) - self.failUnlessRaises(InvalidGitRepositoryError, sm.move, pp) - # reset the path(cache) to where it was, now it works - sm.path = prep - sm.move(fp, module=False) # leave it at the old location - - assert not sm.module_exists() - cpathchange = rwrepo.index.commit("changed sm path") # finally we can commit - - # update puts the module into place - rm.update(recursive=False, progress=prog) - sm.set_parent_commit(cpathchange) - assert sm.module_exists() - - # add submodule - #================ - nsmn = "newsubmodule" - nsmp = "submrepo" - async_url = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1])) - nsm = Submodule.add(rwrepo, nsmn, nsmp, url=async_url) - csmadded = 
rwrepo.index.commit("Added submodule").hexsha # make sure we don't keep the repo reference - nsm.set_parent_commit(csmadded) - assert nsm.module_exists() - # in our case, the module should not exist, which happens if we update a parent - # repo and a new submodule comes into life - nsm.remove(configuration=False, module=True) - assert not nsm.module_exists() and nsm.exists() - - - # dry-run does nothing - rm.update(recursive=False, dry_run=True, progress=prog) - - # otherwise it will work - rm.update(recursive=False, progress=prog) - assert nsm.module_exists() - - - - # remove submodule - the previous one - #==================================== - sm.set_parent_commit(csmadded) - smp = sm.abspath - assert not sm.remove(module=False).exists() - assert os.path.isdir(smp) # module still exists - csmremoved = rwrepo.index.commit("Removed submodule") - - # an update will remove the module - # not in dry_run - rm.update(recursive=False, dry_run=True) - assert os.path.isdir(smp) - - rm.update(recursive=False) - assert not os.path.isdir(smp) - - - # change url - #============= - # to the first repository, this way we have a fast checkout, and a completely different - # repository at the different url - nsm.set_parent_commit(csmremoved) - nsmurl = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0])) - nsm.config_writer().set_value('url', nsmurl) - csmpathchange = rwrepo.index.commit("changed url") - nsm.set_parent_commit(csmpathchange) - - prev_commit = nsm.module().head.commit - # dry-run does nothing - rm.update(recursive=False, dry_run=True, progress=prog) - assert nsm.module().remotes.origin.url != nsmurl - - rm.update(recursive=False, progress=prog) - assert nsm.module().remotes.origin.url == nsmurl - # head changed, as the remote url and its commit changed - assert prev_commit != nsm.module().head.commit - - # add the submodule's changed commit to the index, which is what the - # user would do - # beforehand, update our instance's binsha with the new one - nsm.binsha = nsm.module().head.commit.binsha - rwrepo.index.add([nsm]) - - # change branch - #================= - # we only have one branch, so we switch to a virtual one, and back - # to the current one to trigger the difference - cur_branch = nsm.branch - nsmm = nsm.module() - prev_commit = nsmm.head.commit - for branch in ("some_virtual_branch", cur_branch.name): - nsm.config_writer().set_value(Submodule.k_head_option, git.Head.to_full_path(branch)) - csmbranchchange = rwrepo.index.commit("changed branch to %s" % branch) - nsm.set_parent_commit(csmbranchchange) - # END for each branch to change - - # Lets remove our tracking branch to simulate some changes - nsmmh = nsmm.head - assert nsmmh.ref.tracking_branch() is None # never set it up until now - assert not nsmmh.is_detached - - #dry run does nothing - rm.update(recursive=False, dry_run=True, progress=prog) - assert nsmmh.ref.tracking_branch() is None - - # the real thing does - rm.update(recursive=False, progress=prog) - - assert nsmmh.ref.tracking_branch() is not None - assert not nsmmh.is_detached - - # recursive update - # ================= - # finally we recursively update a module, just to run the code at least once - # remove the module so that it has more work - assert len(nsm.children()) >= 1 # could include smmap - assert nsm.exists() and nsm.module_exists() and len(nsm.children()) >= 1 - # assure we pull locally only - nsmc = nsm.children()[0] - nsmc.config_writer().set_value('url', async_url) - rm.update(recursive=True, progress=prog, dry_run=True) 
# just to run the code - rm.update(recursive=True, progress=prog) - - # gitdb: has either 1 or 2 submodules depending on the version - assert len(nsm.children()) >= 1 and nsmc.module_exists() - + def _do_base_tests(self, rwrepo): + """Perform all tests in the given repository, it may be bare or nonbare""" + # manual instantiation + smm = Submodule(rwrepo, "\0"*20) + # name needs to be set in advance + self.failUnlessRaises(AttributeError, getattr, smm, 'name') + + # iterate - 1 submodule + sms = Submodule.list_items(rwrepo, self.k_subm_current) + assert len(sms) == 1 + sm = sms[0] + + # at a different time, there is None + assert len(Submodule.list_items(rwrepo, self.k_no_subm_tag)) == 0 + + assert sm.path == 'git/ext/gitdb' + assert sm.path != sm.name # in our case, we have ids there, which don't equal the path + assert sm.url == 'git://github.com/gitpython-developers/gitdb.git' + assert sm.branch_path == 'refs/heads/master' # the default ... + assert sm.branch_name == 'master' + assert sm.parent_commit == rwrepo.head.commit + # size is always 0 + assert sm.size == 0 + # the module is not checked-out yet + self.failUnlessRaises(InvalidGitRepositoryError, sm.module) + + # which is why we can't get the branch either - it points into the module() repository + self.failUnlessRaises(InvalidGitRepositoryError, getattr, sm, 'branch') + + # branch_path works, as its just a string + assert isinstance(sm.branch_path, basestring) + + # some commits earlier we still have a submodule, but its at a different commit + smold = Submodule.iter_items(rwrepo, self.k_subm_changed).next() + assert smold.binsha != sm.binsha + assert smold != sm # the name changed + + # force it to reread its information + del(smold._url) + smold.url == sm.url + + # test config_reader/writer methods + sm.config_reader() + new_smclone_path = None # keep custom paths for later + new_csmclone_path = None # + if rwrepo.bare: + self.failUnlessRaises(InvalidGitRepositoryError, sm.config_writer) + else: + writer = sm.config_writer() + # for faster checkout, set the url to the local path + new_smclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path)) + writer.set_value('url', new_smclone_path) + del(writer) + assert sm.config_reader().get_value('url') == new_smclone_path + assert sm.url == new_smclone_path + # END handle bare repo + smold.config_reader() + + # cannot get a writer on historical submodules + if not rwrepo.bare: + self.failUnlessRaises(ValueError, smold.config_writer) + # END handle bare repo + + # make the old into a new - this doesn't work as the name changed + prev_parent_commit = smold.parent_commit + self.failUnlessRaises(ValueError, smold.set_parent_commit, self.k_subm_current) + # the sha is properly updated + smold.set_parent_commit(self.k_subm_changed+"~1") + assert smold.binsha != sm.binsha + + # raises if the sm didn't exist in new parent - it keeps its + # parent_commit unchanged + self.failUnlessRaises(ValueError, smold.set_parent_commit, self.k_no_subm_tag) + + # TEST TODO: if a path in the gitmodules file, but not in the index, it raises + + # TEST UPDATE + ############## + # module retrieval is not always possible + if rwrepo.bare: + self.failUnlessRaises(InvalidGitRepositoryError, sm.module) + self.failUnlessRaises(InvalidGitRepositoryError, sm.remove) + self.failUnlessRaises(InvalidGitRepositoryError, sm.add, rwrepo, 'here', 'there') + else: + # its not checked out in our case + self.failUnlessRaises(InvalidGitRepositoryError, sm.module) + assert not sm.module_exists() + 
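# Aside: the module()/module_exists() contract tested in this branch, as user
# code. Same placeholder path as in the earlier sketches; module() raises
# InvalidGitRepositoryError until the submodule has actually been checked out.
from git import Repo
from git.exc import InvalidGitRepositoryError

repo = Repo('/path/to/repo')
for sm in repo.submodules:
    if sm.module_exists():
        print sm.name, 'checked out at', sm.module().working_tree_dir
    else:
        try:
            sm.module()
        except InvalidGitRepositoryError:
            print sm.name, 'is recorded but not checked out'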
+ # currently there is only one submodule + assert len(list(rwrepo.iter_submodules())) == 1 + assert sm.binsha != "\0"*20 + + # TEST ADD + ########### + # preliminary tests + # adding existing returns exactly the existing + sma = Submodule.add(rwrepo, sm.name, sm.path) + assert sma.path == sm.path + + # no url and no module at path fails + self.failUnlessRaises(ValueError, Submodule.add, rwrepo, "newsubm", "pathtorepo", url=None) + + # CONTINUE UPDATE + ################# + + # lets update it - its a recursive one too + newdir = os.path.join(sm.abspath, 'dir') + os.makedirs(newdir) + + # update fails if the path already exists non-empty + self.failUnlessRaises(OSError, sm.update) + os.rmdir(newdir) + + # dry-run does nothing + sm.update(dry_run=True, progress=prog) + assert not sm.module_exists() + + assert sm.update() is sm + sm_repopath = sm.path # cache for later + assert sm.module_exists() + assert isinstance(sm.module(), git.Repo) + assert sm.module().working_tree_dir == sm.abspath + + # INTERLEAVE ADD TEST + ##################### + # url must match the one in the existing repository ( if submodule name suggests a new one ) + # or we raise + self.failUnlessRaises(ValueError, Submodule.add, rwrepo, "newsubm", sm.path, "git://someurl/repo.git") + + + # CONTINUE UPDATE + ################# + # we should have setup a tracking branch, which is also active + assert sm.module().head.ref.tracking_branch() is not None + + # delete the whole directory and re-initialize + shutil.rmtree(sm.abspath) + assert len(sm.children()) == 0 + # dry-run does nothing + sm.update(dry_run=True, recursive=False, progress=prog) + assert len(sm.children()) == 0 + + sm.update(recursive=False) + assert len(list(rwrepo.iter_submodules())) == 2 + assert len(sm.children()) == 1 # its not checked out yet + csm = sm.children()[0] + assert not csm.module_exists() + csm_repopath = csm.path + + # adjust the path of the submodules module to point to the local destination + new_csmclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path, csm.path)) + csm.config_writer().set_value('url', new_csmclone_path) + assert csm.url == new_csmclone_path + + # dry-run does nothing + assert not csm.module_exists() + sm.update(recursive=True, dry_run=True, progress=prog) + assert not csm.module_exists() + + # update recursively again + sm.update(recursive=True) + assert csm.module_exists() + + # tracking branch once again + csm.module().head.ref.tracking_branch() is not None + + # this flushed in a sub-submodule + assert len(list(rwrepo.iter_submodules())) == 2 + + + # reset both heads to the previous version, verify that to_latest_revision works + smods = (sm.module(), csm.module()) + for repo in smods: + repo.head.reset('HEAD~2', working_tree=1) + # END for each repo to reset + + # dry run does nothing + sm.update(recursive=True, dry_run=True, progress=prog) + for repo in smods: + assert repo.head.commit != repo.head.ref.tracking_branch().commit + # END for each repo to check + + sm.update(recursive=True, to_latest_revision=True) + for repo in smods: + assert repo.head.commit == repo.head.ref.tracking_branch().commit + # END for each repo to check + del(smods) + + # if the head is detached, it still works ( but warns ) + smref = sm.module().head.ref + sm.module().head.ref = 'HEAD~1' + # if there is no tracking branch, we get a warning as well + csm_tracking_branch = csm.module().head.ref.tracking_branch() + csm.module().head.ref.set_tracking_branch(None) + sm.update(recursive=True, to_latest_revision=True) 
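# Aside: the update flags this test keeps cycling through, written as a caller
# would use them. Same placeholder path; dry_run previews without touching the
# working tree, and to_latest_revision moves the checkout to the tracking
# branch's tip instead of the commit recorded in the parent repository.
from git import Repo

repo = Repo('/path/to/repo')
for sm in repo.submodules:
    sm.update(recursive=True, dry_run=True)              # report only
    sm.update(recursive=True, to_latest_revision=True)   # really update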
+ + # to_latest_revision changes the child submodule's commit, it needs an + # update now + csm.set_parent_commit(csm.repo.head.commit) + + # undo the changes + sm.module().head.ref = smref + csm.module().head.ref.set_tracking_branch(csm_tracking_branch) + + # REMOVAL OF REPOSITOTRY + ######################## + # must delete something + self.failUnlessRaises(ValueError, csm.remove, module=False, configuration=False) + # We have modified the configuration, hence the index is dirty, and the + # deletion will fail + # NOTE: As we did a few updates in the meanwhile, the indices were reset + # Hence we create some changes + csm.set_parent_commit(csm.repo.head.commit) + sm.config_writer().set_value("somekey", "somevalue") + csm.config_writer().set_value("okey", "ovalue") + self.failUnlessRaises(InvalidGitRepositoryError, sm.remove) + # if we remove the dirty index, it would work + sm.module().index.reset() + # still, we have the file modified + self.failUnlessRaises(InvalidGitRepositoryError, sm.remove, dry_run=True) + sm.module().index.reset(working_tree=True) + + # enforce the submodule to be checked out at the right spot as well. + csm.update() + + # this would work + assert sm.remove(dry_run=True) is sm + assert sm.module_exists() + sm.remove(force=True, dry_run=True) + assert sm.module_exists() + + # but ... we have untracked files in the child submodule + fn = join_path_native(csm.module().working_tree_dir, "newfile") + open(fn, 'w').write("hi") + self.failUnlessRaises(InvalidGitRepositoryError, sm.remove) + + # forcibly delete the child repository + prev_count = len(sm.children()) + assert csm.remove(force=True) is csm + assert not csm.exists() + assert not csm.module_exists() + assert len(sm.children()) == prev_count - 1 + # now we have a changed index, as configuration was altered. 
+ # fix this + sm.module().index.reset(working_tree=True) + + # now delete only the module of the main submodule + assert sm.module_exists() + sm.remove(configuration=False) + assert sm.exists() + assert not sm.module_exists() + assert sm.config_reader().get_value('url') + + # delete the rest + sm.remove() + assert not sm.exists() + assert not sm.module_exists() + + assert len(rwrepo.submodules) == 0 + + # ADD NEW SUBMODULE + ################### + # add a simple remote repo - trailing slashes are no problem + smid = "newsub" + osmid = "othersub" + nsm = Submodule.add(rwrepo, smid, sm_repopath, new_smclone_path+"/", None, no_checkout=True) + assert nsm.name == smid + assert nsm.module_exists() + assert nsm.exists() + # its not checked out + assert not os.path.isfile(join_path_native(nsm.module().working_tree_dir, Submodule.k_modules_file)) + assert len(rwrepo.submodules) == 1 + + # add another submodule, but into the root, not as submodule + osm = Submodule.add(rwrepo, osmid, csm_repopath, new_csmclone_path, Submodule.k_head_default) + assert osm != nsm + assert osm.module_exists() + assert osm.exists() + assert os.path.isfile(join_path_native(osm.module().working_tree_dir, 'setup.py')) + + assert len(rwrepo.submodules) == 2 + + # commit the changes, just to finalize the operation + rwrepo.index.commit("my submod commit") + assert len(rwrepo.submodules) == 2 + + # needs update as the head changed, it thinks its in the history + # of the repo otherwise + nsm.set_parent_commit(rwrepo.head.commit) + osm.set_parent_commit(rwrepo.head.commit) + + # MOVE MODULE + ############# + # invalid inptu + self.failUnlessRaises(ValueError, nsm.move, 'doesntmatter', module=False, configuration=False) + + # renaming to the same path does nothing + assert nsm.move(sm.path) is nsm + + # rename a module + nmp = join_path_native("new", "module", "dir") + "/" # new module path + pmp = nsm.path + abspmp = nsm.abspath + assert nsm.move(nmp) is nsm + nmp = nmp[:-1] # cut last / + nmpl = to_native_path_linux(nmp) + assert nsm.path == nmpl + assert rwrepo.submodules[0].path == nmpl + + mpath = 'newsubmodule' + absmpath = join_path_native(rwrepo.working_tree_dir, mpath) + open(absmpath, 'w').write('') + self.failUnlessRaises(ValueError, nsm.move, mpath) + os.remove(absmpath) + + # now it works, as we just move it back + nsm.move(pmp) + assert nsm.path == pmp + assert rwrepo.submodules[0].path == pmp + + # TODO lowprio: test remaining exceptions ... 
for now its okay, the code looks right + + # REMOVE 'EM ALL + ################ + # if a submodule's repo has no remotes, it can't be added without an explicit url + osmod = osm.module() + + osm.remove(module=False) + for remote in osmod.remotes: + remote.remove(osmod, remote.name) + assert not osm.exists() + self.failUnlessRaises(ValueError, Submodule.add, rwrepo, osmid, csm_repopath, url=None) + # END handle bare mode + + # Error if there is no submodule file here + self.failUnlessRaises(IOError, Submodule._config_parser, rwrepo, rwrepo.commit(self.k_no_subm_tag), True) + + @with_rw_repo(k_subm_current) + def test_base_rw(self, rwrepo): + self._do_base_tests(rwrepo) + + @with_rw_repo(k_subm_current, bare=True) + def test_base_bare(self, rwrepo): + self._do_base_tests(rwrepo) + + @with_rw_repo(k_subm_current, bare=False) + def test_root_module(self, rwrepo): + # Can query everything without problems + rm = RootModule(self.rorepo) + assert rm.module() is self.rorepo + + # try attributes + rm.binsha + rm.mode + rm.path + assert rm.name == rm.k_root_name + assert rm.parent_commit == self.rorepo.head.commit + rm.url + rm.branch + + assert len(rm.list_items(rm.module())) == 1 + rm.config_reader() + rm.config_writer() + + # deep traversal gitdb / async + rsmsp = [sm.path for sm in rm.traverse()] + assert len(rsmsp) >= 2 # gitdb and async [and smmap], async being a child of gitdb + + # cannot set the parent commit as root module's path didn't exist + self.failUnlessRaises(ValueError, rm.set_parent_commit, 'HEAD') + + # TEST UPDATE + ############# + # setup commit which remove existing, add new and modify existing submodules + rm = RootModule(rwrepo) + assert len(rm.children()) == 1 + + # modify path without modifying the index entry + # ( which is what the move method would do properly ) + #================================================== + sm = rm.children()[0] + pp = "path/prefix" + fp = join_path_native(pp, sm.path) + prep = sm.path + assert not sm.module_exists() # was never updated after rwrepo's clone + + # assure we clone from a local source + sm.config_writer().set_value('url', to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path))) + + # dry-run does nothing + sm.update(recursive=False, dry_run=True, progress=prog) + assert not sm.module_exists() + + sm.update(recursive=False) + assert sm.module_exists() + sm.config_writer().set_value('path', fp) # change path to something with prefix AFTER url change + + # update fails as list_items in such a situations cannot work, as it cannot + # find the entry at the changed path + self.failUnlessRaises(InvalidGitRepositoryError, rm.update, recursive=False) + + # move it properly - doesn't work as it its path currently points to an indexentry + # which doesn't exist ( move it to some path, it doesn't matter here ) + self.failUnlessRaises(InvalidGitRepositoryError, sm.move, pp) + # reset the path(cache) to where it was, now it works + sm.path = prep + sm.move(fp, module=False) # leave it at the old location + + assert not sm.module_exists() + cpathchange = rwrepo.index.commit("changed sm path") # finally we can commit + + # update puts the module into place + rm.update(recursive=False, progress=prog) + sm.set_parent_commit(cpathchange) + assert sm.module_exists() + + # add submodule + #================ + nsmn = "newsubmodule" + nsmp = "submrepo" + async_url = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1])) + nsm = Submodule.add(rwrepo, nsmn, nsmp, url=async_url) + csmadded = 
rwrepo.index.commit("Added submodule").hexsha # make sure we don't keep the repo reference + nsm.set_parent_commit(csmadded) + assert nsm.module_exists() + # in our case, the module should not exist, which happens if we update a parent + # repo and a new submodule comes into life + nsm.remove(configuration=False, module=True) + assert not nsm.module_exists() and nsm.exists() + + + # dry-run does nothing + rm.update(recursive=False, dry_run=True, progress=prog) + + # otherwise it will work + rm.update(recursive=False, progress=prog) + assert nsm.module_exists() + + + + # remove submodule - the previous one + #==================================== + sm.set_parent_commit(csmadded) + smp = sm.abspath + assert not sm.remove(module=False).exists() + assert os.path.isdir(smp) # module still exists + csmremoved = rwrepo.index.commit("Removed submodule") + + # an update will remove the module + # not in dry_run + rm.update(recursive=False, dry_run=True) + assert os.path.isdir(smp) + + rm.update(recursive=False) + assert not os.path.isdir(smp) + + + # change url + #============= + # to the first repository, this way we have a fast checkout, and a completely different + # repository at the different url + nsm.set_parent_commit(csmremoved) + nsmurl = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0])) + nsm.config_writer().set_value('url', nsmurl) + csmpathchange = rwrepo.index.commit("changed url") + nsm.set_parent_commit(csmpathchange) + + prev_commit = nsm.module().head.commit + # dry-run does nothing + rm.update(recursive=False, dry_run=True, progress=prog) + assert nsm.module().remotes.origin.url != nsmurl + + rm.update(recursive=False, progress=prog) + assert nsm.module().remotes.origin.url == nsmurl + # head changed, as the remote url and its commit changed + assert prev_commit != nsm.module().head.commit + + # add the submodule's changed commit to the index, which is what the + # user would do + # beforehand, update our instance's binsha with the new one + nsm.binsha = nsm.module().head.commit.binsha + rwrepo.index.add([nsm]) + + # change branch + #================= + # we only have one branch, so we switch to a virtual one, and back + # to the current one to trigger the difference + cur_branch = nsm.branch + nsmm = nsm.module() + prev_commit = nsmm.head.commit + for branch in ("some_virtual_branch", cur_branch.name): + nsm.config_writer().set_value(Submodule.k_head_option, git.Head.to_full_path(branch)) + csmbranchchange = rwrepo.index.commit("changed branch to %s" % branch) + nsm.set_parent_commit(csmbranchchange) + # END for each branch to change + + # Lets remove our tracking branch to simulate some changes + nsmmh = nsmm.head + assert nsmmh.ref.tracking_branch() is None # never set it up until now + assert not nsmmh.is_detached + + #dry run does nothing + rm.update(recursive=False, dry_run=True, progress=prog) + assert nsmmh.ref.tracking_branch() is None + + # the real thing does + rm.update(recursive=False, progress=prog) + + assert nsmmh.ref.tracking_branch() is not None + assert not nsmmh.is_detached + + # recursive update + # ================= + # finally we recursively update a module, just to run the code at least once + # remove the module so that it has more work + assert len(nsm.children()) >= 1 # could include smmap + assert nsm.exists() and nsm.module_exists() and len(nsm.children()) >= 1 + # assure we pull locally only + nsmc = nsm.children()[0] + nsmc.config_writer().set_value('url', async_url) + rm.update(recursive=True, progress=prog, dry_run=True) 
# just to run the code + rm.update(recursive=True, progress=prog) + + # gitdb: has either 1 or 2 submodules depending on the version + assert len(nsm.children()) >= 1 and nsmc.module_exists() + diff --git a/git/test/test_tree.py b/git/test/test_tree.py index ec10e962..16d5be59 100644 --- a/git/test/test_tree.py +++ b/git/test/test_tree.py @@ -8,137 +8,137 @@ import os from git.test.lib import * from git import * from git.objects.fun import ( - traverse_tree_recursive, - traverse_trees_recursive - ) + traverse_tree_recursive, + traverse_trees_recursive + ) from cStringIO import StringIO class TestTree(TestBase): - - def test_serializable(self): - # tree at the given commit contains a submodule as well - roottree = self.rorepo.tree('6c1faef799095f3990e9970bc2cb10aa0221cf9c') - for item in roottree.traverse(ignore_self=False): - if item.type != Tree.type: - continue - # END skip non-trees - tree = item - # trees have no dict - self.failUnlessRaises(AttributeError, setattr, tree, 'someattr', 1) - - orig_data = tree.data_stream.read() - orig_cache = tree._cache - - stream = StringIO() - tree._serialize(stream) - assert stream.getvalue() == orig_data - - stream.seek(0) - testtree = Tree(self.rorepo, Tree.NULL_BIN_SHA, 0, '') - testtree._deserialize(stream) - assert testtree._cache == orig_cache - - - # TEST CACHE MUTATOR - mod = testtree.cache - self.failUnlessRaises(ValueError, mod.add, "invalid sha", 0, "name") - self.failUnlessRaises(ValueError, mod.add, Tree.NULL_HEX_SHA, 0, "invalid mode") - self.failUnlessRaises(ValueError, mod.add, Tree.NULL_HEX_SHA, tree.mode, "invalid/name") - - # add new item - name = "fake_dir" - mod.add(testtree.NULL_HEX_SHA, tree.mode, name) - assert name in testtree - - # its available in the tree immediately - assert isinstance(testtree[name], Tree) - - # adding it again will not cause multiple of them to be presents - cur_count = len(testtree) - mod.add(testtree.NULL_HEX_SHA, tree.mode, name) - assert len(testtree) == cur_count - - # fails with a different sha - name exists - hexsha = "1"*40 - self.failUnlessRaises(ValueError, mod.add, hexsha, tree.mode, name) - - # force it - replace existing one - mod.add(hexsha, tree.mode, name, force=True) - assert testtree[name].hexsha == hexsha - assert len(testtree) == cur_count - - # unchecked addition always works, even with invalid items - invalid_name = "hi/there" - mod.add_unchecked(hexsha, 0, invalid_name) - assert len(testtree) == cur_count + 1 - - del(mod[invalid_name]) - assert len(testtree) == cur_count - # del again, its fine - del(mod[invalid_name]) - - # have added one item, we are done - mod.set_done() - mod.set_done() # multiple times are okay - - # serialize, its different now - stream = StringIO() - testtree._serialize(stream) - stream.seek(0) - assert stream.getvalue() != orig_data - - # replaces cache, but we make sure of it - del(testtree._cache) - testtree._deserialize(stream) - assert name in testtree - assert invalid_name not in testtree - # END for each item in tree - - def test_traverse(self): - root = self.rorepo.tree('0.1.6') - num_recursive = 0 - all_items = list() - for obj in root.traverse(): - if "/" in obj.path: - num_recursive += 1 - - assert isinstance(obj, (Blob, Tree)) - all_items.append(obj) - # END for each object - assert all_items == root.list_traverse() - - # limit recursion level to 0 - should be same as default iteration - assert all_items - assert 'CHANGES' in root - assert len(list(root)) == len(list(root.traverse(depth=1))) - - # only choose trees - trees_only = lambda i,d: 
i.type == "tree" - trees = list(root.traverse(predicate = trees_only)) - assert len(trees) == len(list( i for i in root.traverse() if trees_only(i,0) )) - - # test prune - lib_folder = lambda t,d: t.path == "lib" - pruned_trees = list(root.traverse(predicate = trees_only,prune = lib_folder)) - assert len(pruned_trees) < len(trees) - - # trees and blobs - assert len(set(trees)|set(root.trees)) == len(trees) - assert len(set(b for b in root if isinstance(b, Blob)) | set(root.blobs)) == len( root.blobs ) - subitem = trees[0][0] - assert "/" in subitem.path - assert subitem.name == os.path.basename(subitem.path) - - # assure that at some point the traversed paths have a slash in them - found_slash = False - for item in root.traverse(): - assert os.path.isabs(item.abspath) - if '/' in item.path: - found_slash = True - # END check for slash - - # slashes in paths are supported as well - assert root[item.path] == item == root/item.path - # END for each item - assert found_slash + + def test_serializable(self): + # tree at the given commit contains a submodule as well + roottree = self.rorepo.tree('6c1faef799095f3990e9970bc2cb10aa0221cf9c') + for item in roottree.traverse(ignore_self=False): + if item.type != Tree.type: + continue + # END skip non-trees + tree = item + # trees have no dict + self.failUnlessRaises(AttributeError, setattr, tree, 'someattr', 1) + + orig_data = tree.data_stream.read() + orig_cache = tree._cache + + stream = StringIO() + tree._serialize(stream) + assert stream.getvalue() == orig_data + + stream.seek(0) + testtree = Tree(self.rorepo, Tree.NULL_BIN_SHA, 0, '') + testtree._deserialize(stream) + assert testtree._cache == orig_cache + + + # TEST CACHE MUTATOR + mod = testtree.cache + self.failUnlessRaises(ValueError, mod.add, "invalid sha", 0, "name") + self.failUnlessRaises(ValueError, mod.add, Tree.NULL_HEX_SHA, 0, "invalid mode") + self.failUnlessRaises(ValueError, mod.add, Tree.NULL_HEX_SHA, tree.mode, "invalid/name") + + # add new item + name = "fake_dir" + mod.add(testtree.NULL_HEX_SHA, tree.mode, name) + assert name in testtree + + # its available in the tree immediately + assert isinstance(testtree[name], Tree) + + # adding it again will not cause multiple of them to be presents + cur_count = len(testtree) + mod.add(testtree.NULL_HEX_SHA, tree.mode, name) + assert len(testtree) == cur_count + + # fails with a different sha - name exists + hexsha = "1"*40 + self.failUnlessRaises(ValueError, mod.add, hexsha, tree.mode, name) + + # force it - replace existing one + mod.add(hexsha, tree.mode, name, force=True) + assert testtree[name].hexsha == hexsha + assert len(testtree) == cur_count + + # unchecked addition always works, even with invalid items + invalid_name = "hi/there" + mod.add_unchecked(hexsha, 0, invalid_name) + assert len(testtree) == cur_count + 1 + + del(mod[invalid_name]) + assert len(testtree) == cur_count + # del again, its fine + del(mod[invalid_name]) + + # have added one item, we are done + mod.set_done() + mod.set_done() # multiple times are okay + + # serialize, its different now + stream = StringIO() + testtree._serialize(stream) + stream.seek(0) + assert stream.getvalue() != orig_data + + # replaces cache, but we make sure of it + del(testtree._cache) + testtree._deserialize(stream) + assert name in testtree + assert invalid_name not in testtree + # END for each item in tree + + def test_traverse(self): + root = self.rorepo.tree('0.1.6') + num_recursive = 0 + all_items = list() + for obj in root.traverse(): + if "/" in obj.path: + num_recursive += 
1 + + assert isinstance(obj, (Blob, Tree)) + all_items.append(obj) + # END for each object + assert all_items == root.list_traverse() + + # limit recursion level to 0 - should be same as default iteration + assert all_items + assert 'CHANGES' in root + assert len(list(root)) == len(list(root.traverse(depth=1))) + + # only choose trees + trees_only = lambda i,d: i.type == "tree" + trees = list(root.traverse(predicate = trees_only)) + assert len(trees) == len(list( i for i in root.traverse() if trees_only(i,0) )) + + # test prune + lib_folder = lambda t,d: t.path == "lib" + pruned_trees = list(root.traverse(predicate = trees_only,prune = lib_folder)) + assert len(pruned_trees) < len(trees) + + # trees and blobs + assert len(set(trees)|set(root.trees)) == len(trees) + assert len(set(b for b in root if isinstance(b, Blob)) | set(root.blobs)) == len( root.blobs ) + subitem = trees[0][0] + assert "/" in subitem.path + assert subitem.name == os.path.basename(subitem.path) + + # assure that at some point the traversed paths have a slash in them + found_slash = False + for item in root.traverse(): + assert os.path.isabs(item.abspath) + if '/' in item.path: + found_slash = True + # END check for slash + + # slashes in paths are supported as well + assert root[item.path] == item == root/item.path + # END for each item + assert found_slash diff --git a/git/test/test_util.py b/git/test/test_util.py index a11c15c3..ea761217 100644 --- a/git/test/test_util.py +++ b/git/test/test_util.py @@ -17,150 +17,150 @@ import time class TestIterableMember(object): - """A member of an iterable list""" - __slots__ = ("name", "prefix_name") - - def __init__(self, name): - self.name = name - self.prefix_name = name - + """A member of an iterable list""" + __slots__ = ("name", "prefix_name") + + def __init__(self, name): + self.name = name + self.prefix_name = name + class TestUtils(TestBase): - def setup(self): - self.testdict = { - "string": "42", - "int": 42, - "array": [ 42 ], - } + def setup(self): + self.testdict = { + "string": "42", + "int": 42, + "array": [ 42 ], + } - def test_it_should_dashify(self): - assert_equal('this-is-my-argument', dashify('this_is_my_argument')) - assert_equal('foo', dashify('foo')) - - - def test_lock_file(self): - my_file = tempfile.mktemp() - lock_file = LockFile(my_file) - assert not lock_file._has_lock() - # release lock we don't have - fine - lock_file._release_lock() - - # get lock - lock_file._obtain_lock_or_raise() - assert lock_file._has_lock() - - # concurrent access - other_lock_file = LockFile(my_file) - assert not other_lock_file._has_lock() - self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise) - - lock_file._release_lock() - assert not lock_file._has_lock() - - other_lock_file._obtain_lock_or_raise() - self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise) - - # auto-release on destruction - del(other_lock_file) - lock_file._obtain_lock_or_raise() - lock_file._release_lock() - - def test_blocking_lock_file(self): - my_file = tempfile.mktemp() - lock_file = BlockingLockFile(my_file) - lock_file._obtain_lock() - - # next one waits for the lock - start = time.time() - wait_time = 0.1 - wait_lock = BlockingLockFile(my_file, 0.05, wait_time) - self.failUnlessRaises(IOError, wait_lock._obtain_lock) - elapsed = time.time() - start - assert elapsed <= wait_time + 0.02 # some extra time it may cost - - def test_user_id(self): - assert '@' in get_user_id() - - def test_parse_date(self): - # test all supported formats - def assert_rval(rval, veri_time, 
offset=0): - assert len(rval) == 2 - assert isinstance(rval[0], int) and isinstance(rval[1], int) - assert rval[0] == veri_time - assert rval[1] == offset - - # now that we are here, test our conversion functions as well - utctz = altz_to_utctz_str(offset) - assert isinstance(utctz, basestring) - assert utctz_to_altz(verify_utctz(utctz)) == offset - # END assert rval utility - - rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0) - iso = ("2005-04-07T22:13:11 -0200", 7200) - iso2 = ("2005-04-07 22:13:11 +0400", -14400) - iso3 = ("2005.04.07 22:13:11 -0000", 0) - alt = ("04/07/2005 22:13:11", 0) - alt2 = ("07.04.2005 22:13:11", 0) - veri_time = 1112904791 # the time this represents - for date, offset in (rfc, iso, iso2, iso3, alt, alt2): - assert_rval(parse_date(date), veri_time, offset) - # END for each date type - - # and failure - self.failUnlessRaises(ValueError, parse_date, 'invalid format') - self.failUnlessRaises(ValueError, parse_date, '123456789 -02000') - self.failUnlessRaises(ValueError, parse_date, ' 123456789 -0200') - - def test_actor(self): - for cr in (None, self.rorepo.config_reader()): - assert isinstance(Actor.committer(cr), Actor) - assert isinstance(Actor.author(cr), Actor) - #END assure config reader is handled - - def test_iterable_list(self): - for args in (('name',), ('name', 'prefix_')): - l = IterableList('name') - - m1 = TestIterableMember('one') - m2 = TestIterableMember('two') - - l.extend((m1, m2)) - - assert len(l) == 2 - - # contains works with name and identity - assert m1.name in l - assert m2.name in l - assert m2 in l - assert m2 in l - assert 'invalid' not in l - - # with string index - assert l[m1.name] is m1 - assert l[m2.name] is m2 - - # with int index - assert l[0] is m1 - assert l[1] is m2 - - # with getattr - assert l.one is m1 - assert l.two is m2 - - # test exceptions - self.failUnlessRaises(AttributeError, getattr, l, 'something') - self.failUnlessRaises(IndexError, l.__getitem__, 'something') - - # delete by name and index - self.failUnlessRaises(IndexError, l.__delitem__, 'something') - del(l[m2.name]) - assert len(l) == 1 - assert m2.name not in l and m1.name in l - del(l[0]) - assert m1.name not in l - assert len(l) == 0 - - self.failUnlessRaises(IndexError, l.__delitem__, 0) - self.failUnlessRaises(IndexError, l.__delitem__, 'something') - #END for each possible mode + def test_it_should_dashify(self): + assert_equal('this-is-my-argument', dashify('this_is_my_argument')) + assert_equal('foo', dashify('foo')) + + + def test_lock_file(self): + my_file = tempfile.mktemp() + lock_file = LockFile(my_file) + assert not lock_file._has_lock() + # release lock we don't have - fine + lock_file._release_lock() + + # get lock + lock_file._obtain_lock_or_raise() + assert lock_file._has_lock() + + # concurrent access + other_lock_file = LockFile(my_file) + assert not other_lock_file._has_lock() + self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise) + + lock_file._release_lock() + assert not lock_file._has_lock() + + other_lock_file._obtain_lock_or_raise() + self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise) + + # auto-release on destruction + del(other_lock_file) + lock_file._obtain_lock_or_raise() + lock_file._release_lock() + + def test_blocking_lock_file(self): + my_file = tempfile.mktemp() + lock_file = BlockingLockFile(my_file) + lock_file._obtain_lock() + + # next one waits for the lock + start = time.time() + wait_time = 0.1 + wait_lock = BlockingLockFile(my_file, 0.05, wait_time) + self.failUnlessRaises(IOError, 
wait_lock._obtain_lock) + elapsed = time.time() - start + assert elapsed <= wait_time + 0.02 # some extra time it may cost + + def test_user_id(self): + assert '@' in get_user_id() + + def test_parse_date(self): + # test all supported formats + def assert_rval(rval, veri_time, offset=0): + assert len(rval) == 2 + assert isinstance(rval[0], int) and isinstance(rval[1], int) + assert rval[0] == veri_time + assert rval[1] == offset + + # now that we are here, test our conversion functions as well + utctz = altz_to_utctz_str(offset) + assert isinstance(utctz, basestring) + assert utctz_to_altz(verify_utctz(utctz)) == offset + # END assert rval utility + + rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0) + iso = ("2005-04-07T22:13:11 -0200", 7200) + iso2 = ("2005-04-07 22:13:11 +0400", -14400) + iso3 = ("2005.04.07 22:13:11 -0000", 0) + alt = ("04/07/2005 22:13:11", 0) + alt2 = ("07.04.2005 22:13:11", 0) + veri_time = 1112904791 # the time this represents + for date, offset in (rfc, iso, iso2, iso3, alt, alt2): + assert_rval(parse_date(date), veri_time, offset) + # END for each date type + + # and failure + self.failUnlessRaises(ValueError, parse_date, 'invalid format') + self.failUnlessRaises(ValueError, parse_date, '123456789 -02000') + self.failUnlessRaises(ValueError, parse_date, ' 123456789 -0200') + + def test_actor(self): + for cr in (None, self.rorepo.config_reader()): + assert isinstance(Actor.committer(cr), Actor) + assert isinstance(Actor.author(cr), Actor) + #END assure config reader is handled + + def test_iterable_list(self): + for args in (('name',), ('name', 'prefix_')): + l = IterableList('name') + + m1 = TestIterableMember('one') + m2 = TestIterableMember('two') + + l.extend((m1, m2)) + + assert len(l) == 2 + + # contains works with name and identity + assert m1.name in l + assert m2.name in l + assert m2 in l + assert m2 in l + assert 'invalid' not in l + + # with string index + assert l[m1.name] is m1 + assert l[m2.name] is m2 + + # with int index + assert l[0] is m1 + assert l[1] is m2 + + # with getattr + assert l.one is m1 + assert l.two is m2 + + # test exceptions + self.failUnlessRaises(AttributeError, getattr, l, 'something') + self.failUnlessRaises(IndexError, l.__getitem__, 'something') + + # delete by name and index + self.failUnlessRaises(IndexError, l.__delitem__, 'something') + del(l[m2.name]) + assert len(l) == 1 + assert m2.name not in l and m1.name in l + del(l[0]) + assert m1.name not in l + assert len(l) == 0 + + self.failUnlessRaises(IndexError, l.__delitem__, 0) + self.failUnlessRaises(IndexError, l.__delitem__, 'something') + #END for each possible mode diff --git a/git/util.py b/git/util.py index a9e87d6f..7c257b37 100644 --- a/git/util.py +++ b/git/util.py @@ -14,656 +14,656 @@ import tempfile import platform from gitdb.util import ( - make_sha, - LockedFD, - file_contents_ro, - LazyMixin, - to_hex_sha, - to_bin_sha - ) + make_sha, + LockedFD, + file_contents_ro, + LazyMixin, + to_hex_sha, + to_bin_sha + ) __all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux", - "join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList", - "BlockingLockFile", "LockFile", 'Actor', 'get_user_id', 'assure_directory_exists', - 'RemoteProgress', 'rmtree') + "join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList", + "BlockingLockFile", "LockFile", 'Actor', 'get_user_id', 'assure_directory_exists', + 'RemoteProgress', 'rmtree') #{ Utility Methods def rmtree(path): - """Remove the given recursively. 
- :note: we use shutil rmtree but adjust its behaviour to see whether files that - couldn't be deleted are read-only. Windows will not remove them in that case""" - def onerror(func, path, exc_info): - if not os.access(path, os.W_OK): - # Is the error an access error ? - os.chmod(path, stat.S_IWUSR) - func(path) - else: - raise - # END end onerror - return shutil.rmtree(path, False, onerror) - - + """Remove the given recursively. + :note: we use shutil rmtree but adjust its behaviour to see whether files that + couldn't be deleted are read-only. Windows will not remove them in that case""" + def onerror(func, path, exc_info): + if not os.access(path, os.W_OK): + # Is the error an access error ? + os.chmod(path, stat.S_IWUSR) + func(path) + else: + raise + # END end onerror + return shutil.rmtree(path, False, onerror) + + def stream_copy(source, destination, chunk_size=512*1024): - """Copy all data from the source stream into the destination stream in chunks - of size chunk_size - - :return: amount of bytes written""" - br = 0 - while True: - chunk = source.read(chunk_size) - destination.write(chunk) - br += len(chunk) - if len(chunk) < chunk_size: - break - # END reading output stream - return br + """Copy all data from the source stream into the destination stream in chunks + of size chunk_size + + :return: amount of bytes written""" + br = 0 + while True: + chunk = source.read(chunk_size) + destination.write(chunk) + br += len(chunk) + if len(chunk) < chunk_size: + break + # END reading output stream + return br def join_path(a, *p): - """Join path tokens together similar to os.path.join, but always use - '/' instead of possibly '\' on windows.""" - path = a - for b in p: - if len(b) == 0: - continue - if b.startswith('/'): - path += b[1:] - elif path == '' or path.endswith('/'): - path += b - else: - path += '/' + b - # END for each path token to add - return path - + """Join path tokens together similar to os.path.join, but always use + '/' instead of possibly '\' on windows.""" + path = a + for b in p: + if len(b) == 0: + continue + if b.startswith('/'): + path += b[1:] + elif path == '' or path.endswith('/'): + path += b + else: + path += '/' + b + # END for each path token to add + return path + def to_native_path_windows(path): - return path.replace('/','\\') - + return path.replace('/','\\') + def to_native_path_linux(path): - return path.replace('\\','/') + return path.replace('\\','/') if sys.platform.startswith('win'): - to_native_path = to_native_path_windows + to_native_path = to_native_path_windows else: - # no need for any work on linux - def to_native_path_linux(path): - return path - to_native_path = to_native_path_linux + # no need for any work on linux + def to_native_path_linux(path): + return path + to_native_path = to_native_path_linux def join_path_native(a, *p): - """ - As join path, but makes sure an OS native path is returned. This is only - needed to play it safe on my dear windows and to assure nice paths that only - use '\'""" - return to_native_path(join_path(a, *p)) - + """ + As join path, but makes sure an OS native path is returned. This is only + needed to play it safe on my dear windows and to assure nice paths that only + use '\'""" + return to_native_path(join_path(a, *p)) + def assure_directory_exists(path, is_file=False): - """Assure that the directory pointed to by path exists. - - :param is_file: If True, path is assumed to be a file and handled correctly. 
- Otherwise it must be a directory - :return: True if the directory was created, False if it already existed""" - if is_file: - path = os.path.dirname(path) - #END handle file - if not os.path.isdir(path): - os.makedirs(path) - return True - return False - + """Assure that the directory pointed to by path exists. + + :param is_file: If True, path is assumed to be a file and handled correctly. + Otherwise it must be a directory + :return: True if the directory was created, False if it already existed""" + if is_file: + path = os.path.dirname(path) + #END handle file + if not os.path.isdir(path): + os.makedirs(path) + return True + return False + def get_user_id(): - """:return: string identifying the currently active system user as name@node - :note: user can be set with the 'USER' environment variable, usually set on windows""" - ukn = 'UNKNOWN' - username = os.environ.get('USER', os.environ.get('USERNAME', ukn)) - if username == ukn and hasattr(os, 'getlogin'): - username = os.getlogin() - # END get username from login - return "%s@%s" % (username, platform.node()) + """:return: string identifying the currently active system user as name@node + :note: user can be set with the 'USER' environment variable, usually set on windows""" + ukn = 'UNKNOWN' + username = os.environ.get('USER', os.environ.get('USERNAME', ukn)) + if username == ukn and hasattr(os, 'getlogin'): + username = os.getlogin() + # END get username from login + return "%s@%s" % (username, platform.node()) #} END utilities #{ Classes class RemoteProgress(object): - """ - Handler providing an interface to parse progress information emitted by git-push - and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. - """ - _num_op_codes = 7 - BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)] - STAGE_MASK = BEGIN|END - OP_MASK = ~STAGE_MASK - - __slots__ = ("_cur_line", "_seen_ops") - re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") - re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") - - def __init__(self): - self._seen_ops = list() - - def _parse_progress_line(self, line): - """Parse progress information from the given line as retrieved by git-push - or git-fetch - - :return: list(line, ...) list of lines that could not be processed""" - # handle - # Counting objects: 4, done. - # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done. - self._cur_line = line - sub_lines = line.split('\r') - failed_lines = list() - for sline in sub_lines: - # find esacpe characters and cut them away - regex will not work with - # them as they are non-ascii. 
As git might expect a tty, it will send them - last_valid_index = None - for i,c in enumerate(reversed(sline)): - if ord(c) < 32: - # its a slice index - last_valid_index = -i-1 - # END character was non-ascii - # END for each character in sline - if last_valid_index is not None: - sline = sline[:last_valid_index] - # END cut away invalid part - sline = sline.rstrip() - - cur_count, max_count = None, None - match = self.re_op_relative.match(sline) - if match is None: - match = self.re_op_absolute.match(sline) - - if not match: - self.line_dropped(sline) - failed_lines.append(sline) - continue - # END could not get match - - op_code = 0 - remote, op_name, percent, cur_count, max_count, message = match.groups() - - # get operation id - if op_name == "Counting objects": - op_code |= self.COUNTING - elif op_name == "Compressing objects": - op_code |= self.COMPRESSING - elif op_name == "Writing objects": - op_code |= self.WRITING - elif op_name == 'Receiving objects': - op_code |= self.RECEIVING - elif op_name == 'Resolving deltas': - op_code |= self.RESOLVING - else: - # Note: On windows it can happen that partial lines are sent - # Hence we get something like "CompreReceiving objects", which is - # a blend of "Compressing objects" and "Receiving objects". - # This can't really be prevented, so we drop the line verbosely - # to make sure we get informed in case the process spits out new - # commands at some point. - self.line_dropped(sline) - sys.stderr.write("Operation name %r unknown - skipping line '%s'" % (op_name, sline)) - # Note: Don't add this line to the failed lines, as we have to silently - # drop it - return failed_lines - # END handle op code - - # figure out stage - if op_code not in self._seen_ops: - self._seen_ops.append(op_code) - op_code |= self.BEGIN - # END begin opcode - - if message is None: - message = '' - # END message handling - - message = message.strip() - done_token = ', done.' - if message.endswith(done_token): - op_code |= self.END - message = message[:-len(done_token)] - # END end message handling - - self.update(op_code, cur_count, max_count, message) - # END for each sub line - return failed_lines - - def line_dropped(self, line): - """Called whenever a line could not be understood and was therefore dropped.""" - pass - - def update(self, op_code, cur_count, max_count=None, message=''): - """Called whenever the progress changes - - :param op_code: - Integer allowing to be compared against Operation IDs and stage IDs. - - Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation - ID as well as END. It may be that BEGIN and END are set at once in case only - one progress message was emitted due to the speed of the operation. - Between BEGIN and END, none of these flags will be set - - Operation IDs are all held within the OP_MASK. Only one Operation ID will - be active per call. - :param cur_count: Current absolute count of items - - :param max_count: - The maximum count of items we expect. It may be None in case there is - no maximum number of items or if it is (yet) unknown. - - :param message: - In case of the 'WRITING' operation, it contains the amount of bytes - transferred. It may possibly be used for other purposes as well. - - You may read the contents of the current line in self._cur_line""" - pass + """ + Handler providing an interface to parse progress information emitted by git-push + and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. 
+ """ + _num_op_codes = 7 + BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)] + STAGE_MASK = BEGIN|END + OP_MASK = ~STAGE_MASK + + __slots__ = ("_cur_line", "_seen_ops") + re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") + re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") + + def __init__(self): + self._seen_ops = list() + + def _parse_progress_line(self, line): + """Parse progress information from the given line as retrieved by git-push + or git-fetch + + :return: list(line, ...) list of lines that could not be processed""" + # handle + # Counting objects: 4, done. + # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done. + self._cur_line = line + sub_lines = line.split('\r') + failed_lines = list() + for sline in sub_lines: + # find esacpe characters and cut them away - regex will not work with + # them as they are non-ascii. As git might expect a tty, it will send them + last_valid_index = None + for i,c in enumerate(reversed(sline)): + if ord(c) < 32: + # its a slice index + last_valid_index = -i-1 + # END character was non-ascii + # END for each character in sline + if last_valid_index is not None: + sline = sline[:last_valid_index] + # END cut away invalid part + sline = sline.rstrip() + + cur_count, max_count = None, None + match = self.re_op_relative.match(sline) + if match is None: + match = self.re_op_absolute.match(sline) + + if not match: + self.line_dropped(sline) + failed_lines.append(sline) + continue + # END could not get match + + op_code = 0 + remote, op_name, percent, cur_count, max_count, message = match.groups() + + # get operation id + if op_name == "Counting objects": + op_code |= self.COUNTING + elif op_name == "Compressing objects": + op_code |= self.COMPRESSING + elif op_name == "Writing objects": + op_code |= self.WRITING + elif op_name == 'Receiving objects': + op_code |= self.RECEIVING + elif op_name == 'Resolving deltas': + op_code |= self.RESOLVING + else: + # Note: On windows it can happen that partial lines are sent + # Hence we get something like "CompreReceiving objects", which is + # a blend of "Compressing objects" and "Receiving objects". + # This can't really be prevented, so we drop the line verbosely + # to make sure we get informed in case the process spits out new + # commands at some point. + self.line_dropped(sline) + sys.stderr.write("Operation name %r unknown - skipping line '%s'" % (op_name, sline)) + # Note: Don't add this line to the failed lines, as we have to silently + # drop it + return failed_lines + # END handle op code + + # figure out stage + if op_code not in self._seen_ops: + self._seen_ops.append(op_code) + op_code |= self.BEGIN + # END begin opcode + + if message is None: + message = '' + # END message handling + + message = message.strip() + done_token = ', done.' + if message.endswith(done_token): + op_code |= self.END + message = message[:-len(done_token)] + # END end message handling + + self.update(op_code, cur_count, max_count, message) + # END for each sub line + return failed_lines + + def line_dropped(self, line): + """Called whenever a line could not be understood and was therefore dropped.""" + pass + + def update(self, op_code, cur_count, max_count=None, message=''): + """Called whenever the progress changes + + :param op_code: + Integer allowing to be compared against Operation IDs and stage IDs. + + Stage IDs are BEGIN and END. 
BEGIN will only be set once for each Operation + ID as well as END. It may be that BEGIN and END are set at once in case only + one progress message was emitted due to the speed of the operation. + Between BEGIN and END, none of these flags will be set + + Operation IDs are all held within the OP_MASK. Only one Operation ID will + be active per call. + :param cur_count: Current absolute count of items + + :param max_count: + The maximum count of items we expect. It may be None in case there is + no maximum number of items or if it is (yet) unknown. + + :param message: + In case of the 'WRITING' operation, it contains the amount of bytes + transferred. It may possibly be used for other purposes as well. + + You may read the contents of the current line in self._cur_line""" + pass class Actor(object): - """Actors hold information about a person acting on the repository. They - can be committers and authors or anything with a name and an email as - mentioned in the git log entries.""" - # PRECOMPILED REGEX - name_only_regex = re.compile( r'<(.+)>' ) - name_email_regex = re.compile( r'(.*) <(.+?)>' ) - - # ENVIRONMENT VARIABLES - # read when creating new commits - env_author_name = "GIT_AUTHOR_NAME" - env_author_email = "GIT_AUTHOR_EMAIL" - env_committer_name = "GIT_COMMITTER_NAME" - env_committer_email = "GIT_COMMITTER_EMAIL" - - # CONFIGURATION KEYS - conf_name = 'name' - conf_email = 'email' - - __slots__ = ('name', 'email') - - def __init__(self, name, email): - self.name = name - self.email = email - - def __eq__(self, other): - return self.name == other.name and self.email == other.email - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - return hash((self.name, self.email)) - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.Actor "%s <%s>">' % (self.name, self.email) - - @classmethod - def _from_string(cls, string): - """Create an Actor from a string. - :param string: is the string, which is expected to be in regular git format - - John Doe <jdoe@example.com> - - :return: Actor """ - m = cls.name_email_regex.search(string) - if m: - name, email = m.groups() - return Actor(name, email) - else: - m = cls.name_only_regex.search(string) - if m: - return Actor(m.group(1), None) - else: - # assume best and use the whole string as name - return Actor(string, None) - # END special case name - # END handle name/email matching - - @classmethod - def _main_actor(cls, env_name, env_email, config_reader=None): - actor = Actor('', '') - default_email = get_user_id() - default_name = default_email.split('@')[0] - - for attr, evar, cvar, default in (('name', env_name, cls.conf_name, default_name), - ('email', env_email, cls.conf_email, default_email)): - try: - setattr(actor, attr, os.environ[evar]) - except KeyError: - if config_reader is not None: - setattr(actor, attr, config_reader.get_value('user', cvar, default)) - #END config-reader handling - if not getattr(actor, attr): - setattr(actor, attr, default) - #END handle name - #END for each item to retrieve - return actor - - - @classmethod - def committer(cls, config_reader=None): - """ - :return: Actor instance corresponding to the configured committer. It behaves - similar to the git implementation, such that the environment will override - configuration values of config_reader. 
If no value is set at all, it will be - generated - :param config_reader: ConfigReader to use to retrieve the values from in case - they are not set in the environment""" - return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader) - - @classmethod - def author(cls, config_reader=None): - """Same as committer(), but defines the main author. It may be specified in the environment, - but defaults to the committer""" - return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader) - + """Actors hold information about a person acting on the repository. They + can be committers and authors or anything with a name and an email as + mentioned in the git log entries.""" + # PRECOMPILED REGEX + name_only_regex = re.compile( r'<(.+)>' ) + name_email_regex = re.compile( r'(.*) <(.+?)>' ) + + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_name = "GIT_AUTHOR_NAME" + env_author_email = "GIT_AUTHOR_EMAIL" + env_committer_name = "GIT_COMMITTER_NAME" + env_committer_email = "GIT_COMMITTER_EMAIL" + + # CONFIGURATION KEYS + conf_name = 'name' + conf_email = 'email' + + __slots__ = ('name', 'email') + + def __init__(self, name, email): + self.name = name + self.email = email + + def __eq__(self, other): + return self.name == other.name and self.email == other.email + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash((self.name, self.email)) + + def __str__(self): + return self.name + + def __repr__(self): + return '<git.Actor "%s <%s>">' % (self.name, self.email) + + @classmethod + def _from_string(cls, string): + """Create an Actor from a string. + :param string: is the string, which is expected to be in regular git format + + John Doe <jdoe@example.com> + + :return: Actor """ + m = cls.name_email_regex.search(string) + if m: + name, email = m.groups() + return Actor(name, email) + else: + m = cls.name_only_regex.search(string) + if m: + return Actor(m.group(1), None) + else: + # assume best and use the whole string as name + return Actor(string, None) + # END special case name + # END handle name/email matching + + @classmethod + def _main_actor(cls, env_name, env_email, config_reader=None): + actor = Actor('', '') + default_email = get_user_id() + default_name = default_email.split('@')[0] + + for attr, evar, cvar, default in (('name', env_name, cls.conf_name, default_name), + ('email', env_email, cls.conf_email, default_email)): + try: + setattr(actor, attr, os.environ[evar]) + except KeyError: + if config_reader is not None: + setattr(actor, attr, config_reader.get_value('user', cvar, default)) + #END config-reader handling + if not getattr(actor, attr): + setattr(actor, attr, default) + #END handle name + #END for each item to retrieve + return actor + + + @classmethod + def committer(cls, config_reader=None): + """ + :return: Actor instance corresponding to the configured committer. It behaves + similar to the git implementation, such that the environment will override + configuration values of config_reader. If no value is set at all, it will be + generated + :param config_reader: ConfigReader to use to retrieve the values from in case + they are not set in the environment""" + return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader) + + @classmethod + def author(cls, config_reader=None): + """Same as committer(), but defines the main author. 
It may be specified in the environment, + but defaults to the committer""" + return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader) + class Stats(object): - """ - Represents stat information as presented by git at the end of a merge. It is - created from the output of a diff operation. - - ``Example``:: - - c = Commit( sha1 ) - s = c.stats - s.total # full-stat-dict - s.files # dict( filepath : stat-dict ) - - ``stat-dict`` - - A dictionary with the following keys and values:: - - deletions = number of deleted lines as int - insertions = number of inserted lines as int - lines = total number of lines changed as int, or deletions + insertions - - ``full-stat-dict`` - - In addition to the items in the stat-dict, it features additional information:: - - files = number of changed files as int""" - __slots__ = ("total", "files") - - def __init__(self, total, files): - self.total = total - self.files = files - - @classmethod - def _list_from_string(cls, repo, text): - """Create a Stat object from output retrieved by git-diff. - - :return: git.Stat""" - hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': dict()} - for line in text.splitlines(): - (raw_insertions, raw_deletions, filename) = line.split("\t") - insertions = raw_insertions != '-' and int(raw_insertions) or 0 - deletions = raw_deletions != '-' and int(raw_deletions) or 0 - hsh['total']['insertions'] += insertions - hsh['total']['deletions'] += deletions - hsh['total']['lines'] += insertions + deletions - hsh['total']['files'] += 1 - hsh['files'][filename.strip()] = {'insertions': insertions, - 'deletions': deletions, - 'lines': insertions + deletions} - return Stats(hsh['total'], hsh['files']) + """ + Represents stat information as presented by git at the end of a merge. It is + created from the output of a diff operation. + + ``Example``:: + + c = Commit( sha1 ) + s = c.stats + s.total # full-stat-dict + s.files # dict( filepath : stat-dict ) + + ``stat-dict`` + + A dictionary with the following keys and values:: + + deletions = number of deleted lines as int + insertions = number of inserted lines as int + lines = total number of lines changed as int, or deletions + insertions + + ``full-stat-dict`` + + In addition to the items in the stat-dict, it features additional information:: + + files = number of changed files as int""" + __slots__ = ("total", "files") + + def __init__(self, total, files): + self.total = total + self.files = files + + @classmethod + def _list_from_string(cls, repo, text): + """Create a Stat object from output retrieved by git-diff. + + :return: git.Stat""" + hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': dict()} + for line in text.splitlines(): + (raw_insertions, raw_deletions, filename) = line.split("\t") + insertions = raw_insertions != '-' and int(raw_insertions) or 0 + deletions = raw_deletions != '-' and int(raw_deletions) or 0 + hsh['total']['insertions'] += insertions + hsh['total']['deletions'] += deletions + hsh['total']['lines'] += insertions + deletions + hsh['total']['files'] += 1 + hsh['files'][filename.strip()] = {'insertions': insertions, + 'deletions': deletions, + 'lines': insertions + deletions} + return Stats(hsh['total'], hsh['files']) class IndexFileSHA1Writer(object): - """Wrapper around a file-like object that remembers the SHA1 of - the data written to it. It will write a sha when the stream is closed - or if the asked for explicitly usign write_sha. 
- 
- Only useful to the indexfile
- 
- :note: Based on the dulwich project"""
- __slots__ = ("f", "sha1")
- 
- def __init__(self, f):
- self.f = f
- self.sha1 = make_sha("")
- 
- def write(self, data):
- self.sha1.update(data)
- return self.f.write(data)
- 
- def write_sha(self):
- sha = self.sha1.digest()
- self.f.write(sha)
- return sha
- 
- def close(self):
- sha = self.write_sha()
- self.f.close()
- return sha
- 
- def tell(self):
- return self.f.tell()
+ """Wrapper around a file-like object that remembers the SHA1 of
+ the data written to it. It will write a sha when the stream is closed
+ or if asked for explicitly using write_sha.
+ 
+ Only useful to the indexfile
+ 
+ :note: Based on the dulwich project"""
+ __slots__ = ("f", "sha1")
+ 
+ def __init__(self, f):
+ self.f = f
+ self.sha1 = make_sha("")
+ 
+ def write(self, data):
+ self.sha1.update(data)
+ return self.f.write(data)
+ 
+ def write_sha(self):
+ sha = self.sha1.digest()
+ self.f.write(sha)
+ return sha
+ 
+ def close(self):
+ sha = self.write_sha()
+ self.f.close()
+ return sha
+ 
+ def tell(self):
+ return self.f.tell()

class LockFile(object):
- """Provides methods to obtain, check for, and release a file based lock which
- should be used to handle concurrent access to the same file.
- 
- As we are a utility class to be derived from, we only use protected methods.
- 
- Locks will automatically be released on destruction"""
- __slots__ = ("_file_path", "_owns_lock")
- 
- def __init__(self, file_path):
- self._file_path = file_path
- self._owns_lock = False
- 
- def __del__(self):
- self._release_lock()
- 
- def _lock_file_path(self):
- """:return: Path to lockfile"""
- return "%s.lock" % (self._file_path)
- 
- def _has_lock(self):
- """:return: True if we have a lock and if the lockfile still exists
- :raise AssertionError: if our lock-file does not exist"""
- if not self._owns_lock:
- return False
- 
- return True
- 
- def _obtain_lock_or_raise(self):
- """Create a lock file as a flag for other instances, mark our instance as lock-holder
- 
- :raise IOError: if a lock was already present or a lock file could not be written"""
- if self._has_lock():
- return
- lock_file = self._lock_file_path()
- if os.path.isfile(lock_file):
- raise IOError("Lock for file %r already exists, delete %r in case the lock is illegal" % (self._file_path, lock_file))
- 
- try:
- fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
- os.close(fd)
- except OSError,e:
- raise IOError(str(e))
- 
- self._owns_lock = True
- 
- def _obtain_lock(self):
- """The default implementation will raise if a lock cannot be obtained.
- Subclasses may override this method to provide a different implementation"""
- return self._obtain_lock_or_raise()
- 
- def _release_lock(self):
- """Release our lock if we have one"""
- if not self._has_lock():
- return
- 
- # if someone removed our file beforehand, let's just flag this issue
- # instead of failing, to make it more usable.
- lfp = self._lock_file_path()
- try:
- # on bloody windows, the file needs write permissions to be removable.
- # Why ...
- if os.name == 'nt':
- os.chmod(lfp, 0777)
- # END handle win32
- os.remove(lfp)
- except OSError:
- pass
- self._owns_lock = False
+ """Provides methods to obtain, check for, and release a file based lock which
+ should be used to handle concurrent access to the same file.
+ 
+ As we are a utility class to be derived from, we only use protected methods.
+ 
+ Locks will automatically be released on destruction"""
+ __slots__ = ("_file_path", "_owns_lock")
+ 
+ def __init__(self, file_path):
+ self._file_path = file_path
+ self._owns_lock = False
+ 
+ def __del__(self):
+ self._release_lock()
+ 
+ def _lock_file_path(self):
+ """:return: Path to lockfile"""
+ return "%s.lock" % (self._file_path)
+ 
+ def _has_lock(self):
+ """:return: True if we have a lock and if the lockfile still exists
+ :raise AssertionError: if our lock-file does not exist"""
+ if not self._owns_lock:
+ return False
+ 
+ return True
+ 
+ def _obtain_lock_or_raise(self):
+ """Create a lock file as a flag for other instances, mark our instance as lock-holder
+ 
+ :raise IOError: if a lock was already present or a lock file could not be written"""
+ if self._has_lock():
+ return
+ lock_file = self._lock_file_path()
+ if os.path.isfile(lock_file):
+ raise IOError("Lock for file %r already exists, delete %r in case the lock is illegal" % (self._file_path, lock_file))
+ 
+ try:
+ fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
+ os.close(fd)
+ except OSError,e:
+ raise IOError(str(e))
+ 
+ self._owns_lock = True
+ 
+ def _obtain_lock(self):
+ """The default implementation will raise if a lock cannot be obtained.
+ Subclasses may override this method to provide a different implementation"""
+ return self._obtain_lock_or_raise()
+ 
+ def _release_lock(self):
+ """Release our lock if we have one"""
+ if not self._has_lock():
+ return
+ 
+ # if someone removed our file beforehand, let's just flag this issue
+ # instead of failing, to make it more usable.
+ lfp = self._lock_file_path()
+ try:
+ # on bloody windows, the file needs write permissions to be removable.
+ # Why ...
+ if os.name == 'nt':
+ os.chmod(lfp, 0777)
+ # END handle win32
+ os.remove(lfp)
+ except OSError:
+ pass
+ self._owns_lock = False

class BlockingLockFile(LockFile):
- """The lock file will block until a lock could be obtained, or fail after
- a specified timeout.
- 
- :note: If the directory containing the lock was removed, an exception will
- be raised during the blocking period, preventing hangs as the lock
- can never be obtained."""
- __slots__ = ("_check_interval", "_max_block_time")
- def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
- """Configure the instance
- 
- :param check_interval_s:
- Period of time to sleep until the lock is checked the next time.
- By default, it waits a nearly unlimited time
- 
- :param max_block_time_s: Maximum number of seconds we may block waiting for the lock"""
- super(BlockingLockFile, self).__init__(file_path)
- self._check_interval = check_interval_s
- self._max_block_time = max_block_time_s
- 
- def _obtain_lock(self):
- """This method blocks until it has obtained the lock, or raises IOError if
- it ran out of time or if the parent directory was not available anymore.
- If this method returns, you are guaranteed to own the lock"""
- starttime = time.time()
- maxtime = starttime + float(self._max_block_time)
- while True:
- try:
- super(BlockingLockFile, self)._obtain_lock()
- except IOError:
- # sanity check: if the directory leading to the lockfile is not
- # readable anymore, raise an exception
- curtime = time.time()
- if not os.path.isdir(os.path.dirname(self._lock_file_path())):
- msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
- raise IOError(msg)
- # END handle missing directory
- 
- if curtime >= maxtime:
- msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
- raise IOError(msg)
- # END abort if we wait too long
- time.sleep(self._check_interval)
- else:
- break
- # END endless loop
- 
+ """The lock file will block until a lock could be obtained, or fail after
+ a specified timeout.
+ 
+ :note: If the directory containing the lock was removed, an exception will
+ be raised during the blocking period, preventing hangs as the lock
+ can never be obtained."""
+ __slots__ = ("_check_interval", "_max_block_time")
+ def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
+ """Configure the instance
+ 
+ :param check_interval_s:
+ Period of time to sleep until the lock is checked the next time.
+ By default, it waits a nearly unlimited time
+ 
+ :param max_block_time_s: Maximum number of seconds we may block waiting for the lock"""
+ super(BlockingLockFile, self).__init__(file_path)
+ self._check_interval = check_interval_s
+ self._max_block_time = max_block_time_s
+ 
+ def _obtain_lock(self):
+ """This method blocks until it has obtained the lock, or raises IOError if
+ it ran out of time or if the parent directory was not available anymore.
+ If this method returns, you are guaranteed to own the lock"""
+ starttime = time.time()
+ maxtime = starttime + float(self._max_block_time)
+ while True:
+ try:
+ super(BlockingLockFile, self)._obtain_lock()
+ except IOError:
+ # sanity check: if the directory leading to the lockfile is not
+ # readable anymore, raise an exception
+ curtime = time.time()
+ if not os.path.isdir(os.path.dirname(self._lock_file_path())):
+ msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
+ raise IOError(msg)
+ # END handle missing directory
+ 
+ if curtime >= maxtime:
+ msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
+ raise IOError(msg)
+ # END abort if we wait too long
+ time.sleep(self._check_interval)
+ else:
+ break
+ # END endless loop
+ 

class IterableList(list):
- """
- List of iterable objects allowing to query an object by id or by named index::
- 
- heads = repo.heads
- heads.master
- heads['master']
- heads[0]
- 
- It requires an id_attribute name to be set which will be queried from its
- contained items to have a means for comparison.
- 
- A prefix can be specified which is to be used in case the id returned by the
- items always contains a prefix that does not matter to the user, so it
- can be left out."""
- __slots__ = ('_id_attr', '_prefix')
- 
- def __new__(cls, id_attr, prefix=''):
- return super(IterableList,cls).__new__(cls)
- 
- def __init__(self, id_attr, prefix=''):
- self._id_attr = id_attr
- self._prefix = prefix
- if not isinstance(id_attr, basestring):
- raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
- # END help debugging !
- 
- def __contains__(self, attr):
- # first try identity match for performance
- rval = list.__contains__(self, attr)
- if rval:
- return rval
- #END handle match
- 
- # otherwise make a full name search
- try:
- getattr(self, attr)
- return True
- except (AttributeError, TypeError):
- return False
- #END handle membership
- 
- def __getattr__(self, attr):
- attr = self._prefix + attr
- for item in self:
- if getattr(item, self._id_attr) == attr:
- return item
- # END for each item
- return list.__getattribute__(self, attr)
- 
- def __getitem__(self, index):
- if isinstance(index, int):
- return list.__getitem__(self,index)
- 
- try:
- return getattr(self, index)
- except AttributeError:
- raise IndexError( "No item found with id %r" % (self._prefix + index) )
- # END handle getattr
- 
- def __delitem__(self, index):
- delindex = index
- if not isinstance(index, int):
- delindex = -1
- name = self._prefix + index
- for i, item in enumerate(self):
- if getattr(item, self._id_attr) == name:
- delindex = i
- break
- #END search index
- #END for each item
- if delindex == -1:
- raise IndexError("Item with name %s not found" % name)
- #END handle error
- #END get index to delete
- list.__delitem__(self, delindex)
- 
+ """
+ List of iterable objects allowing to query an object by id or by named index::
+ 
+ heads = repo.heads
+ heads.master
+ heads['master']
+ heads[0]
+ 
+ It requires an id_attribute name to be set which will be queried from its
+ contained items to have a means for comparison.
+ 
+ A prefix can be specified which is to be used in case the id returned by the
+ items always contains a prefix that does not matter to the user, so it
+ can be left out."""
+ __slots__ = ('_id_attr', '_prefix')
+ 
+ def __new__(cls, id_attr, prefix=''):
+ return super(IterableList,cls).__new__(cls)
+ 
+ def __init__(self, id_attr, prefix=''):
+ self._id_attr = id_attr
+ self._prefix = prefix
+ if not isinstance(id_attr, basestring):
+ raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
+ # END help debugging !
+ 
+ def __contains__(self, attr):
+ # first try identity match for performance
+ rval = list.__contains__(self, attr)
+ if rval:
+ return rval
+ #END handle match
+ 
+ # otherwise make a full name search
+ try:
+ getattr(self, attr)
+ return True
+ except (AttributeError, TypeError):
+ return False
+ #END handle membership
+ 
+ def __getattr__(self, attr):
+ attr = self._prefix + attr
+ for item in self:
+ if getattr(item, self._id_attr) == attr:
+ return item
+ # END for each item
+ return list.__getattribute__(self, attr)
+ 
+ def __getitem__(self, index):
+ if isinstance(index, int):
+ return list.__getitem__(self,index)
+ 
+ try:
+ return getattr(self, index)
+ except AttributeError:
+ raise IndexError( "No item found with id %r" % (self._prefix + index) )
+ # END handle getattr
+ 
+ def __delitem__(self, index):
+ delindex = index
+ if not isinstance(index, int):
+ delindex = -1
+ name = self._prefix + index
+ for i, item in enumerate(self):
+ if getattr(item, self._id_attr) == name:
+ delindex = i
+ break
+ #END search index
+ #END for each item
+ if delindex == -1:
+ raise IndexError("Item with name %s not found" % name)
+ #END handle error
+ #END get index to delete
+ list.__delitem__(self, delindex)
+ 

class Iterable(object):
- """Defines an interface for iterable items which is to assure a uniform
- way to retrieve and iterate items within the git repository"""
- __slots__ = tuple()
- _id_attribute_ = "attribute that most suitably identifies your instance"
- 
- @classmethod
- def list_items(cls, repo, *args, **kwargs):
- """
- Find all items of this type - subclasses can specify args and kwargs differently.
- If no args are given, subclasses are obliged to return all items if no additional
- arguments are given.
- 
- :note: Favor the iter_items method as it will avoid loading all items into memory at once.
- 
- :return: list(Item,...) list of item instances"""
- out_list = IterableList( cls._id_attribute_ )
- out_list.extend(cls.iter_items(repo, *args, **kwargs))
- return out_list
- 
- 
- @classmethod
- def iter_items(cls, repo, *args, **kwargs):
- """For more information about the arguments, see list_items
- :return: iterator yielding Items"""
- raise NotImplementedError("To be implemented by Subclass")
- 
+ """Defines an interface for iterable items which is to assure a uniform
+ way to retrieve and iterate items within the git repository"""
+ __slots__ = tuple()
+ _id_attribute_ = "attribute that most suitably identifies your instance"
+ 
+ @classmethod
+ def list_items(cls, repo, *args, **kwargs):
+ """
+ Find all items of this type - subclasses can specify args and kwargs differently.
+ If no args are given, subclasses are obliged to return all items if no additional
+ arguments are given.
+ 
+ :note: Favor the iter_items method as it will avoid loading all items into memory at once.
+ 
+ :return: list(Item,...) list of item instances"""
+ out_list = IterableList( cls._id_attribute_ )
+ out_list.extend(cls.iter_items(repo, *args, **kwargs))
+ return out_list
+ 
+ 
+ @classmethod
+ def iter_items(cls, repo, *args, **kwargs):
+ """For more information about the arguments, see list_items
+ :return: iterator yielding Items"""
+ raise NotImplementedError("To be implemented by Subclass")
+ 
#} END classes
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python
try:
- from setuptools import setup, find_packages
+ from setuptools import setup, find_packages
except ImportError:
- from ez_setup import use_setuptools
- use_setuptools()
- from setuptools import setup, find_packages
+ from ez_setup import use_setuptools
+ use_setuptools()
+ from setuptools import setup, find_packages

from distutils.command.build_py import build_py as _build_py
from setuptools.command.sdist import sdist as _sdist

@@ -18,74 +18,74 @@
v.close()

class build_py(_build_py):
- def run(self):
- init = path.join(self.build_lib, 'git', '__init__.py')
- if path.exists(init):
- os.unlink(init)
- _build_py.run(self)
- _stamp_version(init)
- self.byte_compile([init])
+ def run(self):
+ init = path.join(self.build_lib, 'git', '__init__.py')
+ if path.exists(init):
+ os.unlink(init)
+ _build_py.run(self)
+ _stamp_version(init)
+ self.byte_compile([init])

class sdist(_sdist):
- def make_release_tree (self, base_dir, files):
- _sdist.make_release_tree(self, base_dir, files)
- orig = path.join('git', '__init__.py')
- assert path.exists(orig), orig
- dest = path.join(base_dir, orig)
- if hasattr(os, 'link') and path.exists(dest):
- os.unlink(dest)
- self.copy_file(orig, dest)
- _stamp_version(dest)
+ def make_release_tree (self, base_dir, files):
+ _sdist.make_release_tree(self, base_dir, files)
+ orig = path.join('git', '__init__.py')
+ assert path.exists(orig), orig
+ dest = path.join(base_dir, orig)
+ if hasattr(os, 'link') and path.exists(dest):
+ os.unlink(dest)
+ self.copy_file(orig, dest)
+ _stamp_version(dest)

def _stamp_version(filename):
- found, out = False, list()
- try:
- f = open(filename, 'r')
- except (IOError, OSError):
- print >> sys.stderr, "Couldn't find file %s to stamp version" % filename
- return
- #END handle error, usually happens during binary builds
- for line in f:
- if '__version__ =' in line:
- line = line.replace("'git'", "'%s'" % VERSION)
- found = True
- out.append(line)
- f.close()
+ found, out = False, list()
+ try:
+ f = open(filename, 'r')
+ except (IOError, OSError):
+ print >> sys.stderr, "Couldn't find file %s to stamp version" % filename
+ return
+ #END handle error, usually happens during binary builds
+ for line in f:
+ if '__version__ =' in line:
+ line = line.replace("'git'", "'%s'" % VERSION)
+ found = True
+ out.append(line)
+ f.close()

- if found:
- f = open(filename, 'w')
- f.writelines(out)
- f.close()
- else:
- print >> sys.stderr, "WARNING: Couldn't find version line in file %s" % filename
+ if found:
+ f = open(filename, 'w')
+ f.writelines(out)
+ f.close()
+ else:
+ print >> sys.stderr, "WARNING: Couldn't find version line in file %s" % filename

setup(name = "GitPython",
- cmdclass={'build_py': build_py, 'sdist': sdist},
- version = VERSION,
- description = "Python Git Library",
- author = "Sebastian Thiel, Michael Trier",
- author_email = "byronimo@gmail.com, mtrier@gmail.com",
- url = "http://gitorious.org/projects/git-python/",
- packages = find_packages('.'),
- py_modules = ['git.'+f[:-3] for f in os.listdir('./git') if f.endswith('.py')],
- package_data = {'git.test' : ['fixtures/*']},
- package_dir = {'git':'git'},
- license = "BSD License",
- requires=('gitdb (>=0.5.1)',),
- install_requires='gitdb >= 0.5.1',
- zip_safe=False,
- long_description = """\
GitPython is a python library used to interact with Git repositories""",
- classifiers = [
- "Development Status :: 4 - Beta",
- "Intended Audience :: Developers",
- "License :: OSI Approved :: BSD License",
- "Operating System :: OS Independent",
- "Programming Language :: Python",
- "Programming Language :: Python :: 2.5",
- "Programming Language :: Python :: 2.6",
- "Topic :: Software Development :: Libraries :: Python Modules",
- ]
- )
+ cmdclass={'build_py': build_py, 'sdist': sdist},
+ version = VERSION,
+ description = "Python Git Library",
+ author = "Sebastian Thiel, Michael Trier",
+ author_email = "byronimo@gmail.com, mtrier@gmail.com",
+ url = "http://gitorious.org/projects/git-python/",
+ packages = find_packages('.'),
+ py_modules = ['git.'+f[:-3] for f in os.listdir('./git') if f.endswith('.py')],
+ package_data = {'git.test' : ['fixtures/*']},
+ package_dir = {'git':'git'},
+ license = "BSD License",
+ requires=('gitdb (>=0.5.1)',),
+ install_requires='gitdb >= 0.5.1',
+ zip_safe=False,
+ long_description = """\
GitPython is a python library used to interact with Git repositories""",
+ classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: BSD License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 2.5",
+ "Programming Language :: Python :: 2.6",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+ ]
+ )
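
Illustrative usage sketches for the APIs appearing in this diff follow. They are not part of the diff itself; each assumes a Python 2 environment with this era of GitPython and gitdb importable, and any repository paths, ref names, or sample values are placeholders. First, Tree.traverse() with predicate and prune callables, mirroring test_traverse above:

    from git import Repo

    repo = Repo("/path/to/a/gitpython/clone")  # placeholder; needs the '0.1.6' tag
    root = repo.tree('0.1.6')

    # visit only tree objects, at any depth
    trees_only = lambda item, depth: item.type == "tree"
    trees = list(root.traverse(predicate=trees_only))

    # prune cuts off whole subtrees: nothing below 'lib' is visited
    lib_folder = lambda item, depth: item.path == "lib"
    pruned = list(root.traverse(predicate=trees_only, prune=lib_folder))
    assert len(pruned) <= len(trees)

    # index access accepts slash-separated paths, equivalent to the '/' operator
    for item in root.traverse():
        assert root[item.path] == item == root / item.path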
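A minimal sketch of stream_copy and join_path from git/util.py, using in-memory streams so it runs without a repository:

    from StringIO import StringIO
    from git.util import stream_copy, join_path

    src = StringIO("x" * (512 * 1024 + 100))  # slightly more than one default chunk
    dst = StringIO()
    written = stream_copy(src, dst)           # copies in 512kB chunks
    assert written == 512 * 1024 + 100 and dst.getvalue() == src.getvalue()

    # join_path always joins with '/', even on windows
    assert join_path("git", "objects", "pack") == "git/objects/pack"
    assert join_path("git/", "/objects") == "git/objects"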
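A sketch of subclassing RemoteProgress to consume the op_code protocol documented in update(); the sample line fed to the protected _parse_progress_line method is shaped like the relative-progress format its own regexes expect:

    from git.util import RemoteProgress

    class PrintProgress(RemoteProgress):
        def update(self, op_code, cur_count, max_count=None, message=''):
            op = op_code & self.OP_MASK      # strip the BEGIN/END stage flags
            if op_code & self.BEGIN:
                print "operation %d started" % op
            print "%s/%s %s" % (cur_count, max_count, message)
            if op_code & self.END:
                print "operation %d done" % op

    progress = PrintProgress()
    # a line as git-fetch might emit it; dispatches to update() with RECEIVING set
    progress._parse_progress_line("Receiving objects:  50% (2/4)")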
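A sketch of Actor string parsing and the environment-over-configuration precedence implemented by _main_actor(); the name and address are made up:

    import os
    from git.util import Actor

    a = Actor._from_string("John Doe <jdoe@example.com>")
    assert (a.name, a.email) == ("John Doe", "jdoe@example.com")

    # environment variables win over any config_reader values
    os.environ[Actor.env_committer_name] = "Someone Else"
    assert Actor.committer().name == "Someone Else"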
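A sketch of IndexFileSHA1Writer appending the SHA1 of everything written, as the index file format requires; the payload bytes are an arbitrary stand-in, and the 20 byte digest length assumes gitdb's make_sha wraps standard SHA1:

    from StringIO import StringIO
    from git.util import IndexFileSHA1Writer

    buf = StringIO()
    writer = IndexFileSHA1Writer(buf)
    writer.write("DIRC....")       # arbitrary stand-in for index data
    sha = writer.write_sha()       # appends and returns the binary digest
    assert len(sha) == 20
    assert buf.getvalue() == "DIRC...." + sha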
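A sketch of the lock protocol, mirroring test_lock_file and test_blocking_lock_file above; the methods are protected because both classes are meant to be derived from:

    import tempfile
    from git.util import LockFile, BlockingLockFile

    path = tempfile.mktemp()
    lock = LockFile(path)
    lock._obtain_lock_or_raise()          # creates path + '.lock'

    waiter = BlockingLockFile(path, check_interval_s=0.05, max_block_time_s=0.2)
    try:
        waiter._obtain_lock()             # polls, then raises IOError after ~0.2s
    except IOError:
        pass

    lock._release_lock()
    waiter._obtain_lock()                 # now succeeds immediately
    waiter._release_lock()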
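A sketch of IterableList lookup by integer index, by name and by attribute, as exercised by test_iterable_list:

    from git.util import IterableList

    class Member(object):
        def __init__(self, name):
            self.name = name

    members = IterableList('name')        # 'name' is the id attribute to compare on
    members.extend((Member('master'), Member('develop')))

    assert members[0] is members['master'] is members.master
    assert 'develop' in members
    del(members['develop'])
    assert len(members) == 1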
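Finally, a minimal Iterable subclass showing how list_items() builds an IterableList keyed on _id_attribute_ from whatever iter_items() yields; the Thing class and its item names are invented for the example:

    from git.util import Iterable

    class Thing(Iterable):
        _id_attribute_ = 'name'

        def __init__(self, name):
            self.name = name

        @classmethod
        def iter_items(cls, repo, *args, **kwargs):
            # a real subclass would derive its items from the repository
            for name in ('alpha', 'beta'):
                yield cls(name)

    things = Thing.list_items(None)       # repo is unused by this toy subclass
    assert things.alpha.name == 'alpha' and len(things) == 2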