summaryrefslogtreecommitdiff
path: root/morphlib/gitindex.py
blob: 00098fbfa299b27034969b900a5b9ab7a8165803 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# Copyright (C) 2013-2015  Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# =*= License: GPL-2 =*=


import collections
import os

import morphlib


STATUS_UNTRACKED = '??'
STATUS_IGNORED = '!!'


class GitIndex(object):
    '''An object that represents operations on the working tree.

    Index objects can be constructed with a different path to the
    index file, which can be used to construct commits without
    altering the working tree, index or HEAD.

    The file must either be a previously initialised index, or a
    non-existant file.

    Git creates a lock file and atomically alters the index by
    renaming a temporary file into place, so `index_file` must be
    in a writable directory.

    '''

    def __init__(self, gd, index_file):
        self._gd = gd
        self._index_file = index_file

    def _run_git(self, *args, **kwargs):
        if self._index_file is not None:
            extra_env = kwargs.get('extra_env', {})
            extra_env['GIT_INDEX_FILE'] = self._index_file
            kwargs['extra_env'] = extra_env

        if 'extra_env' in kwargs:
            env = kwargs.get('env', dict(os.environ))
            env.update(kwargs['extra_env'])
            kwargs['env'] = env
            del kwargs['extra_env']

        return morphlib.git.gitcmd(self._gd._runcmd, *args, **kwargs)

    def _get_status(self):
        '''Return git status output in a Python useful format

        This runs git status such that unusual filenames are preserved
        and returns its output in a sequence of (status_code, to_path,
        from_path).

        from_path is None unless the status_code says there was a
        rename, in which case it is the path it was renamed from.

        Untracked and ignored changes are also included in the output,
        their status codes are '??' and '!!' respectively.

        '''

        # git status -z will NUL terminate paths, so we don't have to
        # unescape the paths it outputs. Unfortunately each status entry
        # can have 1 or 2 paths, so extra parsing is required.
        # To handle this, we split it into NUL delimited tokens.
        # The first token of an entry is the 2 character status code,
        # a space, then the path.
        # If our status code starts with R then it's a rename, hence
        # has a second path, requiring us to pop an extra token.
        status = self._run_git('status', '-z', '--ignored')
        tokens = collections.deque(status.split('\0'))
        while True:
            tok = tokens.popleft()
            # Status output is NUL terminated rather than delimited,
            # and split is for delimited output. A side effect of this is
            # that we get an empty token as the last output. This suits
            # us fine, as it gives us a sentinel value to terminate with.
            if not tok:
                return

            # The first token of an entry is 2 character status, a space,
            # then the path
            code = tok[:2]
            to_path = tok[3:]

            # If the code starts with R then it's a rename, and
            # the next token says where the file was renamed from
            from_path = tokens.popleft() if code[0] == 'R' else None
            yield code, to_path, from_path

    def get_uncommitted_changes(self):
        for code, to_path, from_path in self._get_status():
            if (code not in (STATUS_UNTRACKED, STATUS_IGNORED)
            or  code == (STATUS_UNTRACKED) and to_path.endswith('.morph')):
                yield code, to_path, from_path

    def set_to_tree(self, treeish):
        '''Modify the index to contain the contents of the treeish.'''
        self._run_git('read-tree', treeish)

    def add_files_from_index_info(self, infos):
        '''Add files without interacting with the working tree.

        `infos` is an iterable of (file mode string, object sha1, path)
        There are no constraints on the size of the iterable

        '''

        # update-index may take NUL terminated input lines of the entries
        # to add so we generate a string for the input, rather than
        # having many command line arguments, since for a large amount
        # of entries, this can be too many arguments to process and the
        # exec will fail.
        # Generating the input as a string uses more memory than using
        # subprocess.Popen directly and using .communicate, but is much
        # less verbose.
        feed_stdin = '\0'.join('%o %s\t%s' % (mode, sha1, path)
                               for mode, sha1, path in infos) + '\0'
        self._run_git('update-index', '--add', '-z', '--index-info',
                      feed_stdin=feed_stdin)

    def add_files_from_working_tree(self, paths):
        '''Add existing files to the index.

        Given an iterable of paths to files in the working tree,
        relative to the git repository's top-level directory,
        add the contents of the files to git's object store,
        and the index.

        This is similar to the following:

            gd = GitDirectory(...)
            idx = gd.get_index()
            for path in paths:
                fullpath = os.path.join(gd,dirname, path)
                with open(fullpath, 'r') as f:
                    sha1 = gd.store_blob(f)
                idx.add_files_from_index_info([(os.stat(fullpath).st_mode,
                                                sha1, path)])

        '''

        if self._gd.is_bare():
            raise morphlib.gitdir.NoWorkingTreeError(self._gd)
        # Handle paths in smaller chunks, so that the runcmd
        # cannot fail from exceeding command line length
        # 50 is an arbitrary limit
        for paths in morphlib.util.iter_trickle(paths, 50):
            self._run_git('add', *paths)

    def write_tree(self):
        '''Transform the index into a tree in the object store.'''
        return self._run_git('write-tree').strip()

    def checkout(self, working_tree=None):
        '''Copy files from the index to the working tree.'''
        if working_tree:
            extra_env = {'GIT_WORK_TREE': working_tree}
        else:
            extra_env = {}
        self._run_git('checkout-index', '--all', extra_env=extra_env)