summaryrefslogtreecommitdiff
path: root/morphlib/plugins/system_manifests_plugin.py
blob: 7fe33102b35c024ab4c98f80d09795ff2531003d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
# Copyright (C) 2015-2016  Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

import csv
import glob
import inspect
import json
import os
import shutil
import tempfile
import warnings

import cliapp

import morphlib

class SystemManifestsPlugin(cliapp.Plugin):
    '''Plugin providing the `generate-manifest-csv` subcommand.'''

    def enable(self):
        '''Register the subcommand and the `check-license` setting.'''
        self.app.add_subcommand(
            'generate-manifest-csv', self.manifests,
            arg_synopsis='REPO REF MORPH [MORPH]...')
        self.app.settings.choice(['check-license'],
                                 ['all-files', 'single-file'],
                                 'indicates whether just a license file '
                                 'should be looked at, or if all files '
                                 'in each chunk should be looked at for '
                                 'licensing information. Takes '
                                 '`single-file` or `all-files` for the '
                                 'two methods, respectively. Defaults to '
                                 'all-files, although this is much slower.',
                                 group='manifest options')

    def disable(self):
        '''Nothing needs undoing when the plugin is disabled.'''
        pass

    def manifests(self, args):
        '''Generate manifest(s) for the given system(s).

        Command line arguments:

        * `REPO` is a git repository URL.
        * `REF` is a branch or other commit reference in that repository.
        * `MORPH` is a system morphology name at that ref.

        This command produces CSV files in the current working directory
        named MORPH-manifest.csv, where MORPH is the system filename.

        These CSVs contain the chunk name, a version guess, the license as
        defined by COPYING or LICENSE, a list of all licenses used in the
        chunk (unless --check-license=single-file is set) and the upstream
        URL based on the configured trove-host's lorries repo.

        Note that this command is pretty slow, even with --check-license set
        to single-file it will take about half an hour to generate a manifest
        for a build-system. With --check-license set to all-files (the default)
        it will take a long time.

        You pass it a list of systems to generate manifests for.

          $ morph generate-manifest-csv . HEAD \\
                systems/devel-system-x86_64-generic.morph \\
                systems/xfce-system.morph

        '''
        # NOTE: the backslashes in the example above are escaped on
        # purpose; an unescaped backslash-newline inside a normal string
        # literal is a line continuation and would vanish from the help.

        if len(args) < 3:
            raise cliapp.AppException(
                'Usage: morph generate-manifest-csv REPO REF MORPH...')

        repo, ref = args[0], args[1]
        system_filenames = map(morphlib.util.sanitise_morphology_path,
                               args[2:])

        self.repo_cache = morphlib.util.new_repo_cache(self.app)
        self.resolver = morphlib.artifactresolver.ArtifactResolver()

        for system_filename in system_filenames:
            self.system_manifest(repo, ref, system_filename)

    @staticmethod
    def find_artifact_by_name(artifacts_list, filename):
        '''Return the first artifact whose source filename is `filename`.

        Raises ValueError if no artifact in `artifacts_list` matches.
        '''
        for a in artifacts_list:
            if a.source.filename == filename:
                return a
        raise ValueError(
            'No artifact found for morphology %s' % filename)

    def system_manifest(self, repo, ref, system_filename):
        '''Generate manifest for given system.

        Builds the source pool for `system_filename` at `repo`/`ref`,
        resolves its artifacts, and adds one manifest row for every
        chunk source that is not built in bootstrap mode.
        '''

        self.app.status(
            msg='Creating source pool for %(system)s',
            system=system_filename, chatty=True)
        source_pool = morphlib.sourceresolver.create_source_pool(
            self.repo_cache, repo, ref, [system_filename],
            status_cb=self.app.status)

        self.app.status(
            msg='Resolving artifacts for %(system)s',
            system=system_filename, chatty=True)
        root_artifacts = self.resolver.resolve_root_artifacts(source_pool)

        system_artifact = self.find_artifact_by_name(root_artifacts,
                                                     system_filename)

        self.app.status(
            msg='Computing cache keys for %(system)s',
            system=system_filename, chatty=True)
        build_env = morphlib.buildenvironment.BuildEnvironment(
            self.app.settings, system_artifact.source.morphology['arch'])
        ckc = morphlib.cachekeycomputer.CacheKeyComputer(build_env)

        # FIXME: This should be fixed in morphloader.
        morphlib.util.fix_chunk_build_mode(system_artifact)

        aliases = self.app.settings['repo-alias']
        resolver = morphlib.repoaliasresolver.RepoAliasResolver(aliases)

        # trove-id is a list setting; an empty list means "not configured".
        try:
            trove_id = self.app.settings['trove-id'][0]
        except IndexError:
            trove_id = None
        with morphlib.util.temp_dir(dir=self.app.settings['tempdir']) as td:
            lorries = get_lorry_repos(td, self.repo_cache, self.app.status,
                                      trove_id,
                                      self.app.settings['trove-host'])
            manifest = Manifest(system_artifact.name, td, self.app.status,
                                self.repo_cache)

            old_prefix = self.app.status_prefix
            try:
                # A set, so each chunk source is listed once even when
                # several of its artifacts are part of the system.
                sources = set(
                    a.source for a in system_artifact.walk()
                    if a.source.morphology['kind'] == 'chunk'
                    and a.source.morphology['build-mode'] != 'bootstrap')
                total = len(sources)
                for i, source in enumerate(sources, start=1):
                    source.cache_key = ckc.compute_key(source)
                    source.cache_id = ckc.get_cache_id(source)
                    name = source.morphology['name']
                    # Kept separate from the `ref` parameter, which names
                    # the definitions ref rather than the chunk's ref.
                    chunk_ref = source.original_ref

                    cached = self.repo_cache.get_updated_repo(
                        source.repo_name, chunk_ref)

                    new_prefix = '[%d/%d][%s] ' % (i, total, name)
                    self.app.status_prefix = old_prefix + new_prefix
                    manifest.add_chunk(self.app, name, source.repo_name,
                                       chunk_ref, cached, resolver, lorries)
            finally:
                # Restore the prefix and release the CSV file handle even
                # when a chunk fails part-way through.
                self.app.status_prefix = old_prefix
                manifest.file.close()


def run_licensecheck(filename):
    '''Run the licensecheck.pl script shipped next to morphlib on a file.

    Returns 'UNKNOWN' when the script prints nothing. Otherwise returns
    its output with the first `len(filename) + 2` characters removed and
    surrounding whitespace stripped.
    '''
    script = os.path.join(
        os.path.dirname(inspect.getfile(morphlib)), 'licensecheck.pl')
    command = ['perl', script, '-l', '500', filename]
    result = cliapp.runcmd(command)
    if result:
        # presumably the output looks like "<filename>: <license>", so
        # this skips the filename and the ": " separator -- TODO confirm
        return result[len(filename) + 2:].strip()
    return 'UNKNOWN'

def checkout_repo(repo_cache, repo, dest, ref='master'):
    '''Check out `ref` of `repo` into `dest` unless `dest` already exists.

    The repository is always fetched/updated through `repo_cache`; the
    checkout itself is skipped when `dest` is already present.
    '''
    cached = repo_cache.get_updated_repo(repo, ref)
    if not os.path.exists(dest):
        # Pass the cached repository object, not the repo name, matching
        # how Manifest.add_chunk calls checkout_from_cached_repo.
        morphlib.gitdir.checkout_from_cached_repo(cached, ref, dest)

def load_lorries(dir):
    '''Load every lorry file referenced by a lorry-controller.conf.

    Reads `lorry-controller.conf` (JSON) from `dir`, expands the `globs`
    of every stanza whose `type` is 'lorries' relative to `dir`, and
    returns the parsed JSON content of each matching file as a list.
    '''
    with open(os.path.join(dir, 'lorry-controller.conf'), 'r') as handle:
        stanzas = json.load(handle)

    paths = []
    for entry in stanzas:
        if entry['type'] == 'lorries':
            for relative_glob in entry['globs']:
                paths.extend(glob.glob(os.path.join(dir, relative_glob)))

    loaded = []
    for path in paths:
        with open(path, 'r') as handle:
            loaded.append(json.load(handle))
    return loaded

def get_lorry_for_chunk(chunk_url, lorries):
    '''Look up the lorry entry for a chunk repository URL.

    The lookup key is whatever follows the first 'delta/' in `chunk_url`
    or, when the URL has no 'delta/', whatever follows the first ':'.
    Returns the value stored under that key in the first lorry that
    contains it, or None when no lorry does.
    '''
    separator = 'delta/' if 'delta/' in chunk_url else ':'
    key = chunk_url.split(separator, 1)[-1]
    return next((entry[key] for entry in lorries if key in entry), None)

def get_main_license(dir): # pragma: no cover
    '''Return the license reported for the chunk's top-level license file.

    COPYING is preferred over LICENSE; if `dir` contains neither, the
    result is 'UNKNOWN'.
    '''
    for candidate in ('COPYING', 'LICENSE'):
        path = os.path.join(dir, candidate)
        if os.path.exists(path):
            return run_licensecheck(path)
    return 'UNKNOWN'

def get_all_licenses(dir): # pragma: no cover
    '''Collect the distinct licenses reported for every file under `dir`.

    Files for which the license check raises cliapp.AppException are
    skipped. Each license appears once, in first-seen order.
    '''
    found = []
    for root, _subdirs, names in os.walk(dir):
        for name in names:
            path = os.path.join(root, name)
            try:
                detected = run_licensecheck(path)
            except cliapp.AppException:
                continue
            if detected in found:
                continue
            found.append(detected)
    return found

def get_upstream_address(chunk_url, lorries, status):
    '''Return the upstream URL recorded in the lorry for `chunk_url`.

    When the lorry lookup yields something that cannot be indexed by
    'url' (a TypeError, e.g. no lorry was found), a message is reported
    through `status` and `chunk_url` itself is returned. When the lorry
    has no 'url' key, a message is reported and 'UNKNOWN' is returned.
    '''
    entry = get_lorry_for_chunk(chunk_url, lorries)
    try:
        address = entry['url']
    except KeyError:
        status(msg='Lorry for %(chunk)s has no "url" field.',
               chunk=chunk_url)
        address = 'UNKNOWN'
    except TypeError:
        status(msg='Lorry for %(chunk)s not found.', chunk=chunk_url)
        address = chunk_url
    return address

def get_lorry_repos(tempdir, repo_cache, status, trove_id, trove_host):
    '''Check out the lorries repositories and load all their lorries.

    The 'baserock:local-config/lorries' repo is always tried. The
    local-config/lorries repo on `trove_host` is tried as well when
    `trove_id` is set. A repository that raises NoRemote is reported
    through `status` and skipped. Returns the combined list of lorries.
    '''
    collected = []

    baserock_dest = os.path.join(tempdir, 'baserock-lorries')
    try:
        checkout_repo(repo_cache, 'baserock:local-config/lorries',
                      baserock_dest)
        collected += load_lorries(baserock_dest)
    except morphlib.repocache.NoRemote as e:
        status(msg="WARNING: Could not find lorries from git.baserock.org, "
                   "expected to find them on %(trove)s at %(reponame)s",
               trove=trove_host, reponame=e.reponame)

    if not trove_id:
        status(msg="WARNING: Not looking in %(trove)s's local-config/lorries "
                   "repo as trove-id was not configured.", trove=trove_host)
        return collected

    try:
        trove_url = ('http://%s/git/%s/local-config/lorries' %
                     (trove_host, trove_id))
        trove_dest = os.path.join(tempdir, '%s-lorries' % trove_id)
        checkout_repo(repo_cache, trove_url, trove_dest)
        collected += load_lorries(trove_dest)
    except morphlib.repocache.NoRemote as e:
        status(msg="WARNING: Could not find lorries repo on %(trove)s "
                   "at %(reponame)s",
               trove=trove_host, reponame=e.reponame)

    return collected


class Manifest(object):
    """Writes out a manifest of what's included in a system.

    The manifest is a CSV file called `<system_name>-manifest.csv` in the
    current working directory, with one row per chunk added through
    add_chunk(). Call close(), or use the instance as a context manager,
    once all chunks have been added so the file is flushed and closed.
    """

    def __init__(self, system_name, tempdir, status_cb, repo_cache):
        '''Create the CSV file and the writer used for the rows.

        `tempdir` is where chunk repositories get checked out, and
        `status_cb` is called with keyword arguments to report progress.
        '''
        self.tempdir = tempdir
        self.status = status_cb
        self.repo_cache = repo_cache
        path = os.path.join(os.getcwd(), system_name + '-manifest.csv')
        self.status(msg='Creating %(path)s', path=path)
        # Binary mode is what the Python 2 csv module expects.
        self.file = open(path, 'wb')
        self.writer = csv.writer(self.file, quoting=csv.QUOTE_ALL)

    def close(self):
        '''Close the manifest file; safe to call more than once.'''
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _write_chunk(self, chunk_name, version_guess,
                     license, license_list, upstream):
        '''Append one row describing a chunk to the CSV file.'''
        self.writer.writerow([chunk_name, version_guess,
                              license, license_list, upstream])

    def add_chunk(self, app, chunk_name, chunk_url, ref,
                  cached_repo, resolver, lorries):
        '''Inspect one chunk and write its row to the manifest.

        The chunk is checked out from `cached_repo` at `ref` into a
        directory under the temporary directory, its license information
        and upstream address are gathered, and the checkout is removed
        again afterwards, whether or not the inspection succeeded.
        '''
        self.status(msg='Inspecting chunk: %(chunk)s', chunk=chunk_name)
        self.status(msg='Guessing version', chatty=True)
        version_guess = cached_repo.version_guess(ref)

        dir = os.path.join(self.tempdir, chunk_name)
        try:
            self.status(msg='Checking out chunk repo into %(dir)s at %(ref)s',
                        dir=dir, ref=ref, chatty=True)
            gd = morphlib.gitdir.checkout_from_cached_repo(
                cached_repo, ref, dir)
            gd.update_submodules(app)

            self.status(msg='Getting license info', chatty=True)
            license = get_main_license(dir)
            if app.settings['check-license'] == 'all-files':
                license_list = get_all_licenses(dir)
            else:
                self.status(msg='WARNING: Not looking at individual file '
                                'licenses as check-license was not set to '
                                '`all-files`.')
                license_list = 'UNKNOWN'

            self.status(msg='Getting upstream location', chatty=True)
            upstream = get_upstream_address(chunk_url, lorries, self.status)
            # get_upstream_address hands back chunk_url itself when it
            # found no lorry; show the resolved pull URL in that case.
            if upstream == chunk_url:
                upstream = '%s (no lorry)' % resolver.pull_url(upstream)

            self._write_chunk(chunk_name, version_guess, license,
                              license_list, upstream)
        finally:
            # The checkout may have failed before creating the directory;
            # removing a missing tree would raise and hide the real error.
            if os.path.exists(dir):
                shutil.rmtree(dir)