From 54a85d3e8f5df5a07bfb1acf851573d00e2b7ac7 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Tue, 10 Jun 2014 14:20:16 +0100 Subject: Move scripts into scripts/ directory They probably do not belong in definitions.git at all, but at least they are now in one place. In future they should move either into a separate baserock-utilities chunk, or into morph.git itself. --- scripts/distbuild-cluster.py | 97 +++++++ scripts/do-release.py | 444 ++++++++++++++++++++++++++++++++ scripts/licensecheck.pl | 596 +++++++++++++++++++++++++++++++++++++++++++ scripts/licensecheck.sh | 101 ++++++++ 4 files changed, 1238 insertions(+) create mode 100644 scripts/distbuild-cluster.py create mode 100644 scripts/do-release.py create mode 100644 scripts/licensecheck.pl create mode 100755 scripts/licensecheck.sh (limited to 'scripts') diff --git a/scripts/distbuild-cluster.py b/scripts/distbuild-cluster.py new file mode 100644 index 00000000..2d0e64d3 --- /dev/null +++ b/scripts/distbuild-cluster.py @@ -0,0 +1,97 @@ +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import os +import subprocess +import sys +import time +import yaml + +import morphlib + + +''' distbuild-cluster: Build all systems in a cluster using distbuild. + +This script should be removed once Morph has grown the capability to +build an entire cluster itself. 
# (Tail of the module description whose opening sits on the preceding,
# collapsed line.)
# This will require changes either to the distbuild component (so that a
# single controller can build for multiple architectures) or to the way
# Morph talks to distbuild (so that it can handle multiple controllers).

# Address of the distbuild controller to use for each architecture name.
controllers = {
    'armv7lhf': '10.24.1.134',
    'x86_32': 'distbuild-x86-32',
    'x86_64': 'distbuild-x86-64',
}


# Ref of baserock:baserock/definitions passed to every distbuild command.
ref_to_build = 'baserock-14.22'


def read_morph(morph_name, kind=None):
    '''Load the morphology stored in '<morph_name>.morph'.

    The file is parsed as YAML and the resulting object is returned. If
    'kind' is given, the morphology's 'kind' field must equal it, otherwise
    an AssertionError is raised.

    '''
    with open(morph_name + '.morph') as f:
        # safe_load: morphologies are plain data, so there is no reason to
        # allow YAML tags that construct arbitrary Python objects.
        morph = yaml.safe_load(f)
    if kind is not None:
        assert morph['kind'] == kind, \
            '%s.morph has kind %r, expected %r' % (
                morph_name, morph['kind'], kind)
    return morph


class Build(object):
    '''A single distbuild instance.

    Holds the `morph distbuild-morphology` command line for one system and,
    once start() has been called, the subprocess running it.

    '''

    def __init__(self, system_name, arch):
        '''Prepare (but do not start) the build of 'system_name'.

        'arch' selects the controller from the module-level 'controllers'
        table; an unknown architecture raises KeyError.

        '''
        self.system_name = system_name
        # Use the 'arch' argument. The previous code looked up the global
        # 'system' here, which only exists while the __main__ loop runs.
        self.distbuild_controller = controllers[arch]
        # Set by start(); None means the build has not been launched yet.
        self.process = None

        self.command = [
            'morph', 'distbuild-morphology',
            '--controller-initiator-address=%s' % self.distbuild_controller,
            'baserock:baserock/definitions', ref_to_build, system_name]

    def start(self):
        '''Launch the distbuild command as a child process.'''
        self.process = subprocess.Popen(self.command)

    def completed(self):
        '''Return True once the started process has exited.

        A build that has not been started is reported as not completed.

        '''
        return self.process is not None and self.process.poll() is not None


if __name__ == '__main__':
    cluster_name = morphlib.util.strip_morph_extension(sys.argv[1])

    cluster = read_morph(cluster_name, kind='cluster')
    system_list = [system['morph'] for system in cluster['systems']]

    builds = []
    for system_name in system_list:
        system = read_morph(system_name)
        builds.append(Build(system_name, system['arch']))

    # Morph dumps many log files to the current directory, which I don't
    # want to be in the root of 'definitions'.
if __name__ == '__main__':
    # Remainder of the distbuild-cluster.py entry point; it relies on the
    # 'builds' list assembled by the statements on the preceding line.
    if not os.path.exists('builds'):
        os.mkdir('builds')
    os.chdir('builds')

    # Launch every build, then poll once a second until all have exited.
    for build in builds:
        build.start()

    while not all(build.completed() for build in builds):
        time.sleep(1)

    for build in builds:
        if build.process.returncode != 0:
            sys.stderr.write("Building failed for %s\n" % build.system_name)

# diff --git a/scripts/do-release.py b/scripts/do-release.py
# new file mode 100644
# index 00000000..e11e6625
# --- /dev/null
# +++ b/scripts/do-release.py
# @@ -0,0 +1,444 @@
# Copyright (C) 2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

import cliapp
import morphlib
import yaml

import contextlib
import gzip
import json
import logging
import os
import re
import sys
import tarfile
import urllib2


''' do-release: Baserock release tooling.

See: .

'''


class config(object):
    # Must be set by hand before running, e.g. '14.24'. main() refuses to
    # run while this is None (it used to be a placeholder that did not
    # even parse).
    release_number = None

    build_trove = 'hawkdevtrove'
    release_trove = 'git.baserock.org'

    # Note that the 'location' field of the various systems in release.morph
    # should match 'images_dir' here.
    deploy_workspace = '/src/ws-release'
    images_dir = '/src/release'
    artifacts_dir = '/src/release/artifacts'

    # These locations should be appropriate 'staging' directories on the public
    # servers that host images and artifacts. Remember not to upload to the
    # public directories directly, or you risk exposing partially uploaded
    # files. Once everything has uploaded you can 'mv' the release artifacts
    # to the public directories in one quick operation.
    # FIXME: we should probably warn if the dir exists and is not empty.
    # NOTE(review): the user name before '@' looks like it is missing from
    # the value below -- confirm before use.
    images_upload_location = \
        '@download.baserock.org:baserock-release-staging'
    artifacts_upload_location = \
        'root@git.baserock.org:/home/cache/baserock-release-staging'

    # The Codethink Manchester office currently has 8Mbits/s upload available.
    # This setting ensures we use no more than half of the available bandwidth.
    bandwidth_limit_kbytes_sec = 512


def status(message, *args):
    '''Write a progress message, followed by a newline, to stdout.

    When 'args' are given, 'message' is treated as a %-format string. With
    no 'args' the message is written verbatim, so callers that pass an
    already formatted string are safe even if it contains a '%'.

    '''
    if args:
        message = message % args
    sys.stdout.write(message)
    sys.stdout.write('\n')


@contextlib.contextmanager
def cwd(path):
    '''Context manager to set current working directory.

    The previous working directory is restored on exit, including when the
    body raises.

    '''
    old_cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_cwd)


def transfer(f_in, f_out, block_size=10*1024*1024, show_status=True):
    '''Stream from f_in to f_out until the end of f_in is reached.

    This function is rather like shutil.copyfileobj(), but it doesn't seem
    possible to output progress info using that function.

    Data is copied in chunks of 'block_size' bytes. When 'show_status' is
    true a running total is written to stdout.

    '''
    total_bytes = 0
    while True:
        data = f_in.read(block_size)
        if len(data) == 0:
            break
        total_bytes += len(data)
        f_out.write(data)
        if show_status:
            sys.stdout.write(
                '\rProcessed %iMB ...' % (total_bytes / (1024 * 1024)))
            sys.stdout.flush()
    if show_status:
        sys.stdout.write('\rCompleted transfer\n')


class DeployImages(object):
    '''Stage 1: deploy release images.'''

    def create_deploy_workspace(self, path):
        '''Create or enter existing workspace for deploying release images.

        Returns the path of the definitions checkout inside the workspace.

        '''

        if not os.path.exists(path):
            status('Creating workspace %s' % path)
            cliapp.runcmd(['morph', 'init', path])
        else:
            status('Reusing existing workspace %s' % path)

        repo = 'baserock:baserock/definitions'
        branch = 'master'

        with cwd(path):
            if not os.path.exists(branch):
                status('Checking out %s branch %s' % (repo, branch))
                cliapp.runcmd(['morph', 'checkout', repo, branch])
            else:
                status('Reusing checkout of %s %s' % (repo, branch))

        # Derive the result from 'path' (not config.deploy_workspace) so it
        # always names the workspace that was actually created above.
        definitions_dir = os.path.join(
            path, branch, 'baserock/baserock/definitions')

        return definitions_dir

    def read_morph(self, filename, kind=None):
        '''Parse the YAML morphology in 'filename' and return it.

        If 'kind' is given the morphology's 'kind' field must equal it,
        otherwise an AssertionError is raised.

        '''
        with open(filename) as f:
            # safe_load: morphologies are plain data.
            morph = yaml.safe_load(f)
        if kind is not None:
            assert morph['kind'] == kind
        return morph

    def parse_release_cluster(self, release_cluster):
        '''Validate release cluster and list the systems being released.

        This function returns a dict mapping the system name to the location
        of its deployed image.

        It's an open question how we should detect and handle the case where a
        write extension creates more than one file. ARM kernels and GENIVI
        manifest files are possible examples of this.

        Raises cliapp.AppException when a system has no 'release' deployment,
        when its location is outside config.images_dir, or when the image
        name does not start with the version label.

        '''

        version_label = 'baserock-%s' % config.release_number

        outputs = {}
        for system in release_cluster['systems']:
            system_morph = system['morph']

            if 'release' not in system['deploy']:
                raise cliapp.AppException(
                    'In release.morph: system %s ID should be "release"' %
                    system_morph)

            # We can't override 'location' with a different value. We must use
            # what's already in the morphology, and check that it makes sense.
            location = system['deploy']['release']['location']
            if not os.path.samefile(os.path.dirname(location),
                                    config.images_dir):
                raise cliapp.AppException(
                    'In release.morph: system location %s is not inside '
                    'configured images_dir %s' % (location, config.images_dir))
            if not os.path.basename(location).startswith(version_label):
                raise cliapp.AppException(
                    'In release.morph: system image name %s does not start '
                    'with version label %s' % (location, version_label))

            outputs[system_morph] = location

        return outputs

    def deploy_images(self, outputs):
        '''Use `morph deploy` to create the release images.'''

        # FIXME: once `morph deploy` supports partial deployment, this should
        # deploy only the images which aren't already deployed... it should
        # also check if they need redeploying based on the SHA1 they were
        # deployed from, perhaps. That's getting fancy!

        todo = [f for f in outputs.values() if not os.path.exists(f)]

        if len(todo) == 0:
            status('Reusing existing release images')
        else:
            logging.debug('Need to deploy images: %s' % ', '.join(todo))
            status('Creating release images from release.morph')

            version_label = 'baserock-%s' % config.release_number

            morph_config = ['--trove-host=%s' % config.build_trove]
            deploy_config = ['release.VERSION_LABEL=%s' % version_label]

            cliapp.runcmd(
                ['morph', 'deploy', 'release.morph'] + morph_config +
                deploy_config, stdout=sys.stdout)

    def compress_images(self, outputs):
        '''Gzip every image in 'outputs' and point 'outputs' at the result.

        'outputs' maps system name to image path and is updated in place so
        each value names the '.gz' file. An existing '.gz' is reused.

        '''
        # Iterate over a snapshot because the dict is updated in the loop.
        for name, source_file in list(outputs.items()):
            target_file = source_file + '.gz'

            if os.path.exists(target_file):
                status('Reusing compressed image %s' % target_file)
            else:
                status('Compressing %s to %s', source_file, target_file)
                # Compress under a temporary name and rename on success, so
                # an interrupted run can never leave a truncated '.gz' that
                # a later run would mistake for a finished image.
                partial_file = target_file + '.partial'
                try:
                    with open(source_file, 'rb') as f_in:
                        with gzip.open(partial_file, 'wb',
                                       compresslevel=4) as f_out:
                            transfer(f_in, f_out)
                    os.rename(partial_file, target_file)
                except BaseException:
                    if os.path.exists(partial_file):
                        os.unlink(partial_file)
                    raise

            outputs[name] = target_file

    def run(self):
        '''Run stage 1 and return the dict of system name to image file.'''
        definitions_dir = self.create_deploy_workspace(config.deploy_workspace)

        with cwd(definitions_dir):
            release_cluster = self.read_morph('release.morph', kind='cluster')

        outputs = self.parse_release_cluster(release_cluster)

        with cwd(definitions_dir):
            self.deploy_images(outputs)

        self.compress_images(outputs)

        return outputs


class PrepareArtifacts(object):
    '''Stage 2: Fetch all artifacts and archive them.

    This includes the system artifacts. While these are large, it's very
    helpful to have the system artifacts available in the trove.baserock.org
    artifact cache because it allows users to deploy them with `morph deploy`.
    If they are not available in the cache they must be built, which requires
    access to a system of the same architecture as the target system.

    '''

    def get_artifact_list(self, system_morphs):
        '''Return list of artifacts involved in the release.

        List is also written to a file.

        Note that this function requires the `list-artifacts` command from
        Morph of Baserock 14.23 or later.

        Returns a tuple (path of the list file, list of artifact names). If
        the list file already exists it is read instead of being regenerated.

        '''
        artifact_list_file = os.path.join(
            config.artifacts_dir, 'baserock-%s-artifacts.txt' %
            config.release_number)
        if os.path.exists(artifact_list_file):
            with open(artifact_list_file) as f:
                artifact_basenames = [line.strip() for line in f]
        else:
            text = cliapp.runcmd(
                ['morph', '--quiet', '--trove-host=%s' % config.build_trove,
                 'list-artifacts', 'baserock:baserock/definitions', 'master'] +
                system_morphs)
            artifact_basenames = text.strip().split('\n')
            with morphlib.savefile.SaveFile(artifact_list_file, 'w') as f:
                f.write(text)
        return artifact_list_file, artifact_basenames

    def query_remote_artifacts(self, trove, artifact_basenames):
        '''Ask the cache server on 'trove' about 'artifact_basenames'.

        The names are POSTed as a JSON list to the server's /1.0/artifacts
        URL and the decoded JSON response is returned. Callers treat it as
        a dict mapping each name to a presence flag.

        '''
        url = 'http://%s:8080/1.0/artifacts' % trove
        logging.debug('Querying %s' % url)
        request_body = json.dumps(list(artifact_basenames))
        # closing(): make sure the HTTP response is released.
        with contextlib.closing(urllib2.urlopen(url, data=request_body)) as f:
            response = json.load(f)
        return response

    def fetch_artifact(self, remote_cache, artifact):
        '''Download 'artifact' from 'remote_cache' into config.artifacts_dir.

        The partially written local file is aborted if the transfer fails.

        '''
        f_in = remote_cache._get_file(artifact)
        try:
            artifact_local = os.path.join(config.artifacts_dir, artifact)
            with morphlib.savefile.SaveFile(artifact_local, 'wb') as f_out:
                try:
                    logging.debug('Writing to %s' % artifact_local)
                    transfer(f_in, f_out)
                except BaseException:
                    logging.debug(
                        'Cleaning up %s after error' % artifact_local)
                    f_out.abort()
                    raise
        finally:
            # Close the remote stream on failure as well as on success.
            f_in.close()

    def fetch_artifacts(self, artifact_basenames):
        '''Make sure the listed artifacts are in config.artifacts_dir.

        Artifacts already present locally are kept; the rest are looked up
        on the build trove and downloaded. Missing build logs and missing
        source '.meta' files are tolerated; any other missing artifact
        raises cliapp.AppException.

        Returns the set of artifact names that are now available locally.

        '''
        remote_cache = morphlib.remoteartifactcache.RemoteArtifactCache(
            'http://%s:8080' % config.build_trove)
        found_artifacts = set()

        artifacts_to_query = []
        for artifact in artifact_basenames:
            artifact_local = os.path.join(config.artifacts_dir, artifact)
            # FIXME: no checksumming of artifacts done; we could get corruption
            # introduced here and we would have no way of knowing. Cached
            # artifact validation is planned for Morph; see:
            # http://listmaster.pepperfish.net/pipermail/baserock-dev-baserock.org/2014-May/005675.html
            if os.path.exists(artifact_local):
                status('%s already cached' % artifact)
                found_artifacts.add(artifact)
            else:
                artifacts_to_query.append(artifact)

        if len(artifacts_to_query) > 0:
            result = self.query_remote_artifacts(config.build_trove,
                                                 artifacts_to_query)
            for artifact, present in result.items():
                if present:
                    status('Downloading %s from remote cache' % artifact)
                    self.fetch_artifact(remote_cache, artifact)
                    found_artifacts.add(artifact)
                elif artifact.endswith('build-log'):
                    # For historical reasons, not all chunks have their
                    # build logs. Fixed here:
                    # http://git.baserock.org/cgi-bin/cgit.cgi/baserock/baserock/morph.git/commit/?id=6fb5fbad4f2876f30f482133c53f3a138911498b
                    # We still need to work around it for now, though.
                    logging.debug('Ignoring missing build log %s' % artifact)
                elif re.match(r'[0-9a-f]{64}\.meta', artifact):
                    # FIXME: We still don't seem to share the .meta files.
                    # We should. Note that *artifact* meta files
                    # (.stratum.meta files) can't be ignored, they are an
                    # essential part of the stratum and it's an error if
                    # such a file is missing.
                    logging.debug('Ignoring missing source metadata %s' %
                                  artifact)
                else:
                    raise cliapp.AppException(
                        'Remote artifact cache is missing artifact %s' %
                        artifact)

        return found_artifacts

    def prepare_artifacts_archive(self, tar_name, files):
        '''Create the gzipped tarball 'tar_name' holding 'files'.

        Each file is stored under its basename. An existing tarball is
        reused; a partially written one is removed if creation fails.

        '''
        if os.path.exists(tar_name):
            status('Reusing tarball of artifacts at %s', tar_name)
            return

        try:
            status('Creating tarball of artifacts at %s', tar_name)
            tar = tarfile.TarFile.gzopen(name=tar_name, mode='w',
                                         compresslevel=4)
            try:
                n_files = len(files)
                # Count from 1 so the final progress line reads "n of n".
                for i, filename in enumerate(sorted(files), 1):
                    logging.debug('Add %s to tar file' % filename)
                    tar.add(filename, arcname=os.path.basename(filename))
                    sys.stdout.write('\rAdded %i files of %i' % (i, n_files))
                    sys.stdout.flush()
            finally:
                tar.close()
            sys.stdout.write('\rFinished creating %s\n' % tar_name)
        except BaseException:
            logging.debug('Cleaning up %s after error' % tar_name)
            # The file may never have been created; don't let a failing
            # unlink hide the original error.
            if os.path.exists(tar_name):
                os.unlink(tar_name)
            raise

    def run(self, system_morphs):
        '''Run stage 2.

        Returns a tuple (artifact list file, tarball of all artifacts,
        tarball of the artifacts the release trove does not have yet).

        '''
        if not os.path.exists(config.artifacts_dir):
            os.makedirs(config.artifacts_dir)

        artifact_list_file, all_artifacts = \
            self.get_artifact_list(system_morphs)

        found_artifacts = self.fetch_artifacts(all_artifacts)

        tar_name = 'baserock-%s-artifacts.tar.gz' % config.release_number
        artifacts_tar_file = os.path.join(config.artifacts_dir, tar_name)
        artifact_files = [
            os.path.join(config.artifacts_dir, a) for a in found_artifacts]

        self.prepare_artifacts_archive(artifacts_tar_file, artifact_files)

        tar_name = 'baserock-%s-new-artifacts.tar.gz' % config.release_number
        new_artifacts_tar_file = os.path.join(config.artifacts_dir, tar_name)
        result = self.query_remote_artifacts(config.release_trove,
                                             found_artifacts)
        new_artifacts = [a for a, present in result.items() if not present]
        # Leave out artifacts whose second dot-separated field is 'system'.
        # The slice keeps a name without any '.' from raising IndexError.
        new_artifact_files = [
            os.path.join(config.artifacts_dir, a) for a in new_artifacts
            if a.split('.')[1:2] != ['system']]

        self.prepare_artifacts_archive(new_artifacts_tar_file,
                                       new_artifact_files)

        return (artifact_list_file, artifacts_tar_file, new_artifacts_tar_file)


class Upload(object):
    '''Stage 3: upload images and artifacts to public servers.'''

    def run_rsync(self, sources, target):
        '''rsync 'sources' (one path or a sequence of paths) to 'target'.'''
        if isinstance(sources, str):
            sources = [sources]
        settings = [
            '--bwlimit=%s' % config.bandwidth_limit_kbytes_sec,
            '--partial',
            '--progress',
        ]
        cliapp.runcmd(
            ['rsync'] + settings + list(sources) + [target],
            stdout=sys.stdout)

    def upload_release_images(self, images):
        '''Upload the image files to config.images_upload_location.'''
        self.run_rsync(images, config.images_upload_location)

    def upload_artifacts(self, artifacts_list_file, artifacts_tar_file):
        '''Upload the list file and tarball, then unpack the tarball remotely.

        Both files go to config.artifacts_upload_location; the tarball is
        then extracted there over ssh and the contents chown'ed to
        cache:cache.

        '''
        host, path = config.artifacts_upload_location.split(':', 1)

        self.run_rsync([artifacts_list_file, artifacts_tar_file],
                       config.artifacts_upload_location)

        # UGH! Perhaps morph-cache-server should grow an authorised-users-only
        # API call receive artifacts, to avoid this.
        remote_artifacts_tar = os.path.join(
            path, os.path.basename(artifacts_tar_file))
        extract_tar_cmd = 'cd "%s" && tar xf "%s" && chown cache:cache *' % \
            (path, remote_artifacts_tar)
        cliapp.ssh_runcmd(
            host, ['sh', '-c', extract_tar_cmd])


def main():
    '''Run the three release stages: deploy, prepare artifacts, upload.'''
    logging.basicConfig(level=logging.DEBUG)

    # Fail early, with a clear message, if the script was not configured.
    if config.release_number is None:
        raise cliapp.AppException(
            'config.release_number is not set; edit it before running')

    deploy_images = DeployImages()
    outputs = deploy_images.run()

    prepare_artifacts = PrepareArtifacts()
    artifacts_list_file, artifacts_tar_file, new_artifacts_tar_file = \
        prepare_artifacts.run(list(outputs.keys()))

    upload = Upload()
    upload.upload_release_images(list(outputs.values()))
    # Only the artifacts the release trove lacks are uploaded.
    upload.upload_artifacts(artifacts_list_file, new_artifacts_tar_file)

    sys.stdout.writelines([
        '\nPreparation for %s release complete!\n' % config.release_number,
        'Images uploaded to %s\n' % config.images_upload_location,
        'Artifacts uploaded to %s\n' % config.artifacts_upload_location
    ])


if __name__ == '__main__':
    main()

# diff --git a/scripts/licensecheck.pl b/scripts/licensecheck.pl
# new file mode 100644
# index 00000000..180e8989
# --- /dev/null
# +++ b/scripts/licensecheck.pl
# @@ -0,0 +1,596 @@
#!/usr/bin/perl
# This
script was originally based on the script of the same name from +# the KDE SDK (by dfaure@kde.org) +# +# This version is +# Copyright (C) 2007, 2008 Adam D. Barratt +# Copyright (C) 2012 Francesco Poli +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . + +=head1 NAME + +licensecheck - simple license checker for source files + +=head1 SYNOPSIS + +B B<--help>|B<--version> + +B [B<--no-conf>] [B<--verbose>] [B<--copyright>] +[B<-l>|B<--lines=>I] [B<-i>|B<--ignore=>I] [B<-c>|B<--check=>I] +[B<-m>|B<--machine>] [B<-r>|B<--recursive>] +I + +=head1 DESCRIPTION + +B attempts to determine the license that applies to each file +passed to it, by searching the start of the file for text belonging to +various licenses. + +If any of the arguments passed are directories, B will add +the files contained within to the list of files to process. + +=head1 OPTIONS + +=over 4 + +=item B<--verbose>, B<--no-verbose> + +Specify whether to output the text being processed from each file before +the corresponding license information. + +Default is to be quiet. + +=item B<-l=>I, B<--lines=>I + +Specify the number of lines of each file's header which should be parsed +for license information. (Default is 60). + +=item B<-i=>I, B<--ignore=>I + +When processing the list of files and directories, the regular +expression specified by this option will be used to indicate those which +should not be considered (e.g. backup files, VCS metadata). 
+ +=item B<-r>, B<--recursive> + +Specify that the contents of directories should be added +recursively. + +=item B<-c=>I, B<--check=>I + +Specify a pattern against which filenames will be matched in order to +decide which files to check the license of. + +The default includes common source files. + +=item B<--copyright> + +Also display copyright text found within the file + +=item B<-m>, B<--machine> + +Display the information in a machine readable way, i.e. in the form +[] so that it can be easily sorted +and/or filtered, e.g. with the B and B commands. +Note that using the B<--verbose> option will kill the readability. + +=item B<--no-conf>, B<--noconf> + +Do not read any configuration files. This can only be used as the first +option given on the command line. + +=back + +=head1 CONFIGURATION VARIABLES + +The two configuration files F and +F<~/.devscripts> are sourced by a shell in that order to set +configuration variables. Command line options can be used to override +configuration file settings. Environment variable settings are +ignored for this purpose. The currently recognised variables are: + +=over 4 + +=item B + +If this is set to I, then it is the same as the B<--verbose> command +line parameter being used. The default is I. + +=item B + +If this is set to a positive number then the specified number of lines +at the start of each file will be read whilst attempting to determine +the license(s) in use. This is equivalent to the B<--lines> command line +option. + +=back + +=head1 LICENSE + +This code is copyright by Adam D. Barratt >, +all rights reserved; based on a script of the same name from the KDE +SDK, which is copyright by >. +This program comes with ABSOLUTELY NO WARRANTY. +You are free to redistribute this code under the terms of the GNU +General Public License, version 2 or later. + +=head1 AUTHOR + +Adam D. 
Barratt + +=cut + +use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt); +use File::Basename; + +my $progname = basename($0); + +# From dpkg-source +my $default_ignore_regex = ' +# Ignore general backup files +(?:^|/).*~$| +# Ignore emacs recovery files +(?:^|/)\.#.*$| +# Ignore vi swap files +(?:^|/)\..*\.swp$| +# Ignore baz-style junk files or directories +(?:^|/),,.*(?:$|/.*$)| +# File-names that should be ignored (never directories) +(?:^|/)(?:DEADJOE|\.cvsignore|\.arch-inventory|\.bzrignore|\.gitignore)$| +# File or directory names that should be ignored +(?:^|/)(?:CVS|RCS|\.deps|\{arch\}|\.arch-ids|\.svn|\.hg|_darcs|\.git| +\.shelf|_MTN|\.bzr(?:\.backup|tags)?)(?:$|/.*$) +'; + +# Take out comments and newlines +$default_ignore_regex =~ s/^#.*$//mg; +$default_ignore_regex =~ s/\n//sg; + +my $default_check_regex = '\.(c(c|pp|xx)?|h(h|pp|xx)?|f(77|90)?|go|p(l|m)|xs|sh|php|py(|x)|rb|java|vala|el|sc(i|e)|cs|pas|inc|dtd|xsl|mod|m|tex|mli?)$'; + +my $modified_conf_msg; + +my %OPT=( + verbose => '', + lines => '', + noconf => '', + ignore => '', + check => '', + recursive => 0, + copyright => 0, + machine => 0, +); + +my $def_lines = 60; + +# Read configuration files and then command line +# This is boilerplate + +if (@ARGV and $ARGV[0] =~ /^--no-?conf$/) { + $modified_conf_msg = " (no configuration files read)"; + shift; +} else { + my @config_files = ('/etc/devscripts.conf', '~/.devscripts'); + my %config_vars = ( + 'LICENSECHECK_VERBOSE' => 'no', + 'LICENSECHECK_PARSELINES' => $def_lines, + ); + my %config_default = %config_vars; + + my $shell_cmd; + # Set defaults + foreach my $var (keys %config_vars) { + $shell_cmd .= qq[$var="$config_vars{$var}";\n]; + } + $shell_cmd .= 'for file in ' . join(" ", @config_files) . "; do\n"; + $shell_cmd .= '[ -f $file ] && . $file; done;' . 
"\n"; + # Read back values + foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" } + my $shell_out = `/bin/bash -c '$shell_cmd'`; + @config_vars{keys %config_vars} = split /\n/, $shell_out, -1; + + # Check validity + $config_vars{'LICENSECHECK_VERBOSE'} =~ /^(yes|no)$/ + or $config_vars{'LICENSECHECK_VERBOSE'} = 'no'; + $config_vars{'LICENSECHECK_PARSELINES'} =~ /^[1-9][0-9]*$/ + or $config_vars{'LICENSECHECK_PARSELINES'} = $def_lines; + + foreach my $var (sort keys %config_vars) { + if ($config_vars{$var} ne $config_default{$var}) { + $modified_conf_msg .= " $var=$config_vars{$var}\n"; + } + } + $modified_conf_msg ||= " (none)\n"; + chomp $modified_conf_msg; + + $OPT{'verbose'} = $config_vars{'LICENSECHECK_VERBOSE'} eq 'yes' ? 1 : 0; + $OPT{'lines'} = $config_vars{'LICENSECHECK_PARSELINES'}; +} + +GetOptions(\%OPT, + "help|h", + "check|c=s", + "copyright", + "ignore|i=s", + "lines|l=i", + "machine|m", + "noconf|no-conf", + "recursive|r", + "verbose!", + "version|v", +) or die "Usage: $progname [options] filelist\nRun $progname --help for more details\n"; + +$OPT{'lines'} = $def_lines if $OPT{'lines'} !~ /^[1-9][0-9]*$/; +$OPT{'ignore'} = $default_ignore_regex if ! length $OPT{'ignore'}; +$OPT{'check'} = $default_check_regex if ! 
length $OPT{'check'}; + +if ($OPT{'noconf'}) { + fatal("--no-conf is only acceptable as the first command-line option!"); +} +if ($OPT{'help'}) { help(); exit 0; } +if ($OPT{'version'}) { version(); exit 0; } + +die "Usage: $progname [options] filelist\nRun $progname --help for more details\n" unless @ARGV; + +$OPT{'lines'} = $def_lines if not defined $OPT{'lines'}; + +my @files = (); +my @find_args = (); +my $files_count = @ARGV; + +push @find_args, qw(-maxdepth 1) unless $OPT{'recursive'}; +push @find_args, qw(-follow -type f -print); + +while (@ARGV) { + my $file = shift @ARGV; + + if (-d $file) { + open my $FIND, '-|', 'find', $file, @find_args + or die "$progname: couldn't exec find: $!\n"; + + while (<$FIND>) { + chomp; + next unless m%$OPT{'check'}%; + # Skip empty files + next if (-z $_); + push @files, $_ unless m%$OPT{'ignore'}%; + } + close $FIND; + } else { + next unless ($files_count == 1) or $file =~ m%$OPT{'check'}%; + push @files, $file unless $file =~ m%$OPT{'ignore'}%; + } +} + +while (@files) { + my $file = shift @files; + my $content = ''; + my $copyright_match; + my $copyright = ''; + my $license = ''; + my %copyrights; + + open (my $F, '<' ,$file) or die "Unable to access $file\n"; + while (<$F>) { + last if ($. > $OPT{'lines'}); + $content .= $_; + $copyright_match = parse_copyright($_); + if ($copyright_match) { + $copyrights{lc("$copyright_match")} = "$copyright_match"; + } + } + close($F); + + $copyright = join(" / ", values %copyrights); + + print qq(----- $file header -----\n$content----- end header -----\n\n) + if $OPT{'verbose'}; + + $license = parselicense(clean_comments($content)); + + if ($OPT{'machine'}) { + print "$file\t$license"; + print "\t" . ($copyright or "*No copyright*") if $OPT{'copyright'}; + print "\n"; + } else { + print "$file: "; + print "*No copyright* " unless $copyright; + print $license . "\n"; + print " [Copyright: " . $copyright . 
"]\n" + if $copyright and $OPT{'copyright'}; + print "\n" if $OPT{'copyright'}; + } +} + +sub parse_copyright { + my $copyright = ''; + my $match; + + my $copyright_indicator_regex = ' + (?:copyright # The full word + |copr\. # Legally-valid abbreviation + |\x{00a9} # Unicode character COPYRIGHT SIGN + |\xc2\xa9 # Unicode copyright sign encoded in iso8859 + |\(c\) # Legally-null representation of sign + )'; + my $copyright_disindicator_regex = ' + \b(?:info(?:rmation)? # Discussing copyright information + |notice # Discussing the notice + |and|or # Part of a sentence + )\b'; + + if (m%$copyright_indicator_regex(?::\s*|\s+)(\S.*)$%ix) { + $match = $1; + + # Ignore lines matching "see foo for copyright information" etc. + if ($match !~ m%^\s*$copyright_disindicator_regex%ix) { + # De-cruft + $match =~ s/([,.])?\s*$//; + $match =~ s/$copyright_indicator_regex//igx; + $match =~ s/^\s+//; + $match =~ s/\s{2,}/ /g; + $match =~ s/\\@/@/g; + $copyright = $match; + } + } + + return $copyright; +} + +sub clean_comments { + local $_ = shift or return q{}; + + # Remove generic comments: look for 4 or more lines beginning with + # regular comment pattern and trim it. Fall back to old algorithm + # if no such pattern found. + my @matches = m/^\s*([^a-zA-Z0-9\s]{1,3})\s\w/mg; + if (@matches >= 4) { + my $comment_length = length($matches[0]); + my $comment_re = qr/\s*[\Q$matches[0]\E]{${comment_length}}\s*/; + s/^$comment_re//mg; + } + + # Remove Fortran comments + s/^[cC] //gm; + tr/\t\r\n/ /; + + # Remove C / C++ comments + s#(\*/|/[/*])##g; + tr% A-Za-z.,@;0-9\(\)/-%%cd; + tr/ //s; + + return $_; +} + +sub help { + print <<"EOF"; +Usage: $progname [options] filename [filename ...] 
+Valid options are:
+   --help, -h             Display this message
+   --version, -v          Display version and copyright info
+   --no-conf, --noconf    Don't read devscripts config files; must be
+                          the first option given
+   --verbose              Display the header of each file before its
+                          license information
+   --lines, -l            Specify how many lines of the file header
+                          should be parsed for license information
+                          (Default: $def_lines)
+   --check, -c            Specify a pattern indicating which files should
+                          be checked
+                          (Default: '$default_check_regex')
+   --machine, -m          Display in a machine readable way (good for awk)
+   --recursive, -r        Add the contents of directories recursively
+   --copyright            Also display the file's copyright
+   --ignore, -i           Specify that files / directories matching the
+                          regular expression should be ignored when
+                          checking files
+                          (Default: '$default_ignore_regex')
+
+Default settings modified by devscripts configuration files:
+$modified_conf_msg
+EOF
+}
+
+# Print the program name, the version placeholder and the copyright and
+# licence notice to standard output.
+# NOTE(review): the author e-mail addresses look as if they were stripped
+# from the text below ("Barratt ; based", "by .") -- restore them from the
+# upstream devscripts copy.
+sub version {
+    print <<"EOF";
+This is $progname, from the Debian devscripts package, version ###VERSION###
+Copyright (C) 2007, 2008 by Adam D. Barratt ; based
+on a script of the same name from the KDE SDK by .
+
+This program comes with ABSOLUTELY NO WARRANTY.
+You are free to redistribute this code under the terms of the
+GNU General Public License, version 2, or (at your option) any
+later version.
+EOF
+}
+
+# Work out which licence(s) a piece of header text describes.
+#
+# Takes one argument, the text to examine, and returns a short description
+# such as "GPL (v2 or later)", or "UNKNOWN" when no test matches.
+#
+# Every matching test prepends its tag to the result, so when several tests
+# match, the tag of the test that comes last in this function appears first.
+# Most patterns are case-sensitive and expect single spaces between words;
+# presumably the caller normalises the text first -- TODO confirm.
+sub parselicense {
+    my ($licensetext) = @_;
+
+    my $gplver = "";
+    my $extrainfo = "";
+    my $license = "";
+
+    # Find the version of the GNU licence first; it is reused by the LGPL,
+    # AGPL and GPL tests below.
+    if ($licensetext =~ /version ([^, ]+?)[.,]? (?:\(?only\)?.? )?(?:of the GNU (Affero )?(Lesser |Library )?General Public License )?(as )?published by the Free Software Foundation/i or
+        $licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License (?:as )?published by the Free Software Foundation[;,] version ([^, ]+?)[.,]? /i) {
+
+        $gplver = " (v$1)";
+    } elsif ($licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License, version (\d+(?:\.\d+)?)[ \.]/) {
+        $gplver = " (v$1)";
+    } elsif ($licensetext =~ /either version ([^ ]+)(?: of the License)?, or \(at your option\) any later version/) {
+        $gplver = " (v$1 or later)";
+    }
+
+    # NOTE(review): "51 Franklin Steet" looks deliberate -- it appears to
+    # flag the misspelt form of the address; confirm against upstream before
+    # "correcting" it.
+    if ($licensetext =~ /(?:675 Mass Ave|59 Temple Place|51 Franklin Steet|02139|02111-1307)/i) {
+        $extrainfo = " (with incorrect FSF address)$extrainfo";
+    }
+
+    if ($licensetext =~ /permission (?:is (also granted|given))? to link (the code of )?this program with (any edition of )?(Qt|the Qt library)/i) {
+        $extrainfo = " (with Qt exception)$extrainfo";
+    }
+
+    if ($licensetext =~ /(All changes made in this file will be lost|DO NOT (EDIT|delete this file)|Generated (automatically|by|from)|generated.*file)/i) {
+        $license = "GENERATED FILE";
+    }
+
+    if ($licensetext =~ /((is free software.? )?you can redistribute (it|them) and\/or modify (it|them)|is licensed) under the terms of (version [^ ]+ of )?the (GNU (Library |Lesser )General Public License|LGPL)/i) {
+        $license = "LGPL$gplver$extrainfo $license";
+    }
+
+    if ($licensetext =~ /is free software.? you can redistribute (it|them) and\/or modify (it|them) under the terms of the (GNU Affero General Public License|AGPL)/i) {
+        $license = "AGPL$gplver$extrainfo $license";
+    }
+
+    if ($licensetext =~ /(is free software.? )?you (can|may) redistribute (it|them) and\/or modify (it|them) under the terms of (?:version [^ ]+ (?:\(?only\)? )?of )?the GNU General Public License/i) {
+        $license = "GPL$gplver$extrainfo $license";
+    }
+
+    if ($licensetext =~ /is distributed under the terms of the GNU General Public License,/
+        and length $gplver) {
+        $license = "GPL$gplver$extrainfo $license";
+    }
+
+    if ($licensetext =~ /is distributed.*terms.*GPL/) {
+        $license = "GPL (unversioned/unknown version) $license";
+    }
+
+    if ($licensetext =~ /This file is part of the .*Qt GUI Toolkit. This file may be distributed under the terms of the Q Public License as defined/) {
+        $license = "QPL (part of Qt) $license";
+    } elsif ($licensetext =~ /may (be distributed|redistribute it) under the terms of the Q Public License/) {
+        $license = "QPL $license";
+    }
+
+    if ($licensetext =~ /opensource\.org\/licenses\/mit-license\.php/) {
+        $license = "MIT/X11 (BSD like) $license";
+    } elsif ($licensetext =~ /Permission is hereby granted, free of charge, to any person obtaining a copy of this software and(\/or)? associated documentation files \(the (Software|Materials)\), to deal in the (Software|Materials)/) {
+        $license = "MIT/X11 (BSD like) $license";
+    } elsif ($licensetext =~ /Permission is hereby granted, without written agreement and without license or royalty fees, to use, copy, modify, and distribute this software and its documentation for any purpose/) {
+        $license = "MIT/X11 (BSD like) $license";
+    }
+
+    if ($licensetext =~ /Permission to use, copy, modify, and(\/or)? distribute this software for any purpose with or without fee is hereby granted, provided.*copyright notice.*permission notice.*all copies/) {
+        $license = "ISC $license";
+    }
+
+    # The BSD variants share the warranty disclaimer; the clause count is
+    # decided by which of the additional conditions is present.
+    if ($licensetext =~ /THIS SOFTWARE IS PROVIDED .*AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY/) {
+        if ($licensetext =~ /All advertising materials mentioning features or use of this software must display the following acknowledge?ment.*This product includes software developed by/i) {
+            $license = "BSD (4 clause) $license";
+        } elsif ($licensetext =~ /(The name .*? may not|Neither the names? .*? nor the names of (its|their|other) contributors may) be used to endorse or promote products derived from this software/i) {
+            $license = "BSD (3 clause) $license";
+        } elsif ($licensetext =~ /Redistributions of source code must retain the above copyright notice/i) {
+            $license = "BSD (2 clause) $license";
+        } else {
+            $license = "BSD $license";
+        }
+    }
+
+    if ($licensetext =~ /Mozilla Public License,? (Version|v\.) (\d+(?:\.\d+)?)/) {
+        $license = "MPL (v$2) $license";
+    }
+
+    if ($licensetext =~ /Released under the terms of the Artistic License ([^ ]+)/) {
+        $license = "Artistic (v$1) $license";
+    }
+
+    if ($licensetext =~ /is free software under the Artistic [Ll]icense/) {
+        $license = "Artistic $license";
+    }
+
+    if ($licensetext =~ /This program is free software; you can redistribute it and\/or modify it under the same terms as Perl itself/) {
+        $license = "Perl $license";
+    }
+
+    if ($licensetext =~ /under the Apache License, Version ([^ ]+)/) {
+        $license = "Apache (v$1) $license";
+    }
+
+    if ($licensetext =~ /(THE BEER-WARE LICENSE)/i) {
+        $license = "Beerware $license";
+    }
+
+    if ($licensetext =~ /This source file is subject to version ([^ ]+) of the PHP license/) {
+        $license = "PHP (v$1) $license";
+    }
+
+    if ($licensetext =~ /under the terms of the CeCILL /) {
+        $license = "CeCILL $license";
+    }
+
+    if ($licensetext =~ /under the terms of the CeCILL-([^ ]+) /) {
+        $license = "CeCILL-$1 $license";
+    }
+
+    if ($licensetext =~ /under the SGI Free Software License B/) {
+        $license = "SGI Free Software License B $license";
+    }
+
+    if ($licensetext =~ /is in the public domain/i) {
+        $license = "Public domain $license";
+    }
+
+    if ($licensetext =~ /terms of the Common Development and Distribution License(, Version ([^(]+))? \(the License\)/) {
+        $license = "CDDL " . ($1 ? "(v$2) " : '') . $license;
+    }
+
+    if ($licensetext =~ /Microsoft Permissive License \(Ms-PL\)/) {
+        $license = "Ms-PL $license";
+    }
+
+    # The first pattern has no capture groups, so $1 is only set when the
+    # "Boost Software License ... Version" pattern is the one that matched.
+    if ($licensetext =~ /Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license \(the \"Software\"\)/ or
+        $licensetext =~ /Boost Software License([ ,-]+Version ([^ ]+)?(\.))/i) {
+        $license = "BSL " . ($1 ? "(v$2) " : '') . $license;
+    }
+
+    if ($licensetext =~ /PYTHON SOFTWARE FOUNDATION LICENSE (VERSION ([^ ]+))/i) {
+        $license = "PSF " . ($1 ? "(v$2) " : '') . $license;
+    }
+
+    if ($licensetext =~ /The origin of this software must not be misrepresented.*Altered source versions must be plainly marked as such.*This notice may not be removed or altered from any source distribution/ or
+        $licensetext =~ /see copyright notice in zlib\.h/) {
+        $license = "zlib/libpng $license";
+    } elsif ($licensetext =~ /This code is released under the libpng license/) {
+        $license = "libpng $license";
+    }
+
+    # The WTFPL tests are chained so that the tag is added only once.  As
+    # separate tests, a versioned notice matched both of the first two
+    # patterns and was reported as "WTFPL WTFPL (v2)".
+    if ($licensetext =~ /Do What The Fuck You Want To Public License, Version ([^, ]+)/i) {
+        $license = "WTFPL (v$1) $license";
+    } elsif ($licensetext =~ /Do what The Fuck You Want To Public License/i) {
+        $license = "WTFPL $license";
+    } elsif ($licensetext =~ /(License WTFPL|Under (the|a) WTFPL)/i) {
+        $license = "WTFPL $license";
+    }
+
+    $license = "UNKNOWN" if (!length($license));
+
+    # Remove trailing spaces.
+    $license =~ s/\s+$//;
+
+    return $license;
+}
+
+# Abort the program with a message made of the program name, the line
+# number of the caller and the arguments passed in.  NUL bytes are removed
+# from the message and a doubled trailing newline is reduced to one.
+sub fatal {
+    my ($pack,$file,$line);
+    ($pack,$file,$line) = caller();
+    (my $msg = "$progname: fatal error at line $line:\n@_\n") =~ tr/\0//d;
+    $msg =~ s/\n\n$/\n/;
+    die $msg;
+}
+
diff --git a/scripts/licensecheck.sh b/scripts/licensecheck.sh
new file mode 100755
index 00000000..a39e62da
--- /dev/null
+++ b/scripts/licensecheck.sh
@@ -0,0 +1,131 @@
+#!/bin/sh
+
+# Copyright (C) 2013 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+set -e
+
+# Print a short description of how to invoke this script.
+usage() {
+    echo "Usage: license-check your-system"
+    echo
+    echo "This checks license info for all the chunks in your-system"
+    echo "It's re-runnable, and does morph edit to get each chunk."
+    echo "The process can take a while."
+}
+
+# Succeed when $2 is exactly one of the space-separated words in $1.
+# 'grep -w' is avoided here: it would treat the name as a regular
+# expression and would also match part of a hyphenated name, such as
+# "texinfo" inside "texinfo-tarball".
+in_list() {
+    [ -n "$2" ] || return 1
+    case " $1 " in
+        *" $2 "*) return 0 ;;
+    esac
+    return 1
+}
+
+# Move every backed-up morphology back over its (possibly edited) original.
+restore_morphs() {
+    for f in *.morph.bak; do
+        [ -e "$f" ] || continue
+        mv "$f" "${f%.bak}"
+    done
+}
+
+
+if [ -z "$1" ]; then
+    usage
+    exit 1
+fi
+
+workspace="$PWD"/../../..
+system="$1"
+
+# Chunks that are not checked out with 'morph edit'; their repositories are
+# collected in gplv3_repos and left out of the report.
+gplv3_chunks="\
+autoconf \
+automake \
+bash \
+binutils \
+bison \
+ccache \
+cmake \
+flex \
+gawk \
+gcc \
+gdbm \
+gettext \
+gperf \
+groff \
+libtool \
+m4 \
+make \
+nano \
+texinfo-tarball"
+
+gplv3_repos=""
+
+
+# The morphologies are backed up here and restored when the script exits.
+# The restore runs from a trap so that it also happens when 'set -e' aborts
+# the script part-way through or the script is interrupted.
+trap restore_morphs EXIT
+trap 'exit 1' HUP INT TERM
+for f in *.morph; do
+    [ -e "$f" ] || continue
+    cp "$f" "$f.bak"
+done
+
+
+strata=$(grep "morph.*: *" "$system.morph" | cut -d: -f2-)
+for stratum in $strata; do
+    chunks=$(grep "name.*: *" "$stratum.morph" | cut -d: -f2-)
+    for chunk in $chunks; do
+        if [ "$chunk" != "$stratum" ]; then
+            if ! in_list "$gplv3_chunks" "$chunk"; then
+                morph edit "$chunk" 1>&2
+            else
+                # The repo is taken from the last line grep prints: the
+                # matching name line plus one line of trailing context.
+                repo=$(grep -A1 "name.*: *$chunk" "$stratum.morph" | \
+                       tail -n1 | cut -d: -f3-)
+                gplv3_repos="$gplv3_repos $repo"
+            fi
+        fi
+    done
+done
+
+
+repos=$(for stratum in $strata; do
+            grep "repo.*: *" "$stratum.morph" | cut -d: -f3-
+        done | sort -u)
+
+
+# Report the licenses found in each repository that is checked out under
+# $workspace/upstream and does not belong to a skipped chunk.
+for repo in $repos; do
+    if ! in_list "$gplv3_repos" "$repo" && \
+            [ -d "$workspace/upstream/$repo" ] ; then
+        echo "$repo"
+        perl licensecheck.pl -r "$workspace/upstream/$repo" | \
+            cut -d: -f2- | sort -u
+        echo
+    fi
+done
+
+# The morphologies are put back by the EXIT trap installed above.
+--
cgit v1.2.1