From b3ad97e448ea2a1b1d6a3b26d1beed291de9b7a2 Mon Sep 17 00:00:00 2001 From: Richard Ipsum Date: Wed, 19 Aug 2015 15:31:18 +0000 Subject: Add cpan.find_deps extension Change-Id: I4719d7a15ef133d0dcd72061ac7e612723b828df --- baserockimport/exts/cpan.find_deps | 696 +++++++++++++++++++++++++++++++++++++ 1 file changed, 696 insertions(+) create mode 100755 baserockimport/exts/cpan.find_deps diff --git a/baserockimport/exts/cpan.find_deps b/baserockimport/exts/cpan.find_deps new file mode 100755 index 0000000..1f1e2ab --- /dev/null +++ b/baserockimport/exts/cpan.find_deps @@ -0,0 +1,696 @@ +#!/usr/bin/env perl +# Find dependencies for a given Perl distribution +# +# Copyright © 2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +package CPANDistDependencyFinder; + +use 5.020; +use strict; +use warnings; + +use Log::Log4perl qw(get_logger); +use IO::Async::Process; +use IO::Async::Loop; +use POSIX; +use JSON; # imports encode_json, decode_json, to_json and from_json +use Carton::Snapshot; +use CPAN::Mini::Inject; +use CPAN::DistnameInfo; +use Archive::Tar; +use Archive::Extract; +use LWP::Simple; +use File::Spec; +use File::Find; +use File::Temp; +use Hash::Util qw(lock_keys lock_hashref lock_hash_recurse); + +use FindBin; +use lib "$FindBin::Bin"; +use ImporterBase; + +use Moose; +extends 'ImportExtension'; + +my $MINICPAN_MIRROR_PATH = "checkouts/.cpan_import_minicpan"; +my $MINICPAN_STORE_PATH = "checkouts/.cpan_import_minicpan_store"; +my $MINICPAN_MIRROR_CONF_PATH = "checkouts/.cpan_import_minicpan_conf"; + +my $logger; + +sub _log_child_output +{ + my ($self, $stream, $buffref) = @_; + + $logger->debug("$1") while ($$buffref =~ s/^(.*)\n//); +} + +sub _handle_carton_finish +{ + my ($self, $process_obj, $status) = @_; + my $exit_status = WEXITSTATUS($status); + + die "carton exited with error: $exit_status" if $exit_status != 0; +} + +sub _handle_subprocess_exception +{ + my ($self, $process_obj, $exception, $errno, $status) = @_; + + if ($exception) { + die "Exception occurred while trying to run subprocess: $exception"; + } + + my $exit_status = WEXITSTATUS($status); + + die "Subprocess failed to exec() - $errno" if ($exit_status == 255); + die "Subprocess exited with exit status $exit_status"; +} + +sub _write_cpanfile +{ + my ($self, $path, $required_modules_ref, $requirement_version) = @_; + my %required_modules = %$required_modules_ref; + + open(my $fh, '>', $path) + or die "Couldn't create Carton cpanfile at `$path': $!"; + + while (my ($module, $module_attrs_ref) = each %required_modules) { + my $module_version = $module_attrs_ref->{minimum_version}; + my $spec = "requires '$module'"; + $spec = $spec . ", '$module_version'" if $module_version; + + $logger->debug("Writing spec: $spec"); + + say $fh "$spec;"; + } +} + +sub _create_local_mirror_skeleton +{ + my ($self, $mirror_path, $store_path) = @_; + + $logger->debug("Creating local mirror skeleton " . + "at $mirror_path and $store_path"); + + mkdir $mirror_path unless (-d $mirror_path); + mkdir $store_path unless (-d $store_path); + + mkdir "$mirror_path/authors" unless (-d "$mirror_path/authors"); + mkdir "$mirror_path/modules" unless (-d "$mirror_path/modules"); + + my $touch = sub { + my $path = shift; + open(my $fh, '>>', $path) + or die "Couldn't create local mirror skeleton: " + . "couldn't open `$path' for writing: $!"; + }; + + $touch->("$mirror_path/authors/01mailrc.txt.gz"); + $touch->("$mirror_path/modules/02packages.details.txt.gz"); + + $logger->debug("Mirror skeleton created"); +} + +sub _write_minicpan_conf +{ + my ($self, $mirror_path, $store_path, $conf_path) = @_; + + $logger->debug("Writing minicpan config to $conf_path"); + + open(my $fh, '>', $conf_path) + or die "Couldn't write minicpan config: " + . "couldn't open `$conf_path' for writing: $!"; + + print $fh <<"END"; + local: $mirror_path + remote: ftp://ftp.cpan.org/pub/CPAN ftp://ftp.kernel.org/pub/CPAN + repository: $store_path + passive: yes + dirmode: 0755 +END +} + +sub _make_tarball +{ + my ($self, $source_dir, $tarball_filename) = @_; + my $dh = DirHandle->new("."); + my @file_list; + my $tarball_store = "checkouts/.cpan_import_minicpan_tarballs"; + + mkdir $tarball_store unless (-d $tarball_store); + my $abs_tarball_path = File::Spec->rel2abs( + "$tarball_store/$tarball_filename"); + + $logger->debug("Creating tarball from $source_dir at $abs_tarball_path"); + + chdir "$source_dir/.."; + my @xs = split("/", $source_dir); + my $tarball_prefix = $xs[-1]; + + my $wanted = sub { + unless ($File::Find::name =~ m|$tarball_prefix/\.git|) { + push @file_list, $File::Find::name; + } + }; + + find(\&$wanted, $tarball_prefix); + + my $s = join(', ', @file_list); + $logger->debug("file_list: $s"); + + my $created = Archive::Tar->create_archive($abs_tarball_path, + COMPRESS_GZIP, @file_list); + die Archive::Tar->error($created) unless $created; + + $logger->debug("Tarball created at $tarball_filename"); + + chdir $dh; + + return ($tarball_prefix, $abs_tarball_path); +} + +sub _fetch_release_metadata_from_metacpan +{ + my ($self, $dist_name, $url) = @_; + + my $content = get($url); + die "Failed to get from $url" unless $content; + + my $json = decode_json $content; + + # make hash read only and error if access to non-existent key is made + lock_hashref($json); + return $json; +} + +sub _get_dist_tarball_filename_and_authorid +{ + my ($self, $dist_name, $dist_version, $dist_meta_ref) = @_; + + my $pathname = $dist_meta_ref->{pathname}; + my $dist_authorid; + + if ($pathname) { + my $dist_info = CPAN::DistnameInfo->new($pathname); + + unless (defined $dist_info) { + die "Couldn't construct DistInfo from pathname: $pathname"; + } + + $dist_version = $dist_info->version; + $dist_authorid = $dist_info->cpanid; + } + else { + my $metacpan_url = "http://api.metacpan.org/release/$dist_name"; + my $meta = $self->_fetch_release_metadata_from_metacpan($dist_name, + $metacpan_url); + + $dist_version //= $meta->{version} if exists $meta->{version}; + $dist_authorid = $meta->{author} if exists $meta->{author}; + + if (!defined $dist_version or !defined $dist_authorid) { + $logger->warn("No version present in metadata obtained from " + . $metacpan_url) unless defined $dist_version; + $logger->warn("No author present in metadata obtained from " + . $metacpan_url) unless defined $dist_authorid; + return undef; + } + } + + $logger->debug("dist_version: $dist_version " + . "dist_authorid: $dist_authorid"); + + # The filename of this tar should not be arbitrary + # it should correspond to the format described by CPAN::DistNameInfo + # we should have "dist_name-dist_version.tar.gz" + my %h = (author => $dist_authorid, + filename => "${dist_name}-${dist_version}.tar.gz"); + lock_keys(%h); # will make accessing non-existent keys an error + return \%h; +} + +sub _inject +{ + my ($self, $minicpan_conf_path, $tarball_path, + $module_name, $module_version, $module_authorid) = @_; + + $logger->debug("Injecting $tarball_path into local cpan mirror"); + my $str_module_version = $module_version // '(undef)'; + $logger->debug("Module name: $module_name\t" + . "version: $str_module_version\t" + . "authorid: $module_authorid"); + + my $mcpi = CPAN::Mini::Inject->new; + $mcpi->parsecfg($minicpan_conf_path); + + if ($module_version) { + $mcpi->add(module => $module_name, + authorid => $module_authorid, + version => $module_version, + file => $tarball_path); + } + else { + # CPAN::Mini::Inject can determine version if version is not specified + $mcpi->add(module => $module_name, + authorid => $module_authorid, + file => $tarball_path); + } + + $mcpi->inject; +} + +sub _inject_modules +{ + my ($self, $mirror_conf_path, $dist_meta_ref, + $dist_name, $dist_authorid, $source_tarball_path) = @_; + + $logger->debug("Injecting the following modules..."); + my $modules_ref = $dist_meta_ref->{modules}; + my @modules = keys %{$modules_ref}; + + for my $module (@modules) { + $logger->debug($module); + my $module_version = $modules_ref->{$module}{minimum_version}; + + $self->_inject($mirror_conf_path, $source_tarball_path, + $module, $module_version, $dist_authorid); + } +} + +sub _get_provider_dist_name +{ + my ($self, $provider_dist_obj, + $required_module, $required_module_version) = @_; + + my $distinfo = CPAN::DistnameInfo->new($provider_dist_obj->pathname); + my $provider_dist_name = $distinfo->dist; + + # Check module provided by dist satisfies module requirement + my $dist_version_for_module = + $provider_dist_obj->version_for($required_module); + + if ($dist_version_for_module) { + if ($dist_version_for_module < $required_module_version) { + die "Module `$required_module' requires version " + . "$required_module_version but distribution " + . "`$provider_dist_name' has $dist_version_for_module"; + } + } + else { + $logger->warn("Could not verify that $provider_dist_name satisfies " + . "requirement for $required_module " + . "$required_module_version"); + } + + return $provider_dist_name; +} + +sub _map_module_to_distribution +{ + my ($self, $snapshot, $dist_meta_ref, $module, $module_version) = @_; + + my $provider_dist_obj = $snapshot->find($module); + my $provider_dist_name; + my $provider_pathname; + + if ($provider_dist_obj) { + $provider_dist_name = $self->_get_provider_dist_name( + $provider_dist_obj, $module, $module_version); + $provider_pathname = $provider_dist_obj->distfile; + } + else { + $provider_dist_name = '::CORE::'; + } + + $logger->debug("$provider_dist_name provides $module"); + + unless (exists $dist_meta_ref->{$provider_dist_name}) { + $dist_meta_ref->{$provider_dist_name} = {modules => {}, + pathname => ""}; + } + + if (exists $dist_meta_ref->{$provider_dist_name}{modules}{$module}) { + my $dist_meta_modules = $dist_meta_ref->{$provider_dist_name}{modules}; + my $current = $dist_meta_modules->{$module}{minimum_version}; + + # parse versions so we can compare them + my $x = version->parse($module_version); + my $y = version->parse($current); + + if ($x < $y) { + $logger->warn("Replacing requirement for $module $current " + . "with $module $module_version"); + $dist_meta_modules->{$module}{minimum_version} = $module_version; + } + } + + $dist_meta_ref->{$provider_dist_name}{modules}{$module} = { + minimum_version => $module_version + }; + + if ($provider_pathname) { + $dist_meta_ref->{$provider_dist_name}{pathname} = $provider_pathname; + } +} + +sub _map_modules_to_distributions +{ + my ($self, $snapshot, $dist_meta_ref, $required_modules_ref) = @_; + + $logger->debug("Requires:"); + for my $module (keys %{$required_modules_ref}) { + my $module_version = $required_modules_ref->{$module}; + + $logger->debug("\tRequirement: $module => $module_version"); + $self->_map_module_to_distribution($snapshot, $dist_meta_ref, + $module, $module_version); + } +} + +sub _run_carton +{ + my ($self, $source_dir, $mirror_path) = @_; + + my $mirror_abs_path = File::Spec->rel2abs($mirror_path); + + # This will make cpanm search our local minicpan for a module + # before it searches metacpan + my $perl_carton_cpanm_opt = "--mirror $mirror_abs_path " + . "--mirror https://cpan.metacpan.org " + . "--mirror-only " + . "--verbose"; + + my $loop = IO::Async::Loop->new; + my $process = IO::Async::Process->new( + command => ['carton', 'install'], + + # Sadly we need https://github.com/perl-carton/carton/pull/199 + # for this, miyagawa suggested PERL_CARTON_MIRROR but that appears + # to only let you set one mirror + setup => [chdir => $source_dir, + env => {%ENV, + PERL_CARTON_CPANM_OPT => $perl_carton_cpanm_opt}], + + stdout => { on_read => sub { $self->_log_child_output(@_); } }, + stderr => { on_read => sub { $self->_log_child_output(@_); } }, + + on_finish => sub { $self->_handle_carton_finish(@_); $loop->stop; }, + on_exception => sub { $self->_handle_subprocess_exception(@_); } + ); + + $loop->add($process); + $loop->run; + + $logger->debug('Loading snapshot'); + my $snapshot = Carton::Snapshot->new( + path => "$source_dir/cpanfile.snapshot"); + $snapshot->load(); + + return $snapshot; +} + +sub _extract_tarball +{ + my ($self, $tarball_path, $dstpath) = @_; + + my $tar = Archive::Extract->new(archive => $tarball_path); + my $ok = $tar->extract(to => $dstpath); + + unless ($ok) { + my $errstr = $tar->error; + die "Couldn't extract tarball at `$tarball_path': $errstr"; + } +} + +sub _get_buildscript_type +{ + my ($self, $source_dir) = @_; + + if (-e "$source_dir/Makefile.PL") { + return 'Makefile.PL'; + } + elsif (-e "$source_dir/Build.PL") { + return 'Build.PL'; + } + else { + $logger->debug("Couldn't find Makefile.PL or Build.PL"); + return undef; + } +} + +sub _run_plfile +{ + my ($self, $source_dir, + $source_tarball_prefix, $source_tarball_path, $pl_file) = @_; + + # TODO: It may aid debugging to have a flag that disables the automatic + # cleanup of this tempdir + my $tmpdir = File::Temp->newdir; + $logger->debug("tmpdir: $tmpdir"); + + $self->_extract_tarball($source_tarball_path, $tmpdir); + + my $loop = IO::Async::Loop->new; + my $process = IO::Async::Process->new( + command => ['perl', $pl_file], + + setup => [chdir => "$tmpdir/$source_tarball_prefix"], + + stdin => { from => '' }, # send EOF to child so it doesn't block + stdout => { on_read => sub { $self->_log_child_output(@_); } }, + stderr => { on_read => sub { $self->_log_child_output(@_); } }, + + on_finish => sub { $logger->debug("Finished running $pl_file"); + $loop->stop; + }, + on_exception => sub { $self->_handle_subprocess_exception(@_); } + ); + + $loop->add($process); + $loop->run; + + # no need to check whether this succeeds, lack of MYMETA.json is not fatal + open(my $fh, '<', "$tmpdir/$source_tarball_prefix/MYMETA.json"); + + local $/; # set record separator to undef so we can slurp the file + my $json = readline $fh; + my $metadata_ref = decode_json($json); + + return $metadata_ref; +} + +sub find_dependencies +{ + my ($self, $root_dist_name, $snapshot, + $build_deps_ref, $dist_meta_ref) = @_; + + for my $dist ($snapshot->distributions) { + + my $distinfo = CPAN::DistnameInfo->new($dist->pathname); + my $dist_name = $distinfo->dist; + + # Import tool expects "master" if we don't have a version constraint. + my $dist_version = $distinfo->version ? $distinfo->version : 'master'; + + $logger->debug("Distribution: $dist_name version: $dist_version"); + $logger->debug("Name: $root_dist_name"); + + my %required_modules = %{$dist->requirements->as_string_hash}; + $self->_map_modules_to_distributions($snapshot, $dist_meta_ref, + \%required_modules); + + unless ($dist_name eq $root_dist_name) { + $logger->debug("Adding $dist_name to list of build depends"); + $build_deps_ref->{$dist_name} = $dist_version; + } + } +} + +sub _get_distribution_metadata +{ + # Returns a reference to a hash containing the modules + # the given distribution provides + + my ($self, $distname) = @_; + my $dist_meta_ref; + my $input_metadata_path; + + if (exists $ENV{IMPORT_METAPATH}) { + $input_metadata_path = $ENV{IMPORT_METAPATH}; + } + + my $msg = $input_metadata_path // '(undef)'; + $logger->debug("IMPORT_METAPATH: $msg"); + + unless ($input_metadata_path) { + die "No import metadata available for `$distname', cannot proceed. " + . "(this is most likely a bug, please file a report)."; + } + + open(my $fh, '<', $input_metadata_path) + or die "Couldn't get distribution metadata for `$distname': " + . "couldn't open `$input_metadata_path' for reading"; + + local $/; # set record separator to undef so we can slurp the file + my $json = readline $fh; + my $metadata_ref = decode_json($json); + + # FIXME: This is horrible but presently you cannot lock a hash + # from a reference due to a bug in perl, this will be fixed by the + # next perl release + my %distmeta = %{$metadata_ref->{cpan}{'dist-meta'}{$distname}}; + + # locking the hash will make it an error to attempt to + # access any key that doesn't already exist, which is safer than the + # perl default of returning undef, it will also make the hash read-only + lock_hash_recurse(%distmeta); + + return \%distmeta; +} + +sub _is_importing_root_package +{ + my $self = shift; + my $input_metadata_path; + + if (exists $ENV{IMPORT_METAPATH}) { + $input_metadata_path = $ENV{IMPORT_METAPATH}; + } + + return $input_metadata_path =~ m/ROOT.meta$/; +} + +sub _add_root_modules_to_dist_meta +{ + # Supplements dist-meta with root package's requirements, + # needed since these module requirements for the root won't be specified + # in the snapshot file, only the requirements of the root package's + # children. + my ($self, $snapshot, $dist_meta_ref, $root_metadata_ref) = @_; + + for my $dep_type ('build', 'configure', 'runtime') { + my $deps_ref = $root_metadata_ref->{prereqs}{$dep_type}{requires}; + $self->_map_modules_to_distributions($snapshot, $dist_meta_ref, + $deps_ref); + } +} + +sub run +{ + my $self = shift; + $logger = get_logger(ref $self); + + # 1. Setup minicpan if it's not already + # 2. Make a tarball of the source checkout + # 3. Inject tarball into minicpan + # 4. Use metadata supplied by parent to write a cpanfile for carton + # 5. Run carton + # 6. If this is the root then run its Makefile.PL or Build.PL + # to obtain our module dependency data, since this isn't stored + # in the carton snapshot + # 7. Encode all dependency data as json and print + + my $argc = @ARGV; + if ($argc < 2 or $argc > 3) { + die 'usage: %s PACKAGE_SOURCE_DIR NAME [VERSION]'; + } + + my ($source_dir, $dist_name, $dist_version) = @ARGV; + $logger->debug("source_dir: $source_dir"); + $logger->debug("dist_name: $dist_name"); + $logger->debug("version: " . ($dist_version // "(no version)")); + + my $dist_meta_ref = $self->_get_distribution_metadata($dist_name); + + unless ($dist_meta_ref) { + die "No metadata associated with distribution `$dist_name': " . + "this is almost certainly a bug, please report it"; + } + + $self->_create_local_mirror_skeleton($MINICPAN_MIRROR_PATH, + $MINICPAN_STORE_PATH); + $self->_write_minicpan_conf($MINICPAN_MIRROR_PATH, $MINICPAN_STORE_PATH, + $MINICPAN_MIRROR_CONF_PATH); + + my $tarball_data_ref = $self->_get_dist_tarball_filename_and_authorid( + $dist_name, $dist_version, $dist_meta_ref); + + my $dist_authorid = $tarball_data_ref->{author}; + my $tarball_filename = $tarball_data_ref->{filename}; + + unless ($tarball_data_ref) { + my $str_dist_version = $dist_version // '(no version)'; + die "Couldn't obtain data needed for injection of " + . "`$dist_name' `$str_dist_version'"; + } + + my ($tarball_prefix, + $tarball_path) = $self->_make_tarball($source_dir, $tarball_filename); + + $self->_inject_modules($MINICPAN_MIRROR_CONF_PATH, $dist_meta_ref, + $dist_name, $dist_authorid, $tarball_path); + + # Using the dist's cpanfile doesn't work, + # at least for Log::Any, seemingly due to stuff in on_develop + # which we don't need, so we will always write our own cpanfile, + # if a cpanfile already exists it will be overwritten + my $modules_ref = $dist_meta_ref->{modules}; + $self->_write_cpanfile("$source_dir/cpanfile", $modules_ref, + $dist_version); + + $logger->debug("Running carton install for $dist_name in $source_dir"); + my $snapshot = $self->_run_carton($source_dir, $MINICPAN_MIRROR_PATH); + my %build_deps; + my %dist_meta; + + $self->find_dependencies($dist_name, $snapshot, + \%build_deps, \%dist_meta); + + # FIXME: There's no simple way to distinguish build deps from runtime deps + # given only the snapshot file, + # treating all as build deps will work for now + my %deps = ('runtime-dependencies' => {}, + 'build-dependencies' => \%build_deps, + 'dist-meta' => \%dist_meta); + + if ($self->_is_importing_root_package) { + $logger->debug("We are the root! ($dist_name)"); + + my $root_metadata_ref; + my $plfile = $self->_get_buildscript_type($source_dir); + + if ($plfile) { + $root_metadata_ref = $self->_run_plfile($source_dir, + $tarball_prefix, + $tarball_path, $plfile); + } + + if ($root_metadata_ref) { + $self->_add_root_modules_to_dist_meta($snapshot, \%dist_meta, + $root_metadata_ref); + } + else { + $logger->warn("Couldn't get metadata for root package " + . "`$dist_name'"); + } + } + + say encode_json({cpan => \%deps}); +} + + +CPANDistDependencyFinder->new()->run(); -- cgit v1.2.1