#!/usr/bin/env perl # Find dependencies for a given Perl distribution # # Copyright © 2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; version 2 of the License. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. package CPANDistDependencyFinder; use 5.020; use strict; use warnings; use Log::Log4perl qw(get_logger); use IO::Async::Process; use IO::Async::Loop; use POSIX; use JSON; # imports encode_json, decode_json, to_json and from_json use Carton::Snapshot; use CPAN::Mini::Inject; use CPAN::DistnameInfo; use Archive::Tar; use Archive::Extract; use LWP::Simple; use File::Spec; use File::Find; use File::Temp; use Hash::Util qw(lock_keys lock_hashref lock_hash_recurse); use FindBin; use lib "$FindBin::Bin"; use ImporterBase; use Moose; extends 'ImportExtension'; my $MINICPAN_MIRROR_PATH = "checkouts/.cpan_import_minicpan"; my $MINICPAN_STORE_PATH = "checkouts/.cpan_import_minicpan_store"; my $MINICPAN_MIRROR_CONF_PATH = "checkouts/.cpan_import_minicpan_conf"; my $logger; sub _log_child_output { my ($self, $stream, $buffref) = @_; $logger->debug("$1") while ($$buffref =~ s/^(.*)\n//); } sub _handle_carton_finish { my ($self, $process_obj, $status) = @_; my $exit_status = WEXITSTATUS($status); die "carton exited with error: $exit_status" if $exit_status != 0; } sub _handle_subprocess_exception { my ($self, $process_obj, $exception, $errno, $status) = @_; if ($exception) { die "Exception occurred while trying to run subprocess: $exception"; } my $exit_status = WEXITSTATUS($status); die "Subprocess failed to exec() - $errno" if ($exit_status == 255); die "Subprocess exited with exit status $exit_status"; } sub _write_cpanfile { my ($self, $path, $required_modules_ref, $requirement_version) = @_; my %required_modules = %$required_modules_ref; open(my $fh, '>', $path) or die "Couldn't create Carton cpanfile at `$path': $!"; while (my ($module, $module_attrs_ref) = each %required_modules) { my $module_version = $module_attrs_ref->{minimum_version}; my $spec = "requires '$module'"; $spec = $spec . ", '$module_version'" if $module_version; $logger->debug("Writing spec: $spec"); say $fh "$spec;"; } } sub _create_local_mirror_skeleton { my ($self, $mirror_path, $store_path) = @_; $logger->debug("Creating local mirror skeleton " . "at $mirror_path and $store_path"); mkdir $mirror_path unless (-d $mirror_path); mkdir $store_path unless (-d $store_path); mkdir "$mirror_path/authors" unless (-d "$mirror_path/authors"); mkdir "$mirror_path/modules" unless (-d "$mirror_path/modules"); my $touch = sub { my $path = shift; open(my $fh, '>>', $path) or die "Couldn't create local mirror skeleton: " . "couldn't open `$path' for writing: $!"; }; $touch->("$mirror_path/authors/01mailrc.txt.gz"); $touch->("$mirror_path/modules/02packages.details.txt.gz"); $logger->debug("Mirror skeleton created"); } sub _write_minicpan_conf { my ($self, $mirror_path, $store_path, $conf_path) = @_; $logger->debug("Writing minicpan config to $conf_path"); open(my $fh, '>', $conf_path) or die "Couldn't write minicpan config: " . "couldn't open `$conf_path' for writing: $!"; print $fh <<"END"; local: $mirror_path remote: ftp://ftp.cpan.org/pub/CPAN ftp://ftp.kernel.org/pub/CPAN repository: $store_path passive: yes dirmode: 0755 END } sub _make_tarball { my ($self, $source_dir, $tarball_filename) = @_; my $dh = DirHandle->new("."); my @file_list; my $tarball_store = "checkouts/.cpan_import_minicpan_tarballs"; mkdir $tarball_store unless (-d $tarball_store); my $abs_tarball_path = File::Spec->rel2abs( "$tarball_store/$tarball_filename"); $logger->debug("Creating tarball from $source_dir at $abs_tarball_path"); chdir "$source_dir/.."; my @xs = split("/", $source_dir); my $tarball_prefix = $xs[-1]; my $wanted = sub { unless ($File::Find::name =~ m|$tarball_prefix/\.git|) { push @file_list, $File::Find::name; } }; find(\&$wanted, $tarball_prefix); my $s = join(', ', @file_list); $logger->debug("file_list: $s"); my $created = Archive::Tar->create_archive($abs_tarball_path, COMPRESS_GZIP, @file_list); die Archive::Tar->error($created) unless $created; $logger->debug("Tarball created at $tarball_filename"); chdir $dh; return ($tarball_prefix, $abs_tarball_path); } sub _fetch_release_metadata_from_metacpan { my ($self, $dist_name, $url) = @_; my $content = get($url); die "Failed to get from $url" unless $content; my $json = decode_json $content; # make hash read only and error if access to non-existent key is made lock_hashref($json); return $json; } sub _get_dist_tarball_filename_and_authorid { my ($self, $dist_name, $dist_version, $dist_meta_ref) = @_; my $pathname = $dist_meta_ref->{pathname}; my $dist_authorid; if ($pathname) { my $dist_info = CPAN::DistnameInfo->new($pathname); unless (defined $dist_info) { die "Couldn't construct DistInfo from pathname: $pathname"; } $dist_version = $dist_info->version; $dist_authorid = $dist_info->cpanid; } else { my $metacpan_url = "http://api.metacpan.org/release/$dist_name"; my $meta = $self->_fetch_release_metadata_from_metacpan($dist_name, $metacpan_url); $dist_version //= $meta->{version} if exists $meta->{version}; $dist_authorid = $meta->{author} if exists $meta->{author}; if (!defined $dist_version or !defined $dist_authorid) { $logger->warn("No version present in metadata obtained from " . $metacpan_url) unless defined $dist_version; $logger->warn("No author present in metadata obtained from " . $metacpan_url) unless defined $dist_authorid; return undef; } } $logger->debug("dist_version: $dist_version " . "dist_authorid: $dist_authorid"); # The filename of this tar should not be arbitrary # it should correspond to the format described by CPAN::DistNameInfo # we should have "dist_name-dist_version.tar.gz" my %h = (author => $dist_authorid, filename => "${dist_name}-${dist_version}.tar.gz"); lock_keys(%h); # will make accessing non-existent keys an error return \%h; } sub _inject { my ($self, $minicpan_conf_path, $tarball_path, $module_name, $module_version, $module_authorid) = @_; $logger->debug("Injecting $tarball_path into local cpan mirror"); my $str_module_version = $module_version // '(undef)'; $logger->debug("Module name: $module_name\t" . "version: $str_module_version\t" . "authorid: $module_authorid"); my $mcpi = CPAN::Mini::Inject->new; $mcpi->parsecfg($minicpan_conf_path); if ($module_version) { $mcpi->add(module => $module_name, authorid => $module_authorid, version => $module_version, file => $tarball_path); } else { # CPAN::Mini::Inject can determine version if version is not specified $mcpi->add(module => $module_name, authorid => $module_authorid, file => $tarball_path); } $mcpi->inject; } sub _inject_modules { my ($self, $mirror_conf_path, $dist_meta_ref, $dist_name, $dist_authorid, $source_tarball_path) = @_; $logger->debug("Injecting the following modules..."); my $modules_ref = $dist_meta_ref->{modules}; my @modules = keys %{$modules_ref}; for my $module (@modules) { $logger->debug($module); my $module_version = $modules_ref->{$module}{minimum_version}; $self->_inject($mirror_conf_path, $source_tarball_path, $module, $module_version, $dist_authorid); } } sub _get_provider_dist_name { my ($self, $provider_dist_obj, $required_module, $required_module_version) = @_; my $distinfo = CPAN::DistnameInfo->new($provider_dist_obj->pathname); my $provider_dist_name = $distinfo->dist; # Check module provided by dist satisfies module requirement my $dist_version_for_module = $provider_dist_obj->version_for($required_module); if ($dist_version_for_module) { if ($dist_version_for_module < $required_module_version) { die "Module `$required_module' requires version " . "$required_module_version but distribution " . "`$provider_dist_name' has $dist_version_for_module"; } } else { $logger->warn("Could not verify that $provider_dist_name satisfies " . "requirement for $required_module " . "$required_module_version"); } return $provider_dist_name; } sub _map_module_to_distribution { my ($self, $snapshot, $dist_meta_ref, $module, $module_version) = @_; my $provider_dist_obj = $snapshot->find($module); my $provider_dist_name; my $provider_pathname; if ($provider_dist_obj) { $provider_dist_name = $self->_get_provider_dist_name( $provider_dist_obj, $module, $module_version); $provider_pathname = $provider_dist_obj->distfile; } else { $provider_dist_name = '::CORE::'; } $logger->debug("$provider_dist_name provides $module"); unless (exists $dist_meta_ref->{$provider_dist_name}) { $dist_meta_ref->{$provider_dist_name} = {modules => {}, pathname => ""}; } if (exists $dist_meta_ref->{$provider_dist_name}{modules}{$module}) { my $dist_meta_modules = $dist_meta_ref->{$provider_dist_name}{modules}; my $current = $dist_meta_modules->{$module}{minimum_version}; # parse versions so we can compare them my $x = version->parse($module_version); my $y = version->parse($current); if ($x < $y) { $logger->warn("Replacing requirement for $module $current " . "with $module $module_version"); $dist_meta_modules->{$module}{minimum_version} = $module_version; } } $dist_meta_ref->{$provider_dist_name}{modules}{$module} = { minimum_version => $module_version }; if ($provider_pathname) { $dist_meta_ref->{$provider_dist_name}{pathname} = $provider_pathname; } } sub _map_modules_to_distributions { my ($self, $snapshot, $dist_meta_ref, $required_modules_ref) = @_; $logger->debug("Requires:"); for my $module (keys %{$required_modules_ref}) { my $module_version = $required_modules_ref->{$module}; $logger->debug("\tRequirement: $module => $module_version"); $self->_map_module_to_distribution($snapshot, $dist_meta_ref, $module, $module_version); } } sub _run_carton { my ($self, $source_dir, $mirror_path) = @_; my $mirror_abs_path = File::Spec->rel2abs($mirror_path); # This will make cpanm search our local minicpan for a module # before it searches metacpan my $perl_carton_cpanm_opt = "--mirror $mirror_abs_path " . "--mirror https://cpan.metacpan.org " . "--mirror-only " . "--verbose"; my $loop = IO::Async::Loop->new; my $process = IO::Async::Process->new( command => ['carton', 'install'], # Sadly we need https://github.com/perl-carton/carton/pull/199 # for this, miyagawa suggested PERL_CARTON_MIRROR but that appears # to only let you set one mirror setup => [chdir => $source_dir, env => {%ENV, PERL_CARTON_CPANM_OPT => $perl_carton_cpanm_opt}], stdout => { on_read => sub { $self->_log_child_output(@_); } }, stderr => { on_read => sub { $self->_log_child_output(@_); } }, on_finish => sub { $self->_handle_carton_finish(@_); $loop->stop; }, on_exception => sub { $self->_handle_subprocess_exception(@_); } ); $loop->add($process); $loop->run; $logger->debug('Loading snapshot'); my $snapshot = Carton::Snapshot->new( path => "$source_dir/cpanfile.snapshot"); $snapshot->load(); return $snapshot; } sub _extract_tarball { my ($self, $tarball_path, $dstpath) = @_; my $tar = Archive::Extract->new(archive => $tarball_path); my $ok = $tar->extract(to => $dstpath); unless ($ok) { my $errstr = $tar->error; die "Couldn't extract tarball at `$tarball_path': $errstr"; } } sub _get_buildscript_type { my ($self, $source_dir) = @_; if (-e "$source_dir/Makefile.PL") { return 'Makefile.PL'; } elsif (-e "$source_dir/Build.PL") { return 'Build.PL'; } else { $logger->debug("Couldn't find Makefile.PL or Build.PL"); return undef; } } sub _run_plfile { my ($self, $source_dir, $source_tarball_prefix, $source_tarball_path, $pl_file) = @_; # TODO: It may aid debugging to have a flag that disables the automatic # cleanup of this tempdir my $tmpdir = File::Temp->newdir; $logger->debug("tmpdir: $tmpdir"); $self->_extract_tarball($source_tarball_path, $tmpdir); my $loop = IO::Async::Loop->new; my $process = IO::Async::Process->new( command => ['perl', $pl_file], setup => [chdir => "$tmpdir/$source_tarball_prefix"], stdin => { from => '' }, # send EOF to child so it doesn't block stdout => { on_read => sub { $self->_log_child_output(@_); } }, stderr => { on_read => sub { $self->_log_child_output(@_); } }, on_finish => sub { $logger->debug("Finished running $pl_file"); $loop->stop; }, on_exception => sub { $self->_handle_subprocess_exception(@_); } ); $loop->add($process); $loop->run; # no need to check whether this succeeds, lack of MYMETA.json is not fatal open(my $fh, '<', "$tmpdir/$source_tarball_prefix/MYMETA.json"); local $/; # set record separator to undef so we can slurp the file my $json = readline $fh; my $metadata_ref = decode_json($json); return $metadata_ref; } sub find_dependencies { my ($self, $root_dist_name, $snapshot, $build_deps_ref, $dist_meta_ref) = @_; for my $dist ($snapshot->distributions) { my $distinfo = CPAN::DistnameInfo->new($dist->pathname); my $dist_name = $distinfo->dist; # Import tool expects "master" if we don't have a version constraint. my $dist_version = $distinfo->version ? $distinfo->version : 'master'; $logger->debug("Distribution: $dist_name version: $dist_version"); $logger->debug("Name: $root_dist_name"); my %required_modules = %{$dist->requirements->as_string_hash}; $self->_map_modules_to_distributions($snapshot, $dist_meta_ref, \%required_modules); unless ($dist_name eq $root_dist_name) { $logger->debug("Adding $dist_name to list of build depends"); $build_deps_ref->{$dist_name} = $dist_version; } } } sub _get_distribution_metadata { # Returns a reference to a hash containing the modules # the given distribution provides my ($self, $distname) = @_; my $dist_meta_ref; my $input_metadata_path; if (exists $ENV{IMPORT_METAPATH}) { $input_metadata_path = $ENV{IMPORT_METAPATH}; } my $msg = $input_metadata_path // '(undef)'; $logger->debug("IMPORT_METAPATH: $msg"); unless ($input_metadata_path) { die "No import metadata available for `$distname', cannot proceed. " . "(this is most likely a bug, please file a report)."; } open(my $fh, '<', $input_metadata_path) or die "Couldn't get distribution metadata for `$distname': " . "couldn't open `$input_metadata_path' for reading"; local $/; # set record separator to undef so we can slurp the file my $json = readline $fh; my $metadata_ref = decode_json($json); # FIXME: This is horrible but presently you cannot lock a hash # from a reference due to a bug in perl, this will be fixed by the # next perl release my %distmeta = %{$metadata_ref->{cpan}{'dist-meta'}{$distname}}; # locking the hash will make it an error to attempt to # access any key that doesn't already exist, which is safer than the # perl default of returning undef, it will also make the hash read-only lock_hash_recurse(%distmeta); return \%distmeta; } sub _is_importing_root_package { my $self = shift; my $input_metadata_path; if (exists $ENV{IMPORT_METAPATH}) { $input_metadata_path = $ENV{IMPORT_METAPATH}; } return $input_metadata_path =~ m/ROOT.meta$/; } sub _add_root_modules_to_dist_meta { # Supplements dist-meta with root package's requirements, # needed since these module requirements for the root won't be specified # in the snapshot file, only the requirements of the root package's # children. my ($self, $snapshot, $dist_meta_ref, $root_metadata_ref) = @_; for my $dep_type ('build', 'configure', 'runtime') { my $deps_ref = $root_metadata_ref->{prereqs}{$dep_type}{requires}; $self->_map_modules_to_distributions($snapshot, $dist_meta_ref, $deps_ref); } } sub run { my $self = shift; $logger = get_logger(ref $self); # 1. Setup minicpan if it's not already # 2. Make a tarball of the source checkout # 3. Inject tarball into minicpan # 4. Use metadata supplied by parent to write a cpanfile for carton # 5. Run carton # 6. If this is the root then run its Makefile.PL or Build.PL # to obtain our module dependency data, since this isn't stored # in the carton snapshot # 7. Encode all dependency data as json and print my $argc = @ARGV; if ($argc < 2 or $argc > 3) { die 'usage: %s PACKAGE_SOURCE_DIR NAME [VERSION]'; } my ($source_dir, $dist_name, $dist_version) = @ARGV; $logger->debug("source_dir: $source_dir"); $logger->debug("dist_name: $dist_name"); $logger->debug("version: " . ($dist_version // "(no version)")); my $dist_meta_ref = $self->_get_distribution_metadata($dist_name); unless ($dist_meta_ref) { die "No metadata associated with distribution `$dist_name': " . "this is almost certainly a bug, please report it"; } $self->_create_local_mirror_skeleton($MINICPAN_MIRROR_PATH, $MINICPAN_STORE_PATH); $self->_write_minicpan_conf($MINICPAN_MIRROR_PATH, $MINICPAN_STORE_PATH, $MINICPAN_MIRROR_CONF_PATH); my $tarball_data_ref = $self->_get_dist_tarball_filename_and_authorid( $dist_name, $dist_version, $dist_meta_ref); my $dist_authorid = $tarball_data_ref->{author}; my $tarball_filename = $tarball_data_ref->{filename}; unless ($tarball_data_ref) { my $str_dist_version = $dist_version // '(no version)'; die "Couldn't obtain data needed for injection of " . "`$dist_name' `$str_dist_version'"; } my ($tarball_prefix, $tarball_path) = $self->_make_tarball($source_dir, $tarball_filename); $self->_inject_modules($MINICPAN_MIRROR_CONF_PATH, $dist_meta_ref, $dist_name, $dist_authorid, $tarball_path); # Using the dist's cpanfile doesn't work, # at least for Log::Any, seemingly due to stuff in on_develop # which we don't need, so we will always write our own cpanfile, # if a cpanfile already exists it will be overwritten my $modules_ref = $dist_meta_ref->{modules}; $self->_write_cpanfile("$source_dir/cpanfile", $modules_ref, $dist_version); $logger->debug("Running carton install for $dist_name in $source_dir"); my $snapshot = $self->_run_carton($source_dir, $MINICPAN_MIRROR_PATH); my %build_deps; my %dist_meta; $self->find_dependencies($dist_name, $snapshot, \%build_deps, \%dist_meta); # FIXME: There's no simple way to distinguish build deps from runtime deps # given only the snapshot file, # treating all as build deps will work for now my %deps = ('runtime-dependencies' => {}, 'build-dependencies' => \%build_deps, 'dist-meta' => \%dist_meta); if ($self->_is_importing_root_package) { $logger->debug("We are the root! ($dist_name)"); my $root_metadata_ref; my $plfile = $self->_get_buildscript_type($source_dir); if ($plfile) { $root_metadata_ref = $self->_run_plfile($source_dir, $tarball_prefix, $tarball_path, $plfile); } if ($root_metadata_ref) { $self->_add_root_modules_to_dist_meta($snapshot, \%dist_meta, $root_metadata_ref); } else { $logger->warn("Couldn't get metadata for root package " . "`$dist_name'"); } } say encode_json({cpan => \%deps}); } CPANDistDependencyFinder->new()->run();